![]() ![]() | ![]() |
Fix more problems with transaction failure.
1: #!/usr/bin/perl 2: # The LearningOnline Network with CAPA 3: # lonc maintains the connections to remote computers 4: # 5: # $Id: loncnew,v 1.11 2003/06/25 01:54:44 foxr Exp $ 6: # 7: # Copyright Michigan State University Board of Trustees 8: # 9: # This file is part of the LearningOnline Network with CAPA (LON-CAPA). 10: # 11: # LON-CAPA is free software; you can redistribute it and/or modify 12: # it under the terms of the GNU General Public License as published by 13: # the Free Software Foundation; either version 2 of the License, or 14: # (at your option) any later version. 15: # 16: # LON-CAPA is distributed in the hope that it will be useful, 17: # but WITHOUT ANY WARRANTY; without even the implied warranty of 18: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19: # GNU General Public License for more details. 20: # 21: # You should have received a copy of the GNU General Public License 22: # along with LON-CAPA; if not, write to the Free Software 23: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 24: # 25: # /home/httpd/html/adm/gpl.txt 26: # 27: # http://www.lon-capa.org/ 28: # 29: # 30: # new lonc handles n requestors spread out bver m connections to londs. 31: # This module is based on the Event class. 32: # Development iterations: 33: # - Setup basic event loop. (done) 34: # - Add timer dispatch. (done) 35: # - Add ability to accept lonc UNIX domain sockets. (done) 36: # - Add ability to create/negotiate lond connections (done). 37: # - Add general logic for dispatching requests and timeouts. (done). 38: # - Add support for the lonc/lond requests. (done). 39: # - Add logging/status monitoring. 40: # - Add Signal handling - HUP restarts. USR1 status report. 41: # - Add Configuration file I/O (done). 42: # - Add management/status request interface. 43: # - Add deferred request capability. (done) 44: # - Detect transmission timeouts. 45: # 46: 47: # Change log: 48: # $Log: loncnew,v $ 49: # Revision 1.11 2003/06/25 01:54:44 foxr 50: # Fix more problems with transaction failure. 51: # 52: # Revision 1.10 2003/06/24 02:46:04 foxr 53: # Put a limit on the number of times we'll retry a connection. 54: # Start getting the signal stuff put in as well...note that need to get signals 55: # going or else 6the client will permanently give up on dead servers. 56: # 57: # Revision 1.9 2003/06/13 02:38:43 foxr 58: # Add logging in 'expected format' 59: # 60: # Revision 1.8 2003/06/11 02:04:35 foxr 61: # Support delayed transactions... this is done uniformly by encapsulating 62: # transactions in an object ... a LondTransaction that is implemented by 63: # LondTransaction.pm 64: # 65: # Revision 1.7 2003/06/03 01:59:39 foxr 66: # complete coding to support deferred transactions. 67: # 68: # 69: 70: use lib "/home/httpd/lib/perl/"; 71: use lib "/home/foxr/newloncapa/types"; 72: use Event qw(:DEFAULT ); 73: use POSIX qw(:signal_h); 74: use IO::Socket; 75: use IO::Socket::INET; 76: use IO::Socket::UNIX; 77: use IO::File; 78: use IO::Handle; 79: use Socket; 80: use Crypt::IDEA; 81: use LONCAPA::Queue; 82: use LONCAPA::Stack; 83: use LONCAPA::LondConnection; 84: use LONCAPA::LondTransaction; 85: use LONCAPA::Configuration; 86: use LONCAPA::HashIterator; 87: 88: 89: # 90: # Disable all signals we might receive from outside for now. 91: # 92: $SIG{QUIT} = IGNORE; 93: $SIG{HUP} = IGNORE; 94: $SIG{USR1} = IGNORE; 95: $SIG{INT} = IGNORE; 96: $SIG{CHLD} = IGNORE; 97: $SIG{__DIE__} = IGNORE; 98: 99: 100: # Read the httpd configuration file to get perl variables 101: # normally set in apache modules: 102: 103: my $perlvarref = LONCAPA::Configuration::read_conf('loncapa.conf'); 104: my %perlvar = %{$perlvarref}; 105: 106: # 107: # parent and shared variables. 108: 109: my %ChildHash; # by pid -> host. 110: 111: 112: my $MaxConnectionCount = 10; # Will get from config later. 113: my $ClientConnection = 0; # Uniquifier for client events. 114: 115: my $DebugLevel = 0; 116: my $IdleTimeout= 3600; # Wait an hour before pruning connections. 117: 118: # 119: # The variables below are only used by the child processes. 120: # 121: my $RemoteHost; # Name of host child is talking to. 122: my $UnixSocketDir= "/home/httpd/sockets"; 123: my $IdleConnections = Stack->new(); # Set of idle connections 124: my %ActiveConnections; # Connections to the remote lond. 125: my %ActiveTransactions; # LondTransactions in flight. 126: my %ActiveClients; # Serial numbers of active clients by socket. 127: my $WorkQueue = Queue->new(); # Queue of pending transactions. 128: my $ConnectionCount = 0; 129: my $IdleSeconds = 0; # Number of seconds idle. 130: my $Status = ""; # Current status string. 131: my $ConnectionRetries=5; # Number of connection retries allowed. 132: my $ConnectionRetriesLeft=5; # Number of connection retries remaining. 133: 134: # 135: # The hash below gives the HTML format for log messages 136: # given a severity. 137: # 138: my %LogFormats; 139: 140: $LogFormats{"CRITICAL"} = "<font color=red>CRITICAL: %s</font>"; 141: $LogFormats{"SUCCESS"} = "<font color=green>SUCCESS: %s</font>"; 142: $LogFormats{"INFO"} = "<font color=yellow>INFO: %s</font>"; 143: $LogFormats{"WARNING"} = "<font color=blue>WARNING: %s</font>"; 144: $LogFormats{"DEFAULT"} = " %s "; 145: 146: 147: 148: =pod 149: 150: =head2 LogPerm 151: 152: Makes an entry into the permanent log file. 153: 154: =cut 155: sub LogPerm { 156: my $message=shift; 157: my $execdir=$perlvar{'lonDaemons'}; 158: my $now=time; 159: my $local=localtime($now); 160: my $fh=IO::File->new(">>$execdir/logs/lonnet.perm.log"); 161: print $fh "$now:$message:$local\n"; 162: } 163: 164: =pod 165: 166: =head2 Log 167: 168: Logs a message to the log file. 169: Parameters: 170: 171: =item severity 172: 173: One of CRITICAL, WARNING, INFO, SUCCESS used to select the 174: format string used to format the message. if the severity is 175: not a defined severity the Default format string is used. 176: 177: =item message 178: 179: The base message. In addtion to the format string, the message 180: will be appended to a string containing the name of our remote 181: host and the time will be formatted into the message. 182: 183: =cut 184: 185: sub Log { 186: my $severity = shift; 187: my $message = shift; 188: 189: if(!$LogFormats{$severity}) { 190: $severity = "DEFAULT"; 191: } 192: 193: my $format = $LogFormats{$severity}; 194: 195: # Put the window dressing in in front of the message format: 196: 197: my $now = time; 198: my $local = localtime($now); 199: my $finalformat = "$local ($$) [$RemoteHost] [$Status] "; 200: my $finalformat = $finalformat.$format."\n"; 201: 202: # open the file and put the result. 203: 204: my $execdir = $perlvar{'lonDaemons'}; 205: my $fh = IO::File->new(">>$execdir/logs/lonc.log"); 206: my $msg = sprintf($finalformat, $message); 207: print $fh $msg; 208: 209: 210: } 211: 212: 213: =pod 214: 215: =head2 GetPeerName 216: 217: Returns the name of the host that a socket object is connected to. 218: 219: =cut 220: 221: sub GetPeername { 222: my $connection = shift; 223: my $AdrFamily = shift; 224: my $peer = $connection->peername(); 225: my $peerport; 226: my $peerip; 227: if($AdrFamily == AF_INET) { 228: ($peerport, $peerip) = sockaddr_in($peer); 229: my $peername = gethostbyaddr($iaddr, $AdrFamily); 230: return $peername; 231: } elsif ($AdrFamily == AF_UNIX) { 232: my $peerfile; 233: ($peerfile) = sockaddr_un($peer); 234: return $peerfile; 235: } 236: } 237: #----------------------------- Timer management ------------------------ 238: =pod 239: 240: =head2 Debug 241: 242: Invoked to issue a debug message. 243: 244: =cut 245: 246: sub Debug { 247: my $level = shift; 248: my $message = shift; 249: if ($level <= $DebugLevel) { 250: print $message." host = ".$RemoteHost."\n"; 251: } 252: } 253: 254: sub SocketDump { 255: my $level = shift; 256: my $socket= shift; 257: if($level <= $DebugLevel) { 258: $socket->Dump(); 259: } 260: } 261: 262: =pod 263: 264: =head2 ShowStatus 265: 266: Place some text as our pid status. 267: and as what we return in a SIGUSR1 268: 269: =cut 270: sub ShowStatus { 271: my $state = shift; 272: my $now = time; 273: my $local = localtime($now); 274: $Status = $local.": ".$state; 275: $0='lonc: '.$state.' '.$local; 276: } 277: 278: =pod 279: 280: =head2 Tick 281: 282: Invoked each timer tick. 283: 284: =cut 285: 286: 287: sub Tick { 288: my $client; 289: ShowStatus(GetServerHost()." Connection count: ".$ConnectionCount); 290: Debug(10,"Tick"); 291: Debug(10," Current connection count: ".$ConnectionCount); 292: foreach $client (keys %ActiveClients) { 293: Debug(10," Have client: with id: ".$ActiveClients{$client}); 294: } 295: # Is it time to prune connection count: 296: 297: 298: if($IdleConnections->Count() && 299: ($WorkQueue->Count() == 0)) { # Idle connections and nothing to do? 300: $IdleSeconds++; 301: if($IdleSeconds > $IdleTimeout) { # Prune a connection... 302: $Socket = $IdleConnections->pop(); 303: KillSocket($Socket); 304: } 305: } else { 306: $IdleSeconds = 0; # Reset idle count if not idle. 307: } 308: 309: # Do we have work in the queue, but no connections to service them? 310: # If so, try to make some new connections to get things going again. 311: # 312: 313: my $Requests = $WorkQueue->Count(); 314: if (($ConnectionCount == 0) && ($Requests > 0)) { 315: if ($ConnectionRetriesLeft > 0) { 316: my $Connections = ($Requests <= $MaxConnectionCount) ? 317: $Requests : $MaxConnectionCount; 318: Debug(1,"Work but no connections, start ".$Connections." of them"); 319: for ($i =0; $i < $Connections; $i++) { 320: MakeLondConnection(); 321: } 322: } else { 323: Debug(1,"Work in queue, but gave up on connections..flushing\n"); 324: EmptyQueue(); # Connections can't be established. 325: } 326: 327: } 328: } 329: 330: =pod 331: 332: =head2 SetupTimer 333: 334: Sets up a 1 per sec recurring timer event. The event handler is used to: 335: 336: =item 337: 338: Trigger timeouts on communications along active sockets. 339: 340: =item 341: 342: Trigger disconnections of idle sockets. 343: 344: =cut 345: 346: sub SetupTimer { 347: Debug(6, "SetupTimer"); 348: Event->timer(interval => 1, debug => 1, cb => \&Tick ); 349: } 350: 351: =pod 352: 353: =head2 ServerToIdle 354: 355: This function is called when a connection to the server is 356: ready for more work. 357: 358: If there is work in the Work queue the top element is dequeued 359: and the connection will start to work on it. If the work queue is 360: empty, the connection is pushed on the idle connection stack where 361: it will either get another work unit, or alternatively, if it sits there 362: long enough, it will be shut down and released. 363: 364: =cut 365: 366: sub ServerToIdle { 367: my $Socket = shift; # Get the socket. 368: delete($ActiveTransactions{$Socket}); # Server has no transaction 369: 370: &Debug(6, "Server to idle"); 371: 372: # If there's work to do, start the transaction: 373: 374: $reqdata = $WorkQueue->dequeue(); # This is a LondTransaction 375: unless($reqdata eq undef) { 376: Debug(9, "Queue gave request data: ".$reqdata->getRequest()); 377: &StartRequest($Socket, $reqdata); 378: 379: } else { 380: 381: # There's no work waiting, so push the server to idle list. 382: &Debug(8, "No new work requests, server connection going idle"); 383: $IdleConnections->push($Socket); 384: } 385: } 386: 387: =pod 388: 389: =head2 ClientWritable 390: 391: Event callback for when a client socket is writable. 392: 393: This callback is established when a transaction reponse is 394: avaiable from lond. The response is forwarded to the unix socket 395: as it becomes writable in this sub. 396: 397: Parameters: 398: 399: =item Event 400: 401: The event that has been triggered. Event->w->data is 402: the data and Event->w->fd is the socket to write. 403: 404: =cut 405: 406: sub ClientWritable { 407: my $Event = shift; 408: my $Watcher = $Event->w; 409: my $Data = $Watcher->data; 410: my $Socket = $Watcher->fd; 411: 412: # Try to send the data: 413: 414: &Debug(6, "ClientWritable writing".$Data); 415: &Debug(9, "Socket is: ".$Socket); 416: 417: if($Socket->connected) { 418: my $result = $Socket->send($Data, 0); 419: 420: # $result undefined: the write failed. 421: # otherwise $result is the number of bytes written. 422: # Remove that preceding string from the data. 423: # If the resulting data is empty, destroy the watcher 424: # and set up a read event handler to accept the next 425: # request. 426: 427: &Debug(9,"Send result is ".$result." Defined: ".defined($result)); 428: if(defined($result)) { 429: &Debug(9, "send result was defined"); 430: if($result == length($Data)) { # Entire string sent. 431: &Debug(9, "ClientWritable data all written"); 432: $Watcher->cancel(); 433: # 434: # Set up to read next request from socket: 435: 436: my $descr = sprintf("Connection to lonc client %d", 437: $ActiveClients{$Socket}); 438: Event->io(cb => \&ClientRequest, 439: poll => 'r', 440: desc => $descr, 441: data => "", 442: fd => $Socket); 443: 444: } else { # Partial string sent. 445: $Watcher->data(substr($Data, $result)); 446: } 447: 448: } else { # Error of some sort... 449: 450: # Some errnos are possible: 451: my $errno = $!; 452: if($errno == POSIX::EWOULDBLOCK || 453: $errno == POSIX::EAGAIN || 454: $errno == POSIX::EINTR) { 455: # No action taken? 456: } else { # Unanticipated errno. 457: &Debug(5,"ClientWritable error or peer shutdown".$RemoteHost); 458: $Watcher->cancel; # Stop the watcher. 459: $Socket->shutdown(2); # Kill connection 460: $Socket->close(); # Close the socket. 461: } 462: 463: } 464: } else { 465: $Watcher->cancel(); # A delayed request...just cancel. 466: } 467: } 468: 469: =pod 470: 471: =head2 CompleteTransaction 472: 473: Called when the reply data has been received for a lond 474: transaction. The reply data must now be sent to the 475: ultimate client on the other end of the Unix socket. This is 476: done by setting up a writable event for the socket with the 477: data the reply data. 478: 479: Parameters: 480: 481: =item Socket 482: 483: Socket on which the lond transaction occured. This is a 484: LondConnection. The data received is in the TransactionReply member. 485: 486: =item Transaction 487: 488: The transaction that is being completed. 489: 490: =cut 491: 492: sub CompleteTransaction { 493: &Debug(6,"Complete transaction"); 494: my $Socket = shift; 495: my $Transaction = shift; 496: 497: if (!$Transaction->isDeferred()) { # Normal transaction 498: my $data = $Socket->GetReply(); # Data to send. 499: StartClientReply($Transaction, $data); 500: } else { # Delete deferred transaction file. 501: Log("SUCCESS", "A delayed transaction was completed"); 502: LogPerm("S:$Client:".$Transaction->getRequest()); 503: unlink $Transaction->getFile(); 504: } 505: } 506: =pod 507: =head1 StartClientReply 508: 509: Initiates a reply to a client where the reply data is a parameter. 510: 511: =head2 parameters: 512: 513: =item Transaction 514: 515: The transaction for which we are responding to the client. 516: 517: =item data 518: 519: The data to send to apached client. 520: 521: =cut 522: sub StartClientReply { 523: my $Transaction = shift; 524: my $data = shift; 525: 526: my $Client = $Transaction->getClient(); 527: 528: &Debug(8," Reply was: ".$data); 529: my $Serial = $ActiveClients{$Client}; 530: my $desc = sprintf("Connection to lonc client %d", 531: 532: $Serial); 533: Event->io(fd => $Client, 534: poll => "w", 535: desc => $desc, 536: cb => \&ClientWritable, 537: data => $data); 538: } 539: =pod 540: =head2 FailTransaction 541: 542: Finishes a transaction with failure because the associated lond socket 543: disconnected. There are two possibilities: 544: - The transaction is deferred: in which case we just quietly 545: delete the transaction since there is no client connection. 546: - The transaction is 'live' in which case we initiate the sending 547: of "con_lost" to the client. 548: 549: Deleting the transaction means killing it from the 550: %ActiveTransactions hash. 551: 552: Parameters: 553: 554: =item client 555: 556: The LondTransaction we are failing. 557: 558: =cut 559: 560: sub FailTransaction { 561: my $transaction = shift; 562: Debug(1, "Failing transaction: ".$transaction->getRequest()); 563: if (!$transaction->isDeferred()) { # If the transaction is deferred we'll get to it. 564: my $client = $transaction->getClient(); 565: Debug(1," Replying con_lost to ".$transaction->getRequest()); 566: StartClientReply($transaction, "con_lost\n"); 567: } 568: 569: } 570: 571: =pod 572: =head1 EmptyQueue 573: 574: Fails all items in the work queue with con_lost. 575: Note that each item in the work queue is a transaction. 576: 577: =cut 578: sub EmptyQueue { 579: while($WorkQueue->Count()) { 580: my $request = $WorkQueue->dequeue(); # This is a transaction 581: FailTransaction($request); 582: } 583: } 584: 585: =pod 586: 587: =head2 CloseAllLondConnections 588: 589: Close all connections open on lond prior to exit e.g. 590: 591: =cut 592: sub CloseAllLondConnections { 593: foreach $Socket (keys %ActiveConnections) { 594: KillSocket($Socket); 595: } 596: } 597: =cut 598: 599: =pod 600: 601: =head2 KillSocket 602: 603: Destroys a socket. This function can be called either when a socket 604: has died of 'natural' causes or because a socket needs to be pruned due to 605: idleness. If the socket has died naturally, if there are no longer any 606: live connections a new connection is created (in case there are transactions 607: in the queue). If the socket has been pruned, it is never re-created. 608: 609: Parameters: 610: 611: =item Socket 612: 613: The socket to kill off. 614: 615: =item Restart 616: 617: nonzero if we are allowed to create a new connection. 618: 619: 620: =cut 621: sub KillSocket { 622: my $Socket = shift; 623: 624: $Socket->Shutdown(); 625: 626: # If the socket came from the active connection set, 627: # delete its transaction... note that FailTransaction should 628: # already have been called!!! 629: # otherwise it came from the idle set. 630: # 631: 632: if(exists($ActiveTransactions{$Socket})) { 633: delete ($ActiveTransactions{$Socket}); 634: } 635: if(exists($ActiveConnections{$Socket})) { 636: delete($ActiveConnections{$Socket}); 637: } 638: $ConnectionCount--; 639: 640: # If the connection count has gone to zero and there is work in the 641: # work queue, the work all gets failed with con_lost. 642: # 643: if($ConnectionCount == 0) { 644: EmptyQueue; 645: } 646: } 647: 648: =pod 649: 650: =head2 LondReadable 651: 652: This function is called whenever a lond connection 653: is readable. The action is state dependent: 654: 655: =head3 State=Initialized 656: 657: We''re waiting for the challenge, this is a no-op until the 658: state changes. 659: 660: =head3 State=Challenged 661: 662: The challenge has arrived we need to transition to Writable. 663: The connection must echo the challenge back. 664: 665: =head3 State=ChallengeReplied 666: 667: The challenge has been replied to. The we are receiveing the 668: 'ok' from the partner. 669: 670: =head3 State=RequestingKey 671: 672: The ok has been received and we need to send the request for 673: an encryption key. Transition to writable for that. 674: 675: =head3 State=ReceivingKey 676: 677: The the key has been requested, now we are reading the new key. 678: 679: =head3 State=Idle 680: 681: The encryption key has been negotiated or we have finished 682: reading data from the a transaction. If the callback data has 683: a client as well as the socket iformation, then we are 684: doing a transaction and the data received is relayed to the client 685: before the socket is put on the idle list. 686: 687: =head3 State=SendingRequest 688: 689: I do not think this state can be received here, but if it is, 690: the appropriate thing to do is to transition to writable, and send 691: the request. 692: 693: =head3 State=ReceivingReply 694: 695: We finished sending the request to the server and now transition 696: to readable to receive the reply. 697: 698: The parameter to this function are: 699: 700: The event. Implicit in this is the watcher and its data. The data 701: contains at least the lond connection object and, if a 702: transaction is in progress, the socket attached to the local client. 703: 704: =cut 705: 706: sub LondReadable { 707: 708: my $Event = shift; 709: my $Watcher = $Event->w; 710: my $Socket = $Watcher->data; 711: my $client = undef; 712: 713: &Debug(6,"LondReadable called state = ".$State); 714: 715: 716: my $State = $Socket->GetState(); # All action depends on the state. 717: 718: SocketDump(6, $Socket); 719: 720: if($Socket->Readable() != 0) { 721: # bad return from socket read. Currently this means that 722: # The socket has become disconnected. We fail the transaction. 723: 724: if(exists($ActiveTransactions{$Socket})) { 725: Debug(3,"Lond connection lost failing transaction"); 726: FailTransaction($ActiveTransactions{$Socket}); 727: } 728: $Watcher->cancel(); 729: KillSocket($Socket); 730: return; 731: } 732: SocketDump(6,$Socket); 733: 734: $State = $Socket->GetState(); # Update in case of transition. 735: &Debug(6, "After read, state is ".$State); 736: 737: if($State eq "Initialized") { 738: 739: 740: } elsif ($State eq "ChallengeReceived") { 741: # The challenge must be echoed back; The state machine 742: # in the connection takes care of setting that up. Just 743: # need to transition to writable: 744: 745: $Watcher->cb(\&LondWritable); 746: $Watcher->poll("w"); 747: 748: } elsif ($State eq "ChallengeReplied") { 749: 750: 751: } elsif ($State eq "RequestingKey") { 752: # The ok was received. Now we need to request the key 753: # That requires us to be writable: 754: 755: $Watcher->cb(\&LondWritable); 756: $Watcher->poll("w"); 757: 758: } elsif ($State eq "ReceivingKey") { 759: 760: } elsif ($State eq "Idle") { 761: # If necessary, complete a transaction and then go into the 762: # idle queue. 763: $Watcher->cancel(); 764: if(exists($ActiveTransactions{$Socket})) { 765: Debug(8,"Completing transaction!!"); 766: CompleteTransaction($Socket, 767: $ActiveTransactions{$Socket}); 768: } else { 769: Log("SUCCESS", "Connection ".$ConnectionCount." to " 770: .$RemoteHost." now ready for action"); 771: } 772: ServerToIdle($Socket); # Next work unit or idle. 773: 774: } elsif ($State eq "SendingRequest") { 775: # We need to be writable for this and probably don't belong 776: # here inthe first place. 777: 778: Deubg(6, "SendingRequest state encountered in readable"); 779: $Watcher->poll("w"); 780: $Watcher->cb(\&LondWritable); 781: 782: } elsif ($State eq "ReceivingReply") { 783: 784: 785: } else { 786: # Invalid state. 787: Debug(4, "Invalid state in LondReadable"); 788: } 789: } 790: 791: =pod 792: 793: =head2 LondWritable 794: 795: This function is called whenever a lond connection 796: becomes writable while there is a writeable monitoring 797: event. The action taken is very state dependent: 798: 799: =head3 State = Connected 800: 801: The connection is in the process of sending the 'init' hailing to the 802: lond on the remote end. The connection object''s Writable member is 803: called. On error, ConnectionError is called to destroy the connection 804: and remove it from the ActiveConnections hash 805: 806: =head3 Initialized 807: 808: 'init' has been sent, writability monitoring is removed and 809: readability monitoring is started with LondReadable as the callback. 810: 811: =head3 ChallengeReceived 812: 813: The connection has received the who are you challenge from the remote 814: system, and is in the process of sending the challenge 815: response. Writable is called. 816: 817: =head3 ChallengeReplied 818: 819: The connection has replied to the initial challenge The we switch to 820: monitoring readability looking for the server to reply with 'ok'. 821: 822: =head3 RequestingKey 823: 824: The connection is in the process of requesting its encryption key. 825: Writable is called. 826: 827: =head3 ReceivingKey 828: 829: The connection has sent the request for a key. Switch to readability 830: monitoring to accept the key 831: 832: =head3 SendingRequest 833: 834: The connection is in the process of sending a request to the server. 835: This request is part of a client transaction. All the states until 836: now represent the client setup protocol. Writable is called. 837: 838: =head3 ReceivingReply 839: 840: The connection has sent a request. Now it must receive a reply. 841: Readability monitoring is requested. 842: 843: This function is an event handler and therefore receives as 844: a parameter the event that has fired. The data for the watcher 845: of this event is a reference to a list of one or two elements, 846: depending on state. The first (and possibly only) element is the 847: socket. The second (present only if a request is in progress) 848: is the socket on which to return a reply to the caller. 849: 850: =cut 851: 852: sub LondWritable { 853: my $Event = shift; 854: my $Watcher = $Event->w; 855: my $Socket = $Watcher->data; 856: my $State = $Socket->GetState(); 857: 858: Debug(6,"LondWritable State = ".$State."\n"); 859: 860: 861: # Figure out what to do depending on the state of the socket: 862: 863: 864: 865: 866: SocketDump(6,$Socket); 867: 868: if ($State eq "Connected") { 869: 870: if ($Socket->Writable() != 0) { 871: # The write resulted in an error. 872: # We'll treat this as if the socket got disconnected: 873: Log("WARNING", "Connection to ".$RemoteHost. 874: " has been disconnected"); 875: $Watcher->cancel(); 876: KillSocket($Socket); 877: return; 878: } 879: # "init" is being sent... 880: 881: 882: } elsif ($State eq "Initialized") { 883: 884: # Now that init was sent, we switch 885: # to watching for readability: 886: 887: $Watcher->cb(\&LondReadable); 888: $Watcher->poll("r"); 889: 890: } elsif ($State eq "ChallengeReceived") { 891: # We received the challenge, now we 892: # are echoing it back. This is a no-op, 893: # we're waiting for the state to change 894: 895: if($Socket->Writable() != 0) { 896: 897: $Watcher->cancel(); 898: KillSocket($Socket); 899: return; 900: } 901: 902: } elsif ($State eq "ChallengeReplied") { 903: # The echo was sent back, so we switch 904: # to watching readability. 905: 906: $Watcher->cb(\&LondReadable); 907: $Watcher->poll("r"); 908: 909: } elsif ($State eq "RequestingKey") { 910: # At this time we're requesting the key. 911: # again, this is essentially a no-op. 912: # we'll write the next chunk until the 913: # state changes. 914: 915: if($Socket->Writable() != 0) { 916: # Write resulted in an error. 917: 918: $Watcher->cancel(); 919: KillSocket($Socket); 920: return; 921: 922: } 923: } elsif ($State eq "ReceivingKey") { 924: # Now we need to wait for the key 925: # to come back from the peer: 926: 927: $Watcher->cb(\&LondReadable); 928: $Watcher->poll("r"); 929: 930: } elsif ($State eq "SendingRequest") { 931: # At this time we are sending a request to the 932: # peer... write the next chunk: 933: 934: if($Socket->Writable() != 0) { 935: 936: if(exists($ActiveTransactions{$Socket})) { 937: Debug(3, "Lond connection lost, failing transactions"); 938: FailTransaction($ActiveTransactions{$Socket}); 939: } 940: $Watcher->cancel(); 941: KillSocket($Socket); 942: return; 943: 944: } 945: 946: } elsif ($State eq "ReceivingReply") { 947: # The send has completed. Wait for the 948: # data to come in for a reply. 949: Debug(8,"Writable sent request/receiving reply"); 950: $Watcher->cb(\&LondReadable); 951: $Watcher->poll("r"); 952: 953: } else { 954: # Control only passes here on an error: 955: # the socket state does not match any 956: # of the known states... so an error 957: # must be logged. 958: 959: &Debug(4, "Invalid socket state ".$State."\n"); 960: } 961: 962: } 963: =pod 964: 965: =cut 966: sub QueueDelayed { 967: Debug(3,"QueueDelayed called"); 968: 969: my $path = "$perlvar{'lonSockDir'}/delayed"; 970: 971: Debug(4, "Delayed path: ".$path); 972: opendir(DIRHANDLE, $path); 973: 974: @alldelayed = grep /\.$RemoteHost$/, readdir DIRHANDLE; 975: Debug(4, "Got ".$alldelayed." delayed files"); 976: closedir(DIRHANDLE); 977: my $dfname; 978: my $reqfile; 979: foreach $dfname (sort @alldelayed) { 980: $reqfile = "$path/$dfname"; 981: Debug(4, "queueing ".$reqfile); 982: my $Handle = IO::File->new($reqfile); 983: my $cmd = <$Handle>; 984: chomp $cmd; # There may or may not be a newline... 985: $cmd = $cmd."\ny"; # now for sure there's exactly one newline. 986: my $Transaction = LondTransaction->new($cmd); 987: $Transaction->SetDeferred($reqfile); 988: QueueTransaction($Transaction); 989: } 990: 991: } 992: 993: =pod 994: 995: =head2 MakeLondConnection 996: 997: Create a new lond connection object, and start it towards its initial 998: idleness. Once idle, it becomes elligible to receive transactions 999: from the work queue. If the work queue is not empty when the 1000: connection is completed and becomes idle, it will dequeue an entry and 1001: start off on it. 1002: 1003: =cut 1004: 1005: sub MakeLondConnection { 1006: Debug(4,"MakeLondConnection to ".GetServerHost()." on port " 1007: .GetServerPort()); 1008: 1009: my $Connection = LondConnection->new(&GetServerHost(), 1010: &GetServerPort()); 1011: 1012: if($Connection == undef) { # Needs to be more robust later. 1013: Log("CRITICAL","Failed to make a connection with lond."); 1014: $ConnectionRetriesLeft--; 1015: return 0; # Failure. 1016: } else { 1017: $ConnectionRetriesLeft = $ConnectionRetries; # success resets the count 1018: # The connection needs to have writability 1019: # monitored in order to send the init sequence 1020: # that starts the whole authentication/key 1021: # exchange underway. 1022: # 1023: my $Socket = $Connection->GetSocket(); 1024: if($Socket == undef) { 1025: die "did not get a socket from the connection"; 1026: } else { 1027: &Debug(9,"MakeLondConnection got socket: ".$Socket); 1028: } 1029: 1030: 1031: $event = Event->io(fd => $Socket, 1032: poll => 'w', 1033: cb => \&LondWritable, 1034: data => $Connection, 1035: desc => 'Connection to lond server'); 1036: $ActiveConnections{$Connection} = $event; 1037: 1038: $ConnectionCount++; 1039: Debug(4, "Connection count = ".$ConnectionCount); 1040: if($ConnectionCount == 1) { # First Connection: 1041: QueueDelayed; 1042: } 1043: Log("SUCESS", "Created connection ".$ConnectionCount 1044: ." to host ".GetServerHost()); 1045: return 1; # Return success. 1046: } 1047: 1048: } 1049: 1050: =pod 1051: 1052: =head2 StartRequest 1053: 1054: Starts a lond request going on a specified lond connection. 1055: parameters are: 1056: 1057: =item $Lond 1058: 1059: Connection to the lond that will send the transaction and receive the 1060: reply. 1061: 1062: =item $Client 1063: 1064: Connection to the client that is making this request We got the 1065: request from this socket, and when the request has been relayed to 1066: lond and we get a reply back from lond it will get sent to this 1067: socket. 1068: 1069: =item $Request 1070: 1071: The text of the request to send. 1072: 1073: =cut 1074: 1075: sub StartRequest { 1076: my $Lond = shift; 1077: my $Request = shift; # This is a LondTransaction. 1078: 1079: Debug(6, "StartRequest: ".$Request->getRequest()); 1080: 1081: my $Socket = $Lond->GetSocket(); 1082: 1083: $Request->Activate($Lond); 1084: $ActiveTransactions{$Lond} = $Request; 1085: 1086: $Lond->InitiateTransaction($Request->getRequest()); 1087: $event = Event->io(fd => $Socket, 1088: poll => "w", 1089: cb => \&LondWritable, 1090: data => $Lond, 1091: desc => "lond transaction connection"); 1092: $ActiveConnections{$Lond} = $event; 1093: Debug(8," Start Request made watcher data with ".$event->data."\n"); 1094: } 1095: 1096: =pod 1097: 1098: =head2 QueueTransaction 1099: 1100: If there is an idle lond connection, it is put to work doing this 1101: transaction. Otherwise, the transaction is placed in the work queue. 1102: If placed in the work queue and the maximum number of connections has 1103: not yet been created, a new connection will be started. Our goal is 1104: to eventually have a sufficient number of connections that the work 1105: queue will typically be empty. parameters are: 1106: 1107: =item Socket 1108: 1109: open on the lonc client. 1110: 1111: =item Request 1112: 1113: data to send to the lond. 1114: 1115: =cut 1116: 1117: sub QueueTransaction { 1118: 1119: my $requestData = shift; # This is a LondTransaction. 1120: my $cmd = $requestData->getRequest(); 1121: 1122: Debug(6,"QueueTransaction: ".$cmd); 1123: 1124: my $LondSocket = $IdleConnections->pop(); 1125: if(!defined $LondSocket) { # Need to queue request. 1126: Debug(8,"Must queue..."); 1127: $WorkQueue->enqueue($requestData); 1128: if($ConnectionCount < $MaxConnectionCount) { 1129: Debug(4,"Starting additional lond connection"); 1130: MakeLondConnection(); 1131: } 1132: } else { # Can start the request: 1133: Debug(8,"Can start..."); 1134: StartRequest($LondSocket, $requestData); 1135: } 1136: } 1137: 1138: #-------------------------- Lonc UNIX socket handling --------------------- 1139: 1140: =pod 1141: 1142: =head2 ClientRequest 1143: Callback that is called when data can be read from the UNIX domain 1144: socket connecting us with an apache server process. 1145: 1146: =cut 1147: 1148: sub ClientRequest { 1149: Debug(6, "ClientRequest"); 1150: my $event = shift; 1151: my $watcher = $event->w; 1152: my $socket = $watcher->fd; 1153: my $data = $watcher->data; 1154: my $thisread; 1155: 1156: Debug(9, " Watcher named: ".$watcher->desc); 1157: 1158: my $rv = $socket->recv($thisread, POSIX::BUFSIZ, 0); 1159: Debug(8, "rcv: data length = ".length($thisread) 1160: ." read =".$thisread); 1161: unless (defined $rv && length($thisread)) { 1162: # Likely eof on socket. 1163: Debug(5,"Client Socket closed on lonc for ".$RemoteHost); 1164: close($socket); 1165: $watcher->cancel(); 1166: delete($ActiveClients{$socket}); 1167: return; 1168: } 1169: Debug(8,"Data: ".$data." this read: ".$thisread); 1170: $data = $data.$thisread; # Append new data. 1171: $watcher->data($data); 1172: if($data =~ /(.*\n)/) { # Request entirely read. 1173: if($data eq "close_connection_exit\n") { 1174: Log("CRITICAL", 1175: "Request Close Connection ... exiting"); 1176: CloseAllLondConnections(); 1177: exit; 1178: } 1179: Debug(8, "Complete transaction received: ".$data); 1180: my $Transaction = LondTransaction->new($data); 1181: $Transaction->SetClient($socket); 1182: QueueTransaction($Transaction); 1183: $watcher->cancel(); # Done looking for input data. 1184: } 1185: 1186: } 1187: 1188: 1189: =pod 1190: 1191: =head2 NewClient 1192: 1193: Callback that is called when a connection is received on the unix 1194: socket for a new client of lonc. The callback is parameterized by the 1195: event.. which is a-priori assumed to be an io event, and therefore has 1196: an fd member that is the Listener socket. We Accept the connection 1197: and register a new event on the readability of that socket: 1198: 1199: =cut 1200: 1201: sub NewClient { 1202: Debug(6, "NewClient"); 1203: my $event = shift; # Get the event parameters. 1204: my $watcher = $event->w; 1205: my $socket = $watcher->fd; # Get the event' socket. 1206: my $connection = $socket->accept(); # Accept the client connection. 1207: Debug(8,"Connection request accepted from " 1208: .GetPeername($connection, AF_UNIX)); 1209: 1210: 1211: my $description = sprintf("Connection to lonc client %d", 1212: $ClientConnection); 1213: Debug(9, "Creating event named: ".$description); 1214: Event->io(cb => \&ClientRequest, 1215: poll => 'r', 1216: desc => $description, 1217: data => "", 1218: fd => $connection); 1219: $ActiveClients{$connection} = $ClientConnection; 1220: $ClientConnection++; 1221: } 1222: 1223: =pod 1224: 1225: =head2 GetLoncSocketPath 1226: 1227: Returns the name of the UNIX socket on which to listen for client 1228: connections. 1229: 1230: =cut 1231: 1232: sub GetLoncSocketPath { 1233: return $UnixSocketDir."/".GetServerHost(); 1234: } 1235: 1236: =pod 1237: 1238: =head2 GetServerHost 1239: 1240: Returns the host whose lond we talk with. 1241: 1242: =cut 1243: 1244: sub GetServerHost { 1245: return $RemoteHost; # Setup by the fork. 1246: } 1247: 1248: =pod 1249: 1250: =head2 GetServerPort 1251: 1252: Returns the lond port number. 1253: 1254: =cut 1255: 1256: sub GetServerPort { 1257: return $perlvar{londPort}; 1258: } 1259: 1260: =pod 1261: 1262: =head2 SetupLoncListener 1263: 1264: Setup a lonc listener event. The event is called when the socket 1265: becomes readable.. that corresponds to the receipt of a new 1266: connection. The event handler established will accept the connection 1267: (creating a communcations channel), that int turn will establish 1268: another event handler to subess requests. 1269: 1270: =cut 1271: 1272: sub SetupLoncListener { 1273: 1274: my $socket; 1275: my $SocketName = GetLoncSocketPath(); 1276: unlink($SocketName); 1277: unless ($socket =IO::Socket::UNIX->new(Local => $SocketName, 1278: Listen => 10, 1279: Type => SOCK_STREAM)) { 1280: die "Failed to create a lonc listner socket"; 1281: } 1282: Event->io(cb => \&NewClient, 1283: poll => 'r', 1284: desc => 'Lonc listener Unix Socket', 1285: fd => $socket); 1286: } 1287: 1288: =pod 1289: 1290: =head2 SignalledToDeath 1291: 1292: Called in response to a signal that causes a chid process to die. 1293: 1294: =cut 1295: 1296: =pod 1297: 1298: sub SignalledToDeath { 1299: my ($signal) = @_; 1300: chomp($signal); 1301: Log("CRITICAL", "Abnormal exit. Child $$ for $RemoteHost " 1302: ."died through "."\"$signal\""); 1303: LogPerm("F:lonc: $$ on $RemoteHost signalled to death: " 1304: ."\"$signal\""); 1305: die("Signal abnormal end"); 1306: 1307: } 1308: =head2 ChildProcess 1309: 1310: This sub implements a child process for a single lonc daemon. 1311: 1312: =cut 1313: 1314: sub ChildProcess { 1315: 1316: 1317: # For now turn off signals. 1318: 1319: $SIG{QUIT} = \&SignalledToDeath; 1320: $SIG{HUP} = IGNORE; 1321: $SIG{USR1} = IGNORE; 1322: $SIG{INT} = IGNORE; 1323: $SIG{CHLD} = IGNORE; 1324: $SIG{__DIE__} = \&SignalledToDeath; 1325: 1326: SetupTimer(); 1327: 1328: SetupLoncListener(); 1329: 1330: $Event::Debuglevel = $DebugLevel; 1331: 1332: Debug(9, "Making initial lond connection for ".$RemoteHost); 1333: 1334: # Setup the initial server connection: 1335: 1336: # &MakeLondConnection(); // let first work requirest do it. 1337: 1338: 1339: Debug(9,"Entering event loop"); 1340: my $ret = Event::loop(); # Start the main event loop. 1341: 1342: 1343: die "Main event loop exited!!!"; 1344: } 1345: 1346: # Create a new child for host passed in: 1347: 1348: sub CreateChild { 1349: my $host = shift; 1350: $RemoteHost = $host; 1351: Log("CRITICAL", "Forking server for ".$host); 1352: $pid = fork; 1353: if($pid) { # Parent 1354: $ChildHash{$pid} = $RemoteHost; 1355: } else { # child. 1356: ShowStatus("Connected to ".$RemoteHost); 1357: ChildProcess; 1358: } 1359: 1360: } 1361: # 1362: # Parent process logic pass 1: 1363: # For each entry in the hosts table, we will 1364: # fork off an instance of ChildProcess to service the transactions 1365: # to that host. Each pid will be entered in a global hash 1366: # with the value of the key, the host. 1367: # The parent will then enter a loop to wait for process exits. 1368: # Each exit gets logged and the child gets restarted. 1369: # 1370: 1371: # 1372: # Fork and start in new session so hang-up isn't going to 1373: # happen without intent. 1374: # 1375: 1376: 1377: 1378: 1379: 1380: 1381: ShowStatus("Forming new session"); 1382: my $childpid = fork; 1383: if ($childpid != 0) { 1384: sleep 4; # Give child a chacne to break to 1385: exit 0; # a new sesion. 1386: } 1387: # 1388: # Write my pid into the pid file so I can be located 1389: # 1390: 1391: ShowStatus("Parent writing pid file:"); 1392: $execdir = $perlvar{'lonDaemons'}; 1393: open (PIDSAVE, ">$execdir/logs/lonc.pid"); 1394: print PIDSAVE "$$\n"; 1395: close(PIDSAVE); 1396: 1397: if (POSIX::setsid() < 0) { 1398: print "Could not create new session\n"; 1399: exit -1; 1400: } 1401: 1402: ShowStatus("Forking node servers"); 1403: 1404: Log("CRITICAL", "--------------- Starting children ---------------"); 1405: 1406: my $HostIterator = LondConnection::GetHostIterator; 1407: while (! $HostIterator->end()) { 1408: 1409: $hostentryref = $HostIterator->get(); 1410: CreateChild($hostentryref->[0]); 1411: $HostIterator->next(); 1412: } 1413: 1414: # Maintain the population: 1415: 1416: ShowStatus("Parent keeping the flock"); 1417: 1418: # 1419: # Set up parent signals: 1420: # 1421: $SIG{INT} = &KillThemAll; 1422: $SIG{TERM} = &KillThemAll; 1423: 1424: while(1) { 1425: $deadchild = wait(); 1426: if(exists $ChildHash{$deadchild}) { # need to restart. 1427: $deadhost = $ChildHash{$deadchild}; 1428: delete($ChildHash{$deadchild}); 1429: Log("WARNING","Lost child pid= ".$deadchild. 1430: "Connected to host ".$deadhost); 1431: Log("INFO", "Restarting child procesing ".$deadhost); 1432: CreateChild($deadhost); 1433: } 1434: } 1435: sub KillThemAll { 1436: } 1437: 1438: =head1 Theory 1439: 1440: The event class is used to build this as a single process with an 1441: event driven model. The following events are handled: 1442: 1443: =item UNIX Socket connection Received 1444: 1445: =item Request data arrives on UNIX data transfer socket. 1446: 1447: =item lond connection becomes writable. 1448: 1449: =item timer fires at 1 second intervals. 1450: 1451: All sockets are run in non-blocking mode. Timeouts managed by the timer 1452: handler prevents hung connections. 1453: 1454: Key data structures: 1455: 1456: =item RequestQueue 1457: 1458: A queue of requests received from UNIX sockets that are 1459: waiting for a chance to be forwarded on a lond connection socket. 1460: 1461: =item ActiveConnections 1462: 1463: A hash of lond connections that have transactions in process that are 1464: available to be timed out. 1465: 1466: =item ActiveTransactions 1467: 1468: A hash indexed by lond connections that contain the client reply 1469: socket for each connection that has an active transaction on it. 1470: 1471: =item IdleConnections 1472: 1473: A hash of lond connections that have no work to do. These connections 1474: can be closed if they are idle for a long enough time. 1475: 1476: =cut