Annotation of loncom/loncnew, revision 1.100

1.1       foxr        1: #!/usr/bin/perl
1.2       albertel    2: # The LearningOnline Network with CAPA
                      3: # lonc maintains the connections to remote computers
                      4: #
1.100   ! raeburn     5: # $Id: loncnew,v 1.99 2011/06/17 16:36:29 raeburn Exp $
1.2       albertel    6: #
                      7: # Copyright Michigan State University Board of Trustees
                      8: #
                      9: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
1.17      foxr       10: ## LON-CAPA is free software; you can redistribute it and/or modify
1.2       albertel   11: # it under the terms of the GNU General Public License as published by
                     12: # the Free Software Foundation; either version 2 of the License, or
                     13: # (at your option) any later version.
                     14: #
                     15: # LON-CAPA is distributed in the hope that it will be useful,
                     16: # but WITHOUT ANY WARRANTY; without even the implied warranty of
                     17: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
                     18: # GNU General Public License for more details.
                     19: #
                     20: # You should have received a copy of the GNU General Public License
                     21: # along with LON-CAPA; if not, write to the Free Software
                     22: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
                     23: #
                     24: # /home/httpd/html/adm/gpl.txt
                     25: #
                     26: # http://www.lon-capa.org/
                     27: #
1.1       foxr       28: #
1.97      raeburn    29: # new lonc handles n request out over m connections to londs.
1.1       foxr       30: # This module is based on the Event class.
                     31: #   Development iterations:
                     32: #    - Setup basic event loop.   (done)
                     33: #    - Add timer dispatch.       (done)
                     34: #    - Add ability to accept lonc UNIX domain sockets.  (done)
                     35: #    - Add ability to create/negotiate lond connections (done).
1.7       foxr       36: #    - Add general logic for dispatching requests and timeouts. (done).
                     37: #    - Add support for the lonc/lond requests.          (done).
1.38      foxr       38: #    - Add logging/status monitoring.                    (done)
                     39: #    - Add Signal handling - HUP restarts. USR1 status report. (done)
1.7       foxr       40: #    - Add Configuration file I/O                       (done).
1.38      foxr       41: #    - Add management/status request interface.         (done)
1.8       foxr       42: #    - Add deferred request capability.                  (done)
1.38      foxr       43: #    - Detect transmission timeouts.                     (done)
1.7       foxr       44: #
                     45: 
1.23      foxr       46: use strict;
1.1       foxr       47: use lib "/home/httpd/lib/perl/";
                     48: use Event qw(:DEFAULT );
                     49: use POSIX qw(:signal_h);
1.12      foxr       50: use POSIX;
1.1       foxr       51: use IO::Socket;
                     52: use IO::Socket::INET;
                     53: use IO::Socket::UNIX;
1.9       foxr       54: use IO::File;
1.6       foxr       55: use IO::Handle;
1.1       foxr       56: use Socket;
                     57: use Crypt::IDEA;
                     58: use LONCAPA::Queue;
                     59: use LONCAPA::Stack;
                     60: use LONCAPA::LondConnection;
1.7       foxr       61: use LONCAPA::LondTransaction;
1.1       foxr       62: use LONCAPA::Configuration;
1.67      albertel   63: use Fcntl qw(:flock);
1.1       foxr       64: 
                     65: 
                     66: # Read the httpd configuration file to get perl variables
                     67: # normally set in apache modules:
                     68: 
                     69: my $perlvarref = LONCAPA::Configuration::read_conf('loncapa.conf');
                     70: my %perlvar    = %{$perlvarref};
                     71: 
                     72: #
                     73: #  parent and shared variables.
                     74: 
1.83      albertel   75: my %ChildPid;			# by pid -> host.
                     76: my %ChildHost;			# by host.
1.62      foxr       77: my %listening_to;		# Socket->host table for who the parent
                     78:                                 # is listening to.
                     79: my %parent_dispatchers;         # host-> listener watcher events. 
1.1       foxr       80: 
1.65      foxr       81: my %parent_handlers;		# Parent signal handlers...
                     82: 
1.9       foxr       83: my $MaxConnectionCount = 10;	# Will get from config later.
1.1       foxr       84: my $ClientConnection = 0;	# Uniquifier for client events.
                     85: 
1.9       foxr       86: my $DebugLevel = 0;
1.29      foxr       87: my $NextDebugLevel= 2;		# So Sigint can toggle this.
1.94      foxr       88: my $IdleTimeout= 5*60;		# Seconds to wait prior to pruning connections.
1.1       foxr       89: 
1.39      foxr       90: my $LogTransactions = 0;	# When True, all transactions/replies get logged.
1.65      foxr       91: my $executable      = $0;	# Get the full path to me.
1.39      foxr       92: 
1.1       foxr       93: #
                     94: #  The variables below are only used by the child processes.
                     95: #
                     96: my $RemoteHost;			# Name of host child is talking to.
1.81      albertel   97: my $RemoteHostId;		# default lonid of host child is talking to.
1.82      albertel   98: my @all_host_ids;
1.20      albertel   99: my $UnixSocketDir= $perlvar{'lonSockDir'};
1.1       foxr      100: my $IdleConnections = Stack->new(); # Set of idle connections
                    101: my %ActiveConnections;		# Connections to the remote lond.
1.7       foxr      102: my %ActiveTransactions;		# LondTransactions in flight.
1.1       foxr      103: my %ActiveClients;		# Serial numbers of active clients by socket.
                    104: my $WorkQueue       = Queue->new(); # Queue of pending transactions.
                    105: my $ConnectionCount = 0;
1.4       foxr      106: my $IdleSeconds     = 0;	# Number of seconds idle.
1.9       foxr      107: my $Status          = "";	# Current status string.
1.14      foxr      108: my $RecentLogEntry  = "";
1.72      albertel  109: my $ConnectionRetries=5;	# Number of connection retries allowed.
                    110: my $ConnectionRetriesLeft=5;	# Number of connection retries remaining.
1.40      foxr      111: my $LondVersion     = "unknown"; # Version of lond we talk with.
1.49      foxr      112: my $KeyMode         = "";       # e.g. ssl, local, insecure from last connect.
1.54      foxr      113: my $LondConnecting  = 0;       # True when a connection is being built.
1.1       foxr      114: 
1.60      foxr      115: 
                    116: 
1.62      foxr      117: my $I_am_child      = 0;	# True if this is the child process.
1.57      foxr      118: 
1.1       foxr      119: #
1.9       foxr      120: #   The hash below gives the HTML format for log messages
                    121: #   given a severity.
                    122: #    
                    123: my %LogFormats;
                    124: 
1.45      albertel  125: $LogFormats{"CRITICAL"} = "<font color='red'>CRITICAL: %s</font>";
                    126: $LogFormats{"SUCCESS"}  = "<font color='green'>SUCCESS: %s</font>";
                    127: $LogFormats{"INFO"}     = "<font color='yellow'>INFO: %s</font>";
                    128: $LogFormats{"WARNING"}  = "<font color='blue'>WARNING: %s</font>";
1.9       foxr      129: $LogFormats{"DEFAULT"}  = " %s ";
                    130: 
1.10      foxr      131: 
1.57      foxr      132: #  UpdateStatus;
                    133: #    Update the idle status display to show how many connections
                    134: #    are left, retries and other stuff.
                    135: #
                    136: sub UpdateStatus {
                    137:     if ($ConnectionRetriesLeft > 0) {
                    138: 	ShowStatus(GetServerHost()." Connection count: ".$ConnectionCount
                    139: 		   ." Retries remaining: ".$ConnectionRetriesLeft
                    140: 		   ." ($KeyMode)");
                    141:     } else {
                    142: 	ShowStatus(GetServerHost()." >> DEAD <<");
                    143:     }
                    144: }
                    145: 
1.10      foxr      146: 
                    147: =pod
                    148: 
                    149: =head2 LogPerm
                    150: 
                    151: Makes an entry into the permanent log file.
                    152: 
                    153: =cut
1.69      matthew   154: 
1.10      foxr      155: sub LogPerm {
                    156:     my $message=shift;
                    157:     my $execdir=$perlvar{'lonDaemons'};
                    158:     my $now=time;
                    159:     my $local=localtime($now);
                    160:     my $fh=IO::File->new(">>$execdir/logs/lonnet.perm.log");
1.86      albertel  161:     chomp($message);
1.10      foxr      162:     print $fh "$now:$message:$local\n";
                    163: }
1.9       foxr      164: 
                    165: =pod
                    166: 
                    167: =head2 Log
                    168: 
                    169: Logs a message to the log file.
                    170: Parameters:
                    171: 
                    172: =item severity
                    173: 
                    174: One of CRITICAL, WARNING, INFO, SUCCESS used to select the
                    175: format string used to format the message.  if the severity is
                    176: not a defined severity the Default format string is used.
                    177: 
                    178: =item message
                    179: 
                    180: The base message.  In addtion to the format string, the message
                    181: will be appended to a string containing the name of our remote
                    182: host and the time will be formatted into the message.
                    183: 
                    184: =cut
                    185: 
                    186: sub Log {
1.47      foxr      187: 
                    188:     my ($severity, $message) = @_;
                    189: 
1.9       foxr      190:     if(!$LogFormats{$severity}) {
                    191: 	$severity = "DEFAULT";
                    192:     }
                    193: 
                    194:     my $format = $LogFormats{$severity};
                    195:     
                    196:     #  Put the window dressing in in front of the message format:
                    197: 
                    198:     my $now   = time;
                    199:     my $local = localtime($now);
                    200:     my $finalformat = "$local ($$) [$RemoteHost] [$Status] ";
1.76      albertel  201:     $finalformat = $finalformat.$format."\n";
1.9       foxr      202: 
                    203:     # open the file and put the result.
                    204: 
                    205:     my $execdir = $perlvar{'lonDaemons'};
                    206:     my $fh      = IO::File->new(">>$execdir/logs/lonc.log");
                    207:     my $msg = sprintf($finalformat, $message);
1.14      foxr      208:     $RecentLogEntry = $msg;
1.9       foxr      209:     print $fh $msg;
                    210:     
1.10      foxr      211:     
1.9       foxr      212: }
1.6       foxr      213: 
1.3       albertel  214: 
1.1       foxr      215: =pod
1.3       albertel  216: 
                    217: =head2 GetPeerName
                    218: 
                    219: Returns the name of the host that a socket object is connected to.
                    220: 
1.1       foxr      221: =cut
                    222: 
                    223: sub GetPeername {
1.47      foxr      224: 
                    225: 
                    226:     my ($connection, $AdrFamily) = @_;
                    227: 
1.1       foxr      228:     my $peer       = $connection->peername();
                    229:     my $peerport;
                    230:     my $peerip;
                    231:     if($AdrFamily == AF_INET) {
                    232: 	($peerport, $peerip) = sockaddr_in($peer);
1.23      foxr      233: 	my $peername    = gethostbyaddr($peerip, $AdrFamily);
1.1       foxr      234: 	return $peername;
                    235:     } elsif ($AdrFamily == AF_UNIX) {
                    236: 	my $peerfile;
                    237: 	($peerfile) = sockaddr_un($peer);
                    238: 	return $peerfile;
                    239:     }
                    240: }
                    241: =pod
1.3       albertel  242: 
1.1       foxr      243: =head2 Debug
1.3       albertel  244: 
                    245: Invoked to issue a debug message.
                    246: 
1.1       foxr      247: =cut
1.3       albertel  248: 
1.1       foxr      249: sub Debug {
1.47      foxr      250: 
                    251:     my ($level, $message) = @_;
                    252: 
1.1       foxr      253:     if ($level <= $DebugLevel) {
1.23      foxr      254: 	Log("INFO", "-Debug- $message host = $RemoteHost");
1.1       foxr      255:     }
                    256: }
                    257: 
                    258: sub SocketDump {
1.47      foxr      259: 
                    260:     my ($level, $socket) = @_;
                    261: 
1.1       foxr      262:     if($level <= $DebugLevel) {
1.48      foxr      263: 	$socket->Dump(-1);	# Ensure it will get dumped.
1.1       foxr      264:     }
                    265: }
1.3       albertel  266: 
1.1       foxr      267: =pod
1.3       albertel  268: 
1.5       foxr      269: =head2 ShowStatus
                    270: 
                    271:  Place some text as our pid status.
1.10      foxr      272:  and as what we return in a SIGUSR1
1.5       foxr      273: 
                    274: =cut
1.69      matthew   275: 
1.5       foxr      276: sub ShowStatus {
1.10      foxr      277:     my $state = shift;
                    278:     my $now = time;
                    279:     my $local = localtime($now);
                    280:     $Status   = $local.": ".$state;
                    281:     $0='lonc: '.$state.' '.$local;
1.5       foxr      282: }
                    283: 
                    284: =pod
                    285: 
1.69      matthew   286: =head2 SocketTimeout
1.15      foxr      287: 
                    288:     Called when an action on the socket times out.  The socket is 
                    289:    destroyed and any active transaction is failed.
                    290: 
                    291: 
                    292: =cut
1.69      matthew   293: 
1.15      foxr      294: sub SocketTimeout {
                    295:     my $Socket = shift;
1.38      foxr      296:     Log("WARNING", "A socket timeout was detected");
1.52      foxr      297:     Debug(5, " SocketTimeout called: ");
1.48      foxr      298:     $Socket->Dump(0);
1.42      foxr      299:     if(exists($ActiveTransactions{$Socket})) {
1.43      albertel  300: 	FailTransaction($ActiveTransactions{$Socket});
1.42      foxr      301:     }
1.22      foxr      302:     KillSocket($Socket);	# A transaction timeout also counts as
                    303:                                 # a connection failure:
                    304:     $ConnectionRetriesLeft--;
1.42      foxr      305:     if($ConnectionRetriesLeft <= 0) {
1.52      foxr      306: 	Log("CRITICAL", "Host marked DEAD: ".GetServerHost());
1.56      foxr      307: 	$LondConnecting = 0;
1.42      foxr      308:     }
                    309: 
1.15      foxr      310: }
1.80      albertel  311: 
1.64      foxr      312: #
                    313: #   This function should be called by the child in all cases where it must
1.80      albertel  314: #   exit.  The child process must create a lock file for the AF_UNIX socket
                    315: #   in order to prevent connection requests from lonnet in the time between
                    316: #   process exit and the parent picking up the listen again.
                    317: #
1.64      foxr      318: # Parameters:
                    319: #     exit_code           - Exit status value, however see the next parameter.
                    320: #     message             - If this optional parameter is supplied, the exit
                    321: #                           is via a die with this message.
                    322: #
                    323: sub child_exit {
                    324:     my ($exit_code, $message) = @_;
                    325: 
                    326:     # Regardless of how we exit, we may need to do the lock thing:
                    327: 
1.80      albertel  328:     #
                    329:     #  Create a lock file since there will be a time window
                    330:     #  between our exit and the parent's picking up the listen
                    331:     #  during which no listens will be done on the
                    332:     #  lonnet client socket.
                    333:     #
                    334:     my $lock_file = &GetLoncSocketPath().".lock";
                    335:     open(LOCK,">$lock_file");
                    336:     print LOCK "Contents not important";
                    337:     close(LOCK);
1.81      albertel  338:     unlink(&GetLoncSocketPath());
1.64      foxr      339: 
1.80      albertel  340:     if ($message) {
                    341: 	die($message);
1.64      foxr      342:     } else {
                    343: 	exit($exit_code);
                    344:     }
                    345: }
1.35      foxr      346: #----------------------------- Timer management ------------------------
1.15      foxr      347: 
                    348: =pod
                    349: 
1.1       foxr      350: =head2 Tick
1.3       albertel  351: 
1.97      raeburn   352: Invoked each timer tick.
1.3       albertel  353: 
1.1       foxr      354: =cut
                    355: 
1.5       foxr      356: 
1.1       foxr      357: sub Tick {
1.52      foxr      358:     my ($Event)       = @_;
                    359:     my $clock_watcher = $Event->w;
                    360: 
1.1       foxr      361:     my $client;
1.57      foxr      362:     UpdateStatus();
                    363: 
1.4       foxr      364:     # Is it time to prune connection count:
                    365: 
                    366: 
                    367:     if($IdleConnections->Count()  && 
                    368:        ($WorkQueue->Count() == 0)) { # Idle connections and nothing to do?
1.52      foxr      369: 	$IdleSeconds++;
1.4       foxr      370: 	if($IdleSeconds > $IdleTimeout) { # Prune a connection...
1.23      foxr      371: 	    my $Socket = $IdleConnections->pop();
1.6       foxr      372: 	    KillSocket($Socket);
1.54      foxr      373: 	    $IdleSeconds = 0;	# Otherwise all connections get trimmed to fast.
1.57      foxr      374: 	    UpdateStatus();
1.80      albertel  375: 	    if(($ConnectionCount == 0)) {
1.64      foxr      376: 		&child_exit(0);
                    377: 
1.57      foxr      378: 	    }
1.4       foxr      379: 	}
                    380:     } else {
                    381: 	$IdleSeconds = 0;	# Reset idle count if not idle.
                    382:     }
1.15      foxr      383:     #
                    384:     #  For each inflight transaction, tick down its timeout counter.
                    385:     #
1.35      foxr      386: 
1.34      albertel  387:     foreach my $item (keys %ActiveConnections) {
                    388: 	my $State = $ActiveConnections{$item}->data->GetState();
1.35      foxr      389: 	if ($State ne 'Idle') {
1.34      albertel  390: 	    Debug(5,"Ticking Socket $State $item");
                    391: 	    $ActiveConnections{$item}->data->Tick();
                    392: 	}
1.15      foxr      393:     }
1.5       foxr      394:     # Do we have work in the queue, but no connections to service them?
                    395:     # If so, try to make some new connections to get things going again.
                    396:     #
1.57      foxr      397:     #   Note this code is dead now...
                    398:     #
1.5       foxr      399:     my $Requests = $WorkQueue->Count();
1.56      foxr      400:     if (($ConnectionCount == 0)  && ($Requests > 0) && (!$LondConnecting)) { 
1.10      foxr      401: 	if ($ConnectionRetriesLeft > 0) {
1.56      foxr      402: 	    Debug(5,"Work but no connections, Make a new one");
                    403: 	    my $success;
                    404: 	    $success    = &MakeLondConnection;
                    405: 	    if($success == 0) { # All connections failed:
1.29      foxr      406: 		Debug(5,"Work in queue failed to make any connectiouns\n");
1.22      foxr      407: 		EmptyQueue();	# Fail pending transactions with con_lost.
1.42      foxr      408: 		CloseAllLondConnections(); # Should all be closed but....
1.10      foxr      409: 	    }
                    410: 	} else {
1.56      foxr      411: 	    $LondConnecting = 0;
1.22      foxr      412: 	    ShowStatus(GetServerHost()." >>> DEAD!!! <<<");
1.29      foxr      413: 	    Debug(5,"Work in queue, but gave up on connections..flushing\n");
1.10      foxr      414: 	    EmptyQueue();	# Connections can't be established.
1.42      foxr      415: 	    CloseAllLondConnections(); # Should all already be closed but...
1.5       foxr      416: 	}
                    417:        
                    418:     }
1.49      foxr      419:     if ($ConnectionCount == 0) {
                    420: 	$KeyMode = ""; 
1.52      foxr      421: 	$clock_watcher->cancel();
1.49      foxr      422:     }
1.66      albertel  423:     &UpdateStatus();
1.1       foxr      424: }
                    425: 
                    426: =pod
1.3       albertel  427: 
1.1       foxr      428: =head2 SetupTimer
                    429: 
1.3       albertel  430: Sets up a 1 per sec recurring timer event.  The event handler is used to:
1.1       foxr      431: 
1.3       albertel  432: =item
                    433: 
                    434: Trigger timeouts on communications along active sockets.
                    435: 
                    436: =item
                    437: 
                    438: Trigger disconnections of idle sockets.
1.1       foxr      439: 
                    440: =cut
                    441: 
                    442: sub SetupTimer {
1.52      foxr      443:     Debug(6, "SetupTimer");
1.92      foxr      444:     Event->timer(interval => 1, cb => \&Tick,
                    445: 	hard => 1);
1.1       foxr      446: }
1.3       albertel  447: 
1.1       foxr      448: =pod
1.3       albertel  449: 
1.1       foxr      450: =head2 ServerToIdle
1.3       albertel  451: 
                    452: This function is called when a connection to the server is
                    453: ready for more work.
                    454: 
                    455: If there is work in the Work queue the top element is dequeued
1.1       foxr      456: and the connection will start to work on it.  If the work queue is
                    457: empty, the connection is pushed on the idle connection stack where
                    458: it will either get another work unit, or alternatively, if it sits there
                    459: long enough, it will be shut down and released.
                    460: 
1.3       albertel  461: =cut
1.1       foxr      462: 
                    463: sub ServerToIdle {
                    464:     my $Socket   = shift;	# Get the socket.
1.49      foxr      465:     $KeyMode = $Socket->{AuthenticationMode};
1.7       foxr      466:     delete($ActiveTransactions{$Socket}); # Server has no transaction
1.1       foxr      467: 
1.29      foxr      468:     &Debug(5, "Server to idle");
1.1       foxr      469: 
                    470:     #  If there's work to do, start the transaction:
                    471: 
1.23      foxr      472:     my $reqdata = $WorkQueue->dequeue(); # This is a LondTransaction
1.29      foxr      473:     if ($reqdata ne undef)  {
                    474: 	Debug(5, "Queue gave request data: ".$reqdata->getRequest());
1.7       foxr      475: 	&StartRequest($Socket,  $reqdata);
1.8       foxr      476: 
1.1       foxr      477:     } else {
                    478: 	
                    479:     #  There's no work waiting, so push the server to idle list.
1.29      foxr      480: 	&Debug(5, "No new work requests, server connection going idle");
1.1       foxr      481: 	$IdleConnections->push($Socket);
                    482:     }
                    483: }
1.3       albertel  484: 
1.1       foxr      485: =pod
1.3       albertel  486: 
1.1       foxr      487: =head2 ClientWritable
1.3       albertel  488: 
                    489: Event callback for when a client socket is writable.
                    490: 
1.97      raeburn   491: This callback is established when a transaction response is
                    492: available from lond.  The response is forwarded to the unix socket
1.3       albertel  493: as it becomes writable in this sub.
                    494: 
1.1       foxr      495: Parameters:
                    496: 
1.3       albertel  497: =item Event
                    498: 
                    499: The event that has been triggered. Event->w->data is
                    500: the data and Event->w->fd is the socket to write.
1.1       foxr      501: 
                    502: =cut
1.3       albertel  503: 
1.1       foxr      504: sub ClientWritable {
                    505:     my $Event    = shift;
                    506:     my $Watcher  = $Event->w;
1.84      albertel  507:     if (!defined($Watcher)) {
                    508: 	&child_exit(-1,'No watcher for event in ClientWritable');
                    509:     }
1.1       foxr      510:     my $Data     = $Watcher->data;
                    511:     my $Socket   = $Watcher->fd;
                    512: 
                    513:     # Try to send the data:
                    514: 
                    515:     &Debug(6, "ClientWritable writing".$Data);
                    516:     &Debug(9, "Socket is: ".$Socket);
                    517: 
1.6       foxr      518:     if($Socket->connected) {
                    519: 	my $result = $Socket->send($Data, 0);
                    520: 	
                    521: 	# $result undefined: the write failed.
                    522: 	# otherwise $result is the number of bytes written.
                    523: 	# Remove that preceding string from the data.
                    524: 	# If the resulting data is empty, destroy the watcher
                    525: 	# and set up a read event handler to accept the next
                    526: 	# request.
                    527: 	
                    528: 	&Debug(9,"Send result is ".$result." Defined: ".defined($result));
1.29      foxr      529: 	if($result ne undef) {
1.6       foxr      530: 	    &Debug(9, "send result was defined");
                    531: 	    if($result == length($Data)) { # Entire string sent.
                    532: 		&Debug(9, "ClientWritable data all written");
                    533: 		$Watcher->cancel();
                    534: 		#
                    535: 		#  Set up to read next request from socket:
                    536: 		
                    537: 		my $descr     = sprintf("Connection to lonc client %d",
                    538: 					$ActiveClients{$Socket});
                    539: 		Event->io(cb    => \&ClientRequest,
                    540: 			  poll  => 'r',
                    541: 			  desc  => $descr,
                    542: 			  data  => "",
                    543: 			  fd    => $Socket);
                    544: 		
                    545: 	    } else {		# Partial string sent.
                    546: 		$Watcher->data(substr($Data, $result));
1.15      foxr      547: 		if($result == 0) {    # client hung up on us!!
1.52      foxr      548: 		    # Log("INFO", "lonc pipe client hung up on us!");
1.15      foxr      549: 		    $Watcher->cancel;
                    550: 		    $Socket->shutdown(2);
                    551: 		    $Socket->close();
                    552: 		}
1.6       foxr      553: 	    }
                    554: 	    
                    555: 	} else {			# Error of some sort...
                    556: 	    
                    557: 	    # Some errnos are possible:
                    558: 	    my $errno = $!;
                    559: 	    if($errno == POSIX::EWOULDBLOCK   ||
                    560: 	       $errno == POSIX::EAGAIN        ||
                    561: 	       $errno == POSIX::EINTR) {
1.96      foxr      562: 		# No action taken...the socket will be writable firing the event again
                    563: 		# which will result in a retry of the write.
1.6       foxr      564: 	    } else {		# Unanticipated errno.
                    565: 		&Debug(5,"ClientWritable error or peer shutdown".$RemoteHost);
                    566: 		$Watcher->cancel;	# Stop the watcher.
                    567: 		$Socket->shutdown(2); # Kill connection
                    568: 		$Socket->close();	# Close the socket.
                    569: 	    }
1.1       foxr      570: 	    
                    571: 	}
1.6       foxr      572:     } else {
                    573: 	$Watcher->cancel();	# A delayed request...just cancel.
1.84      albertel  574: 	return;
1.1       foxr      575:     }
                    576: }
                    577: 
                    578: =pod
1.3       albertel  579: 
1.1       foxr      580: =head2 CompleteTransaction
1.3       albertel  581: 
                    582: Called when the reply data has been received for a lond 
1.1       foxr      583: transaction.   The reply data must now be sent to the
                    584: ultimate client on the other end of the Unix socket.  This is
                    585: done by setting up a writable event for the socket with the
                    586: data the reply data.
1.3       albertel  587: 
1.1       foxr      588: Parameters:
1.3       albertel  589: 
                    590: =item Socket
                    591: 
1.97      raeburn   592: Socket on which the lond transaction occurred.  This is a
                    593: LondConnection. The data received are in the TransactionReply member.
1.3       albertel  594: 
1.7       foxr      595: =item Transaction
1.3       albertel  596: 
1.7       foxr      597: The transaction that is being completed.
1.1       foxr      598: 
                    599: =cut
1.3       albertel  600: 
1.1       foxr      601: sub CompleteTransaction {
1.29      foxr      602:     &Debug(5,"Complete transaction");
1.47      foxr      603: 
                    604:     my ($Socket, $Transaction) = @_;
1.1       foxr      605: 
1.7       foxr      606:     if (!$Transaction->isDeferred()) { # Normal transaction
                    607: 	my $data   = $Socket->GetReply(); # Data to send.
1.39      foxr      608: 	if($LogTransactions) {
                    609: 	    Log("SUCCESS", "Reply from lond: '$data'");
                    610: 	}
1.7       foxr      611: 	StartClientReply($Transaction, $data);
                    612:     } else {			# Delete deferred transaction file.
1.9       foxr      613: 	Log("SUCCESS", "A delayed transaction was completed");
1.86      albertel  614: 	LogPerm("S:".$Transaction->getClient().":".$Transaction->getRequest());
                    615: 	unlink($Transaction->getFile());
1.7       foxr      616:     }
1.6       foxr      617: }
1.42      foxr      618: 
1.6       foxr      619: =pod
1.42      foxr      620: 
1.6       foxr      621: =head1 StartClientReply
                    622: 
                    623:    Initiates a reply to a client where the reply data is a parameter.
                    624: 
1.7       foxr      625: =head2  parameters:
                    626: 
                    627: =item Transaction
                    628: 
                    629:     The transaction for which we are responding to the client.
                    630: 
                    631: =item data
                    632: 
1.97      raeburn   633:     The data to send to apache client.
1.7       foxr      634: 
1.6       foxr      635: =cut
1.42      foxr      636: 
1.6       foxr      637: sub StartClientReply {
1.1       foxr      638: 
1.47      foxr      639:     my ($Transaction, $data) = @_;
1.12      foxr      640: 
1.7       foxr      641:     my $Client   = $Transaction->getClient();
                    642: 
1.1       foxr      643:     &Debug(8," Reply was: ".$data);
                    644:     my $Serial         = $ActiveClients{$Client};
                    645:     my $desc           = sprintf("Connection to lonc client %d",
                    646: 				 $Serial);
                    647:     Event->io(fd       => $Client,
                    648: 	      poll     => "w",
                    649: 	      desc     => $desc,
                    650: 	      cb       => \&ClientWritable,
                    651: 	      data     => $data);
                    652: }
1.42      foxr      653: 
1.4       foxr      654: =pod
1.42      foxr      655: 
1.4       foxr      656: =head2 FailTransaction
                    657: 
                    658:   Finishes a transaction with failure because the associated lond socket
1.7       foxr      659:   disconnected.  There are two possibilities:
                    660:   - The transaction is deferred: in which case we just quietly
                    661:     delete the transaction since there is no client connection.
                    662:   - The transaction is 'live' in which case we initiate the sending
                    663:     of "con_lost" to the client.
                    664: 
1.42      foxr      665: Deleting the transaction means killing it from the %ActiveTransactions hash.
1.4       foxr      666: 
                    667: Parameters:
                    668: 
                    669: =item client  
                    670:  
1.7       foxr      671:    The LondTransaction we are failing.
                    672:  
1.42      foxr      673: 
1.4       foxr      674: =cut
                    675: 
                    676: sub FailTransaction {
1.7       foxr      677:     my $transaction = shift;
1.52      foxr      678:     
                    679:     #  If the socket is dead, that's already logged.
                    680: 
                    681:     if ($ConnectionRetriesLeft > 0) {
                    682: 	Log("WARNING", "Failing transaction "
1.71      albertel  683: 	    .$transaction->getLoggableRequest());
1.52      foxr      684:     }
1.71      albertel  685:     Debug(1, "Failing transaction: ".$transaction->getLoggableRequest());
1.10      foxr      686:     if (!$transaction->isDeferred()) { # If the transaction is deferred we'll get to it.
1.11      foxr      687: 	my $client  = $transaction->getClient();
1.30      foxr      688: 	Debug(1," Replying con_lost to ".$transaction->getRequest());
1.11      foxr      689: 	StartClientReply($transaction, "con_lost\n");
1.7       foxr      690:     }
1.4       foxr      691: 
                    692: }
                    693: 
                    694: =pod
1.69      matthew   695: 
1.6       foxr      696: =head1  EmptyQueue
1.7       foxr      697: 
1.6       foxr      698:   Fails all items in the work queue with con_lost.
1.7       foxr      699:   Note that each item in the work queue is a transaction.
                    700: 
1.6       foxr      701: =cut
1.69      matthew   702: 
1.6       foxr      703: sub EmptyQueue {
1.22      foxr      704:     $ConnectionRetriesLeft--;	# Counts as connection failure too.
1.6       foxr      705:     while($WorkQueue->Count()) {
1.10      foxr      706: 	my $request = $WorkQueue->dequeue(); # This is a transaction
1.7       foxr      707: 	FailTransaction($request);
1.6       foxr      708:     }
                    709: }
                    710: 
                    711: =pod
1.4       foxr      712: 
1.9       foxr      713: =head2 CloseAllLondConnections
                    714: 
                    715: Close all connections open on lond prior to exit e.g.
                    716: 
                    717: =cut
1.69      matthew   718: 
1.9       foxr      719: sub CloseAllLondConnections {
1.23      foxr      720:     foreach my $Socket (keys %ActiveConnections) {
1.42      foxr      721:       if(exists($ActiveTransactions{$Socket})) {
                    722: 	FailTransaction($ActiveTransactions{$Socket});
                    723:       }
                    724:       KillSocket($Socket);
1.9       foxr      725:     }
                    726: }
                    727: 
                    728: =pod
                    729: 
1.4       foxr      730: =head2 KillSocket
                    731:  
                    732: Destroys a socket.  This function can be called either when a socket
                    733: has died of 'natural' causes or because a socket needs to be pruned due to
                    734: idleness.  If the socket has died naturally, if there are no longer any 
                    735: live connections a new connection is created (in case there are transactions
                    736: in the queue).  If the socket has been pruned, it is never re-created.
                    737: 
                    738: Parameters:
1.1       foxr      739: 
1.4       foxr      740: =item Socket
                    741:  
                    742:   The socket to kill off.
                    743: 
                    744: =item Restart
                    745: 
1.97      raeburn   746: non-zero if we are allowed to create a new connection.
1.4       foxr      747: 
1.69      matthew   748: =cut
1.4       foxr      749: 
                    750: sub KillSocket {
                    751:     my $Socket = shift;
                    752: 
1.17      foxr      753:     Log("WARNING", "Shutting down a socket");
1.9       foxr      754:     $Socket->Shutdown();
                    755: 
1.7       foxr      756:     #  If the socket came from the active connection set,
                    757:     #  delete its transaction... note that FailTransaction should
                    758:     #  already have been called!!!
                    759:     #  otherwise it came from the idle set.
                    760:     #  
1.4       foxr      761:     
                    762:     if(exists($ActiveTransactions{$Socket})) {
                    763: 	delete ($ActiveTransactions{$Socket});
                    764:     }
                    765:     if(exists($ActiveConnections{$Socket})) {
1.90      foxr      766: 	$ActiveConnections{$Socket}->cancel;
1.4       foxr      767: 	delete($ActiveConnections{$Socket});
1.37      albertel  768: 	$ConnectionCount--;
                    769: 	if ($ConnectionCount < 0) { $ConnectionCount = 0; }
1.4       foxr      770:     }
1.6       foxr      771:     #  If the connection count has gone to zero and there is work in the
                    772:     #  work queue, the work all gets failed with con_lost.
                    773:     #
                    774:     if($ConnectionCount == 0) {
1.98      foxr      775: 	$LondConnecting = 0;	# No connections so also not connecting.
1.22      foxr      776: 	EmptyQueue();
1.42      foxr      777: 	CloseAllLondConnections; # Should all already be closed but...
1.4       foxr      778:     }
1.90      foxr      779:     UpdateStatus();
1.4       foxr      780: }
1.1       foxr      781: 
                    782: =pod
1.3       albertel  783: 
1.1       foxr      784: =head2 LondReadable
1.3       albertel  785: 
1.1       foxr      786: This function is called whenever a lond connection
                    787: is readable.  The action is state dependent:
                    788: 
1.3       albertel  789: =head3 State=Initialized
                    790: 
1.100   ! raeburn   791: We are waiting for the challenge, this is a no-op until the
1.1       foxr      792: state changes.
1.3       albertel  793: 
1.1       foxr      794: =head3 State=Challenged 
1.3       albertel  795: 
                    796: The challenge has arrived we need to transition to Writable.
1.1       foxr      797: The connection must echo the challenge back.
1.3       albertel  798: 
1.1       foxr      799: =head3 State=ChallengeReplied
1.3       albertel  800: 
1.97      raeburn   801: The challenge has been replied to.  Then we are receiving the 
1.1       foxr      802: 'ok' from the partner.
1.3       albertel  803: 
1.40      foxr      804: =head3  State=ReadingVersionString
                    805: 
                    806: We have requested the lond version and are reading the
                    807: version back.  Upon completion, we'll store the version away
                    808: for future use(?).
                    809: 
                    810: =head3 State=HostSet
                    811: 
                    812: We have selected the domain name of our peer (multhomed hosts)
                    813: and are getting the reply (presumably ok) back.
                    814: 
1.1       foxr      815: =head3 State=RequestingKey
1.3       albertel  816: 
                    817: The ok has been received and we need to send the request for
1.1       foxr      818: an encryption key.  Transition to writable for that.
1.3       albertel  819: 
1.1       foxr      820: =head3 State=ReceivingKey
1.3       albertel  821: 
                    822: The the key has been requested, now we are reading the new key.
                    823: 
1.1       foxr      824: =head3 State=Idle 
1.3       albertel  825: 
                    826: The encryption key has been negotiated or we have finished 
1.97      raeburn   827: reading data from the a transaction.   If the callback data have
1.99      raeburn   828: a client as well as the socket information, then we are 
1.97      raeburn   829: doing a transaction and the data received are relayed to the client
1.1       foxr      830: before the socket is put on the idle list.
1.3       albertel  831: 
1.1       foxr      832: =head3 State=SendingRequest
1.3       albertel  833: 
                    834: I do not think this state can be received here, but if it is,
1.1       foxr      835: the appropriate thing to do is to transition to writable, and send
                    836: the request.
1.3       albertel  837: 
1.1       foxr      838: =head3 State=ReceivingReply
1.3       albertel  839: 
                    840: We finished sending the request to the server and now transition
1.1       foxr      841: to readable to receive the reply. 
                    842: 
                    843: The parameter to this function are:
1.3       albertel  844: 
1.1       foxr      845: The event. Implicit in this is the watcher and its data.  The data 
1.97      raeburn   846: contain at least the lond connection object and, if a 
1.1       foxr      847: transaction is in progress, the socket attached to the local client.
                    848: 
1.3       albertel  849: =cut
1.1       foxr      850: 
                    851: sub LondReadable {
1.8       foxr      852: 
1.41      albertel  853:     my $Event      = shift;
                    854:     my $Watcher    = $Event->w;
                    855:     my $Socket     = $Watcher->data;
                    856:     my $client     = undef;
1.40      foxr      857: 
1.41      albertel  858:     &Debug(6,"LondReadable called state = ".$Socket->GetState());
1.40      foxr      859: 
                    860: 
1.41      albertel  861:     my $State = $Socket->GetState(); # All action depends on the state.
1.40      foxr      862: 
1.41      albertel  863:     SocketDump(6, $Socket);
                    864:     my $status = $Socket->Readable();
1.40      foxr      865: 
1.41      albertel  866:     &Debug(2, "Socket->Readable returned: $status");
1.40      foxr      867: 
1.41      albertel  868:     if($status != 0) {
                    869: 	# bad return from socket read. Currently this means that
                    870: 	# The socket has become disconnected. We fail the transaction.
1.40      foxr      871: 
1.41      albertel  872: 	Log("WARNING",
                    873: 	    "Lond connection lost.");
                    874: 	if(exists($ActiveTransactions{$Socket})) {
                    875: 	    FailTransaction($ActiveTransactions{$Socket});
1.56      foxr      876: 	} else {
                    877: 	    #  Socket is connecting and failed... need to mark
                    878: 	    #  no longer connecting.
                    879: 	   
                    880: 	    $LondConnecting = 0;
1.41      albertel  881: 	}
                    882: 	$Watcher->cancel();
                    883: 	KillSocket($Socket);
                    884: 	$ConnectionRetriesLeft--;       # Counts as connection failure
                    885: 	return;
                    886:     }
                    887:     SocketDump(6,$Socket);
1.17      foxr      888: 
1.41      albertel  889:     $State = $Socket->GetState(); # Update in case of transition.
                    890:     &Debug(6, "After read, state is ".$State);
1.1       foxr      891: 
1.41      albertel  892:     if($State eq "Initialized") {
1.1       foxr      893: 
                    894: 
1.41      albertel  895:     } elsif ($State eq "ChallengeReceived") {
1.1       foxr      896: 	#  The challenge must be echoed back;  The state machine
                    897: 	# in the connection takes care of setting that up.  Just
                    898: 	# need to transition to writable:
1.41      albertel  899: 	
                    900: 	$Watcher->cb(\&LondWritable);
                    901: 	$Watcher->poll("w");
1.1       foxr      902: 
1.41      albertel  903:     } elsif ($State eq "ChallengeReplied") {
1.1       foxr      904: 
1.41      albertel  905:     } elsif ($State eq "RequestingVersion") {
                    906: 	# Need to ask for the version... that is writiability:
1.1       foxr      907: 
1.41      albertel  908: 	$Watcher->cb(\&LondWritable);
                    909: 	$Watcher->poll("w");
                    910: 
                    911:     } elsif ($State eq "ReadingVersionString") {
                    912: 	# Read the rest of the version string... 
                    913:     } elsif ($State eq "SetHost") {
                    914: 	# Need to request the actual domain get set...
                    915: 
                    916: 	$Watcher->cb(\&LondWritable);
                    917: 	$Watcher->poll("w");
                    918:     } elsif ($State eq "HostSet") {
                    919: 	# Reading the 'ok' from the peer.
                    920: 
                    921:     } elsif ($State eq "RequestingKey") {
1.1       foxr      922: 	#  The ok was received.  Now we need to request the key
                    923: 	#  That requires us to be writable:
                    924: 
1.41      albertel  925: 	$Watcher->cb(\&LondWritable);
                    926: 	$Watcher->poll("w");
1.1       foxr      927: 
1.41      albertel  928:     } elsif ($State eq "ReceivingKey") {
1.1       foxr      929: 
1.41      albertel  930:     } elsif ($State eq "Idle") {
1.40      foxr      931:    
1.41      albertel  932: 	# This is as good a spot as any to get the peer version
                    933: 	# string:
1.40      foxr      934:    
1.41      albertel  935: 	if($LondVersion eq "unknown") {
                    936: 	    $LondVersion = $Socket->PeerVersion();
                    937: 	    Log("INFO", "Connected to lond version: $LondVersion");
                    938: 	}
1.1       foxr      939: 	# If necessary, complete a transaction and then go into the
                    940: 	# idle queue.
1.22      foxr      941: 	#  Note that a trasition to idle indicates a live lond
                    942: 	# on the other end so reset the connection retries.
                    943: 	#
1.41      albertel  944: 	$ConnectionRetriesLeft = $ConnectionRetries; # success resets the count
                    945: 	$Watcher->cancel();
                    946: 	if(exists($ActiveTransactions{$Socket})) {
                    947: 	    Debug(5,"Completing transaction!!");
                    948: 	    CompleteTransaction($Socket, 
                    949: 				$ActiveTransactions{$Socket});
                    950: 	} else {
1.95      foxr      951: 	    my $count = $Socket->GetClientData();
                    952: 	    Log("SUCCESS", "Connection ".$count." to "
1.41      albertel  953: 		.$RemoteHost." now ready for action");
                    954: 	}
                    955: 	ServerToIdle($Socket);	# Next work unit or idle.
1.54      foxr      956: 
                    957: 	#
                    958: 	$LondConnecting = 0;	# Best spot I can think of for this.
                    959: 	# 
1.6       foxr      960: 	
1.41      albertel  961:     } elsif ($State eq "SendingRequest") {
1.1       foxr      962: 	#  We need to be writable for this and probably don't belong
                    963: 	#  here inthe first place.
                    964: 
1.73      albertel  965: 	Debug(6, "SendingRequest state encountered in readable");
1.41      albertel  966: 	$Watcher->poll("w");
                    967: 	$Watcher->cb(\&LondWritable);
1.1       foxr      968: 
1.41      albertel  969:     } elsif ($State eq "ReceivingReply") {
1.1       foxr      970: 
                    971: 
1.41      albertel  972:     } else {
                    973: 	# Invalid state.
                    974: 	Debug(4, "Invalid state in LondReadable");
                    975:     }
1.1       foxr      976: }
1.3       albertel  977: 
1.1       foxr      978: =pod
1.3       albertel  979: 
1.1       foxr      980: =head2 LondWritable
1.3       albertel  981: 
1.1       foxr      982: This function is called whenever a lond connection
                    983: becomes writable while there is a writeable monitoring
                    984: event.  The action taken is very state dependent:
1.3       albertel  985: 
1.1       foxr      986: =head3 State = Connected 
1.3       albertel  987: 
                    988: The connection is in the process of sending the 'init' hailing to the
1.100   ! raeburn   989: lond on the remote end.  The Writable member of the connection object
        !           990: is called.  On error, call ConnectionError to destroy the connection
        !           991: and remove it from the ActiveConnections hash.
1.3       albertel  992: 
1.1       foxr      993: =head3 Initialized
1.3       albertel  994: 
                    995: 'init' has been sent, writability monitoring is removed and
                    996: readability monitoring is started with LondReadable as the callback.
                    997: 
1.1       foxr      998: =head3 ChallengeReceived
1.3       albertel  999: 
                   1000: The connection has received the who are you challenge from the remote
                   1001: system, and is in the process of sending the challenge
                   1002: response. Writable is called.
                   1003: 
1.1       foxr     1004: =head3 ChallengeReplied
1.3       albertel 1005: 
                   1006: The connection has replied to the initial challenge The we switch to
                   1007: monitoring readability looking for the server to reply with 'ok'.
                   1008: 
1.1       foxr     1009: =head3 RequestingKey
1.3       albertel 1010: 
                   1011: The connection is in the process of requesting its encryption key.
                   1012: Writable is called.
                   1013: 
1.1       foxr     1014: =head3 ReceivingKey
1.3       albertel 1015: 
                   1016: The connection has sent the request for a key.  Switch to readability
                   1017: monitoring to accept the key
                   1018: 
1.1       foxr     1019: =head3 SendingRequest
1.3       albertel 1020: 
                   1021: The connection is in the process of sending a request to the server.
                   1022: This request is part of a client transaction.  All the states until
                   1023: now represent the client setup protocol. Writable is called.
                   1024: 
1.1       foxr     1025: =head3 ReceivingReply
                   1026: 
1.3       albertel 1027: The connection has sent a request.  Now it must receive a reply.
                   1028: Readability monitoring is requested.
                   1029: 
                   1030: This function is an event handler and therefore receives as
1.1       foxr     1031: a parameter the event that has fired.  The data for the watcher
                   1032: of this event is a reference to a list of one or two elements,
                   1033: depending on state. The first (and possibly only) element is the
                   1034: socket.  The second (present only if a request is in progress)
                   1035: is the socket on which to return a reply to the caller.
                   1036: 
                   1037: =cut
1.3       albertel 1038: 
1.1       foxr     1039: sub LondWritable {
                   1040:     my $Event   = shift;
                   1041:     my $Watcher = $Event->w;
1.8       foxr     1042:     my $Socket  = $Watcher->data;
                   1043:     my $State   = $Socket->GetState();
1.1       foxr     1044: 
1.8       foxr     1045:     Debug(6,"LondWritable State = ".$State."\n");
1.1       foxr     1046: 
1.8       foxr     1047:  
1.1       foxr     1048:     #  Figure out what to do depending on the state of the socket:
                   1049:     
                   1050: 
                   1051: 
                   1052: 
                   1053:     SocketDump(6,$Socket);
                   1054: 
1.42      foxr     1055:     #  If the socket is writable, we must always write.
                   1056:     # Only by writing will we undergo state transitions.
                   1057:     # Old logic wrote in state specific code below, however
                   1058:     # That forces us at least through another invocation of
                   1059:     # this function after writability is possible again.
                   1060:     # This logic also factors out common code for handling
                   1061:     # write failures... in all cases, write failures 
                   1062:     # Kill the socket.
                   1063:     #  This logic makes the branches of the >big< if below
                   1064:     # so that the writing states are actually NO-OPs.
                   1065: 
                   1066:     if ($Socket->Writable() != 0) {
1.43      albertel 1067: 	#  The write resulted in an error.
                   1068: 	# We'll treat this as if the socket got disconnected:
                   1069: 	Log("WARNING", "Connection to ".$RemoteHost.
                   1070: 	    " has been disconnected");
                   1071: 	if(exists($ActiveTransactions{$Socket})) {
                   1072: 	    FailTransaction($ActiveTransactions{$Socket});
1.56      foxr     1073: 	} else {
                   1074: 	    #  In the process of conneting, so need to turn that off.
                   1075: 	    
                   1076: 	    $LondConnecting = 0;
1.43      albertel 1077: 	}
                   1078: 	$Watcher->cancel();
                   1079: 	KillSocket($Socket);
                   1080: 	return;
1.42      foxr     1081:     }
                   1082: 
                   1083: 
                   1084: 
1.41      albertel 1085:     if      ($State eq "Connected")         {
1.1       foxr     1086: 
1.41      albertel 1087: 	#  "init" is being sent...
1.42      foxr     1088:  
1.41      albertel 1089:     } elsif ($State eq "Initialized")       {
1.4       foxr     1090: 
1.41      albertel 1091: 	# Now that init was sent, we switch 
                   1092: 	# to watching for readability:
1.1       foxr     1093: 
1.41      albertel 1094: 	$Watcher->cb(\&LondReadable);
                   1095: 	$Watcher->poll("r");
                   1096: 	
                   1097:     } elsif ($State eq "ChallengeReceived") {
                   1098: 	# We received the challenge, now we 
                   1099: 	# are echoing it back. This is a no-op,
                   1100: 	# we're waiting for the state to change
1.1       foxr     1101: 	
1.41      albertel 1102:     } elsif ($State eq "ChallengeReplied")  {
                   1103: 	# The echo was sent back, so we switch
                   1104: 	# to watching readability.
                   1105: 
                   1106: 	$Watcher->cb(\&LondReadable);
                   1107: 	$Watcher->poll("r");
                   1108:     } elsif ($State eq "RequestingVersion") {
                   1109: 	# Sending the peer a version request...
1.42      foxr     1110: 
1.41      albertel 1111:     } elsif ($State eq "ReadingVersionString") {
                   1112: 	# Transition to read since we have sent the
                   1113: 	# version command and now just need to read the
                   1114: 	# version string from the peer:
1.40      foxr     1115:       
1.41      albertel 1116: 	$Watcher->cb(\&LondReadable);
                   1117: 	$Watcher->poll("r");
1.40      foxr     1118:       
1.41      albertel 1119:     } elsif ($State eq "SetHost") {
                   1120: 	#  Setting the remote domain...
1.42      foxr     1121: 
1.41      albertel 1122:     } elsif ($State eq "HostSet") {
                   1123: 	# Back to readable to get the ok.
1.40      foxr     1124:       
1.41      albertel 1125: 	$Watcher->cb(\&LondReadable);
                   1126: 	$Watcher->poll("r");
1.40      foxr     1127:       
                   1128: 
1.41      albertel 1129:     } elsif ($State eq "RequestingKey")     {
                   1130: 	# At this time we're requesting the key.
                   1131: 	# again, this is essentially a no-op.
                   1132: 
                   1133:     } elsif ($State eq "ReceivingKey")      {
                   1134: 	# Now we need to wait for the key
                   1135: 	# to come back from the peer:
                   1136: 
                   1137: 	$Watcher->cb(\&LondReadable);
                   1138: 	$Watcher->poll("r");
                   1139: 
                   1140:     } elsif ($State eq "SendingRequest")    {
1.40      foxr     1141:  
1.41      albertel 1142: 	# At this time we are sending a request to the
1.1       foxr     1143: 	# peer... write the next chunk:
                   1144: 
1.41      albertel 1145: 
                   1146:     } elsif ($State eq "ReceivingReply")    {
                   1147: 	# The send has completed.  Wait for the
                   1148: 	# data to come in for a reply.
                   1149: 	Debug(8,"Writable sent request/receiving reply");
                   1150: 	$Watcher->cb(\&LondReadable);
                   1151: 	$Watcher->poll("r");
1.1       foxr     1152: 
1.41      albertel 1153:     } else {
                   1154: 	#  Control only passes here on an error: 
                   1155: 	#  the socket state does not match any
                   1156: 	#  of the known states... so an error
                   1157: 	#  must be logged.
1.1       foxr     1158: 
1.41      albertel 1159: 	&Debug(4, "Invalid socket state ".$State."\n");
                   1160:     }
1.1       foxr     1161:     
                   1162: }
1.81      albertel 1163: 
1.6       foxr     1164: =pod
                   1165:     
                   1166: =cut
1.69      matthew  1167: 
1.81      albertel 1168: 
1.6       foxr     1169: sub QueueDelayed {
1.8       foxr     1170:     Debug(3,"QueueDelayed called");
                   1171: 
1.6       foxr     1172:     my $path = "$perlvar{'lonSockDir'}/delayed";
1.8       foxr     1173: 
                   1174:     Debug(4, "Delayed path: ".$path);
1.6       foxr     1175:     opendir(DIRHANDLE, $path);
1.75      albertel 1176: 
1.82      albertel 1177:     my $host_id_re = '(?:'.join('|',map {quotemeta($_)} (@all_host_ids)).')';
1.75      albertel 1178:     my @alldelayed = grep(/\.$host_id_re$/, readdir(DIRHANDLE));
1.6       foxr     1179:     closedir(DIRHANDLE);
1.75      albertel 1180:     foreach my $dfname (sort(@alldelayed)) {
                   1181: 	my $reqfile = "$path/$dfname";
                   1182: 	my ($host_id) = ($dfname =~ /\.([^.]*)$/);
                   1183: 	Debug(4, "queueing ".$reqfile." for $host_id");
1.6       foxr     1184: 	my $Handle = IO::File->new($reqfile);
                   1185: 	my $cmd    = <$Handle>;
1.8       foxr     1186: 	chomp $cmd;		# There may or may not be a newline...
1.12      foxr     1187: 	$cmd = $cmd."\n";	# now for sure there's exactly one newline.
1.75      albertel 1188: 	my $Transaction = LondTransaction->new("sethost:$host_id:$cmd");
1.7       foxr     1189: 	$Transaction->SetDeferred($reqfile);
                   1190: 	QueueTransaction($Transaction);
1.6       foxr     1191:     }
                   1192:     
                   1193: }
1.1       foxr     1194: 
                   1195: =pod
1.3       albertel 1196: 
1.1       foxr     1197: =head2 MakeLondConnection
1.3       albertel 1198: 
                   1199: Create a new lond connection object, and start it towards its initial
1.97      raeburn  1200: idleness.  Once idle, it becomes eligible to receive transactions
1.3       albertel 1201: from the work queue.  If the work queue is not empty when the
                   1202: connection is completed and becomes idle, it will dequeue an entry and
                   1203: start off on it.
                   1204: 
1.1       foxr     1205: =cut
1.3       albertel 1206: 
1.1       foxr     1207: sub MakeLondConnection {     
                   1208:     Debug(4,"MakeLondConnection to ".GetServerHost()." on port "
                   1209: 	  .GetServerPort());
                   1210: 
                   1211:     my $Connection = LondConnection->new(&GetServerHost(),
1.81      albertel 1212: 					 &GetServerPort(),
                   1213: 					 &GetHostId());
1.1       foxr     1214: 
1.90      foxr     1215:     if($Connection eq undef) {	
1.9       foxr     1216: 	Log("CRITICAL","Failed to make a connection with lond.");
1.10      foxr     1217: 	$ConnectionRetriesLeft--;
                   1218: 	return 0;		# Failure.
1.5       foxr     1219:     }  else {
1.22      foxr     1220: 
1.82      albertel 1221: 	$LondConnecting = 1;	# Connection in progress.
1.5       foxr     1222: 	# The connection needs to have writability 
                   1223: 	# monitored in order to send the init sequence
                   1224: 	# that starts the whole authentication/key
                   1225: 	# exchange underway.
                   1226: 	#
                   1227: 	my $Socket = $Connection->GetSocket();
1.30      foxr     1228: 	if($Socket eq undef) {
1.64      foxr     1229: 	    &child_exit(-1, "did not get a socket from the connection");
1.5       foxr     1230: 	} else {
                   1231: 	    &Debug(9,"MakeLondConnection got socket: ".$Socket);
                   1232: 	}
1.1       foxr     1233: 	
1.21      foxr     1234: 	$Connection->SetTimeoutCallback(\&SocketTimeout);
                   1235: 
1.23      foxr     1236: 	my $event = Event->io(fd       => $Socket,
1.5       foxr     1237: 			   poll     => 'w',
                   1238: 			   cb       => \&LondWritable,
1.8       foxr     1239: 			   data     => $Connection,
1.5       foxr     1240: 			   desc => 'Connection to lond server');
                   1241: 	$ActiveConnections{$Connection} = $event;
1.52      foxr     1242: 	if ($ConnectionCount == 0) {
                   1243: 	    &SetupTimer;	# Need to handle timeouts with connections...
                   1244: 	}
1.5       foxr     1245: 	$ConnectionCount++;
1.95      foxr     1246: 	$Connection->SetClientData($ConnectionCount);
1.8       foxr     1247: 	Debug(4, "Connection count = ".$ConnectionCount);
1.6       foxr     1248: 	if($ConnectionCount == 1) { # First Connection:
                   1249: 	    QueueDelayed;
                   1250: 	}
1.97      raeburn  1251: 	Log("SUCCESS", "Created connection ".$ConnectionCount
1.9       foxr     1252: 	    ." to host ".GetServerHost());
1.10      foxr     1253: 	return 1;		# Return success.
1.1       foxr     1254:     }
                   1255:     
                   1256: }
1.3       albertel 1257: 
1.1       foxr     1258: =pod
1.3       albertel 1259: 
1.1       foxr     1260: =head2 StartRequest
1.3       albertel 1261: 
                   1262: Starts a lond request going on a specified lond connection.
                   1263: parameters are:
                   1264: 
                   1265: =item $Lond
                   1266: 
                   1267: Connection to the lond that will send the transaction and receive the
                   1268: reply.
                   1269: 
                   1270: =item $Client
                   1271: 
1.97      raeburn  1272: Connection to the client that is making this request. We got the
1.3       albertel 1273: request from this socket, and when the request has been relayed to
                   1274: lond and we get a reply back from lond it will get sent to this
                   1275: socket.
                   1276: 
                   1277: =item $Request
                   1278: 
                   1279: The text of the request to send.
                   1280: 
1.1       foxr     1281: =cut
                   1282: 
                   1283: sub StartRequest {
1.47      foxr     1284: 
                   1285:     my ($Lond, $Request) = @_;
1.1       foxr     1286:     
1.7       foxr     1287:     Debug(6, "StartRequest: ".$Request->getRequest());
1.1       foxr     1288: 
                   1289:     my $Socket = $Lond->GetSocket();
                   1290:     
1.7       foxr     1291:     $Request->Activate($Lond);
                   1292:     $ActiveTransactions{$Lond} = $Request;
1.1       foxr     1293: 
1.7       foxr     1294:     $Lond->InitiateTransaction($Request->getRequest());
1.23      foxr     1295:     my $event = Event->io(fd      => $Socket,
1.1       foxr     1296: 		       poll    => "w",
                   1297: 		       cb      => \&LondWritable,
                   1298: 		       data    => $Lond,
                   1299: 		       desc    => "lond transaction connection");
                   1300:     $ActiveConnections{$Lond} = $event;
                   1301:     Debug(8," Start Request made watcher data with ".$event->data."\n");
                   1302: }
                   1303: 
                   1304: =pod
1.3       albertel 1305: 
1.1       foxr     1306: =head2 QueueTransaction
1.3       albertel 1307: 
                   1308: If there is an idle lond connection, it is put to work doing this
                   1309: transaction.  Otherwise, the transaction is placed in the work queue.
                   1310: If placed in the work queue and the maximum number of connections has
                   1311: not yet been created, a new connection will be started.  Our goal is
                   1312: to eventually have a sufficient number of connections that the work
                   1313: queue will typically be empty.  parameters are:
                   1314: 
                   1315: =item Socket
                   1316: 
                   1317: open on the lonc client.
                   1318: 
                   1319: =item Request
                   1320: 
                   1321: data to send to the lond.
1.1       foxr     1322: 
                   1323: =cut
1.3       albertel 1324: 
1.1       foxr     1325: sub QueueTransaction {
                   1326: 
1.7       foxr     1327:     my $requestData   = shift;	# This is a LondTransaction.
                   1328:     my $cmd           = $requestData->getRequest();
                   1329: 
                   1330:     Debug(6,"QueueTransaction: ".$cmd);
1.1       foxr     1331: 
                   1332:     my $LondSocket    = $IdleConnections->pop();
                   1333:     if(!defined $LondSocket) {	# Need to queue request.
1.29      foxr     1334: 	Debug(5,"Must queue...");
1.1       foxr     1335: 	$WorkQueue->enqueue($requestData);
1.56      foxr     1336: 	Debug(5, "Queue Transaction startnew $ConnectionCount $LondConnecting");
                   1337: 	if(($ConnectionCount < $MaxConnectionCount)   && (! $LondConnecting)) {
                   1338: 
1.22      foxr     1339: 	    if($ConnectionRetriesLeft > 0) {
1.29      foxr     1340: 		Debug(5,"Starting additional lond connection");
1.56      foxr     1341: 		if(&MakeLondConnection() == 0) {
1.22      foxr     1342: 		    EmptyQueue();	# Fail transactions, can't make connection.
1.42      foxr     1343: 		    CloseAllLondConnections; # Should all be closed but...
1.22      foxr     1344: 		}
                   1345: 	    } else {
                   1346: 		ShowStatus(GetServerHost()." >>> DEAD !!!! <<<");
1.56      foxr     1347: 		$LondConnecting = 0;
1.22      foxr     1348: 		EmptyQueue();	# It's worse than that ... he's dead Jim.
1.42      foxr     1349: 		CloseAllLondConnections; # Should all be closed but..
1.17      foxr     1350: 	    }
1.1       foxr     1351: 	}
                   1352:     } else {			# Can start the request:
                   1353: 	Debug(8,"Can start...");
1.7       foxr     1354: 	StartRequest($LondSocket,  $requestData);
1.1       foxr     1355:     }
                   1356: }
                   1357: 
1.95      foxr     1358: #-------------------------- Lonc UNIX socket handling -------------------
1.1       foxr     1359: =pod
1.3       albertel 1360: 
1.1       foxr     1361: =head2 ClientRequest
1.97      raeburn  1362: 
1.3       albertel 1363: Callback that is called when data can be read from the UNIX domain
                   1364: socket connecting us with an apache server process.
1.1       foxr     1365: 
                   1366: =cut
                   1367: 
                   1368: sub ClientRequest {
                   1369:     Debug(6, "ClientRequest");
                   1370:     my $event   = shift;
                   1371:     my $watcher = $event->w;
                   1372:     my $socket  = $watcher->fd;
                   1373:     my $data    = $watcher->data;
                   1374:     my $thisread;
                   1375: 
                   1376:     Debug(9, "  Watcher named: ".$watcher->desc);
                   1377: 
                   1378:     my $rv = $socket->recv($thisread, POSIX::BUFSIZ, 0);
                   1379:     Debug(8, "rcv:  data length = ".length($thisread)
                   1380: 	  ." read =".$thisread);
1.29      foxr     1381:     unless (defined $rv  && length($thisread)) {
1.1       foxr     1382: 	 # Likely eof on socket.
                   1383: 	Debug(5,"Client Socket closed on lonc for ".$RemoteHost);
                   1384: 	close($socket);
                   1385: 	$watcher->cancel();
                   1386: 	delete($ActiveClients{$socket});
1.10      foxr     1387: 	return;
1.1       foxr     1388:     }
                   1389:     Debug(8,"Data: ".$data." this read: ".$thisread);
                   1390:     $data = $data.$thisread;	# Append new data.
                   1391:     $watcher->data($data);
1.44      albertel 1392:     if($data =~ /\n$/) {	# Request entirely read.
1.87      albertel 1393: 	if ($data eq "close_connection_exit\n") {
1.9       foxr     1394: 	    Log("CRITICAL",
                   1395: 		"Request Close Connection ... exiting");
                   1396: 	    CloseAllLondConnections();
                   1397: 	    exit;
1.87      albertel 1398: 	} elsif ($data eq "reset_retries\n") {
                   1399: 	    Log("INFO", "Resetting Connection Retries.");
                   1400: 	    $ConnectionRetriesLeft = $ConnectionRetries;
                   1401: 	    &UpdateStatus();
                   1402: 	    my $Transaction = LondTransaction->new($data);
                   1403: 	    $Transaction->SetClient($socket);
                   1404: 	    StartClientReply($Transaction, "ok\n");
                   1405: 	    $watcher->cancel();
                   1406: 	    return;
1.9       foxr     1407: 	}
1.1       foxr     1408: 	Debug(8, "Complete transaction received: ".$data);
1.87      albertel 1409: 	if ($LogTransactions) {
1.39      foxr     1410: 	    Log("SUCCESS", "Transaction: '$data'"); # Transaction has \n.
                   1411: 	}
1.8       foxr     1412: 	my $Transaction = LondTransaction->new($data);
1.7       foxr     1413: 	$Transaction->SetClient($socket);
                   1414: 	QueueTransaction($Transaction);
1.1       foxr     1415: 	$watcher->cancel();	# Done looking for input data.
                   1416:     }
                   1417: 
                   1418: }
                   1419: 
1.62      foxr     1420: #
                   1421: #     Accept a connection request for a client (lonc child) and
                   1422: #    start up an event watcher to keep an eye on input from that 
                   1423: #    Event.  This can be called both from NewClient and from
1.80      albertel 1424: #    ChildProcess.
1.62      foxr     1425: # Parameters:
                   1426: #    $socket       - The listener socket.
                   1427: # Returns:
                   1428: #   NONE
                   1429: # Side Effects:
                   1430: #    An event is made to watch the accepted connection.
                   1431: #    Active clients hash is updated to reflect the new connection.
                   1432: #    The client connection count is incremented.
                   1433: #
                   1434: sub accept_client {
                   1435:     my ($socket) = @_;
                   1436: 
                   1437:     Debug(8, "Entering accept for lonc UNIX socket\n");
                   1438:     my $connection = $socket->accept();	# Accept the client connection.
                   1439:     Debug(8,"Connection request accepted from "
                   1440: 	  .GetPeername($connection, AF_UNIX));
                   1441: 
                   1442: 
                   1443:     my $description = sprintf("Connection to lonc client %d",
                   1444: 			      $ClientConnection);
                   1445:     Debug(9, "Creating event named: ".$description);
                   1446:     Event->io(cb      => \&ClientRequest,
                   1447: 	      poll    => 'r',
                   1448: 	      desc    => $description,
                   1449: 	      data    => "",
                   1450: 	      fd      => $connection);
                   1451:     $ActiveClients{$connection} = $ClientConnection;
                   1452:     $ClientConnection++;
                   1453: }
1.1       foxr     1454: 
                   1455: =pod
1.3       albertel 1456: 
1.1       foxr     1457: =head2  NewClient
1.3       albertel 1458: 
                   1459: Callback that is called when a connection is received on the unix
                   1460: socket for a new client of lonc.  The callback is parameterized by the
                   1461: event.. which is a-priori assumed to be an io event, and therefore has
1.97      raeburn  1462: an fd member that is the Listener socket.  We accept the connection
1.3       albertel 1463: and register a new event on the readability of that socket:
                   1464: 
1.1       foxr     1465: =cut
1.3       albertel 1466: 
1.1       foxr     1467: sub NewClient {
                   1468:     Debug(6, "NewClient");
                   1469:     my $event      = shift;		# Get the event parameters.
                   1470:     my $watcher    = $event->w; 
                   1471:     my $socket     = $watcher->fd;	# Get the event' socket.
                   1472: 
1.62      foxr     1473:     &accept_client($socket);
1.1       foxr     1474: }
1.3       albertel 1475: 
                   1476: =pod
                   1477: 
                   1478: =head2 GetLoncSocketPath
                   1479: 
                   1480: Returns the name of the UNIX socket on which to listen for client
                   1481: connections.
1.1       foxr     1482: 
1.58      foxr     1483: =head2 Parameters:
                   1484: 
                   1485:     host (optional)  - Name of the host socket to return.. defaults to
                   1486:                        the return from GetServerHost().
                   1487: 
1.1       foxr     1488: =cut
1.3       albertel 1489: 
1.1       foxr     1490: sub GetLoncSocketPath {
1.58      foxr     1491: 
                   1492:     my $host = GetServerHost();	# Default host.
                   1493:     if (@_) {
                   1494: 	($host)  = @_;		# Override if supplied.
                   1495:     }
                   1496:     return $UnixSocketDir."/".$host;
1.1       foxr     1497: }
                   1498: 
1.3       albertel 1499: =pod
                   1500: 
                   1501: =head2 GetServerHost
                   1502: 
                   1503: Returns the host whose lond we talk with.
                   1504: 
1.1       foxr     1505: =cut
1.3       albertel 1506: 
1.7       foxr     1507: sub GetServerHost {
1.1       foxr     1508:     return $RemoteHost;		# Setup by the fork.
                   1509: }
1.3       albertel 1510: 
                   1511: =pod
                   1512: 
1.81      albertel 1513: =head2 GetServerId
                   1514: 
                   1515: Returns the hostid whose lond we talk with.
                   1516: 
                   1517: =cut
                   1518: 
                   1519: sub GetHostId {
                   1520:     return $RemoteHostId;		# Setup by the fork.
                   1521: }
                   1522: 
                   1523: =pod
                   1524: 
1.3       albertel 1525: =head2 GetServerPort
                   1526: 
                   1527: Returns the lond port number.
                   1528: 
1.1       foxr     1529: =cut
1.3       albertel 1530: 
1.7       foxr     1531: sub GetServerPort {
1.1       foxr     1532:     return $perlvar{londPort};
                   1533: }
1.3       albertel 1534: 
                   1535: =pod
                   1536: 
                   1537: =head2 SetupLoncListener
                   1538: 
                   1539: Setup a lonc listener event.  The event is called when the socket
                   1540: becomes readable.. that corresponds to the receipt of a new
                   1541: connection.  The event handler established will accept the connection
1.99      raeburn  1542: (creating a communications channel), that in turn will establish
1.3       albertel 1543: another event handler to subess requests.
1.1       foxr     1544: 
1.58      foxr     1545: =head2  Parameters:
                   1546: 
                   1547:    host (optional)   Name of the host to set up a unix socket to.
                   1548: 
1.1       foxr     1549: =cut
1.3       albertel 1550: 
1.1       foxr     1551: sub SetupLoncListener {
1.78      albertel 1552:     my ($host,$SocketName) = @_;
                   1553:     if (!$host) { $host = &GetServerHost(); }
                   1554:     if (!$SocketName) { $SocketName = &GetLoncSocketPath($host); }
1.1       foxr     1555: 
1.78      albertel 1556: 
                   1557:     unlink($SocketName);
1.58      foxr     1558: 
1.1       foxr     1559:     my $socket;
1.7       foxr     1560:     unless ($socket =IO::Socket::UNIX->new(Local  => $SocketName,
1.55      albertel 1561: 					    Listen => 250, 
1.1       foxr     1562: 					    Type   => SOCK_STREAM)) {
1.64      foxr     1563: 	if($I_am_child) {
                   1564: 	    &child_exit(-1, "Failed to create a lonc listener socket");
                   1565: 	} else {
                   1566: 	    die "Failed to create a lonc listner socket";
                   1567: 	}
1.1       foxr     1568:     }
1.59      foxr     1569:     return $socket;
1.1       foxr     1570: }
                   1571: 
1.39      foxr     1572: #
                   1573: #   Toggle transaction logging.
                   1574: #  Implicit inputs:  
                   1575: #     LogTransactions
                   1576: #  Implicit Outputs:
                   1577: #     LogTransactions
                   1578: sub ToggleTransactionLogging {
                   1579:     print STDERR "Toggle transaction logging...\n";
                   1580:     if(!$LogTransactions) {
                   1581: 	$LogTransactions = 1;
                   1582:     } else {
                   1583: 	$LogTransactions = 0;
                   1584:     }
                   1585: 
                   1586: 
                   1587:     Log("SUCCESS", "Toggled transaction logging: $LogTransactions \n");
                   1588: }
                   1589: 
1.14      foxr     1590: =pod 
                   1591: 
                   1592: =head2 ChildStatus
                   1593:  
                   1594: Child USR1 signal handler to report the most recent status
                   1595: into the status file.
                   1596: 
1.22      foxr     1597: We also use this to reset the retries count in order to allow the
                   1598: client to retry connections with a previously dead server.
1.69      matthew  1599: 
1.14      foxr     1600: =cut
1.46      albertel 1601: 
1.14      foxr     1602: sub ChildStatus {
                   1603:     my $event = shift;
                   1604:     my $watcher = $event->w;
                   1605: 
                   1606:     Debug(2, "Reporting child status because : ".$watcher->data);
                   1607:     my $docdir = $perlvar{'lonDocRoot'};
1.67      albertel 1608:     
                   1609:     open(LOG,">>$docdir/lon-status/loncstatus.txt");
                   1610:     flock(LOG,LOCK_EX);
                   1611:     print LOG $$."\t".$RemoteHost."\t".$Status."\t".
1.14      foxr     1612: 	$RecentLogEntry."\n";
1.38      foxr     1613:     #
                   1614:     #  Write out information about each of the connections:
                   1615:     #
1.46      albertel 1616:     if ($DebugLevel > 2) {
1.67      albertel 1617: 	print LOG "Active connection statuses: \n";
1.46      albertel 1618: 	my $i = 1;
                   1619: 	print STDERR  "================================= Socket Status Dump:\n";
                   1620: 	foreach my $item (keys %ActiveConnections) {
                   1621: 	    my $Socket = $ActiveConnections{$item}->data;
                   1622: 	    my $state  = $Socket->GetState();
1.67      albertel 1623: 	    print LOG "Connection $i State: $state\n";
1.46      albertel 1624: 	    print STDERR "---------------------- Connection $i \n";
1.48      foxr     1625: 	    $Socket->Dump(-1);	# Ensure it gets dumped..
1.46      albertel 1626: 	    $i++;	
                   1627: 	}
1.38      foxr     1628:     }
1.67      albertel 1629:     flock(LOG,LOCK_UN);
                   1630:     close(LOG);
1.22      foxr     1631:     $ConnectionRetriesLeft = $ConnectionRetries;
1.70      albertel 1632:     UpdateStatus();
1.14      foxr     1633: }
                   1634: 
1.1       foxr     1635: =pod
1.3       albertel 1636: 
1.10      foxr     1637: =head2 SignalledToDeath
                   1638: 
                   1639: Called in response to a signal that causes a chid process to die.
                   1640: 
                   1641: =cut
                   1642: 
                   1643: 
                   1644: sub SignalledToDeath {
1.14      foxr     1645:     my $event  = shift;
                   1646:     my $watcher= $event->w;
                   1647: 
                   1648:     Debug(2,"Signalled to death! via ".$watcher->data);
1.17      foxr     1649:     my ($signal) = $watcher->data;
1.10      foxr     1650:     chomp($signal);
                   1651:     Log("CRITICAL", "Abnormal exit.  Child $$ for $RemoteHost "
                   1652: 	."died through "."\"$signal\"");
1.68      albertel 1653:     #LogPerm("F:lonc: $$ on $RemoteHost signalled to death: "
                   1654: #	    ."\"$signal\"");
1.12      foxr     1655:     exit 0;
1.10      foxr     1656: 
                   1657: }
1.16      foxr     1658: 
1.69      matthew  1659: =pod
                   1660: 
1.16      foxr     1661: =head2 ToggleDebug
                   1662: 
                   1663: This sub toggles trace debugging on and off.
                   1664: 
                   1665: =cut
                   1666: 
                   1667: sub ToggleDebug {
                   1668:     my $Current    = $DebugLevel;
                   1669:        $DebugLevel = $NextDebugLevel;
                   1670:        $NextDebugLevel = $Current;
                   1671: 
                   1672:     Log("SUCCESS", "New debugging level for $RemoteHost now $DebugLevel");
                   1673: 
                   1674: }
                   1675: 
1.69      matthew  1676: =pod
                   1677: 
1.1       foxr     1678: =head2 ChildProcess
                   1679: 
                   1680: This sub implements a child process for a single lonc daemon.
1.61      foxr     1681: Optional parameter:
1.97      raeburn  1682:    $socket  - if provided, this is a socket already open for listening
                   1683:               on the client socket. Otherwise, a new listener is set up.
1.1       foxr     1684: 
                   1685: =cut
                   1686: 
                   1687: sub ChildProcess {
1.80      albertel 1688:     #  We've inherited all the
1.62      foxr     1689:     #  events of our parent and those have to be cancelled or else
                   1690:     #  all holy bloody chaos will result.. trust me, I already made
                   1691:     #  >that< mistake.
                   1692: 
                   1693:     my $host = GetServerHost();
                   1694:     foreach my $listener (keys %parent_dispatchers) {
                   1695: 	my $watcher = $parent_dispatchers{$listener};
                   1696: 	my $s       = $watcher->fd;
                   1697: 	if ($listener ne $host) { # Close everyone but me.
                   1698: 	    Debug(5, "Closing listen socket for $listener");
                   1699: 	    $s->close();
                   1700: 	}
                   1701: 	Debug(5, "Killing watcher for $listener");
                   1702: 
                   1703: 	$watcher->cancel();
1.65      foxr     1704: 	delete($parent_dispatchers{$listener});
1.62      foxr     1705: 
                   1706:     }
1.65      foxr     1707: 
                   1708:     #  kill off the parent's signal handlers too!  
                   1709:     #
                   1710: 
                   1711:     for my $handler (keys %parent_handlers) {
                   1712: 	my $watcher = $parent_handlers{$handler};
                   1713: 	$watcher->cancel();
                   1714: 	delete($parent_handlers{$handler});
                   1715:     }
                   1716: 
1.64      foxr     1717:     $I_am_child    = 1;		# Seems like in spite of it all I may still getting
                   1718:                                 # parent event dispatches.. flag I'm a child.
1.1       foxr     1719: 
                   1720: 
1.14      foxr     1721:     #
                   1722:     #  Signals must be handled by the Event framework...
1.61      foxr     1723:     #
1.14      foxr     1724: 
                   1725:     Event->signal(signal   => "QUIT",
                   1726: 		  cb       => \&SignalledToDeath,
                   1727: 		  data     => "QUIT");
                   1728:     Event->signal(signal   => "HUP",
                   1729: 		  cb       => \&ChildStatus,
                   1730: 		  data     => "HUP");
                   1731:     Event->signal(signal   => "USR1",
                   1732: 		  cb       => \&ChildStatus,
                   1733: 		  data     => "USR1");
1.39      foxr     1734:     Event->signal(signal   => "USR2",
                   1735: 		  cb       => \&ToggleTransactionLogging);
1.16      foxr     1736:     Event->signal(signal   => "INT",
                   1737: 		  cb       => \&ToggleDebug,
                   1738: 		  data     => "INT");
1.1       foxr     1739: 
1.93      foxr     1740:     # Block the pipe signal we'll get when the socket disconnects.  We detect 
                   1741:     # socket disconnection via send/receive failures. On disconnect, the
                   1742:     # socket becomes readable .. which will force the disconnect detection.
                   1743: 
                   1744:     my $set = POSIX::SigSet->new(SIGPIPE);
                   1745:     sigprocmask(SIG_BLOCK, $set);
                   1746: 
1.62      foxr     1747:     #  Figure out if we got passed a socket or need to open one to listen for
                   1748:     #  client requests.
                   1749: 
1.61      foxr     1750:     my ($socket) = @_;
                   1751:     if (!$socket) {
                   1752: 
                   1753: 	$socket =  SetupLoncListener();
                   1754:     }
1.62      foxr     1755:     #  Establish an event to listen for client connection requests.
                   1756: 
                   1757: 
1.59      foxr     1758:     Event->io(cb   => \&NewClient,
                   1759: 	      poll => 'r',
                   1760: 	      desc => 'Lonc Listener Unix Socket',
                   1761: 	      fd   => $socket);
1.1       foxr     1762:     
1.76      albertel 1763:     $Event::DebugLevel = $DebugLevel;
1.1       foxr     1764:     
                   1765:     Debug(9, "Making initial lond connection for ".$RemoteHost);
                   1766: 
                   1767: # Setup the initial server connection:
                   1768:     
1.62      foxr     1769:      # &MakeLondConnection(); // let first work request do it.
1.10      foxr     1770: 
1.80      albertel 1771:     #  need to accept the connection since the event may  not fire.
1.62      foxr     1772: 
1.80      albertel 1773:     &accept_client($socket);
1.5       foxr     1774: 
1.1       foxr     1775:     Debug(9,"Entering event loop");
                   1776:     my $ret = Event::loop();		#  Start the main event loop.
                   1777:     
                   1778:     
1.64      foxr     1779:     &child_exit (-1,"Main event loop exited!!!");
1.1       foxr     1780: }
                   1781: 
                   1782: #  Create a new child for host passed in:
                   1783: 
                   1784: sub CreateChild {
1.81      albertel 1785:     my ($host, $hostid) = @_;
1.52      foxr     1786: 
1.12      foxr     1787:     my $sigset = POSIX::SigSet->new(SIGINT);
                   1788:     sigprocmask(SIG_BLOCK, $sigset);
1.1       foxr     1789:     $RemoteHost = $host;
1.91      foxr     1790:     ShowStatus('Parent keeping the flock'); # Update time in status message.
1.9       foxr     1791:     Log("CRITICAL", "Forking server for ".$host);
1.23      foxr     1792:     my $pid          = fork;
1.1       foxr     1793:     if($pid) {			# Parent
1.17      foxr     1794: 	$RemoteHost = "Parent";
1.83      albertel 1795: 	$ChildPid{$pid} = $host;
1.12      foxr     1796: 	sigprocmask(SIG_UNBLOCK, $sigset);
1.82      albertel 1797: 	undef(@all_host_ids);
1.1       foxr     1798:     } else {			# child.
1.81      albertel 1799: 	$RemoteHostId = $hostid;
1.5       foxr     1800: 	ShowStatus("Connected to ".$RemoteHost);
1.23      foxr     1801: 	$SIG{INT} = 'DEFAULT';
1.12      foxr     1802: 	sigprocmask(SIG_UNBLOCK, $sigset);
1.81      albertel 1803: 	&ChildProcess();		# Does not return.
1.1       foxr     1804:     }
1.61      foxr     1805: }
1.1       foxr     1806: 
1.61      foxr     1807: # parent_client_connection:
                   1808: #    Event handler that processes client connections for the parent process.
                   1809: #    This sub is called when the parent is listening on a socket and
                   1810: #    a connection request arrives.  We must:
                   1811: #     Start a child process to accept the connection request.
                   1812: #     Kill our listen on the socket.
                   1813: # Parameter:
                   1814: #    event       - The event object that was created to monitor this socket.
                   1815: #                  event->w->fd is the socket.
                   1816: # Returns:
                   1817: #    NONE
                   1818: #
                   1819: sub parent_client_connection {
1.62      foxr     1820:     if ($I_am_child) {
                   1821: 	#  Should not get here, but seem to anyway:
                   1822: 	&Debug(5," Child caught parent client connection event!!");
                   1823: 	my ($event) = @_;
                   1824: 	my $watcher = $event->w;
                   1825: 	$watcher->cancel();	# Try to kill it off again!!
                   1826:     } else {
                   1827: 	&Debug(9, "parent_client_connection");
                   1828: 	my ($event)   = @_;
                   1829: 	my $watcher   = $event->w;
                   1830: 	my $socket    = $watcher->fd;
1.81      albertel 1831: 	my $connection = $socket->accept();	# Accept the client connection.
                   1832: 	Event->io(cb      => \&get_remote_hostname,
                   1833: 		  poll    => 'r',
                   1834: 		  data    => "",
                   1835: 		  fd      => $connection);
1.77      albertel 1836:     }
                   1837: }
                   1838: 
                   1839: sub get_remote_hostname {
1.82      albertel 1840:     my ($event)   = @_;
                   1841:     my $watcher   = $event->w;
                   1842:     my $socket    = $watcher->fd;
                   1843: 
                   1844:     my $thisread;
                   1845:     my $rv = $socket->recv($thisread, POSIX::BUFSIZ, 0);
                   1846:     Debug(8, "rcv:  data length = ".length($thisread)." read =".$thisread);
                   1847:     if (!defined($rv) || length($thisread) == 0) {
                   1848: 	# Likely eof on socket.
                   1849: 	Debug(5,"Client Socket closed on lonc for p_c_c");
                   1850: 	close($socket);
                   1851: 	$watcher->cancel();
                   1852: 	return;
                   1853:     }
                   1854: 
                   1855:     my $data    = $watcher->data().$thisread;
                   1856:     $watcher->data($data);
                   1857:     if($data =~ /\n$/) {	# Request entirely read.
                   1858: 	chomp($data);
                   1859:     } else {
                   1860: 	return;
                   1861:     }
1.77      albertel 1862: 
1.82      albertel 1863:     &Debug(5,"Creating child for $data (parent_client_connection)");
                   1864:     (my $hostname,my $lonid,@all_host_ids) = split(':',$data);
1.83      albertel 1865:     $ChildHost{$hostname}++;
                   1866:     if ($ChildHost{$hostname} == 1) {
                   1867: 	&CreateChild($hostname,$lonid);
                   1868:     } else {
                   1869: 	&Log('WARNING',"Request for a second child on $hostname");
                   1870:     }
1.82      albertel 1871:     # Clean up the listen since now the child takes over until it exits.
                   1872:     $watcher->cancel();		# Nolonger listening to this event
                   1873:     $socket->send("done\n");
                   1874:     $socket->close();
1.61      foxr     1875: }
                   1876: 
                   1877: # parent_listen:
                   1878: #    Opens a socket and starts a listen for the parent process on a client UNIX
                   1879: #    domain socket.
                   1880: #
                   1881: #    This involves:
                   1882: #       Creating a socket for listen.
                   1883: #       Removing any socket lock file
                   1884: #       Adding an event handler for this socket becoming readable
                   1885: #         To the parent's event dispatcher.
                   1886: # Parameters:
                   1887: #    loncapa_host    - LonCAPA cluster name of the host represented by the client
                   1888: #                      socket.
                   1889: # Returns:
                   1890: #    NONE
                   1891: #
                   1892: sub parent_listen {
                   1893:     my ($loncapa_host) = @_;
                   1894:     Debug(5, "parent_listen: $loncapa_host");
                   1895: 
1.78      albertel 1896:     my ($socket,$file);
                   1897:     if (!$loncapa_host) {
                   1898: 	$loncapa_host = 'common_parent';
                   1899: 	$file         = $perlvar{'lonSockCreate'};
                   1900:     } else {
                   1901: 	$file         = &GetLoncSocketPath($loncapa_host);
                   1902:     }
                   1903:     $socket = &SetupLoncListener($loncapa_host,$file);
                   1904: 
1.62      foxr     1905:     $listening_to{$socket} = $loncapa_host;
1.61      foxr     1906:     if (!$socket) {
                   1907: 	die "Unable to create a listen socket for $loncapa_host";
                   1908:     }
                   1909:     
1.78      albertel 1910:     my $lock_file = $file.".lock";
1.61      foxr     1911:     unlink($lock_file);		# No problem if it doesn't exist yet [startup e.g.]
                   1912: 
1.77      albertel 1913:     my $watcher = 
                   1914: 	Event->io(cb    => \&parent_client_connection,
                   1915: 		  poll  => 'r',
                   1916: 		  desc  => "Parent listener unix socket ($loncapa_host)",
                   1917: 		  data => "",
                   1918: 		  fd    => $socket);
1.62      foxr     1919:     $parent_dispatchers{$loncapa_host} = $watcher;
1.61      foxr     1920: 
                   1921: }
                   1922: 
1.77      albertel 1923: sub parent_clean_up {
                   1924:     my ($loncapa_host) = @_;
1.87      albertel 1925:     Debug(1, "parent_clean_up: $loncapa_host");
1.77      albertel 1926: 
                   1927:     my $socket_file = &GetLoncSocketPath($loncapa_host);
                   1928:     unlink($socket_file);	# No problem if it doesn't exist yet [startup e.g.]
                   1929:     my $lock_file   = $socket_file.".lock";
                   1930:     unlink($lock_file);		# No problem if it doesn't exist yet [startup e.g.]
                   1931: }
                   1932: 
1.61      foxr     1933: 
1.83      albertel 1934: 
                   1935: #    This sub initiates a listen on the common unix domain lonc client socket.
                   1936: #    loncnew starts up with no children, and only spawns off children when a
                   1937: #    connection request occurs on the common client unix socket.  The spawned
                   1938: #    child continues to run until it has been idle a while at which point it
                   1939: #    eventually exits and once more the parent picks up the listen.
1.61      foxr     1940: #
                   1941: #  Parameters:
                   1942: #      NONE
                   1943: #  Implicit Inputs:
                   1944: #    The configuration file that has been read in by LondConnection.
                   1945: #  Returns:
                   1946: #     NONE
                   1947: #
1.77      albertel 1948: sub listen_on_common_socket {
                   1949:     Debug(5, "listen_on_common_socket");
1.78      albertel 1950:     &parent_listen();
1.77      albertel 1951: }
                   1952: 
1.63      foxr     1953: #   server_died is called whenever a child process exits.
                   1954: #   Since this is dispatched via a signal, we must process all
                   1955: #   dead children until there are no more left.  The action
                   1956: #   is to:
                   1957: #      - Remove the child from the bookeeping hashes
                   1958: #      - Re-establish a listen on the unix domain socket associated
                   1959: #        with that host.
                   1960: # Parameters:
                   1961: #    The event, but we don't actually care about it.
                   1962: sub server_died {
                   1963:     &Debug(9, "server_died called...");
                   1964:     
                   1965:     while(1) {			# Loop until waitpid nowait fails.
                   1966: 	my $pid = waitpid(-1, WNOHANG);
                   1967: 	if($pid <= 0) {
                   1968: 	    return;		# Nothing left to wait for.
                   1969: 	}
                   1970: 	# need the host to restart:
                   1971: 
1.83      albertel 1972: 	my $host = $ChildPid{$pid};
1.63      foxr     1973: 	if($host) {		# It's for real...
                   1974: 	    &Debug(9, "Caught sigchild for $host");
1.83      albertel 1975: 	    delete($ChildPid{$pid});
                   1976: 	    delete($ChildHost{$host});
1.81      albertel 1977: 	    &parent_clean_up($host);
                   1978: 
1.63      foxr     1979: 	} else {
                   1980: 	    &Debug(5, "Caught sigchild for pid not in hosts hash: $pid");
                   1981: 	}
                   1982:     }
                   1983: 
                   1984: }
                   1985: 
1.1       foxr     1986: #
                   1987: #  Parent process logic pass 1:
                   1988: #   For each entry in the hosts table, we will
                   1989: #  fork off an instance of ChildProcess to service the transactions
                   1990: #  to that host.  Each pid will be entered in a global hash
                   1991: #  with the value of the key, the host.
                   1992: #  The parent will then enter a loop to wait for process exits.
                   1993: #  Each exit gets logged and the child gets restarted.
                   1994: #
                   1995: 
1.5       foxr     1996: #
                   1997: #   Fork and start in new session so hang-up isn't going to 
                   1998: #   happen without intent.
                   1999: #
                   2000: 
                   2001: 
1.6       foxr     2002: 
                   2003: 
1.8       foxr     2004: 
1.6       foxr     2005: 
                   2006: ShowStatus("Forming new session");
                   2007: my $childpid = fork;
                   2008: if ($childpid != 0) {
                   2009:     sleep 4;			# Give child a chacne to break to
                   2010:     exit 0;			# a new sesion.
                   2011: }
1.8       foxr     2012: #
                   2013: #   Write my pid into the pid file so I can be located
                   2014: #
                   2015: 
                   2016: ShowStatus("Parent writing pid file:");
1.23      foxr     2017: my $execdir = $perlvar{'lonDaemons'};
1.8       foxr     2018: open (PIDSAVE, ">$execdir/logs/lonc.pid");
                   2019: print PIDSAVE "$$\n";
                   2020: close(PIDSAVE);
1.6       foxr     2021: 
1.17      foxr     2022: 
                   2023: 
1.6       foxr     2024: if (POSIX::setsid() < 0) {
                   2025:     print "Could not create new session\n";
                   2026:     exit -1;
                   2027: }
1.5       foxr     2028: 
                   2029: ShowStatus("Forking node servers");
                   2030: 
1.9       foxr     2031: Log("CRITICAL", "--------------- Starting children ---------------");
                   2032: 
1.31      foxr     2033: LondConnection::ReadConfig;               # Read standard config files.
1.1       foxr     2034: 
1.80      albertel 2035: $RemoteHost = "[parent]";
1.81      albertel 2036: &listen_on_common_socket();
1.60      foxr     2037: 
1.12      foxr     2038: $RemoteHost = "Parent Server";
1.1       foxr     2039: 
                   2040: # Maintain the population:
1.5       foxr     2041: 
                   2042: ShowStatus("Parent keeping the flock");
1.1       foxr     2043: 
1.12      foxr     2044: 
1.80      albertel 2045: # We need to setup a SIGChild event to handle the exit (natural or otherwise)
                   2046: # of the children.
1.61      foxr     2047: 
1.80      albertel 2048: Event->signal(cb       => \&server_died,
                   2049: 	      desc     => "Child exit handler",
                   2050: 	      signal   => "CHLD");
                   2051: 
                   2052: 
                   2053: # Set up all the other signals we set up.
                   2054: 
                   2055: $parent_handlers{INT} = Event->signal(cb       => \&Terminate,
                   2056: 				      desc     => "Parent INT handler",
                   2057: 				      signal   => "INT");
                   2058: $parent_handlers{TERM} = Event->signal(cb       => \&Terminate,
                   2059: 				       desc     => "Parent TERM handler",
                   2060: 				       signal   => "TERM");
1.81      albertel 2061: $parent_handlers{HUP}  = Event->signal(cb       => \&KillThemAll,
                   2062: 				       desc     => "Parent HUP handler.",
                   2063: 				       signal   => "HUP");
1.80      albertel 2064: $parent_handlers{USR1} = Event->signal(cb       => \&CheckKids,
                   2065: 				       desc     => "Parent USR1 handler",
                   2066: 				       signal   => "USR1");
                   2067: $parent_handlers{USR2} = Event->signal(cb       => \&UpdateKids,
                   2068: 				       desc     => "Parent USR2 handler.",
                   2069: 				       signal   => "USR2");
                   2070: 
                   2071: #  Start procdesing events.
                   2072: 
                   2073: $Event::DebugLevel = $DebugLevel;
                   2074: Debug(9, "Parent entering event loop");
                   2075: my $ret = Event::loop();
                   2076: die "Main Event loop exited: $ret";
1.14      foxr     2077: 
                   2078: =pod
                   2079: 
                   2080: =head1 CheckKids
                   2081: 
                   2082:   Since kids do not die as easily in this implementation
1.97      raeburn  2083: as the previous one, there is no need to restart the
1.14      foxr     2084: dead ones (all dead kids get restarted when they die!!)
                   2085: The only thing this function does is to pass USR1 to the
                   2086: kids so that they report their status.
                   2087: 
                   2088: =cut
                   2089: 
                   2090: sub CheckKids {
                   2091:     Debug(2, "Checking status of children");
                   2092:     my $docdir = $perlvar{'lonDocRoot'};
                   2093:     my $fh = IO::File->new(">$docdir/lon-status/loncstatus.txt");
                   2094:     my $now=time;
                   2095:     my $local=localtime($now);
                   2096:     print $fh "LONC status $local - parent $$ \n\n";
1.65      foxr     2097:     foreach my $host (keys %parent_dispatchers) {
                   2098: 	print $fh "LONC Parent process listening for $host\n";
                   2099:     }
1.83      albertel 2100:     foreach my $pid (keys %ChildPid) {
1.14      foxr     2101: 	Debug(2, "Sending USR1 -> $pid");
                   2102: 	kill 'USR1' => $pid;	# Tell Child to report status.
                   2103:     }
1.65      foxr     2104: 
1.14      foxr     2105: }
1.24      foxr     2106: 
                   2107: =pod
                   2108: 
                   2109: =head1  UpdateKids
                   2110: 
1.25      foxr     2111: parent's SIGUSR2 handler.  This handler:
1.24      foxr     2112: 
                   2113: =item
                   2114: 
                   2115: Rereads the hosts file.
                   2116: 
                   2117: =item
                   2118:  
                   2119: Kills off (via sigint) children for hosts that have disappeared.
                   2120: 
                   2121: =item
                   2122: 
1.27      foxr     2123: QUITs  children for hosts that already exist (this just forces a status display
1.24      foxr     2124: and resets the connection retry count for that host.
                   2125: 
                   2126: =item
                   2127: 
                   2128: Starts new children for hosts that have been added to the hosts.tab file since
                   2129: the start of the master program and maintains them.
                   2130: 
                   2131: =cut
                   2132: 
                   2133: sub UpdateKids {
1.27      foxr     2134: 
1.25      foxr     2135:     Log("INFO", "Updating connections via SIGUSR2");
1.27      foxr     2136: 
1.65      foxr     2137:     #  I'm not sure what I was thinking in the first implementation.
                   2138:     # someone will have to work hard to convince me the effect is any
                   2139:     # different than Restart, especially now that we don't start up 
                   2140:     # per host servers automatically, may as well just restart.
                   2141:     # The down side is transactions that are in flight will get timed out
                   2142:     # (lost unless they are critical).
1.27      foxr     2143: 
1.81      albertel 2144:     &KillThemAll();
1.24      foxr     2145: }
                   2146: 
1.14      foxr     2147: 
1.13      foxr     2148: =pod
                   2149: 
                   2150: =head1 Restart
                   2151: 
                   2152: Signal handler for HUP... all children are killed and
1.97      raeburn  2153: we self restart.  This is an el-cheapo way to re-read
1.13      foxr     2154: the config file.
                   2155: 
                   2156: =cut
                   2157: 
                   2158: sub Restart {
1.23      foxr     2159:     &KillThemAll;		# First kill all the children.
1.13      foxr     2160:     Log("CRITICAL", "Restarting");
                   2161:     my $execdir = $perlvar{'lonDaemons'};
                   2162:     unlink("$execdir/logs/lonc.pid");
1.65      foxr     2163:     exec("$executable");
1.10      foxr     2164: }
1.12      foxr     2165: 
                   2166: =pod
                   2167: 
                   2168: =head1 KillThemAll
                   2169: 
                   2170: Signal handler that kills all children by sending them a 
1.17      foxr     2171: SIGHUP.  Responds to sigint and sigterm.
1.12      foxr     2172: 
                   2173: =cut
                   2174: 
1.10      foxr     2175: sub KillThemAll {
1.12      foxr     2176:     Debug(2, "Kill them all!!");
1.85      albertel 2177:     
                   2178:     #local($SIG{CHLD}) = 'IGNORE';
                   2179:     # Our children >will< die.
                   2180:     # but we need to catch their death and cleanup after them in case this is 
                   2181:     # a restart set of kills
                   2182:     my @allpids = keys(%ChildPid);
                   2183:     foreach my $pid (@allpids) {
1.83      albertel 2184: 	my $serving = $ChildPid{$pid};
1.52      foxr     2185: 	ShowStatus("Nicely Killing lonc for $serving pid = $pid");
                   2186: 	Log("CRITICAL", "Nicely Killing lonc for $serving pid = $pid");
1.17      foxr     2187: 	kill 'QUIT' => $pid;
1.12      foxr     2188:     }
1.85      albertel 2189:     ShowStatus("Finished killing child processes off.");
1.1       foxr     2190: }
1.12      foxr     2191: 
1.52      foxr     2192: 
                   2193: #
                   2194: #  Kill all children via KILL.  Just in case the
                   2195: #  first shot didn't get them.
                   2196: 
                   2197: sub really_kill_them_all_dammit
                   2198: {
                   2199:     Debug(2, "Kill them all Dammit");
                   2200:     local($SIG{CHLD} = 'IGNORE'); # In case some purist reenabled them.
1.83      albertel 2201:     foreach my $pid (keys %ChildPid) {
                   2202: 	my $serving = $ChildPid{$pid};
1.52      foxr     2203: 	&ShowStatus("Nastily killing lonc for $serving pid = $pid");
                   2204: 	Log("CRITICAL", "Nastily killing lonc for $serving pid = $pid");
                   2205: 	kill 'KILL' => $pid;
1.83      albertel 2206: 	delete($ChildPid{$pid});
1.52      foxr     2207: 	my $execdir = $perlvar{'lonDaemons'};
                   2208: 	unlink("$execdir/logs/lonc.pid");
                   2209:     }
                   2210: }
1.69      matthew  2211: 
1.14      foxr     2212: =pod
                   2213: 
                   2214: =head1 Terminate
                   2215:  
                   2216: Terminate the system.
                   2217: 
                   2218: =cut
                   2219: 
                   2220: sub Terminate {
1.52      foxr     2221:     &Log("CRITICAL", "Asked to kill children.. first be nice...");
                   2222:     &KillThemAll;
                   2223:     #
                   2224:     #  By now they really should all be dead.. but just in case 
                   2225:     #  send them all SIGKILL's after a bit of waiting:
                   2226: 
                   2227:     sleep(4);
                   2228:     &Log("CRITICAL", "Now kill children nasty");
                   2229:     &really_kill_them_all_dammit;
1.17      foxr     2230:     Log("CRITICAL","Master process exiting");
                   2231:     exit 0;
1.14      foxr     2232: 
                   2233: }
1.81      albertel 2234: 
                   2235: sub my_hostname {
                   2236:     use Sys::Hostname;
                   2237:     my $name = &hostname();
                   2238:     &Debug(9,"Name is $name");
                   2239:     return $name;
                   2240: }
                   2241: 
1.12      foxr     2242: =pod
1.1       foxr     2243: 
                   2244: =head1 Theory
1.3       albertel 2245: 
                   2246: The event class is used to build this as a single process with an
                   2247: event driven model.  The following events are handled:
1.1       foxr     2248: 
                   2249: =item UNIX Socket connection Received
                   2250: 
                   2251: =item Request data arrives on UNIX data transfer socket.
                   2252: 
                   2253: =item lond connection becomes writable.
                   2254: 
                   2255: =item timer fires at 1 second intervals.
                   2256: 
                   2257: All sockets are run in non-blocking mode.  Timeouts managed by the timer
                   2258: handler prevents hung connections.
                   2259: 
                   2260: Key data structures:
                   2261: 
1.3       albertel 2262: =item RequestQueue
                   2263: 
                   2264: A queue of requests received from UNIX sockets that are
                   2265: waiting for a chance to be forwarded on a lond connection socket.
                   2266: 
                   2267: =item ActiveConnections
                   2268: 
                   2269: A hash of lond connections that have transactions in process that are
                   2270: available to be timed out.
                   2271: 
                   2272: =item ActiveTransactions
                   2273: 
                   2274: A hash indexed by lond connections that contain the client reply
                   2275: socket for each connection that has an active transaction on it.
                   2276: 
                   2277: =item IdleConnections
                   2278: 
                   2279: A hash of lond connections that have no work to do.  These connections
                   2280: can be closed if they are idle for a long enough time.
1.1       foxr     2281: 
                   2282: =cut
1.88      foxr     2283: 
                   2284: =pod
                   2285: 
                   2286: =head1 Log messages
                   2287: 
                   2288: The following is a list of log messages that can appear in the 
                   2289: lonc.log file.  Each log file has a severity and a message.
                   2290: 
                   2291: =over 2
                   2292: 
                   2293: =item Warning  A socket timeout was detected
                   2294: 
                   2295: If there are pending transactions in the socket's queue,
                   2296: they are failed (saved if critical).  If the connection
                   2297: retry count gets exceeded by this, the
                   2298: remote host is marked as dead.
1.97      raeburn  2299: Called when timeouts occurred during the connection and
1.88      foxr     2300: connection dialog with a remote host.
                   2301: 
                   2302: =item Critical Host makred DEAD <hostname>   
                   2303: 
                   2304: The numer of retry counts for contacting a host was
                   2305: exceeded. The host is marked dead an no 
                   2306: further attempts will be made by that child.
                   2307: 
                   2308: =item Info lonc pipe client hung up on us     
                   2309: 
                   2310: Write to the client pipe indicated no data transferred
                   2311: Socket to remote host is shut down.  Reply to the client 
                   2312: is discarded.  Note: This is commented out in &ClientWriteable
                   2313: 
                   2314: =item Success  Reply from lond: <data>   
                   2315: 
                   2316: Can be enabled for debugging by setting LogTransactions to nonzero.
                   2317: Indicates a successful transaction with lond, <data> is the data received
                   2318: from the remote lond.
                   2319: 
                   2320: =item Success A delayed transaction was completed  
                   2321: 
                   2322: A transaction that must be reliable was executed and completed
                   2323: as lonc restarted.  This is followed by a mesage of the form
                   2324: 
                   2325:   S: client-name : request
                   2326: 
                   2327: =item WARNING  Failing transaction <cmd>:<subcmd>  
                   2328: 
                   2329: Transaction failed on a socket, but the failure retry count for the remote
                   2330: node has not yet been exhausted (the node is not yet marked dead).
                   2331: cmd is the command, subcmd is the subcommand.  This results from a con_lost
                   2332: when communicating with lond.
                   2333: 
                   2334: =item WARNING Shutting down a socket     
                   2335: 
                   2336: Called when a socket is being closed to lond.  This is emitted both when 
                   2337: idle pruning is being done and when the socket has been disconnected by the remote.
                   2338: 
                   2339: =item WARNING Lond connection lost.
                   2340: 
                   2341: Called when a read from lond's socket failed indicating lond has closed the 
                   2342: connection or died.  This should be followed by one or more
                   2343: 
                   2344:  "WARNING Failing transaction..." msgs for each in-flight or queued transaction.
                   2345: 
                   2346: =item INFO Connected to lond version:  <version> 
                   2347: 
                   2348: When connection negotiation is complete, the lond version is requested and logged here.
                   2349: 
                   2350: =item SUCCESS Connection n to host now ready for action
                   2351: 
                   2352: Emitted when connection has been completed with lond. n is then number of 
                   2353: concurrent connections and host, the host to which the connection has just
                   2354: been established.
                   2355: 
                   2356: =item WARNING Connection to host has been disconnected
                   2357: 
                   2358: Write to a lond resulted in failure status.  Connection to lond is dropped.
                   2359: 
                   2360: =item SUCCESS Created connection n to host host 
                   2361: 
                   2362: Initial connection request to host..(before negotiation).
                   2363: 
                   2364: =item CRITICAL Request Close Connection ... exiting
                   2365: 
                   2366: Client has sent "close_connection_exit"   The loncnew server is exiting.
                   2367: 
                   2368: =item INFO Resetting Connection Retries 
                   2369: 
                   2370: Client has sent "reset_retries" The lond connection retries are reset to zero for the
                   2371: corresponding lond.
                   2372: 
                   2373: =item SUCCESS Transaction <data>
                   2374: 
                   2375: Only emitted if the global variable $LogTransactions was set to true.
                   2376: A client has requested a lond transaction <data> is the contents of the request.
                   2377: 
                   2378: =item SUCCESS Toggled transaction logging <LogTransactions>
                   2379:                                     
                   2380: The state of the $LogTransactions global has been toggled, and its current value
                   2381: (after being toggled) is displayed.  When non zero additional logging of transactions
                   2382: is enabled for debugging purposes.  Transaction logging is toggled on receipt of a USR2
                   2383: signal.
                   2384: 
                   2385: =item CRITICAL Abnormal exit. Child <pid> for <host> died thorugh signal.
                   2386: 
                   2387: QUIT signal received.  lonc child process is exiting.
                   2388: 
                   2389: =item SUCCESS New debugging level for <RemoteHost> now <DebugLevel>
                   2390:                                     
                   2391: Debugging toggled for the host loncnew is talking with.
                   2392: Currently debugging is a level based scheme with higher number 
                   2393: conveying more information.  The daemon starts out at
                   2394: DebugLevel 0 and can toggle back and forth between that and
                   2395: DebugLevel 2  These are controlled by
                   2396: the global variables $DebugLevel and $NextDebugLevel
                   2397: The debug level can go up to 9.
                   2398: SIGINT toggles the debug level.  The higher the debug level the 
                   2399: more debugging information is spewed.  See the Debug
                   2400: sub in loncnew.
                   2401: 
                   2402: =item CRITICAL Forking server for host  
                   2403: 
                   2404: A child is being created to service requests for the specified host.
                   2405: 
                   2406: 
                   2407: =item WARNING Request for a second child on hostname
                   2408:                                     
                   2409: Somehow loncnew was asked to start a second child on a host that already had a child
                   2410: servicing it.  This request is not honored, but themessage is emitted.  This could happen
                   2411: due to a race condition.  When a client attempts to contact loncnew for a new host, a child
                   2412: is forked off to handle the requests for that server.  The parent then backs off the Unix
                   2413: domain socket leaving it for the child to service all requests.  If in the time between
                   2414: creating the child, and backing off, a new connection request comes in to the unix domain
                   2415: socket, this could trigger (unlikely but remotely possible),.
                   2416: 
                   2417: =item CRITICAL ------ Starting Children ----
                   2418: 
                   2419: This message should probably be changed to "Entering event loop"  as the loncnew only starts
                   2420: children as needed.  This message is emitted as new events are established and
                   2421: the event processing loop is entered.
                   2422: 
                   2423: =item INFO Updating connections via SIGUSR2
                   2424:                                     
                   2425: SIGUSR2 received. The original code would kill all clients, re-read the host file,
1.97      raeburn  2426: then restart children for each host.  Now that children are started on demand, this
1.88      foxr     2427: just kills all child processes and lets requests start them as needed again.
                   2428: 
                   2429: 
                   2430: =item CRITICAL Restarting
                   2431: 
                   2432: SigHUP received.  all the children are killed and the script exec's itself to start again.
                   2433: 
                   2434: =item CRITICAL Nicely killing lonc for host pid = <pid>
                   2435: 
                   2436: Attempting to kill the child that is serving the specified host (pid given) cleanly via
1.97      raeburn  2437: SIGQUIT.  The child should handle that, clean up nicely and exit.
1.88      foxr     2438: 
                   2439: =item CRITICAL Nastily killing lonc for host pid = <pid>
                   2440: 
                   2441: The child specified did not die when requested via SIGQUIT.  Therefore it is killed
                   2442: via SIGKILL.
                   2443: 
                   2444: =item CRITICAL Asked to kill children.. first be nice..
                   2445: 
                   2446: In the parent's INT handler.  INT kills the child processes.  This inidicate loncnew
                   2447: is about to attempt to kill all known children via SIGQUIT.  This message should be followed 
                   2448: by one "Nicely killing" message for each extant child.
                   2449: 
                   2450: =item CRITICAL Now kill children nasty 
                   2451: 
                   2452: In the parent's INT handler. remaining children are about to be killed via
                   2453: SIGKILL. Should be followed by a Nastily killing... for each lonc child that 
                   2454: refused to die.
                   2455: 
                   2456: =item CRITICAL Master process exiting
                   2457: 
                   2458: In the parent's INT handler. just prior to the exit 0 call.
                   2459: 
                   2460: =back
                   2461: 
                   2462: =cut

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>
500 Internal Server Error

Internal Server Error

The server encountered an internal error or misconfiguration and was unable to complete your request.

Please contact the server administrator at root@localhost to inform them of the time this error occurred, and the actions you performed just before this error.

More information about this error may be available in the server error log.