Annotation of loncom/loncnew, revision 1.107

1.1       foxr        1: #!/usr/bin/perl
1.2       albertel    2: # The LearningOnline Network with CAPA
                      3: # lonc maintains the connections to remote computers
                      4: #
1.107   ! raeburn     5: # $Id: loncnew,v 1.106 2018/12/06 13:52:28 raeburn Exp $
1.2       albertel    6: #
                      7: # Copyright Michigan State University Board of Trustees
                      8: #
                      9: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
1.17      foxr       10: ## LON-CAPA is free software; you can redistribute it and/or modify
1.2       albertel   11: # it under the terms of the GNU General Public License as published by
                     12: # the Free Software Foundation; either version 2 of the License, or
                     13: # (at your option) any later version.
                     14: #
                     15: # LON-CAPA is distributed in the hope that it will be useful,
                     16: # but WITHOUT ANY WARRANTY; without even the implied warranty of
                     17: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
                     18: # GNU General Public License for more details.
                     19: #
                     20: # You should have received a copy of the GNU General Public License
                     21: # along with LON-CAPA; if not, write to the Free Software
                     22: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
                     23: #
                     24: # /home/httpd/html/adm/gpl.txt
                     25: #
                     26: # http://www.lon-capa.org/
                     27: #
1.1       foxr       28: #
1.97      raeburn    29: # new lonc handles n request out over m connections to londs.
1.1       foxr       30: # This module is based on the Event class.
                     31: #   Development iterations:
                     32: #    - Setup basic event loop.   (done)
                     33: #    - Add timer dispatch.       (done)
                     34: #    - Add ability to accept lonc UNIX domain sockets.  (done)
                     35: #    - Add ability to create/negotiate lond connections (done).
1.7       foxr       36: #    - Add general logic for dispatching requests and timeouts. (done).
                     37: #    - Add support for the lonc/lond requests.          (done).
1.38      foxr       38: #    - Add logging/status monitoring.                    (done)
                     39: #    - Add Signal handling - HUP restarts. USR1 status report. (done)
1.7       foxr       40: #    - Add Configuration file I/O                       (done).
1.38      foxr       41: #    - Add management/status request interface.         (done)
1.8       foxr       42: #    - Add deferred request capability.                  (done)
1.38      foxr       43: #    - Detect transmission timeouts.                     (done)
1.7       foxr       44: #
                     45: 
1.23      foxr       46: use strict;
1.1       foxr       47: use lib "/home/httpd/lib/perl/";
                     48: use Event qw(:DEFAULT );
                     49: use POSIX qw(:signal_h);
1.12      foxr       50: use POSIX;
1.1       foxr       51: use IO::Socket;
                     52: use IO::Socket::INET;
                     53: use IO::Socket::UNIX;
1.9       foxr       54: use IO::File;
1.6       foxr       55: use IO::Handle;
1.1       foxr       56: use Socket;
                     57: use Crypt::IDEA;
                     58: use LONCAPA::Queue;
                     59: use LONCAPA::Stack;
                     60: use LONCAPA::LondConnection;
1.7       foxr       61: use LONCAPA::LondTransaction;
1.1       foxr       62: use LONCAPA::Configuration;
1.67      albertel   63: use Fcntl qw(:flock);
1.1       foxr       64: 
                     65: 
                     66: # Read the httpd configuration file to get perl variables
                     67: # normally set in apache modules:
                     68: 
                     69: my $perlvarref = LONCAPA::Configuration::read_conf('loncapa.conf');
                     70: my %perlvar    = %{$perlvarref};
                     71: 
                     72: #
                     73: #  parent and shared variables.
                     74: 
1.83      albertel   75: my %ChildPid;			# by pid -> host.
                     76: my %ChildHost;			# by host.
1.105     raeburn    77: my %ChildKeyMode;               # by pid -> keymode
1.62      foxr       78: my %listening_to;		# Socket->host table for who the parent
                     79:                                 # is listening to.
                     80: my %parent_dispatchers;         # host-> listener watcher events. 
1.1       foxr       81: 
1.65      foxr       82: my %parent_handlers;		# Parent signal handlers...
                     83: 
1.9       foxr       84: my $MaxConnectionCount = 10;	# Will get from config later.
1.1       foxr       85: my $ClientConnection = 0;	# Uniquifier for client events.
                     86: 
1.9       foxr       87: my $DebugLevel = 0;
1.29      foxr       88: my $NextDebugLevel= 2;		# So Sigint can toggle this.
1.94      foxr       89: my $IdleTimeout= 5*60;		# Seconds to wait prior to pruning connections.
1.1       foxr       90: 
1.39      foxr       91: my $LogTransactions = 0;	# When True, all transactions/replies get logged.
1.65      foxr       92: my $executable      = $0;	# Get the full path to me.
1.39      foxr       93: 
1.1       foxr       94: #
                     95: #  The variables below are only used by the child processes.
                     96: #
1.107   ! raeburn    97: my $RemoteHost;			# Hostname of host child is talking to.
        !            98: my $RemoteHostId;		# lonid of host child is talking to.
        !            99: my $RemoteDefHostId;		# default lonhostID of host child is talking to.
1.82      albertel  100: my @all_host_ids;
1.20      albertel  101: my $UnixSocketDir= $perlvar{'lonSockDir'};
1.1       foxr      102: my $IdleConnections = Stack->new(); # Set of idle connections
                    103: my %ActiveConnections;		# Connections to the remote lond.
1.7       foxr      104: my %ActiveTransactions;		# LondTransactions in flight.
1.1       foxr      105: my %ActiveClients;		# Serial numbers of active clients by socket.
                    106: my $WorkQueue       = Queue->new(); # Queue of pending transactions.
                    107: my $ConnectionCount = 0;
1.4       foxr      108: my $IdleSeconds     = 0;	# Number of seconds idle.
1.9       foxr      109: my $Status          = "";	# Current status string.
1.14      foxr      110: my $RecentLogEntry  = "";
1.72      albertel  111: my $ConnectionRetries=5;	# Number of connection retries allowed.
                    112: my $ConnectionRetriesLeft=5;	# Number of connection retries remaining.
1.40      foxr      113: my $LondVersion     = "unknown"; # Version of lond we talk with.
1.49      foxr      114: my $KeyMode         = "";       # e.g. ssl, local, insecure from last connect.
1.54      foxr      115: my $LondConnecting  = 0;       # True when a connection is being built.
1.1       foxr      116: 
1.60      foxr      117: 
                    118: 
1.62      foxr      119: my $I_am_child      = 0;	# True if this is the child process.
1.57      foxr      120: 
1.1       foxr      121: #
1.9       foxr      122: #   The hash below gives the HTML format for log messages
                    123: #   given a severity.
                    124: #    
                    125: my %LogFormats;
                    126: 
1.45      albertel  127: $LogFormats{"CRITICAL"} = "<font color='red'>CRITICAL: %s</font>";
                    128: $LogFormats{"SUCCESS"}  = "<font color='green'>SUCCESS: %s</font>";
                    129: $LogFormats{"INFO"}     = "<font color='yellow'>INFO: %s</font>";
                    130: $LogFormats{"WARNING"}  = "<font color='blue'>WARNING: %s</font>";
1.9       foxr      131: $LogFormats{"DEFAULT"}  = " %s ";
                    132: 
1.10      foxr      133: 
1.57      foxr      134: #  UpdateStatus;
                    135: #    Update the idle status display to show how many connections
                    136: #    are left, retries and other stuff.
                    137: #
                    138: sub UpdateStatus {
                    139:     if ($ConnectionRetriesLeft > 0) {
                    140: 	ShowStatus(GetServerHost()." Connection count: ".$ConnectionCount
                    141: 		   ." Retries remaining: ".$ConnectionRetriesLeft
                    142: 		   ." ($KeyMode)");
                    143:     } else {
                    144: 	ShowStatus(GetServerHost()." >> DEAD <<");
                    145:     }
                    146: }
                    147: 
1.10      foxr      148: 
                    149: =pod
                    150: 
                    151: =head2 LogPerm
                    152: 
                    153: Makes an entry into the permanent log file.
                    154: 
                    155: =cut
1.69      matthew   156: 
1.10      foxr      157: sub LogPerm {
                    158:     my $message=shift;
                    159:     my $execdir=$perlvar{'lonDaemons'};
                    160:     my $now=time;
                    161:     my $local=localtime($now);
                    162:     my $fh=IO::File->new(">>$execdir/logs/lonnet.perm.log");
1.86      albertel  163:     chomp($message);
1.10      foxr      164:     print $fh "$now:$message:$local\n";
                    165: }
1.9       foxr      166: 
                    167: =pod
                    168: 
                    169: =head2 Log
                    170: 
                    171: Logs a message to the log file.
                    172: Parameters:
                    173: 
                    174: =item severity
                    175: 
                    176: One of CRITICAL, WARNING, INFO, SUCCESS used to select the
                    177: format string used to format the message.  if the severity is
                    178: not a defined severity the Default format string is used.
                    179: 
                    180: =item message
                    181: 
                    182: The base message.  In addtion to the format string, the message
                    183: will be appended to a string containing the name of our remote
                    184: host and the time will be formatted into the message.
                    185: 
                    186: =cut
                    187: 
                    188: sub Log {
1.47      foxr      189: 
                    190:     my ($severity, $message) = @_;
                    191: 
1.9       foxr      192:     if(!$LogFormats{$severity}) {
                    193: 	$severity = "DEFAULT";
                    194:     }
                    195: 
                    196:     my $format = $LogFormats{$severity};
                    197:     
                    198:     #  Put the window dressing in in front of the message format:
                    199: 
                    200:     my $now   = time;
                    201:     my $local = localtime($now);
                    202:     my $finalformat = "$local ($$) [$RemoteHost] [$Status] ";
1.76      albertel  203:     $finalformat = $finalformat.$format."\n";
1.9       foxr      204: 
                    205:     # open the file and put the result.
                    206: 
                    207:     my $execdir = $perlvar{'lonDaemons'};
                    208:     my $fh      = IO::File->new(">>$execdir/logs/lonc.log");
                    209:     my $msg = sprintf($finalformat, $message);
1.14      foxr      210:     $RecentLogEntry = $msg;
1.9       foxr      211:     print $fh $msg;
                    212:     
1.10      foxr      213:     
1.9       foxr      214: }
1.6       foxr      215: 
1.3       albertel  216: 
1.1       foxr      217: =pod
1.3       albertel  218: 
                    219: =head2 GetPeerName
                    220: 
                    221: Returns the name of the host that a socket object is connected to.
                    222: 
1.1       foxr      223: =cut
                    224: 
                    225: sub GetPeername {
1.47      foxr      226: 
                    227: 
                    228:     my ($connection, $AdrFamily) = @_;
                    229: 
1.1       foxr      230:     my $peer       = $connection->peername();
                    231:     my $peerport;
                    232:     my $peerip;
                    233:     if($AdrFamily == AF_INET) {
                    234: 	($peerport, $peerip) = sockaddr_in($peer);
1.23      foxr      235: 	my $peername    = gethostbyaddr($peerip, $AdrFamily);
1.1       foxr      236: 	return $peername;
                    237:     } elsif ($AdrFamily == AF_UNIX) {
                    238: 	my $peerfile;
                    239: 	($peerfile) = sockaddr_un($peer);
                    240: 	return $peerfile;
                    241:     }
                    242: }
                    243: =pod
1.3       albertel  244: 
1.1       foxr      245: =head2 Debug
1.3       albertel  246: 
                    247: Invoked to issue a debug message.
                    248: 
1.1       foxr      249: =cut
1.3       albertel  250: 
1.1       foxr      251: sub Debug {
1.47      foxr      252: 
                    253:     my ($level, $message) = @_;
                    254: 
1.1       foxr      255:     if ($level <= $DebugLevel) {
1.23      foxr      256: 	Log("INFO", "-Debug- $message host = $RemoteHost");
1.1       foxr      257:     }
                    258: }
                    259: 
                    260: sub SocketDump {
1.47      foxr      261: 
                    262:     my ($level, $socket) = @_;
                    263: 
1.1       foxr      264:     if($level <= $DebugLevel) {
1.48      foxr      265: 	$socket->Dump(-1);	# Ensure it will get dumped.
1.1       foxr      266:     }
                    267: }
1.3       albertel  268: 
1.1       foxr      269: =pod
1.3       albertel  270: 
1.5       foxr      271: =head2 ShowStatus
                    272: 
                    273:  Place some text as our pid status.
1.10      foxr      274:  and as what we return in a SIGUSR1
1.5       foxr      275: 
                    276: =cut
1.69      matthew   277: 
1.5       foxr      278: sub ShowStatus {
1.10      foxr      279:     my $state = shift;
                    280:     my $now = time;
                    281:     my $local = localtime($now);
                    282:     $Status   = $local.": ".$state;
                    283:     $0='lonc: '.$state.' '.$local;
1.5       foxr      284: }
                    285: 
                    286: =pod
                    287: 
1.69      matthew   288: =head2 SocketTimeout
1.15      foxr      289: 
                    290:     Called when an action on the socket times out.  The socket is 
                    291:    destroyed and any active transaction is failed.
                    292: 
                    293: 
                    294: =cut
1.69      matthew   295: 
1.15      foxr      296: sub SocketTimeout {
                    297:     my $Socket = shift;
1.38      foxr      298:     Log("WARNING", "A socket timeout was detected");
1.52      foxr      299:     Debug(5, " SocketTimeout called: ");
1.48      foxr      300:     $Socket->Dump(0);
1.42      foxr      301:     if(exists($ActiveTransactions{$Socket})) {
1.43      albertel  302: 	FailTransaction($ActiveTransactions{$Socket});
1.42      foxr      303:     }
1.22      foxr      304:     KillSocket($Socket);	# A transaction timeout also counts as
                    305:                                 # a connection failure:
                    306:     $ConnectionRetriesLeft--;
1.42      foxr      307:     if($ConnectionRetriesLeft <= 0) {
1.52      foxr      308: 	Log("CRITICAL", "Host marked DEAD: ".GetServerHost());
1.56      foxr      309: 	$LondConnecting = 0;
1.42      foxr      310:     }
                    311: 
1.15      foxr      312: }
1.80      albertel  313: 
1.64      foxr      314: #
                    315: #   This function should be called by the child in all cases where it must
1.80      albertel  316: #   exit.  The child process must create a lock file for the AF_UNIX socket
                    317: #   in order to prevent connection requests from lonnet in the time between
                    318: #   process exit and the parent picking up the listen again.
                    319: #
1.64      foxr      320: # Parameters:
                    321: #     exit_code           - Exit status value, however see the next parameter.
                    322: #     message             - If this optional parameter is supplied, the exit
                    323: #                           is via a die with this message.
                    324: #
                    325: sub child_exit {
                    326:     my ($exit_code, $message) = @_;
                    327: 
                    328:     # Regardless of how we exit, we may need to do the lock thing:
                    329: 
1.80      albertel  330:     #
                    331:     #  Create a lock file since there will be a time window
                    332:     #  between our exit and the parent's picking up the listen
                    333:     #  during which no listens will be done on the
                    334:     #  lonnet client socket.
                    335:     #
                    336:     my $lock_file = &GetLoncSocketPath().".lock";
                    337:     open(LOCK,">$lock_file");
                    338:     print LOCK "Contents not important";
                    339:     close(LOCK);
1.81      albertel  340:     unlink(&GetLoncSocketPath());
1.64      foxr      341: 
1.80      albertel  342:     if ($message) {
                    343: 	die($message);
1.64      foxr      344:     } else {
                    345: 	exit($exit_code);
                    346:     }
                    347: }
1.35      foxr      348: #----------------------------- Timer management ------------------------
1.15      foxr      349: 
                    350: =pod
                    351: 
1.1       foxr      352: =head2 Tick
1.3       albertel  353: 
1.97      raeburn   354: Invoked each timer tick.
1.3       albertel  355: 
1.1       foxr      356: =cut
                    357: 
1.5       foxr      358: 
1.1       foxr      359: sub Tick {
1.52      foxr      360:     my ($Event)       = @_;
                    361:     my $clock_watcher = $Event->w;
                    362: 
1.1       foxr      363:     my $client;
1.57      foxr      364:     UpdateStatus();
                    365: 
1.4       foxr      366:     # Is it time to prune connection count:
                    367: 
                    368: 
                    369:     if($IdleConnections->Count()  && 
                    370:        ($WorkQueue->Count() == 0)) { # Idle connections and nothing to do?
1.52      foxr      371: 	$IdleSeconds++;
1.4       foxr      372: 	if($IdleSeconds > $IdleTimeout) { # Prune a connection...
1.23      foxr      373: 	    my $Socket = $IdleConnections->pop();
1.6       foxr      374: 	    KillSocket($Socket);
1.54      foxr      375: 	    $IdleSeconds = 0;	# Otherwise all connections get trimmed to fast.
1.57      foxr      376: 	    UpdateStatus();
1.80      albertel  377: 	    if(($ConnectionCount == 0)) {
1.64      foxr      378: 		&child_exit(0);
                    379: 
1.57      foxr      380: 	    }
1.4       foxr      381: 	}
                    382:     } else {
                    383: 	$IdleSeconds = 0;	# Reset idle count if not idle.
                    384:     }
1.15      foxr      385:     #
                    386:     #  For each inflight transaction, tick down its timeout counter.
                    387:     #
1.35      foxr      388: 
1.34      albertel  389:     foreach my $item (keys %ActiveConnections) {
                    390: 	my $State = $ActiveConnections{$item}->data->GetState();
1.35      foxr      391: 	if ($State ne 'Idle') {
1.34      albertel  392: 	    Debug(5,"Ticking Socket $State $item");
                    393: 	    $ActiveConnections{$item}->data->Tick();
                    394: 	}
1.15      foxr      395:     }
1.5       foxr      396:     # Do we have work in the queue, but no connections to service them?
                    397:     # If so, try to make some new connections to get things going again.
                    398:     #
1.57      foxr      399:     #   Note this code is dead now...
                    400:     #
1.5       foxr      401:     my $Requests = $WorkQueue->Count();
1.56      foxr      402:     if (($ConnectionCount == 0)  && ($Requests > 0) && (!$LondConnecting)) { 
1.10      foxr      403: 	if ($ConnectionRetriesLeft > 0) {
1.56      foxr      404: 	    Debug(5,"Work but no connections, Make a new one");
                    405: 	    my $success;
                    406: 	    $success    = &MakeLondConnection;
                    407: 	    if($success == 0) { # All connections failed:
1.29      foxr      408: 		Debug(5,"Work in queue failed to make any connectiouns\n");
1.22      foxr      409: 		EmptyQueue();	# Fail pending transactions with con_lost.
1.42      foxr      410: 		CloseAllLondConnections(); # Should all be closed but....
1.10      foxr      411: 	    }
                    412: 	} else {
1.56      foxr      413: 	    $LondConnecting = 0;
1.22      foxr      414: 	    ShowStatus(GetServerHost()." >>> DEAD!!! <<<");
1.29      foxr      415: 	    Debug(5,"Work in queue, but gave up on connections..flushing\n");
1.10      foxr      416: 	    EmptyQueue();	# Connections can't be established.
1.42      foxr      417: 	    CloseAllLondConnections(); # Should all already be closed but...
1.5       foxr      418: 	}
                    419:        
                    420:     }
1.49      foxr      421:     if ($ConnectionCount == 0) {
                    422: 	$KeyMode = ""; 
1.52      foxr      423: 	$clock_watcher->cancel();
1.49      foxr      424:     }
1.66      albertel  425:     &UpdateStatus();
1.1       foxr      426: }
                    427: 
                    428: =pod
1.3       albertel  429: 
1.1       foxr      430: =head2 SetupTimer
                    431: 
1.3       albertel  432: Sets up a 1 per sec recurring timer event.  The event handler is used to:
1.1       foxr      433: 
1.3       albertel  434: =item
                    435: 
                    436: Trigger timeouts on communications along active sockets.
                    437: 
                    438: =item
                    439: 
                    440: Trigger disconnections of idle sockets.
1.1       foxr      441: 
                    442: =cut
                    443: 
                    444: sub SetupTimer {
1.52      foxr      445:     Debug(6, "SetupTimer");
1.92      foxr      446:     Event->timer(interval => 1, cb => \&Tick,
                    447: 	hard => 1);
1.1       foxr      448: }
1.3       albertel  449: 
1.1       foxr      450: =pod
1.3       albertel  451: 
1.1       foxr      452: =head2 ServerToIdle
1.3       albertel  453: 
                    454: This function is called when a connection to the server is
                    455: ready for more work.
                    456: 
                    457: If there is work in the Work queue the top element is dequeued
1.1       foxr      458: and the connection will start to work on it.  If the work queue is
                    459: empty, the connection is pushed on the idle connection stack where
                    460: it will either get another work unit, or alternatively, if it sits there
                    461: long enough, it will be shut down and released.
                    462: 
1.3       albertel  463: =cut
1.1       foxr      464: 
                    465: sub ServerToIdle {
                    466:     my $Socket   = shift;	# Get the socket.
1.49      foxr      467:     $KeyMode = $Socket->{AuthenticationMode};
1.7       foxr      468:     delete($ActiveTransactions{$Socket}); # Server has no transaction
1.1       foxr      469: 
1.29      foxr      470:     &Debug(5, "Server to idle");
1.1       foxr      471: 
                    472:     #  If there's work to do, start the transaction:
                    473: 
1.23      foxr      474:     my $reqdata = $WorkQueue->dequeue(); # This is a LondTransaction
1.29      foxr      475:     if ($reqdata ne undef)  {
                    476: 	Debug(5, "Queue gave request data: ".$reqdata->getRequest());
1.7       foxr      477: 	&StartRequest($Socket,  $reqdata);
1.8       foxr      478: 
1.1       foxr      479:     } else {
                    480: 	
                    481:     #  There's no work waiting, so push the server to idle list.
1.29      foxr      482: 	&Debug(5, "No new work requests, server connection going idle");
1.1       foxr      483: 	$IdleConnections->push($Socket);
                    484:     }
                    485: }
1.3       albertel  486: 
1.1       foxr      487: =pod
1.3       albertel  488: 
1.1       foxr      489: =head2 ClientWritable
1.3       albertel  490: 
                    491: Event callback for when a client socket is writable.
                    492: 
1.97      raeburn   493: This callback is established when a transaction response is
                    494: available from lond.  The response is forwarded to the unix socket
1.3       albertel  495: as it becomes writable in this sub.
                    496: 
1.1       foxr      497: Parameters:
                    498: 
1.3       albertel  499: =item Event
                    500: 
                    501: The event that has been triggered. Event->w->data is
                    502: the data and Event->w->fd is the socket to write.
1.1       foxr      503: 
                    504: =cut
1.3       albertel  505: 
1.1       foxr      506: sub ClientWritable {
                    507:     my $Event    = shift;
                    508:     my $Watcher  = $Event->w;
1.84      albertel  509:     if (!defined($Watcher)) {
                    510: 	&child_exit(-1,'No watcher for event in ClientWritable');
                    511:     }
1.1       foxr      512:     my $Data     = $Watcher->data;
                    513:     my $Socket   = $Watcher->fd;
                    514: 
                    515:     # Try to send the data:
                    516: 
                    517:     &Debug(6, "ClientWritable writing".$Data);
                    518:     &Debug(9, "Socket is: ".$Socket);
                    519: 
1.6       foxr      520:     if($Socket->connected) {
                    521: 	my $result = $Socket->send($Data, 0);
                    522: 	
                    523: 	# $result undefined: the write failed.
                    524: 	# otherwise $result is the number of bytes written.
                    525: 	# Remove that preceding string from the data.
                    526: 	# If the resulting data is empty, destroy the watcher
                    527: 	# and set up a read event handler to accept the next
                    528: 	# request.
                    529: 	
                    530: 	&Debug(9,"Send result is ".$result." Defined: ".defined($result));
1.29      foxr      531: 	if($result ne undef) {
1.6       foxr      532: 	    &Debug(9, "send result was defined");
                    533: 	    if($result == length($Data)) { # Entire string sent.
                    534: 		&Debug(9, "ClientWritable data all written");
                    535: 		$Watcher->cancel();
                    536: 		#
                    537: 		#  Set up to read next request from socket:
                    538: 		
                    539: 		my $descr     = sprintf("Connection to lonc client %d",
                    540: 					$ActiveClients{$Socket});
                    541: 		Event->io(cb    => \&ClientRequest,
                    542: 			  poll  => 'r',
                    543: 			  desc  => $descr,
                    544: 			  data  => "",
                    545: 			  fd    => $Socket);
                    546: 		
                    547: 	    } else {		# Partial string sent.
                    548: 		$Watcher->data(substr($Data, $result));
1.15      foxr      549: 		if($result == 0) {    # client hung up on us!!
1.52      foxr      550: 		    # Log("INFO", "lonc pipe client hung up on us!");
1.15      foxr      551: 		    $Watcher->cancel;
                    552: 		    $Socket->shutdown(2);
                    553: 		    $Socket->close();
                    554: 		}
1.6       foxr      555: 	    }
                    556: 	    
                    557: 	} else {			# Error of some sort...
                    558: 	    
                    559: 	    # Some errnos are possible:
                    560: 	    my $errno = $!;
                    561: 	    if($errno == POSIX::EWOULDBLOCK   ||
                    562: 	       $errno == POSIX::EAGAIN        ||
                    563: 	       $errno == POSIX::EINTR) {
1.96      foxr      564: 		# No action taken...the socket will be writable firing the event again
                    565: 		# which will result in a retry of the write.
1.6       foxr      566: 	    } else {		# Unanticipated errno.
                    567: 		&Debug(5,"ClientWritable error or peer shutdown".$RemoteHost);
                    568: 		$Watcher->cancel;	# Stop the watcher.
                    569: 		$Socket->shutdown(2); # Kill connection
                    570: 		$Socket->close();	# Close the socket.
                    571: 	    }
1.1       foxr      572: 	    
                    573: 	}
1.6       foxr      574:     } else {
                    575: 	$Watcher->cancel();	# A delayed request...just cancel.
1.84      albertel  576: 	return;
1.1       foxr      577:     }
                    578: }
                    579: 
                    580: =pod
1.3       albertel  581: 
1.1       foxr      582: =head2 CompleteTransaction
1.3       albertel  583: 
                    584: Called when the reply data has been received for a lond 
1.1       foxr      585: transaction.   The reply data must now be sent to the
                    586: ultimate client on the other end of the Unix socket.  This is
                    587: done by setting up a writable event for the socket with the
                    588: data the reply data.
1.3       albertel  589: 
1.1       foxr      590: Parameters:
1.3       albertel  591: 
                    592: =item Socket
                    593: 
1.97      raeburn   594: Socket on which the lond transaction occurred.  This is a
                    595: LondConnection. The data received are in the TransactionReply member.
1.3       albertel  596: 
1.7       foxr      597: =item Transaction
1.3       albertel  598: 
1.7       foxr      599: The transaction that is being completed.
1.1       foxr      600: 
                    601: =cut
1.3       albertel  602: 
1.1       foxr      603: sub CompleteTransaction {
1.29      foxr      604:     &Debug(5,"Complete transaction");
1.47      foxr      605: 
                    606:     my ($Socket, $Transaction) = @_;
1.1       foxr      607: 
1.7       foxr      608:     if (!$Transaction->isDeferred()) { # Normal transaction
                    609: 	my $data   = $Socket->GetReply(); # Data to send.
1.39      foxr      610: 	if($LogTransactions) {
                    611: 	    Log("SUCCESS", "Reply from lond: '$data'");
                    612: 	}
1.7       foxr      613: 	StartClientReply($Transaction, $data);
                    614:     } else {			# Delete deferred transaction file.
1.9       foxr      615: 	Log("SUCCESS", "A delayed transaction was completed");
1.102     raeburn   616: 	LogPerm("S:".$Socket->PeerLoncapaHim().":".$Transaction->getRequest());
1.86      albertel  617: 	unlink($Transaction->getFile());
1.7       foxr      618:     }
1.6       foxr      619: }
1.42      foxr      620: 
1.6       foxr      621: =pod
1.42      foxr      622: 
1.6       foxr      623: =head1 StartClientReply
                    624: 
                    625:    Initiates a reply to a client where the reply data is a parameter.
                    626: 
1.7       foxr      627: =head2  parameters:
                    628: 
                    629: =item Transaction
                    630: 
                    631:     The transaction for which we are responding to the client.
                    632: 
                    633: =item data
                    634: 
1.97      raeburn   635:     The data to send to apache client.
1.7       foxr      636: 
1.6       foxr      637: =cut
1.42      foxr      638: 
1.6       foxr      639: sub StartClientReply {
1.1       foxr      640: 
1.47      foxr      641:     my ($Transaction, $data) = @_;
1.12      foxr      642: 
1.7       foxr      643:     my $Client   = $Transaction->getClient();
                    644: 
1.1       foxr      645:     &Debug(8," Reply was: ".$data);
                    646:     my $Serial         = $ActiveClients{$Client};
                    647:     my $desc           = sprintf("Connection to lonc client %d",
                    648: 				 $Serial);
                    649:     Event->io(fd       => $Client,
                    650: 	      poll     => "w",
                    651: 	      desc     => $desc,
                    652: 	      cb       => \&ClientWritable,
                    653: 	      data     => $data);
                    654: }
1.42      foxr      655: 
1.4       foxr      656: =pod
1.42      foxr      657: 
1.4       foxr      658: =head2 FailTransaction
                    659: 
                    660:   Finishes a transaction with failure because the associated lond socket
1.7       foxr      661:   disconnected.  There are two possibilities:
                    662:   - The transaction is deferred: in which case we just quietly
                    663:     delete the transaction since there is no client connection.
                    664:   - The transaction is 'live' in which case we initiate the sending
                    665:     of "con_lost" to the client.
                    666: 
1.42      foxr      667: Deleting the transaction means killing it from the %ActiveTransactions hash.
1.4       foxr      668: 
                    669: Parameters:
                    670: 
                    671: =item client  
                    672:  
1.7       foxr      673:    The LondTransaction we are failing.
                    674:  
1.42      foxr      675: 
1.4       foxr      676: =cut
                    677: 
                    678: sub FailTransaction {
1.7       foxr      679:     my $transaction = shift;
1.52      foxr      680:     
                    681:     #  If the socket is dead, that's already logged.
                    682: 
                    683:     if ($ConnectionRetriesLeft > 0) {
                    684: 	Log("WARNING", "Failing transaction "
1.71      albertel  685: 	    .$transaction->getLoggableRequest());
1.52      foxr      686:     }
1.71      albertel  687:     Debug(1, "Failing transaction: ".$transaction->getLoggableRequest());
1.10      foxr      688:     if (!$transaction->isDeferred()) { # If the transaction is deferred we'll get to it.
1.11      foxr      689: 	my $client  = $transaction->getClient();
1.30      foxr      690: 	Debug(1," Replying con_lost to ".$transaction->getRequest());
1.11      foxr      691: 	StartClientReply($transaction, "con_lost\n");
1.7       foxr      692:     }
1.4       foxr      693: 
                    694: }
                    695: 
                    696: =pod
1.69      matthew   697: 
1.6       foxr      698: =head1  EmptyQueue
1.7       foxr      699: 
1.6       foxr      700:   Fails all items in the work queue with con_lost.
1.7       foxr      701:   Note that each item in the work queue is a transaction.
                    702: 
1.6       foxr      703: =cut
1.69      matthew   704: 
1.6       foxr      705: sub EmptyQueue {
1.22      foxr      706:     $ConnectionRetriesLeft--;	# Counts as connection failure too.
1.6       foxr      707:     while($WorkQueue->Count()) {
1.10      foxr      708: 	my $request = $WorkQueue->dequeue(); # This is a transaction
1.7       foxr      709: 	FailTransaction($request);
1.6       foxr      710:     }
                    711: }
                    712: 
                    713: =pod
1.4       foxr      714: 
1.9       foxr      715: =head2 CloseAllLondConnections
                    716: 
                    717: Close all connections open on lond prior to exit e.g.
                    718: 
                    719: =cut
1.69      matthew   720: 
1.9       foxr      721: sub CloseAllLondConnections {
1.23      foxr      722:     foreach my $Socket (keys %ActiveConnections) {
1.42      foxr      723:       if(exists($ActiveTransactions{$Socket})) {
                    724: 	FailTransaction($ActiveTransactions{$Socket});
                    725:       }
                    726:       KillSocket($Socket);
1.9       foxr      727:     }
                    728: }
                    729: 
                    730: =pod
                    731: 
1.4       foxr      732: =head2 KillSocket
                    733:  
                    734: Destroys a socket.  This function can be called either when a socket
                    735: has died of 'natural' causes or because a socket needs to be pruned due to
                    736: idleness.  If the socket has died naturally, if there are no longer any 
                    737: live connections a new connection is created (in case there are transactions
                    738: in the queue).  If the socket has been pruned, it is never re-created.
                    739: 
                    740: Parameters:
1.1       foxr      741: 
1.4       foxr      742: =item Socket
                    743:  
                    744:   The socket to kill off.
                    745: 
1.105     raeburn   746: =item restart
1.4       foxr      747: 
1.97      raeburn   748: non-zero if we are allowed to create a new connection.
1.4       foxr      749: 
1.69      matthew   750: =cut
1.4       foxr      751: 
                    752: sub KillSocket {
                    753:     my $Socket = shift;
1.105     raeburn   754:     my $restart = shift;
1.4       foxr      755: 
1.17      foxr      756:     Log("WARNING", "Shutting down a socket");
1.9       foxr      757:     $Socket->Shutdown();
                    758: 
1.7       foxr      759:     #  If the socket came from the active connection set,
                    760:     #  delete its transaction... note that FailTransaction should
                    761:     #  already have been called!!!
                    762:     #  otherwise it came from the idle set.
                    763:     #  
1.4       foxr      764:     
                    765:     if(exists($ActiveTransactions{$Socket})) {
                    766: 	delete ($ActiveTransactions{$Socket});
                    767:     }
                    768:     if(exists($ActiveConnections{$Socket})) {
1.90      foxr      769: 	$ActiveConnections{$Socket}->cancel;
1.4       foxr      770: 	delete($ActiveConnections{$Socket});
1.105     raeburn   771:         # Decrement ConnectionCount unless we will immediately
                    772:         # re-connect (i.e., $restart is true), because this was
                    773:         # a connection where the SSL channel for exchange of the
                    774:         # shared key failed, and we may use an insecure channel.
                    775:         unless ($restart) {
                    776: 	    $ConnectionCount--;
                    777:         }
1.37      albertel  778: 	if ($ConnectionCount < 0) { $ConnectionCount = 0; }
1.4       foxr      779:     }
1.6       foxr      780:     #  If the connection count has gone to zero and there is work in the
                    781:     #  work queue, the work all gets failed with con_lost.
                    782:     #
1.105     raeburn   783:   
1.6       foxr      784:     if($ConnectionCount == 0) {
1.98      foxr      785: 	$LondConnecting = 0;	# No connections so also not connecting.
1.22      foxr      786: 	EmptyQueue();
1.105     raeburn   787: 	CloseAllLondConnections(); # Should all already be closed but...
                    788:         &clear_childpid($$);
1.4       foxr      789:     }
1.90      foxr      790:     UpdateStatus();
1.4       foxr      791: }
1.1       foxr      792: 
                    793: =pod
1.3       albertel  794: 
1.1       foxr      795: =head2 LondReadable
1.3       albertel  796: 
1.1       foxr      797: This function is called whenever a lond connection
                    798: is readable.  The action is state dependent:
                    799: 
1.3       albertel  800: =head3 State=Initialized
                    801: 
1.100     raeburn   802: We are waiting for the challenge, this is a no-op until the
1.1       foxr      803: state changes.
1.3       albertel  804: 
1.1       foxr      805: =head3 State=Challenged 
1.3       albertel  806: 
                    807: The challenge has arrived we need to transition to Writable.
1.1       foxr      808: The connection must echo the challenge back.
1.3       albertel  809: 
1.1       foxr      810: =head3 State=ChallengeReplied
1.3       albertel  811: 
1.97      raeburn   812: The challenge has been replied to.  Then we are receiving the 
1.1       foxr      813: 'ok' from the partner.
1.3       albertel  814: 
1.40      foxr      815: =head3  State=ReadingVersionString
                    816: 
                    817: We have requested the lond version and are reading the
                    818: version back.  Upon completion, we'll store the version away
                    819: for future use(?).
                    820: 
                    821: =head3 State=HostSet
                    822: 
                    823: We have selected the domain name of our peer (multhomed hosts)
                    824: and are getting the reply (presumably ok) back.
                    825: 
1.1       foxr      826: =head3 State=RequestingKey
1.3       albertel  827: 
                    828: The ok has been received and we need to send the request for
1.1       foxr      829: an encryption key.  Transition to writable for that.
1.3       albertel  830: 
1.1       foxr      831: =head3 State=ReceivingKey
1.3       albertel  832: 
                    833: The the key has been requested, now we are reading the new key.
                    834: 
1.1       foxr      835: =head3 State=Idle 
1.3       albertel  836: 
                    837: The encryption key has been negotiated or we have finished 
1.97      raeburn   838: reading data from the a transaction.   If the callback data have
1.99      raeburn   839: a client as well as the socket information, then we are 
1.97      raeburn   840: doing a transaction and the data received are relayed to the client
1.1       foxr      841: before the socket is put on the idle list.
1.3       albertel  842: 
1.1       foxr      843: =head3 State=SendingRequest
1.3       albertel  844: 
                    845: I do not think this state can be received here, but if it is,
1.1       foxr      846: the appropriate thing to do is to transition to writable, and send
                    847: the request.
1.3       albertel  848: 
1.1       foxr      849: =head3 State=ReceivingReply
1.3       albertel  850: 
                    851: We finished sending the request to the server and now transition
1.1       foxr      852: to readable to receive the reply. 
                    853: 
                    854: The parameter to this function are:
1.3       albertel  855: 
1.1       foxr      856: The event. Implicit in this is the watcher and its data.  The data 
1.97      raeburn   857: contain at least the lond connection object and, if a 
1.1       foxr      858: transaction is in progress, the socket attached to the local client.
                    859: 
1.3       albertel  860: =cut
1.1       foxr      861: 
                    862: sub LondReadable {
1.8       foxr      863: 
1.41      albertel  864:     my $Event      = shift;
                    865:     my $Watcher    = $Event->w;
                    866:     my $Socket     = $Watcher->data;
                    867:     my $client     = undef;
1.40      foxr      868: 
1.41      albertel  869:     &Debug(6,"LondReadable called state = ".$Socket->GetState());
1.40      foxr      870: 
                    871: 
1.41      albertel  872:     my $State = $Socket->GetState(); # All action depends on the state.
1.40      foxr      873: 
1.41      albertel  874:     SocketDump(6, $Socket);
                    875:     my $status = $Socket->Readable();
1.40      foxr      876: 
1.41      albertel  877:     &Debug(2, "Socket->Readable returned: $status");
1.40      foxr      878: 
1.41      albertel  879:     if($status != 0) {
                    880: 	# bad return from socket read. Currently this means that
                    881: 	# The socket has become disconnected. We fail the transaction.
1.40      foxr      882: 
1.41      albertel  883: 	Log("WARNING",
                    884: 	    "Lond connection lost.");
1.105     raeburn   885:         my $state_on_exit = $Socket->GetState();
1.41      albertel  886: 	if(exists($ActiveTransactions{$Socket})) {
                    887: 	    FailTransaction($ActiveTransactions{$Socket});
1.56      foxr      888: 	} else {
                    889: 	    #  Socket is connecting and failed... need to mark
                    890: 	    #  no longer connecting.
                    891: 	    $LondConnecting = 0;
1.41      albertel  892: 	}
                    893: 	$Watcher->cancel();
1.105     raeburn   894:         if ($state_on_exit eq 'ReInitNoSSL') {
                    895:             # SSL certificate verification failed, and insecure connection
                    896:             # allowed. Send restart arg to KillSocket(), so EmptyQueue() 
                    897:             # is not called, as we still hope to process queued request.
                    898: 
                    899:             KillSocket($Socket,1);
                    900: 
                    901:             # Re-initiate creation of Lond Connection for use with queued
                    902:             # request.
                    903: 
                    904:             ShowStatus("Connected to ".$RemoteHost);
                    905:             Log("WARNING","No SSL channel (verification failed), will try with insecure channel");
                    906:             &MakeLondConnection(1);
                    907: 
                    908:         } else {
                    909: 	    KillSocket($Socket);
                    910: 	    $ConnectionRetriesLeft--;       # Counts as connection failure         
                    911:         }
1.41      albertel  912: 	return;
                    913:     }
                    914:     SocketDump(6,$Socket);
1.17      foxr      915: 
1.41      albertel  916:     $State = $Socket->GetState(); # Update in case of transition.
                    917:     &Debug(6, "After read, state is ".$State);
1.1       foxr      918: 
1.41      albertel  919:     if($State eq "Initialized") {
1.1       foxr      920: 
                    921: 
1.105     raeburn   922:     } elsif ($State eq "ReInitNoSSL") {
                    923: 
1.41      albertel  924:     } elsif ($State eq "ChallengeReceived") {
1.1       foxr      925: 	#  The challenge must be echoed back;  The state machine
                    926: 	# in the connection takes care of setting that up.  Just
                    927: 	# need to transition to writable:
1.41      albertel  928: 	
                    929: 	$Watcher->cb(\&LondWritable);
                    930: 	$Watcher->poll("w");
1.1       foxr      931: 
1.41      albertel  932:     } elsif ($State eq "ChallengeReplied") {
1.1       foxr      933: 
1.41      albertel  934:     } elsif ($State eq "RequestingVersion") {
                    935: 	# Need to ask for the version... that is writiability:
1.1       foxr      936: 
1.41      albertel  937: 	$Watcher->cb(\&LondWritable);
                    938: 	$Watcher->poll("w");
                    939: 
                    940:     } elsif ($State eq "ReadingVersionString") {
                    941: 	# Read the rest of the version string... 
                    942:     } elsif ($State eq "SetHost") {
                    943: 	# Need to request the actual domain get set...
                    944: 
                    945: 	$Watcher->cb(\&LondWritable);
                    946: 	$Watcher->poll("w");
                    947:     } elsif ($State eq "HostSet") {
                    948: 	# Reading the 'ok' from the peer.
                    949: 
                    950:     } elsif ($State eq "RequestingKey") {
1.1       foxr      951: 	#  The ok was received.  Now we need to request the key
                    952: 	#  That requires us to be writable:
                    953: 
1.41      albertel  954: 	$Watcher->cb(\&LondWritable);
                    955: 	$Watcher->poll("w");
1.1       foxr      956: 
1.41      albertel  957:     } elsif ($State eq "ReceivingKey") {
1.1       foxr      958: 
1.41      albertel  959:     } elsif ($State eq "Idle") {
1.105     raeburn   960: 
                    961:         if ($ConnectionCount == 1) { 
                    962:             # Write child Pid file to keep track of ssl and insecure
                    963:             # connections
                    964: 
                    965:             &record_childpid($Socket);
                    966:         }
                    967: 
1.41      albertel  968: 	# This is as good a spot as any to get the peer version
                    969: 	# string:
1.40      foxr      970:    
1.41      albertel  971: 	if($LondVersion eq "unknown") {
                    972: 	    $LondVersion = $Socket->PeerVersion();
                    973: 	    Log("INFO", "Connected to lond version: $LondVersion");
                    974: 	}
1.1       foxr      975: 	# If necessary, complete a transaction and then go into the
                    976: 	# idle queue.
1.22      foxr      977: 	#  Note that a trasition to idle indicates a live lond
                    978: 	# on the other end so reset the connection retries.
                    979: 	#
1.41      albertel  980: 	$ConnectionRetriesLeft = $ConnectionRetries; # success resets the count
                    981: 	$Watcher->cancel();
                    982: 	if(exists($ActiveTransactions{$Socket})) {
                    983: 	    Debug(5,"Completing transaction!!");
                    984: 	    CompleteTransaction($Socket, 
                    985: 				$ActiveTransactions{$Socket});
                    986: 	} else {
1.95      foxr      987: 	    my $count = $Socket->GetClientData();
                    988: 	    Log("SUCCESS", "Connection ".$count." to "
1.41      albertel  989: 		.$RemoteHost." now ready for action");
                    990: 	}
                    991: 	ServerToIdle($Socket);	# Next work unit or idle.
1.54      foxr      992: 
                    993: 	#
                    994: 	$LondConnecting = 0;	# Best spot I can think of for this.
                    995: 	# 
1.6       foxr      996: 	
1.41      albertel  997:     } elsif ($State eq "SendingRequest") {
1.1       foxr      998: 	#  We need to be writable for this and probably don't belong
                    999: 	#  here inthe first place.
                   1000: 
1.73      albertel 1001: 	Debug(6, "SendingRequest state encountered in readable");
1.41      albertel 1002: 	$Watcher->poll("w");
                   1003: 	$Watcher->cb(\&LondWritable);
1.1       foxr     1004: 
1.41      albertel 1005:     } elsif ($State eq "ReceivingReply") {
1.1       foxr     1006: 
                   1007: 
1.41      albertel 1008:     } else {
                   1009: 	# Invalid state.
                   1010: 	Debug(4, "Invalid state in LondReadable");
                   1011:     }
1.1       foxr     1012: }
1.3       albertel 1013: 
1.1       foxr     1014: =pod
1.3       albertel 1015: 
1.1       foxr     1016: =head2 LondWritable
1.3       albertel 1017: 
1.1       foxr     1018: This function is called whenever a lond connection
                   1019: becomes writable while there is a writeable monitoring
                   1020: event.  The action taken is very state dependent:
1.3       albertel 1021: 
1.1       foxr     1022: =head3 State = Connected 
1.3       albertel 1023: 
                   1024: The connection is in the process of sending the 'init' hailing to the
1.100     raeburn  1025: lond on the remote end.  The Writable member of the connection object
                   1026: is called.  On error, call ConnectionError to destroy the connection
                   1027: and remove it from the ActiveConnections hash.
1.3       albertel 1028: 
1.1       foxr     1029: =head3 Initialized
1.3       albertel 1030: 
                   1031: 'init' has been sent, writability monitoring is removed and
                   1032: readability monitoring is started with LondReadable as the callback.
                   1033: 
1.1       foxr     1034: =head3 ChallengeReceived
1.3       albertel 1035: 
                   1036: The connection has received the who are you challenge from the remote
                   1037: system, and is in the process of sending the challenge
                   1038: response. Writable is called.
                   1039: 
1.1       foxr     1040: =head3 ChallengeReplied
1.3       albertel 1041: 
                   1042: The connection has replied to the initial challenge The we switch to
                   1043: monitoring readability looking for the server to reply with 'ok'.
                   1044: 
1.1       foxr     1045: =head3 RequestingKey
1.3       albertel 1046: 
                   1047: The connection is in the process of requesting its encryption key.
                   1048: Writable is called.
                   1049: 
1.1       foxr     1050: =head3 ReceivingKey
1.3       albertel 1051: 
                   1052: The connection has sent the request for a key.  Switch to readability
                   1053: monitoring to accept the key
                   1054: 
1.1       foxr     1055: =head3 SendingRequest
1.3       albertel 1056: 
                   1057: The connection is in the process of sending a request to the server.
                   1058: This request is part of a client transaction.  All the states until
                   1059: now represent the client setup protocol. Writable is called.
                   1060: 
1.1       foxr     1061: =head3 ReceivingReply
                   1062: 
1.3       albertel 1063: The connection has sent a request.  Now it must receive a reply.
                   1064: Readability monitoring is requested.
                   1065: 
                   1066: This function is an event handler and therefore receives as
1.1       foxr     1067: a parameter the event that has fired.  The data for the watcher
                   1068: of this event is a reference to a list of one or two elements,
                   1069: depending on state. The first (and possibly only) element is the
                   1070: socket.  The second (present only if a request is in progress)
                   1071: is the socket on which to return a reply to the caller.
                   1072: 
                   1073: =cut
1.3       albertel 1074: 
1.1       foxr     1075: sub LondWritable {
                   1076:     my $Event   = shift;
                   1077:     my $Watcher = $Event->w;
1.8       foxr     1078:     my $Socket  = $Watcher->data;
                   1079:     my $State   = $Socket->GetState();
1.1       foxr     1080: 
1.8       foxr     1081:     Debug(6,"LondWritable State = ".$State."\n");
1.1       foxr     1082: 
1.8       foxr     1083:  
1.1       foxr     1084:     #  Figure out what to do depending on the state of the socket:
                   1085:     
                   1086: 
                   1087: 
                   1088: 
                   1089:     SocketDump(6,$Socket);
                   1090: 
1.42      foxr     1091:     #  If the socket is writable, we must always write.
                   1092:     # Only by writing will we undergo state transitions.
                   1093:     # Old logic wrote in state specific code below, however
                   1094:     # That forces us at least through another invocation of
                   1095:     # this function after writability is possible again.
                   1096:     # This logic also factors out common code for handling
                   1097:     # write failures... in all cases, write failures 
                   1098:     # Kill the socket.
                   1099:     #  This logic makes the branches of the >big< if below
                   1100:     # so that the writing states are actually NO-OPs.
                   1101: 
                   1102:     if ($Socket->Writable() != 0) {
1.43      albertel 1103: 	#  The write resulted in an error.
                   1104: 	# We'll treat this as if the socket got disconnected:
                   1105: 	Log("WARNING", "Connection to ".$RemoteHost.
                   1106: 	    " has been disconnected");
                   1107: 	if(exists($ActiveTransactions{$Socket})) {
                   1108: 	    FailTransaction($ActiveTransactions{$Socket});
1.56      foxr     1109: 	} else {
                   1110: 	    #  In the process of conneting, so need to turn that off.
                   1111: 	    
                   1112: 	    $LondConnecting = 0;
1.43      albertel 1113: 	}
                   1114: 	$Watcher->cancel();
                   1115: 	KillSocket($Socket);
                   1116: 	return;
1.42      foxr     1117:     }
                   1118: 
                   1119: 
                   1120: 
1.41      albertel 1121:     if      ($State eq "Connected")         {
1.1       foxr     1122: 
1.41      albertel 1123: 	#  "init" is being sent...
1.42      foxr     1124:  
1.41      albertel 1125:     } elsif ($State eq "Initialized")       {
1.4       foxr     1126: 
1.41      albertel 1127: 	# Now that init was sent, we switch 
                   1128: 	# to watching for readability:
1.1       foxr     1129: 
1.41      albertel 1130: 	$Watcher->cb(\&LondReadable);
                   1131: 	$Watcher->poll("r");
1.105     raeburn  1132: 
                   1133:     } elsif ($State eq "ReInitNoSSL") {
                   1134: 
1.41      albertel 1135:     } elsif ($State eq "ChallengeReceived") {
                   1136: 	# We received the challenge, now we 
                   1137: 	# are echoing it back. This is a no-op,
                   1138: 	# we're waiting for the state to change
1.1       foxr     1139: 	
1.41      albertel 1140:     } elsif ($State eq "ChallengeReplied")  {
                   1141: 	# The echo was sent back, so we switch
                   1142: 	# to watching readability.
                   1143: 
                   1144: 	$Watcher->cb(\&LondReadable);
                   1145: 	$Watcher->poll("r");
                   1146:     } elsif ($State eq "RequestingVersion") {
                   1147: 	# Sending the peer a version request...
1.42      foxr     1148: 
1.41      albertel 1149:     } elsif ($State eq "ReadingVersionString") {
                   1150: 	# Transition to read since we have sent the
                   1151: 	# version command and now just need to read the
                   1152: 	# version string from the peer:
1.40      foxr     1153:       
1.41      albertel 1154: 	$Watcher->cb(\&LondReadable);
                   1155: 	$Watcher->poll("r");
1.40      foxr     1156:       
1.41      albertel 1157:     } elsif ($State eq "SetHost") {
                   1158: 	#  Setting the remote domain...
1.42      foxr     1159: 
1.41      albertel 1160:     } elsif ($State eq "HostSet") {
                   1161: 	# Back to readable to get the ok.
1.40      foxr     1162:       
1.41      albertel 1163: 	$Watcher->cb(\&LondReadable);
                   1164: 	$Watcher->poll("r");
1.40      foxr     1165:       
                   1166: 
1.41      albertel 1167:     } elsif ($State eq "RequestingKey")     {
                   1168: 	# At this time we're requesting the key.
                   1169: 	# again, this is essentially a no-op.
                   1170: 
                   1171:     } elsif ($State eq "ReceivingKey")      {
                   1172: 	# Now we need to wait for the key
                   1173: 	# to come back from the peer:
                   1174: 
                   1175: 	$Watcher->cb(\&LondReadable);
                   1176: 	$Watcher->poll("r");
                   1177: 
                   1178:     } elsif ($State eq "SendingRequest")    {
1.40      foxr     1179:  
1.41      albertel 1180: 	# At this time we are sending a request to the
1.1       foxr     1181: 	# peer... write the next chunk:
                   1182: 
1.41      albertel 1183: 
                   1184:     } elsif ($State eq "ReceivingReply")    {
                   1185: 	# The send has completed.  Wait for the
                   1186: 	# data to come in for a reply.
                   1187: 	Debug(8,"Writable sent request/receiving reply");
                   1188: 	$Watcher->cb(\&LondReadable);
                   1189: 	$Watcher->poll("r");
1.1       foxr     1190: 
1.41      albertel 1191:     } else {
                   1192: 	#  Control only passes here on an error: 
                   1193: 	#  the socket state does not match any
                   1194: 	#  of the known states... so an error
                   1195: 	#  must be logged.
1.1       foxr     1196: 
1.41      albertel 1197: 	&Debug(4, "Invalid socket state ".$State."\n");
                   1198:     }
1.1       foxr     1199:     
                   1200: }
1.81      albertel 1201: 
1.6       foxr     1202: =pod
                   1203:     
                   1204: =cut
1.69      matthew  1205: 
1.81      albertel 1206: 
1.6       foxr     1207: sub QueueDelayed {
1.8       foxr     1208:     Debug(3,"QueueDelayed called");
                   1209: 
1.6       foxr     1210:     my $path = "$perlvar{'lonSockDir'}/delayed";
1.8       foxr     1211: 
                   1212:     Debug(4, "Delayed path: ".$path);
1.6       foxr     1213:     opendir(DIRHANDLE, $path);
1.75      albertel 1214: 
1.82      albertel 1215:     my $host_id_re = '(?:'.join('|',map {quotemeta($_)} (@all_host_ids)).')';
1.75      albertel 1216:     my @alldelayed = grep(/\.$host_id_re$/, readdir(DIRHANDLE));
1.6       foxr     1217:     closedir(DIRHANDLE);
1.75      albertel 1218:     foreach my $dfname (sort(@alldelayed)) {
                   1219: 	my $reqfile = "$path/$dfname";
                   1220: 	my ($host_id) = ($dfname =~ /\.([^.]*)$/);
                   1221: 	Debug(4, "queueing ".$reqfile." for $host_id");
1.6       foxr     1222: 	my $Handle = IO::File->new($reqfile);
                   1223: 	my $cmd    = <$Handle>;
1.8       foxr     1224: 	chomp $cmd;		# There may or may not be a newline...
1.12      foxr     1225: 	$cmd = $cmd."\n";	# now for sure there's exactly one newline.
1.75      albertel 1226: 	my $Transaction = LondTransaction->new("sethost:$host_id:$cmd");
1.7       foxr     1227: 	$Transaction->SetDeferred($reqfile);
                   1228: 	QueueTransaction($Transaction);
1.6       foxr     1229:     }
                   1230:     
                   1231: }
1.1       foxr     1232: 
                   1233: =pod
1.3       albertel 1234: 
1.1       foxr     1235: =head2 MakeLondConnection
1.3       albertel 1236: 
                   1237: Create a new lond connection object, and start it towards its initial
1.97      raeburn  1238: idleness.  Once idle, it becomes eligible to receive transactions
1.3       albertel 1239: from the work queue.  If the work queue is not empty when the
                   1240: connection is completed and becomes idle, it will dequeue an entry and
                   1241: start off on it.
                   1242: 
1.1       foxr     1243: =cut
1.3       albertel 1244: 
1.105     raeburn  1245: sub MakeLondConnection {
                   1246:     my ($restart) = @_;
1.1       foxr     1247:     Debug(4,"MakeLondConnection to ".GetServerHost()." on port "
                   1248: 	  .GetServerPort());
                   1249: 
                   1250:     my $Connection = LondConnection->new(&GetServerHost(),
1.81      albertel 1251: 					 &GetServerPort(),
1.107   ! raeburn  1252: 					 &GetHostId(),
        !          1253: 					 &GetDefHostId());
1.1       foxr     1254: 
1.105     raeburn  1255:     if($Connection eq undef) {
1.9       foxr     1256: 	Log("CRITICAL","Failed to make a connection with lond.");
1.10      foxr     1257: 	$ConnectionRetriesLeft--;
                   1258: 	return 0;		# Failure.
1.5       foxr     1259:     }  else {
1.82      albertel 1260: 	$LondConnecting = 1;	# Connection in progress.
1.5       foxr     1261: 	# The connection needs to have writability 
                   1262: 	# monitored in order to send the init sequence
                   1263: 	# that starts the whole authentication/key
                   1264: 	# exchange underway.
                   1265: 	#
                   1266: 	my $Socket = $Connection->GetSocket();
1.30      foxr     1267: 	if($Socket eq undef) {
1.64      foxr     1268: 	    &child_exit(-1, "did not get a socket from the connection");
1.5       foxr     1269: 	} else {
                   1270: 	    &Debug(9,"MakeLondConnection got socket: ".$Socket);
                   1271: 	}
1.1       foxr     1272: 	
1.21      foxr     1273: 	$Connection->SetTimeoutCallback(\&SocketTimeout);
                   1274: 
1.23      foxr     1275: 	my $event = Event->io(fd       => $Socket,
1.5       foxr     1276: 			   poll     => 'w',
                   1277: 			   cb       => \&LondWritable,
1.8       foxr     1278: 			   data     => $Connection,
1.5       foxr     1279: 			   desc => 'Connection to lond server');
                   1280: 	$ActiveConnections{$Connection} = $event;
1.52      foxr     1281: 	if ($ConnectionCount == 0) {
                   1282: 	    &SetupTimer;	# Need to handle timeouts with connections...
                   1283: 	}
1.105     raeburn  1284:         unless ($restart) {
                   1285: 	    $ConnectionCount++;
                   1286:         }
1.95      foxr     1287: 	$Connection->SetClientData($ConnectionCount);
1.8       foxr     1288: 	Debug(4, "Connection count = ".$ConnectionCount);
1.6       foxr     1289: 	if($ConnectionCount == 1) { # First Connection:
                   1290: 	    QueueDelayed;
                   1291: 	}
1.97      raeburn  1292: 	Log("SUCCESS", "Created connection ".$ConnectionCount
1.9       foxr     1293: 	    ." to host ".GetServerHost());
1.10      foxr     1294: 	return 1;		# Return success.
1.1       foxr     1295:     }
                   1296:     
                   1297: }
1.3       albertel 1298: 
1.1       foxr     1299: =pod
1.3       albertel 1300: 
1.1       foxr     1301: =head2 StartRequest
1.3       albertel 1302: 
                   1303: Starts a lond request going on a specified lond connection.
                   1304: parameters are:
                   1305: 
                   1306: =item $Lond
                   1307: 
                   1308: Connection to the lond that will send the transaction and receive the
                   1309: reply.
                   1310: 
                   1311: =item $Client
                   1312: 
1.97      raeburn  1313: Connection to the client that is making this request. We got the
1.3       albertel 1314: request from this socket, and when the request has been relayed to
                   1315: lond and we get a reply back from lond it will get sent to this
                   1316: socket.
                   1317: 
                   1318: =item $Request
                   1319: 
                   1320: The text of the request to send.
                   1321: 
1.1       foxr     1322: =cut
                   1323: 
                   1324: sub StartRequest {
1.47      foxr     1325: 
                   1326:     my ($Lond, $Request) = @_;
1.1       foxr     1327:     
1.7       foxr     1328:     Debug(6, "StartRequest: ".$Request->getRequest());
1.1       foxr     1329: 
                   1330:     my $Socket = $Lond->GetSocket();
                   1331:     
1.7       foxr     1332:     $Request->Activate($Lond);
                   1333:     $ActiveTransactions{$Lond} = $Request;
1.1       foxr     1334: 
1.7       foxr     1335:     $Lond->InitiateTransaction($Request->getRequest());
1.23      foxr     1336:     my $event = Event->io(fd      => $Socket,
1.1       foxr     1337: 		       poll    => "w",
                   1338: 		       cb      => \&LondWritable,
                   1339: 		       data    => $Lond,
                   1340: 		       desc    => "lond transaction connection");
                   1341:     $ActiveConnections{$Lond} = $event;
                   1342:     Debug(8," Start Request made watcher data with ".$event->data."\n");
                   1343: }
                   1344: 
                   1345: =pod
1.3       albertel 1346: 
1.1       foxr     1347: =head2 QueueTransaction
1.3       albertel 1348: 
                   1349: If there is an idle lond connection, it is put to work doing this
                   1350: transaction.  Otherwise, the transaction is placed in the work queue.
                   1351: If placed in the work queue and the maximum number of connections has
                   1352: not yet been created, a new connection will be started.  Our goal is
                   1353: to eventually have a sufficient number of connections that the work
                   1354: queue will typically be empty.  parameters are:
                   1355: 
                   1356: =item Socket
                   1357: 
                   1358: open on the lonc client.
                   1359: 
                   1360: =item Request
                   1361: 
                   1362: data to send to the lond.
1.1       foxr     1363: 
                   1364: =cut
1.3       albertel 1365: 
1.1       foxr     1366: sub QueueTransaction {
                   1367: 
1.7       foxr     1368:     my $requestData   = shift;	# This is a LondTransaction.
                   1369:     my $cmd           = $requestData->getRequest();
                   1370: 
                   1371:     Debug(6,"QueueTransaction: ".$cmd);
1.1       foxr     1372: 
                   1373:     my $LondSocket    = $IdleConnections->pop();
                   1374:     if(!defined $LondSocket) {	# Need to queue request.
1.29      foxr     1375: 	Debug(5,"Must queue...");
1.1       foxr     1376: 	$WorkQueue->enqueue($requestData);
1.56      foxr     1377: 	Debug(5, "Queue Transaction startnew $ConnectionCount $LondConnecting");
                   1378: 	if(($ConnectionCount < $MaxConnectionCount)   && (! $LondConnecting)) {
                   1379: 
1.22      foxr     1380: 	    if($ConnectionRetriesLeft > 0) {
1.29      foxr     1381: 		Debug(5,"Starting additional lond connection");
1.56      foxr     1382: 		if(&MakeLondConnection() == 0) {
1.22      foxr     1383: 		    EmptyQueue();	# Fail transactions, can't make connection.
1.42      foxr     1384: 		    CloseAllLondConnections; # Should all be closed but...
1.22      foxr     1385: 		}
                   1386: 	    } else {
                   1387: 		ShowStatus(GetServerHost()." >>> DEAD !!!! <<<");
1.56      foxr     1388: 		$LondConnecting = 0;
1.22      foxr     1389: 		EmptyQueue();	# It's worse than that ... he's dead Jim.
1.42      foxr     1390: 		CloseAllLondConnections; # Should all be closed but..
1.17      foxr     1391: 	    }
1.1       foxr     1392: 	}
                   1393:     } else {			# Can start the request:
                   1394: 	Debug(8,"Can start...");
1.7       foxr     1395: 	StartRequest($LondSocket,  $requestData);
1.1       foxr     1396:     }
                   1397: }
                   1398: 
1.95      foxr     1399: #-------------------------- Lonc UNIX socket handling -------------------
1.1       foxr     1400: =pod
1.3       albertel 1401: 
1.1       foxr     1402: =head2 ClientRequest
1.97      raeburn  1403: 
1.3       albertel 1404: Callback that is called when data can be read from the UNIX domain
                   1405: socket connecting us with an apache server process.
1.1       foxr     1406: 
                   1407: =cut
                   1408: 
                   1409: sub ClientRequest {
                   1410:     Debug(6, "ClientRequest");
                   1411:     my $event   = shift;
                   1412:     my $watcher = $event->w;
                   1413:     my $socket  = $watcher->fd;
                   1414:     my $data    = $watcher->data;
                   1415:     my $thisread;
                   1416: 
                   1417:     Debug(9, "  Watcher named: ".$watcher->desc);
                   1418: 
                   1419:     my $rv = $socket->recv($thisread, POSIX::BUFSIZ, 0);
                   1420:     Debug(8, "rcv:  data length = ".length($thisread)
                   1421: 	  ." read =".$thisread);
1.29      foxr     1422:     unless (defined $rv  && length($thisread)) {
1.1       foxr     1423: 	 # Likely eof on socket.
                   1424: 	Debug(5,"Client Socket closed on lonc for ".$RemoteHost);
                   1425: 	close($socket);
                   1426: 	$watcher->cancel();
                   1427: 	delete($ActiveClients{$socket});
1.10      foxr     1428: 	return;
1.1       foxr     1429:     }
                   1430:     Debug(8,"Data: ".$data." this read: ".$thisread);
                   1431:     $data = $data.$thisread;	# Append new data.
                   1432:     $watcher->data($data);
1.44      albertel 1433:     if($data =~ /\n$/) {	# Request entirely read.
1.87      albertel 1434: 	if ($data eq "close_connection_exit\n") {
1.9       foxr     1435: 	    Log("CRITICAL",
                   1436: 		"Request Close Connection ... exiting");
                   1437: 	    CloseAllLondConnections();
                   1438: 	    exit;
1.87      albertel 1439: 	} elsif ($data eq "reset_retries\n") {
                   1440: 	    Log("INFO", "Resetting Connection Retries.");
                   1441: 	    $ConnectionRetriesLeft = $ConnectionRetries;
                   1442: 	    &UpdateStatus();
                   1443: 	    my $Transaction = LondTransaction->new($data);
                   1444: 	    $Transaction->SetClient($socket);
                   1445: 	    StartClientReply($Transaction, "ok\n");
                   1446: 	    $watcher->cancel();
                   1447: 	    return;
1.9       foxr     1448: 	}
1.1       foxr     1449: 	Debug(8, "Complete transaction received: ".$data);
1.87      albertel 1450: 	if ($LogTransactions) {
1.39      foxr     1451: 	    Log("SUCCESS", "Transaction: '$data'"); # Transaction has \n.
                   1452: 	}
1.8       foxr     1453: 	my $Transaction = LondTransaction->new($data);
1.7       foxr     1454: 	$Transaction->SetClient($socket);
                   1455: 	QueueTransaction($Transaction);
1.1       foxr     1456: 	$watcher->cancel();	# Done looking for input data.
                   1457:     }
                   1458: 
                   1459: }
                   1460: 
1.62      foxr     1461: #
                   1462: #     Accept a connection request for a client (lonc child) and
                   1463: #    start up an event watcher to keep an eye on input from that 
                   1464: #    Event.  This can be called both from NewClient and from
1.80      albertel 1465: #    ChildProcess.
1.62      foxr     1466: # Parameters:
                   1467: #    $socket       - The listener socket.
                   1468: # Returns:
                   1469: #   NONE
                   1470: # Side Effects:
                   1471: #    An event is made to watch the accepted connection.
                   1472: #    Active clients hash is updated to reflect the new connection.
                   1473: #    The client connection count is incremented.
                   1474: #
                   1475: sub accept_client {
                   1476:     my ($socket) = @_;
                   1477: 
                   1478:     Debug(8, "Entering accept for lonc UNIX socket\n");
                   1479:     my $connection = $socket->accept();	# Accept the client connection.
                   1480:     Debug(8,"Connection request accepted from "
                   1481: 	  .GetPeername($connection, AF_UNIX));
                   1482: 
                   1483: 
                   1484:     my $description = sprintf("Connection to lonc client %d",
                   1485: 			      $ClientConnection);
                   1486:     Debug(9, "Creating event named: ".$description);
                   1487:     Event->io(cb      => \&ClientRequest,
                   1488: 	      poll    => 'r',
                   1489: 	      desc    => $description,
                   1490: 	      data    => "",
                   1491: 	      fd      => $connection);
                   1492:     $ActiveClients{$connection} = $ClientConnection;
                   1493:     $ClientConnection++;
                   1494: }
1.1       foxr     1495: 
                   1496: =pod
1.3       albertel 1497: 
1.1       foxr     1498: =head2  NewClient
1.3       albertel 1499: 
                   1500: Callback that is called when a connection is received on the unix
                   1501: socket for a new client of lonc.  The callback is parameterized by the
                   1502: event.. which is a-priori assumed to be an io event, and therefore has
1.97      raeburn  1503: an fd member that is the Listener socket.  We accept the connection
1.3       albertel 1504: and register a new event on the readability of that socket:
                   1505: 
1.1       foxr     1506: =cut
1.3       albertel 1507: 
1.1       foxr     1508: sub NewClient {
                   1509:     Debug(6, "NewClient");
                   1510:     my $event      = shift;		# Get the event parameters.
                   1511:     my $watcher    = $event->w; 
                   1512:     my $socket     = $watcher->fd;	# Get the event' socket.
                   1513: 
1.62      foxr     1514:     &accept_client($socket);
1.1       foxr     1515: }
1.3       albertel 1516: 
                   1517: =pod
                   1518: 
                   1519: =head2 GetLoncSocketPath
                   1520: 
                   1521: Returns the name of the UNIX socket on which to listen for client
                   1522: connections.
1.1       foxr     1523: 
1.58      foxr     1524: =head2 Parameters:
                   1525: 
                   1526:     host (optional)  - Name of the host socket to return.. defaults to
                   1527:                        the return from GetServerHost().
                   1528: 
1.1       foxr     1529: =cut
1.3       albertel 1530: 
1.1       foxr     1531: sub GetLoncSocketPath {
1.58      foxr     1532: 
                   1533:     my $host = GetServerHost();	# Default host.
                   1534:     if (@_) {
                   1535: 	($host)  = @_;		# Override if supplied.
                   1536:     }
                   1537:     return $UnixSocketDir."/".$host;
1.1       foxr     1538: }
                   1539: 
1.3       albertel 1540: =pod
                   1541: 
                   1542: =head2 GetServerHost
                   1543: 
                   1544: Returns the host whose lond we talk with.
                   1545: 
1.1       foxr     1546: =cut
1.3       albertel 1547: 
1.7       foxr     1548: sub GetServerHost {
1.1       foxr     1549:     return $RemoteHost;		# Setup by the fork.
                   1550: }
1.3       albertel 1551: 
                   1552: =pod
                   1553: 
1.107   ! raeburn  1554: =head2 GetHostId
1.81      albertel 1555: 
                   1556: Returns the hostid whose lond we talk with.
                   1557: 
                   1558: =cut
                   1559: 
                   1560: sub GetHostId {
                   1561:     return $RemoteHostId;		# Setup by the fork.
                   1562: }
                   1563: 
                   1564: =pod
                   1565: 
1.107   ! raeburn  1566: =head2 GetDefHostId
        !          1567: 
        !          1568: Returns the default hostid for the node whose lond we talk with.
        !          1569: 
        !          1570: =cut
        !          1571: 
        !          1572: sub GetDefHostId {                      # Setup by the fork.
        !          1573:     return $RemoteDefHostId;
        !          1574: }
        !          1575: 
        !          1576: =pod
        !          1577: 
1.3       albertel 1578: =head2 GetServerPort
                   1579: 
                   1580: Returns the lond port number.
                   1581: 
1.1       foxr     1582: =cut
1.3       albertel 1583: 
1.7       foxr     1584: sub GetServerPort {
1.1       foxr     1585:     return $perlvar{londPort};
                   1586: }
1.3       albertel 1587: 
                   1588: =pod
                   1589: 
                   1590: =head2 SetupLoncListener
                   1591: 
                   1592: Setup a lonc listener event.  The event is called when the socket
                   1593: becomes readable.. that corresponds to the receipt of a new
                   1594: connection.  The event handler established will accept the connection
1.99      raeburn  1595: (creating a communications channel), that in turn will establish
1.3       albertel 1596: another event handler to subess requests.
1.1       foxr     1597: 
1.58      foxr     1598: =head2  Parameters:
                   1599: 
                   1600:    host (optional)   Name of the host to set up a unix socket to.
                   1601: 
1.1       foxr     1602: =cut
1.3       albertel 1603: 
1.1       foxr     1604: sub SetupLoncListener {
1.78      albertel 1605:     my ($host,$SocketName) = @_;
                   1606:     if (!$host) { $host = &GetServerHost(); }
                   1607:     if (!$SocketName) { $SocketName = &GetLoncSocketPath($host); }
1.1       foxr     1608: 
1.78      albertel 1609: 
                   1610:     unlink($SocketName);
1.58      foxr     1611: 
1.1       foxr     1612:     my $socket;
1.7       foxr     1613:     unless ($socket =IO::Socket::UNIX->new(Local  => $SocketName,
1.55      albertel 1614: 					    Listen => 250, 
1.1       foxr     1615: 					    Type   => SOCK_STREAM)) {
1.64      foxr     1616: 	if($I_am_child) {
                   1617: 	    &child_exit(-1, "Failed to create a lonc listener socket");
                   1618: 	} else {
                   1619: 	    die "Failed to create a lonc listner socket";
                   1620: 	}
1.1       foxr     1621:     }
1.59      foxr     1622:     return $socket;
1.1       foxr     1623: }
                   1624: 
1.39      foxr     1625: #
                   1626: #   Toggle transaction logging.
                   1627: #  Implicit inputs:  
                   1628: #     LogTransactions
                   1629: #  Implicit Outputs:
                   1630: #     LogTransactions
                   1631: sub ToggleTransactionLogging {
                   1632:     print STDERR "Toggle transaction logging...\n";
                   1633:     if(!$LogTransactions) {
                   1634: 	$LogTransactions = 1;
                   1635:     } else {
                   1636: 	$LogTransactions = 0;
                   1637:     }
                   1638: 
                   1639: 
                   1640:     Log("SUCCESS", "Toggled transaction logging: $LogTransactions \n");
                   1641: }
                   1642: 
1.14      foxr     1643: =pod 
                   1644: 
                   1645: =head2 ChildStatus
                   1646:  
                   1647: Child USR1 signal handler to report the most recent status
                   1648: into the status file.
                   1649: 
1.22      foxr     1650: We also use this to reset the retries count in order to allow the
                   1651: client to retry connections with a previously dead server.
1.69      matthew  1652: 
1.14      foxr     1653: =cut
1.46      albertel 1654: 
1.14      foxr     1655: sub ChildStatus {
                   1656:     my $event = shift;
                   1657:     my $watcher = $event->w;
                   1658: 
                   1659:     Debug(2, "Reporting child status because : ".$watcher->data);
                   1660:     my $docdir = $perlvar{'lonDocRoot'};
1.67      albertel 1661:     
                   1662:     open(LOG,">>$docdir/lon-status/loncstatus.txt");
                   1663:     flock(LOG,LOCK_EX);
                   1664:     print LOG $$."\t".$RemoteHost."\t".$Status."\t".
1.14      foxr     1665: 	$RecentLogEntry."\n";
1.38      foxr     1666:     #
                   1667:     #  Write out information about each of the connections:
                   1668:     #
1.46      albertel 1669:     if ($DebugLevel > 2) {
1.67      albertel 1670: 	print LOG "Active connection statuses: \n";
1.46      albertel 1671: 	my $i = 1;
                   1672: 	print STDERR  "================================= Socket Status Dump:\n";
                   1673: 	foreach my $item (keys %ActiveConnections) {
                   1674: 	    my $Socket = $ActiveConnections{$item}->data;
                   1675: 	    my $state  = $Socket->GetState();
1.67      albertel 1676: 	    print LOG "Connection $i State: $state\n";
1.46      albertel 1677: 	    print STDERR "---------------------- Connection $i \n";
1.48      foxr     1678: 	    $Socket->Dump(-1);	# Ensure it gets dumped..
1.46      albertel 1679: 	    $i++;	
                   1680: 	}
1.38      foxr     1681:     }
1.67      albertel 1682:     flock(LOG,LOCK_UN);
                   1683:     close(LOG);
1.22      foxr     1684:     $ConnectionRetriesLeft = $ConnectionRetries;
1.70      albertel 1685:     UpdateStatus();
1.14      foxr     1686: }
                   1687: 
1.1       foxr     1688: =pod
1.3       albertel 1689: 
1.10      foxr     1690: =head2 SignalledToDeath
                   1691: 
                   1692: Called in response to a signal that causes a chid process to die.
                   1693: 
                   1694: =cut
                   1695: 
                   1696: 
                   1697: sub SignalledToDeath {
1.14      foxr     1698:     my $event  = shift;
                   1699:     my $watcher= $event->w;
                   1700: 
                   1701:     Debug(2,"Signalled to death! via ".$watcher->data);
1.17      foxr     1702:     my ($signal) = $watcher->data;
1.10      foxr     1703:     chomp($signal);
                   1704:     Log("CRITICAL", "Abnormal exit.  Child $$ for $RemoteHost "
                   1705: 	."died through "."\"$signal\"");
1.68      albertel 1706:     #LogPerm("F:lonc: $$ on $RemoteHost signalled to death: "
                   1707: #	    ."\"$signal\"");
1.105     raeburn  1708:     &clear_childpid($$);
1.12      foxr     1709:     exit 0;
1.10      foxr     1710: 
                   1711: }
1.16      foxr     1712: 
1.69      matthew  1713: =pod
                   1714: 
1.16      foxr     1715: =head2 ToggleDebug
                   1716: 
                   1717: This sub toggles trace debugging on and off.
                   1718: 
                   1719: =cut
                   1720: 
                   1721: sub ToggleDebug {
                   1722:     my $Current    = $DebugLevel;
                   1723:        $DebugLevel = $NextDebugLevel;
                   1724:        $NextDebugLevel = $Current;
                   1725: 
                   1726:     Log("SUCCESS", "New debugging level for $RemoteHost now $DebugLevel");
                   1727: 
                   1728: }
                   1729: 
1.69      matthew  1730: =pod
                   1731: 
1.1       foxr     1732: =head2 ChildProcess
                   1733: 
                   1734: This sub implements a child process for a single lonc daemon.
1.61      foxr     1735: Optional parameter:
1.97      raeburn  1736:    $socket  - if provided, this is a socket already open for listening
                   1737:               on the client socket. Otherwise, a new listener is set up.
1.1       foxr     1738: 
                   1739: =cut
                   1740: 
                   1741: sub ChildProcess {
1.80      albertel 1742:     #  We've inherited all the
1.62      foxr     1743:     #  events of our parent and those have to be cancelled or else
                   1744:     #  all holy bloody chaos will result.. trust me, I already made
                   1745:     #  >that< mistake.
                   1746: 
                   1747:     my $host = GetServerHost();
                   1748:     foreach my $listener (keys %parent_dispatchers) {
                   1749: 	my $watcher = $parent_dispatchers{$listener};
                   1750: 	my $s       = $watcher->fd;
                   1751: 	if ($listener ne $host) { # Close everyone but me.
                   1752: 	    Debug(5, "Closing listen socket for $listener");
                   1753: 	    $s->close();
                   1754: 	}
                   1755: 	Debug(5, "Killing watcher for $listener");
                   1756: 
                   1757: 	$watcher->cancel();
1.65      foxr     1758: 	delete($parent_dispatchers{$listener});
1.62      foxr     1759: 
                   1760:     }
1.65      foxr     1761: 
                   1762:     #  kill off the parent's signal handlers too!  
                   1763:     #
                   1764: 
                   1765:     for my $handler (keys %parent_handlers) {
                   1766: 	my $watcher = $parent_handlers{$handler};
                   1767: 	$watcher->cancel();
                   1768: 	delete($parent_handlers{$handler});
                   1769:     }
                   1770: 
1.64      foxr     1771:     $I_am_child    = 1;		# Seems like in spite of it all I may still getting
                   1772:                                 # parent event dispatches.. flag I'm a child.
1.1       foxr     1773: 
                   1774: 
1.14      foxr     1775:     #
                   1776:     #  Signals must be handled by the Event framework...
1.61      foxr     1777:     #
1.14      foxr     1778: 
                   1779:     Event->signal(signal   => "QUIT",
                   1780: 		  cb       => \&SignalledToDeath,
                   1781: 		  data     => "QUIT");
                   1782:     Event->signal(signal   => "HUP",
                   1783: 		  cb       => \&ChildStatus,
                   1784: 		  data     => "HUP");
                   1785:     Event->signal(signal   => "USR1",
                   1786: 		  cb       => \&ChildStatus,
                   1787: 		  data     => "USR1");
1.39      foxr     1788:     Event->signal(signal   => "USR2",
                   1789: 		  cb       => \&ToggleTransactionLogging);
1.16      foxr     1790:     Event->signal(signal   => "INT",
                   1791: 		  cb       => \&ToggleDebug,
                   1792: 		  data     => "INT");
1.1       foxr     1793: 
1.93      foxr     1794:     # Block the pipe signal we'll get when the socket disconnects.  We detect 
                   1795:     # socket disconnection via send/receive failures. On disconnect, the
                   1796:     # socket becomes readable .. which will force the disconnect detection.
                   1797: 
                   1798:     my $set = POSIX::SigSet->new(SIGPIPE);
                   1799:     sigprocmask(SIG_BLOCK, $set);
                   1800: 
1.62      foxr     1801:     #  Figure out if we got passed a socket or need to open one to listen for
                   1802:     #  client requests.
                   1803: 
1.61      foxr     1804:     my ($socket) = @_;
                   1805:     if (!$socket) {
                   1806: 
                   1807: 	$socket =  SetupLoncListener();
                   1808:     }
1.62      foxr     1809:     #  Establish an event to listen for client connection requests.
                   1810: 
                   1811: 
1.59      foxr     1812:     Event->io(cb   => \&NewClient,
                   1813: 	      poll => 'r',
                   1814: 	      desc => 'Lonc Listener Unix Socket',
                   1815: 	      fd   => $socket);
1.1       foxr     1816:     
1.76      albertel 1817:     $Event::DebugLevel = $DebugLevel;
1.1       foxr     1818:     
                   1819:     Debug(9, "Making initial lond connection for ".$RemoteHost);
                   1820: 
                   1821: # Setup the initial server connection:
                   1822:     
1.62      foxr     1823:      # &MakeLondConnection(); // let first work request do it.
1.10      foxr     1824: 
1.80      albertel 1825:     #  need to accept the connection since the event may  not fire.
1.62      foxr     1826: 
1.80      albertel 1827:     &accept_client($socket);
1.5       foxr     1828: 
1.1       foxr     1829:     Debug(9,"Entering event loop");
                   1830:     my $ret = Event::loop();		#  Start the main event loop.
                   1831:     
                   1832:     
1.64      foxr     1833:     &child_exit (-1,"Main event loop exited!!!");
1.1       foxr     1834: }
                   1835: 
                   1836: #  Create a new child for host passed in:
                   1837: 
                   1838: sub CreateChild {
1.107   ! raeburn  1839:     my ($host, $hostid, $defhostid) = @_;
1.52      foxr     1840: 
1.12      foxr     1841:     my $sigset = POSIX::SigSet->new(SIGINT);
                   1842:     sigprocmask(SIG_BLOCK, $sigset);
1.1       foxr     1843:     $RemoteHost = $host;
1.91      foxr     1844:     ShowStatus('Parent keeping the flock'); # Update time in status message.
1.9       foxr     1845:     Log("CRITICAL", "Forking server for ".$host);
1.23      foxr     1846:     my $pid          = fork;
1.1       foxr     1847:     if($pid) {			# Parent
1.17      foxr     1848: 	$RemoteHost = "Parent";
1.83      albertel 1849: 	$ChildPid{$pid} = $host;
1.12      foxr     1850: 	sigprocmask(SIG_UNBLOCK, $sigset);
1.82      albertel 1851: 	undef(@all_host_ids);
1.1       foxr     1852:     } else {			# child.
1.81      albertel 1853: 	$RemoteHostId = $hostid;
1.107   ! raeburn  1854: 	$RemoteDefHostId = $defhostid;
1.5       foxr     1855: 	ShowStatus("Connected to ".$RemoteHost);
1.23      foxr     1856: 	$SIG{INT} = 'DEFAULT';
1.12      foxr     1857: 	sigprocmask(SIG_UNBLOCK, $sigset);
1.81      albertel 1858: 	&ChildProcess();		# Does not return.
1.1       foxr     1859:     }
1.61      foxr     1860: }
1.1       foxr     1861: 
1.61      foxr     1862: # parent_client_connection:
                   1863: #    Event handler that processes client connections for the parent process.
                   1864: #    This sub is called when the parent is listening on a socket and
                   1865: #    a connection request arrives.  We must:
                   1866: #     Start a child process to accept the connection request.
                   1867: #     Kill our listen on the socket.
                   1868: # Parameter:
                   1869: #    event       - The event object that was created to monitor this socket.
                   1870: #                  event->w->fd is the socket.
                   1871: # Returns:
                   1872: #    NONE
                   1873: #
                   1874: sub parent_client_connection {
1.62      foxr     1875:     if ($I_am_child) {
                   1876: 	#  Should not get here, but seem to anyway:
                   1877: 	&Debug(5," Child caught parent client connection event!!");
                   1878: 	my ($event) = @_;
                   1879: 	my $watcher = $event->w;
                   1880: 	$watcher->cancel();	# Try to kill it off again!!
                   1881:     } else {
                   1882: 	&Debug(9, "parent_client_connection");
                   1883: 	my ($event)   = @_;
                   1884: 	my $watcher   = $event->w;
                   1885: 	my $socket    = $watcher->fd;
1.81      albertel 1886: 	my $connection = $socket->accept();	# Accept the client connection.
                   1887: 	Event->io(cb      => \&get_remote_hostname,
                   1888: 		  poll    => 'r',
                   1889: 		  data    => "",
                   1890: 		  fd      => $connection);
1.77      albertel 1891:     }
                   1892: }
                   1893: 
                   1894: sub get_remote_hostname {
1.82      albertel 1895:     my ($event)   = @_;
                   1896:     my $watcher   = $event->w;
                   1897:     my $socket    = $watcher->fd;
                   1898: 
                   1899:     my $thisread;
                   1900:     my $rv = $socket->recv($thisread, POSIX::BUFSIZ, 0);
                   1901:     Debug(8, "rcv:  data length = ".length($thisread)." read =".$thisread);
                   1902:     if (!defined($rv) || length($thisread) == 0) {
                   1903: 	# Likely eof on socket.
                   1904: 	Debug(5,"Client Socket closed on lonc for p_c_c");
                   1905: 	close($socket);
                   1906: 	$watcher->cancel();
                   1907: 	return;
                   1908:     }
                   1909: 
                   1910:     my $data    = $watcher->data().$thisread;
                   1911:     $watcher->data($data);
                   1912:     if($data =~ /\n$/) {	# Request entirely read.
                   1913: 	chomp($data);
                   1914:     } else {
                   1915: 	return;
                   1916:     }
1.77      albertel 1917: 
1.82      albertel 1918:     &Debug(5,"Creating child for $data (parent_client_connection)");
                   1919:     (my $hostname,my $lonid,@all_host_ids) = split(':',$data);
1.83      albertel 1920:     $ChildHost{$hostname}++;
                   1921:     if ($ChildHost{$hostname} == 1) {
1.107   ! raeburn  1922: 	&CreateChild($hostname,$lonid,$all_host_ids[-1]);
1.83      albertel 1923:     } else {
                   1924: 	&Log('WARNING',"Request for a second child on $hostname");
                   1925:     }
1.82      albertel 1926:     # Clean up the listen since now the child takes over until it exits.
                   1927:     $watcher->cancel();		# Nolonger listening to this event
                   1928:     $socket->send("done\n");
                   1929:     $socket->close();
1.61      foxr     1930: }
                   1931: 
                   1932: # parent_listen:
                   1933: #    Opens a socket and starts a listen for the parent process on a client UNIX
                   1934: #    domain socket.
                   1935: #
                   1936: #    This involves:
                   1937: #       Creating a socket for listen.
                   1938: #       Removing any socket lock file
                   1939: #       Adding an event handler for this socket becoming readable
                   1940: #         To the parent's event dispatcher.
                   1941: # Parameters:
                   1942: #    loncapa_host    - LonCAPA cluster name of the host represented by the client
                   1943: #                      socket.
                   1944: # Returns:
                   1945: #    NONE
                   1946: #
                   1947: sub parent_listen {
                   1948:     my ($loncapa_host) = @_;
                   1949:     Debug(5, "parent_listen: $loncapa_host");
                   1950: 
1.78      albertel 1951:     my ($socket,$file);
                   1952:     if (!$loncapa_host) {
                   1953: 	$loncapa_host = 'common_parent';
                   1954: 	$file         = $perlvar{'lonSockCreate'};
                   1955:     } else {
                   1956: 	$file         = &GetLoncSocketPath($loncapa_host);
                   1957:     }
                   1958:     $socket = &SetupLoncListener($loncapa_host,$file);
                   1959: 
1.62      foxr     1960:     $listening_to{$socket} = $loncapa_host;
1.61      foxr     1961:     if (!$socket) {
                   1962: 	die "Unable to create a listen socket for $loncapa_host";
                   1963:     }
                   1964:     
1.78      albertel 1965:     my $lock_file = $file.".lock";
1.61      foxr     1966:     unlink($lock_file);		# No problem if it doesn't exist yet [startup e.g.]
                   1967: 
1.77      albertel 1968:     my $watcher = 
                   1969: 	Event->io(cb    => \&parent_client_connection,
                   1970: 		  poll  => 'r',
                   1971: 		  desc  => "Parent listener unix socket ($loncapa_host)",
                   1972: 		  data => "",
                   1973: 		  fd    => $socket);
1.62      foxr     1974:     $parent_dispatchers{$loncapa_host} = $watcher;
1.61      foxr     1975: 
                   1976: }
                   1977: 
1.77      albertel 1978: sub parent_clean_up {
                   1979:     my ($loncapa_host) = @_;
1.87      albertel 1980:     Debug(1, "parent_clean_up: $loncapa_host");
1.77      albertel 1981: 
                   1982:     my $socket_file = &GetLoncSocketPath($loncapa_host);
                   1983:     unlink($socket_file);	# No problem if it doesn't exist yet [startup e.g.]
                   1984:     my $lock_file   = $socket_file.".lock";
                   1985:     unlink($lock_file);		# No problem if it doesn't exist yet [startup e.g.]
                   1986: }
                   1987: 
1.61      foxr     1988: 
1.83      albertel 1989: 
                   1990: #    This sub initiates a listen on the common unix domain lonc client socket.
                   1991: #    loncnew starts up with no children, and only spawns off children when a
                   1992: #    connection request occurs on the common client unix socket.  The spawned
                   1993: #    child continues to run until it has been idle a while at which point it
                   1994: #    eventually exits and once more the parent picks up the listen.
1.61      foxr     1995: #
                   1996: #  Parameters:
                   1997: #      NONE
                   1998: #  Implicit Inputs:
                   1999: #    The configuration file that has been read in by LondConnection.
                   2000: #  Returns:
                   2001: #     NONE
                   2002: #
1.77      albertel 2003: sub listen_on_common_socket {
                   2004:     Debug(5, "listen_on_common_socket");
1.78      albertel 2005:     &parent_listen();
1.77      albertel 2006: }
                   2007: 
1.63      foxr     2008: #   server_died is called whenever a child process exits.
                   2009: #   Since this is dispatched via a signal, we must process all
                   2010: #   dead children until there are no more left.  The action
                   2011: #   is to:
                   2012: #      - Remove the child from the bookeeping hashes
                   2013: #      - Re-establish a listen on the unix domain socket associated
                   2014: #        with that host.
                   2015: # Parameters:
                   2016: #    The event, but we don't actually care about it.
                   2017: sub server_died {
                   2018:     &Debug(9, "server_died called...");
                   2019:     
                   2020:     while(1) {			# Loop until waitpid nowait fails.
                   2021: 	my $pid = waitpid(-1, WNOHANG);
                   2022: 	if($pid <= 0) {
                   2023: 	    return;		# Nothing left to wait for.
                   2024: 	}
                   2025: 	# need the host to restart:
                   2026: 
1.83      albertel 2027: 	my $host = $ChildPid{$pid};
1.63      foxr     2028: 	if($host) {		# It's for real...
                   2029: 	    &Debug(9, "Caught sigchild for $host");
1.105     raeburn  2030:             &clear_childpid($pid);
1.83      albertel 2031: 	    delete($ChildPid{$pid});
                   2032: 	    delete($ChildHost{$host});
1.81      albertel 2033: 	    &parent_clean_up($host);
                   2034: 
1.63      foxr     2035: 	} else {
                   2036: 	    &Debug(5, "Caught sigchild for pid not in hosts hash: $pid");
                   2037: 	}
                   2038:     }
                   2039: 
                   2040: }
                   2041: 
1.1       foxr     2042: #
                   2043: #  Parent process logic pass 1:
                   2044: #   For each entry in the hosts table, we will
                   2045: #  fork off an instance of ChildProcess to service the transactions
                   2046: #  to that host.  Each pid will be entered in a global hash
                   2047: #  with the value of the key, the host.
                   2048: #  The parent will then enter a loop to wait for process exits.
                   2049: #  Each exit gets logged and the child gets restarted.
                   2050: #
                   2051: 
1.5       foxr     2052: #
                   2053: #   Fork and start in new session so hang-up isn't going to 
                   2054: #   happen without intent.
                   2055: #
                   2056: 
                   2057: 
1.6       foxr     2058: 
                   2059: 
1.8       foxr     2060: 
1.6       foxr     2061: 
                   2062: ShowStatus("Forming new session");
                   2063: my $childpid = fork;
                   2064: if ($childpid != 0) {
                   2065:     sleep 4;			# Give child a chacne to break to
                   2066:     exit 0;			# a new sesion.
                   2067: }
1.8       foxr     2068: #
                   2069: #   Write my pid into the pid file so I can be located
                   2070: #
                   2071: 
                   2072: ShowStatus("Parent writing pid file:");
1.23      foxr     2073: my $execdir = $perlvar{'lonDaemons'};
1.8       foxr     2074: open (PIDSAVE, ">$execdir/logs/lonc.pid");
                   2075: print PIDSAVE "$$\n";
                   2076: close(PIDSAVE);
1.6       foxr     2077: 
1.17      foxr     2078: 
                   2079: 
1.6       foxr     2080: if (POSIX::setsid() < 0) {
                   2081:     print "Could not create new session\n";
                   2082:     exit -1;
                   2083: }
1.5       foxr     2084: 
                   2085: ShowStatus("Forking node servers");
                   2086: 
1.9       foxr     2087: Log("CRITICAL", "--------------- Starting children ---------------");
                   2088: 
1.31      foxr     2089: LondConnection::ReadConfig;               # Read standard config files.
1.1       foxr     2090: 
1.80      albertel 2091: $RemoteHost = "[parent]";
1.81      albertel 2092: &listen_on_common_socket();
1.60      foxr     2093: 
1.12      foxr     2094: $RemoteHost = "Parent Server";
1.1       foxr     2095: 
                   2096: # Maintain the population:
1.5       foxr     2097: 
                   2098: ShowStatus("Parent keeping the flock");
1.1       foxr     2099: 
1.12      foxr     2100: 
1.80      albertel 2101: # We need to setup a SIGChild event to handle the exit (natural or otherwise)
                   2102: # of the children.
1.61      foxr     2103: 
1.80      albertel 2104: Event->signal(cb       => \&server_died,
                   2105: 	      desc     => "Child exit handler",
                   2106: 	      signal   => "CHLD");
                   2107: 
                   2108: 
                   2109: # Set up all the other signals we set up.
                   2110: 
                   2111: $parent_handlers{INT} = Event->signal(cb       => \&Terminate,
                   2112: 				      desc     => "Parent INT handler",
                   2113: 				      signal   => "INT");
                   2114: $parent_handlers{TERM} = Event->signal(cb       => \&Terminate,
                   2115: 				       desc     => "Parent TERM handler",
                   2116: 				       signal   => "TERM");
1.81      albertel 2117: $parent_handlers{HUP}  = Event->signal(cb       => \&KillThemAll,
                   2118: 				       desc     => "Parent HUP handler.",
                   2119: 				       signal   => "HUP");
1.80      albertel 2120: $parent_handlers{USR1} = Event->signal(cb       => \&CheckKids,
                   2121: 				       desc     => "Parent USR1 handler",
                   2122: 				       signal   => "USR1");
                   2123: $parent_handlers{USR2} = Event->signal(cb       => \&UpdateKids,
                   2124: 				       desc     => "Parent USR2 handler.",
                   2125: 				       signal   => "USR2");
                   2126: 
                   2127: #  Start procdesing events.
                   2128: 
                   2129: $Event::DebugLevel = $DebugLevel;
                   2130: Debug(9, "Parent entering event loop");
                   2131: my $ret = Event::loop();
                   2132: die "Main Event loop exited: $ret";
1.14      foxr     2133: 
                   2134: =pod
                   2135: 
                   2136: =head1 CheckKids
                   2137: 
                   2138:   Since kids do not die as easily in this implementation
1.97      raeburn  2139: as the previous one, there is no need to restart the
1.14      foxr     2140: dead ones (all dead kids get restarted when they die!!)
                   2141: The only thing this function does is to pass USR1 to the
                   2142: kids so that they report their status.
                   2143: 
                   2144: =cut
                   2145: 
                   2146: sub CheckKids {
                   2147:     Debug(2, "Checking status of children");
                   2148:     my $docdir = $perlvar{'lonDocRoot'};
                   2149:     my $fh = IO::File->new(">$docdir/lon-status/loncstatus.txt");
                   2150:     my $now=time;
                   2151:     my $local=localtime($now);
                   2152:     print $fh "LONC status $local - parent $$ \n\n";
1.65      foxr     2153:     foreach my $host (keys %parent_dispatchers) {
                   2154: 	print $fh "LONC Parent process listening for $host\n";
                   2155:     }
1.83      albertel 2156:     foreach my $pid (keys %ChildPid) {
1.14      foxr     2157: 	Debug(2, "Sending USR1 -> $pid");
                   2158: 	kill 'USR1' => $pid;	# Tell Child to report status.
                   2159:     }
1.65      foxr     2160: 
1.14      foxr     2161: }
1.24      foxr     2162: 
                   2163: =pod
                   2164: 
                   2165: =head1  UpdateKids
                   2166: 
1.25      foxr     2167: parent's SIGUSR2 handler.  This handler:
1.24      foxr     2168: 
                   2169: =item
                   2170: 
                   2171: Rereads the hosts file.
                   2172: 
                   2173: =item
                   2174:  
                   2175: Kills off (via sigint) children for hosts that have disappeared.
                   2176: 
                   2177: =item
                   2178: 
1.27      foxr     2179: QUITs  children for hosts that already exist (this just forces a status display
1.24      foxr     2180: and resets the connection retry count for that host.
                   2181: 
                   2182: =item
                   2183: 
                   2184: Starts new children for hosts that have been added to the hosts.tab file since
                   2185: the start of the master program and maintains them.
                   2186: 
                   2187: =cut
                   2188: 
                   2189: sub UpdateKids {
1.27      foxr     2190: 
1.25      foxr     2191:     Log("INFO", "Updating connections via SIGUSR2");
1.27      foxr     2192: 
1.65      foxr     2193:     #  I'm not sure what I was thinking in the first implementation.
                   2194:     # someone will have to work hard to convince me the effect is any
                   2195:     # different than Restart, especially now that we don't start up 
                   2196:     # per host servers automatically, may as well just restart.
                   2197:     # The down side is transactions that are in flight will get timed out
                   2198:     # (lost unless they are critical).
1.27      foxr     2199: 
1.81      albertel 2200:     &KillThemAll();
1.101     raeburn  2201:     LondConnection->ResetReadConfig();
1.105     raeburn  2202:     ShowStatus('Parent keeping the flock');
1.24      foxr     2203: }
                   2204: 
1.14      foxr     2205: 
1.13      foxr     2206: =pod
                   2207: 
                   2208: =head1 Restart
                   2209: 
                   2210: Signal handler for HUP... all children are killed and
1.97      raeburn  2211: we self restart.  This is an el-cheapo way to re-read
1.13      foxr     2212: the config file.
                   2213: 
                   2214: =cut
                   2215: 
                   2216: sub Restart {
1.23      foxr     2217:     &KillThemAll;		# First kill all the children.
1.101     raeburn  2218:     LondConnection->ResetReadConfig();
1.13      foxr     2219:     Log("CRITICAL", "Restarting");
                   2220:     my $execdir = $perlvar{'lonDaemons'};
                   2221:     unlink("$execdir/logs/lonc.pid");
1.65      foxr     2222:     exec("$executable");
1.10      foxr     2223: }
1.12      foxr     2224: 
                   2225: =pod
                   2226: 
                   2227: =head1 KillThemAll
                   2228: 
                   2229: Signal handler that kills all children by sending them a 
1.17      foxr     2230: SIGHUP.  Responds to sigint and sigterm.
1.12      foxr     2231: 
                   2232: =cut
                   2233: 
1.10      foxr     2234: sub KillThemAll {
1.12      foxr     2235:     Debug(2, "Kill them all!!");
1.85      albertel 2236:     
                   2237:     #local($SIG{CHLD}) = 'IGNORE';
                   2238:     # Our children >will< die.
                   2239:     # but we need to catch their death and cleanup after them in case this is 
                   2240:     # a restart set of kills
                   2241:     my @allpids = keys(%ChildPid);
                   2242:     foreach my $pid (@allpids) {
1.83      albertel 2243: 	my $serving = $ChildPid{$pid};
1.52      foxr     2244: 	ShowStatus("Nicely Killing lonc for $serving pid = $pid");
                   2245: 	Log("CRITICAL", "Nicely Killing lonc for $serving pid = $pid");
1.17      foxr     2246: 	kill 'QUIT' => $pid;
1.105     raeburn  2247:         &clear_childpid($pid);
1.12      foxr     2248:     }
1.85      albertel 2249:     ShowStatus("Finished killing child processes off.");
1.1       foxr     2250: }
1.12      foxr     2251: 
1.52      foxr     2252: 
                   2253: #
                   2254: #  Kill all children via KILL.  Just in case the
                   2255: #  first shot didn't get them.
                   2256: 
                   2257: sub really_kill_them_all_dammit
                   2258: {
                   2259:     Debug(2, "Kill them all Dammit");
                   2260:     local($SIG{CHLD} = 'IGNORE'); # In case some purist reenabled them.
1.83      albertel 2261:     foreach my $pid (keys %ChildPid) {
                   2262: 	my $serving = $ChildPid{$pid};
1.52      foxr     2263: 	&ShowStatus("Nastily killing lonc for $serving pid = $pid");
                   2264: 	Log("CRITICAL", "Nastily killing lonc for $serving pid = $pid");
                   2265: 	kill 'KILL' => $pid;
1.83      albertel 2266: 	delete($ChildPid{$pid});
1.105     raeburn  2267:         delete($ChildKeyMode{$pid});
1.52      foxr     2268: 	my $execdir = $perlvar{'lonDaemons'};
                   2269: 	unlink("$execdir/logs/lonc.pid");
                   2270:     }
                   2271: }
1.69      matthew  2272: 
1.14      foxr     2273: =pod
                   2274: 
                   2275: =head1 Terminate
                   2276:  
                   2277: Terminate the system.
                   2278: 
                   2279: =cut
                   2280: 
                   2281: sub Terminate {
1.52      foxr     2282:     &Log("CRITICAL", "Asked to kill children.. first be nice...");
                   2283:     &KillThemAll;
                   2284:     #
                   2285:     #  By now they really should all be dead.. but just in case 
                   2286:     #  send them all SIGKILL's after a bit of waiting:
                   2287: 
                   2288:     sleep(4);
                   2289:     &Log("CRITICAL", "Now kill children nasty");
                   2290:     &really_kill_them_all_dammit;
1.17      foxr     2291:     Log("CRITICAL","Master process exiting");
                   2292:     exit 0;
1.14      foxr     2293: 
                   2294: }
1.81      albertel 2295: 
1.105     raeburn  2296: =pod
                   2297: 
                   2298: =cut
                   2299: 
1.81      albertel 2300: sub my_hostname {
1.104     raeburn  2301:     use Sys::Hostname::FQDN();
                   2302:     my $name = Sys::Hostname::FQDN::fqdn();
1.81      albertel 2303:     &Debug(9,"Name is $name");
                   2304:     return $name;
                   2305: }
                   2306: 
1.105     raeburn  2307: sub record_childpid {
                   2308:     my ($Socket) = @_;
                   2309:     my $docdir = $perlvar{'lonDocRoot'};
                   2310:     my $authmode = $Socket->GetKeyMode();
                   2311:     my $peer = $Socket->PeerLoncapaHim();
                   2312:     if (($authmode eq 'ssl') || ($authmode eq 'insecure')) {
                   2313:         my $childpid = $$;
                   2314:         if ($childpid) {
                   2315:             unless (exists($ChildKeyMode{$childpid})) {
                   2316:                 $ChildKeyMode{$childpid} = $authmode;
                   2317:             }
                   2318:             if (-d "$docdir/lon-status/loncchld") {
                   2319:                 unless (-e "$docdir/lon-status/loncchld/$childpid") {
                   2320:                     if (open (my $pidfh,'>',"$docdir/lon-status/loncchld/$childpid")) {
                   2321:                         print $pidfh "$peer:$authmode\n";
                   2322:                         close($pidfh);
                   2323:                     }
                   2324:                 }
                   2325:             }
                   2326:         }
                   2327:     }
                   2328:     return;
                   2329: }
                   2330: 
                   2331: sub clear_childpid {
                   2332:     my ($childpid) = @_; 
                   2333:     my $docdir = $perlvar{'lonDocRoot'};
                   2334:     if (-d "$docdir/lon-status/loncchld") {
                   2335:         if ($childpid =~ /^\d+$/) {
                   2336:             if (($ChildKeyMode{$childpid} eq 'insecure') ||
                   2337:                 ($ChildKeyMode{$childpid} eq 'ssl')) {
                   2338:                 if (-e "$docdir/lon-status/loncchld/$childpid") {
                   2339:                     unlink("$docdir/lon-status/loncchld/$childpid");
                   2340:                 }
                   2341:             }
                   2342:         }
                   2343:     }
                   2344:     if (exists($ChildKeyMode{$childpid})) {
                   2345:         delete($ChildKeyMode{$childpid});
                   2346:     }
                   2347:     return;
                   2348: }
                   2349: 
1.12      foxr     2350: =pod
1.1       foxr     2351: 
                   2352: =head1 Theory
1.3       albertel 2353: 
                   2354: The event class is used to build this as a single process with an
                   2355: event driven model.  The following events are handled:
1.1       foxr     2356: 
                   2357: =item UNIX Socket connection Received
                   2358: 
                   2359: =item Request data arrives on UNIX data transfer socket.
                   2360: 
                   2361: =item lond connection becomes writable.
                   2362: 
                   2363: =item timer fires at 1 second intervals.
                   2364: 
                   2365: All sockets are run in non-blocking mode.  Timeouts managed by the timer
                   2366: handler prevents hung connections.
                   2367: 
                   2368: Key data structures:
                   2369: 
1.3       albertel 2370: =item RequestQueue
                   2371: 
                   2372: A queue of requests received from UNIX sockets that are
                   2373: waiting for a chance to be forwarded on a lond connection socket.
                   2374: 
                   2375: =item ActiveConnections
                   2376: 
                   2377: A hash of lond connections that have transactions in process that are
                   2378: available to be timed out.
                   2379: 
                   2380: =item ActiveTransactions
                   2381: 
                   2382: A hash indexed by lond connections that contain the client reply
                   2383: socket for each connection that has an active transaction on it.
                   2384: 
                   2385: =item IdleConnections
                   2386: 
                   2387: A hash of lond connections that have no work to do.  These connections
                   2388: can be closed if they are idle for a long enough time.
1.1       foxr     2389: 
                   2390: =cut
1.88      foxr     2391: 
                   2392: =pod
                   2393: 
                   2394: =head1 Log messages
                   2395: 
                   2396: The following is a list of log messages that can appear in the 
                   2397: lonc.log file.  Each log file has a severity and a message.
                   2398: 
                   2399: =over 2
                   2400: 
                   2401: =item Warning  A socket timeout was detected
                   2402: 
                   2403: If there are pending transactions in the socket's queue,
                   2404: they are failed (saved if critical).  If the connection
                   2405: retry count gets exceeded by this, the
                   2406: remote host is marked as dead.
1.97      raeburn  2407: Called when timeouts occurred during the connection and
1.88      foxr     2408: connection dialog with a remote host.
                   2409: 
                   2410: =item Critical Host makred DEAD <hostname>   
                   2411: 
                   2412: The numer of retry counts for contacting a host was
                   2413: exceeded. The host is marked dead an no 
                   2414: further attempts will be made by that child.
                   2415: 
                   2416: =item Info lonc pipe client hung up on us     
                   2417: 
                   2418: Write to the client pipe indicated no data transferred
                   2419: Socket to remote host is shut down.  Reply to the client 
                   2420: is discarded.  Note: This is commented out in &ClientWriteable
                   2421: 
                   2422: =item Success  Reply from lond: <data>   
                   2423: 
                   2424: Can be enabled for debugging by setting LogTransactions to nonzero.
                   2425: Indicates a successful transaction with lond, <data> is the data received
                   2426: from the remote lond.
                   2427: 
                   2428: =item Success A delayed transaction was completed  
                   2429: 
                   2430: A transaction that must be reliable was executed and completed
                   2431: as lonc restarted.  This is followed by a mesage of the form
                   2432: 
                   2433:   S: client-name : request
                   2434: 
                   2435: =item WARNING  Failing transaction <cmd>:<subcmd>  
                   2436: 
                   2437: Transaction failed on a socket, but the failure retry count for the remote
                   2438: node has not yet been exhausted (the node is not yet marked dead).
                   2439: cmd is the command, subcmd is the subcommand.  This results from a con_lost
                   2440: when communicating with lond.
                   2441: 
                   2442: =item WARNING Shutting down a socket     
                   2443: 
                   2444: Called when a socket is being closed to lond.  This is emitted both when 
                   2445: idle pruning is being done and when the socket has been disconnected by the remote.
                   2446: 
                   2447: =item WARNING Lond connection lost.
                   2448: 
                   2449: Called when a read from lond's socket failed indicating lond has closed the 
                   2450: connection or died.  This should be followed by one or more
                   2451: 
                   2452:  "WARNING Failing transaction..." msgs for each in-flight or queued transaction.
                   2453: 
1.105     raeburn  2454: =item WARNING No SSL channel (verification failed), will try with insecure channel.
                   2455: 
                   2456: Called when promotion of a socket to SSL failed because SSL certificate verification failed.
                   2457: Domain configuration must also permit insecure channel use for key exchange. Connection
                   2458: negotiation will start again from the beginning, but with Authentication Mode not set to ssl.
                   2459: 
1.88      foxr     2460: =item INFO Connected to lond version:  <version> 
                   2461: 
                   2462: When connection negotiation is complete, the lond version is requested and logged here.
                   2463: 
                   2464: =item SUCCESS Connection n to host now ready for action
                   2465: 
                   2466: Emitted when connection has been completed with lond. n is then number of 
                   2467: concurrent connections and host, the host to which the connection has just
                   2468: been established.
                   2469: 
                   2470: =item WARNING Connection to host has been disconnected
                   2471: 
                   2472: Write to a lond resulted in failure status.  Connection to lond is dropped.
                   2473: 
                   2474: =item SUCCESS Created connection n to host host 
                   2475: 
                   2476: Initial connection request to host..(before negotiation).
                   2477: 
                   2478: =item CRITICAL Request Close Connection ... exiting
                   2479: 
                   2480: Client has sent "close_connection_exit"   The loncnew server is exiting.
                   2481: 
                   2482: =item INFO Resetting Connection Retries 
                   2483: 
                   2484: Client has sent "reset_retries" The lond connection retries are reset to zero for the
                   2485: corresponding lond.
                   2486: 
                   2487: =item SUCCESS Transaction <data>
                   2488: 
                   2489: Only emitted if the global variable $LogTransactions was set to true.
                   2490: A client has requested a lond transaction <data> is the contents of the request.
                   2491: 
                   2492: =item SUCCESS Toggled transaction logging <LogTransactions>
                   2493:                                     
                   2494: The state of the $LogTransactions global has been toggled, and its current value
                   2495: (after being toggled) is displayed.  When non zero additional logging of transactions
                   2496: is enabled for debugging purposes.  Transaction logging is toggled on receipt of a USR2
                   2497: signal.
                   2498: 
                   2499: =item CRITICAL Abnormal exit. Child <pid> for <host> died thorugh signal.
                   2500: 
                   2501: QUIT signal received.  lonc child process is exiting.
                   2502: 
                   2503: =item SUCCESS New debugging level for <RemoteHost> now <DebugLevel>
                   2504:                                     
                   2505: Debugging toggled for the host loncnew is talking with.
                   2506: Currently debugging is a level based scheme with higher number 
                   2507: conveying more information.  The daemon starts out at
                   2508: DebugLevel 0 and can toggle back and forth between that and
                   2509: DebugLevel 2  These are controlled by
                   2510: the global variables $DebugLevel and $NextDebugLevel
                   2511: The debug level can go up to 9.
                   2512: SIGINT toggles the debug level.  The higher the debug level the 
                   2513: more debugging information is spewed.  See the Debug
                   2514: sub in loncnew.
                   2515: 
                   2516: =item CRITICAL Forking server for host  
                   2517: 
                   2518: A child is being created to service requests for the specified host.
                   2519: 
                   2520: 
                   2521: =item WARNING Request for a second child on hostname
                   2522:                                     
                   2523: Somehow loncnew was asked to start a second child on a host that already had a child
                   2524: servicing it.  This request is not honored, but themessage is emitted.  This could happen
                   2525: due to a race condition.  When a client attempts to contact loncnew for a new host, a child
                   2526: is forked off to handle the requests for that server.  The parent then backs off the Unix
                   2527: domain socket leaving it for the child to service all requests.  If in the time between
                   2528: creating the child, and backing off, a new connection request comes in to the unix domain
                   2529: socket, this could trigger (unlikely but remotely possible),.
                   2530: 
                   2531: =item CRITICAL ------ Starting Children ----
                   2532: 
                   2533: This message should probably be changed to "Entering event loop"  as the loncnew only starts
                   2534: children as needed.  This message is emitted as new events are established and
                   2535: the event processing loop is entered.
                   2536: 
                   2537: =item INFO Updating connections via SIGUSR2
                   2538:                                     
                   2539: SIGUSR2 received. The original code would kill all clients, re-read the host file,
1.97      raeburn  2540: then restart children for each host.  Now that children are started on demand, this
1.88      foxr     2541: just kills all child processes and lets requests start them as needed again.
                   2542: 
                   2543: 
                   2544: =item CRITICAL Restarting
                   2545: 
                   2546: SigHUP received.  all the children are killed and the script exec's itself to start again.
                   2547: 
                   2548: =item CRITICAL Nicely killing lonc for host pid = <pid>
                   2549: 
                   2550: Attempting to kill the child that is serving the specified host (pid given) cleanly via
1.97      raeburn  2551: SIGQUIT.  The child should handle that, clean up nicely and exit.
1.88      foxr     2552: 
                   2553: =item CRITICAL Nastily killing lonc for host pid = <pid>
                   2554: 
                   2555: The child specified did not die when requested via SIGQUIT.  Therefore it is killed
                   2556: via SIGKILL.
                   2557: 
                   2558: =item CRITICAL Asked to kill children.. first be nice..
                   2559: 
                   2560: In the parent's INT handler.  INT kills the child processes.  This inidicate loncnew
                   2561: is about to attempt to kill all known children via SIGQUIT.  This message should be followed 
                   2562: by one "Nicely killing" message for each extant child.
                   2563: 
                   2564: =item CRITICAL Now kill children nasty 
                   2565: 
                   2566: In the parent's INT handler. remaining children are about to be killed via
                   2567: SIGKILL. Should be followed by a Nastily killing... for each lonc child that 
                   2568: refused to die.
                   2569: 
                   2570: =item CRITICAL Master process exiting
                   2571: 
                   2572: In the parent's INT handler. just prior to the exit 0 call.
                   2573: 
                   2574: =back
                   2575: 
                   2576: =cut

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>
500 Internal Server Error

Internal Server Error

The server encountered an internal error or misconfiguration and was unable to complete your request.

Please contact the server administrator at root@localhost to inform them of the time this error occurred, and the actions you performed just before this error.

More information about this error may be available in the server error log.