) {
my ($id,$domain,$role,$name,$ip)=split(/:/,$configline);
chomp($ip);
- $hostip{$id}=$ip;
+ if ($ip) {
+ $hostip{$id}=$ip;
+ $hostname{$id}=$name;
+ }
}
+
close(CONFIG);
# -------------------------------------------------------- Routines for forking
@@ -114,9 +194,19 @@ sub REAPER { # ta
unlink($port);
}
+sub hangup {
+ foreach (keys %children) {
+ $wasserver=$children{$_};
+ &status("Closing $wasserver");
+ &logthis('Closing '.$wasserver.': '.&subreply('exit',$wasserver));
+ &status("Kill PID $_ for $wasserver");
+ kill ('INT',$_);
+ }
+}
+
sub HUNTSMAN { # signal handler for SIGINT
local($SIG{CHLD}) = 'IGNORE'; # we're going to kill our children
- kill 'INT' => keys %children;
+ &hangup();
my $execdir=$perlvar{'lonDaemons'};
unlink("$execdir/logs/lonc.pid");
&logthis("CRITICAL: Shutting down");
@@ -125,22 +215,30 @@ sub HUNTSMAN { # si
sub HUPSMAN { # signal handler for SIGHUP
local($SIG{CHLD}) = 'IGNORE'; # we're going to kill our children
- kill 'INT' => keys %children;
+ &hangup();
&logthis("CRITICAL: Restarting");
unlink("$execdir/logs/lonc.pid");
my $execdir=$perlvar{'lonDaemons'};
exec("$execdir/lonc"); # here we go again
}
+sub checkchildren {
+ &initnewstatus();
+ &logstatus();
+ &logthis('Going to check on the children');
+ foreach (sort keys %children) {
+ sleep 1;
+ unless (kill 'USR1' => $_) {
+ &logthis ('CRITICAL: Child '.$_.' is dead');
+ &logstatus($$.' is dead');
+ }
+ }
+}
+
sub USRMAN {
&logthis("USR1: Trying to establish connections again");
- foreach $thisserver (keys %hostip) {
- $answer=subreply("ping",$thisserver);
- &logthis("USR1: Ping $thisserver "
- ."(pid >$childpid{$thisserver}<, $childatt{thisserver} attempts): "
- ." >$answer<");
- }
%childatt=();
+ &checkchildren();
}
# -------------------------------------------------- Non-critical communication
@@ -153,10 +251,20 @@ sub subreply {
Type => SOCK_STREAM,
Timeout => 10)
or return "con_lost";
- print $sclient "$cmd\n";
- my $answer=<$sclient>;
- chomp($answer);
- if (!$answer) { $answer="con_lost"; }
+
+
+ $SIG{ALRM}=sub { die "timeout" };
+ $SIG{__DIE__}='DEFAULT';
+ eval {
+ alarm(10);
+ print $sclient "$cmd\n";
+ $answer=<$sclient>;
+ chomp($answer);
+ alarm(0);
+ };
+ if ((!$answer) || ($@=~/timeout/)) { $answer="con_lost"; }
+ $SIG{ALRM}='DEFAULT';
+ $SIG{__DIE__}=\&catchexception;
} else { $answer='self_reply'; }
return $answer;
}
@@ -169,7 +277,8 @@ sub logthis {
my $fh=IO::File->new(">>$execdir/logs/lonc.log");
my $now=time;
my $local=localtime($now);
- print $fh "$local ($$): $message\n";
+ $lastlog=$local.': '.$message;
+ print $fh "$local ($$) [$status]: $message\n";
}
@@ -181,9 +290,34 @@ sub logperm {
my $fh=IO::File->new(">>$execdir/logs/lonnet.perm.log");
print $fh "$now:$message:$local\n";
}
+# ------------------------------------------------------------------ Log status
+
+sub logstatus {
+ my $docdir=$perlvar{'lonDocRoot'};
+ my $fh=IO::File->new(">>$docdir/lon-status/loncstatus.txt");
+ print $fh $$."\t".$status."\t".$lastlog."\n";
+}
+
+sub initnewstatus {
+ my $docdir=$perlvar{'lonDocRoot'};
+ my $fh=IO::File->new(">$docdir/lon-status/loncstatus.txt");
+ my $now=time;
+ my $local=localtime($now);
+ print $fh "LONC status $local - parent $$\n\n";
+}
+
+# -------------------------------------------------------------- Status setting
+
+sub status {
+ my $what=shift;
+ my $now=time;
+ my $local=localtime($now);
+ $status=$local.': '.$what;
+}
-# ---------------------------------------------------- Fork once and dissociate
+# ---------------------------------------------------- Fork once and dissociate
+&status("Fork and dissociate");
$fpid=fork;
exit if $fpid;
die "Couldn't fork: $!" unless defined ($fpid);
@@ -191,7 +325,7 @@ die "Couldn't fork: $!" unless defined (
POSIX::setsid() or die "Can't start new session: $!";
# ------------------------------------------------------- Write our PID on disk
-
+&status("Write PID");
$execdir=$perlvar{'lonDaemons'};
open (PIDSAVE,">$execdir/logs/lonc.pid");
print PIDSAVE "$$\n";
@@ -204,8 +338,12 @@ $SIG{HUP}=$SIG{USR1}='IGNORE';
# Fork off our children, one for every server
+&status("Forking ...");
+
foreach $thisserver (keys %hostip) {
- make_new_child($thisserver);
+ if (&online($hostname{$thisserver})) {
+ make_new_child($thisserver);
+ }
}
&logthis("Done starting initial servers");
@@ -218,17 +356,26 @@ $SIG{USR1} = \&USRMAN;
# And maintain the population.
while (1) {
+ &status("Sleeping");
sleep; # wait for a signal (i.e., child's death)
# See who died and start new one
+ &status("Woke up");
foreach $thisserver (keys %hostip) {
if (!$childpid{$thisserver}) {
- if ($childatt{$thisserver}<=$childmaxattempts) {
+ if (($childatt{$thisserver}<$childmaxattempts) &&
+ (&online($hostname{$thisserver}))) {
$childatt{$thisserver}++;
&logthis(
"INFO: Trying to reconnect for $thisserver "
- ."($childatt{$thisserver} of $childmaxattempts attempts)");
+ ."(".($childatt{$thisserver}?$childatt{$thisserver}:'none').
+ " of $childmaxattempts attempts)");
make_new_child($thisserver);
- }
+ } else {
+ &logthis(
+ "INFO: Skipping $thisserver "
+ ."($childatt{$thisserver} of $childmaxattempts attempts)");
+ }
+
}
}
}
@@ -257,7 +404,8 @@ sub make_new_child {
} else {
# Child can *not* return from this subroutine.
$SIG{INT} = 'DEFAULT'; # make SIGINT kill us as it did before
-
+ $SIG{USR1}= \&logstatus;
+
# unblock signals
sigprocmask(SIG_UNBLOCK, $sigset)
or die "Can't unblock SIGINT for fork: $!\n";
@@ -267,64 +415,23 @@ sub make_new_child {
$port = "$perlvar{'lonSockDir'}/$conserver";
unlink($port);
-# ---------------------------------------------------- Client to network server
-unless (
- $remotesock = IO::Socket::INET->new(PeerAddr => $hostip{$conserver},
- PeerPort => $perlvar{'londPort'},
- Proto => "tcp",
- Type => SOCK_STREAM)
- ) {
- my $st=120+int(rand(240));
- &logthis(
-"WARNING: Couldn't connect $conserver ($st secs): $@");
- sleep($st);
- exit;
- };
-# --------------------------------------- Send a ping to make other end do USR1
-print $remotesock "init\n";
-$answer=<$remotesock>;
-print $remotesock "$answer";
-$answer=<$remotesock>;
-chomp($answer);
-&logthis("Init reply for $conserver: >$answer<");
-sleep 5;
-print $remotesock "pong\n";
-$answer=<$remotesock>;
-chomp($answer);
-&logthis("Pong reply for $conserver: >$answer<");
-# ----------------------------------------------------------- Initialize cipher
-print $remotesock "ekey\n";
-my $buildkey=<$remotesock>;
-my $key=$conserver.$perlvar{'lonHostID'};
-$key=~tr/a-z/A-Z/;
-$key=~tr/G-P/0-9/;
-$key=~tr/Q-Z/0-9/;
-$key=$key.$buildkey.$key.$buildkey.$key.$buildkey;
-$key=substr($key,0,32);
-my $cipherkey=pack("H32",$key);
-if ($cipher=new IDEA $cipherkey) {
- &logthis("Secure connection initialized: $conserver");
-} else {
- my $st=120+int(rand(240));
- &logthis(
- "WARNING: ".
- "Could not establish secure connection, $conserver ($st secs)!");
- sleep($st);
- exit;
-}
+# -------------------------------------------------------------- Open other end
-# ----------------------------------------- We're online, send delayed messages
+&openremote($conserver);
+# ----------------------------------------- We're online, send delayed messages
+ &status("Checking for delayed messages");
my @allbuffered;
my $path="$perlvar{'lonSockDir'}/delayed";
opendir(DIRHANDLE,$path);
@allbuffered=grep /\.$conserver$/, readdir DIRHANDLE;
closedir(DIRHANDLE);
my $dfname;
- map {
+ foreach (@allbuffered) {
+ &status("Sending delayed $conserver $_");
$dfname="$path/$_";
- &logthis($dfname);
+ &logthis('Sending '.$dfname);
my $wcmd;
{
my $dfh=IO::File->new($dfname);
@@ -345,18 +452,27 @@ if ($cipher=new IDEA $cipherkey) {
}
$cmd="enc:$cmdlength:$encrequest\n";
}
-
+ $SIG{ALRM}=sub { die "timeout" };
+ $SIG{__DIE__}='DEFAULT';
+ eval {
+ alarm(60);
print $remotesock "$cmd\n";
$answer=<$remotesock>;
chomp($answer);
- if ($answer ne '') {
+ alarm(0);
+ };
+ $SIG{ALRM}='DEFAULT';
+ $SIG{__DIE__}=\&catchexception;
+
+ if (($answer ne '') && ($@!~/timeout/)) {
unlink("$dfname");
&logthis("Delayed $cmd to $conserver: >$answer<");
&logperm("S:$conserver:$bcmd");
}
- } @allbuffered;
+ }
# ------------------------------------------------------- Listen to UNIX socket
+&status("Opening socket $conserver");
unless (
$server = IO::Socket::UNIX->new(Local => $port,
Type => SOCK_STREAM,
@@ -394,11 +510,11 @@ while (1) {
# check for new information on the connections we have
# anything to read or accept?
- foreach $client ($select->can_read(1)) {
+ foreach $client ($select->can_read(0.1)) {
if ($client == $server) {
# accept a new connection
-
+ &status("Accept new connection: $conserver");
$client = $server->accept();
$select->add($client);
nonblock($client);
@@ -413,6 +529,7 @@ while (1) {
delete $outbuffer{$client};
delete $ready{$client};
+ &status("Idle $conserver");
$select->remove($client);
close $client;
next;
@@ -441,18 +558,25 @@ while (1) {
next unless exists $outbuffer{$client};
$rv = $client->send($outbuffer{$client}, 0);
+
+ unless ($outbuffer{$client}=~/con_lost\n$/) {
unless (defined $rv) {
# Whine, but move on.
- warn "I was told I could write, but I can't.\n";
+ &logthis("I was told I could write, but I can't.\n");
next;
}
+ $errno=$!;
if (($rv == length $outbuffer{$client}) ||
- ($! == POSIX::EWOULDBLOCK)) {
+ ($errno == POSIX::EWOULDBLOCK) || ($errno == 0)) {
substr($outbuffer{$client}, 0, $rv) = '';
delete $outbuffer{$client} unless length $outbuffer{$client};
} else {
# Couldn't write all the data, and it wasn't because
# it would have blocked. Shutdown and move on.
+
+ &logthis("Dropping data with ".$errno.": ".
+ length($outbuffer{$client}).", $rv");
+
delete $inbuffer{$client};
delete $outbuffer{$client};
delete $ready{$client};
@@ -461,7 +585,17 @@ while (1) {
close($client);
next;
}
+ } else {
+# -------------------------------------------------------- Wow, connection lost
+ &logthis(
+ "CRITICAL: Closing connection $conserver");
+ &status("Connection lost $conserver");
+ $remotesock->shutdown(2);
+ &logthis("Attempting to open new connection");
+ &openremote($conserver);
+ }
}
+
}
}
@@ -478,6 +612,15 @@ sub handle {
# ============================================================= Process request
# $request is the text of the request
# put text of reply into $outbuffer{$client}
+# ------------------------------------------------------------ Is this the end?
+ if ($request eq "close_connection_exit\n") {
+ &status("Request close connection: $conserver");
+ &logthis(
+ "CRITICAL: Request Close Connection $conserver");
+ $remotesock->shutdown(2);
+ $server->close();
+ exit;
+ }
# -----------------------------------------------------------------------------
if ($request =~ /^encrypt\:/) {
my $cmd=$request;
@@ -492,8 +635,27 @@ sub handle {
}
$request="enc:$cmdlength:$encrequest\n";
}
+# --------------------------------------------------------------- Main exchange
+ $SIG{ALRM}=sub { die "timeout" };
+ $SIG{__DIE__}='DEFAULT';
+ eval {
+ alarm(300);
+ &status("Sending $conserver: $request");
print $remotesock "$request";
+ &status("Waiting for reply from $conserver: $request");
$answer=<$remotesock>;
+ &status("Received reply: $request");
+ alarm(0);
+ };
+ if ($@=~/timeout/) {
+ $answer='';
+ &logthis(
+ "CRITICAL: Timeout $conserver: $request");
+ }
+ $SIG{ALRM}='DEFAULT';
+ $SIG{__DIE__}=\&catchexception;
+
+
if ($answer) {
if ($answer =~ /^enc/) {
my ($cmd,$cmdlength,$encinput)=split(/:/,$answer);
@@ -515,6 +677,7 @@ sub handle {
# ===================================================== Done processing request
}
delete $ready{$client};
+ &status("Completed $conserver: $request");
# -------------------------------------------------------------- End non-forker
}
# ---------------------------------------------------------- End make_new_child
@@ -532,3 +695,135 @@ sub nonblock {
or die "Can't make socket nonblocking: $!\n";
}
+
+sub openremote {
+# ---------------------------------------------------- Client to network server
+
+ my $conserver=shift;
+
+&status("Opening TCP: $conserver");
+
+unless (
+ $remotesock = IO::Socket::INET->new(PeerAddr => $hostip{$conserver},
+ PeerPort => $perlvar{'londPort'},
+ Proto => "tcp",
+ Type => SOCK_STREAM)
+ ) {
+ my $st=120+int(rand(240));
+ &logthis(
+"WARNING: Couldn't connect $conserver ($st secs): $@");
+ sleep($st);
+ exit;
+ };
+# ----------------------------------------------------------------- Init dialog
+
+&status("Init dialogue: $conserver");
+
+ $SIG{ALRM}=sub { die "timeout" };
+ $SIG{__DIE__}='DEFAULT';
+ eval {
+ alarm(60);
+print $remotesock "init\n";
+$answer=<$remotesock>;
+print $remotesock "$answer";
+$answer=<$remotesock>;
+chomp($answer);
+ alarm(0);
+ };
+ $SIG{ALRM}='DEFAULT';
+ $SIG{__DIE__}=\&catchexception;
+
+ if ($@=~/timeout/) {
+ &logthis("Timed out during init: $conserver");
+ exit;
+ }
+
+if ($answer ne 'ok') {
+ &logthis("Init reply for $conserver: >$answer<");
+ my $st=120+int(rand(240));
+ &logthis(
+"WARNING: Init failed $conserver ($st secs)");
+ sleep($st);
+ exit;
+}
+
+sleep 5;
+&status("Ponging $conserver");
+print $remotesock "pong\n";
+$answer=<$remotesock>;
+chomp($answer);
+if ($answer!~/^$converver/) {
+ &logthis("Pong reply for $conserver: >$answer<");
+}
+# ----------------------------------------------------------- Initialize cipher
+
+&status("Initialize cipher: $conserver");
+print $remotesock "ekey\n";
+my $buildkey=<$remotesock>;
+my $key=$conserver.$perlvar{'lonHostID'};
+$key=~tr/a-z/A-Z/;
+$key=~tr/G-P/0-9/;
+$key=~tr/Q-Z/0-9/;
+$key=$key.$buildkey.$key.$buildkey.$key.$buildkey;
+$key=substr($key,0,32);
+my $cipherkey=pack("H32",$key);
+if ($cipher=new IDEA $cipherkey) {
+ &logthis("Secure connection initialized: $conserver");
+} else {
+ my $st=120+int(rand(240));
+ &logthis(
+ "WARNING: ".
+ "Could not establish secure connection, $conserver ($st secs)!");
+ sleep($st);
+ exit;
+}
+
+}
+
+# ----------------------------------- POD (plain old documentation, CPAN style)
+
+=head1 NAME
+
+lonc - LON TCP-MySQL-Server Daemon for handling database requests.
+
+=head1 SYNOPSIS
+
+Should only be run as user=www. This is a command-line script which
+is invoked by loncron.
+
+=head1 DESCRIPTION
+
+Provides persistent TCP connections to the other servers in the network
+through multiplexed domain sockets
+
+ PID in subdir logs/lonc.pid
+ kill kills
+ HUP restarts
+ USR1 tries to open connections again
+
+=head1 README
+
+Not yet written.
+
+=head1 PREREQUISITES
+
+POSIX
+IO::Socket
+IO::Select
+IO::File
+Socket
+Fcntl
+Tie::RefHash
+Crypt::IDEA
+
+=head1 COREQUISITES
+
+=head1 OSNAMES
+
+linux
+
+=head1 SCRIPT CATEGORIES
+
+Server/Process
+
+=cut
500 Internal Server Error
Internal Server Error
The server encountered an internal error or
misconfiguration and was unable to complete
your request.
Please contact the server administrator at
root@localhost to inform them of the time this error occurred,
and the actions you performed just before this error.
More information about this error may be available
in the server error log.