--- loncom/Attic/lonc 2001/03/13 21:15:40 1.14 +++ loncom/Attic/lonc 2002/02/25 15:48:11 1.29 @@ -5,6 +5,30 @@ # provides persistent TCP connections to the other servers in the network # through multiplexed domain sockets # +# $Id: lonc,v 1.29 2002/02/25 15:48:11 www Exp $ +# +# Copyright Michigan State University Board of Trustees +# +# This file is part of the LearningOnline Network with CAPA (LON-CAPA). +# +# LON-CAPA is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# LON-CAPA is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with LON-CAPA; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# /home/httpd/html/adm/gpl.txt +# +# http://www.lon-capa.org/ +# # PID in subdir logs/lonc.pid # kill kills # HUP restarts @@ -15,8 +39,12 @@ # 2/8,7/25 Gerd Kortemeyer # 12/05 Scott Harrison # 12/05 Gerd Kortemeyer +# YEAR=2001 # 01/10/01 Scott Harrison -# 03/14/01 Gerd Kortemeyer +# 03/14/01,03/15,06/12,11/26,11/27,11/28 Gerd Kortemeyer +# 12/20 Scott Harrison +# YEAR=2002 +# 2/19/02,02/22/02,02/25/02 Gerd Kortemeyer # # based on nonforker from Perl Cookbook # - server who multiplexes without forking @@ -29,27 +57,74 @@ use Socket; use Fcntl; use Tie::RefHash; use Crypt::IDEA; +use Net::Ping; +use LWP::UserAgent(); + +my $status=''; +my $lastlog=''; # grabs exception and records it to log before exiting sub catchexception { my ($signal)=@_; - $SIG{'QUIT'}='DEFAULT'; + $SIG{QUIT}='DEFAULT'; $SIG{__DIE__}='DEFAULT'; + chomp($signal); &logthis("CRITICAL: " - ."ABNORMAL EXIT. Child $$ for server $wasserver died through " - ."\"$signal\" with this parameter->[$@]"); + ."ABNORMAL EXIT. Child $$ for server [$wasserver] died through " + ."\"$signal\" with parameter [$@]"); die($@); } -$childmaxattempts=10; +$childmaxattempts=5; + +# -------------------------------------- Routines to see if other box available + +sub online { + my $host=shift; + &status("Pinging ".$host); + my $p=Net::Ping->new("tcp",20); + my $online=$p->ping("$host"); + $p->close(); + undef ($p); + return $online; +} + +sub connected { + my ($local,$remote)=@_; + &status("Checking connection $local to $remote"); + $local=~s/\W//g; + $remote=~s/\W//g; + + unless ($hostname{$local}) { return 'local_unknown'; } + unless ($hostname{$remote}) { return 'remote_unknown'; } + + unless (&online($hostname{$local})) { return 'local_offline'; } + + my $ua=new LWP::UserAgent; + + my $request=new HTTP::Request('GET', + "http://".$hostname{$local}.'/cgi-bin/ping.pl?'.$remote); + + my $response=$ua->request($request); + + unless ($response->is_success) { return 'local_error'; } + + my $reply=$response->content; + $reply=(split("\n",$reply))[0]; + $reply=~s/\W//g; + if ($reply ne $remote) { return $reply; } + return 'ok'; +} + # -------------------------------- Set signal handlers to record abnormal exits -$SIG{'QUIT'}=\&catchexception; +&status("Init exception handlers"); +$SIG{QUIT}=\&catchexception; $SIG{__DIE__}=\&catchexception; # ------------------------------------ Read httpd access.conf and get variables - +&status("Read access.conf"); open (CONFIG,"/etc/httpd/conf/access.conf") || die "Can't read access.conf"; while ($configline=) { @@ -62,6 +137,7 @@ while ($configline=) { close(CONFIG); # ----------------------------- Make sure this process is running from user=www +&status("Check user ID"); my $wwwid=getpwnam('www'); if ($wwwid!=$<) { $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}"; @@ -89,8 +165,12 @@ open (CONFIG,"$perlvar{'lonTabDir'}/host while ($configline=) { my ($id,$domain,$role,$name,$ip)=split(/:/,$configline); chomp($ip); - $hostip{$id}=$ip; + if ($ip) { + $hostip{$id}=$ip; + $hostname{$id}=$name; + } } + close(CONFIG); # -------------------------------------------------------- Routines for forking @@ -114,9 +194,19 @@ sub REAPER { # ta unlink($port); } +sub hangup { + foreach (keys %children) { + $wasserver=$children{$_}; + &status("Closing $wasserver"); + &logthis('Closing '.$wasserver.': '.&subreply('exit',$wasserver)); + &status("Kill PID $_ for $wasserver"); + kill ('INT',$_); + } +} + sub HUNTSMAN { # signal handler for SIGINT local($SIG{CHLD}) = 'IGNORE'; # we're going to kill our children - kill 'INT' => keys %children; + &hangup(); my $execdir=$perlvar{'lonDaemons'}; unlink("$execdir/logs/lonc.pid"); &logthis("CRITICAL: Shutting down"); @@ -125,22 +215,30 @@ sub HUNTSMAN { # si sub HUPSMAN { # signal handler for SIGHUP local($SIG{CHLD}) = 'IGNORE'; # we're going to kill our children - kill 'INT' => keys %children; + &hangup(); &logthis("CRITICAL: Restarting"); unlink("$execdir/logs/lonc.pid"); my $execdir=$perlvar{'lonDaemons'}; exec("$execdir/lonc"); # here we go again } +sub checkchildren { + &initnewstatus(); + &logstatus(); + &logthis('Going to check on the children'); + foreach (sort keys %children) { + sleep 1; + unless (kill 'USR1' => $_) { + &logthis ('CRITICAL: Child '.$_.' is dead'); + &logstatus($$.' is dead'); + } + } +} + sub USRMAN { &logthis("USR1: Trying to establish connections again"); - foreach $thisserver (keys %hostip) { - $answer=subreply("ping",$thisserver); - &logthis("USR1: Ping $thisserver " - ."(pid >$childpid{$thisserver}<, $childatt{thisserver} attempts): " - ." >$answer<"); - } %childatt=(); + &checkchildren(); } # -------------------------------------------------- Non-critical communication @@ -153,10 +251,20 @@ sub subreply { Type => SOCK_STREAM, Timeout => 10) or return "con_lost"; - print $sclient "$cmd\n"; - my $answer=<$sclient>; - chomp($answer); - if (!$answer) { $answer="con_lost"; } + + + $SIG{ALRM}=sub { die "timeout" }; + $SIG{__DIE__}='DEFAULT'; + eval { + alarm(10); + print $sclient "$cmd\n"; + $answer=<$sclient>; + chomp($answer); + alarm(0); + }; + if ((!$answer) || ($@=~/timeout/)) { $answer="con_lost"; } + $SIG{ALRM}='DEFAULT'; + $SIG{__DIE__}=\&catchexception; } else { $answer='self_reply'; } return $answer; } @@ -169,7 +277,8 @@ sub logthis { my $fh=IO::File->new(">>$execdir/logs/lonc.log"); my $now=time; my $local=localtime($now); - print $fh "$local ($$): $message\n"; + $lastlog=$local.': '.$message; + print $fh "$local ($$) [$status]: $message\n"; } @@ -181,9 +290,34 @@ sub logperm { my $fh=IO::File->new(">>$execdir/logs/lonnet.perm.log"); print $fh "$now:$message:$local\n"; } +# ------------------------------------------------------------------ Log status + +sub logstatus { + my $docdir=$perlvar{'lonDocRoot'}; + my $fh=IO::File->new(">>$docdir/lon-status/loncstatus.txt"); + print $fh $$."\t".$status."\t".$lastlog."\n"; +} + +sub initnewstatus { + my $docdir=$perlvar{'lonDocRoot'}; + my $fh=IO::File->new(">$docdir/lon-status/loncstatus.txt"); + my $now=time; + my $local=localtime($now); + print $fh "LONC status $local - parent $$\n\n"; +} + +# -------------------------------------------------------------- Status setting + +sub status { + my $what=shift; + my $now=time; + my $local=localtime($now); + $status=$local.': '.$what; +} -# ---------------------------------------------------- Fork once and dissociate +# ---------------------------------------------------- Fork once and dissociate +&status("Fork and dissociate"); $fpid=fork; exit if $fpid; die "Couldn't fork: $!" unless defined ($fpid); @@ -191,7 +325,7 @@ die "Couldn't fork: $!" unless defined ( POSIX::setsid() or die "Can't start new session: $!"; # ------------------------------------------------------- Write our PID on disk - +&status("Write PID"); $execdir=$perlvar{'lonDaemons'}; open (PIDSAVE,">$execdir/logs/lonc.pid"); print PIDSAVE "$$\n"; @@ -204,8 +338,12 @@ $SIG{HUP}=$SIG{USR1}='IGNORE'; # Fork off our children, one for every server +&status("Forking ..."); + foreach $thisserver (keys %hostip) { - make_new_child($thisserver); + if (&online($hostname{$thisserver})) { + make_new_child($thisserver); + } } &logthis("Done starting initial servers"); @@ -218,17 +356,26 @@ $SIG{USR1} = \&USRMAN; # And maintain the population. while (1) { + &status("Sleeping"); sleep; # wait for a signal (i.e., child's death) # See who died and start new one + &status("Woke up"); foreach $thisserver (keys %hostip) { if (!$childpid{$thisserver}) { - if ($childatt{$thisserver}<=$childmaxattempts) { + if (($childatt{$thisserver}<$childmaxattempts) && + (&online($hostname{$thisserver}))) { $childatt{$thisserver}++; &logthis( "INFO: Trying to reconnect for $thisserver " - ."($childatt{$thisserver} of $childmaxattempts attempts)"); + ."(".($childatt{$thisserver}?$childatt{$thisserver}:'none'). + " of $childmaxattempts attempts)"); make_new_child($thisserver); - } + } else { + &logthis( + "INFO: Skipping $thisserver " + ."($childatt{$thisserver} of $childmaxattempts attempts)"); + } + } } } @@ -257,7 +404,8 @@ sub make_new_child { } else { # Child can *not* return from this subroutine. $SIG{INT} = 'DEFAULT'; # make SIGINT kill us as it did before - + $SIG{USR1}= \&logstatus; + # unblock signals sigprocmask(SIG_UNBLOCK, $sigset) or die "Can't unblock SIGINT for fork: $!\n"; @@ -267,64 +415,23 @@ sub make_new_child { $port = "$perlvar{'lonSockDir'}/$conserver"; unlink($port); -# ---------------------------------------------------- Client to network server -unless ( - $remotesock = IO::Socket::INET->new(PeerAddr => $hostip{$conserver}, - PeerPort => $perlvar{'londPort'}, - Proto => "tcp", - Type => SOCK_STREAM) - ) { - my $st=120+int(rand(240)); - &logthis( -"WARNING: Couldn't connect $conserver ($st secs): $@"); - sleep($st); - exit; - }; -# --------------------------------------- Send a ping to make other end do USR1 -print $remotesock "init\n"; -$answer=<$remotesock>; -print $remotesock "$answer"; -$answer=<$remotesock>; -chomp($answer); -&logthis("Init reply for $conserver: >$answer<"); -sleep 5; -print $remotesock "pong\n"; -$answer=<$remotesock>; -chomp($answer); -&logthis("Pong reply for $conserver: >$answer<"); -# ----------------------------------------------------------- Initialize cipher -print $remotesock "ekey\n"; -my $buildkey=<$remotesock>; -my $key=$conserver.$perlvar{'lonHostID'}; -$key=~tr/a-z/A-Z/; -$key=~tr/G-P/0-9/; -$key=~tr/Q-Z/0-9/; -$key=$key.$buildkey.$key.$buildkey.$key.$buildkey; -$key=substr($key,0,32); -my $cipherkey=pack("H32",$key); -if ($cipher=new IDEA $cipherkey) { - &logthis("Secure connection initialized: $conserver"); -} else { - my $st=120+int(rand(240)); - &logthis( - "WARNING: ". - "Could not establish secure connection, $conserver ($st secs)!"); - sleep($st); - exit; -} +# -------------------------------------------------------------- Open other end -# ----------------------------------------- We're online, send delayed messages +&openremote($conserver); +# ----------------------------------------- We're online, send delayed messages + &status("Checking for delayed messages"); my @allbuffered; my $path="$perlvar{'lonSockDir'}/delayed"; opendir(DIRHANDLE,$path); @allbuffered=grep /\.$conserver$/, readdir DIRHANDLE; closedir(DIRHANDLE); my $dfname; - map { + foreach (@allbuffered) { + &status("Sending delayed $conserver $_"); $dfname="$path/$_"; - &logthis($dfname); + &logthis('Sending '.$dfname); my $wcmd; { my $dfh=IO::File->new($dfname); @@ -345,18 +452,27 @@ if ($cipher=new IDEA $cipherkey) { } $cmd="enc:$cmdlength:$encrequest\n"; } - + $SIG{ALRM}=sub { die "timeout" }; + $SIG{__DIE__}='DEFAULT'; + eval { + alarm(60); print $remotesock "$cmd\n"; $answer=<$remotesock>; chomp($answer); - if ($answer ne '') { + alarm(0); + }; + $SIG{ALRM}='DEFAULT'; + $SIG{__DIE__}=\&catchexception; + + if (($answer ne '') && ($@!~/timeout/)) { unlink("$dfname"); &logthis("Delayed $cmd to $conserver: >$answer<"); &logperm("S:$conserver:$bcmd"); } - } @allbuffered; + } # ------------------------------------------------------- Listen to UNIX socket +&status("Opening socket $conserver"); unless ( $server = IO::Socket::UNIX->new(Local => $port, Type => SOCK_STREAM, @@ -394,11 +510,11 @@ while (1) { # check for new information on the connections we have # anything to read or accept? - foreach $client ($select->can_read(1)) { + foreach $client ($select->can_read(0.1)) { if ($client == $server) { # accept a new connection - + &status("Accept new connection: $conserver"); $client = $server->accept(); $select->add($client); nonblock($client); @@ -413,6 +529,7 @@ while (1) { delete $outbuffer{$client}; delete $ready{$client}; + &status("Idle $conserver"); $select->remove($client); close $client; next; @@ -441,18 +558,25 @@ while (1) { next unless exists $outbuffer{$client}; $rv = $client->send($outbuffer{$client}, 0); + + unless ($outbuffer{$client}=~/con_lost\n$/) { unless (defined $rv) { # Whine, but move on. - warn "I was told I could write, but I can't.\n"; + &logthis("I was told I could write, but I can't.\n"); next; } + $errno=$!; if (($rv == length $outbuffer{$client}) || - ($! == POSIX::EWOULDBLOCK)) { + ($errno == POSIX::EWOULDBLOCK) || ($errno == 0)) { substr($outbuffer{$client}, 0, $rv) = ''; delete $outbuffer{$client} unless length $outbuffer{$client}; } else { # Couldn't write all the data, and it wasn't because # it would have blocked. Shutdown and move on. + + &logthis("Dropping data with ".$errno.": ". + length($outbuffer{$client}).", $rv"); + delete $inbuffer{$client}; delete $outbuffer{$client}; delete $ready{$client}; @@ -461,7 +585,17 @@ while (1) { close($client); next; } + } else { +# -------------------------------------------------------- Wow, connection lost + &logthis( + "CRITICAL: Closing connection $conserver"); + &status("Connection lost $conserver"); + $remotesock->shutdown(2); + &logthis("Attempting to open new connection"); + &openremote($conserver); + } } + } } @@ -478,6 +612,15 @@ sub handle { # ============================================================= Process request # $request is the text of the request # put text of reply into $outbuffer{$client} +# ------------------------------------------------------------ Is this the end? + if ($request eq "close_connection_exit\n") { + &status("Request close connection: $conserver"); + &logthis( + "CRITICAL: Request Close Connection $conserver"); + $remotesock->shutdown(2); + $server->close(); + exit; + } # ----------------------------------------------------------------------------- if ($request =~ /^encrypt\:/) { my $cmd=$request; @@ -492,8 +635,27 @@ sub handle { } $request="enc:$cmdlength:$encrequest\n"; } +# --------------------------------------------------------------- Main exchange + $SIG{ALRM}=sub { die "timeout" }; + $SIG{__DIE__}='DEFAULT'; + eval { + alarm(300); + &status("Sending $conserver: $request"); print $remotesock "$request"; + &status("Waiting for reply from $conserver: $request"); $answer=<$remotesock>; + &status("Received reply: $request"); + alarm(0); + }; + if ($@=~/timeout/) { + $answer=''; + &logthis( + "CRITICAL: Timeout $conserver: $request"); + } + $SIG{ALRM}='DEFAULT'; + $SIG{__DIE__}=\&catchexception; + + if ($answer) { if ($answer =~ /^enc/) { my ($cmd,$cmdlength,$encinput)=split(/:/,$answer); @@ -515,6 +677,7 @@ sub handle { # ===================================================== Done processing request } delete $ready{$client}; + &status("Completed $conserver: $request"); # -------------------------------------------------------------- End non-forker } # ---------------------------------------------------------- End make_new_child @@ -532,3 +695,135 @@ sub nonblock { or die "Can't make socket nonblocking: $!\n"; } + +sub openremote { +# ---------------------------------------------------- Client to network server + + my $conserver=shift; + +&status("Opening TCP: $conserver"); + +unless ( + $remotesock = IO::Socket::INET->new(PeerAddr => $hostip{$conserver}, + PeerPort => $perlvar{'londPort'}, + Proto => "tcp", + Type => SOCK_STREAM) + ) { + my $st=120+int(rand(240)); + &logthis( +"WARNING: Couldn't connect $conserver ($st secs): $@"); + sleep($st); + exit; + }; +# ----------------------------------------------------------------- Init dialog + +&status("Init dialogue: $conserver"); + + $SIG{ALRM}=sub { die "timeout" }; + $SIG{__DIE__}='DEFAULT'; + eval { + alarm(60); +print $remotesock "init\n"; +$answer=<$remotesock>; +print $remotesock "$answer"; +$answer=<$remotesock>; +chomp($answer); + alarm(0); + }; + $SIG{ALRM}='DEFAULT'; + $SIG{__DIE__}=\&catchexception; + + if ($@=~/timeout/) { + &logthis("Timed out during init: $conserver"); + exit; + } + +if ($answer ne 'ok') { + &logthis("Init reply for $conserver: >$answer<"); + my $st=120+int(rand(240)); + &logthis( +"WARNING: Init failed $conserver ($st secs)"); + sleep($st); + exit; +} + +sleep 5; +&status("Ponging $conserver"); +print $remotesock "pong\n"; +$answer=<$remotesock>; +chomp($answer); +if ($answer!~/^$converver/) { + &logthis("Pong reply for $conserver: >$answer<"); +} +# ----------------------------------------------------------- Initialize cipher + +&status("Initialize cipher: $conserver"); +print $remotesock "ekey\n"; +my $buildkey=<$remotesock>; +my $key=$conserver.$perlvar{'lonHostID'}; +$key=~tr/a-z/A-Z/; +$key=~tr/G-P/0-9/; +$key=~tr/Q-Z/0-9/; +$key=$key.$buildkey.$key.$buildkey.$key.$buildkey; +$key=substr($key,0,32); +my $cipherkey=pack("H32",$key); +if ($cipher=new IDEA $cipherkey) { + &logthis("Secure connection initialized: $conserver"); +} else { + my $st=120+int(rand(240)); + &logthis( + "WARNING: ". + "Could not establish secure connection, $conserver ($st secs)!"); + sleep($st); + exit; +} + +} + +# ----------------------------------- POD (plain old documentation, CPAN style) + +=head1 NAME + +lonc - LON TCP-MySQL-Server Daemon for handling database requests. + +=head1 SYNOPSIS + +Should only be run as user=www. This is a command-line script which +is invoked by loncron. + +=head1 DESCRIPTION + +Provides persistent TCP connections to the other servers in the network +through multiplexed domain sockets + + PID in subdir logs/lonc.pid + kill kills + HUP restarts + USR1 tries to open connections again + +=head1 README + +Not yet written. + +=head1 PREREQUISITES + +POSIX +IO::Socket +IO::Select +IO::File +Socket +Fcntl +Tie::RefHash +Crypt::IDEA + +=head1 COREQUISITES + +=head1 OSNAMES + +linux + +=head1 SCRIPT CATEGORIES + +Server/Process + +=cut 500 Internal Server Error

Internal Server Error

The server encountered an internal error or misconfiguration and was unable to complete your request.

Please contact the server administrator at root@localhost to inform them of the time this error occurred, and the actions you performed just before this error.

More information about this error may be available in the server error log.