--- loncom/loncron 2011/06/03 20:45:10 1.90 +++ loncom/loncron 2014/12/16 18:18:44 1.101 @@ -2,7 +2,7 @@ # Housekeeping program, started by cron, loncontrol and loncron.pl # -# $Id: loncron,v 1.90 2011/06/03 20:45:10 raeburn Exp $ +# $Id: loncron,v 1.101 2014/12/16 18:18:44 raeburn Exp $ # # Copyright Michigan State University Board of Trustees # @@ -32,6 +32,7 @@ use strict; use lib '/home/httpd/lib/perl/'; use LONCAPA::Configuration; +use LONCAPA::Checksumming; use LONCAPA; use Apache::lonnet; use Apache::loncommon; @@ -156,12 +157,12 @@ sub checkon_daemon { $errors++; my $kadaemon=$daemon; if ($kadaemon eq 'lonmemcached') { $kadaemon='memcached'; } - &log($fh,'
Killall '.$daemon.': '. + &log($fh,'
Killall '.$daemon.': '. `killall $kadaemon 2>&1`.' - '); sleep 1; &log($fh,unlink($pidfile).' - '. `killall -9 $kadaemon 2>&1`. - '
'); + '

'); &log($fh,"

$daemon not running, trying to start

"); if (&start_daemon($fh,$daemon,$pidfile,$args)) { @@ -270,12 +271,7 @@ sub log_machine_info { &log($fh,"

distprobe

"); &log($fh,"
");
-    open(DSH,"$perlvar{'lonDaemons'}/distprobe |");
-    while (my $line=<DSH>) { 
-	&log($fh,&encode_entities($line,'<>&"')); 
-	$psproc++;
-    }
-    close(DSH);
+    &log($fh,&encode_entities(&LONCAPA::distro(),'<>&"'));
     &log($fh,"
"); &errout($fh); @@ -289,9 +285,11 @@ sub start_logging { &log($fh,(< + + LON Status Report $perlvar{'lonHostID'} + @@ -332,15 +330,20 @@ ENDHEADERS "".$role. "".&Apache::lonnet::hostname($id)."\n"); } - &log($fh,"

Spare Hosts

\n"); return $fh; } @@ -486,7 +489,7 @@ sub clean_lonIDs { my $since=$now-$mtime; if ($since>$perlvar{'lonExpire'}) { $cleaned++; - &log($fh,"Unlinking $fname
"); + &log($fh,"Unlinking $fname
"); unlink("$fname"); } else { $active++; @@ -515,21 +518,13 @@ sub clean_sockets { # ----------------------------------------------------------------------- httpd sub check_httpd_logs { my ($fh)=@_; - &log($fh,'

httpd

Access Log

');
-    
-    open (DFH,"tail -n25 /etc/httpd/logs/access_log|");
-    while (my $line=<DFH>) { &log($fh,&encode_entities($line,'<>&"')) };
-    close (DFH);
-	
-    &log($fh,"

Error Log

");
-	
-    open (DFH,"tail -n25 /etc/httpd/logs/error_log|");
-    while (my $line=<DFH>) { 
-	&log($fh,"$line");
-	if ($line=~/\[error\]/) { $notices++; } 
+    if (open(PIPE,"./lchttpdlogs|")) {
+        while (my $line=<PIPE>) {
+            &log($fh,$line);
+            if ($line=~/\[error\]/) { $notices++; }
+        }
+        close(PIPE);
     }
-    close (DFH);
-    &log($fh,"
"); &errout($fh); } @@ -538,7 +533,7 @@ sub check_httpd_logs { sub rotate_lonnet_logs { my ($fh)=@_; &log($fh,'

lonnet

Temp Log

');
-    print "checking logs\n";
+    print "Checking logs.\n";
     if (-e "$perlvar{'lonDaemons'}/logs/lonnet.log"){
 	open (DFH,"tail -n50 $perlvar{'lonDaemons'}/logs/lonnet.log|");
 	while (my $line=<DFH>) { 
@@ -582,7 +577,7 @@ sub rotate_other_logs {
 sub test_connections {
     my ($fh)=@_;
     &log($fh,'

Connections

'); - print "testing connections\n"; + print "Testing connections.\n"; &log($fh,""); my ($good,$bad)=(0,0); my %hostname = &Apache::lonnet::all_hostnames(); @@ -616,7 +611,7 @@ sub test_connections { sub check_delayed_msg { my ($fh)=@_; &log($fh,'

Delayed Messages

'); - print "checking buffers\n"; + print "Checking buffers.\n"; &log($fh,'

Scanning Permanent Log

'); @@ -627,7 +622,7 @@ sub check_delayed_msg { my ($time,$sdf,$dserv,$dcmd)=split(/:/,$line); if ($sdf eq 'F') { my $local=localtime($time); - &log($fh,"Failed: $time, $dserv, $dcmd
"); + &log($fh,"Failed: $time, $dserv, $dcmd
"); $warnings++; } if ($sdf eq 'S') { $unsend--; } @@ -635,7 +630,9 @@ sub check_delayed_msg { } &log($fh,"

Total unsend messages: $unsend

\n"); - $warnings=$warnings+5*$unsend; + if ($unsend > 0) { + $warnings=$warnings+5*$unsend; + } if ($unsend) { $simplestatus{'unsend'}=$unsend; } &log($fh,"

Outgoing Buffer

\n
");
@@ -649,11 +646,28 @@ sub check_delayed_msg {
     }
     &log($fh,"
\n"); close (DFH); + my %hostname = &Apache::lonnet::all_hostnames(); + my $numhosts = scalar(keys(%hostname)); # pong to all servers that have delayed messages # this will trigger a reverse connection, which should flush the buffers - foreach my $tryserver (keys %servers) { - my $answer=&Apache::lonnet::reply("pong",$tryserver); - &log($fh,"Pong to $tryserver: $answer
"); + foreach my $tryserver (sort(keys(%servers))) { + if ($hostname{$tryserver} || !$numhosts) { + my $answer; + eval { + local $SIG{ ALRM } = sub { die "TIMEOUT" }; + alarm(20); + $answer = &Apache::lonnet::reply("pong",$tryserver); + alarm(0); + }; + if ($@ && $@ =~ m/TIMEOUT/) { + &log($fh,"Attempted pong to $tryserver timed out
"); + print "Time out while contacting: $tryserver for pong.\n"; + } else { + &log($fh,"Pong to $tryserver: $answer
"); + } + } else { + &log($fh,"$tryserver has delayed messages, but is not part of the cluster -- skipping 'Pong'.
"); + } } } @@ -666,7 +680,7 @@ sub finish_logging { my $now=time; my $date=localtime($now); &log($fh,"
$date ($now)\n"); - print "lon-status webpage updated\n"; + print "lon-status webpage updated.\n"; $fh->close(); if ($errors) { $simplestatus{'errors'}=$errors; } @@ -687,65 +701,94 @@ sub log_simplestatus { } sub write_loncaparevs { - print "Retrieving LON-CAPA version information\n"; - if (open(my $fh,">$perlvar{'lonTabDir'}/loncaparevs.tab")) { - my %hostname = &Apache::lonnet::all_hostnames(); - foreach my $id (sort(keys(%hostname))) { - if ($id ne '') { - my $loncaparev; + print "Retrieving LON-CAPA version information.\n"; + my %hostname = &Apache::lonnet::all_hostnames(); + my $output; + foreach my $id (sort(keys(%hostname))) { + if ($id ne '') { + my $loncaparev; + eval { + local $SIG{ ALRM } = sub { die "TIMEOUT" }; + alarm(10); + $loncaparev = + &Apache::lonnet::get_server_loncaparev('',$id,1,'loncron'); + alarm(0); + }; + if ($@ && $@ =~ m/TIMEOUT/) { + print "Time out while contacting lonHost: $id for version.\n"; + } + if ($loncaparev =~ /^[\w.\-]+$/) { + $output .= $id.':'.$loncaparev."\n"; + } + } + } + if ($output) { + if (open(my $fh,">$perlvar{'lonTabDir'}/loncaparevs.tab")) { + print $fh $output; + close($fh); + &Apache::lonnet::load_loncaparevs(); + } + } + return; +} + +sub write_serverhomeIDs { + print "Retrieving LON-CAPA lonHostID information.\n"; + my %name_to_host = &Apache::lonnet::all_names(); + my $output; + foreach my $name (sort(keys(%name_to_host))) { + if ($name ne '') { + if (ref($name_to_host{$name}) eq 'ARRAY') { + my $serverhomeID; eval { local $SIG{ ALRM } = sub { die "TIMEOUT" }; alarm(10); - $loncaparev = - &Apache::lonnet::get_server_loncaparev('',$id,1,'loncron'); + $serverhomeID = + &Apache::lonnet::get_server_homeID($name,1,'loncron'); alarm(0); }; if ($@ && $@ =~ m/TIMEOUT/) { - print "time out while contacting lonHost: $id for version\n"; + print "Time out while contacting server: $name\n"; } - if ($loncaparev =~ /^[\w.\-]+$/) { - print $fh $id.':'.$loncaparev."\n"; + if ($serverhomeID ne '') { + $output .= 
$name.':'.$serverhomeID."\n"; + } else { + $output .= $name.':'.$name_to_host{$name}->[0]."\n"; } } } - close($fh); + } + if ($output) { + if (open(my $fh,">$perlvar{'lonTabDir'}/serverhomeIDs.tab")) { + print $fh $output; + close($fh); + &Apache::lonnet::load_serverhomeIDs(); + } } return; } -sub write_serverhomeIDs { - print "Retrieving LON-CAPA lonHostID information\n"; - if (open(my $fh,">$perlvar{'lonTabDir'}/serverhomeIDs.tab")) { - my %name_to_host = &Apache::lonnet::all_names(); - foreach my $name (sort(keys(%name_to_host))) { - if ($name ne '') { - if (ref($name_to_host{$name}) eq 'ARRAY') { - my $serverhomeID; - eval { - local $SIG{ ALRM } = sub { die "TIMEOUT" }; - alarm(10); - $serverhomeID = - &Apache::lonnet::get_server_homeID($name,1,'loncron'); - alarm(0); - }; - if ($@ && $@ =~ m/TIMEOUT/) { - print "Time out while contacting server: $name\n"; - } - if ($serverhomeID ne '') { - print $fh $name.':'.$serverhomeID."\n"; - } else { - print $fh $name.':'.$name_to_host{$name}->[0]."\n"; - } - } - } +sub write_checksums { + my $distro = &LONCAPA::distro(); + if ($distro) { + print "Retrieving file version and checksumming.\n"; + my $numchksums = 0; + my ($chksumsref,$versionsref) = + &LONCAPA::Checksumming::get_checksums($distro,$perlvar{'lonDaemons'}, + $perlvar{'lonLib'}, + $perlvar{'lonIncludes'}, + $perlvar{'lonTabDir'}); + if (ref($chksumsref) eq 'HASH') { + $numchksums = scalar(keys(%{$chksumsref})); } - close($fh); + print "File version retrieved and checksumming completed for $numchksums files.\n"; + } else { + print "File version retrieval and checksumming skipped - could not determine Linux distro.\n"; } return; } sub send_mail { - print "sending mail\n"; my $defdom = $perlvar{'lonDefDomain'}; my $origmail = $perlvar{'lonAdmEMail'}; my $emailto = &Apache::loncommon::build_recipient_list(undef, @@ -753,17 +796,40 @@ sub send_mail { if ($totalcount>2500) { $emailto.=",$perlvar{'lonSysEMail'}"; } - my $subj="LON: $perlvar{'lonHostID'} E:$errors 
W:$warnings N:$notices"; - - my $result=system("metasend -b -S 4000000 -t $emailto -s '$subj' -f $statusdir/index.html -m text/html >& /dev/null"); - if ($result != 0) { - $result=system("mail -s '$subj' $emailto < $statusdir/index.html"); + my $from; + my $hostname=`/bin/hostname`; + chop($hostname); + $hostname=~s/[^\w\.]//g; + if ($hostname) { + $from = 'www@'.$hostname; + } + my $subj="LON: $perlvar{'lonHostID'} E:$errors W:$warnings N:$notices"; + my $loncronmail = "To: $emailto\n". + "From: $from\n". + "Subject: ".$subj."\n". + "Content-type: text/html\; charset=UTF-8\n". + "MIME-Version: 1.0\n\n"; + if (open(my $fh,"<$statusdir/index.html")) { + while (<$fh>) { + $loncronmail .= $_; + } + close($fh); + } else { + $loncronmail .= "Failed to read from http://$hostname/lon-status/index.html\n"; + } + $loncronmail .= "\n\n"; + if (open(my $mailh, "|/usr/lib/sendmail -oi -t -odb")) { + print $mailh $loncronmail; + close($mailh); + print "Sending mail.\n"; + } else { + print "Sending mail failed.\n"; } } sub usage { print(< /dev/null"); exit 1; } @@ -894,7 +960,7 @@ sub main () { &log_simplestatus(); &write_loncaparevs(); &write_serverhomeIDs(); - + &write_checksums(); if ($totalcount>200 && !$noemail) { &send_mail(); } } } @@ -902,10 +968,3 @@ sub main () { &main(); 1; - - - - - - - 500 Internal Server Error

Internal Server Error

The server encountered an internal error or misconfiguration and was unable to complete your request.

Please contact the server administrator at root@localhost to inform them of the time this error occurred, and the actions you performed just before this error.

More information about this error may be available in the server error log.