--- loncom/loncron 2001/01/10 18:47:35 1.15 +++ loncom/loncron 2007/02/02 12:59:13 1.70 @@ -1,29 +1,57 @@ #!/usr/bin/perl -# The LearningOnline Network -# Housekeeping program, started by cron +# Housekeeping program, started by cron, loncontrol and loncron.pl # -# (TCP networking package -# 6/1/99,6/2,6/10,6/11,6/12,6/14,6/26,6/28,6/29,6/30, -# 7/1,7/2,7/9,7/10,7/12 Gerd Kortemeyer) +# $Id: loncron,v 1.70 2007/02/02 12:59:13 raeburn Exp $ +# +# Copyright Michigan State University Board of Trustees +# +# This file is part of the LearningOnline Network with CAPA (LON-CAPA). +# +# LON-CAPA is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# LON-CAPA is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with LON-CAPA; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# /home/httpd/html/adm/gpl.txt +# +# http://www.lon-capa.org/ # -# 7/14,7/15,7/19,7/21,7/22,11/18, -# 2/8 Gerd Kortemeyer -# Dec 00 Scott Harrison -# 12/23 Gerd Kortemeyer + +$|=1; +use strict; + +use lib '/home/httpd/lib/perl/'; +use LONCAPA::Configuration; use IO::File; use IO::Socket; +use HTML::Entities; +use Getopt::Long; +#globals +use vars qw (%perlvar %simplestatus $errors $warnings $notices $totalcount); + +my $statusdir="/home/httpd/html/lon-status"; + # -------------------------------------------------- Non-critical communication sub reply { - my ($cmd,$server)=@_; - my $peerfile="$perlvar{'lonSockDir'}/$server"; + my ($cmd,$server,$hostname)=@_; + my $peerfile="$perlvar{'lonSockDir'}/".$hostname->{$server}; my $client=IO::Socket::UNIX->new(Peer =>"$peerfile", Type => SOCK_STREAM, Timeout => 10) or return "con_lost"; - print $client "$cmd\n"; + print $client "sethost:$server:$cmd\n"; my $answer=<$client>; chomp($answer); if (!$answer) { $answer="con_lost"; } @@ -32,317 +60,405 @@ sub reply { # --------------------------------------------------------- Output error status +sub log { + my $fh=shift; + if ($fh) { print $fh @_ } +} + sub errout { my $fh=shift; - print $fh (< + &log($fh,(< -
Notices$notices
Warnings$warnings
Errors$errors

Top

+

Top

ENDERROUT } -# ================================================================ Main Program +sub start_daemon { + my ($fh,$daemon,$pidfile,$args) = @_; + my $progname=$daemon; + if ($daemon eq 'lonc' && $args eq 'new') { + $progname='loncnew'; + print "new "; + } + my $error_fname="$perlvar{'lonDaemons'}/logs/${daemon}_errors"; + my $size=(stat($error_fname))[7]; + if ($size>40000) { + &log($fh,"

Rotating error logs ...

"); + rename("$error_fname.2","$error_fname.3"); + rename("$error_fname.1","$error_fname.2"); + rename("$error_fname","$error_fname.1"); + } + system("$perlvar{'lonDaemons'}/$progname 2>$perlvar{'lonDaemons'}/logs/${daemon}_errors"); + sleep 1; + if (-e $pidfile) { + &log($fh,"

Seems like it started ...

"); + my $lfh=IO::File->new("$pidfile"); + my $daemonpid=<$lfh>; + chomp($daemonpid); + if ($daemonpid =~ /^\d+$/ && kill 0 => $daemonpid) { + return 1; + } else { + return 0; + } + } + &log($fh,"

Seems like that did not work!

"); + $errors++; + return 0; +} -# ------------------------------------------------------------ Read access.conf -{ - my $config=IO::File->new("/etc/httpd/conf/access.conf"); +sub checkon_daemon { + my ($fh,$daemon,$maxsize,$send,$args)=@_; - while (my $configline=<$config>) { - if ($configline =~ /PerlSetVar/) { - my ($dummy,$varname,$varvalue)=split(/\s+/,$configline); - $perlvar{$varname}=$varvalue; - } + my $result; + &log($fh,'

'.$daemon.'

Log

'); + printf("%-15s ",$daemon); + if (-e "$perlvar{'lonDaemons'}/logs/$daemon.log"){ + open (DFH,"tail -n25 $perlvar{'lonDaemons'}/logs/$daemon.log|"); + while (my $line=) { + &log($fh,"$line"); + if ($line=~/INFO/) { $notices++; } + if ($line=~/WARNING/) { $notices++; } + if ($line=~/CRITICAL/) { $warnings++; } + }; + close (DFH); } -} + &log($fh,"

"); + + my $pidfile="$perlvar{'lonDaemons'}/logs/$daemon.pid"; + + my $restartflag=1; + my $daemonpid; + if (-e $pidfile) { + my $lfh=IO::File->new("$pidfile"); + $daemonpid=<$lfh>; + chomp($daemonpid); + if ($daemonpid =~ /^\d+$/ && kill 0 => $daemonpid) { + &log($fh,"

$daemon at pid $daemonpid responding"); + if ($send) { &log($fh,", sending $send"); } + &log($fh,"

"); + if ($send eq 'USR1') { kill USR1 => $daemonpid; } + if ($send eq 'USR2') { kill USR2 => $daemonpid; } + $restartflag=0; + if ($send eq 'USR2') { + $result = 'reloaded'; + print "reloaded\n"; + } else { + $result = 'running'; + print "running\n"; + } + } else { + $errors++; + &log($fh,"

$daemon at pid $daemonpid not responding

"); + $restartflag=1; + &log($fh,"

Decided to clean up stale .pid file and restart $daemon

"); + } + } + if ($restartflag==1) { + $simplestatus{$daemon}='off'; + $errors++; + my $kadaemon=$daemon; + if ($kadaemon eq 'lonmemcached') { $kadaemon='memcached'; } + &log($fh,'
Killall '.$daemon.': '. + `killall $kadaemon 2>&1`.' - '); + sleep 1; + &log($fh,unlink($pidfile).' - '. + `killall -9 $kadaemon 2>&1`. + '
'); + &log($fh,"

$daemon not running, trying to start

"); + + if (&start_daemon($fh,$daemon,$pidfile,$args)) { + &log($fh,"

$daemon at pid $daemonpid responding

"); + $simplestatus{$daemon}='restarted'; + $result = 'started'; + print "started\n"; + } else { + $errors++; + &log($fh,"

$daemon at pid $daemonpid not responding

"); + &log($fh,"

Give it one more try ...

"); + print " "; + if (&start_daemon($fh,$daemon,$pidfile,$args)) { + &log($fh,"

$daemon at pid $daemonpid responding

"); + $simplestatus{$daemon}='restarted'; + $result = 'started'; + print "started\n"; + } else { + $result = 'failed'; + print " failed\n"; + $simplestatus{$daemon}='failed'; + $errors++; $errors++; + &log($fh,"

$daemon at pid $daemonpid not responding

"); + &log($fh,"

Unable to start $daemon

"); + } + } -# --------------------------------------- Make sure that LON-CAPA is configured -# I only test for one thing here (lonHostID). This is just a safeguard. -if ('{[[[[lonHostID]]]]}' eq $perlvar{'lonHostID'}) { - print("Unconfigured machine.\n"); - $emailto=$perlvar{'lonSysEMail'}; - $hostname=`/bin/hostname`; - chop $hostname; - $hostname=~s/[^\w\.]//g; # make sure is safe to pass through shell - $subj="LON: Unconfigured machine $hostname"; - system("echo 'Unconfigured machine $hostname.' |\ - mailto $emailto -s '$subj' > /dev/null"); - exit 1; -} + if (-e "$perlvar{'lonDaemons'}/logs/$daemon.log"){ + &log($fh,"

");
+	    open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/$daemon.log|");
+	    while (my $line=) { 
+		&log($fh,"$line");
+		if ($line=~/WARNING/) { $notices++; }
+		if ($line=~/CRITICAL/) { $notices++; }
+	    };
+	    close (DFH);
+	    &log($fh,"

"); + } + } + + my $fname="$perlvar{'lonDaemons'}/logs/$daemon.log"; + + my ($dev,$ino,$mode,$nlink, + $uid,$gid,$rdev,$size, + $atime,$mtime,$ctime, + $blksize,$blocks)=stat($fname); + + if ($size>$maxsize) { + &log($fh,"

Rotating logs ...

"); + rename("$fname.2","$fname.3"); + rename("$fname.1","$fname.2"); + rename("$fname","$fname.1"); + } -# ----------------------------- Make sure this process is running from user=www -my $wwwid=getpwnam('www'); -if ($wwwid!=$<) { - print("User ID mismatch. This program must be run as user 'www'\n"); - $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}"; - $subj="LON: $perlvar{'lonHostID'} User ID mismatch"; - system("echo 'User ID mismatch. loncron must be run as user www.' |\ - mailto $emailto -s '$subj' > /dev/null"); - exit 1; + &errout($fh); + return $result; } -# ------------------------------------------------------------- Read hosts file -{ - my $config=IO::File->new("$perlvar{'lonTabDir'}/hosts.tab"); - - while (my $configline=<$config>) { - my ($id,$domain,$role,$name,$ip)=split(/:/,$configline); - $hostname{$id}=$name; - $hostdom{$id}=$domain; - $hostrole{$id}=$role; - $hostip{$id}=$ip; - if (($role eq 'library') && ($id ne $perlvar{'lonHostID'})) { - $libserv{$id}=$name; - } +# --------------------------------------------------------------------- Machine +sub log_machine_info { + my ($fh)=@_; + &log($fh,'

Machine Information

'); + &log($fh,"

loadavg

"); + + open (LOADAVGH,"/proc/loadavg"); + my $loadavg=; + close (LOADAVGH); + + &log($fh,"$loadavg"); + + my @parts=split(/\s+/,$loadavg); + if ($parts[1]>4.0) { + $errors++; + } elsif ($parts[1]>2.0) { + $warnings++; + } elsif ($parts[1]>1.0) { + $notices++; } -} -# ------------------------------------------------------ Read spare server file -{ - my $config=IO::File->new("$perlvar{'lonTabDir'}/spare.tab"); + &log($fh,"

df

"); + &log($fh,"
");
 
-    while (my $configline=<$config>) {
-       chomp($configline);
-       if (($configline) && ($configline ne $perlvar{'lonHostID'})) {
-          $spareid{$configline}=1;
-       }
+    open (DFH,"df|");
+    while (my $line=) { 
+	&log($fh,&encode_entities($line,'<>&"')); 
+	@parts=split(/\s+/,$line);
+	my $usage=$parts[4];
+	$usage=~s/\W//g;
+	if ($usage>90) { 
+	    $warnings++;
+	    $notices++; 
+	} elsif ($usage>80) {
+	    $warnings++;
+	} elsif ($usage>60) {
+	    $notices++;
+	}
+	if ($usage>95) { $warnings++; $warnings++; $simplestatus{'diskfull'}++; }
     }
-}
+    close (DFH);
+    &log($fh,"
"); -# ---------------------------------------------------------------- Start report -$statusdir="/home/httpd/html/lon-status"; + &log($fh,"

ps

"); + &log($fh,"
");
+    my $psproc=0;
 
-$errors=0;
-$warnings=0;
-$notices=0;
+    open (PSH,"ps aux --cols 140 |");
+    while (my $line=) { 
+	&log($fh,&encode_entities($line,'<>&"')); 
+	$psproc++;
+    }
+    close (PSH);
+    &log($fh,"
"); -$now=time; -$date=localtime($now); + if ($psproc>200) { $notices++; } + if ($psproc>250) { $notices++; } -{ -my $fh=IO::File->new(">$statusdir/newstatus.html"); + &log($fh,"

distprobe

"); + &log($fh,"
");
+    open(DSH,"$perlvar{'lonDaemons'}/distprobe |");
+    while (my $line=) { 
+	&log($fh,&encode_entities($line,'<>&"')); 
+	$psproc++;
+    }
+    close(DSH);
+    &log($fh,"
"); -print $fh (<new(">$statusdir/newstatus.html"); + my %simplestatus=(); + my $now=time; + my $date=localtime($now); + + + &log($fh,(< LON Status Report $perlvar{'lonHostID'} -
+

LON Status Report $perlvar{'lonHostID'}

$date ($now)

    -
  1. Configuration -
  2. Machine Information -
  3. Temporary Files -
  4. Session Tokens -
  5. httpd -
  6. lonsql -
  7. lond -
  8. lonc -
  9. lonnet -
  10. Connections -
  11. Delayed Messages -
  12. Error Count +
  13. Configuration
  14. +
  15. Machine Information
  16. +
  17. Temporary Files
  18. +
  19. Session Tokens
  20. +
  21. httpd
  22. +
  23. lonsql
  24. +
  25. lond
  26. +
  27. lonc
  28. +
  29. lonhttpd
  30. +
  31. lonnet
  32. +
  33. Connections
  34. +
  35. Delayed Messages
  36. +
  37. Error Count
-
- +
+

Configuration

PerlVars

- +
ENDHEADERS -foreach $varname (keys %perlvar) { - print $fh "\n"; -} -print $fh "
$varname$perlvar{$varname}

Hosts

"; -foreach $id (keys %hostname) { -print $fh - ""; -print $fh "\n"; -} -print $fh "
$id$hostdom{$id}$hostrole{$id}$hostname{$id}$hostip{$id}

Spare Hosts

    "; -foreach $id (keys %spareid) { - print $fh "
  1. $id\n"; + foreach my $varname (sort(keys(%perlvar))) { + &log($fh,"$varname". + &encode_entities($perlvar{$varname},'<>&"')."\n"); + } + &log($fh,"

    Hosts

    "); + foreach my $id (sort(keys(%{$hostname}))) { + &log($fh, + "\n"); + } + &log($fh,"
    $id".$hostdom->{$id}. + "".$hostrole->{$id}. + "".$hostname->{$id}."

    Spare Hosts

      "); + foreach my $id (sort(keys(%{$spareid}))) { + &log($fh,"
    1. $id\n
    2. "); + } + &log($fh,"
    \n"); + return $fh; } -print $fh "
\n"; - -# --------------------------------------------------------------------- Machine - -print $fh '

Machine Information

'; -print $fh "

loadavg

"; - -open (LOADAVGH,"/proc/loadavg"); -$loadavg=; -close (LOADAVGH); - -print $fh "$loadavg"; - -@parts=split(/\s+/,$loadavg); -if ($parts[1]>4.0) { - $errors++; -} elsif ($parts[1]>2.0) { - $warnings++; -} elsif ($parts[1]>1.0) { - $notices++; -} - -print $fh "

df

"; -print $fh "
";
-
-open (DFH,"df|");
-while ($line=) { 
-   print $fh "$line"; 
-   @parts=split(/\s+/,$line);
-   $usage=$parts[4];
-   $usage=~s/\W//g;
-   if ($usage>90) { 
-      $warnings++; 
-   } elsif ($usage>80) {
-      $warnings++;
-   } elsif ($usage>60) {
-      $notices++;
-   }
-   if ($usage>95) { $warnings++; $warnings++ }
-}
-close (DFH);
-print $fh "
"; -&errout($fh); - # --------------------------------------------------------------- clean out tmp -print $fh '

Temporary Files

'; -$cleaned=0; -while ($fname=<$perlvar{'lonDaemons'}/tmp/*>) { - my ($dev,$ino,$mode,$nlink, - $uid,$gid,$rdev,$size, - $atime,$mtime,$ctime, - $blksize,$blocks)=stat($fname); - $now=time; - $since=$now-$mtime; - if ($since>$perlvar{'lonExpire'}) { - $cleaned++; - unlink("$fname"); - } - +sub clean_tmp { + my ($fh)=@_; + &log($fh,'

Temporary Files

'); + my $cleaned=0; + my $old=0; + while (my $fname=<$perlvar{'lonDaemons'}/tmp/*>) { + my ($dev,$ino,$mode,$nlink, + $uid,$gid,$rdev,$size, + $atime,$mtime,$ctime, + $blksize,$blocks)=stat($fname); + my $now=time; + my $since=$now-$mtime; + if ($since>$perlvar{'lonExpire'}) { + my $line=''; + if (open(PROBE,$fname)) { + $line=; + close(PROBE); + } + unless ($line=~/^CHECKOUTTOKEN\&/) { + $cleaned++; + unlink("$fname"); + } else { + if ($since>365*$perlvar{'lonExpire'}) { + $cleaned++; + unlink("$fname"); + } else { $old++; } + } + } + } + &log($fh,"Cleaned up ".$cleaned." files (".$old." old checkout tokens)."); } -print $fh "Cleaned up ".$cleaned." files."; # ------------------------------------------------------------ clean out lonIDs -print $fh '

Session Tokens

'; -$cleaned=0; -$active=0; -while ($fname=<$perlvar{'lonIDsDir'}/*>) { - my ($dev,$ino,$mode,$nlink, - $uid,$gid,$rdev,$size, - $atime,$mtime,$ctime, - $blksize,$blocks)=stat($fname); - $now=time; - $since=$now-$mtime; - if ($since>$perlvar{'lonExpire'}) { - $cleaned++; - print $fh "Unlinking $fname
"; - unlink("$fname"); - } else { - $active++; - } - +sub clean_lonIDs { + my ($fh)=@_; + &log($fh,'

Session Tokens

'); + my $cleaned=0; + my $active=0; + while (my $fname=<$perlvar{'lonIDsDir'}/*>) { + my ($dev,$ino,$mode,$nlink, + $uid,$gid,$rdev,$size, + $atime,$mtime,$ctime, + $blksize,$blocks)=stat($fname); + my $now=time; + my $since=$now-$mtime; + if ($since>$perlvar{'lonExpire'}) { + $cleaned++; + &log($fh,"Unlinking $fname
"); + unlink("$fname"); + } else { + $active++; + } + } + &log($fh,"

Cleaned up ".$cleaned." stale session token(s).

"); + &log($fh,"

$active open session(s)

"); } -print $fh "

Cleaned up ".$cleaned." stale session token(s)."; -print $fh "

$active open session(s)

"; -# ----------------------------------------------------------------------- httpd - -print $fh '

httpd

Access Log

';
-
-open (DFH,"tail -n40 /etc/httpd/logs/access_log|");
-while ($line=) { print $fh "$line" };
-close (DFH);
-
-print $fh "

Error Log

";
-
-open (DFH,"tail -n50 /etc/httpd/logs/error_log|");
-while ($line=) { 
-   print $fh "$line";
-   if ($line=~/\[error\]/) { $notices++; } 
-};
-close (DFH);
-print $fh "
"; -&errout($fh); +# ----------------------------------------------------------------------- httpd +sub check_httpd_logs { + my ($fh)=@_; + &log($fh,'

httpd

Access Log

');
+    
+    open (DFH,"tail -n25 /etc/httpd/logs/access_log|");
+    while (my $line=) { &log($fh,&encode_entities($line,'<>&"')) };
+    close (DFH);
+	
+    &log($fh,"

Error Log

");
+	
+    open (DFH,"tail -n25 /etc/httpd/logs/error_log|");
+    while (my $line=) { 
+	&log($fh,"$line");
+	if ($line=~/\[error\]/) { $notices++; } 
+    }
+    close (DFH);
+    &log($fh,"
"); + &errout($fh); +} -# ---------------------------------------------------------------------- lonsql -# -# Do not run for now -# -if ($perlvar{'lonRole'} eq "library" && 1==0) { +# ---------------------------------------------------------------------- lonnet - print $fh '

lonsql

Log

';
-    
-    if (-e "$perlvar{'lonDaemons'}/logs/lonsql.log"){
-	open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lonsql.log|");
-	while ($line=) { 
-	    print $fh "$line";
-	    if ($line=~/INFO/) { $notices++; }
-	    if ($line=~/WARNING/) { $notices++; }
-	    if ($line=~/CRITICAL/) { $warnings++; }
-	};
+sub rotate_lonnet_logs {
+    my ($fh)=@_;
+    &log($fh,'

lonnet

Temp Log

');
+    print "checking logs\n";
+    if (-e "$perlvar{'lonDaemons'}/logs/lonnet.log"){
+	open (DFH,"tail -n50 $perlvar{'lonDaemons'}/logs/lonnet.log|");
+	while (my $line=) { 
+	    &log($fh,&encode_entities($line,'<>&"'));
+	}
 	close (DFH);
     }
-    print $fh "
"; - - my $lonsqlfile="$perlvar{'lonDaemons'}/logs/lonsql.pid"; + &log($fh,"

Perm Log

");
     
-    if (-e $lonsqlfile) {
-	my $lfh=IO::File->new("$lonsqlfile");
-	my $lonsqlpid=<$lfh>;
-	chomp($lonsqlpid);
-	if (kill 0 => $lonsqlpid) {
-	    print $fh "

lonsql at pid $lonsqlpid responding

"; - } else { - $errors++; $errors++; - print $fh "

lonsql at pid $lonsqlpid not responding

"; - } - } else { - $errors++; - print $fh "

lonsql not running, trying to start

"; - system("$perlvar{'lonDaemons'}/lonsql"); - sleep 120; - if (-e $lonsqlfile) { - print $fh "Seems like it started ...

"; - my $lfh=IO::File->new("$lonsqlfile"); - my $lonsqlpid=<$lfh>; - chomp($lonsqlpid); - sleep 30; - if (kill 0 => $lonsqlpid) { - print $fh "

lonsql at pid $lonsqlpid responding

"; - } else { - $errors++; $errors++; - print $fh "

lonsql at pid $lonsqlpid not responding

"; - print $fh "Give it one more try ...

"; - system("$perlvar{'lonDaemons'}/lonsql"); - sleep 120; - } - } else { - print $fh "Seems like that did not work!

"; - $errors++; - } - if (-e "$perlvar{'lonDaemons'}/logs/lonsql.log"){ - print $fh "

";
-	    open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lonsql.log|");
-	    while ($line=) { 
-		print $fh "$line";
-		if ($line=~/WARNING/) { $notices++; }
-		if ($line=~/CRITICAL/) { $notices++; }
-	    };
-	    close (DFH);
-	    print $fh "
"; + if (-e "$perlvar{'lonDaemons'}/logs/lonnet.perm.log") { + open(DFH,"tail -n10 $perlvar{'lonDaemons'}/logs/lonnet.perm.log|"); + while (my $line=) { + &log($fh,&encode_entities($line,'<>&"')); } - } + close (DFH); + } else { &log($fh,"No perm log\n") } - $fname="$perlvar{'lonDaemons'}/logs/lonsql.log"; + my $fname="$perlvar{'lonDaemons'}/logs/lonnet.log"; my ($dev,$ino,$mode,$nlink, $uid,$gid,$rdev,$size, @@ -350,323 +466,280 @@ if ($perlvar{'lonRole'} eq "library" && $blksize,$blocks)=stat($fname); if ($size>40000) { - print $fh "Rotating logs ...

"; + &log($fh,"

Rotating logs ...

"); rename("$fname.2","$fname.3"); rename("$fname.1","$fname.2"); rename("$fname","$fname.1"); } + &log($fh,"
"); &errout($fh); } -# ------------------------------------------------------------------------ lond -print $fh '

lond

Log

';
-
-if (-e "$perlvar{'lonDaemons'}/logs/lond.log"){
-open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lond.log|");
-while ($line=) { 
-   print $fh "$line";
-   if ($line=~/INFO/) { $notices++; }
-   if ($line=~/WARNING/) { $notices++; }
-   if ($line=~/CRITICAL/) { $warnings++; }
-};
-close (DFH);
-}
-print $fh "
"; - -my $londfile="$perlvar{'lonDaemons'}/logs/lond.pid"; - -my $restartflag=1; -if (-e $londfile) { - my $lfh=IO::File->new("$londfile"); - my $londpid=<$lfh>; - chomp($londpid); - if (kill 0 => $londpid) { - print $fh "

lond at pid $londpid responding

"; - $restartflag=0; - } else { - $errors++; - print $fh "

lond at pid $londpid not responding

"; - # Intelligently handle this. - # Possibility #1: there is no process - # Solution: remove .pid file and restart - if (getpgrp($londpid)==-1) { - unlink($londfile); - $restartflag=1; - } - else { - # Possibility #2: there is a live process that is not responding - # for an unknown reason - # Solution: kill parent and children processes, remove .pid and restart - `killall -9 lond`; - unlink($londfile); - $restartflag=1; - } - print $fh - "

Deciding to clean up stale .pid file and restart lond

"; - } -} -if ($restartflag==1) { - $errors++; - print $fh "

lond not running, trying to start

"; - system("$perlvar{'lonDaemons'}/lond"); - sleep 120; - if (-e $londfile) { - print $fh "Seems like it started ...

"; - my $lfh=IO::File->new("$londfile"); - my $londpid=<$lfh>; - chomp($londpid); - sleep 30; - if (kill 0 => $londpid) { - print $fh "

lond at pid $londpid responding

"; - } else { - $errors++; $errors++; - print $fh "

lond at pid $londpid not responding

"; - print $fh "Give it one more try ...

"; - system("$perlvar{'lonDaemons'}/lond"); - sleep 120; - } - } else { - print $fh "Seems like that did not work!

"; - $errors++; - } - if (-e "$perlvar{'lonDaemons'}/logs/lond.log"){ - print $fh "

";
-    open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lond.log|");
-    while ($line=) { 
-      print $fh "$line";
-      if ($line=~/WARNING/) { $notices++; }
-      if ($line=~/CRITICAL/) { $notices++; }
-    };
-    close (DFH);
-    print $fh "
"; - } +# ----------------------------------------------------------------- Connections +sub test_connections { + my ($fh,$hostname)=@_; + &log($fh,'

Connections

'); + print "testing connections\n"; + &log($fh,""); + my ($good,$bad)=(0,0); + foreach my $tryserver (sort(keys(%{$hostname}))) { + print("."); + my $result; + my $answer=&reply("ping",$tryserver,$hostname); + if ($answer eq "$tryserver:$perlvar{'lonHostID'}") { + $result="ok"; + $good++; + } else { + $result=$answer; + $warnings++; + if ($answer eq 'con_lost') { + $bad++; + $warnings++; + } else { + $good++; #self connection + } + } + if ($answer =~ /con_lost/) { print(" $tryserver down\n"); } + &log($fh,"\n"); + } + &log($fh,"
$tryserver$result
"); + print "\n$good good, $bad bad connections\n"; + &errout($fh); } -$fname="$perlvar{'lonDaemons'}/logs/lond.log"; - my ($dev,$ino,$mode,$nlink, - $uid,$gid,$rdev,$size, - $atime,$mtime,$ctime, - $blksize,$blocks)=stat($fname); - -if ($size>40000) { - print $fh "Rotating logs ...

"; - rename("$fname.2","$fname.3"); - rename("$fname.1","$fname.2"); - rename("$fname","$fname.1"); -} - -&errout($fh); -# ------------------------------------------------------------------------ lonc - -print $fh '


lonc

Log

';
-
-if (-e "$perlvar{'lonDaemons'}/logs/lonc.log"){
-open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lonc.log|");
-while ($line=) { 
-   print $fh "$line";
-   if ($line=~/INFO/) { $notices++; }
-   if ($line=~/WARNING/) { $notices++; }
-   if ($line=~/CRITICAL/) { $warnings++; }
-};
-close (DFH);
-}
-print $fh "
"; - -my $loncfile="$perlvar{'lonDaemons'}/logs/lonc.pid"; - -$restartflag=1; -if (-e $loncfile) { - my $lfh=IO::File->new("$loncfile"); - my $loncpid=<$lfh>; - chomp($loncpid); - if (kill 0 => $loncpid) { - print $fh "

lonc at pid $loncpid responding, sending USR1

"; - kill USR1 => $loncpid; - $restartflag=0; - } else { - $errors++; - print $fh "

lonc at pid $loncpid not responding

"; - # Intelligently handle this. - # Possibility #1: there is no process - # Solution: remove .pid file and restart - if (getpgrp($loncpid)==-1) { - unlink($loncfile); - $restartflag=1; - } - else { - # Possibility #2: there is a live process that is not responding - # for an unknown reason - # Solution: kill parent and children processes, remove .pid and restart - `killall -9 lonc`; - unlink($loncfile); - $restartflag=1; - } - print $fh - "

Deciding to clean up stale .pid file and restart lonc

"; - } -} -if ($restartflag==1) { - $errors++; - print $fh "

lonc not running, trying to start

"; - system("$perlvar{'lonDaemons'}/lonc"); - sleep 120; - if (-e $loncfile) { - print $fh "Seems like it started ...

"; - my $lfh=IO::File->new("$loncfile"); - my $loncpid=<$lfh>; - chomp($loncpid); - sleep 30; - if (kill 0 => $loncpid) { - print $fh "

lonc at pid $loncpid responding

"; - } else { - $errors++; $errors++; - print $fh "

lonc at pid $loncpid not responding

"; - print $fh "Give it one more try ...

"; - system("$perlvar{'lonDaemons'}/lonc"); - sleep 120; - } - } else { - print $fh "Seems like that did not work!

"; - $errors++; - } - if (-e "$perlvar{'lonDaemons'}/logs/lonc.log") { - print $fh "

";
-    open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lonc.log|");
-    while ($line=) { 
-      print $fh "$line";
-      if ($line=~/WARNING/) { $notices++; }
-      if ($line=~/CRITICAL/) { $notices++; }
-    };
-    close (DFH);
-    print $fh "
"; - } -} - -$fname="$perlvar{'lonDaemons'}/logs/lonc.log"; +# ------------------------------------------------------------ Delayed messages +sub check_delayed_msg { + my ($fh,$hostname)=@_; + &log($fh,'

Delayed Messages

'); + print "checking buffers\n"; + + &log($fh,'

Scanning Permanent Log

'); - my ($dev,$ino,$mode,$nlink, - $uid,$gid,$rdev,$size, - $atime,$mtime,$ctime, - $blksize,$blocks)=stat($fname); + my $unsend=0; -if ($size>40000) { - print $fh "Rotating logs ...

"; - rename("$fname.2","$fname.3"); - rename("$fname.1","$fname.2"); - rename("$fname","$fname.1"); -} + my $dfh=IO::File->new("$perlvar{'lonDaemons'}/logs/lonnet.perm.log"); + while (my $line=<$dfh>) { + my ($time,$sdf,$dserv,$dcmd)=split(/:/,$line); + if ($sdf eq 'F') { + my $local=localtime($time); + &log($fh,"Failed: $time, $dserv, $dcmd
"); + $warnings++; + } + if ($sdf eq 'S') { $unsend--; } + if ($sdf eq 'D') { $unsend++; } + } - -&errout($fh); -# ---------------------------------------------------------------------- lonnet + &log($fh,"

Total unsend messages: $unsend

\n"); + $warnings=$warnings+5*$unsend; -print $fh '

lonnet

Temp Log

';
-if (-e "$perlvar{'lonDaemons'}/logs/lonnet.log"){
-open (DFH,"tail -n50 $perlvar{'lonDaemons'}/logs/lonnet.log|");
-while ($line=) { 
-    print $fh "$line";
-};
-close (DFH);
-}
-print $fh "

Perm Log

";
-
-if (-e "$perlvar{'lonDaemons'}/logs/lonnet.perm.log") {
-    open(DFH,"tail -n10 $perlvar{'lonDaemons'}/logs/lonnet.perm.log|");
-while ($line=) { 
-   print $fh "$line";
-};
-close (DFH);
-} else { print $fh "No perm log\n" }
-
-$fname="$perlvar{'lonDaemons'}/logs/lonnet.log";
-
-                          my ($dev,$ino,$mode,$nlink,
-                              $uid,$gid,$rdev,$size,
-                              $atime,$mtime,$ctime,
-                              $blksize,$blocks)=stat($fname);
-
-if ($size>40000) {
-    print $fh "Rotating logs ...

"; - rename("$fname.2","$fname.3"); - rename("$fname.1","$fname.2"); - rename("$fname","$fname.1"); + if ($unsend) { $simplestatus{'unsend'}=$unsend; } + &log($fh,"

Outgoing Buffer

\n
");
+# list directory with delayed messages and remember offline servers
+    my %servers=();
+    open (DFH,"ls -lF $perlvar{'lonSockDir'}/delayed|");
+    while (my $line=) {
+        my ($server)=($line=~/\.(\w+)$/);
+        if ($server) { $servers{$server}=1; }
+	&log($fh,&encode_entities($line,'<>&"'));
+    }
+    &log($fh,"
\n"); + close (DFH); +# pong to all servers that have delayed messages +# this will trigger a reverse connection, which should flush the buffers + foreach my $tryserver (keys %servers) { + my $answer=&reply("pong",$tryserver,$hostname); + &log($fh,"Pong to $tryserver: $answer
"); + } } -print $fh "
"; -&errout($fh); -# ----------------------------------------------------------------- Connections +sub finish_logging { + my ($fh)=@_; + &log($fh,"
\n"); + $totalcount=$notices+4*$warnings+100*$errors; + &errout($fh); + &log($fh,"

Total Error Count: $totalcount

"); + my $now=time; + my $date=localtime($now); + &log($fh,"
$date ($now)\n"); + print "lon-status webpage updated\n"; + $fh->close(); + + if ($errors) { $simplestatus{'errors'}=$errors; } + if ($warnings) { $simplestatus{'warnings'}=$warnings; } + if ($notices) { $simplestatus{'notices'}=$notices; } + $simplestatus{'time'}=time; +} -print $fh '

Connections

'; +sub log_simplestatus { + rename ("$statusdir/newstatus.html","$statusdir/index.html"); + + my $sfh=IO::File->new(">$statusdir/loncron_simple.txt"); + foreach (keys %simplestatus) { + print $sfh $_.'='.$simplestatus{$_}.'&'; + } + print $sfh "\n"; + $sfh->close(); +} -print $fh ""; -foreach $tryserver (keys %hostname) { +sub send_mail { + print "sending mail\n"; + my $emailto="$perlvar{'lonAdmEMail'}"; + if ($totalcount>2500) { + $emailto.=",$perlvar{'lonSysEMail'}"; + } + my $subj="LON: $perlvar{'lonHostID'} E:$errors W:$warnings N:$notices"; - $answer=reply("pong",$tryserver); - if ($answer eq "$tryserver:$perlvar{'lonHostID'}") { - $result="ok"; - } else { - $result=$answer; - $warnings++; - if ($answer eq 'con_lost') { $warnings++; } + my $result=system("metasend -b -S 4000000 -t $emailto -s '$subj' -f $statusdir/index.html -m text/html >& /dev/null"); + if ($result != 0) { + $result=system("mail -s '$subj' $emailto < $statusdir/index.html"); } - print $fh "\n"; +} +sub usage { + print(<"; -&errout($fh); -# ------------------------------------------------------------ Delayed messages +# ================================================================ Main Program +sub main () { + my ($oldlonc,$help,$justcheckdaemons,$noemail,$justcheckconnections, + $justreload); + &GetOptions("help" => \$help, + "oldlonc" => \$oldlonc, + "justcheckdaemons" => \$justcheckdaemons, + "noemail" => \$noemail, + "justcheckconnections" => \$justcheckconnections, + "justreload" => \$justreload + ); + if ($help) { &usage(); return; } +# --------------------------------- Read loncapa_apache.conf and loncapa.conf + my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf'); + %perlvar=%{$perlvarref}; + undef $perlvarref; + delete $perlvar{'lonReceipt'}; # remove since sensitive and not needed + delete $perlvar{'lonSqlAccess'}; # remove since sensitive and not needed + +# --------------------------------------- Make sure that LON-CAPA is configured +# I only test for one thing here (lonHostID). This is just a safeguard. + if ('{[[[[lonHostID]]]]}' eq $perlvar{'lonHostID'}) { + print("Unconfigured machine.\n"); + my $emailto=$perlvar{'lonSysEMail'}; + my $hostname=`/bin/hostname`; + chop $hostname; + $hostname=~s/[^\w\.]//g; # make sure is safe to pass through shell + my $subj="LON: Unconfigured machine $hostname"; + system("echo 'Unconfigured machine $hostname.' |\ + mailto $emailto -s '$subj' > /dev/null"); + exit 1; + } -print $fh '

Delayed Messages

'; +# ----------------------------- Make sure this process is running from user=www + my $wwwid=getpwnam('www'); + if ($wwwid!=$<) { + print("User ID mismatch. This program must be run as user 'www'\n"); + my $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}"; + my $subj="LON: $perlvar{'lonHostID'} User ID mismatch"; + system("echo 'User ID mismatch. loncron must be run as user www.' |\ + mailto $emailto -s '$subj' > /dev/null"); + exit 1; + } -print $fh '

Scanning Permanent Log

'; +# ------------------------------------------------------------- Read hosts file + my $config=IO::File->new("$perlvar{'lonTabDir'}/hosts.tab"); + + my (%hostname,%hostdom,%hostrole,%spareid); + while (my $configline=<$config>) { + next if ($configline =~ /^(\#|\s*\$)/); + my ($id,$domain,$role,$name)=split(/:/,$configline); + if ($id && $domain && $role && $name) { + $name=~s/\s//g; + $hostname{$id}=$name; + $hostdom{$id}=$domain; + $hostrole{$id}=$role; + } + } + undef $config; -$unsend=0; -{ - my $dfh=IO::File->new("$perlvar{'lonDaemons'}/logs/lonnet.perm.log"); - while ($line=<$dfh>) { - ($time,$sdf,$dserv,$dcmd)=split(/:/,$line); - if ($sdf eq 'F') { - $local=localtime($time); - print "Failed: $time, $dserv, $dcmd
"; - $warnings++; - } - if ($sdf eq 'S') { $unsend--; } - if ($sdf eq 'D') { $unsend++; } - } -} -print $fh "Total unsend messages: $unsend

\n"; -$warnings=$warnings+5*$unsend; - -print $fh "

Outgoing Buffer

"; - -open (DFH,"ls -lF $perlvar{'lonSockDir'}/delayed|"); -while ($line=) { - print $fh "$line
"; -}; -close (DFH); - -# ------------------------------------------------------------------------- End -print $fh "
\n"; -$totalcount=$notices+4*$warnings+100*$errors; -&errout($fh); -print $fh "

Total Error Count: $totalcount

"; -$now=time; -$date=localtime($now); -print $fh "
$date ($now)\n"; +# ------------------------------------------------------ Read spare server file + $config=IO::File->new("$perlvar{'lonTabDir'}/spare.tab"); + + while (my $configline=<$config>) { + chomp($configline); + if (($configline) && ($configline ne $perlvar{'lonHostID'})) { + $spareid{$configline}=1; + } + } + undef $config; -} +# ---------------------------------------------------------------- Start report -rename ("$statusdir/newstatus.html","$statusdir/index.html"); - -if ($totalcount>200) { - $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}"; - $subj="LON: $perlvar{'lonHostID'} E:$errors W:$warnings N:$notices"; - system( - "metasend -b -t $emailto -s '$subj' -f $statusdir/index.html -m text/html"); + $errors=0; + $warnings=0; + $notices=0; + + + my $fh; + if (!$justcheckdaemons && !$justcheckconnections && !$justreload) { + $fh=&start_logging(\%hostdom,\%hostrole,\%hostname,\%spareid); + + &log_machine_info($fh); + &clean_tmp($fh); + &clean_lonIDs($fh); + &check_httpd_logs($fh); + &rotate_lonnet_logs($fh); + } + if (!$justcheckconnections && !$justreload) { + &checkon_daemon($fh,'lonsql',200000); + if ( &checkon_daemon($fh,'lond',40000,'USR1') eq 'running') { + &checkon_daemon($fh,'lond',40000,'USR2'); + } + my $args='new'; + if ($oldlonc) { $args = ''; } + &checkon_daemon($fh,'lonc',40000,'USR1',$args); + &checkon_daemon($fh,'lonhttpd',40000); + &checkon_daemon($fh,'lonmemcached',40000); + &checkon_daemon($fh,'lonmaxima',40000); + } + if ($justreload) { + &checkon_daemon($fh,'lond',40000,'USR2'); + my $args='new'; + if ($oldlonc) { $args = ''; } + &checkon_daemon($fh,'lonc',40000,'USR2',$args); + } + if ($justcheckconnections) { + &test_connections($fh,\%hostname); + } + if (!$justcheckdaemons && !$justcheckconnections && !$justreload) { + &check_delayed_msg($fh,\%hostname); + &finish_logging($fh); + &log_simplestatus(); + + if ($totalcount>200 && !$noemail) { &send_mail(); } + } } + +&main(); 1; 500 Internal Server Error

Internal Server Error

The server encountered an internal error or misconfiguration and was unable to complete your request.

Please contact the server administrator at root@localhost to inform them of the time this error occurred, and the actions you performed just before this error.

More information about this error may be available in the server error log.

$tryserver$result