--- loncom/loncron 2003/10/14 15:36:21 1.44 +++ loncom/loncron 2019/07/07 16:15:24 1.103.2.4 @@ -1,324 +1,234 @@ #!/usr/bin/perl -# The LearningOnline Network -# Housekeeping program, started by cron +# Housekeeping program, started by cron, loncontrol and loncron.pl # -# (TCP networking package -# 6/1/99,6/2,6/10,6/11,6/12,6/14,6/26,6/28,6/29,6/30, -# 7/1,7/2,7/9,7/10,7/12 Gerd Kortemeyer) +# $Id: loncron,v 1.103.2.4 2019/07/07 16:15:24 raeburn Exp $ +# +# Copyright Michigan State University Board of Trustees +# +# This file is part of the LearningOnline Network with CAPA (LON-CAPA). +# +# LON-CAPA is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# LON-CAPA is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with LON-CAPA; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +# +# /home/httpd/html/adm/gpl.txt +# +# http://www.lon-capa.org/ # -# 7/14,7/15,7/19,7/21,7/22,11/18, -# 2/8 Gerd Kortemeyer -# 12/23 Gerd Kortemeyer -# YEAR=2001 -# 09/04,09/06,11/26 Gerd Kortemeyer $|=1; +use strict; use lib '/home/httpd/lib/perl/'; use LONCAPA::Configuration; +use LONCAPA::Checksumming; +use LONCAPA; +use Apache::lonnet; +use Apache::loncommon; use IO::File; use IO::Socket; +use HTML::Entities; +use Getopt::Long; +use File::Copy; +#globals +use vars qw (%perlvar %simplestatus $errors $warnings $notices $totalcount); + +my $statusdir="/home/httpd/html/lon-status"; -# -------------------------------------------------- Non-critical communication -sub reply { - my ($cmd,$server)=@_; - my $peerfile="$perlvar{'lonSockDir'}/$server"; - my $client=IO::Socket::UNIX->new(Peer =>"$peerfile", - Type => SOCK_STREAM, - Timeout => 10) - or return "con_lost"; - print $client "$cmd\n"; - my $answer=<$client>; - chomp($answer); - if (!$answer) { $answer="con_lost"; } - return $answer; -} # --------------------------------------------------------- Output error status +sub log { + my $fh=shift; + if ($fh) { print $fh @_ } +} + sub errout { my $fh=shift; - print $fh (< + &log($fh,(< -
Notices$notices
Warnings$warnings
Errors$errors

Top

+

Top

ENDERROUT } +sub rotate_logfile { + my ($file,$fh,$description) = @_; + my $size=(stat($file))[7]; + if ($size>40000) { + &log($fh,"

Rotating $description ...

"); + rename("$file.2","$file.3"); + rename("$file.1","$file.2"); + rename("$file","$file.1"); + } +} + sub start_daemon { - my ($fh,$daemon,$pidfile) = @_; + my ($fh,$daemon,$pidfile,$args) = @_; my $progname=$daemon; - if ($daemon eq 'lonc' && $ARGV[0] eq 'new') { + if ($daemon eq 'lonc') { $progname='loncnew'; - print "new "; } - system("$perlvar{'lonDaemons'}/$progname 2>>$perlvar{'lonDaemons'}/logs/${daemon}_errors"); - sleep 2; + my $error_fname="$perlvar{'lonDaemons'}/logs/${daemon}_errors"; + &rotate_logfile($error_fname,$fh,'error logs'); + if ($daemon eq 'lonc') { + &clean_sockets($fh); + } + system("$perlvar{'lonDaemons'}/$progname 2>$perlvar{'lonDaemons'}/logs/${daemon}_errors"); + sleep 1; if (-e $pidfile) { - print $fh "Seems like it started ...

"; + &log($fh,"

Seems like it started ...

"); my $lfh=IO::File->new("$pidfile"); my $daemonpid=<$lfh>; chomp($daemonpid); - sleep 2; - if (kill 0 => $daemonpid) { + if ($daemonpid =~ /^\d+$/ && kill 0 => $daemonpid) { return 1; } else { return 0; } } - print $fh "Seems like that did not work!

"; + &log($fh,"

Seems like that did not work!

"); $errors++; return 0; } sub checkon_daemon { - my ($fh,$daemon,$maxsize,$sendusr1)=@_; + my ($fh,$daemon,$maxsize,$send,$args)=@_; - print $fh '

'.$daemon.'

Log

';
-    printf("%-10s ",$daemon);
-    if (-e "$perlvar{'lonDaemons'}/logs/$daemon.log"){
-	open (DFH,"tail -n25 $perlvar{'lonDaemons'}/logs/$daemon.log|");
-	while ($line=) { 
-	    print $fh "$line";
-	    if ($line=~/INFO/) { $notices++; }
-	    if ($line=~/WARNING/) { $notices++; }
-	    if ($line=~/CRITICAL/) { $warnings++; }
-	};
-	close (DFH);
+    my $result;
+    &log($fh,'

'.$daemon.'

Log

'); + printf("%-15s ",$daemon); + if ($fh) { + if (-e "$perlvar{'lonDaemons'}/logs/$daemon.log"){ + if (open(DFH,"tail -n25 $perlvar{'lonDaemons'}/logs/$daemon.log|")) { + while (my $line=) { + &log($fh,"$line"); + if ($line=~/INFO/) { $notices++; } + if ($line=~/WARNING/) { $notices++; } + if ($line=~/CRITICAL/) { $warnings++; } + } + close (DFH); + } + } + &log($fh,"

"); } - print $fh "
"; - + my $pidfile="$perlvar{'lonDaemons'}/logs/$daemon.pid"; my $restartflag=1; - + my $daemonpid; if (-e $pidfile) { my $lfh=IO::File->new("$pidfile"); - my $daemonpid=<$lfh>; + $daemonpid=<$lfh>; chomp($daemonpid); - if (kill 0 => $daemonpid) { - print $fh "

$daemon at pid $daemonpid responding"; - if ($sendusr1) { print $fh ", sending USR1"; } - print $fh "

"; - if ($sendusr1) { kill USR1 => $daemonpid; } + if ($daemonpid =~ /^\d+$/ && kill 0 => $daemonpid) { + &log($fh,"

$daemon at pid $daemonpid responding"); + if ($send) { &log($fh,", sending $send"); } + &log($fh,"

"); + if ($send eq 'USR1') { kill USR1 => $daemonpid; } + if ($send eq 'USR2') { kill USR2 => $daemonpid; } $restartflag=0; - print "running\n"; + if ($send eq 'USR2') { + $result = 'reloaded'; + print "reloaded\n"; + } else { + $result = 'running'; + print "running\n"; + } } else { $errors++; - print $fh "

$daemon at pid $daemonpid not responding

"; + &log($fh,"

$daemon at pid $daemonpid not responding

"); $restartflag=1; - print $fh "

Decided to clean up stale .pid file and restart $daemon

"; + &log($fh,"

Decided to clean up stale .pid file and restart $daemon

"); } } if ($restartflag==1) { $simplestatus{$daemon}='off'; $errors++; - print $fh '
Killall '.$daemon.': '. - `killall $daemon 2>&1`.' - '; - sleep 2; - print $fh unlink($pidfile).' - '. - `killall -9 $daemon 2>&1`. - '
'; - print $fh "

$daemon not running, trying to start

"; - - if (&start_daemon($fh,$daemon,$pidfile)) { - print $fh "

$daemon at pid $daemonpid responding

"; + my $kadaemon=$daemon; + if ($kadaemon eq 'lonmemcached') { $kadaemon='memcached'; } + &log($fh,'
Killall '.$daemon.': '. + `killall $kadaemon 2>&1`.' - '); + sleep 1; + &log($fh,unlink($pidfile).' - '. + `killall -9 $kadaemon 2>&1`. + '
'); + &log($fh,"

$daemon not running, trying to start

"); + + if (&start_daemon($fh,$daemon,$pidfile,$args)) { + &log($fh,"

$daemon at pid $daemonpid responding

"); $simplestatus{$daemon}='restarted'; + $result = 'started'; print "started\n"; } else { $errors++; - print $fh "

$daemon at pid $daemonpid not responding

"; - print $fh "Give it one more try ...

"; + &log($fh,"

$daemon at pid $daemonpid not responding

"); + &log($fh,"

Give it one more try ...

"); print " "; - if (&start_daemon($fh,$daemon,$pidfile)) { - print $fh "

$daemon at pid $daemonpid responding

"; + if (&start_daemon($fh,$daemon,$pidfile,$args)) { + &log($fh,"

$daemon at pid $daemonpid responding

"); $simplestatus{$daemon}='restarted'; + $result = 'started'; print "started\n"; } else { + $result = 'failed'; print " failed\n"; $simplestatus{$daemon}='failed'; $errors++; $errors++; - print $fh "

$daemon at pid $daemonpid not responding

"; - print $fh "Unable to start $daemon

"; + &log($fh,"

$daemon at pid $daemonpid not responding

"); + &log($fh,"

Unable to start $daemon

"); } } - - if (-e "$perlvar{'lonDaemons'}/logs/$daemon.log"){ - print $fh "

";
-	    open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/$daemon.log|");
-	    while ($line=) { 
-		print $fh "$line";
-		if ($line=~/WARNING/) { $notices++; }
-		if ($line=~/CRITICAL/) { $notices++; }
-	    };
-	    close (DFH);
-	    print $fh "
"; - } + if ($fh) { + if (-e "$perlvar{'lonDaemons'}/logs/$daemon.log"){ + &log($fh,"

");
+	        if (open(DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/$daemon.log|")) {
+	            while (my $line=) { 
+		        &log($fh,"$line");
+		        if ($line=~/WARNING/) { $notices++; }
+		        if ($line=~/CRITICAL/) { $notices++; }
+	            }
+	            close (DFH);
+                }
+	        &log($fh,"

"); + } + } } - $fname="$perlvar{'lonDaemons'}/logs/$daemon.log"; - - my ($dev,$ino,$mode,$nlink, - $uid,$gid,$rdev,$size, - $atime,$mtime,$ctime, - $blksize,$blocks)=stat($fname); - - if ($size>$maxsize) { - print $fh "Rotating logs ...

"; - rename("$fname.2","$fname.3"); - rename("$fname.1","$fname.2"); - rename("$fname","$fname.1"); - } + my $fname="$perlvar{'lonDaemons'}/logs/$daemon.log"; + &rotate_logfile($fname,$fh,'logs'); &errout($fh); -} -# ================================================================ Main Program - -# --------------------------------- Read loncapa_apache.conf and loncapa.conf -my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf'); -%perlvar=%{$perlvarref}; -undef $perlvarref; -delete $perlvar{'lonReceipt'}; # remove since sensitive and not needed -delete $perlvar{'lonSqlAccess'}; # remove since sensitive and not needed - -# --------------------------------------- Make sure that LON-CAPA is configured -# I only test for one thing here (lonHostID). This is just a safeguard. -if ('{[[[[lonHostID]]]]}' eq $perlvar{'lonHostID'}) { - print("Unconfigured machine.\n"); - $emailto=$perlvar{'lonSysEMail'}; - $hostname=`/bin/hostname`; - chop $hostname; - $hostname=~s/[^\w\.]//g; # make sure is safe to pass through shell - $subj="LON: Unconfigured machine $hostname"; - system("echo 'Unconfigured machine $hostname.' |\ - mailto $emailto -s '$subj' > /dev/null"); - exit 1; -} - -# ----------------------------- Make sure this process is running from user=www -my $wwwid=getpwnam('www'); -if ($wwwid!=$<) { - print("User ID mismatch. This program must be run as user 'www'\n"); - $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}"; - $subj="LON: $perlvar{'lonHostID'} User ID mismatch"; - system("echo 'User ID mismatch. loncron must be run as user www.' |\ - mailto $emailto -s '$subj' > /dev/null"); - exit 1; -} - -# ------------------------------------------------------------- Read hosts file -{ - my $config=IO::File->new("$perlvar{'lonTabDir'}/hosts.tab"); - - while (my $configline=<$config>) { - my ($id,$domain,$role,$name,$ip,$domdescr)=split(/:/,$configline); - if ($id && $domain && $role && $name && $ip) { - $hostname{$id}=$name; - $hostdom{$id}=$domain; - $hostip{$id}=$ip; - $hostrole{$id}=$role; - if ($domdescr) { $domaindescription{$domain}=$domdescr; } - if (($role eq 'library') && ($id ne $perlvar{'lonHostID'})) { - $libserv{$id}=$name; - } - } else { - if ($configline) { -# &logthis("Skipping hosts.tab line -$configline-"); - } - } - } + return $result; } -# ------------------------------------------------------ Read spare server file -{ - my $config=IO::File->new("$perlvar{'lonTabDir'}/spare.tab"); - - while (my $configline=<$config>) { - chomp($configline); - if (($configline) && ($configline ne $perlvar{'lonHostID'})) { - $spareid{$configline}=1; - } - } -} - -# ---------------------------------------------------------------- Start report - -$statusdir="/home/httpd/html/lon-status"; - -$errors=0; -$warnings=0; -$notices=0; - -$now=time; -$date=localtime($now); - -{ - my $fh=IO::File->new(">$statusdir/newstatus.html"); - my %simplestatus=(); - - print $fh (< - -LON Status Report $perlvar{'lonHostID'} - - - -

LON Status Report $perlvar{'lonHostID'}

-

$date ($now)

-
    -
  1. Configuration -
  2. Machine Information -
  3. Temporary Files -
  4. Session Tokens -
  5. httpd -
  6. lonsql -
  7. lond -
  8. lonc -
  9. lonhttpd -
  10. lonnet -
  11. Connections -
  12. Delayed Messages -
  13. Error Count -
-
- -

Configuration

-

PerlVars

- -ENDHEADERS - - foreach $varname (sort(keys(%perlvar))) { - print $fh "\n"; - } - print $fh "
$varname$perlvar{$varname}

Hosts

"; - foreach $id (sort(keys(%hostname))) { - print $fh - ""; - print $fh "\n"; - } - print $fh "
$id$hostdom{$id}$hostrole{$id}$hostname{$id}$hostip{$id}

Spare Hosts

    "; - foreach $id (sort(keys(%spareid))) { - print $fh "
  1. $id\n"; - } - - print $fh "
\n"; - # --------------------------------------------------------------------- Machine - - print $fh '

Machine Information

'; - print $fh "

loadavg

"; - +sub log_machine_info { + my ($fh)=@_; + &log($fh,'

Machine Information

'); + &log($fh,"

loadavg

"); + open (LOADAVGH,"/proc/loadavg"); - $loadavg=; + my $loadavg=; close (LOADAVGH); - print $fh "$loadavg"; + &log($fh,"$loadavg"); - @parts=split(/\s+/,$loadavg); + my @parts=split(/\s+/,$loadavg); if ($parts[1]>4.0) { $errors++; } elsif ($parts[1]>2.0) { @@ -327,14 +237,14 @@ ENDHEADERS $notices++; } - print $fh "

df

"; - print $fh "
";
+    &log($fh,"

df

"); + &log($fh,"
");
 
     open (DFH,"df|");
-    while ($line=) { 
-	print $fh "$line"; 
+    while (my $line=) { 
+	&log($fh,&encode_entities($line,'<>&"')); 
 	@parts=split(/\s+/,$line);
-	$usage=$parts[4];
+	my $usage=$parts[4];
 	$usage=~s/\W//g;
 	if ($usage>90) { 
 	    $warnings++;
@@ -347,226 +257,478 @@ ENDHEADERS
 	if ($usage>95) { $warnings++; $warnings++; $simplestatus{'diskfull'}++; }
     }
     close (DFH);
-    print $fh "
"; + &log($fh,"
"); - print $fh "

ps

"; - print $fh "
";
-    $psproc=0;
+    &log($fh,"

ps

"); + &log($fh,"
");
+    my $psproc=0;
 
-    open (PSH,"ps -aux|");
-    while ($line=) { 
-	print $fh "$line"; 
+    open (PSH,"ps aux --cols 140 |");
+    while (my $line=) { 
+	&log($fh,&encode_entities($line,'<>&"')); 
 	$psproc++;
     }
     close (PSH);
-    print $fh "
"; + &log($fh,"
"); if ($psproc>200) { $notices++; } if ($psproc>250) { $notices++; } + &log($fh,"

distprobe

"); + &log($fh,"
");
+    &log($fh,&encode_entities(&LONCAPA::distro(),'<>&"'));
+    &log($fh,"
"); + &errout($fh); +} -# --------------------------------------------------------------- clean out tmp - print $fh '

Temporary Files

'; - $cleaned=0; - $old=0; - while ($fname=<$perlvar{'lonDaemons'}/tmp/*>) { - my ($dev,$ino,$mode,$nlink, - $uid,$gid,$rdev,$size, - $atime,$mtime,$ctime, - $blksize,$blocks)=stat($fname); - $now=time; - $since=$now-$mtime; - if ($since>$perlvar{'lonExpire'}) { - $line=''; - if (open(PROBE,$fname)) { - $line=; - close(PROBE); - } - unless ($line=~/^CHECKOUTTOKEN\&/) { - $cleaned++; - unlink("$fname"); - } else { - if ($since>365*$perlvar{'lonExpire'}) { - $cleaned++; - unlink("$fname"); - } else { $old++; } - } - } +sub start_logging { + my $fh=IO::File->new(">$statusdir/newstatus.html"); + my %simplestatus=(); + my $now=time; + my $date=localtime($now); + + &log($fh,(< + + +LON Status Report $perlvar{'lonHostID'} + + + +
+

LON Status Report $perlvar{'lonHostID'}

+

$date ($now)

+
    +
  1. Configuration
  2. +
  3. Machine Information
  4. +
  5. Temporary Files
  6. +
  7. Session Tokens
  8. +
  9. WebDAV Session Tokens
  10. +
  11. httpd
  12. +
  13. lonsql
  14. +
  15. lond
  16. +
  17. lonc
  18. +
  19. lonnet
  20. +
  21. Connections
  22. +
  23. Delayed Messages
  24. +
  25. Error Count
  26. +
+
+ +

Configuration

+

PerlVars

+ +ENDHEADERS + + foreach my $varname (sort(keys(%perlvar))) { + &log($fh,"\n"); + } + &log($fh,"
$varname". + &encode_entities($perlvar{$varname},'<>&"')."

Hosts

"); + my %hostname = &Apache::lonnet::all_hostnames(); + foreach my $id (sort(keys(%hostname))) { + my $role = (&Apache::lonnet::is_library($id) ? 'library' + : 'access'); + &log($fh, + "\n"); + } + &log($fh,"
$id".&Apache::lonnet::host_domain($id). + "".$role. + "".&Apache::lonnet::hostname($id)."

Spare Hosts

"); + if (keys(%Apache::lonnet::spareid) > 0) { + &log($fh,"\n"); + } else { + &log($fh,"No spare hosts specified
\n"); } - print $fh "Cleaned up ".$cleaned." files (".$old." old checkout tokens)."; + return $fh; +} + +# --------------------------------------------------------------- clean out tmp +sub clean_tmp { + my ($fh)=@_; + &log($fh,'

Temporary Files

'); + my ($cleaned,$old,$removed) = (0,0,0); + my %errors = ( + dir => [], + file => [], + failopen => [], + ); + my %error_titles = ( + dir => 'failed to remove empty directory:', + file => 'failed to unlike stale file', + failopen => 'failed to open file or directory' + ); + ($cleaned,$old,$removed) = &recursive_clean_tmp('',$cleaned,$old,$removed,\%errors); + &log($fh,"Cleaned up: ".$cleaned." files; removed: $removed empty directories; (found: $old old checkout tokens)"); + foreach my $key (sort(keys(%errors))) { + if (ref($errors{$key}) eq 'ARRAY') { + if (@{$errors{$key}} > 0) { + &log($fh,"Error during cleanup ($error_titles{$key}):
'); + } + } + } +} + +sub recursive_clean_tmp { + my ($subdir,$cleaned,$old,$removed,$errors) = @_; + my $base = "$perlvar{'lonDaemons'}/tmp"; + my $path = $base; + next if ($subdir =~ m{\.\./}); + next unless (ref($errors) eq 'HASH'); + unless ($subdir eq '') { + $path .= '/'.$subdir; + } + if (opendir(my $dh,"$path")) { + while (my $file = readdir($dh)) { + next if ($file =~ /^\.\.?$/); + my $fname = "$path/$file"; + if (-d $fname) { + my $innerdir; + if ($subdir eq '') { + $innerdir = $file; + } else { + $innerdir = $subdir.'/'.$file; + } + ($cleaned,$old,$removed) = + &recursive_clean_tmp($innerdir,$cleaned,$old,$removed,$errors); + my @doms = &Apache::lonnet::current_machine_domains(); + + if (open(my $dirhandle,$fname)) { + unless (($innerdir eq 'helprequests') || + (($innerdir =~ /^addcourse/) && ($innerdir !~ m{/\d+$}))) { + my @contents = grep {!/^\.\.?$/} readdir($dirhandle); + join('&&',@contents)."\n"; + if (scalar(grep {!/^\.\.?$/} readdir($dirhandle)) == 0) { + closedir($dirhandle); + if ($fname =~ m{^\Q$perlvar{'lonDaemons'}\E/tmp/}) { + if (rmdir($fname)) { + $removed ++; + } elsif (ref($errors->{dir}) eq 'ARRAY') { + push(@{$errors->{dir}},$fname); + } + } + } + } else { + closedir($dirhandle); + } + } + } else { + my ($dev,$ino,$mode,$nlink, + $uid,$gid,$rdev,$size, + $atime,$mtime,$ctime, + $blksize,$blocks)=stat($fname); + my $now=time; + my $since=$now-$mtime; + if ($since>$perlvar{'lonExpire'}) { + if ($subdir eq '') { + my $line=''; + if ($fname =~ /\.db$/) { + if (unlink($fname)) { + $cleaned++; + } elsif (ref($errors->{file}) eq 'ARRAY') { + push(@{$errors->{file}},$fname); + } + } elsif (open(PROBE,$fname)) { + my $line=''; + $line=; + close(PROBE); + if ($line=~/^CHECKOUTTOKEN\&/) { + if ($since>365*$perlvar{'lonExpire'}) { + if (unlink($fname)) { + $cleaned++; + } elsif (ref($errors->{file}) eq 'ARRAY') { + push(@{$errors->{file}},$fname); + } + } else { + $old++; + } + } else { + if (unlink($fname)) { + $cleaned++; + } elsif (ref($errors->{file}) eq 'ARRAY') { + push(@{$errors->{file}},$fname); + } + } + } elsif (ref($errors->{failopen}) eq 'ARRAY') { + push(@{$errors->{failopen}},$fname); + } + } else { + if (unlink($fname)) { + $cleaned++; + } elsif (ref($errors->{file}) eq 'ARRAY') { + push(@{$errors->{file}},$fname); + } + } + } + } + } + closedir($dh); + } elsif (ref($errors->{failopen}) eq 'ARRAY') { + push(@{$errors->{failopen}},$path); + } + return ($cleaned,$old,$removed); +} # ------------------------------------------------------------ clean out lonIDs - print $fh '

Session Tokens

'; - $cleaned=0; - $active=0; - while ($fname=<$perlvar{'lonIDsDir'}/*>) { +sub clean_lonIDs { + my ($fh)=@_; + &log($fh,'

Session Tokens

'); + my $cleaned=0; + my $active=0; + while (my $fname=<$perlvar{'lonIDsDir'}/*>) { my ($dev,$ino,$mode,$nlink, $uid,$gid,$rdev,$size, $atime,$mtime,$ctime, $blksize,$blocks)=stat($fname); - $now=time; - $since=$now-$mtime; + my $now=time; + my $since=$now-$mtime; if ($since>$perlvar{'lonExpire'}) { $cleaned++; - print $fh "Unlinking $fname
"; + &log($fh,"Unlinking $fname
"); unlink("$fname"); } else { $active++; } - } - print $fh "

Cleaned up ".$cleaned." stale session token(s)."; - print $fh "

$active open session(s)

"; + &log($fh,"

Cleaned up ".$cleaned." stale session token(s).

"); + &log($fh,"

$active open session(s)

"); +} -# ----------------------------------------------------------------------- httpd +# ------------------------------------------------ clean out webDAV Session IDs +sub clean_webDAV_sessionIDs { + my ($fh)=@_; + if ($perlvar{'lonRole'} eq 'library') { + &log($fh,'

WebDAV Session Tokens

'); + my $cleaned=0; + my $active=0; + my $now = time; + if (-d $perlvar{'lonDAVsessDir'}) { + while (my $fname=<$perlvar{'lonDAVsessDir'}/*>) { + my @stats = stat($fname); + my $since=$now-$stats[9]; + if ($since>$perlvar{'lonExpire'}) { + $cleaned++; + &log($fh,"Unlinking $fname
"); + unlink("$fname"); + } else { + $active++; + } + } + &log($fh,"

Cleaned up ".$cleaned." stale webDAV session token(s).

"); + &log($fh,"

$active open webDAV session(s)

"); + } + } +} - print $fh '

httpd

Access Log

';
-    
-    open (DFH,"tail -n25 /etc/httpd/logs/access_log|");
-    while ($line=) { print $fh "$line" };
-    close (DFH);
+# ----------------------------------------------------------- clean out sockets
+sub clean_sockets {
+    my ($fh)=@_;
+    my $cleaned=0;
+    opendir(SOCKETS,$perlvar{'lonSockDir'});
+    while (my $fname=readdir(SOCKETS)) {
+	next if (-d $fname 
+		 || $fname=~/(mysqlsock|maximasock|rsock|\Q$perlvar{'lonSockDir'}\E)/);
+	$cleaned++;
+	&log($fh,"Unlinking $fname
"); + unlink("/home/httpd/sockets/$fname"); + } + &log($fh,"

Cleaned up ".$cleaned." stale sockets.

"); +} - print $fh "

Error Log

";
 
-    open (DFH,"tail -n25 /etc/httpd/logs/error_log|");
-    while ($line=) { 
-	print $fh "$line";
-	if ($line=~/\[error\]/) { $notices++; } 
-    };
-    close (DFH);
-    print $fh "
"; +# ----------------------------------------------------------------------- httpd +sub check_httpd_logs { + my ($fh)=@_; + if (open(PIPE,"./lchttpdlogs|")) { + while (my $line=) { + &log($fh,$line); + if ($line=~/\[error\]/) { $notices++; } + } + close(PIPE); + } &errout($fh); - - -# ---------------------------------------------------------------------- lonsql - - &checkon_daemon($fh,'lonsql',200000); - -# ------------------------------------------------------------------------ lond - - &checkon_daemon($fh,'lond',40000,1); - -# ------------------------------------------------------------------------ lonc - - &checkon_daemon($fh,'lonc',40000,1); - -# -------------------------------------------------------------------- lonhttpd - - &checkon_daemon($fh,'lonhttpd',40000); +} # ---------------------------------------------------------------------- lonnet - print $fh '

lonnet

Temp Log

';
-    print "checking logs\n";
+sub rotate_lonnet_logs {
+    my ($fh)=@_;
+    &log($fh,'

lonnet

Temp Log

');
+    print "Checking logs.\n";
     if (-e "$perlvar{'lonDaemons'}/logs/lonnet.log"){
 	open (DFH,"tail -n50 $perlvar{'lonDaemons'}/logs/lonnet.log|");
-	while ($line=) { 
-	    print $fh "$line";
-	};
+	while (my $line=) { 
+	    &log($fh,&encode_entities($line,'<>&"'));
+	}
 	close (DFH);
     }
-    print $fh "

Perm Log

";
+    &log($fh,"

Perm Log

");
     
     if (-e "$perlvar{'lonDaemons'}/logs/lonnet.perm.log") {
 	open(DFH,"tail -n10 $perlvar{'lonDaemons'}/logs/lonnet.perm.log|");
-	while ($line=) { 
-	    print $fh "$line";
-	};
+	while (my $line=) { 
+	    &log($fh,&encode_entities($line,'<>&"'));
+	}
 	close (DFH);
-    } else { print $fh "No perm log\n" }
+    } else { &log($fh,"No perm log\n") }
 
-    $fname="$perlvar{'lonDaemons'}/logs/lonnet.log";
+    my $fname="$perlvar{'lonDaemons'}/logs/lonnet.log";
+    &rotate_logfile($fname,$fh,'lonnet log');
 
-    my ($dev,$ino,$mode,$nlink,
-	$uid,$gid,$rdev,$size,
-	$atime,$mtime,$ctime,
-	$blksize,$blocks)=stat($fname);
+    &log($fh,"
"); + &errout($fh); +} - if ($size>40000) { - print $fh "Rotating logs ...

"; - rename("$fname.2","$fname.3"); - rename("$fname.1","$fname.2"); - rename("$fname","$fname.1"); +sub rotate_other_logs { + my ($fh) = @_; + my %logs = ( + autoenroll => 'Auto Enroll log', + autocreate => 'Create Course log', + searchcat => 'Search Cataloguing log', + autoupdate => 'Auto Update log', + refreshcourseids_db => 'Refresh CourseIDs db log', + ); + foreach my $item (keys(%logs)) { + my $fname=$perlvar{'lonDaemons'}.'/logs/'.$item.'.log'; + &rotate_logfile($fname,$fh,$logs{$item}); } +} - print $fh "

"; - &errout($fh); # ----------------------------------------------------------------- Connections - - print $fh '

Connections

'; - print "testing connections\n"; - print $fh ""; - foreach $tryserver (sort(keys(%hostname))) { +sub test_connections { + my ($fh)=@_; + &log($fh,'

Connections

'); + print "Testing connections.\n"; + &log($fh,"
"); + my ($good,$bad)=(0,0); + my %hostname = &Apache::lonnet::all_hostnames(); + foreach my $tryserver (sort(keys(%hostname))) { print("."); - $answer=reply("pong",$tryserver); + my $result; + my $answer=&Apache::lonnet::reply("ping",$tryserver); if ($answer eq "$tryserver:$perlvar{'lonHostID'}") { $result="ok"; + $good++; } else { $result=$answer; $warnings++; - if ($answer eq 'con_lost') { $warnings++; } + if ($answer eq 'con_lost') { + $bad++; + $warnings++; + } else { + $good++; #self connection + } } if ($answer =~ /con_lost/) { print(" $tryserver down\n"); } - print $fh "\n"; - + &log($fh,"\n"); } - print $fh "
$tryserver$result
$tryserver$result
"; - + &log($fh,""); + print "\n$good good, $bad bad connections\n"; &errout($fh); +} + + # ------------------------------------------------------------ Delayed messages +sub check_delayed_msg { + my ($fh)=@_; + &log($fh,'

Delayed Messages

'); + print "Checking buffers.\n"; + + &log($fh,'

Scanning Permanent Log

'); - print $fh '

Delayed Messages

'; - print "checking buffers\n"; + my $unsend=0; - print $fh '

Scanning Permanent Log

'; + my %hostname = &Apache::lonnet::all_hostnames(); + my $numhosts = scalar(keys(%hostname)); - $unsend=0; - { - my $dfh=IO::File->new("$perlvar{'lonDaemons'}/logs/lonnet.perm.log"); - while ($line=<$dfh>) { - ($time,$sdf,$dserv,$dcmd)=split(/:/,$line); - if ($sdf eq 'F') { - $local=localtime($time); - print $fh "Failed: $time, $dserv, $dcmd
"; - $warnings++; - } - if ($sdf eq 'S') { $unsend--; } - if ($sdf eq 'D') { $unsend++; } + my $dfh=IO::File->new("$perlvar{'lonDaemons'}/logs/lonnet.perm.log"); + while (my $line=<$dfh>) { + my ($time,$sdf,$dserv,$dcmd)=split(/:/,$line); + if ($numhosts) { + next unless ($hostname{$dserv}); + } + if ($sdf eq 'F') { + my $local=localtime($time); + &log($fh,"Failed: $time, $dserv, $dcmd
"); + $warnings++; } + if ($sdf eq 'S') { $unsend--; } + if ($sdf eq 'D') { $unsend++; } } - print $fh "Total unsend messages: $unsend

\n"; - $warnings=$warnings+5*$unsend; - if ($unsend) { $simplestatus{'unsend'}=$unsend; } - print $fh "

Outgoing Buffer

"; + &log($fh,"

Total unsend messages: $unsend

\n"); + if ($unsend > 0) { + $warnings=$warnings+5*$unsend; + } + if ($unsend) { $simplestatus{'unsend'}=$unsend; } + &log($fh,"

Outgoing Buffer

\n
");
+# list directory with delayed messages and remember offline servers
+    my %servers=();
     open (DFH,"ls -lF $perlvar{'lonSockDir'}/delayed|");
-    while ($line=) { 
-	print $fh "$line
"; - }; + while (my $line=) { + my ($server)=($line=~/\.(\w+)$/); + if ($server) { $servers{$server}=1; } + &log($fh,&encode_entities($line,'<>&"')); + } + &log($fh,"
\n"); close (DFH); +# pong to all servers that have delayed messages +# this will trigger a reverse connection, which should flush the buffers + foreach my $tryserver (sort(keys(%servers))) { + if ($hostname{$tryserver} || !$numhosts) { + my $answer; + eval { + local $SIG{ ALRM } = sub { die "TIMEOUT" }; + alarm(20); + $answer = &Apache::lonnet::reply("pong",$tryserver); + alarm(0); + }; + if ($@ && $@ =~ m/TIMEOUT/) { + &log($fh,"Attempted pong to $tryserver timed out
"); + print "Time out while contacting: $tryserver for pong.\n"; + } else { + &log($fh,"Pong to $tryserver: $answer
"); + } + } else { + &log($fh,"$tryserver has delayed messages, but is not part of the cluster -- skipping 'Pong'.
"); + } + } +} -# ------------------------------------------------------------------------- End - print $fh "
\n"; +sub finish_logging { + my ($fh)=@_; + &log($fh,"\n"); $totalcount=$notices+4*$warnings+100*$errors; &errout($fh); - print $fh "

Total Error Count: $totalcount

"; - $now=time; - $date=localtime($now); - print $fh "
$date ($now)\n"; - print "lon-status webpage updated\n"; + &log($fh,"

Total Error Count: $totalcount

"); + my $now=time; + my $date=localtime($now); + &log($fh,"
$date ($now)\n"); + print "lon-status webpage updated.\n"; $fh->close(); + + if ($errors) { $simplestatus{'errors'}=$errors; } + if ($warnings) { $simplestatus{'warnings'}=$warnings; } + if ($notices) { $simplestatus{'notices'}=$notices; } + $simplestatus{'time'}=time; } -if ($errors) { $simplestatus{'errors'}=$errors; } -if ($warnings) { $simplestatus{'warnings'}=$warnings; } -if ($notices) { $simplestatus{'notices'}=$notices; } -$simplestatus{'time'}=time; -rename ("$statusdir/newstatus.html","$statusdir/index.html"); -{ +sub log_simplestatus { + rename("$statusdir/newstatus.html","$statusdir/index.html"); + my $sfh=IO::File->new(">$statusdir/loncron_simple.txt"); foreach (keys %simplestatus) { print $sfh $_.'='.$simplestatus{$_}.'&'; @@ -574,21 +736,382 @@ rename ("$statusdir/newstatus.html","$st print $sfh "\n"; $sfh->close(); } -if ($totalcount>200) { - print "sending mail\n"; - $emailto="$perlvar{'lonAdmEMail'}"; - if ($totalcount>1000) { + +sub write_loncaparevs { + print "Retrieving LON-CAPA version information.\n"; + my %hostname = &Apache::lonnet::all_hostnames(); + my $output; + foreach my $id (sort(keys(%hostname))) { + if ($id ne '') { + my $loncaparev; + eval { + local $SIG{ ALRM } = sub { die "TIMEOUT" }; + alarm(10); + $loncaparev = + &Apache::lonnet::get_server_loncaparev('',$id,1,'loncron'); + alarm(0); + }; + if ($@ && $@ =~ m/TIMEOUT/) { + print "Time out while contacting lonHost: $id for version.\n"; + } + if ($loncaparev =~ /^[\w.\-]+$/) { + $output .= $id.':'.$loncaparev."\n"; + } + } + } + if ($output) { + if (open(my $fh,">$perlvar{'lonTabDir'}/loncaparevs.tab")) { + print $fh $output; + close($fh); + &Apache::lonnet::load_loncaparevs(); + } + } + return; +} + +sub write_serverhomeIDs { + print "Retrieving LON-CAPA lonHostID information.\n"; + my %name_to_host = &Apache::lonnet::all_names(); + my $output; + foreach my $name (sort(keys(%name_to_host))) { + if ($name ne '') { + if (ref($name_to_host{$name}) eq 'ARRAY') { + my $serverhomeID; + eval { + local $SIG{ ALRM } = sub { die "TIMEOUT" }; + alarm(10); + $serverhomeID = + &Apache::lonnet::get_server_homeID($name,1,'loncron'); + alarm(0); + }; + if ($@ && $@ =~ m/TIMEOUT/) { + print "Time out while contacting server: $name\n"; + } + if ($serverhomeID ne '') { + $output .= $name.':'.$serverhomeID."\n"; + } else { + $output .= $name.':'.$name_to_host{$name}->[0]."\n"; + } + } + } + } + if ($output) { + if (open(my $fh,">$perlvar{'lonTabDir'}/serverhomeIDs.tab")) { + print $fh $output; + close($fh); + &Apache::lonnet::load_serverhomeIDs(); + } + } + return; +} + +sub write_checksums { + my $distro = &LONCAPA::distro(); + if ($distro) { + print "Retrieving file version and checksumming.\n"; + my $numchksums = 0; + my ($chksumsref,$versionsref) = + &LONCAPA::Checksumming::get_checksums($distro,$perlvar{'lonDaemons'}, + $perlvar{'lonLib'}, + $perlvar{'lonIncludes'}, + $perlvar{'lonTabDir'}); + if (ref($chksumsref) eq 'HASH') { + $numchksums = scalar(keys(%{$chksumsref})); + } + print "File version retrieved and checksumming completed for $numchksums files.\n"; + } else { + print "File version retrieval and checksumming skipped - could not determine Linux distro.\n"; + } + return; +} + +sub write_hostips { + my $lontabdir = $perlvar{'lonTabDir'}; + my $defdom = $perlvar{'lonDefDomain'}; + my $lonhost = $perlvar{'lonHostID'}; + my $newfile = "$lontabdir/currhostips.tab"; + my $oldfile = "$lontabdir/prevhostips.tab"; + my (%prevhosts,%currhosts,%ipchange); + if ((-e $newfile) && (-s $newfile)) { + move($newfile,$oldfile); + chmod(0644,$oldfile); + if (open(my $fh,'<',$oldfile)) { + while (my $line=<$fh>) { + chomp($line); + if ($line =~ /^([^:]+):([\d.]+)$/) { + $prevhosts{$1} = $2; + } + } + close($fh); + } + } + my ($ip_info,$cached) = + &Apache::lonnet::is_cached_new('iphost','iphost'); + if (!$cached) { + &Apache::lonnet::get_iphost(); + ($ip_info,$cached) = + &Apache::lonnet::is_cached_new('iphost','iphost'); + } + if (ref($ip_info) eq 'ARRAY') { + %currhosts = %{$ip_info->[1]}; + if (open(my $fh,'>',$newfile)) { + foreach my $key (keys(%currhosts)) { + print $fh "$key:$currhosts{$key}\n"; + } + close($fh); + chmod(0644,$newfile); + } + } + if (keys(%prevhosts) && keys(%currhosts)) { + foreach my $key (keys(%prevhosts)) { + unless ($currhosts{$key} eq $prevhosts{$key}) { + $ipchange{$key} = $prevhosts{$key}.'|'.$currhosts{$key}; + } + } + foreach my $key (keys(%currhosts)) { + unless ($currhosts{$key} eq $prevhosts{$key}) { + $ipchange{$key} = $prevhosts{$key}.' | '.$currhosts{$key}; + } + } + } + if (&Apache::lonnet::domain($defdom,'primary') eq $lonhost) { + if (keys(%ipchange)) { + if (open(my $fh,'>>',$perlvar{'lonDaemons'}.'/logs/hostip.log')) { + print $fh "********************\n".localtime(time).' Changes --'."\n". + "Hostname | Previous IP | New IP\n". + "--------------------------------\n"; + foreach my $hostname (sort(keys(%ipchange))) { + print $fh "$hostname | $ipchange{$hostname}\n"; + } + print $fh "\n*******************\n\n"; + close($fh); + } + my $emailto = &Apache::loncommon::build_recipient_list(undef, + 'hostipmail',$defdom); + if ($emailto) { + my $subject = "LON-CAPA Hostname to IP change ($perlvar{'lonHostID'})"; + my $chgmail = "To: $emailto\n". + "Subject: $subject\n". + "Content-type: text/plain\; charset=UTF-8\n". + "MIME-Version: 1.0\n\n". + "Host/IP changes\n". + " \n". + "Hostname | Previous IP | New IP\n". + "--------------------------------\n"; + foreach my $hostname (sort(keys(%ipchange))) { + $chgmail .= "$hostname | $ipchange{$hostname}\n"; + } + $chgmail .= "\n\n"; + if (open(my $mailh, "|/usr/lib/sendmail -oi -t -odb")) { + print $mailh $chgmail; + close($mailh); + print "Sending mail notification of hostname/IP changes.\n"; + } + } + } + } + return; +} + +sub send_mail { + my $defdom = $perlvar{'lonDefDomain'}; + my $origmail = $perlvar{'lonAdmEMail'}; + my $emailto = &Apache::loncommon::build_recipient_list(undef, + 'lonstatusmail',$defdom,$origmail); + if ($totalcount>2500) { $emailto.=",$perlvar{'lonSysEMail'}"; } - $subj="LON: $perlvar{'lonHostID'} E:$errors W:$warnings N:$notices"; - system("metasend -b -t $emailto -s '$subj' -f $statusdir/index.html -m text/html"); + my $from; + my $hostname=`/bin/hostname`; + chop($hostname); + $hostname=~s/[^\w\.]//g; + if ($hostname) { + $from = 'www@'.$hostname; + } + my $subj="LON: $perlvar{'lonHostID'} E:$errors W:$warnings N:$notices"; + my $loncronmail = "To: $emailto\n". + "From: $from\n". + "Subject: ".$subj."\n". + "Content-type: text/html\; charset=UTF-8\n". + "MIME-Version: 1.0\n\n"; + if (open(my $fh,"<$statusdir/index.html")) { + while (<$fh>) { + $loncronmail .= $_; + } + close($fh); + } else { + $loncronmail .= "Failed to read from http://$hostname/lon-status/index.html\n"; + } + $loncronmail .= "\n\n"; + if (open(my $mailh, "|/usr/lib/sendmail -oi -t -odb")) { + print $mailh $loncronmail; + close($mailh); + print "Sending mail.\n"; + } else { + print "Sending mail failed.\n"; + } } -1; +sub usage { + print(< \$help, + "justcheckdaemons" => \$justcheckdaemons, + "noemail" => \$noemail, + "justcheckconnections" => \$justcheckconnections, + "justreload" => \$justreload, + "justiptables" => \$justiptables + ); + if ($help) { &usage(); return; } +# --------------------------------- Read loncapa_apache.conf and loncapa.conf + my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf'); + %perlvar=%{$perlvarref}; + undef $perlvarref; + delete $perlvar{'lonReceipt'}; # remove since sensitive and not needed + delete $perlvar{'lonSqlAccess'}; # remove since sensitive and not needed + chdir($perlvar{'lonDaemons'}); +# --------------------------------------- Make sure that LON-CAPA is configured +# I only test for one thing here (lonHostID). This is just a safeguard. + if ('{[[[[lonHostID]]]]}' eq $perlvar{'lonHostID'}) { + print("Unconfigured machine.\n"); + my $emailto=$perlvar{'lonSysEMail'}; + my $hostname=`/bin/hostname`; + chop $hostname; + $hostname=~s/[^\w\.]//g; # make sure is safe to pass through shell + my $subj="LON: Unconfigured machine $hostname"; + system("echo 'Unconfigured machine $hostname.' |". + " mail -s '$subj' $emailto > /dev/null"); + exit 1; + } +# ----------------------------- Make sure this process is running from user=www + my $wwwid=getpwnam('www'); + if ($wwwid!=$<) { + print("User ID mismatch. This program must be run as user 'www'.\n"); + my $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}"; + my $subj="LON: $perlvar{'lonHostID'} User ID mismatch"; + system("echo 'User ID mismatch. loncron must be run as user www.' |". + " mail -s '$subj' $emailto > /dev/null"); + exit 1; + } + +# -------------------------------------------- Force reload of host information + my $nomemcache; + if ($justcheckdaemons) { + $nomemcache=1; + my $memcachepidfile="$perlvar{'lonDaemons'}/logs/memcached.pid"; + my $memcachepid; + if (-e $memcachepidfile) { + my $memfh=IO::File->new($memcachepidfile); + $memcachepid=<$memfh>; + chomp($memcachepid); + if ($memcachepid =~ /^\d+$/ && kill 0 => $memcachepid) { + undef($nomemcache); + } + } + } + if (!$justiptables) { + &Apache::lonnet::load_hosts_tab(1,$nomemcache); + &Apache::lonnet::load_domain_tab(1,$nomemcache); + &Apache::lonnet::get_iphost(1,$nomemcache); + } + +# ----------------------------------------- Force firewall update for lond port + + if ((!$justcheckdaemons) && (!$justreload)) { + my $now = time; + my $tmpfile = $perlvar{'lonDaemons'}.'/tmp/lciptables_iphost_'. + $now.$$.int(rand(10000)); + if (open(my $fh,">$tmpfile")) { + my %iphosts = &Apache::lonnet::get_iphost(); + foreach my $key (keys(%iphosts)) { + print $fh "$key\n"; + } + close($fh); + if (&LONCAPA::try_to_lock('/tmp/lock_lciptables')) { + my $execpath = $perlvar{'lonDaemons'}.'/lciptables'; + system("$execpath $tmpfile"); + unlink('/tmp/lock_lciptables'); # Remove the lock file. + } + unlink($tmpfile); + } + } +# ---------------------------------------------------------------- Start report + $errors=0; + $warnings=0; + $notices=0; + + my $fh; + if (!$justcheckdaemons && !$justcheckconnections && !$justreload && !$justiptables) { + $fh=&start_logging(); + + &log_machine_info($fh); + &clean_tmp($fh); + &clean_lonIDs($fh); + &clean_webDAV_sessionIDs($fh); + &check_httpd_logs($fh); + &rotate_lonnet_logs($fh); + &rotate_other_logs($fh); + } + if (!$justcheckconnections && !$justreload && !$justiptables) { + &checkon_daemon($fh,'lonmemcached',40000); + &checkon_daemon($fh,'lonsql',200000); + if ( &checkon_daemon($fh,'lond',40000,'USR1') eq 'running') { + &checkon_daemon($fh,'lond',40000,'USR2'); + } + &checkon_daemon($fh,'lonc',40000,'USR1'); + &checkon_daemon($fh,'lonmaxima',40000); + &checkon_daemon($fh,'lonr',40000); + } + if ($justreload) { + &checkon_daemon($fh,'lond',40000,'USR2'); + &checkon_daemon($fh,'lonc',40000,'USR2'); + } + if ($justcheckconnections) { + &test_connections($fh); + } + if (!$justcheckdaemons && !$justcheckconnections && !$justreload && !$justiptables) { + &check_delayed_msg($fh); + &finish_logging($fh); + &log_simplestatus(); + &write_loncaparevs(); + &write_serverhomeIDs(); + &write_checksums(); + &write_hostips(); + if ($totalcount>200 && !$noemail) { &send_mail(); } + } +} +&main(); +1; 500 Internal Server Error

Internal Server Error

The server encountered an internal error or misconfiguration and was unable to complete your request.

Please contact the server administrator at root@localhost to inform them of the time this error occurred, and the actions you performed just before this error.

More information about this error may be available in the server error log.