File:  [LON-CAPA] / loncom / loncron
Revision 1.46: download - view: text, annotated - select for diffs
Tue May 11 19:12:50 2004 UTC (20 years ago) by albertel
Branches: MAIN
CVS tags: HEAD
- modularized (refactored?)

#!/usr/bin/perl

# The LearningOnline Network
# Housekeeping program, started by cron
#
# (TCP networking package
# 6/1/99,6/2,6/10,6/11,6/12,6/14,6/26,6/28,6/29,6/30,
# 7/1,7/2,7/9,7/10,7/12 Gerd Kortemeyer)
#
# 7/14,7/15,7/19,7/21,7/22,11/18,
# 2/8 Gerd Kortemeyer
# 12/23 Gerd Kortemeyer
# YEAR=2001
# 09/04,09/06,11/26 Gerd Kortemeyer

$|=1;

use lib '/home/httpd/lib/perl/';
use LONCAPA::Configuration;

use IO::File;
use IO::Socket;
use strict;
#globals
use vars qw (%perlvar %simplestatus $errors $warnings $notices $totalcount);

my $statusdir="/home/httpd/html/lon-status";


# -------------------------------------------------- Non-critical communication
sub reply {
    my ($cmd,$server)=@_;
    my $peerfile="$perlvar{'lonSockDir'}/$server";
    my $client=IO::Socket::UNIX->new(Peer    =>"$peerfile",
                                     Type    => SOCK_STREAM,
                                     Timeout => 10)
       or return "con_lost";
    print $client "$cmd\n";
    my $answer=<$client>;
    chomp($answer);
    if (!$answer) { $answer="con_lost"; }
    return $answer;
}

# --------------------------------------------------------- Output error status

sub log {
    my $fh=shift;
    if ($fh) {	print $fh @_  }
}

sub errout {
   my $fh=shift;
   &log($fh,(<<ENDERROUT));
     <p><table border=2 bgcolor="#CCCCCC">
     <tr><td>Notices</td><td>$notices</td></tr>
     <tr><td>Warnings</td><td>$warnings</td></tr>
     <tr><td>Errors</td><td>$errors</td></tr>
     </table><p><a href="#top">Top</a><p>
ENDERROUT
}

sub start_daemon {
    my ($fh,$daemon,$pidfile) = @_;
    my $progname=$daemon;
    if ($daemon eq 'lonc' && $ARGV[0] eq 'new') {
	$progname='loncnew'; 
	print "new ";
    }
    system("$perlvar{'lonDaemons'}/$progname 2>>$perlvar{'lonDaemons'}/logs/${daemon}_errors");
    sleep 2;
    if (-e $pidfile) {
	&log($fh,"Seems like it started ...<p>");
	my $lfh=IO::File->new("$pidfile");
	my $daemonpid=<$lfh>;
	chomp($daemonpid);
	sleep 2;
	if (kill 0 => $daemonpid) {
	    return 1;
	} else {
	    return 0;
	}
    }
    &log($fh,"Seems like that did not work!<p>");
    $errors++;
    return 0;
}

sub checkon_daemon {
    my ($fh,$daemon,$maxsize,$sendusr1)=@_;

    &log($fh,'<hr><a name="'.$daemon.'"><h2>'.$daemon.'</h2><h3>Log</h3><pre>');
    printf("%-10s ",$daemon);
    if (-e "$perlvar{'lonDaemons'}/logs/$daemon.log"){
	open (DFH,"tail -n25 $perlvar{'lonDaemons'}/logs/$daemon.log|");
	while (my $line=<DFH>) { 
	    &log($fh,"$line");
	    if ($line=~/INFO/) { $notices++; }
	    if ($line=~/WARNING/) { $notices++; }
	    if ($line=~/CRITICAL/) { $warnings++; }
	};
	close (DFH);
    }
    &log($fh,"</pre>");
    
    my $pidfile="$perlvar{'lonDaemons'}/logs/$daemon.pid";
    
    my $restartflag=1;
    my $daemonpid;
    if (-e $pidfile) {
	my $lfh=IO::File->new("$pidfile");
	$daemonpid=<$lfh>;
	chomp($daemonpid);
	if (kill 0 => $daemonpid) {
	    &log($fh,"<h3>$daemon at pid $daemonpid responding");
	    if ($sendusr1) { &log($fh,", sending USR1"); }
	    &log($fh,"</h3>");
	    if ($sendusr1) { kill USR1 => $daemonpid; }
	    $restartflag=0;
	    print "running\n";
	} else {
	    $errors++;
	    &log($fh,"<h3>$daemon at pid $daemonpid not responding</h3>");
	    $restartflag=1;
	    &log($fh,"<h3>Decided to clean up stale .pid file and restart $daemon</h3>");
	}
    }
    if ($restartflag==1) {
	$simplestatus{$daemon}='off';
	$errors++;
	&log($fh,'<br><font color="red">Killall '.$daemon.': '.
	    `killall $daemon 2>&1`.' - ');
	sleep 2;
	&log($fh,unlink($pidfile).' - '.
	    `killall -9 $daemon 2>&1`.
	    '</font><br>');
	&log($fh,"<h3>$daemon not running, trying to start</h3>");
	
	if (&start_daemon($fh,$daemon,$pidfile)) {
	    &log($fh,"<h3>$daemon at pid $daemonpid responding</h3>");
	    $simplestatus{$daemon}='restarted';
	    print "started\n";
	} else {
	    $errors++;
	    &log($fh,"<h3>$daemon at pid $daemonpid not responding</h3>");
	    &log($fh,"Give it one more try ...<p>");
	    print " ";
	    if (&start_daemon($fh,$daemon,$pidfile)) {
		&log($fh,"<h3>$daemon at pid $daemonpid responding</h3>");
		$simplestatus{$daemon}='restarted';
		print "started\n";
	    } else {
		print " failed\n";
		$simplestatus{$daemon}='failed';
		$errors++; $errors++;
		&log($fh,"<h3>$daemon at pid $daemonpid not responding</h3>");
		&log($fh,"Unable to start $daemon<p>");
	    }
	}

	if (-e "$perlvar{'lonDaemons'}/logs/$daemon.log"){
	    &log($fh,"<p><pre>");
	    open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/$daemon.log|");
	    while (my $line=<DFH>) { 
		&log($fh,"$line");
		if ($line=~/WARNING/) { $notices++; }
		if ($line=~/CRITICAL/) { $notices++; }
	    };
	    close (DFH);
	    &log($fh,"</pre>");
	}
    }
    
    my $fname="$perlvar{'lonDaemons'}/logs/$daemon.log";
    
    my ($dev,$ino,$mode,$nlink,
	$uid,$gid,$rdev,$size,
	$atime,$mtime,$ctime,
	$blksize,$blocks)=stat($fname);
    
    if ($size>$maxsize) {
	&log($fh,"Rotating logs ...<p>");
	rename("$fname.2","$fname.3");
	rename("$fname.1","$fname.2");
	rename("$fname","$fname.1");
    }

    &errout($fh);
}

# --------------------------------------------------------------------- Machine
sub log_machine_info {
    my ($fh)=@_;
    &log($fh,'<hr><a name="machine"><h2>Machine Information</h2>');
    &log($fh,"<h3>loadavg</h3>");
	
    open (LOADAVGH,"/proc/loadavg");
    my $loadavg=<LOADAVGH>;
    close (LOADAVGH);
    
    &log($fh,"<tt>$loadavg</tt>");
    
    my @parts=split(/\s+/,$loadavg);
    if ($parts[1]>4.0) {
	$errors++;
    } elsif ($parts[1]>2.0) {
	$warnings++;
    } elsif ($parts[1]>1.0) {
	$notices++;
    }

    &log($fh,"<h3>df</h3>");
    &log($fh,"<pre>");

    open (DFH,"df|");
    while (my $line=<DFH>) { 
	&log($fh,"$line"); 
	@parts=split(/\s+/,$line);
	my $usage=$parts[4];
	$usage=~s/\W//g;
	if ($usage>90) { 
	    $warnings++;
	    $notices++; 
	} elsif ($usage>80) {
	    $warnings++;
	} elsif ($usage>60) {
	    $notices++;
	}
	if ($usage>95) { $warnings++; $warnings++; $simplestatus{'diskfull'}++; }
    }
    close (DFH);
    &log($fh,"</pre>");


    &log($fh,"<h3>ps</h3>");
    &log($fh,"<pre>");
    my $psproc=0;

    open (PSH,"ps -aux --cols 140 |");
    while (my $line=<PSH>) { 
	&log($fh,"$line"); 
	$psproc++;
    }
    close (PSH);
    &log($fh,"</pre>");

    if ($psproc>200) { $notices++; }
    if ($psproc>250) { $notices++; }

    &errout($fh);
}

sub start_logging {
    my ($hostdom,$hostrole,$hostname,$spareid)=@_;
    my $fh=IO::File->new(">$statusdir/newstatus.html");
    my %simplestatus=();
    my $now=time;
    my $date=localtime($now);
    

    &log($fh,(<<ENDHEADERS));
<html>
<head>
<title>LON Status Report $perlvar{'lonHostID'}</title>
</head>
<body bgcolor="#AAAAAA">
<a name="top">
<h1>LON Status Report $perlvar{'lonHostID'}</h1>
<h2>$date ($now)</h2>
<ol>
<li><a href="#configuration">Configuration</a>
<li><a href="#machine">Machine Information</a>
<li><a href="#tmp">Temporary Files</a>
<li><a href="#tokens">Session Tokens</a>
<li><a href="#httpd">httpd</a>
<li><a href="#lonsql">lonsql</a>
<li><a href="#lond">lond</a>
<li><a href="#lonc">lonc</a>
<li><a href="#lonhttpd">lonhttpd</a>
<li><a href="#lonnet">lonnet</a>
<li><a href="#connections">Connections</a>
<li><a href="#delayed">Delayed Messages</a>
<li><a href="#errcount">Error Coindex.html.unt</a>
</ol>
<hr>
<a name="configuration">
<h2>Configuration</h2>
<h3>PerlVars</h3>
<table border=2>
ENDHEADERS

    foreach my $varname (sort(keys(%perlvar))) {
	&log($fh,"<tr><td>$varname</td><td>$perlvar{$varname}</td></tr>\n");
    }
    &log($fh,"</table><h3>Hosts</h3><table border=2>");
    foreach my $id (sort(keys(%{$hostname}))) {
	&log($fh,
	    "<tr><td>$id</td><td>".$hostdom->{$id}.
	    "</td><td>".$hostrole->{$id}.
	    "</td><td>".$hostname->{$id}."</td></tr>\n");
    }
    &log($fh,"</table><h3>Spare Hosts</h3><ol>");
    foreach my $id (sort(keys(%{$spareid}))) {
	&log($fh,"<li>$id\n");
    }
    &log($fh,"</ol>\n");
    return $fh;
}

# --------------------------------------------------------------- clean out tmp
sub clean_tmp {
    my ($fh)=@_;
    &log($fh,'<hr><a name="tmp"><h2>Temporary Files</h2>');
    my $cleaned=0;
    my $old=0;
    while (my $fname=<$perlvar{'lonDaemons'}/tmp/*>) {
	my ($dev,$ino,$mode,$nlink,
	    $uid,$gid,$rdev,$size,
	    $atime,$mtime,$ctime,
	    $blksize,$blocks)=stat($fname);
	my $now=time;
	my $since=$now-$mtime;
	if ($since>$perlvar{'lonExpire'}) {
	    my $line='';
	    if (open(PROBE,$fname)) {
		$line=<PROBE>;
		close(PROBE);
	    }
	    unless ($line=~/^CHECKOUTTOKEN\&/) {
		$cleaned++;
		unlink("$fname");
	    } else {
		if ($since>365*$perlvar{'lonExpire'}) {
		    $cleaned++;
		    unlink("$fname");
		} else { $old++; }
	    }
	}
    }
    &log($fh,"Cleaned up ".$cleaned." files (".$old." old checkout tokens).");
}

# ------------------------------------------------------------ clean out lonIDs
sub clean_lonIDs {
    my ($fh)=@_;
    &log($fh,'<hr><a name="tokens"><h2>Session Tokens</h2>');
    my $cleaned=0;
    my $active=0;
    while (my $fname=<$perlvar{'lonIDsDir'}/*>) {
	my ($dev,$ino,$mode,$nlink,
	    $uid,$gid,$rdev,$size,
	    $atime,$mtime,$ctime,
	    $blksize,$blocks)=stat($fname);
	my $now=time;
	my $since=$now-$mtime;
	if ($since>$perlvar{'lonExpire'}) {
	    $cleaned++;
	    &log($fh,"Unlinking $fname<br>");
	    unlink("$fname");
	} else {
	    $active++;
	}
    }
    &log($fh,"<p>Cleaned up ".$cleaned." stale session token(s).");
    &log($fh,"<h3>$active open session(s)</h3>");
}


# ----------------------------------------------------------------------- httpd
sub check_httpd_logs {
    my ($fh)=@_;
    &log($fh,'<hr><a name="httpd"><h2>httpd</h2><h3>Access Log</h3><pre>');
    
    open (DFH,"tail -n25 /etc/httpd/logs/access_log|");
    while (my $line=<DFH>) { &log($fh,"$line") };
    close (DFH);
	
    &log($fh,"</pre><h3>Error Log</h3><pre>");
	
    open (DFH,"tail -n25 /etc/httpd/logs/error_log|");
    while (my $line=<DFH>) { 
	&log($fh,"$line");
	if ($line=~/\[error\]/) { $notices++; } 
    }
    close (DFH);
    &log($fh,"</pre>");
    &errout($fh);
}

# ---------------------------------------------------------------------- lonnet

sub rotate_logs {
    my ($fh)=@_;
    &log($fh,'<hr><a name="lonnet"><h2>lonnet</h2><h3>Temp Log</h3><pre>');
    print "checking logs\n";
    if (-e "$perlvar{'lonDaemons'}/logs/lonnet.log"){
	open (DFH,"tail -n50 $perlvar{'lonDaemons'}/logs/lonnet.log|");
	while (my $line=<DFH>) { 
	    &log($fh,"$line");
	}
	close (DFH);
    }
    &log($fh,"</pre><h3>Perm Log</h3><pre>");
    
    if (-e "$perlvar{'lonDaemons'}/logs/lonnet.perm.log") {
	open(DFH,"tail -n10 $perlvar{'lonDaemons'}/logs/lonnet.perm.log|");
	while (my $line=<DFH>) { 
	    &log($fh,"$line");
	}
	close (DFH);
    } else { &log($fh,"No perm log\n") }

    my $fname="$perlvar{'lonDaemons'}/logs/lonnet.log";

    my ($dev,$ino,$mode,$nlink,
	$uid,$gid,$rdev,$size,
	$atime,$mtime,$ctime,
	$blksize,$blocks)=stat($fname);

    if ($size>40000) {
	&log($fh,"Rotating logs ...<p>");
	rename("$fname.2","$fname.3");
	rename("$fname.1","$fname.2");
	rename("$fname","$fname.1");
    }

    &log($fh,"</pre>");
    &errout($fh);
}

# ----------------------------------------------------------------- Connections
sub test_connections {
    my ($fh,$hostname)=@_;
    &log($fh,'<hr><a name="connections"><h2>Connections</h2>');
    print "testing connections\n";
    &log($fh,"<table border=2>");
    foreach my $tryserver (sort(keys(%{$hostname}))) {
	print(".");
	my $result;
	my $answer=reply("pong",$tryserver);
	if ($answer eq "$tryserver:$perlvar{'lonHostID'}") {
	    $result="<b>ok</b>";
	} else {
	    $result=$answer;
	    $warnings++;
	    if ($answer eq 'con_lost') { $warnings++; }
	}
	if ($answer =~ /con_lost/) { print(" $tryserver down\n"); }
	&log($fh,"<tr><td>$tryserver</td><td>$result</td></tr>\n");
    }
    &log($fh,"</table>");

    &errout($fh);
}


# ------------------------------------------------------------ Delayed messages
sub check_delayed_msg {
    my ($fh)=@_;
    &log($fh,'<hr><a name="delayed"><h2>Delayed Messages</h2>');
    print "checking buffers\n";
    
    &log($fh,'<h3>Scanning Permanent Log</h3>');

    my $unsend=0;

    my $dfh=IO::File->new("$perlvar{'lonDaemons'}/logs/lonnet.perm.log");
    while (my $line=<$dfh>) {
	my ($time,$sdf,$dserv,$dcmd)=split(/:/,$line);
	if ($sdf eq 'F') { 
	    my $local=localtime($time);
	    &log($fh,"<b>Failed: $time, $dserv, $dcmd</b><br>");
	    $warnings++;
	}
	if ($sdf eq 'S') { $unsend--; }
	if ($sdf eq 'D') { $unsend++; }
    }

    &log($fh,"Total unsend messages: <b>$unsend</b><p>\n");
    $warnings=$warnings+5*$unsend;

    if ($unsend) { $simplestatus{'unsend'}=$unsend; }
    &log($fh,"<h3>Outgoing Buffer</h3>");

    open (DFH,"ls -lF $perlvar{'lonSockDir'}/delayed|");
    while (my $line=<DFH>) { 
	&log($fh,"$line<br>");
    }
    close (DFH);
}

sub finish_logging {
    my ($fh)=@_;
    &log($fh,"<a name=errcount>\n");
    $totalcount=$notices+4*$warnings+100*$errors;
    &errout($fh);
    &log($fh,"<h1>Total Error Count: $totalcount</h1>");
    my $now=time;
    my $date=localtime($now);
    &log($fh,"<hr>$date ($now)</body></html>\n");
    print "lon-status webpage updated\n";
    $fh->close();

    if ($errors) { $simplestatus{'errors'}=$errors; }
    if ($warnings) { $simplestatus{'warnings'}=$warnings; }
    if ($notices) { $simplestatus{'notices'}=$notices; }
    $simplestatus{'time'}=time;
}

sub log_simplestatus {
    rename ("$statusdir/newstatus.html","$statusdir/index.html");
    
    my $sfh=IO::File->new(">$statusdir/loncron_simple.txt");
    foreach (keys %simplestatus) {
	print $sfh $_.'='.$simplestatus{$_}.'&';
    }
    print $sfh "\n";
    $sfh->close();
}

sub send_mail {
    print "sending mail\n";
    my $emailto="$perlvar{'lonAdmEMail'}";
    if ($totalcount>1000) {
	$emailto.=",$perlvar{'lonSysEMail'}";
    }
    my $subj="LON: $perlvar{'lonHostID'} E:$errors W:$warnings N:$notices"; 
    system("metasend -b -t $emailto -s '$subj' -f $statusdir/index.html -m text/html");
}

# ================================================================ Main Program
sub main () {
# --------------------------------- Read loncapa_apache.conf and loncapa.conf
    my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf');
    %perlvar=%{$perlvarref};
    undef $perlvarref;
    delete $perlvar{'lonReceipt'}; # remove since sensitive and not needed
    delete $perlvar{'lonSqlAccess'}; # remove since sensitive and not needed

# --------------------------------------- Make sure that LON-CAPA is configured
# I only test for one thing here (lonHostID).  This is just a safeguard.
    if ('{[[[[lonHostID]]]]}' eq $perlvar{'lonHostID'}) {
	print("Unconfigured machine.\n");
	my $emailto=$perlvar{'lonSysEMail'};
	my $hostname=`/bin/hostname`;
	chop $hostname;
	$hostname=~s/[^\w\.]//g; # make sure is safe to pass through shell
	my $subj="LON: Unconfigured machine $hostname";
	system("echo 'Unconfigured machine $hostname.' |\
 mailto $emailto -s '$subj' > /dev/null");
	exit 1;
    }

# ----------------------------- Make sure this process is running from user=www
    my $wwwid=getpwnam('www');
    if ($wwwid!=$<) {
	print("User ID mismatch.  This program must be run as user 'www'\n");
	my $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
	my $subj="LON: $perlvar{'lonHostID'} User ID mismatch";
	system("echo 'User ID mismatch.  loncron must be run as user www.' |\
 mailto $emailto -s '$subj' > /dev/null");
	exit 1;
    }

# ------------------------------------------------------------- Read hosts file
    my $config=IO::File->new("$perlvar{'lonTabDir'}/hosts.tab");
    
    my (%hostname,%hostdom,%hostrole,%spareid);
    while (my $configline=<$config>) {
	my ($id,$domain,$role,$name,$ip,$domdescr)=split(/:/,$configline);
	if ($id && $domain && $role && $name && $ip) {
	    $hostname{$id}=$name;
	    $hostdom{$id}=$domain;
	    $hostrole{$id}=$role;
	} else {
	    if ($configline) {
#		&logthis("Skipping hosts.tab line -$configline-");
	    }
	}
    }
    undef $config;

# ------------------------------------------------------ Read spare server file
    $config=IO::File->new("$perlvar{'lonTabDir'}/spare.tab");
    
    while (my $configline=<$config>) {
	chomp($configline);
	if (($configline) && ($configline ne $perlvar{'lonHostID'})) {
	    $spareid{$configline}=1;
	}
    }
    undef $config;

# ---------------------------------------------------------------- Start report

    $errors=0;
    $warnings=0;
    $notices=0;

    my $fh=&start_logging(\%hostdom,\%hostrole,\%hostname,\%spareid);

    &log_machine_info($fh);
    &clean_tmp($fh);
    &clean_lonIDs($fh);
    &check_httpd_logs($fh);
    &checkon_daemon($fh,'lonsql',200000);
    &checkon_daemon($fh,'lond',40000,1);
    &checkon_daemon($fh,'lonc',40000,1);
    &checkon_daemon($fh,'lonhttpd',40000);
    
    &test_connections($fh,\%hostname);
    &check_delayed_msg($fh);

    &finish_logging($fh);
    &log_simplestatus();
	
    if ($totalcount>200) { &send_mail(); }
}

&main();
1;









FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>
500 Internal Server Error

Internal Server Error

The server encountered an internal error or misconfiguration and was unable to complete your request.

Please contact the server administrator at root@localhost to inform them of the time this error occurred, and the actions you performed just before this error.

More information about this error may be available in the server error log.