--- loncom/loncron 1999/11/18 19:52:46 1.2 +++ loncom/loncron 2001/02/26 21:37:52 1.17 @@ -7,7 +7,11 @@ # 6/1/99,6/2,6/10,6/11,6/12,6/14,6/26,6/28,6/29,6/30, # 7/1,7/2,7/9,7/10,7/12 Gerd Kortemeyer) # -# 7/14,7/15,7/19,7/21,7/22,11/18 Gerd Kortemeyer +# 7/14,7/15,7/19,7/21,7/22,11/18, +# 2/8 Gerd Kortemeyer +# Dec 00 Scott Harrison +# 12/23 Gerd Kortemeyer +# 02/12/2001 Scott Harrison use IO::File; use IO::Socket; @@ -42,7 +46,6 @@ ENDERROUT # ================================================================ Main Program - # ------------------------------------------------------------ Read access.conf { my $config=IO::File->new("/etc/httpd/conf/access.conf"); @@ -55,6 +58,31 @@ ENDERROUT } } +# --------------------------------------- Make sure that LON-CAPA is configured +# I only test for one thing here (lonHostID). This is just a safeguard. +if ('{[[[[lonHostID]]]]}' eq $perlvar{'lonHostID'}) { + print("Unconfigured machine.\n"); + $emailto=$perlvar{'lonSysEMail'}; + $hostname=`/bin/hostname`; + chop $hostname; + $hostname=~s/[^\w\.]//g; # make sure is safe to pass through shell + $subj="LON: Unconfigured machine $hostname"; + system("echo 'Unconfigured machine $hostname.' |\ + mailto $emailto -s '$subj' > /dev/null"); + exit 1; +} + +# ----------------------------- Make sure this process is running from user=www +my $wwwid=getpwnam('www'); +if ($wwwid!=$<) { + print("User ID mismatch. This program must be run as user 'www'\n"); + $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}"; + $subj="LON: $perlvar{'lonHostID'} User ID mismatch"; + system("echo 'User ID mismatch. loncron must be run as user www.' |\ + mailto $emailto -s '$subj' > /dev/null"); + exit 1; +} + # ------------------------------------------------------------- Read hosts file { my $config=IO::File->new("$perlvar{'lonTabDir'}/hosts.tab"); @@ -102,14 +130,17 @@ print $fh (< LON Status Report $perlvar{'lonHostID'} - +

LON Status Report $perlvar{'lonHostID'}

$date ($now)

  1. Configuration
  2. Machine Information +
  3. Temporary Files +
  4. Session Tokens
  5. httpd +
  6. lonsql
  7. lond
  8. lonc
  9. lonnet @@ -152,7 +183,7 @@ close (LOADAVGH); print $fh "$loadavg"; @parts=split(/\s+/,$loadavg); -if ($parts[1]>3.0) { +if ($parts[1]>4.0) { $errors++; } elsif ($parts[1]>2.0) { $warnings++; @@ -170,17 +201,59 @@ while ($line=) { $usage=$parts[4]; $usage=~s/\W//g; if ($usage>90) { - $errors++; + $warnings++; } elsif ($usage>80) { $warnings++; } elsif ($usage>60) { $notices++; } - if ($usage>95) { $errors++; } + if ($usage>95) { $warnings++; $warnings++ } } close (DFH); print $fh ""; &errout($fh); + +# --------------------------------------------------------------- clean out tmp +print $fh '

    Temporary Files

    '; +$cleaned=0; +while ($fname=<$perlvar{'lonDaemons'}/tmp/*>) { + my ($dev,$ino,$mode,$nlink, + $uid,$gid,$rdev,$size, + $atime,$mtime,$ctime, + $blksize,$blocks)=stat($fname); + $now=time; + $since=$now-$mtime; + if ($since>$perlvar{'lonExpire'}) { + $cleaned++; + unlink("$fname"); + } + +} +print $fh "Cleaned up ".$cleaned." files."; + +# ------------------------------------------------------------ clean out lonIDs +print $fh '

    Session Tokens

    '; +$cleaned=0; +$active=0; +while ($fname=<$perlvar{'lonIDsDir'}/*>) { + my ($dev,$ino,$mode,$nlink, + $uid,$gid,$rdev,$size, + $atime,$mtime,$ctime, + $blksize,$blocks)=stat($fname); + $now=time; + $since=$now-$mtime; + if ($since>$perlvar{'lonExpire'}) { + $cleaned++; + print $fh "Unlinking $fname
    "; + unlink("$fname"); + } else { + $active++; + } + +} +print $fh "

    Cleaned up ".$cleaned." stale session token(s)."; +print $fh "

    $active open session(s)

    "; + # ----------------------------------------------------------------------- httpd print $fh '

    httpd

    Access Log

    ';
    @@ -199,15 +272,106 @@ while ($line=) {
     close (DFH);
     print $fh "
    "; &errout($fh); + + +# ---------------------------------------------------------------------- lonsql +# +# Do not run for now +# +if ($perlvar{'lonRole'} eq "library" && 1==0) { + + print $fh '

    lonsql

    Log

    ';
    +    
    +    if (-e "$perlvar{'lonDaemons'}/logs/lonsql.log"){
    +	open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lonsql.log|");
    +	while ($line=) { 
    +	    print $fh "$line";
    +	    if ($line=~/INFO/) { $notices++; }
    +	    if ($line=~/WARNING/) { $notices++; }
    +	    if ($line=~/CRITICAL/) { $warnings++; }
    +	};
    +	close (DFH);
    +    }
    +    print $fh "
    "; + + my $lonsqlfile="$perlvar{'lonDaemons'}/logs/lonsql.pid"; + + if (-e $lonsqlfile) { + my $lfh=IO::File->new("$lonsqlfile"); + my $lonsqlpid=<$lfh>; + chomp($lonsqlpid); + if (kill 0 => $lonsqlpid) { + print $fh "

    lonsql at pid $lonsqlpid responding

    "; + } else { + $errors++; $errors++; + print $fh "

    lonsql at pid $lonsqlpid not responding

    "; + } + } else { + $errors++; + print $fh "

    lonsql not running, trying to start

    "; + system( + "$perlvar{'lonDaemons'}/lonsql 2>>$perlvar{'lonDaemons'}/logs/lonsql_errors"); + sleep 120; + if (-e $lonsqlfile) { + print $fh "Seems like it started ...

    "; + my $lfh=IO::File->new("$lonsqlfile"); + my $lonsqlpid=<$lfh>; + chomp($lonsqlpid); + sleep 30; + if (kill 0 => $lonsqlpid) { + print $fh "

    lonsql at pid $lonsqlpid responding

    "; + } else { + $errors++; $errors++; + print $fh "

    lonsql at pid $lonsqlpid not responding

    "; + print $fh "Give it one more try ...

    "; + system( + "$perlvar{'lonDaemons'}/lonsql 2>>$perlvar{'lonDaemons'}/logs/lonsql_errors"); + sleep 120; + } + } else { + print $fh "Seems like that did not work!

    "; + $errors++; + } + if (-e "$perlvar{'lonDaemons'}/logs/lonsql.log"){ + print $fh "

    ";
    +	    open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lonsql.log|");
    +	    while ($line=) { 
    +		print $fh "$line";
    +		if ($line=~/WARNING/) { $notices++; }
    +		if ($line=~/CRITICAL/) { $notices++; }
    +	    };
    +	    close (DFH);
    +	    print $fh "
    "; + } + } + + $fname="$perlvar{'lonDaemons'}/logs/lonsql.log"; + + my ($dev,$ino,$mode,$nlink, + $uid,$gid,$rdev,$size, + $atime,$mtime,$ctime, + $blksize,$blocks)=stat($fname); + + if ($size>40000) { + print $fh "Rotating logs ...

    "; + rename("$fname.2","$fname.3"); + rename("$fname.1","$fname.2"); + rename("$fname","$fname.1"); + } + + &errout($fh); +} # ------------------------------------------------------------------------ lond print $fh '


    lond

    Log

    ';
     
     if (-e "$perlvar{'lonDaemons'}/logs/lond.log"){
    -open (DFH,"tail -n50 $perlvar{'lonDaemons'}/logs/lond.log|");
    +open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lond.log|");
     while ($line=) { 
        print $fh "$line";
    -   if ($line=~/giving up/) { $notices++; }
    +   if ($line=~/INFO/) { $notices++; }
    +   if ($line=~/WARNING/) { $notices++; }
    +   if ($line=~/CRITICAL/) { $warnings++; }
     };
     close (DFH);
     }
    @@ -215,20 +379,41 @@ print $fh "
    "; my $londfile="$perlvar{'lonDaemons'}/logs/lond.pid"; -if (-e $londfile) { +my $restartflag=1; +if (-e $londfile) { my $lfh=IO::File->new("$londfile"); my $londpid=<$lfh>; chomp($londpid); if (kill 0 => $londpid) { print $fh "

    lond at pid $londpid responding

    "; + $restartflag=0; } else { - $errors++; $errors++; + $errors++; print $fh "

    lond at pid $londpid not responding

    "; + # Intelligently handle this. + # Possibility #1: there is no process + # Solution: remove .pid file and restart + if (getpgrp($londpid)==-1) { + unlink($londfile); + $restartflag=1; + } + else { + # Possibility #2: there is a live process that is not responding + # for an unknown reason + # Solution: kill parent and children processes, remove .pid and restart + `killall -9 lond`; + unlink($londfile); + $restartflag=1; + } + print $fh + "

    Deciding to clean up stale .pid file and restart lond

    "; } -} else { +} +if ($restartflag==1) { $errors++; print $fh "

    lond not running, trying to start

    "; - system("$perlvar{'lonDaemons'}/lond"); + system( + "$perlvar{'lonDaemons'}/lond 2>>$perlvar{'lonDaemons'}/logs/lond_errors"); sleep 120; if (-e $londfile) { print $fh "Seems like it started ...

    "; @@ -242,13 +427,25 @@ if (-e $londfile) { $errors++; $errors++; print $fh "

    lond at pid $londpid not responding

    "; print $fh "Give it one more try ...

    "; - system("$perlvar{'lonDaemons'}/lond"); + system( + "$perlvar{'lonDaemons'}/lond 2>>$perlvar{'lonDaemons'}/logs/lond_errors"); sleep 120; } } else { print $fh "Seems like that did not work!

    "; $errors++; } + if (-e "$perlvar{'lonDaemons'}/logs/lond.log"){ + print $fh "

    ";
    +    open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lond.log|");
    +    while ($line=) { 
    +      print $fh "$line";
    +      if ($line=~/WARNING/) { $notices++; }
    +      if ($line=~/CRITICAL/) { $notices++; }
    +    };
    +    close (DFH);
    +    print $fh "
    "; + } } $fname="$perlvar{'lonDaemons'}/logs/lond.log"; @@ -271,10 +468,12 @@ if ($size>40000) { print $fh '

    lonc

    Log

    ';
     
     if (-e "$perlvar{'lonDaemons'}/logs/lonc.log"){
    -open (DFH,"tail -n50 $perlvar{'lonDaemons'}/logs/lonc.log|");
    +open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lonc.log|");
     while ($line=) { 
        print $fh "$line";
    -   if ($line=~/died/) { $notices++; }
    +   if ($line=~/INFO/) { $notices++; }
    +   if ($line=~/WARNING/) { $notices++; }
    +   if ($line=~/CRITICAL/) { $warnings++; }
     };
     close (DFH);
     }
    @@ -282,6 +481,7 @@ print $fh "
    "; my $loncfile="$perlvar{'lonDaemons'}/logs/lonc.pid"; +$restartflag=1; if (-e $loncfile) { my $lfh=IO::File->new("$loncfile"); my $loncpid=<$lfh>; @@ -289,14 +489,34 @@ if (-e $loncfile) { if (kill 0 => $loncpid) { print $fh "

    lonc at pid $loncpid responding, sending USR1

    "; kill USR1 => $loncpid; + $restartflag=0; } else { - $errors++; $errors++; + $errors++; print $fh "

    lonc at pid $loncpid not responding

    "; + # Intelligently handle this. + # Possibility #1: there is no process + # Solution: remove .pid file and restart + if (getpgrp($loncpid)==-1) { + unlink($loncfile); + $restartflag=1; + } + else { + # Possibility #2: there is a live process that is not responding + # for an unknown reason + # Solution: kill parent and children processes, remove .pid and restart + `killall -9 lonc`; + unlink($loncfile); + $restartflag=1; + } + print $fh + "

    Deciding to clean up stale .pid file and restart lonc

    "; } -} else { +} +if ($restartflag==1) { $errors++; print $fh "

    lonc not running, trying to start

    "; - system("$perlvar{'lonDaemons'}/lonc"); + system( + "$perlvar{'lonDaemons'}/lonc 2>>$perlvar{'lonDaemons'}/logs/lonc_errors"); sleep 120; if (-e $loncfile) { print $fh "Seems like it started ...

    "; @@ -310,13 +530,25 @@ if (-e $loncfile) { $errors++; $errors++; print $fh "

    lonc at pid $loncpid not responding

    "; print $fh "Give it one more try ...

    "; - system("$perlvar{'lonDaemons'}/lonc"); + system( + "$perlvar{'lonDaemons'}/lonc 2>>$perlvar{'lonDaemons'}/logs/lonc_errors"); sleep 120; } } else { print $fh "Seems like that did not work!

    "; $errors++; } + if (-e "$perlvar{'lonDaemons'}/logs/lonc.log") { + print $fh "

    ";
    +    open (DFH,"tail -n100 $perlvar{'lonDaemons'}/logs/lonc.log|");
    +    while ($line=) { 
    +      print $fh "$line";
    +      if ($line=~/WARNING/) { $notices++; }
    +      if ($line=~/CRITICAL/) { $notices++; }
    +    };
    +    close (DFH);
    +    print $fh "
    "; + } } $fname="$perlvar{'lonDaemons'}/logs/lonc.log"; @@ -342,13 +574,10 @@ if (-e "$perlvar{'lonDaemons'}/logs/lonn open (DFH,"tail -n50 $perlvar{'lonDaemons'}/logs/lonnet.log|"); while ($line=) { print $fh "$line"; - if ($line=~/Delayed/) { $warnings++; } - if ($line=~/giving up/) { $warnings++; } - if ($line=~/FAILED/) { $errors++; } }; close (DFH); } -print $fh "

    Perm Log

    "; +print $fh "

    Perm Log

    ";
     
     if (-e "$perlvar{'lonDaemons'}/logs/lonnet.perm.log") {
         open(DFH,"tail -n10 $perlvar{'lonDaemons'}/logs/lonnet.perm.log|");
    
    
    500 Internal Server Error
    
    

    Internal Server Error

    The server encountered an internal error or misconfiguration and was unable to complete your request.

    Please contact the server administrator at root@localhost to inform them of the time this error occurred, and the actions you performed just before this error.

    More information about this error may be available in the server error log.