1: #!/usr/bin/perl
2:
3: # Housekeeping program, started by cron, loncontrol and loncron.pl
4: #
5: # $Id: loncron,v 1.47 2004/05/11 19:15:03 albertel Exp $
6: #
7: # Copyright Michigan State University Board of Trustees
8: #
9: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
10: #
11: # LON-CAPA is free software; you can redistribute it and/or modify
12: # it under the terms of the GNU General Public License as published by
13: # the Free Software Foundation; either version 2 of the License, or
14: # (at your option) any later version.
15: #
16: # LON-CAPA is distributed in the hope that it will be useful,
17: # but WITHOUT ANY WARRANTY; without even the implied warranty of
18: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19: # GNU General Public License for more details.
20: #
21: # You should have received a copy of the GNU General Public License
22: # along with LON-CAPA; if not, write to the Free Software
23: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24: #
25: # /home/httpd/html/adm/gpl.txt
26: #
27: # http://www.lon-capa.org/
28: #
29:
30: $|=1;
31:
32: use lib '/home/httpd/lib/perl/';
33: use LONCAPA::Configuration;
34:
35: use IO::File;
36: use IO::Socket;
37: use strict;
38: #globals
39: use vars qw (%perlvar %simplestatus $errors $warnings $notices $totalcount);
40:
41: my $statusdir="/home/httpd/html/lon-status";
42:
43:
44: # -------------------------------------------------- Non-critical communication
# Send one command line over the UNIX-domain socket found at
# lonSockDir/<server> and return the first reply line without its
# trailing newline.
#
# Parameters:
#   $cmd    - command text; a newline is appended before sending
#   $server - name of the socket file inside $perlvar{'lonSockDir'}
#
# Returns the reply, or the string "con_lost" when the socket cannot be
# opened or the reply is empty/false.
sub reply {
    my ($cmd,$server)=@_;
    my $socketpath="$perlvar{'lonSockDir'}/$server";

    # Timeout is handed to the socket constructor only; the read below is
    # a plain blocking readline.
    my $conn=IO::Socket::UNIX->new(Timeout => 10,
                                   Type    => SOCK_STREAM,
                                   Peer    => "$socketpath");
    return "con_lost" unless $conn;

    print $conn "$cmd\n";
    my $response=<$conn>;
    chomp($response);
    return $response ? $response : "con_lost";
}
58:
59: # --------------------------------------------------------- Output error status
60:
# Print all remaining arguments to the given filehandle.
# Nothing is written when the filehandle argument is false, which lets
# callers pass an unopened report handle without checking it themselves.
# Note: this sub shares its name with the log() builtin, so it must be
# called with a leading ampersand (&log(...)).
sub log {
    my ($handle,@chunks)=@_;
    print {$handle} @chunks if ($handle);
}
65:
# Write a small HTML table with the current values of the global
# $notices, $warnings and $errors counters to the report, followed by a
# link back to the "top" anchor.
#
# Parameters:
#   $fh - report filehandle, handed to &log
sub errout {
    my ($fh)=@_;
    # The trailing empty element makes join() terminate the last line
    # with a newline as well.
    my $summary=join("\n",
                     '<p><table border=2 bgcolor="#CCCCCC">',
                     "<tr><td>Notices</td><td>$notices</td></tr>",
                     "<tr><td>Warnings</td><td>$warnings</td></tr>",
                     "<tr><td>Errors</td><td>$errors</td></tr>",
                     '</table><p><a href="#top">Top</a><p>',
                     '');
    &log($fh,$summary);
}
76:
# Launch one daemon from $perlvar{'lonDaemons'} and check that it came up.
#
# Parameters:
#   $fh      - report filehandle, handed to &log
#   $daemon  - daemon name; also names the <daemon>_errors file that
#              receives the program's stderr
#   $pidfile - pid file inspected after the launch
#
# Returns 1 when the pid file exists and holds the pid of a process that
# accepts signal 0, otherwise 0.  The global $errors is incremented only
# when no pid file appeared at all.
sub start_daemon {
    my ($fh,$daemon,$pidfile) = @_;
    my $progname=$daemon;
    # When the script is invoked with 'new' as first argument, lonc is
    # started from the loncnew program instead.
    if ($daemon eq 'lonc' && defined($ARGV[0]) && $ARGV[0] eq 'new') {
        $progname='loncnew';
        print "new ";
    }
    system("$perlvar{'lonDaemons'}/$progname 2>>$perlvar{'lonDaemons'}/logs/${daemon}_errors");
    sleep 2;

    unless (-e $pidfile) {
        &log($fh,"Seems like that did not work!<p>");
        $errors++;
        return 0;
    }

    &log($fh,"Seems like it started ...<p>");
    my $pidline;
    if (open(my $lfh,'<',$pidfile)) {
        $pidline=<$lfh>;
        close($lfh);
    }
    sleep 2;

    # Only a positive integer is a usable pid.  An empty or garbled pid
    # file would otherwise turn into "kill 0, 0", which probes our own
    # process group and always succeeds.
    return 0 unless (defined($pidline));
    my ($daemonpid)=($pidline=~/^\s*(\d+)\s*$/);
    return 0 unless (defined($daemonpid) && $daemonpid>0);

    return (kill(0,$daemonpid) ? 1 : 0);
}
102:
# Helper for checkon_daemon: read the first line of a pid file.
# Returns the pid when that line is a positive integer; returns nothing
# (undef in scalar context) when the file is unreadable, empty or
# malformed.
sub _checkon_read_pid {
    my ($pidfile)=@_;
    open(my $lfh,'<',$pidfile) or return;
    my $line=<$lfh>;
    close($lfh);
    return unless (defined($line));
    my ($pid)=($line=~/^\s*(\d+)\s*$/);
    return unless (defined($pid) && $pid>0);
    return $pid;
}

# Helper for checkon_daemon: copy the last $count lines of $file into the
# report and pass each line to the optional $perline callback.
sub _checkon_tail {
    my ($fh,$file,$count,$perline)=@_;
    # List-form pipe open: the file name never passes through a shell.
    open(my $tail,'-|','tail',"-n$count",$file) or return;
    while (my $line=<$tail>) {
        &log($fh,"$line");
        $perline->($line) if ($perline);
    }
    close($tail);
    return;
}

# Report on one daemon, restart it when it is not running, and rotate its
# log file when that has grown beyond $maxsize.
#
# Parameters:
#   $fh       - report filehandle, handed to &log
#   $daemon   - daemon name (used for the log, pid file and killall)
#   $maxsize  - log size in bytes above which the log is rotated
#   $sendusr1 - when true, a responding daemon is sent SIGUSR1
#
# Side effects: updates the global $notices/$warnings/$errors counters
# and $simplestatus{$daemon}; prints a one-line status to STDOUT.
sub checkon_daemon {
    my ($fh,$daemon,$maxsize,$sendusr1)=@_;

    my $logfile="$perlvar{'lonDaemons'}/logs/$daemon.log";
    my $pidfile="$perlvar{'lonDaemons'}/logs/$daemon.pid";

    &log($fh,'<hr><a name="'.$daemon.'"><h2>'.$daemon.'</h2><h3>Log</h3><pre>');
    printf("%-10s ",$daemon);
    if (-e $logfile) {
        _checkon_tail($fh,$logfile,25,sub {
            my ($line)=@_;
            if ($line=~/INFO/) { $notices++; }
            if ($line=~/WARNING/) { $notices++; }
            if ($line=~/CRITICAL/) { $warnings++; }
        });
    }
    &log($fh,"</pre>");

    # Text used for the pid in report lines; the pid file is re-read each
    # time so that a freshly started daemon is reported with its new pid.
    my $pidtext=sub {
        my $pid=_checkon_read_pid($pidfile);
        return defined($pid) ? $pid : 'unknown';
    };

    my $restartflag=1;
    if (-e $pidfile) {
        my $daemonpid=_checkon_read_pid($pidfile);
        # The pid must be validated before it is used with kill: an empty
        # value is treated as pid 0, i.e. our own process group, so the
        # liveness probe would always succeed and USR1 would be delivered
        # to this script.
        if (defined($daemonpid) && kill(0,$daemonpid)) {
            &log($fh,"<h3>$daemon at pid $daemonpid responding");
            if ($sendusr1) { &log($fh,", sending USR1"); }
            &log($fh,"</h3>");
            if ($sendusr1) { kill('USR1',$daemonpid); }
            $restartflag=0;
            print "running\n";
        } else {
            $errors++;
            my $shown=defined($daemonpid) ? $daemonpid : 'unknown';
            &log($fh,"<h3>$daemon at pid $shown not responding</h3>");
            &log($fh,"<h3>Decided to clean up stale .pid file and restart $daemon</h3>");
        }
    }

    if ($restartflag==1) {
        $simplestatus{$daemon}='off';
        $errors++;
        &log($fh,'<br><font color="red">Killall '.$daemon.': '.
             `killall $daemon 2>&1`.' - ');
        sleep 2;
        &log($fh,unlink($pidfile).' - '.
             `killall -9 $daemon 2>&1`.
             '</font><br>');
        &log($fh,"<h3>$daemon not running, trying to start</h3>");

        my $started=&start_daemon($fh,$daemon,$pidfile);
        unless ($started) {
            $errors++;
            &log($fh,"<h3>$daemon at pid ".$pidtext->()." not responding</h3>");
            &log($fh,"Give it one more try ...<p>");
            print " ";
            $started=&start_daemon($fh,$daemon,$pidfile);
        }

        if ($started) {
            &log($fh,"<h3>$daemon at pid ".$pidtext->()." responding</h3>");
            $simplestatus{$daemon}='restarted';
            print "started\n";
        } else {
            print " failed\n";
            $simplestatus{$daemon}='failed';
            $errors++; $errors++;
            &log($fh,"<h3>$daemon at pid ".$pidtext->()." not responding</h3>");
            &log($fh,"Unable to start $daemon<p>");
        }

        if (-e $logfile) {
            &log($fh,"<p><pre>");
            # NOTE(review): unlike the first scan above, CRITICAL lines
            # only count as notices here.  Kept as found - confirm
            # whether that difference is intended.
            _checkon_tail($fh,$logfile,100,sub {
                my ($line)=@_;
                if ($line=~/WARNING/) { $notices++; }
                if ($line=~/CRITICAL/) { $notices++; }
            });
            &log($fh,"</pre>");
        }
    }

    # Keep up to three generations of the log (.1 newest, .3 oldest).
    my $size=(stat($logfile))[7];
    if (defined($size) && $size>$maxsize) {
        &log($fh,"Rotating logs ...<p>");
        rename("$logfile.2","$logfile.3");
        rename("$logfile.1","$logfile.2");
        rename("$logfile","$logfile.1");
    }

    &errout($fh);
}
204:
205: # --------------------------------------------------------------------- Machine
# Write the "Machine Information" section of the report: load average,
# disk usage (df) and the process list (ps), and raise the global
# $notices/$warnings/$errors counters according to fixed thresholds.
#
# Parameters:
#   $fh - report filehandle, handed to &log
sub log_machine_info {
    my ($fh)=@_;
    &log($fh,'<hr><a name="machine"><h2>Machine Information</h2>');
    &log($fh,"<h3>loadavg</h3>");

    my $loadavg='';
    if (open(my $loadfh,'<','/proc/loadavg')) {
        my $line=<$loadfh>;
        $loadavg=$line if (defined($line));
        close($loadfh);
    }

    &log($fh,"<tt>$loadavg</tt>");

    # The second field of /proc/loadavg is evaluated (the 5-minute
    # average according to proc(5)).
    my @parts=split(/\s+/,$loadavg);
    my $load=(defined($parts[1]) && $parts[1]=~/^\d+(?:\.\d+)?$/)
        ? $parts[1] : 0;
    if ($load>4.0) {
        $errors++;
    } elsif ($load>2.0) {
        $warnings++;
    } elsif ($load>1.0) {
        $notices++;
    }

    &log($fh,"<h3>df</h3>");
    &log($fh,"<pre>");

    if (open(my $dfh,'-|','df')) {
        while (my $line=<$dfh>) {
            &log($fh,"$line");
            my @fields=split(/\s+/,$line);
            # Only a "<digits>%" fifth field is a usage figure; the
            # header line and anything else are skipped instead of being
            # compared numerically.
            next unless (defined($fields[4]) && $fields[4]=~/^(\d+)%$/);
            my $usage=$1;
            if ($usage>90) {
                $warnings++;
                $notices++;
            } elsif ($usage>80) {
                $warnings++;
            } elsif ($usage>60) {
                $notices++;
            }
            if ($usage>95) { $warnings++; $warnings++; $simplestatus{'diskfull'}++; }
        }
        close($dfh);
    }
    &log($fh,"</pre>");

    &log($fh,"<h3>ps</h3>");
    &log($fh,"<pre>");
    my $psproc=0;

    # BSD-style "aux" without a dash: "ps -aux" asks for the processes of
    # a user named "x" and only falls back to "aux" as a courtesy.
    if (open(my $psh,'-|','ps','aux','--cols','140')) {
        while (my $line=<$psh>) {
            &log($fh,"$line");
            $psproc++;      # counts every output line, header included
        }
        close($psh);
    }
    &log($fh,"</pre>");

    if ($psproc>200) { $notices++; }
    if ($psproc>250) { $notices++; }

    &errout($fh);
}
266:
# Open $statusdir/newstatus.html, reset the global %simplestatus and
# write the report header: title, table of contents, the %perlvar
# settings, the host table and the list of spare hosts.
#
# Parameters (all hash references keyed by host id):
#   $hostdom  - domain per host id
#   $hostrole - role per host id
#   $hostname - name per host id
#   $spareid  - spare host ids (only the keys are used)
#
# Returns the report filehandle; it is undef when the file could not be
# opened, in which case &log silently writes nothing.
sub start_logging {
    my ($hostdom,$hostrole,$hostname,$spareid)=@_;
    my $fh=IO::File->new(">$statusdir/newstatus.html");
    unless ($fh) {
        warn "Unable to write $statusdir/newstatus.html: $!\n";
    }
    # Reset the package-level hash; a "my" declaration here would only
    # create an unused lexical that shadows it.
    %simplestatus=();
    my $now=time;
    my $date=localtime($now);

    &log($fh,(<<ENDHEADERS));
<html>
<head>
<title>LON Status Report $perlvar{'lonHostID'}</title>
</head>
<body bgcolor="#AAAAAA">
<a name="top">
<h1>LON Status Report $perlvar{'lonHostID'}</h1>
<h2>$date ($now)</h2>
<ol>
<li><a href="#configuration">Configuration</a>
<li><a href="#machine">Machine Information</a>
<li><a href="#tmp">Temporary Files</a>
<li><a href="#tokens">Session Tokens</a>
<li><a href="#httpd">httpd</a>
<li><a href="#lonsql">lonsql</a>
<li><a href="#lond">lond</a>
<li><a href="#lonc">lonc</a>
<li><a href="#lonhttpd">lonhttpd</a>
<li><a href="#lonnet">lonnet</a>
<li><a href="#connections">Connections</a>
<li><a href="#delayed">Delayed Messages</a>
<li><a href="#errcount">Error Count</a>
</ol>
<hr>
<a name="configuration">
<h2>Configuration</h2>
<h3>PerlVars</h3>
<table border=2>
ENDHEADERS

    foreach my $varname (sort(keys(%perlvar))) {
        &log($fh,"<tr><td>$varname</td><td>$perlvar{$varname}</td></tr>\n");
    }
    &log($fh,"</table><h3>Hosts</h3><table border=2>");
    foreach my $id (sort(keys(%{$hostname}))) {
        &log($fh,
             "<tr><td>$id</td><td>".$hostdom->{$id}.
             "</td><td>".$hostrole->{$id}.
             "</td><td>".$hostname->{$id}."</td></tr>\n");
    }
    &log($fh,"</table><h3>Spare Hosts</h3><ol>");
    foreach my $id (sort(keys(%{$spareid}))) {
        &log($fh,"<li>$id\n");
    }
    &log($fh,"</ol>\n");
    return $fh;
}
323:
324: # --------------------------------------------------------------- clean out tmp
# Remove expired files from $perlvar{'lonDaemons'}/tmp and report how
# many were removed.
#
# A file is expired when its mtime is more than $perlvar{'lonExpire'}
# seconds in the past.  Files whose first line starts with
# "CHECKOUTTOKEN&" are kept 365 times longer and are counted as "old
# checkout tokens" while they are kept.
#
# Parameters:
#   $fh - report filehandle, handed to &log
sub clean_tmp {
    my ($fh)=@_;
    &log($fh,'<hr><a name="tmp"><h2>Temporary Files</h2>');
    my $cleaned=0;
    my $old=0;
    my $now=time;
    while (my $fname=<$perlvar{'lonDaemons'}/tmp/*>) {
        my $mtime=(stat($fname))[9];
        # The entry vanished between the glob and the stat.
        next unless (defined($mtime));
        my $since=$now-$mtime;
        next unless ($since>$perlvar{'lonExpire'});

        # Three-argument open: the file name is never interpreted as a
        # mode or pipe specification.
        my $line='';
        if (open(my $probe,'<',$fname)) {
            my $first=<$probe>;
            $line=$first if (defined($first));
            close($probe);
        }

        if ($line=~/^CHECKOUTTOKEN\&/
            && $since<=365*$perlvar{'lonExpire'}) {
            $old++;
            next;
        }
        # Count only what was actually removed (unlink fails on
        # directories and on files we may not delete).
        $cleaned++ if (unlink($fname));
    }
    &log($fh,"Cleaned up ".$cleaned." files (".$old." old checkout tokens).");
}
356:
357: # ------------------------------------------------------------ clean out lonIDs
# Remove session token files from $perlvar{'lonIDsDir'} whose mtime is
# more than $perlvar{'lonExpire'} seconds in the past, and report the
# number of removed and of still active sessions.
#
# Parameters:
#   $fh - report filehandle, handed to &log
sub clean_lonIDs {
    my ($fh)=@_;
    &log($fh,'<hr><a name="tokens"><h2>Session Tokens</h2>');
    my $cleaned=0;
    my $active=0;
    my $now=time;
    while (my $fname=<$perlvar{'lonIDsDir'}/*>) {
        my $mtime=(stat($fname))[9];
        # The entry vanished between the glob and the stat: it is
        # neither active nor something we removed.
        next unless (defined($mtime));
        if ($now-$mtime>$perlvar{'lonExpire'}) {
            &log($fh,"Unlinking $fname<br>");
            # Count only successful removals.
            $cleaned++ if (unlink($fname));
        } else {
            $active++;
        }
    }
    &log($fh,"<p>Cleaned up ".$cleaned." stale session token(s).");
    &log($fh,"<h3>$active open session(s)</h3>");
}
381:
382:
383: # ----------------------------------------------------------------------- httpd
# Copy the last 25 lines of the httpd access log and of the httpd error
# log into the report.  Every error-log line containing "[error]" raises
# the global $notices counter.
#
# Parameters:
#   $fh - report filehandle, handed to &log
sub check_httpd_logs {
    my ($fh)=@_;

    # One entry per log: heading written first, command to read from,
    # and whether "[error]" lines are counted.
    my @sections=(
        ['<hr><a name="httpd"><h2>httpd</h2><h3>Access Log</h3><pre>',
         "tail -n25 /etc/httpd/logs/access_log|",
         0],
        ["</pre><h3>Error Log</h3><pre>",
         "tail -n25 /etc/httpd/logs/error_log|",
         1],
    );

    foreach my $section (@sections) {
        my ($heading,$command,$count_errors)=@{$section};
        &log($fh,$heading);
        open (DFH,$command);
        while (defined(my $entry=<DFH>)) {
            &log($fh,"$entry");
            $notices++ if ($count_errors && $entry=~/\[error\]/);
        }
        close (DFH);
    }

    &log($fh,"</pre>");
    &errout($fh);
}
403:
404: # ---------------------------------------------------------------------- lonnet
405:
# Write the "lonnet" section of the report: the last 50 lines of
# lonnet.log and the last 10 lines of lonnet.perm.log.  Afterwards
# lonnet.log is rotated (three generations kept) when it is larger than
# 40000 bytes.
#
# Parameters:
#   $fh - report filehandle, handed to &log
sub rotate_logs {
    my ($fh)=@_;

    # Copy everything the given pipe command prints into the report.
    my $copy_output=sub {
        my ($command)=@_;
        open (DFH,$command);
        while (defined(my $entry=<DFH>)) {
            &log($fh,"$entry");
        }
        close (DFH);
    };

    &log($fh,'<hr><a name="lonnet"><h2>lonnet</h2><h3>Temp Log</h3><pre>');
    print "checking logs\n";
    if (-e "$perlvar{'lonDaemons'}/logs/lonnet.log"){
        $copy_output->("tail -n50 $perlvar{'lonDaemons'}/logs/lonnet.log|");
    }
    &log($fh,"</pre><h3>Perm Log</h3><pre>");

    unless (-e "$perlvar{'lonDaemons'}/logs/lonnet.perm.log") {
        &log($fh,"No perm log\n");
    } else {
        $copy_output->("tail -n10 $perlvar{'lonDaemons'}/logs/lonnet.perm.log|");
    }

    my $fname="$perlvar{'lonDaemons'}/logs/lonnet.log";
    my $size=(stat($fname))[7];

    if ($size>40000) {
        &log($fh,"Rotating logs ...<p>");
        # Oldest generation first so nothing is overwritten too early.
        foreach my $move (["$fname.2","$fname.3"],
                          ["$fname.1","$fname.2"],
                          ["$fname","$fname.1"]) {
            rename($move->[0],$move->[1]);
        }
    }

    &log($fh,"</pre>");
    &errout($fh);
}
444:
445: # ----------------------------------------------------------------- Connections
# Send "pong" to every host id in %$hostname via reply() and write one
# table row per host to the report.  The expected answer is
# "<host id>:<own lonHostID>"; any other answer raises the global
# $warnings counter by one, and by two when it is exactly "con_lost".
#
# Parameters:
#   $fh       - report filehandle, handed to &log
#   $hostname - hash reference whose keys are the host ids to test
sub test_connections {
    my ($fh,$hostname)=@_;
    &log($fh,'<hr><a name="connections"><h2>Connections</h2>');
    print "testing connections\n";
    &log($fh,"<table border=2>");

    my @servers=sort(keys(%{$hostname}));
    foreach my $tryserver (@servers) {
        print(".");
        my $answer=reply("pong",$tryserver);
        my $result=$answer;
        if ($answer eq "$tryserver:$perlvar{'lonHostID'}") {
            $result="<b>ok</b>";
        } else {
            $warnings+=($answer eq 'con_lost') ? 2 : 1;
        }
        print(" $tryserver down\n") if ($answer =~ /con_lost/);
        &log($fh,"<tr><td>$tryserver</td><td>$result</td></tr>\n");
    }

    &log($fh,"</table>");
    &errout($fh);
}
469:
470:
471: # ------------------------------------------------------------ Delayed messages
# Write the "Delayed Messages" section of the report.
#
# lonnet.perm.log is scanned line by line; lines are colon-separated and
# the second field is compared against three codes: 'F' is reported as a
# failure and raises $warnings, 'D' increments and 'S' decrements the
# count of unsent messages.  A positive count adds five warnings per
# message, and any non-zero count is stored in $simplestatus{'unsend'}.
# Finally a listing of the "delayed" directory below
# $perlvar{'lonSockDir'} is appended.
#
# Parameters:
#   $fh - report filehandle, handed to &log
sub check_delayed_msg {
    my ($fh)=@_;
    &log($fh,'<hr><a name="delayed"><h2>Delayed Messages</h2>');
    print "checking buffers\n";

    &log($fh,'<h3>Scanning Permanent Log</h3>');

    my $unsend=0;

    # The permanent log may not exist; then there is nothing to scan.
    if (open(my $dfh,'<',"$perlvar{'lonDaemons'}/logs/lonnet.perm.log")) {
        while (my $line=<$dfh>) {
            my ($time,$sdf,$dserv,$dcmd)=split(/:/,$line);
            next unless (defined($sdf));
            if ($sdf eq 'F') {
                &log($fh,"<b>Failed: $time, $dserv, $dcmd</b><br>");
                $warnings++;
            } elsif ($sdf eq 'S') {
                $unsend--;
            } elsif ($sdf eq 'D') {
                $unsend++;
            }
        }
        close($dfh);
    }

    &log($fh,"Total unsend messages: <b>$unsend</b><p>\n");
    # A negative balance must not lower the warning count.
    if ($unsend>0) { $warnings+=5*$unsend; }

    if ($unsend) { $simplestatus{'unsend'}=$unsend; }
    &log($fh,"<h3>Outgoing Buffer</h3>");

    # List-form pipe open: the directory name never passes through a
    # shell.
    if (open(my $lsfh,'-|','ls','-lF',"$perlvar{'lonSockDir'}/delayed")) {
        while (my $line=<$lsfh>) {
            &log($fh,"$line<br>");
        }
        close($lsfh);
    }
}
505:
# Finish the report: compute the global $totalcount as
# notices + 4*warnings + 100*errors, write the final summary and footer,
# close the report and copy the counters into %simplestatus.
#
# Parameters:
#   $fh - report filehandle as returned by start_logging; may be undef
#         when the report file could not be opened
sub finish_logging {
    my ($fh)=@_;
    &log($fh,"<a name=errcount>\n");
    $totalcount=$notices+4*$warnings+100*$errors;
    &errout($fh);
    &log($fh,"<h1>Total Error Count: $totalcount</h1>");
    my $now=time;
    my $date=localtime($now);
    &log($fh,"<hr>$date ($now)</body></html>\n");
    # &log accepts a false handle, so closing must tolerate it as well;
    # without a handle nothing was written and nothing is announced.
    if ($fh) {
        print "lon-status webpage updated\n";
        $fh->close();
    }

    if ($errors) { $simplestatus{'errors'}=$errors; }
    if ($warnings) { $simplestatus{'warnings'}=$warnings; }
    if ($notices) { $simplestatus{'notices'}=$notices; }
    $simplestatus{'time'}=time;
}
523:
# Move the finished report into place as index.html and write the
# contents of %simplestatus to loncron_simple.txt as a single line of
# "key=value&" pairs.
sub log_simplestatus {
    rename("$statusdir/newstatus.html","$statusdir/index.html")
        or warn "Unable to install $statusdir/index.html: $!\n";

    my $simplefile="$statusdir/loncron_simple.txt";
    my $sfh;
    unless (open($sfh,'>',$simplefile)) {
        # Printing to an unopened handle would abort the whole run.
        warn "Unable to write $simplefile: $!\n";
        return;
    }
    # Sorted keys give the same line for the same status on every run.
    print $sfh join('',map { $_.'='.$simplestatus{$_}.'&' }
                    sort(keys(%simplestatus))),"\n";
    close($sfh);
}
534:
# Mail the finished report ($statusdir/index.html) with metasend.
# The recipient is $perlvar{'lonAdmEMail'}; when the global $totalcount
# exceeds 1000, $perlvar{'lonSysEMail'} is added.  The subject carries
# the host id and the error, warning and notice counters.
sub send_mail {
    print "sending mail\n";
    my $emailto="$perlvar{'lonAdmEMail'}";
    if ($totalcount>1000) {
        $emailto.=",$perlvar{'lonSysEMail'}";
    }
    my $subj="LON: $perlvar{'lonHostID'} E:$errors W:$warnings N:$notices";
    # List form: no shell is involved, so quotes or other shell
    # metacharacters in the configured values cannot alter the command.
    system('metasend','-b',
           '-t',$emailto,
           '-s',$subj,
           '-f',"$statusdir/index.html",
           '-m','text/html');
}
544:
545: # ================================================================ Main Program
# Program entry point: read the configuration, refuse to run on an
# unconfigured machine or as a user other than 'www', read hosts.tab and
# spare.tab, then produce the status report section by section and mail
# it when the total error count exceeds 200.
sub main {
# --------------------------------- Read loncapa_apache.conf and loncapa.conf
    my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf');
    %perlvar=%{$perlvarref};
    undef $perlvarref;
    delete $perlvar{'lonReceipt'}; # remove since sensitive and not needed
    delete $perlvar{'lonSqlAccess'}; # remove since sensitive and not needed

# --------------------------------------- Make sure that LON-CAPA is configured
# I only test for one thing here (lonHostID).  This is just a safeguard.
    if ('{[[[[lonHostID]]]]}' eq $perlvar{'lonHostID'}) {
        print("Unconfigured machine.\n");
        my $emailto=$perlvar{'lonSysEMail'};
        my $hostname=`/bin/hostname`;
        chomp($hostname);
        $hostname=~s/[^\w\.]//g; # make sure is safe to pass through shell
        my $subj="LON: Unconfigured machine $hostname";
        system("echo 'Unconfigured machine $hostname.' | ".
               "mailto $emailto -s '$subj' > /dev/null");
        exit 1;
    }

# ----------------------------- Make sure this process is running from user=www
    my $wwwid=getpwnam('www');
    # An undefined uid (no such user) would compare equal to uid 0.
    if (!defined($wwwid) || $wwwid!=$<) {
        print("User ID mismatch. This program must be run as user 'www'\n");
        my $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
        my $subj="LON: $perlvar{'lonHostID'} User ID mismatch";
        system("echo 'User ID mismatch. loncron must be run as user www.' | ".
               "mailto $emailto -s '$subj' > /dev/null");
        exit 1;
    }

# ------------------------------------------------------------- Read hosts file
    my (%hostname,%hostdom,%hostrole,%spareid);
    if (open(my $hosts,'<',"$perlvar{'lonTabDir'}/hosts.tab")) {
        while (my $configline=<$hosts>) {
            my ($id,$domain,$role,$name,$ip)=split(/:/,$configline);
            # Lines lacking any of the first five fields are skipped.
            next unless ($id && $domain && $role && $name && $ip);
            $hostname{$id}=$name;
            $hostdom{$id}=$domain;
            $hostrole{$id}=$role;
        }
        close($hosts);
    }

# ------------------------------------------------------ Read spare server file
    if (open(my $spares,'<',"$perlvar{'lonTabDir'}/spare.tab")) {
        while (my $configline=<$spares>) {
            chomp($configline);
            if (($configline) && ($configline ne $perlvar{'lonHostID'})) {
                $spareid{$configline}=1;
            }
        }
        close($spares);
    }

# ---------------------------------------------------------------- Start report

    $errors=0;
    $warnings=0;
    $notices=0;

    my $fh=&start_logging(\%hostdom,\%hostrole,\%hostname,\%spareid);

    &log_machine_info($fh);
    &clean_tmp($fh);
    &clean_lonIDs($fh);
    &check_httpd_logs($fh);
    &checkon_daemon($fh,'lonsql',200000);
    &checkon_daemon($fh,'lond',40000,1);
    &checkon_daemon($fh,'lonc',40000,1);
    &checkon_daemon($fh,'lonhttpd',40000);

    # The table of contents links to a "lonnet" section at this position;
    # rotate_logs writes that section and rotates lonnet.log.
    &rotate_logs($fh);

    &test_connections($fh,\%hostname);
    &check_delayed_msg($fh);

    &finish_logging($fh);
    &log_simplestatus();

    if ($totalcount>200) { &send_mail(); }
}

&main();
1;
636:
637:
638:
639:
640:
641:
642:
643:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>