1: #!/usr/bin/perl
2:
3: # Housekeeping program, started by cron, loncontrol and loncron.pl
4: #
5: # $Id: loncron,v 1.48 2004/05/11 20:19:46 albertel Exp $
6: #
7: # Copyright Michigan State University Board of Trustees
8: #
9: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
10: #
11: # LON-CAPA is free software; you can redistribute it and/or modify
12: # it under the terms of the GNU General Public License as published by
13: # the Free Software Foundation; either version 2 of the License, or
14: # (at your option) any later version.
15: #
16: # LON-CAPA is distributed in the hope that it will be useful,
17: # but WITHOUT ANY WARRANTY; without even the implied warranty of
18: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19: # GNU General Public License for more details.
20: #
21: # You should have received a copy of the GNU General Public License
22: # along with LON-CAPA; if not, write to the Free Software
23: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24: #
25: # /home/httpd/html/adm/gpl.txt
26: #
27: # http://www.lon-capa.org/
28: #
29:
30: $|=1;
31: use strict;
32:
33: use lib '/home/httpd/lib/perl/';
34: use LONCAPA::Configuration;
35:
36: use IO::File;
37: use IO::Socket;
38: use HTML::Entities;
39: #globals
40: use vars qw (%perlvar %simplestatus $errors $warnings $notices $totalcount);
41:
42: my $statusdir="/home/httpd/html/lon-status";
43:
44:
45: # -------------------------------------------------- Non-critical communication
# Send one command line to a peer through the UNIX-domain socket found
# under lonSockDir and hand back the first line of the response.
#
# Parameters:
#   $cmd    - command text; a newline is appended before it is sent
#   $server - host id, used as the socket file name below lonSockDir
#
# Returns the response with its trailing newline removed, or the string
# "con_lost" when the socket cannot be opened or the response is
# empty/false.
sub reply {
    my ($cmd,$server)=@_;
    my $socketpath=$perlvar{'lonSockDir'}.'/'.$server;
    my $conn=IO::Socket::UNIX->new(Peer    => $socketpath,
                                   Type    => SOCK_STREAM,
                                   Timeout => 10);
    unless ($conn) { return "con_lost"; }
    print $conn "$cmd\n";
    my $response=<$conn>;
    chomp($response);
    return $response ? $response : "con_lost";
}
59:
60: # --------------------------------------------------------- Output error status
61:
# Write the remaining arguments to the report filehandle.
# A false handle is tolerated: nothing is printed in that case, so
# callers never have to check the handle themselves.
# Must be called as &log(...) because the name collides with the builtin.
sub log {
    my ($out,@text)=@_;
    print {$out} @text if $out;
}
66:
# Append a small HTML table with the current notice, warning and error
# counters (package variables $notices, $warnings, $errors) to the
# report, followed by a link back to the "top" anchor.
#
# Parameters:
#   $fh - report filehandle handed on to &log
sub errout {
    my ($fh)=@_;
    my $summary=<<"ENDERROUT";
<table border="2" bgcolor="#CCCCCC">
<tr><td>Notices</td><td>$notices</td></tr>
<tr><td>Warnings</td><td>$warnings</td></tr>
<tr><td>Errors</td><td>$errors</td></tr>
</table><p><a href="#top">Top</a></p>
ENDERROUT
    &log($fh,$summary);
}
77:
# Start a daemon program and check whether it came up.
#
# Parameters:
#   $fh      - report filehandle handed to &log (may be false)
#   $daemon  - daemon name; also the program file name under lonDaemons,
#              except that 'lonc' is started as 'loncnew' when the first
#              command line argument of this script is 'new'
#   $pidfile - path of the pid file the daemon is expected to write
#
# Returns 1 when the pid file exists, holds a positive integer pid and
# that pid answers signal 0; returns 0 otherwise.  $errors is incremented
# only when no pid file shows up at all.
sub start_daemon {
    my ($fh,$daemon,$pidfile) = @_;
    my $progname=$daemon;
    if ($daemon eq 'lonc' && defined($ARGV[0]) && $ARGV[0] eq 'new') {
        $progname='loncnew';
        print "new ";
    }
    system("$perlvar{'lonDaemons'}/$progname 2>>$perlvar{'lonDaemons'}/logs/${daemon}_errors");
    sleep 2;
    unless (-e $pidfile) {
        &log($fh,"<p>Seems like that did not work!</p>");
        $errors++;
        return 0;
    }
    &log($fh,"<p>Seems like it started ...</p>");

    my $daemonpid;
    if (my $lfh=IO::File->new($pidfile,'r')) {
        $daemonpid=<$lfh>;
        $lfh->close();
    }
    # Only a positive integer is a usable pid.  An empty or garbled pid
    # file would otherwise turn into "kill 0 => 0", which probes this
    # script's own process group and therefore always reports success.
    unless (defined($daemonpid) && $daemonpid=~/^\s*([1-9]\d*)\s*$/) {
        &log($fh,"<p>No usable pid found in $pidfile</p>");
        return 0;
    }
    $daemonpid=$1;
    sleep 2;
    return (kill(0 => $daemonpid) ? 1 : 0);
}
103:
# Read a daemon's pid file and return the pid it holds.
# Returns undef (empty list in list context) when the file cannot be
# opened or its first line is not a positive integer.
sub _read_daemon_pid {
    my ($pidfile)=@_;
    my $lfh=IO::File->new($pidfile,'r');
    return unless ($lfh);
    my $firstline=<$lfh>;
    $lfh->close();
    if (defined($firstline) && $firstline=~/^\s*([1-9]\d*)\s*$/) {
        return $1;
    }
    return;
}

# Report on one daemon, restart it when it is not responding, and rotate
# its log file when that has grown too large.
#
# Parameters:
#   $fh       - report filehandle handed to &log (may be false)
#   $daemon   - daemon name; log and pid files are looked up as
#               lonDaemons/logs/$daemon.log and lonDaemons/logs/$daemon.pid
#   $maxsize  - log size (as reported by stat) above which the log is
#               rotated through .1, .2 and .3
#   $sendusr1 - when true, a responding daemon is sent SIGUSR1
#
# Side effects: updates $notices, $warnings, $errors and
# $simplestatus{$daemon}; prints a one-line status to STDOUT; may kill and
# restart the daemon via killall and &start_daemon.
sub checkon_daemon {
    my ($fh,$daemon,$maxsize,$sendusr1)=@_;

    my $fname="$perlvar{'lonDaemons'}/logs/$daemon.log";
    my $pidfile="$perlvar{'lonDaemons'}/logs/$daemon.pid";

    &log($fh,'<hr /><a name="'.$daemon.'" /><h2>'.$daemon.'</h2><h3>Log</h3><p style="white-space: pre;"><tt>');
    printf("%-10s ",$daemon);
    if (-e $fname) {
        if (open(my $tail,"tail -n25 $fname|")) {
            while (my $line=<$tail>) {
                # Log lines are written to the report as they are.
                &log($fh,"$line");
                # INFO and WARNING lines count as notices, CRITICAL
                # lines as warnings.
                if ($line=~/INFO/) { $notices++; }
                if ($line=~/WARNING/) { $notices++; }
                if ($line=~/CRITICAL/) { $warnings++; }
            }
            close($tail);
        }
    }
    &log($fh,"</tt></p>");

    my $restartflag=1;
    my $daemonpid;
    if (-e $pidfile) {
        # A pid that is not a positive integer is never passed to kill:
        # pid 0 would address this script's own process group, making
        # the liveness probe succeed and USR1 hit loncron itself.
        $daemonpid=&_read_daemon_pid($pidfile);
        if (defined($daemonpid) && kill(0 => $daemonpid)) {
            &log($fh,"<h3>$daemon at pid $daemonpid responding");
            if ($sendusr1) { &log($fh,", sending USR1"); }
            &log($fh,"</h3>");
            if ($sendusr1) { kill('USR1',$daemonpid); }
            $restartflag=0;
            print "running\n";
        } else {
            $errors++;
            my $pidtext=defined($daemonpid) ? $daemonpid : '';
            &log($fh,"<h3>$daemon at pid $pidtext not responding</h3>");
            $restartflag=1;
            &log($fh,"<h3>Decided to clean up stale .pid file and restart $daemon</h3>");
        }
    }
    if ($restartflag==1) {
        $simplestatus{$daemon}='off';
        $errors++;
        &log($fh,'<br><font color="red">Killall '.$daemon.': '.
             `killall $daemon 2>&1`.' - ');
        sleep 2;
        &log($fh,unlink($pidfile).' - '.
             `killall -9 $daemon 2>&1`.
             '</font><br>');
        &log($fh,"<h3>$daemon not running, trying to start</h3>");

        # Up to two start attempts.
        foreach my $attempt (1,2) {
            my $started=&start_daemon($fh,$daemon,$pidfile);
            # Re-read the pid file so the message names the freshly
            # started process rather than the stale pid seen earlier.
            $daemonpid=&_read_daemon_pid($pidfile);
            my $pidtext=defined($daemonpid) ? $daemonpid : '';
            if ($started) {
                &log($fh,"<h3>$daemon at pid $pidtext responding</h3>");
                $simplestatus{$daemon}='restarted';
                print "started\n";
                last;
            }
            if ($attempt==1) {
                $errors++;
                &log($fh,"<h3>$daemon at pid $pidtext not responding</h3>");
                &log($fh,"<p>Give it one more try ...</p>");
                print " ";
            } else {
                print " failed\n";
                $simplestatus{$daemon}='failed';
                $errors++; $errors++;
                &log($fh,"<h3>$daemon at pid $pidtext not responding</h3>");
                &log($fh,"<p>Unable to start $daemon</p>");
            }
        }

        if (-e $fname) {
            &log($fh,"<p><pre>");
            if (open(my $tail,"tail -n100 $fname|")) {
                while (my $line=<$tail>) {
                    &log($fh,"$line");
                    # In this post-restart scan both WARNING and
                    # CRITICAL lines count as notices.
                    if ($line=~/WARNING/) { $notices++; }
                    if ($line=~/CRITICAL/) { $notices++; }
                }
                close($tail);
            }
            &log($fh,"</pre></p>");
        }
    }

    # Element 7 of stat's return list is the file size; it is undef when
    # the log file does not exist, in which case nothing is rotated.
    my $size=(stat($fname))[7];
    if (defined($size) && $size>$maxsize) {
        &log($fh,"<p>Rotating logs ...</p>");
        rename("$fname.2","$fname.3");
        rename("$fname.1","$fname.2");
        rename("$fname","$fname.1");
    }

    &errout($fh);
}
205:
206: # --------------------------------------------------------------------- Machine
# Add the "Machine Information" section to the report: the contents of
# /proc/loadavg, the output of df and the output of ps.
#
# Parameters:
#   $fh - report filehandle handed to &log
#
# Side effects on the counters:
#   - second field of /proc/loadavg: >4.0 error, >2.0 warning, >1.0 notice
#   - fifth column of each df line (non-word characters removed):
#     >90 warning+notice, >80 warning, >60 notice; >95 adds two more
#     warnings and bumps $simplestatus{'diskfull'}
#   - number of ps output lines: one notice above 200, another above 250
sub log_machine_info {
    my ($fh)=@_;
    &log($fh,'<hr /><a name="machine" /><h2>Machine Information</h2>');
    &log($fh,"<h3>loadavg</h3>");

    open(my $loadfh,"/proc/loadavg");
    my $loadavg=<$loadfh>;
    close($loadfh);

    &log($fh,"<tt>$loadavg</tt>");

    my $load=(split(/\s+/,$loadavg))[1];
    if    ($load>4.0) { $errors++; }
    elsif ($load>2.0) { $warnings++; }
    elsif ($load>1.0) { $notices++; }

    &log($fh,"<h3>df</h3>");
    &log($fh,"<pre>");

    open(my $dfout,"df|");
    while (my $dfline=<$dfout>) {
        &log($fh,&encode_entities($dfline,'<>&"'));
        my $usage=(split(/\s+/,$dfline))[4];
        $usage=~s/\W//g;
        if ($usage>60) {
            if ($usage>90) {
                $warnings++;
                $notices++;
            } elsif ($usage>80) {
                $warnings++;
            } else {
                $notices++;
            }
        }
        if ($usage>95) {
            $warnings++;
            $warnings++;
            $simplestatus{'diskfull'}++;
        }
    }
    close($dfout);
    &log($fh,"</pre>");


    &log($fh,"<h3>ps</h3>");
    &log($fh,"<pre>");
    my $psproc=0;

    open(my $psout,"ps -aux --cols 140 |");
    while (my $psline=<$psout>) {
        &log($fh,&encode_entities($psline,'<>&"'));
        $psproc++;
    }
    close($psout);
    &log($fh,"</pre>");

    foreach my $limit (200,250) {
        if ($psproc>$limit) { $notices++; }
    }

    &errout($fh);
}
267:
# Open the new status page and write its header and "Configuration"
# section (PerlVars table, hosts table, spare host list).
#
# Parameters:
#   $hostdom  - hash ref: host id => domain
#   $hostrole - hash ref: host id => role
#   $hostname - hash ref: host id => host name
#   $spareid  - hash ref whose keys are the spare host ids
#
# Returns the IO::File handle for $statusdir/newstatus.html, or undef
# when that file could not be opened (in which case &log has written
# nothing).
sub start_logging {
    my ($hostdom,$hostrole,$hostname,$spareid)=@_;
    my $fh=IO::File->new(">$statusdir/newstatus.html");
    # Reset the package-level status hash.  This used to be declared with
    # "my", which only created an unused lexical shadowing the global.
    %simplestatus=();
    my $now=time;
    my $date=localtime($now);


    &log($fh,(<<ENDHEADERS));
<html>
<head>
<title>LON Status Report $perlvar{'lonHostID'}</title>
</head>
<body bgcolor="#AAAAAA">
<a name="top" />
<h1>LON Status Report $perlvar{'lonHostID'}</h1>
<h2>$date ($now)</h2>
<ol>
<li><a href="#configuration">Configuration</a></li>
<li><a href="#machine">Machine Information</a></li>
<li><a href="#tmp">Temporary Files</a></li>
<li><a href="#tokens">Session Tokens</a></li>
<li><a href="#httpd">httpd</a></li>
<li><a href="#lonsql">lonsql</a></li>
<li><a href="#lond">lond</a></li>
<li><a href="#lonc">lonc</a></li>
<li><a href="#lonhttpd">lonhttpd</a></li>
<li><a href="#lonnet">lonnet</a></li>
<li><a href="#connections">Connections</a></li>
<li><a href="#delayed">Delayed Messages</a></li>
<li><a href="#errcount">Error Count</a></li>
</ol>
<hr />
<a name="configuration" />
<h2>Configuration</h2>
<h3>PerlVars</h3>
<table border="2">
ENDHEADERS

    foreach my $varname (sort(keys(%perlvar))) {
        &log($fh,"<tr><td>$varname</td><td>".
             &encode_entities($perlvar{$varname},'<>&"')."</td></tr>\n");
    }
    &log($fh,"</table><h3>Hosts</h3><table border='2'>");
    foreach my $id (sort(keys(%{$hostname}))) {
        &log($fh,
             "<tr><td>$id</td><td>".$hostdom->{$id}.
             "</td><td>".$hostrole->{$id}.
             "</td><td>".$hostname->{$id}."</td></tr>\n");
    }
    &log($fh,"</table><h3>Spare Hosts</h3><ol>");
    foreach my $id (sort(keys(%{$spareid}))) {
        &log($fh,"<li>$id\n</li>");
    }
    &log($fh,"</ol>\n");
    return $fh;
}
325:
326: # --------------------------------------------------------------- clean out tmp
# Remove expired files from lonDaemons/tmp and report how many were
# handled.
#
# A file is expired when its mtime is more than lonExpire seconds old.
# Expired files whose first line starts with "CHECKOUTTOKEN&" are kept
# until they are more than 365*lonExpire seconds old; until then they are
# only counted as "old checkout tokens".
#
# Parameters:
#   $fh - report filehandle handed to &log
sub clean_tmp {
    my ($fh)=@_;
    &log($fh,'<hr /><a name="tmp" /><h2>Temporary Files</h2>');
    my $cleaned=0;
    my $old=0;
    foreach my $fname (glob("$perlvar{'lonDaemons'}/tmp/*")) {
        my $mtime=(stat($fname))[9];
        my $since=time()-$mtime;
        next unless ($since>$perlvar{'lonExpire'});

        my $line='';
        # Three-argument open: the file name comes from a directory
        # listing and must never be interpreted as an open mode or as a
        # command to pipe from.
        if (open(my $probe,'<',$fname)) {
            $line=<$probe>;
            close($probe);
        }
        my $is_token=(defined($line) && $line=~/^CHECKOUTTOKEN\&/);
        if (!$is_token || $since>365*$perlvar{'lonExpire'}) {
            $cleaned++;
            unlink("$fname");
        } else {
            $old++;
        }
    }
    &log($fh,"Cleaned up ".$cleaned." files (".$old." old checkout tokens).");
}
358:
359: # ------------------------------------------------------------ clean out lonIDs
# Remove stale session token files from lonIDsDir and report the result.
#
# A file is stale when its mtime is more than lonExpire seconds old;
# every other file counts as an open session.
#
# Parameters:
#   $fh - report filehandle handed to &log
sub clean_lonIDs {
    my ($fh)=@_;
    &log($fh,'<hr /><a name="tokens" /><h2>Session Tokens</h2>');
    my ($cleaned,$active)=(0,0);
    foreach my $fname (glob("$perlvar{'lonIDsDir'}/*")) {
        my $mtime=(stat($fname))[9];
        my $since=time()-$mtime;
        if ($since<=$perlvar{'lonExpire'}) {
            $active++;
            next;
        }
        $cleaned++;
        &log($fh,"Unlinking $fname<br>");
        unlink("$fname");
    }
    &log($fh,"<p>Cleaned up ".$cleaned." stale session token(s).</p>");
    &log($fh,"<h3>$active open session(s)</h3>");
}
383:
384:
385: # ----------------------------------------------------------------------- httpd
# Add the "httpd" section to the report: the last 25 lines of the Apache
# access log and of the error log.  Every error-log line containing
# "[error]" counts as one notice.
#
# Both logs are HTML-escaped before they are written into the report.
# The error log used to be copied in raw, unlike the access log, so any
# markup in it ended up in the status page.
#
# Parameters:
#   $fh - report filehandle handed to &log
sub check_httpd_logs {
    my ($fh)=@_;
    &log($fh,'<hr /><a name="httpd" /><h2>httpd</h2><h3>Access Log</h3><pre>');

    if (open(my $access,"tail -n25 /etc/httpd/logs/access_log|")) {
        while (my $line=<$access>) {
            &log($fh,&encode_entities($line,'<>&"'));
        }
        close($access);
    }

    &log($fh,"</pre><h3>Error Log</h3><pre>");

    if (open(my $errlog,"tail -n25 /etc/httpd/logs/error_log|")) {
        while (my $line=<$errlog>) {
            if ($line=~/\[error\]/) { $notices++; }
            &log($fh,&encode_entities($line,'<>&"'));
        }
        close($errlog);
    }
    &log($fh,"</pre>");
    &errout($fh);
}
405:
406: # ---------------------------------------------------------------------- lonnet
407:
# Add the "lonnet" section to the report and rotate lonnet.log.
#
# Shows the last 50 lines of lonDaemons/logs/lonnet.log (if present) and
# the last 10 lines of lonnet.perm.log (or "No perm log"), then rotates
# lonnet.log through .1, .2 and .3 when stat reports a size above 40000.
#
# Parameters:
#   $fh - report filehandle handed to &log
sub rotate_lonnet_logs {
    my ($fh)=@_;
    my $logdir="$perlvar{'lonDaemons'}/logs";
    &log($fh,'<hr /><a name="lonnet" /><h2>lonnet</h2><h3>Temp Log</h3><pre>');
    print "checking logs\n";
    if (-e "$logdir/lonnet.log") {
        open(my $templog,"tail -n50 $logdir/lonnet.log|");
        while (my $entry=<$templog>) {
            &log($fh,&encode_entities($entry,'<>&"'));
        }
        close($templog);
    }
    &log($fh,"</pre><h3>Perm Log</h3><pre>");

    unless (-e "$logdir/lonnet.perm.log") {
        &log($fh,"No perm log\n");
    } else {
        open(my $permlog,"tail -n10 $logdir/lonnet.perm.log|");
        while (my $entry=<$permlog>) {
            &log($fh,&encode_entities($entry,'<>&"'));
        }
        close($permlog);
    }

    my $fname="$logdir/lonnet.log";
    my $size=(stat($fname))[7];

    if ($size>40000) {
        &log($fh,"<p>Rotating logs ...</p>");
        # Shift the older generations first, then the live log.
        foreach my $generation (2,1) {
            rename("$fname.$generation","$fname.".($generation+1));
        }
        rename("$fname","$fname.1");
    }

    &log($fh,"</pre>");
    &errout($fh);
}
446:
447: # ----------------------------------------------------------------- Connections
# Add the "Connections" section to the report: send "pong" to every host
# id in the hosts table via &reply and tabulate the answers.
#
# An answer equal to "<host id>:<lonHostID>" is shown as ok.  Any other
# answer is shown verbatim and costs one warning, two when it is exactly
# "con_lost".  Hosts whose answer contains "con_lost" are also reported
# as down on STDOUT.
#
# Parameters:
#   $fh       - report filehandle handed to &log
#   $hostname - hash ref whose keys are the host ids to probe
sub test_connections {
    my ($fh,$hostname)=@_;
    &log($fh,'<hr /><a name="connections" /><h2>Connections</h2>');
    print "testing connections\n";
    &log($fh,"<table border='2'>");
    my @servers=sort(keys(%{$hostname}));
    foreach my $tryserver (@servers) {
        print(".");
        my $answer=reply("pong",$tryserver);
        my $expected="$tryserver:$perlvar{'lonHostID'}";
        my $result="<b>ok</b>";
        unless ($answer eq $expected) {
            $result=$answer;
            $warnings+=($answer eq 'con_lost') ? 2 : 1;
        }
        print(" $tryserver down\n") if ($answer =~ /con_lost/);
        &log($fh,"<tr><td>$tryserver</td><td>$result</td></tr>\n");
    }
    &log($fh,"</table>");

    &errout($fh);
}
471:
472:
473: # ------------------------------------------------------------ Delayed messages
# Add the "Delayed Messages" section to the report.
#
# Scans lonDaemons/logs/lonnet.perm.log, whose lines are split on ':'
# into time, status flag, server and command.  Flag 'F' is reported as a
# failure and costs one warning; 'D' increments and 'S' decrements the
# count of unsent messages.  Five warnings are added per unsent message
# and a non-zero count is stored in $simplestatus{'unsend'}.  Finally the
# listing of lonSockDir/delayed is appended.
#
# Parameters:
#   $fh - report filehandle handed to &log
sub check_delayed_msg {
    my ($fh)=@_;
    &log($fh,'<hr /><a name="delayed" /><h2>Delayed Messages</h2>');
    print "checking buffers\n";

    &log($fh,'<h3>Scanning Permanent Log</h3>');

    my $unsend=0;

    # The perm log may legitimately be missing; say so in the report
    # instead of reading from a handle that was never opened.
    my $permlog="$perlvar{'lonDaemons'}/logs/lonnet.perm.log";
    if (my $dfh=IO::File->new($permlog,'r')) {
        while (my $line=<$dfh>) {
            my ($time,$sdf,$dserv,$dcmd)=split(/:/,$line);
            next unless defined($sdf);
            if ($sdf eq 'F') {
                &log($fh,"<b>Failed: $time, $dserv, $dcmd</b><br>");
                $warnings++;
            }
            if ($sdf eq 'S') { $unsend--; }
            if ($sdf eq 'D') { $unsend++; }
        }
        $dfh->close();
    } else {
        &log($fh,"<p>No perm log</p>\n");
    }

    &log($fh,"<p>Total unsend messages: <b>$unsend</b></p>\n");
    $warnings=$warnings+5*$unsend;

    if ($unsend) { $simplestatus{'unsend'}=$unsend; }
    &log($fh,"<h3>Outgoing Buffer</h3>\n<pre>");

    if (open(my $listing,"ls -lF $perlvar{'lonSockDir'}/delayed|")) {
        while (my $line=<$listing>) {
            &log($fh,&encode_entities($line,'<>&"'));
        }
        close($listing);
    }
    &log($fh,"</pre>\n");
}
508:
# Write the closing "Error Count" section, close the report and record
# the final counters in %simplestatus.
#
# Sets $totalcount to $notices + 4*$warnings + 100*$errors.
#
# Parameters:
#   $fh - report filehandle as returned by start_logging; may be undef
#         when the status page could not be opened
sub finish_logging {
    my ($fh)=@_;
    &log($fh,"<a name='errcount' />\n");
    $totalcount=$notices+4*$warnings+100*$errors;
    &errout($fh);
    &log($fh,"<h1>Total Error Count: $totalcount</h1>");
    my $now=time;
    my $date=localtime($now);
    &log($fh,"<hr />$date ($now)</body></html>\n");
    # &log tolerates a missing handle, so this sub has to as well:
    # calling close on undef would abort the run before the status
    # bookkeeping below.
    if ($fh) {
        print "lon-status webpage updated\n";
        $fh->close();
    } else {
        print "lon-status webpage could not be written\n";
    }

    if ($errors) { $simplestatus{'errors'}=$errors; }
    if ($warnings) { $simplestatus{'warnings'}=$warnings; }
    if ($notices) { $simplestatus{'notices'}=$notices; }
    $simplestatus{'time'}=time;
}
526:
# Publish the finished report and write the machine-readable summary.
#
# Renames $statusdir/newstatus.html to $statusdir/index.html, then writes
# every %simplestatus entry as "key=value&" on a single line to
# $statusdir/loncron_simple.txt.
#
# Takes no parameters.  When the summary file cannot be opened a message
# is printed to STDOUT and the sub returns without writing anything.
sub log_simplestatus {
    rename ("$statusdir/newstatus.html","$statusdir/index.html");

    my $sfh=IO::File->new(">$statusdir/loncron_simple.txt");
    unless ($sfh) {
        # Printing to an undefined handle would abort the whole run.
        print "could not write $statusdir/loncron_simple.txt: $!\n";
        return;
    }
    foreach my $key (keys(%simplestatus)) {
        print {$sfh} $key.'='.$simplestatus{$key}.'&';
    }
    print {$sfh} "\n";
    $sfh->close();
}
537:
# Mail the status page ($statusdir/index.html) as text/html via metasend.
#
# The mail goes to lonAdmEMail; lonSysEMail is added as a second
# recipient when $totalcount exceeds 1000.  The subject carries the host
# id and the error, warning and notice counters.
#
# Takes no parameters.
sub send_mail {
    print "sending mail\n";
    my @recipients=($perlvar{'lonAdmEMail'});
    if ($totalcount>1000) {
        push(@recipients,$perlvar{'lonSysEMail'});
    }
    my $emailto=join(',',@recipients);
    my $subj=sprintf('LON: %s E:%s W:%s N:%s',
                     $perlvar{'lonHostID'},$errors,$warnings,$notices);
    my $command=join(' ',
                     'metasend','-b',
                     '-t',$emailto,
                     '-s',"'$subj'",
                     '-f',"$statusdir/index.html",
                     '-m','text/html');
    system($command);
}
547:
548: # ================================================================ Main Program
# Main program: read the configuration, refuse to run on an unconfigured
# machine or under the wrong user, read the host tables, then produce the
# status report and, when the total error count exceeds 200, mail it.
#
# Takes no parameters.  Exits with status 1 when the machine is
# unconfigured or the script is not running as user 'www'.
sub main () {
# --------------------------------- Read loncapa_apache.conf and loncapa.conf
    my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf');
    %perlvar=%{$perlvarref};
    undef $perlvarref;
    delete $perlvar{'lonReceipt'}; # remove since sensitive and not needed
    delete $perlvar{'lonSqlAccess'}; # remove since sensitive and not needed

# --------------------------------------- Make sure that LON-CAPA is configured
# I only test for one thing here (lonHostID).  This is just a safeguard.
    if ('{[[[[lonHostID]]]]}' eq $perlvar{'lonHostID'}) {
        print("Unconfigured machine.\n");
        my $emailto=$perlvar{'lonSysEMail'};
        my $hostname=`/bin/hostname`;
        # chomp, not chop: only a trailing newline may be removed.
        chomp $hostname;
        $hostname=~s/[^\w\.]//g; # make sure is safe to pass through shell
        my $subj="LON: Unconfigured machine $hostname";
        system("echo 'Unconfigured machine $hostname.' |\
 mailto $emailto -s '$subj' > /dev/null");
        exit 1;
    }

# ----------------------------- Make sure this process is running from user=www
    my $wwwid=getpwnam('www');
    # An unknown user yields undef, which compares equal to uid 0; without
    # the defined() test root would pass this check on a machine that has
    # no 'www' account.
    if (!defined($wwwid) || $wwwid!=$<) {
        print("User ID mismatch.  This program must be run as user 'www'\n");
        my $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
        my $subj="LON: $perlvar{'lonHostID'} User ID mismatch";
        system("echo 'User ID mismatch.  loncron must be run as user www.' |\
 mailto $emailto -s '$subj' > /dev/null");
        exit 1;
    }

# ------------------------------------------------------------- Read hosts file
    my (%hostname,%hostdom,%hostrole,%spareid);
    if (my $config=IO::File->new("$perlvar{'lonTabDir'}/hosts.tab",'r')) {
        while (my $configline=<$config>) {
            # skip comment lines and blank lines
            next if ($configline =~ /^(\#|\s*$)/);
            my ($id,$domain,$role,$name,$ip)=split(/:/,$configline);
            if ($id && $domain && $role && $name && $ip) {
                $hostname{$id}=$name;
                $hostdom{$id}=$domain;
                $hostrole{$id}=$role;
            }
        }
        $config->close();
    } else {
        print("Could not read $perlvar{'lonTabDir'}/hosts.tab\n");
    }

# ------------------------------------------------------ Read spare server file
    if (my $config=IO::File->new("$perlvar{'lonTabDir'}/spare.tab",'r')) {
        while (my $configline=<$config>) {
            chomp($configline);
            # this host itself is never listed as a spare
            if (($configline) && ($configline ne $perlvar{'lonHostID'})) {
                $spareid{$configline}=1;
            }
        }
        $config->close();
    } else {
        print("Could not read $perlvar{'lonTabDir'}/spare.tab\n");
    }

# ---------------------------------------------------------------- Start report

    $errors=0;
    $warnings=0;
    $notices=0;

    my $fh=&start_logging(\%hostdom,\%hostrole,\%hostname,\%spareid);

    &log_machine_info($fh);
    &clean_tmp($fh);
    &clean_lonIDs($fh);
    &check_httpd_logs($fh);
    &rotate_lonnet_logs($fh);
    # arguments: daemon name, log size that triggers rotation, send USR1
    &checkon_daemon($fh,'lonsql',200000);
    &checkon_daemon($fh,'lond',40000,1);
    &checkon_daemon($fh,'lonc',40000,1);
    &checkon_daemon($fh,'lonhttpd',40000);

    &test_connections($fh,\%hostname);
    &check_delayed_msg($fh);

    &finish_logging($fh);
    &log_simplestatus();

    if ($totalcount>200) { &send_mail(); }
}
634:
# Script entry point: run the housekeeping pass, then end the file with a
# true value.
main();
1;
637:
638:
639:
640:
641:
642:
643:
644:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>