--- loncom/metadata_database/searchcat.pl 2002/10/08 15:09:36 1.21
+++ loncom/metadata_database/searchcat.pl 2003/02/03 18:03:53 1.31
@@ -2,7 +2,7 @@
# The LearningOnline Network
# searchcat.pl "Search Catalog" batch script
#
-# $Id: searchcat.pl,v 1.21 2002/10/08 15:09:36 www Exp $
+# $Id: searchcat.pl,v 1.31 2003/02/03 18:03:53 harris41 Exp $
#
# Copyright Michigan State University Board of Trustees
#
@@ -26,12 +26,6 @@
#
# http://www.lon-capa.org/
#
-# YEAR=2001
-# 04/14/2001, 04/16/2001 Scott Harrison
-#
-# YEAR=2002
-# 05/11/2002 Scott Harrison
-#
###
# This script goes through a LON-CAPA resource
@@ -45,6 +39,7 @@ use IO::File;
use HTML::TokeParser;
use DBI;
use GDBM_File;
+use POSIX qw(strftime mktime);
my @metalist;
@@ -57,26 +52,28 @@ sub unescape {
return $str;
}
+# -------------------------------------------------------- Escape Special Chars
+
+sub escape {
+ my $str=shift;
+ $str =~ s/(\W)/"%".unpack('H2',$1)/eg;
+ return $str;
+}
+
# ------------------------------------------- Code to evaluate dynamic metadata
sub dynamicmeta {
-#
-#
-# Do nothing for now ...
-#
-#
- return;
-#
-# ..., but stuff below already works
-#
+
my $url=&declutter(shift);
$url=~s/\.meta$//;
my %returnhash=();
my ($adomain,$aauthor)=($url=~/^(\w+)\/(\w+)\//);
my $prodir=&propath($adomain,$aauthor);
- if (tie(%evaldata,'GDBM_File',
- $prodir.'/nohist_resevaldata.db',&GDBM_READER,0640)) {
+ if ((tie(%evaldata,'GDBM_File',
+ $prodir.'/nohist_resevaldata.db',&GDBM_READER(),0640)) &&
+ (tie(%newevaldata,'GDBM_File',
+ $prodir.'/nohist_new_resevaldata.db',&GDBM_WRCREAT(),0640))) {
my %sum=();
my %cnt=();
my %listitems=('count' => 'add',
@@ -98,34 +95,47 @@ sub dynamicmeta {
foreach (keys %evaldata) {
my $key=&unescape($_);
if ($key=~/$regexp/) {
- if (defined($cnt{$1})) { $cnt{$1}++; } else { $cnt{$1}=1; }
- unless ($listitems{$1} eq 'app') {
- if (defined($sum{$1})) {
- $sum{$1}+=$evaldata{$_};
- } else {
- $sum{$1}=$evaldata{$_};
+ my $ctype=$1;
+ if (defined($cnt{$ctype})) {
+ $cnt{$ctype}++;
+ } else {
+ $cnt{$ctype}=1;
+ }
+ unless ($listitems{$ctype} eq 'app') {
+ if (defined($sum{$ctype})) {
+ $sum{$ctype}+=$evaldata{$_};
+ } else {
+ $sum{$ctype}=$evaldata{$_};
+ }
+ } else {
+ if (defined($sum{$ctype})) {
+ if ($evaldata{$_}) {
+ $sum{$ctype}.='
'.$evaldata{$_};
}
- } else {
- if (defined($sum{$1})) {
- if ($evaldata{$_}) {
- $sum{$1}.='
'.$evaldata{$_};
- }
- } else {
- $sum{$1}=''.$evaldata{$_};
- }
- }
- }
- foreach (keys %cnt) {
- if ($listitems{$_} eq 'avg') {
- $returnhash{$_}=int(($sum{$_}/$cnt{$_})*100.0+0.5)/100.0;
- } elsif ($listitems{$_} eq 'cnt') {
- $returnhash{$_}=$cnt{$_};
- } else {
- $returnhash{$_}=$sum{$_};
- }
- }
+ } else {
+ $sum{$ctype}=''.$evaldata{$_};
+ }
+ }
+ if ($ctype ne 'count') {
+ $newevaldata{$_}=$evaldata{$_};
+ }
+ }
+ }
+ foreach (keys %cnt) {
+ if ($listitems{$_} eq 'avg') {
+ $returnhash{$_}=int(($sum{$_}/$cnt{$_})*100.0+0.5)/100.0;
+ } elsif ($listitems{$_} eq 'cnt') {
+ $returnhash{$_}=$cnt{$_};
+ } else {
+ $returnhash{$_}=$sum{$_};
+ }
+ }
+ if ($returnhash{'count'}) {
+ my $newkey=$$.'_'.time.'_searchcat___'.&escape($url).'___count';
+ $newevaldata{$newkey}=$returnhash{'count'};
}
untie(%evaldata);
+ untie(%newevaldata);
}
return %returnhash;
}
@@ -148,13 +158,29 @@ delete $perlvar{'lonReceipt'}; # remove
# ------------------------------------- Only run if machine is a library server
exit unless $perlvar{'lonRole'} eq 'library';
+# ----------------------------- Make sure this process is running from user=www
+
+my $wwwid=getpwnam('www');
+if ($wwwid!=$<) {
+ $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
+ $subj="LON: $perlvar{'lonHostID'} User ID mismatch";
+ system("echo 'User ID mismatch. searchcat.pl must be run as user www.' |\
+ mailto $emailto -s '$subj' > /dev/null");
+ exit 1;
+}
+
+
+# ---------------------------------------------------------- We are in business
+
+open(LOG,'>'.$perlvar{'lonDaemons'}.'/logs/searchcat.log');
+print LOG '==== Searchcat Run '.localtime()."====\n\n";
my $dbh;
# ------------------------------------- Make sure that database can be accessed
{
unless (
$dbh = DBI->connect("DBI:mysql:loncapa","www",$perlvar{'lonSqlAccess'},{ RaiseError =>0,PrintError=>0})
) {
- print "Cannot connect to database!\n";
+ print LOG "Cannot connect to database!\n";
exit;
}
my $make_metadata_table = "CREATE TABLE IF NOT EXISTS metadata (".
@@ -179,13 +205,20 @@ my @homeusers=grep
grep {!/^\.\.?$/} readdir(RESOURCES);
closedir RESOURCES;
foreach my $user (@homeusers) {
+ print LOG "\n=== User: ".$user."\n\n";
+# Remove left-over db-files from potentially crashed searchcat run
+ my $prodir=&propath($perlvar{'lonDefDomain'},$user);
+ unlink($prodir.'/nohist_new_resevaldata.db');
+# Use find.pl
+ undef @metalist;
+ @metalist=();
&find("$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$user");
-}
# -- process each file to get metadata and put into search catalog SQL database
# Also, check to see if already there.
# I could just delete (without searching first), but this works for now.
foreach my $m (@metalist) {
+ print LOG "- ".$m."\n";
my $ref=&metadata($m);
my $m2='/res/'.&declutter($m);
$m2=~s/\.meta$//;
@@ -220,8 +253,17 @@ foreach my $m (@metalist) {
# Need to, perhaps, remove stale SQL database records.
# ... not yet implemented
+
+# -------------------------------------------------- Copy over the new db-files
+ system('mv '.$prodir.'/nohist_new_resevaldata.db '.
+ $prodir.'/nohist_resevaldata.db');
+}
# --------------------------------------------------- Close database connection
$dbh->disconnect;
+print LOG "\n==== Searchcat completed ".localtime()." ====\n";
+close(LOG);
+exit 0;
+# =============================================================================
# ---------------------------------------------------------------- Get metadata
# significantly altered from subroutine present in lonnet
@@ -315,7 +357,30 @@ sub propath {
# ---------------------------- convert 'time' format into a datetime sql format
sub sqltime {
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) =
- localtime(@_[0]);
+ localtime(&unsqltime(@_[0]));
$mon++; $year+=1900;
return "$year-$mon-$mday $hour:$min:$sec";
}
+
+sub maketime {
+ my %th=@_;
+ return POSIX::mktime(
+ ($th{'seconds'},$th{'minutes'},$th{'hours'},
+ $th{'day'},$th{'month'}-1,$th{'year'}-1900,0,0,$th{'dlsav'}));
+}
+
+
+#########################################
+#
+# Retro-fixing of un-backward-compatible time format
+
+sub unsqltime {
+ my $timestamp=shift;
+ if ($timestamp=~/^(\d+)\-(\d+)\-(\d+)\s+(\d+)\:(\d+)\:(\d+)$/) {
+ $timestamp=&maketime(
+ 'year'=>$1,'month'=>$2,'day'=>$3,
+ 'hours'=>$4,'minutes'=>$5,'seconds'=>$6);
+ }
+ return $timestamp;
+}
+