--- loncom/metadata_database/searchcat.pl 2003/03/26 20:15:57 1.32
+++ loncom/metadata_database/searchcat.pl 2003/12/24 20:41:32 1.46
@@ -2,7 +2,7 @@
# The LearningOnline Network
# searchcat.pl "Search Catalog" batch script
#
-# $Id: searchcat.pl,v 1.32 2003/03/26 20:15:57 www Exp $
+# $Id: searchcat.pl,v 1.46 2003/12/24 20:41:32 www Exp $
#
# Copyright Michigan State University Board of Trustees
#
@@ -27,6 +27,7 @@
# http://www.lon-capa.org/
#
###
+
=pod
=head1 NAME
@@ -64,6 +65,8 @@ and correct user experience.
=cut
+use strict;
+
use lib '/home/httpd/lib/perl/';
use LONCAPA::Configuration;
@@ -73,9 +76,40 @@ use DBI;
use GDBM_File;
use POSIX qw(strftime mktime);
+require "find.pl";
+
my @metalist;
+my $simplestatus='';
+my %countext=();
+
+# ----------------------------------------------------- write out simple status
+# Writes the accumulated $simplestatus string, followed by a newline, to
+# /home/httpd/html/lon-status/mysql.txt, replacing any previous contents.
+# Takes no arguments.  The status file is best-effort: if it cannot be
+# opened or closed a warning is issued and the caller carries on.
+sub writesimple {
+    my $statusfile='/home/httpd/html/lon-status/mysql.txt';
+    my $smp;
+    unless (open($smp,'>',$statusfile)) {
+        # Without this check the print below would go to a closed handle
+        # and the stale status file would silently stay in place.
+        warn "Cannot write $statusfile: $!\n";
+        return;
+    }
+    print $smp $simplestatus."\n";
+    # Buffered write errors only surface at close.
+    close($smp) || warn "Cannot close $statusfile: $!\n";
+}
+# Writes the per-extension resource counts held in %countext to
+# /home/httpd/html/lon-status/rescount.txt as one line of the form
+#   ext=count&ext=count&...&time=<epoch seconds>
+# Takes no arguments.  Best-effort: if the file cannot be opened or closed
+# a warning is issued and the caller carries on.
+sub writecount {
+    my $countfile='/home/httpd/html/lon-status/rescount.txt';
+    my $rsmp;
+    unless (open($rsmp,'>',$countfile)) {
+        warn "Cannot write $countfile: $!\n";
+        return;
+    }
+    foreach my $ext (keys %countext) {
+        print $rsmp $ext.'='.$countext{$ext}.'&';
+    }
+    print $rsmp 'time='.time."\n";
+    # Buffered write errors only surface at close.
+    close($rsmp) || warn "Cannot close $countfile: $!\n";
+}
+
+# -------------------------------------- counts files with different extensions
+# Tallies one resource in %countext under its lower-cased file extension,
+# i.e. the run of word characters that follows the last dot of the name.
+#   $file - name or URL of the resource
+# Names that do not end in such an extension are not counted.
+sub count {
+    my $file=shift;
+    # Capture in list context: after a failed match $ext is undef, whereas
+    # $1 would still hold the capture of some earlier successful match and
+    # the file would be tallied under the wrong extension.
+    my ($ext)=($file=~/\.(\w+)$/);
+    return unless defined($ext);
+    # ++ on a missing key starts the tally at 1.
+    $countext{lc($ext)}++;
+}
# ----------------------------------------------------- Un-Escape Special Chars
sub unescape {
@@ -92,95 +126,91 @@ sub escape {
return $str;
}
-
# ------------------------------------------- Code to evaluate dynamic metadata
sub dynamicmeta {
-
my $url=&declutter(shift);
$url=~s/\.meta$//;
my %returnhash=();
my ($adomain,$aauthor)=($url=~/^(\w+)\/(\w+)\//);
my $prodir=&propath($adomain,$aauthor);
- if ((tie(%evaldata,'GDBM_File',
- $prodir.'/nohist_resevaldata.db',&GDBM_READER(),0640)) &&
- (tie(%newevaldata,'GDBM_File',
- $prodir.'/nohist_new_resevaldata.db',&GDBM_WRCREAT(),0640))) {
- my %sum=();
- my %cnt=();
- my %listitems=('count' => 'add',
- 'course' => 'add',
- 'avetries' => 'avg',
- 'stdno' => 'add',
- 'difficulty' => 'avg',
- 'clear' => 'avg',
- 'technical' => 'avg',
- 'helpful' => 'avg',
- 'correct' => 'avg',
- 'depth' => 'avg',
- 'comments' => 'app',
- 'usage' => 'cnt'
- );
- my $regexp=$url;
- $regexp=~s/(\W)/\\$1/g;
- $regexp='___'.$regexp.'___([a-z]+)$';
- foreach (keys %evaldata) {
- my $key=&unescape($_);
- if ($key=~/$regexp/) {
- my $ctype=$1;
- if (defined($cnt{$ctype})) {
- $cnt{$ctype}++;
- } else {
- $cnt{$ctype}=1;
- }
- unless ($listitems{$ctype} eq 'app') {
- if (defined($sum{$ctype})) {
- $sum{$ctype}+=$evaldata{$_};
- } else {
- $sum{$ctype}=$evaldata{$_};
- }
- } else {
- if (defined($sum{$ctype})) {
- if ($evaldata{$_}) {
- $sum{$ctype}.='
'.$evaldata{$_};
- }
- } else {
- $sum{$ctype}=''.$evaldata{$_};
- }
+
+# Get metadata except counts
+ if (tie(my %evaldata,'GDBM_File',
+ $prodir.'/nohist_resevaldata.db',&GDBM_READER(),0640)) {
+ my %sum=();
+ my %cnt=();
+ my %concat=();
+ my %listitems=(
+ 'course' => 'add',
+ 'goto' => 'add',
+ 'comefrom' => 'add',
+ 'avetries' => 'avg',
+ 'stdno' => 'add',
+ 'difficulty' => 'avg',
+ 'clear' => 'avg',
+ 'technical' => 'avg',
+ 'helpful' => 'avg',
+ 'correct' => 'avg',
+ 'depth' => 'avg',
+ 'comments' => 'app',
+ 'usage' => 'cnt'
+ );
+
+ my $regexp=$url;
+ $regexp=~s/(\W)/\\$1/g;
+ $regexp='___'.$regexp.'___([a-z]+)$';
+ while (my ($esckey,$value)=each %evaldata) {
+ my $key=&unescape($esckey);
+ if ($key=~/$regexp/) {
+ my ($item,$purl,$cat)=split(/___/,$key);
+ if (defined($cnt{$cat})) { $cnt{$cat}++; } else { $cnt{$cat}=1; }
+ unless ($listitems{$cat} eq 'app') {
+ if (defined($sum{$cat})) {
+ $sum{$cat}+=$evaldata{$esckey};
+ $concat{$cat}.=','.$item;
+ } else {
+ $sum{$cat}=$evaldata{$esckey};
+ $concat{$cat}=$item;
+ }
+ } else {
+ if (defined($sum{$cat})) {
+ if ($evaldata{$esckey}=~/\w/) {
+ $sum{$cat}.='
'.$evaldata{$esckey};
+ }
+ } else {
+ $sum{$cat}=''.$evaldata{$esckey};
+ }
+ }
+ }
+ }
+ untie(%evaldata);
+# transfer gathered data to returnhash, calculate averages where applicable
+ while (my $cat=each(%cnt)) {
+ if ($listitems{$cat} eq 'avg') {
+ $returnhash{$cat}=int(($sum{$cat}/$cnt{$cat})*100.0+0.5)/100.0;
+ } elsif ($listitems{$cat} eq 'cnt') {
+ $returnhash{$cat}=$cnt{$cat};
+ } else {
+ $returnhash{$cat}=$sum{$cat};
}
- if ($ctype ne 'count') {
- $newevaldata{$_}=$evaldata{$_};
- }
- }
- }
- foreach (keys %cnt) {
- if ($listitems{$_} eq 'avg') {
- $returnhash{$_}=int(($sum{$_}/$cnt{$_})*100.0+0.5)/100.0;
- } elsif ($listitems{$_} eq 'cnt') {
- $returnhash{$_}=$cnt{$_};
- } else {
- $returnhash{$_}=$sum{$_};
- }
- }
- if ($returnhash{'count'}) {
- my $newkey=$$.'_'.time.'_searchcat___'.&escape($url).'___count';
- $newevaldata{$newkey}=$returnhash{'count'};
- }
- untie(%evaldata);
- untie(%newevaldata);
- }
- return %returnhash;
+ $returnhash{$cat.'_list'}=$concat{$cat};
+ }
+ }
+# get count
+ if (tie(my %evaldata,'GDBM_File',
+ $prodir.'/nohist_accesscount.db',&GDBM_READER(),0640)) {
+ my $escurl=&escape($url);
+ if (! exists($evaldata{$escurl})) {
+ $returnhash{'count'}=0;
+ } else {
+ $returnhash{'count'}=$evaldata{$escurl};
+ }
+ untie %evaldata;
+ }
+ return %returnhash;
}
-# ----------------- Code to enable 'find' subroutine listing of the .meta files
-require "find.pl";
-sub wanted {
- (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) &&
- -f _ &&
- /^.*\.meta$/ && !/^.+\.\d+\.[^\.]+\.meta$/ &&
- push(@metalist,"$dir/$_");
-}
-
# --------------- Read loncapa_apache.conf and loncapa.conf and get variables
my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf');
my %perlvar=%{$perlvarref};
@@ -194,11 +224,11 @@ exit unless $perlvar{'lonRole'} eq 'libr
my $wwwid=getpwnam('www');
if ($wwwid!=$<) {
- $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
- $subj="LON: $perlvar{'lonHostID'} User ID mismatch";
- system("echo 'User ID mismatch. searchcat.pl must be run as user www.' |\
+ my $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
+ my $subj="LON: $perlvar{'lonHostID'} User ID mismatch";
+ system("echo 'User ID mismatch. searchcat.pl must be run as user www.' |\
mailto $emailto -s '$subj' > /dev/null");
- exit 1;
+ exit 1;
}
@@ -206,6 +236,7 @@ if ($wwwid!=$<) {
open(LOG,'>'.$perlvar{'lonDaemons'}.'/logs/searchcat.log');
print LOG '==== Searchcat Run '.localtime()."====\n\n";
+$simplestatus='time='.time.'&';
my $dbh;
# ------------------------------------- Make sure that database can be accessed
{
@@ -213,8 +244,12 @@ my $dbh;
$dbh = DBI->connect("DBI:mysql:loncapa","www",$perlvar{'lonSqlAccess'},{ RaiseError =>0,PrintError=>0})
) {
print LOG "Cannot connect to database!\n";
+ $simplestatus.='mysql=defunct';
+ &writesimple();
exit;
}
+
+# Create table for static metadata, unless exists
my $make_metadata_table = "CREATE TABLE IF NOT EXISTS metadata (".
"title TEXT, author TEXT, subject TEXT, url TEXT, keywords TEXT, ".
"version TEXT, notes TEXT, abstract TEXT, mime TEXT, language TEXT, ".
@@ -228,73 +263,109 @@ my $dbh;
"FULLTEXT idx_copyright (copyright)) TYPE=MYISAM";
# It would sure be nice to have some logging mechanism.
$dbh->do($make_metadata_table);
+
+# Create table for dynamic metadata, unless exists
+    # One integer or float column per dynamic metadata item, each paired
+    # with a <item>_list TEXT column.  The column list must be closed with
+    # ")" before the table option, otherwise the statement is not valid SQL.
+    # NOTE(review): "usage" looks like a MySQL reserved word and may need
+    # backtick quoting before the do() below is enabled - confirm.
+    my $make_dynmetadata_table = "CREATE TABLE IF NOT EXISTS dynmetadata (".
+        "url TEXT, count INTEGER UNSIGNED, ".
+        "course INTEGER UNSIGNED, course_list TEXT, ".
+        "goto INTEGER UNSIGNED, goto_list TEXT, ".
+        "comefrom INTEGER UNSIGNED, comefrom_list TEXT, ".
+        "usage INTEGER UNSIGNED, usage_list TEXT, ".
+        "stdno INTEGER UNSIGNED, stdno_list TEXT, ".
+        "avetries FLOAT, avetries_list TEXT, ".
+        "difficulty FLOAT, difficulty_list TEXT) ".
+        "TYPE=MYISAM";
+ # It would sure be nice to have some logging mechanism.
+#### $dbh->do($make_dynmetadata_table);
+
}
# ------------------------------------------------------------- get .meta files
opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}");
-my @homeusers=grep
- {&ishome("$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$_")}
- grep {!/^\.\.?$/} readdir(RESOURCES);
+my @homeusers = grep {
+ &ishome("$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$_")
+ } grep {!/^\.\.?$/} readdir(RESOURCES);
closedir RESOURCES;
+
+#
+# Create the statement handlers we need
+# Removes the existing row for exactly one URL.  The comparison is a binary
+# (case-sensitive) equality rather than LIKE: URLs routinely contain "_" and
+# may contain "%", which LIKE treats as wildcards, so a LIKE match could also
+# delete the rows of other resources.
+my $delete_sth = $dbh->prepare
+    ("DELETE FROM metadata WHERE url = BINARY ?");
+
+# Inserts one row; the trailing comments name the value bound to each
+# placeholder, in the order the values are passed to execute().
+my $insert_sth = $dbh->prepare
+    ("INSERT INTO metadata VALUES (".
+     "?,". # title
+     "?,". # author
+     "?,". # subject
+     "?,". # url
+     "?,". # keywords
+     "?,". # version (always 'current')
+     "?,". # notes
+     "?,". # abstract
+     "?,". # mime
+     "?,". # language
+     "?,". # creationdate
+     "?,". # lastrevisiondate
+     "?,". # owner
+     "?)"  # copyright
+     );
+
foreach my $user (@homeusers) {
print LOG "\n=== User: ".$user."\n\n";
-# Remove left-over db-files from potentially crashed searchcat run
+
my $prodir=&propath($perlvar{'lonDefDomain'},$user);
- unlink($prodir.'/nohist_new_resevaldata.db');
-# Use find.pl
+ # Use find.pl
undef @metalist;
@metalist=();
&find("$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$user");
-
-# -- process each file to get metadata and put into search catalog SQL database
-# Also, check to see if already there.
-# I could just delete (without searching first), but this works for now.
-foreach my $m (@metalist) {
- print LOG "- ".$m."\n";
- my $ref=&metadata($m);
- my $m2='/res/'.&declutter($m);
- $m2=~s/\.meta$//;
- &dynamicmeta($m2);
- my $q2="select * from metadata where url like binary '$m2'";
- my $sth = $dbh->prepare($q2);
- $sth->execute();
- my $r1=$sth->fetchall_arrayref;
- if (@$r1) {
- $sth=$dbh->prepare("delete from metadata where url like binary '$m2'");
- $sth->execute();
+ # -- process each file to get metadata and put into search catalog SQL
+ # database. Obsolete and private resources are skipped; for all others
+ # any existing row for the URL is deleted before the new one is inserted.
+ foreach my $m (@metalist) {
+ print LOG "- ".$m."\n";
+ my $ref=&metadata($m);
+ my $m2='/res/'.&declutter($m);
+ $m2=~s/\.meta$//;
+ if ($ref->{'obsolete'}) { print LOG "obsolete\n"; next; }
+ if ($ref->{'copyright'} eq 'private') { print LOG "private\n"; next; }
+ &dynamicmeta($m2);
+ &count($m2);
+ $delete_sth->execute($m2);
+ $insert_sth->execute($ref->{'title'},
+ $ref->{'author'},
+ $ref->{'subject'},
+ $m2,
+ $ref->{'keywords'},
+ 'current',
+ $ref->{'notes'},
+ $ref->{'abstract'},
+ $ref->{'mime'},
+ $ref->{'language'},
+ sqltime($ref->{'creationdate'}),
+ sqltime($ref->{'lastrevisiondate'}),
+ $ref->{'owner'},
+ $ref->{'copyright'});
+# if ($dbh->err()) {
+# print STDERR "Error:".$dbh->errstr()."\n";
+# }
+ $ref = undef;
}
- $sth=$dbh->prepare('insert into metadata values ('.
- '"'.delete($ref->{'title'}).'"'.','.
- '"'.delete($ref->{'author'}).'"'.','.
- '"'.delete($ref->{'subject'}).'"'.','.
- '"'.$m2.'"'.','.
- '"'.delete($ref->{'keywords'}).'"'.','.
- '"'.'current'.'"'.','.
- '"'.delete($ref->{'notes'}).'"'.','.
- '"'.delete($ref->{'abstract'}).'"'.','.
- '"'.delete($ref->{'mime'}).'"'.','.
- '"'.delete($ref->{'language'}).'"'.','.
- '"'.sqltime(delete($ref->{'creationdate'})).'"'.','.
- '"'.sqltime(delete($ref->{'lastrevisiondate'})).'"'.','.
- '"'.delete($ref->{'owner'}).'"'.','.
- '"'.delete($ref->{'copyright'}).'"'.')');
- $sth->execute();
-}
-
-# ----------------------------------------------------------- Clean up database
-# Need to, perhaps, remove stale SQL database records.
-# ... not yet implemented
-
-
-# -------------------------------------------------- Copy over the new db-files
- system('mv '.$prodir.'/nohist_new_resevaldata.db '.
- $prodir.'/nohist_resevaldata.db');
+
+ # --------------------------------------------------- Clean up database
+ # Need to, perhaps, remove stale SQL database records.
+ # ... not yet implemented
+
}
# --------------------------------------------------- Close database connection
$dbh->disconnect;
print LOG "\n==== Searchcat completed ".localtime()." ====\n";
close(LOG);
+&writesimple();
+&writecount();
exit 0;
+
+
+
# =============================================================================
# ---------------------------------------------------------------- Get metadata
@@ -312,30 +383,30 @@ sub metadata {
my $parser=HTML::TokeParser->new(\$metastring);
my $token;
while ($token=$parser->get_token) {
- if ($token->[0] eq 'S') {
- my $entry=$token->[1];
- my $unikey=$entry;
- if (defined($token->[2]->{'part'})) {
- $unikey.='_'.$token->[2]->{'part'};
- }
- if (defined($token->[2]->{'name'})) {
- $unikey.='_'.$token->[2]->{'name'};
- }
- if ($metacache{$uri.'keys'}) {
- $metacache{$uri.'keys'}.=','.$unikey;
- } else {
- $metacache{$uri.'keys'}=$unikey;
- }
- map {
- $metacache{$uri.''.$unikey.'.'.$_}=$token->[2]->{$_};
- } @{$token->[3]};
- unless (
- $metacache{$uri.''.$unikey}=$parser->get_text('/'.$entry)
- ) { $metacache{$uri.''.$unikey}=
- $metacache{$uri.''.$unikey.'.default'};
- }
- }
- }
+ if ($token->[0] eq 'S') {
+ my $entry=$token->[1];
+ my $unikey=$entry;
+ if (defined($token->[2]->{'part'})) {
+ $unikey.='_'.$token->[2]->{'part'};
+ }
+ if (defined($token->[2]->{'name'})) {
+ $unikey.='_'.$token->[2]->{'name'};
+ }
+ if ($metacache{$uri.'keys'}) {
+ $metacache{$uri.'keys'}.=','.$unikey;
+ } else {
+ $metacache{$uri.'keys'}=$unikey;
+ }
+ map {
+ $metacache{$uri.''.$unikey.'.'.$_}=$token->[2]->{$_};
+ } @{$token->[3]};
+ unless (
+ $metacache{$uri.''.$unikey}=$parser->get_text('/'.$entry)
+ ) { $metacache{$uri.''.$unikey}=
+ $metacache{$uri.''.$unikey.'.default'};
+ }
+ }
+ }
}
return \%metacache;
}
@@ -343,12 +414,12 @@ sub metadata {
# ------------------------------------------------------------ Serves up a file
# returns either the contents of the file or a -1
sub getfile {
- my $file=shift;
- if (! -e $file ) { return -1; };
- my $fh=IO::File->new($file);
- my $a='';
- while (<$fh>) { $a .=$_; }
- return $a
+ my $file=shift;
+ if (! -e $file ) { return -1; };
+ my $fh=IO::File->new($file);
+ my $a='';
+ while (<$fh>) { $a .=$_; }
+ return $a;
}
# ------------------------------------------------------------- Declutters URLs
@@ -396,9 +467,9 @@ sub sqltime {
sub maketime {
my %th=@_;
- return POSIX::mktime(
- ($th{'seconds'},$th{'minutes'},$th{'hours'},
- $th{'day'},$th{'month'}-1,$th{'year'}-1900,0,0,$th{'dlsav'}));
+ return POSIX::mktime(($th{'seconds'},$th{'minutes'},$th{'hours'},
+ $th{'day'},$th{'month'}-1,
+ $th{'year'}-1900,0,0,$th{'dlsav'}));
}
@@ -409,10 +480,19 @@ sub maketime {
sub unsqltime {
my $timestamp=shift;
if ($timestamp=~/^(\d+)\-(\d+)\-(\d+)\s+(\d+)\:(\d+)\:(\d+)$/) {
- $timestamp=&maketime(
- 'year'=>$1,'month'=>$2,'day'=>$3,
- 'hours'=>$4,'minutes'=>$5,'seconds'=>$6);
+ $timestamp=&maketime('year'=>$1,'month'=>$2,'day'=>$3,
+ 'hours'=>$4,'minutes'=>$5,'seconds'=>$6);
}
return $timestamp;
}
+# ----------------- Code to enable 'find' subroutine listing of the .meta files
+
+no strict "vars";
+
+# Callback for the find.pl traversal: appends "$dir/$_" to @metalist when
+# the current entry $_ passes every test in the && chain below:
+#   - lstat succeeds (its fields are stored in package globals, which is
+#     why "no strict vars" is in effect for this sub),
+#   - it is a plain file ("-f _" reuses the lstat result just obtained),
+#   - its name ends in .meta,
+#   - its name is not of the form <name>.<digits>.<ext>.meta
+#     (presumably versioned copies of a resource - confirm).
+# NOTE(review): $dir is presumably set by find.pl to the directory being
+# traversed - confirm.
+sub wanted {
+ (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) &&
+ -f _ &&
+ /^.*\.meta$/ && !/^.+\.\d+\.[^\.]+\.meta$/ &&
+ push(@metalist,"$dir/$_");
+}