1: #!/usr/bin/perl
2: # The LearningOnline Network
3: # searchcat.pl "Search Catalog" batch script
4: #
5: # $Id: searchcat.pl,v 1.31 2003/02/03 18:03:53 harris41 Exp $
6: #
7: # Copyright Michigan State University Board of Trustees
8: #
9: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
10: #
11: # LON-CAPA is free software; you can redistribute it and/or modify
12: # it under the terms of the GNU General Public License as published by
13: # the Free Software Foundation; either version 2 of the License, or
14: # (at your option) any later version.
15: #
16: # LON-CAPA is distributed in the hope that it will be useful,
17: # but WITHOUT ANY WARRANTY; without even the implied warranty of
18: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19: # GNU General Public License for more details.
20: #
21: # You should have received a copy of the GNU General Public License
22: # along with LON-CAPA; if not, write to the Free Software
23: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24: #
25: # /home/httpd/html/adm/gpl.txt
26: #
27: # http://www.lon-capa.org/
28: #
29: ###
30:
31: # This script goes through a LON-CAPA resource
32: # directory and gathers metadata.
33: # The metadata is entered into a SQL database.
34:
35: use lib '/home/httpd/lib/perl/';
36: use LONCAPA::Configuration;
37:
38: use IO::File;
39: use HTML::TokeParser;
40: use DBI;
41: use GDBM_File;
42: use POSIX qw(strftime mktime);
43:
44: my @metalist;
45:
46:
47: # ----------------------------------------------------- Un-Escape Special Chars
48:
49: sub unescape {
50: my $str=shift;
51: $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg;
52: return $str;
53: }
54:
55: # -------------------------------------------------------- Escape Special Chars
56:
57: sub escape {
58: my $str=shift;
59: $str =~ s/(\W)/"%".unpack('H2',$1)/eg;
60: return $str;
61: }
62:
63:
64: # ------------------------------------------- Code to evaluate dynamic metadata
65:
66: sub dynamicmeta {
67:
68: my $url=&declutter(shift);
69: $url=~s/\.meta$//;
70: my %returnhash=();
71: my ($adomain,$aauthor)=($url=~/^(\w+)\/(\w+)\//);
72: my $prodir=&propath($adomain,$aauthor);
73: if ((tie(%evaldata,'GDBM_File',
74: $prodir.'/nohist_resevaldata.db',&GDBM_READER(),0640)) &&
75: (tie(%newevaldata,'GDBM_File',
76: $prodir.'/nohist_new_resevaldata.db',&GDBM_WRCREAT(),0640))) {
77: my %sum=();
78: my %cnt=();
79: my %listitems=('count' => 'add',
80: 'course' => 'add',
81: 'avetries' => 'avg',
82: 'stdno' => 'add',
83: 'difficulty' => 'avg',
84: 'clear' => 'avg',
85: 'technical' => 'avg',
86: 'helpful' => 'avg',
87: 'correct' => 'avg',
88: 'depth' => 'avg',
89: 'comments' => 'app',
90: 'usage' => 'cnt'
91: );
92: my $regexp=$url;
93: $regexp=~s/(\W)/\\$1/g;
94: $regexp='___'.$regexp.'___([a-z]+)$';
95: foreach (keys %evaldata) {
96: my $key=&unescape($_);
97: if ($key=~/$regexp/) {
98: my $ctype=$1;
99: if (defined($cnt{$ctype})) {
100: $cnt{$ctype}++;
101: } else {
102: $cnt{$ctype}=1;
103: }
104: unless ($listitems{$ctype} eq 'app') {
105: if (defined($sum{$ctype})) {
106: $sum{$ctype}+=$evaldata{$_};
107: } else {
108: $sum{$ctype}=$evaldata{$_};
109: }
110: } else {
111: if (defined($sum{$ctype})) {
112: if ($evaldata{$_}) {
113: $sum{$ctype}.='<hr>'.$evaldata{$_};
114: }
115: } else {
116: $sum{$ctype}=''.$evaldata{$_};
117: }
118: }
119: if ($ctype ne 'count') {
120: $newevaldata{$_}=$evaldata{$_};
121: }
122: }
123: }
124: foreach (keys %cnt) {
125: if ($listitems{$_} eq 'avg') {
126: $returnhash{$_}=int(($sum{$_}/$cnt{$_})*100.0+0.5)/100.0;
127: } elsif ($listitems{$_} eq 'cnt') {
128: $returnhash{$_}=$cnt{$_};
129: } else {
130: $returnhash{$_}=$sum{$_};
131: }
132: }
133: if ($returnhash{'count'}) {
134: my $newkey=$$.'_'.time.'_searchcat___'.&escape($url).'___count';
135: $newevaldata{$newkey}=$returnhash{'count'};
136: }
137: untie(%evaldata);
138: untie(%newevaldata);
139: }
140: return %returnhash;
141: }
142:
143: # ----------------- Code to enable 'find' subroutine listing of the .meta files
144: require "find.pl";
145: sub wanted {
146: (($dev,$ino,$mode,$nlink,$uid,$gid) = lstat($_)) &&
147: -f _ &&
148: /^.*\.meta$/ && !/^.+\.\d+\.[^\.]+\.meta$/ &&
149: push(@metalist,"$dir/$_");
150: }
151:
152: # --------------- Read loncapa_apache.conf and loncapa.conf and get variables
153: my $perlvarref=LONCAPA::Configuration::read_conf('loncapa.conf');
154: my %perlvar=%{$perlvarref};
155: undef $perlvarref; # remove since sensitive and not needed
156: delete $perlvar{'lonReceipt'}; # remove since sensitive and not needed
157:
158: # ------------------------------------- Only run if machine is a library server
159: exit unless $perlvar{'lonRole'} eq 'library';
160:
161: # ----------------------------- Make sure this process is running from user=www
162:
163: my $wwwid=getpwnam('www');
164: if ($wwwid!=$<) {
165: $emailto="$perlvar{'lonAdmEMail'},$perlvar{'lonSysEMail'}";
166: $subj="LON: $perlvar{'lonHostID'} User ID mismatch";
167: system("echo 'User ID mismatch. searchcat.pl must be run as user www.' |\
168: mailto $emailto -s '$subj' > /dev/null");
169: exit 1;
170: }
171:
172:
173: # ---------------------------------------------------------- We are in business
174:
175: open(LOG,'>'.$perlvar{'lonDaemons'}.'/logs/searchcat.log');
176: print LOG '==== Searchcat Run '.localtime()."====\n\n";
177: my $dbh;
178: # ------------------------------------- Make sure that database can be accessed
179: {
180: unless (
181: $dbh = DBI->connect("DBI:mysql:loncapa","www",$perlvar{'lonSqlAccess'},{ RaiseError =>0,PrintError=>0})
182: ) {
183: print LOG "Cannot connect to database!\n";
184: exit;
185: }
186: my $make_metadata_table = "CREATE TABLE IF NOT EXISTS metadata (".
187: "title TEXT, author TEXT, subject TEXT, url TEXT, keywords TEXT, ".
188: "version TEXT, notes TEXT, abstract TEXT, mime TEXT, language TEXT, ".
189: "creationdate DATETIME, lastrevisiondate DATETIME, owner TEXT, ".
190: "copyright TEXT, FULLTEXT idx_title (title), ".
191: "FULLTEXT idx_author (author), FULLTEXT idx_subject (subject), ".
192: "FULLTEXT idx_url (url), FULLTEXT idx_keywords (keywords), ".
193: "FULLTEXT idx_version (version), FULLTEXT idx_notes (notes), ".
194: "FULLTEXT idx_abstract (abstract), FULLTEXT idx_mime (mime), ".
195: "FULLTEXT idx_language (language), FULLTEXT idx_owner (owner), ".
196: "FULLTEXT idx_copyright (copyright)) TYPE=MYISAM";
197: # It would sure be nice to have some logging mechanism.
198: $dbh->do($make_metadata_table);
199: }
200:
201: # ------------------------------------------------------------- get .meta files
202: opendir(RESOURCES,"$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}");
203: my @homeusers=grep
204: {&ishome("$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$_")}
205: grep {!/^\.\.?$/} readdir(RESOURCES);
206: closedir RESOURCES;
207: foreach my $user (@homeusers) {
208: print LOG "\n=== User: ".$user."\n\n";
209: # Remove left-over db-files from potentially crashed searchcat run
210: my $prodir=&propath($perlvar{'lonDefDomain'},$user);
211: unlink($prodir.'/nohist_new_resevaldata.db');
212: # Use find.pl
213: undef @metalist;
214: @metalist=();
215: &find("$perlvar{'lonDocRoot'}/res/$perlvar{'lonDefDomain'}/$user");
216:
217: # -- process each file to get metadata and put into search catalog SQL database
218: # Also, check to see if already there.
219: # I could just delete (without searching first), but this works for now.
220: foreach my $m (@metalist) {
221: print LOG "- ".$m."\n";
222: my $ref=&metadata($m);
223: my $m2='/res/'.&declutter($m);
224: $m2=~s/\.meta$//;
225: &dynamicmeta($m2);
226: my $q2="select * from metadata where url like binary '$m2'";
227: my $sth = $dbh->prepare($q2);
228: $sth->execute();
229: my $r1=$sth->fetchall_arrayref;
230: if (@$r1) {
231: $sth=$dbh->prepare("delete from metadata where url like binary '$m2'");
232: $sth->execute();
233: }
234: $sth=$dbh->prepare('insert into metadata values ('.
235: '"'.delete($ref->{'title'}).'"'.','.
236: '"'.delete($ref->{'author'}).'"'.','.
237: '"'.delete($ref->{'subject'}).'"'.','.
238: '"'.$m2.'"'.','.
239: '"'.delete($ref->{'keywords'}).'"'.','.
240: '"'.'current'.'"'.','.
241: '"'.delete($ref->{'notes'}).'"'.','.
242: '"'.delete($ref->{'abstract'}).'"'.','.
243: '"'.delete($ref->{'mime'}).'"'.','.
244: '"'.delete($ref->{'language'}).'"'.','.
245: '"'.sqltime(delete($ref->{'creationdate'})).'"'.','.
246: '"'.sqltime(delete($ref->{'lastrevisiondate'})).'"'.','.
247: '"'.delete($ref->{'owner'}).'"'.','.
248: '"'.delete($ref->{'copyright'}).'"'.')');
249: $sth->execute();
250: }
251:
252: # ----------------------------------------------------------- Clean up database
253: # Need to, perhaps, remove stale SQL database records.
254: # ... not yet implemented
255:
256:
257: # -------------------------------------------------- Copy over the new db-files
258: system('mv '.$prodir.'/nohist_new_resevaldata.db '.
259: $prodir.'/nohist_resevaldata.db');
260: }
261: # --------------------------------------------------- Close database connection
262: $dbh->disconnect;
263: print LOG "\n==== Searchcat completed ".localtime()." ====\n";
264: close(LOG);
265: exit 0;
266: # =============================================================================
267:
268: # ---------------------------------------------------------------- Get metadata
269: # significantly altered from subroutine present in lonnet
270: sub metadata {
271: my ($uri,$what)=@_;
272: my %metacache;
273: $uri=&declutter($uri);
274: my $filename=$uri;
275: $uri=~s/\.meta$//;
276: $uri='';
277: unless ($metacache{$uri.'keys'}) {
278: unless ($filename=~/\.meta$/) { $filename.='.meta'; }
279: my $metastring=&getfile($perlvar{'lonDocRoot'}.'/res/'.$filename);
280: my $parser=HTML::TokeParser->new(\$metastring);
281: my $token;
282: while ($token=$parser->get_token) {
283: if ($token->[0] eq 'S') {
284: my $entry=$token->[1];
285: my $unikey=$entry;
286: if (defined($token->[2]->{'part'})) {
287: $unikey.='_'.$token->[2]->{'part'};
288: }
289: if (defined($token->[2]->{'name'})) {
290: $unikey.='_'.$token->[2]->{'name'};
291: }
292: if ($metacache{$uri.'keys'}) {
293: $metacache{$uri.'keys'}.=','.$unikey;
294: } else {
295: $metacache{$uri.'keys'}=$unikey;
296: }
297: map {
298: $metacache{$uri.''.$unikey.'.'.$_}=$token->[2]->{$_};
299: } @{$token->[3]};
300: unless (
301: $metacache{$uri.''.$unikey}=$parser->get_text('/'.$entry)
302: ) { $metacache{$uri.''.$unikey}=
303: $metacache{$uri.''.$unikey.'.default'};
304: }
305: }
306: }
307: }
308: return \%metacache;
309: }
310:
311: # ------------------------------------------------------------ Serves up a file
312: # returns either the contents of the file or a -1
313: sub getfile {
314: my $file=shift;
315: if (! -e $file ) { return -1; };
316: my $fh=IO::File->new($file);
317: my $a='';
318: while (<$fh>) { $a .=$_; }
319: return $a
320: }
321:
322: # ------------------------------------------------------------- Declutters URLs
323: sub declutter {
324: my $thisfn=shift;
325: $thisfn=~s/^$perlvar{'lonDocRoot'}//;
326: $thisfn=~s/^\///;
327: $thisfn=~s/^res\///;
328: return $thisfn;
329: }
330:
331: # --------------------------------------- Is this the home server of an author?
332: # (copied from lond, modification of the return value)
333: sub ishome {
334: my $author=shift;
335: $author=~s/\/home\/httpd\/html\/res\/([^\/]*)\/([^\/]*).*/$1\/$2/;
336: my ($udom,$uname)=split(/\//,$author);
337: my $proname=propath($udom,$uname);
338: if (-e $proname) {
339: return 1;
340: } else {
341: return 0;
342: }
343: }
344:
345: # -------------------------------------------- Return path to profile directory
346: # (copied from lond)
347: sub propath {
348: my ($udom,$uname)=@_;
349: $udom=~s/\W//g;
350: $uname=~s/\W//g;
351: my $subdir=$uname.'__';
352: $subdir =~ s/(.)(.)(.).*/$1\/$2\/$3/;
353: my $proname="$perlvar{'lonUsersDir'}/$udom/$subdir/$uname";
354: return $proname;
355: }
356:
357: # ---------------------------- convert 'time' format into a datetime sql format
358: sub sqltime {
359: my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) =
360: localtime(&unsqltime(@_[0]));
361: $mon++; $year+=1900;
362: return "$year-$mon-$mday $hour:$min:$sec";
363: }
364:
365: sub maketime {
366: my %th=@_;
367: return POSIX::mktime(
368: ($th{'seconds'},$th{'minutes'},$th{'hours'},
369: $th{'day'},$th{'month'}-1,$th{'year'}-1900,0,0,$th{'dlsav'}));
370: }
371:
372:
373: #########################################
374: #
375: # Retro-fixing of un-backward-compatible time format
376:
377: sub unsqltime {
378: my $timestamp=shift;
379: if ($timestamp=~/^(\d+)\-(\d+)\-(\d+)\s+(\d+)\:(\d+)\:(\d+)$/) {
380: $timestamp=&maketime(
381: 'year'=>$1,'month'=>$2,'day'=>$3,
382: 'hours'=>$4,'minutes'=>$5,'seconds'=>$6);
383: }
384: return $timestamp;
385: }
386:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>