--- loncom/metadata_database/searchcat.pl 2003/02/03 05:39:37 1.28 +++ loncom/metadata_database/searchcat.pl 2003/02/03 17:01:55 1.30 @@ -2,30 +2,29 @@ # The LearningOnline Network # searchcat.pl "Search Catalog" batch script # -# $Id: searchcat.pl,v 1.28 2003/02/03 05:39:37 harris41 Exp $ +# $Id: searchcat.pl,v 1.30 2003/02/03 17:01:55 www Exp $ # # Copyright Michigan State University Board of Trustees # -# This file is part of the LearningOnline Network with a -# Computer assisted personalized approach (loncapa). +# This file is part of the LearningOnline Network with CAPA (LON-CAPA). # -# Loncapa is free software; you can redistribute it and/or modify +# LON-CAPA is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # -# Loncapa is distributed in the hope that it will be useful, +# LON-CAPA is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License -# along with loncapa; if not, write to the Free Software +# along with LON-CAPA; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # # /home/httpd/html/adm/gpl.txt # -# http://www.loncapa.org/ +# http://www.lon-capa.org/ # # YEAR=2001 # 04/14/2001, 04/16/2001 Scott Harrison @@ -375,14 +374,21 @@ sub escape ($) B<build_on_the_fly_dynamic_metadata> - evaluate and store dynamic metadata. -Dynamic metadata is stored in a nohist_resevaldata GDBM database. -Most of the calculations in this subroutine are totally pointless -and not useful for anything that this subroutine does. -(THIS IS A FRUSTRATED SUBROUTINE THAT IS NON-OPTIMAL, *&*&!.) 
-The only thing that this subroutine really makes happen is adjusting -a 'count' value inside the F<nohist_new_resevaldata.db> as well -as updating F<nohist_new_resevaldata.db> with information from -F<nohist_resevaldata.db>. +Returns the dynamic metadata for an author, which will later be added to the +MySQL database (not yet implemented). + +The vast majority of entries in F<nohist_resevaldata.db>, which contains +the dynamic metadata for an author's resources, are "count", which make +the file really large and evaluation really slow. + +While computing the current value of all dynamic metadata +for later insertion into the MySQL metadata cache (not yet implemented), +this routine also simply adds up all "count" type fields and replaces them by +one new field with the to-date count. + +Only after successful completion of working with one author, copy new file to +original file. Copy to tmp-"new"-db-file was necessary since db-file size +would not shrink after "delete" of key. =over 4 @@ -402,16 +408,14 @@ Returns: =cut -sub build_on_the_fly_dynamic_metadata ($) - { - # BEWARE ALL WHO TRY TO UNDERSTAND THIS ABSURDLY HORRIBLE SUBROUTINE. - - # Do all sorts of mumbo-jumbo to compute the user's directory. - my $url = &declutter(shift(@_)); - $url =~ s/\.meta$//; - my %returnhash = (); - my ($adomain,$aauthor) = ($url =~ m!^(\w+)/(\w+)/!); - my $user_directory = &construct_path_to_user_directory($adomain,$aauthor); +sub build_on_the_fly_dynamic_metadata { + + # Need to compute the user's directory. + my $url=&declutter(shift); + $url=~s/\.meta$//; + my %returnhash=(); + my ($adomain,$aauthor)=($url=~/^(\w+)\/(\w+)\//); + my $user_directory=&construct_path_to_user_directory($adomain,$aauthor); # Attempt a GDBM database instantiation inside users directory and proceed. if ((tie(%evaldata,'GDBM_File', @@ -419,14 +423,13 @@ sub build_on_the_fly_dynamic_metadata ($ '/nohist_resevaldata.db',&GDBM_READER(),0640)) && (tie(%newevaldata,'GDBM_File', $user_directory. 
- '/nohist_new_resevaldata.db',&GDBM_WRCREAT(),0640))) - { + '/nohist_new_resevaldata.db',&GDBM_WRCREAT(),0640))) { # For different variables, track the running sum and counts. - my %sum = (); - my %cnt = (); + my %sum=(); + my %cnt=(); # Define computed items as a sum (add) or an average (avg) or a raw - # count (cnt) or 'app'? + # count (cnt) or append (app)? my %listitems=('count' => 'add', 'course' => 'add', 'avetries' => 'avg', @@ -442,99 +445,70 @@ sub build_on_the_fly_dynamic_metadata ($ ); # Untaint the url and use as part of a regular expression. - my $regexp = $url; - $regexp =~ s/(\W)/\\$1/g; - $regexp = '___'.$regexp.'___([a-z]+)$'; - - # Check existing nohist database for this url. - # THE ONLY TIME THIS IS IMPORTANT FOR THIS AWFUL SUBROUTINE - # IS FOR 'count' ENTRIES - # AND FOR REFRESHING non-'count' ENTRIES INSIDE nohist_new DATABASE. - foreach (keys %evaldata) - { - my $key = &unescape($_); - if ($key =~ /$regexp/) # If url-based entry exists. - { - my $ctype = $1; # Set to specific category type. + my $regexp=$url; + $regexp=~s/(\W)/\\$1/g; + $regexp='___'.$regexp.'___([a-z]+)$'; #' emacs + + # Check existing database for this author. + # this is modifying the 'count' entries + # and copying all other entries over + + foreach (keys %evaldata) { + my $key=&unescape($_); + if ($key=~/$regexp/) { # If url-based entry exists. + my $ctype=$1; # Set to specific category type. # Do an increment for this category type. - if (defined($cnt{$ctype})) - { + if (defined($cnt{$ctype})) { $cnt{$ctype}++; - } - else - { - $cnt{$ctype} = 1; - } - unless ($listitems{$ctype} eq 'app') # WHAT DOES 'app' MEAN? - { + } else { + $cnt{$ctype}=1; + } + unless ($listitems{$ctype} eq 'app') { # append comments # Increment the sum based on the evaluated data in the db. - if (defined($sum{$ctype})) - { - $sum{$ctype} += $evaldata{$_}; - } - else - { - $sum{$ctype} = $evaldata{$_}; - } - } - else # 'app' mode, means to use '
' as a separator - { - if (defined($sum{$ctype})) - { - if ($evaldata{$_}) - { - $sum{$ctype} .= '
'.$evaldata{$_}; - } - } - else - { - $sum{$ctype} = ''.$evaldata{$_}; - } - } - if ($ctype ne 'count') - { - # ALERT! THIS HORRIBLE LOOP IS ACTUALLY DOING SOMETHING - # USEFUL! - $newevaldata{$_} = $evaldata{$_}; - } - } - } - - # THE ONLY OTHER TIME THIS LOOP IS USEFUL IS FOR THE 'count' HASH - # ELEMENT. - foreach (keys %cnt) - { - if ($listitems{$_} eq 'avg') - { - $returnhash{$_} = int(($sum{$_}/$cnt{$_})*100.0+0.5)/100.0; - } - elsif ($listitems{$_} eq 'cnt') - { - $returnhash{$_} = $cnt{$_}; - } - else - { - $returnhash{$_} = $sum{$_}; - } - } - - # A RARE MOMENT OF DOING ANYTHING USEFUL INSIDE THIS - # BLEEPING SUBROUTINE. - if ($returnhash{'count'}) - { - my $newkey = $$.'_'.time.'_searchcat___'.&escape($url).'___count'; - $newevaldata{$newkey} = $returnhash{'count'}; - } + if (defined($sum{$ctype})) { + $sum{$ctype}+=$evaldata{$_}; + } else { + $sum{$ctype}=$evaldata{$_}; + } + } else { # 'app' mode, means to use '
' as a separator + if (defined($sum{$ctype})) { + if ($evaldata{$_}) { + $sum{$ctype}.='
'.$evaldata{$_}; + } + } else { + $sum{$ctype}=''.$evaldata{$_}; + } + } + if ($ctype ne 'count') { + # this is copying all data except 'count' attributes + $newevaldata{$_}=$evaldata{$_}; + } + } + } + + # these values will be returned (currently still unused) + foreach (keys %cnt) { + if ($listitems{$_} eq 'avg') { + $returnhash{$_}=int(($sum{$_}/$cnt{$_})*100.0+0.5)/100.0; + } elsif ($listitems{$_} eq 'cnt') { + $returnhash{$_}=$cnt{$_}; + } else { + $returnhash{$_}=$sum{$_}; + } + } + + # generate new count key in resevaldata, insert sum + if ($returnhash{'count'}) { + my $newkey=$$.'_'.time.'_searchcat___'.&escape($url).'___count'; + $newevaldata{$newkey}=$returnhash{'count'}; + } untie(%evaldata); # Close/release the original nohist database. untie(%newevaldata); # Close/release the new nohist database. - } - return(%returnhash); - # Celebrate! We have now accomplished some simple calculations using - # 1000% bloated functionality in our subroutine. Go wash your eyeballs - # out now. - } + } + return %returnhash; +} =pod