loncom/metadata_database/searchcat.pl - diff

Return to searchcat.pl CVS log

Up to [LON-CAPA] / loncom / metadata_database

Diff for /loncom/metadata_database/searchcat.pl between versions 1.29 and 1.30

-version 1.29, 2003/02/03 13:42:16
+version 1.30, 2003/02/03 17:01:55
  Line 374  sub escape ($)
  B<build_on_the_fly_dynamic_metadata> - evaluate and store dynamic metadata.
- Dynamic metadata is stored in a nohist_resevaldata GDBM database.
+ Returns the dynamic metadata for an author, which will later be added to the
- The only thing that this subroutine really makes happen is adjusting
+ MySQL database (not yet implemented).
- a 'count' value inside the F<nohist_new_resevaldata.db> as well
- as updating F<nohist_new_resevaldata.db> with information from
+ The vast majority of entries in F<nohist_resevaldata.db>, which contains
- F<nohist_resevaldata.db>.
+ the dynamic metadata for an author's resources, are "count" entries, which make
+ the file really large and evaluation really slow.
+ While computing the current value of all dynamic metadata
+ for later insertion into the MySQL metadata cache (not yet implemented),
+ this routine also adds up all "count" type fields and replaces them with
+ a single new field holding the count to date.
+ Only after one author has been processed successfully is the new file copied
+ over the original file. Writing to a temporary "new" db file is necessary because
+ the size of a db file does not shrink after a key is deleted.
- It may need optmization, but since it gets called once a week. . .
  =over 4
  Parameters:
- Line 399  Returns:
+ Line 408  Returns:
  =cut
- sub build_on_the_fly_dynamic_metadata ($)
+ sub build_on_the_fly_dynamic_metadata {
-   {
-     # some elements in here maybe non-obvious
      # Need to compute the user's directory.
-     my $url = &declutter(shift(@_));
+     my $url=&declutter(shift);
-     $url =~ s/\.meta$//;
+     $url=~s/\.meta$//;
-     my %returnhash = ();
+     my %returnhash=();
-     my ($adomain,$aauthor) = ($url =~ m!^(\w+)/(\w+)/!);
+     my ($adomain,$aauthor)=($url=~/^(\w+)\/(\w+)\//);
-     my $user_directory = &construct_path_to_user_directory($adomain,$aauthor);
+     my $user_directory=&construct_path_to_user_directory($adomain,$aauthor);
      # Attempt a GDBM database instantiation inside users directory and proceed.
      if ((tie(%evaldata,'GDBM_File',
- Line 416  sub build_on_the_fly_dynamic_metadata ($
+ Line 423  sub build_on_the_fly_dynamic_metadata ($
  	     '/nohist_resevaldata.db',&GDBM_READER(),0640)) &&
          (tie(%newevaldata,'GDBM_File',
  	     $user_directory.
- 	     '/nohist_new_resevaldata.db',&GDBM_WRCREAT(),0640)))
+ 	     '/nohist_new_resevaldata.db',&GDBM_WRCREAT(),0640))) {
-       {
  	# For different variables, track the running sum and counts.
- 	my %sum = ();
+ 	my %sum=();
- 	my %cnt = ();
+ 	my %cnt=();
  	# Define computed items as a sum (add) or an average (avg) or a raw
- 	# count (cnt) or 'app'?
+ 	# count (cnt) or append (app)?
  	my %listitems=('count'        => 'add',
  		       'course'       => 'add',
  		       'avetries'     => 'avg',
- Line 439  sub build_on_the_fly_dynamic_metadata ($
+ Line 445  sub build_on_the_fly_dynamic_metadata ($
  		       );
  	# Untaint the url and use as part of a regular expression.
- 	my $regexp = $url;
+ 	my $regexp=$url;
- 	$regexp =~ s/(\W)/\\$1/g;
+ 	$regexp=~s/(\W)/\\$1/g;
- 	$regexp = '___'.$regexp.'___([a-z]+)$'; #' emacs
+ 	$regexp='___'.$regexp.'___([a-z]+)$'; #' emacs
- 	# Check existing nohist database for this url.
+ 	# Check existing database for this author.
-         # this is modfying the 'count' entries
+         # this is modifying the 'count' entries
-         # and copying all othe entries over
+         # and copying all other entries over
- 	foreach (keys %evaldata)
- 	  {
+ 	foreach (keys %evaldata) {
- 	    my $key = &unescape($_);
+ 	    my $key=&unescape($_);
- 	    if ($key =~ /$regexp/) # If url-based entry exists.
+ 	    if ($key=~/$regexp/) { # If url-based entry exists.
- 	      {
+ 		my $ctype=$1; # Set to specific category type.
- 		my $ctype = $1; # Set to specific category type.
  		# Do an increment for this category type.
- 		if (defined($cnt{$ctype}))
+ 		if (defined($cnt{$ctype})) {
- 		  {
  		    $cnt{$ctype}++;
- 		  }
+ 		} else {
- 		else
+ 		    $cnt{$ctype}=1;
- 		  {
+ 		}
- 		    $cnt{$ctype} = 1;
+                 unless ($listitems{$ctype} eq 'app') { # append comments
- 		  }
-                 unless ($listitems{$ctype} eq 'app') # WHAT DOES 'app' MEAN?
- 		  {
  		    # Increment the sum based on the evaluated data in the db.
- 		    if (defined($sum{$ctype}))
+ 		    if (defined($sum{$ctype})) {
- 		      {
+ 			$sum{$ctype}+=$evaldata{$_};
- 			$sum{$ctype} += $evaldata{$_};
+ 		    } else {
- 		      }
+ 			$sum{$ctype}=$evaldata{$_};
- 		    else
+ 		    }
- 		      {
+  		} else { # 'app' mode, means to use '<hr />' as a separator
- 			$sum{$ctype} = $evaldata{$_};
+ 		    if (defined($sum{$ctype})) {
- 		      }
+ 			if ($evaldata{$_}) {
-  		  }
+ 			    $sum{$ctype}.='<hr />'.$evaldata{$_};
- 		else # 'app' mode, means to use '<hr />' as a separator
+ 			}
- 		  {
+ 		    } else {
- 		    if (defined($sum{$ctype}))
+ 			$sum{$ctype}=''.$evaldata{$_};
- 		      {
+ 		    }
- 			if ($evaldata{$_})
+ 	        }
- 			  {
+ 		if ($ctype ne 'count') {
- 			    $sum{$ctype} .= '<hr />'.$evaldata{$_};
- 			  }
- 		      }
- 		    else
- 		      {
- 			$sum{$ctype} = ''.$evaldata{$_};
- 		      }
- 		  }
- 		if ($ctype ne 'count')
- 		  {
                      # this is copying all data except 'count' attributes
- 		    $newevaldata{$_} = $evaldata{$_};
+ 		    $newevaldata{$_}=$evaldata{$_};
- 		  }
+ 	        }
- 	      }
+ 	    }
- 	  }
+ 	}
-         # the only other time this loop is useful is for the 'count' hash
+         # these values will be returned (currently still unused)
-         # element
+ 	foreach (keys %cnt) {
- 	foreach (keys %cnt)
+ 	    if ($listitems{$_} eq 'avg') {
- 	  {
+ 		$returnhash{$_}=int(($sum{$_}/$cnt{$_})*100.0+0.5)/100.0;
- 	    if ($listitems{$_} eq 'avg')
+ 	    } elsif ($listitems{$_} eq 'cnt') {
- 	      {
+ 		$returnhash{$_}=$cnt{$_};
- 		$returnhash{$_} = int(($sum{$_}/$cnt{$_})*100.0+0.5)/100.0;
+ 	    } else {
- 	      }
+ 		$returnhash{$_}=$sum{$_};
- 	    elsif ($listitems{$_} eq 'cnt')
+ 	    }
- 	      {
+ 	}
- 		$returnhash{$_} = $cnt{$_};
- 	      }
+         # generate new count key in resevaldata, insert sum
- 	    else
+ 	if ($returnhash{'count'}) {
- 	      {
+ 	    my $newkey=$$.'_'.time.'_searchcat___'.&escape($url).'___count';
- 		$returnhash{$_} = $sum{$_};
+ 	    $newevaldata{$newkey}=$returnhash{'count'};
- 	      }
+ 	}
- 	  }
-         # seems to be doing something useful
- 	if ($returnhash{'count'})
- 	  {
- 	    my $newkey = $$.'_'.time.'_searchcat___'.&escape($url).'___count';
- 	    $newevaldata{$newkey} = $returnhash{'count'};
- 	  }
  	untie(%evaldata); # Close/release the original nohist database.
  	untie(%newevaldata); # Close/release the new nohist database.
-       }
+     }
-     return(%returnhash);
+     return %returnhash;
-   }
+ }
  =pod

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>

Removed from v.1.29
changed lines
	Added in v.1.30