File:  [LON-CAPA] / loncom / metadata_database / LONCAPA / lonmetadata.pm
Revision 1.5: download - view: text, annotated - select for diffs
Mon Apr 12 21:11:45 2004 UTC (20 years, 2 months ago) by matthew
Branches: MAIN
CVS tags: HEAD
Moved nohist_reseval parsing functionality from searchcat.pl to lonmetadata.pm.
searchcat.pl: stop dying on insert errors.
lonmetadata.pm: &store_metadata now translates 'nan' to 'NULL' because MySQL
won't take 'nan' in a numeric field.
Added &process_reseval_data and &process_dynamic_metadata routines.

    1: # The LearningOnline Network with CAPA
    2: #
    3: # $Id: lonmetadata.pm,v 1.5 2004/04/12 21:11:45 matthew Exp $
    4: #
    5: # Copyright Michigan State University Board of Trustees
    6: #
    7: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
    8: #
    9: # LON-CAPA is free software; you can redistribute it and/or modify
   10: # it under the terms of the GNU General Public License as published by
   11: # the Free Software Foundation; either version 2 of the License, or
   12: # (at your option) any later version.
   13: #
   14: # LON-CAPA is distributed in the hope that it will be useful,
   15: # but WITHOUT ANY WARRANTY; without even the implied warranty of
   16: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   17: # GNU General Public License for more details.
   18: #
   19: # You should have received a copy of the GNU General Public License
   20: # along with LON-CAPA; if not, write to the Free Software
   21: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   22: #
   23: # /home/httpd/html/adm/gpl.txt
   24: #
   25: # http://www.lon-capa.org/
   26: #
   27: ######################################################################
   28: 
   29: package LONCAPA::lonmetadata;
   30: 
   31: use strict;
   32: use DBI;
   33: 
   34: ######################################################################
   35: ######################################################################
   36: 
   37: =pod 
   38: 
   39: =head1 Name
   40: 
   41: lonmetadata
   42: 
   43: =head1 Synopsis
   44: 
   45: lonmetadata holds a description of the metadata table and provides
   46: wrappers for the storage and retrieval of metadata to/from the database.
   47: 
   48: =head1 Description
   49: 
   50: =head1 Methods
   51: 
   52: =over 4
   53: 
   54: =cut
   55: 
   56: ######################################################################
   57: ######################################################################
   58: 
   59: =pod
   60: 
   61: =item Old table creation command
   62: 
   63: CREATE TABLE IF NOT EXISTS metadata 
   64: (title TEXT, 
   65: author TEXT, 
   66: subject TEXT, 
   67: url TEXT, 
   68: keywords TEXT, 
   69: version TEXT, 
   70: notes TEXT, 
   71: abstract TEXT, 
   72: mime TEXT, 
   73: language TEXT, 
   74: creationdate DATETIME, 
   75: lastrevisiondate DATETIME, 
   76: owner TEXT, 
   77: copyright TEXT, 
   78: 
   79: FULLTEXT idx_title (title), 
   80: FULLTEXT idx_author (author), 
   81: FULLTEXT idx_subject (subject), 
   82: FULLTEXT idx_url (url), 
   83: FULLTEXT idx_keywords (keywords), 
   84: FULLTEXT idx_version (version), 
   85: FULLTEXT idx_notes (notes), 
   86: FULLTEXT idx_abstract (abstract), 
   87: FULLTEXT idx_mime (mime), 
   88: FULLTEXT idx_language (language),
   89: FULLTEXT idx_owner (owner), 
   90: FULLTEXT idx_copyright (copyright)) 
   91: 
   92: TYPE=MYISAM;
   93: 
   94: =cut
   95: 
   96: ######################################################################
   97: ######################################################################
   98: my @Metadata_Table_Description = 
   99:     (
  100:      { name => 'title',     type=>'TEXT'},
  101:      { name => 'author',    type=>'TEXT'},
  102:      { name => 'subject',   type=>'TEXT'},
  103:      { name => 'url',       type=>'TEXT', restrictions => 'NOT NULL' },
  104:      { name => 'keywords',  type=>'TEXT'},
  105:      { name => 'version',   type=>'TEXT'},
  106:      { name => 'notes',     type=>'TEXT'},
  107:      { name => 'abstract',  type=>'TEXT'},
  108:      { name => 'mime',      type=>'TEXT'},
  109:      { name => 'language',  type=>'TEXT'},
  110:      { name => 'creationdate',     type=>'DATETIME'},
  111:      { name => 'lastrevisiondate', type=>'DATETIME'},
  112:      { name => 'owner',     type=>'TEXT'},
  113:      { name => 'copyright', type=>'TEXT'}, 
  114:       #--------------------------------------------------
  115:      { name => 'dependencies',   type=>'TEXT'},
  116:      { name => 'modifyinguser',  type=>'TEXT'},
  117:      { name => 'authorspace',    type=>'TEXT'},
  118:      { name => 'lowestgradelevel',  type=>'INT'},
  119:      { name => 'highestgradelevel', type=>'INT'},
  120:      { name => 'standards',      type=>'TEXT'},
  121:      { name => 'count',          type=>'INT'},
  122:      { name => 'course',         type=>'INT'},
  123:      { name => 'course_list',    type=>'TEXT'},
  124:      { name => 'goto',           type=>'INT'},
  125:      { name => 'goto_list',      type=>'TEXT'},
  126:      { name => 'comefrom',       type=>'INT'},
  127:      { name => 'comefrom_list',  type=>'TEXT'},
  128:      { name => 'sequsage',       type=>'INT'},
  129:      { name => 'sequsage_list',  type=>'TEXT'},
  130:      { name => 'stdno',          type=>'INT'},
  131:      { name => 'stdno_list',     type=>'TEXT'},
  132:      { name => 'avetries',       type=>'FLOAT'},
  133:      { name => 'avetries_list',  type=>'TEXT'},
  134:      { name => 'difficulty',     type=>'FLOAT'},
  135:      { name => 'difficulty_list',type=>'TEXT'},
  136:      { name => 'clear',          type=>'FLOAT'},
  137:      { name => 'technical',      type=>'FLOAT'},
  138:      { name => 'correct',        type=>'FLOAT'},
  139:      { name => 'helpful',        type=>'FLOAT'},
  140:      { name => 'depth',          type=>'FLOAT'},
  141:      { name => 'hostname',       type=> 'TEXT'},
  142:      #--------------------------------------------------
  143:      );
  144: 
  145: my @Fulltext_indicies = qw/
  146:     title
  147:     author
  148:     subject
  149:     url
  150:     keywords
  151:     version
  152:     notes
  153:     abstract
  154:     mime
  155:     language
  156:     owner
  157:     copyright/;
  158: 
  159: ######################################################################
  160: ######################################################################
  161: 
  162: =pod
  163: 
  164: =item &describe_metadata_storage
  165: 
  166: Input: None
  167: 
  168: Returns: An array of hash references describing the columns and indicies
  169: of the metadata table(s).
  170: 
  171: =cut
  172: 
  173: ######################################################################
  174: ######################################################################
  175: sub describe_metadata_storage { 
  176:     return (\@Metadata_Table_Description,\@Fulltext_indicies);
  177: }
  178: 
  179: ######################################################################
  180: ######################################################################
  181: 
  182: =pod
  183: 
  184: =item create_metadata_storage()
  185: 
  186: Inputs: table name (optional): the name of the table.  Default is 'metadata'.
  187: 
  188: Returns: A perl string which, when executed by MySQL, will cause the
  189: metadata storage to be initialized.
  190: 
  191: =cut
  192: 
  193: ######################################################################
  194: ######################################################################
  195: sub create_metadata_storage { 
  196:     my ($tablename) = @_;
  197:     $tablename = 'metadata' if (! defined($tablename));
  198:     my $request = "CREATE TABLE IF NOT EXISTS ".$tablename." ";
  199:     #
  200:     # Process the columns  (this code is stolen from lonmysql.pm)
  201:     my @Columns;
  202:     my $col_des; # mysql column description
  203:     foreach my $coldata (@Metadata_Table_Description) {
  204:         my $column = $coldata->{'name'};
  205:         $col_des = '';
  206:         if (lc($coldata->{'type'}) =~ /(enum|set)/) { # 'enum' or 'set'
  207:             $col_des.=$column." ".$coldata->{'type'}."('".
  208:                 join("', '",@{$coldata->{'values'}})."')";
  209:         } else {
  210:             $col_des.=$column." ".$coldata->{'type'};
  211:             if (exists($coldata->{'size'})) {
  212:                 $col_des.="(".$coldata->{'size'}.")";
  213:             }
  214:         }
  215:         # Modifiers
  216:         if (exists($coldata->{'restrictions'})){
  217:             $col_des.=" ".$coldata->{'restrictions'};
  218:         }
  219:         if (exists($coldata->{'default'})) {
  220:             $col_des.=" DEFAULT '".$coldata->{'default'}."'";
  221:         }
  222:         $col_des.=' AUTO_INCREMENT' if (exists($coldata->{'auto_inc'}) &&
  223:                                         ($coldata->{'auto_inc'} eq 'yes'));
  224:         $col_des.=' PRIMARY KEY'    if (exists($coldata->{'primary_key'}) &&
  225:                                         ($coldata->{'primary_key'} eq 'yes'));
  226:     } continue {
  227:         # skip blank items.
  228:         push (@Columns,$col_des) if ($col_des ne '');
  229:     }
  230:     foreach my $colname (@Fulltext_indicies) {
  231:         my $text = 'FULLTEXT idx_'.$colname.' ('.$colname.')';
  232:         push (@Columns,$text);
  233:     }
  234:     $request .= "(".join(", ",@Columns).") TYPE=MyISAM";
  235:     return $request;
  236: }
  237: 
  238: ######################################################################
  239: ######################################################################
  240: 
  241: =pod
  242: 
  243: =item store_metadata()
  244: 
  245: Inputs: database handle ($dbh), a table name, and a hash or hash reference 
  246: containing the metadata for a single resource.
  247: 
  248: Returns: 1 on success, 0 on failure to store.
  249: 
  250: =cut
  251: 
  252: ######################################################################
  253: ######################################################################
  254: {
  255:     ##
  256:     ##  WARNING: The following cleverness may cause trouble in cases where
  257:     ##  the dbi connection is dropped and recreated - a stale statement
  258:     ##  handler may linger around and cause trouble.
  259:     ##
  260:     ##  In most scripts, this will work fine.  If the dbi is going to be
  261:     ##  dropped and (possibly) later recreated, call &clear_sth.  Yes it
  262:     ##  is annoying but $sth appearantly does not have a link back to the 
  263:     ##  $dbh, so we can't check our validity.
  264:     ##
  265:     my $sth = undef;
  266:     my $sth_table = undef;
  267: 
  268: sub create_statement_handler {
  269:     my $dbh = shift();
  270:     my $tablename = shift();
  271:     $tablename = 'metadata' if (! defined($tablename));
  272:     $sth_table = $tablename;
  273:     my $request = 'INSERT INTO '.$tablename.' VALUES(';
  274:     foreach (@Metadata_Table_Description) {
  275:         $request .= '?,';
  276:     }
  277:     chop $request;
  278:     $request.= ')';
  279:     $sth = $dbh->prepare($request);
  280:     return;
  281: }
  282: 
  283: sub clear_sth { $sth=undef; $sth_table=undef;}
  284: 
  285: sub store_metadata {
  286:     my $dbh = shift();
  287:     my $tablename = shift();
  288:     my $errors = '';
  289:     if (! defined($sth) || 
  290:         ( defined($tablename) && ($sth_table ne $tablename)) || 
  291:         (! defined($tablename) && $sth_table ne 'metadata')) {
  292:         &create_statement_handler($dbh,$tablename);
  293:     }
  294:     my $successcount = 0;
  295:     while (my $mdata = shift()) {
  296:         next if (ref($mdata) ne "HASH");
  297:         my @MData;
  298:         foreach my $field (@Metadata_Table_Description) {
  299:             if (exists($mdata->{$field->{'name'}})) {
  300:                 if ($mdata->{$field->{'name'}} eq 'nan') {
  301:                     push(@MData,'NULL');
  302:                 } else {
  303:                     push(@MData,$mdata->{$field->{'name'}});
  304:                 }
  305:             } else {
  306:                 push(@MData,undef);
  307:             }
  308:         }
  309:         $sth->execute(@MData);
  310:         if (! $sth->err) {
  311:             $successcount++;
  312:         } else {
  313:             $errors = join(',',$errors,$sth->errstr);
  314:         }
  315:     }
  316:     if (wantarray()) {
  317:         return ($successcount,$errors);
  318:     } else {
  319:         return $successcount;
  320:     }
  321: }
  322: 
  323: }
  324: 
  325: ######################################################################
  326: ######################################################################
  327: 
  328: =pod
  329: 
  330: =item lookup_metadata()
  331: 
  332: Inputs: database handle ($dbh) and a hash or hash reference containing 
  333: metadata which will be used for a search.
  334: 
  335: Returns: scalar with error string on failure, array reference on success.
  336: The array reference is the same one returned by $sth->fetchall_arrayref().
  337: 
  338: =cut
  339: 
  340: ######################################################################
  341: ######################################################################
  342: sub lookup_metadata {
  343:     my ($dbh,$condition,$fetchparameter) = @_;
  344:     my $error;
  345:     my $returnvalue=[];
  346:     my $request = 'SELECT * FROM metadata';
  347:     if (defined($condition)) {
  348:         $request .= ' WHERE '.$condition;
  349:     }
  350:     my $sth = $dbh->prepare($request);
  351:     if ($sth->err) {
  352:         $error = $sth->errstr;
  353:     }
  354:     if (! $error) {
  355:         $sth->execute();
  356:         if ($sth->err) {
  357:             $error = $sth->errstr;
  358:         } else {
  359:             $returnvalue = $sth->fetchall_arrayref($fetchparameter);
  360:             if ($sth->err) {
  361:                 $error = $sth->errstr;
  362:             }
  363:         }
  364:     }
  365:     return ($error,$returnvalue);
  366: }
  367: 
  368: ######################################################################
  369: ######################################################################
  370: 
  371: =pod
  372: 
  373: =item delete_metadata()
  374: 
  375: 
  376: 
  377: =cut
  378: 
  379: ######################################################################
  380: ######################################################################
  381: sub delete_metadata {}
  382: 
  383: ######################################################################
  384: ######################################################################
  385: 
  386: 
  387: ######################################################################
  388: ######################################################################
  389: 
  390: =pod
  391: 
  392: =item &process_reseval_data 
  393: 
  394: Process a nohist_resevaldata hash into a more complex data structure.
  395: 
  396: Input: Hash reference containing reseval data
  397: 
  398: Returns: Hash with the following structure:
  399: 
  400: $hash{$url}->{'statistics'}->{$courseid}->{'avetries'}   = $value
  401: $hash{$url}->{'statistics'}->{$courseid}->{'count'}      = $value
  402: $hash{$url}->{'statistics'}->{$courseid}->{'difficulty'} = $value
  403: $hash{$url}->{'statistics'}->{$courseid}->{'stdno'}      = $value
  404: $hash{$url}->{'statistics'}->{$courseid}->{'timestamp'}  = $value
  405: 
  406: $hash{$url}->{'evaluation'}->{$username}->{'clear'}     = $value
  407: $hash{$url}->{'evaluation'}->{$username}->{'comments'}  = $value
  408: $hash{$url}->{'evaluation'}->{$username}->{'depth'}     = $value
  409: $hash{$url}->{'evaluation'}->{$username}->{'technical'} = $value
  410: $hash{$url}->{'evaluation'}->{$username}->{'helpful'}   = $value
  411: 
  412: $hash{$url}->{'course'}    = \@Courses
  413: $hash{$url}->{'comefrom'}  = \@Resources
  414: $hash{$url}->{'goto'}      = \@Resources
  415: $hash{$url}->{'usage'}     = \@Resources
  416: 
  417: $hash{$url}->{'stats'}->{$courseid\_$section}->{$key} = $value
  418: 
  419: =cut
  420: 
  421: ######################################################################
  422: ######################################################################
  423: sub process_reseval_data {
  424:     my ($evaldata) = @_;
  425:     my %DynamicData;
  426:     #
  427:     # Process every stored element
  428:     while (my ($storedkey,$value) = each(%{$evaldata})) {
  429:         my ($source,$file,$type) = split('___',$storedkey);
  430:         $source = &unescape($source);
  431:         $file = &unescape($file);
  432:         $value = &unescape($value);
  433:          "    got ".$file."\n        ".$type." ".$source."\n";
  434:         if ($type =~ /^(avetries|count|difficulty|stdno|timestamp)$/) {
  435:             #
  436:             # Statistics: $source is course id
  437:             $DynamicData{$file}->{'statistics'}->{$source}->{$type}=$value;
  438:         } elsif ($type =~ /^(clear|comments|depth|technical|helpful)$/){
  439:             #
  440:             # Evaluation $source is username, check if they evaluated it
  441:             # more than once.  If so, pad the entry with a space.
  442:             while(exists($DynamicData{$file}->{'evaluation'}->{$type}->{$source})) {
  443:                 $source .= ' ';
  444:             }
  445:             $DynamicData{$file}->{'evaluation'}->{$type}->{$source}=$value;
  446:         } elsif ($type =~ /^(course|comefrom|goto|usage)$/) {
  447:             #
  448:             # Context $source is course id or resource
  449:             push(@{$DynamicData{$file}->{$type}},&unescape($source));
  450:         } elsif ($type eq 'stats') {
  451:             #
  452:             # Statistics storage...
  453:             # $source is $cid\_$sec\_$stdno
  454:             # $value is stat1=value&stat2=value&stat3=value,....
  455:             #
  456:             my ($cid,$sec,$stdno)=split('_',$source);
  457:             my $crssec = $cid.'_'.$sec;
  458:             my @Data = split('&',$value);
  459:             my %Statistics;
  460:             while (my ($key,$value) = split('=',pop(@Data))) {
  461:                 $Statistics{$key} = $value;
  462:             }
  463:             #
  464:             # Only store the data if the number of students is greater
  465:             # than the data already stored
  466:             if (! exists($DynamicData{$file}->{'stats'}->{$crssec}) ||
  467:                 $DynamicData{$file}->{'stats'}->{$crssec}->{'stdno'}<$stdno){
  468:                 $DynamicData{$file}->{'stats'}->{$crssec}=\%Statistics;
  469:             }
  470:         }
  471:     }
  472:     return %DynamicData;
  473: }
  474: 
  475: 
  476: ######################################################################
  477: ######################################################################
  478: 
  479: =pod
  480: 
  481: =item &process_dynamic_metadata
  482: 
  483: Inputs: $url: the url of the item to process
  484: $DynamicData: hash reference for the results of &process_reseval_data
  485: 
  486: Returns: Hash containing the following keys:
  487:     avetries, avetries_list, difficulty, difficulty_list, stdno, stdno_list,
  488:     course, course_list, goto, goto_list, comefrom, comefrom_list,
  489:     usage, clear, technical, correct, helpful, depth, comments
  490: 
  491:     Each of the return keys is associated with either a number or a string
  492:     The *_list items are comma-seperated strings.  'comments' is a string
  493:     containing generically marked-up comments.
  494: 
  495: =cut
  496: 
  497: ######################################################################
  498: ######################################################################
  499: sub process_dynamic_metadata {
  500:     my ($url,$DynamicData) = @_;
  501:     my %data;
  502:     my $resdata = $DynamicData->{$url};
  503:     #
  504:     # Get the statistical data
  505:     foreach my $type (qw/avetries difficulty stdno/) {
  506:         my $count;
  507:         my $sum;
  508:         my @Values;
  509:         #
  510:         foreach my $coursedata (values(%{$resdata->{'statistics'}}),
  511:                                 values(%{$resdata->{'stats'}})) {
  512:             if (ref($coursedata) eq 'HASH' && exists($coursedata->{$type})) {
  513:                 $count++;
  514:                 $sum += $coursedata->{$type};
  515:                 push(@Values,$coursedata->{$type});
  516:             }
  517:         }
  518:         if ($count) {
  519:             $data{$type} = $sum/$count;
  520:             $data{$type.'_list'} = join(',',@Values);
  521:         }
  522:     }
  523:     #
  524:     # Get the context data
  525:     foreach my $type (qw/course goto comefrom/) {
  526:         if (defined($resdata->{$type}) && 
  527:             ref($resdata->{$type}) eq 'ARRAY') {
  528:             $data{$type} = scalar(@{$resdata->{$type}});
  529:             $data{$type.'_list'} = join(',',@{$resdata->{$type}});
  530:         }
  531:     }
  532:     if (defined($resdata->{'usage'}) && 
  533:         ref($resdata->{'usage'}) eq 'ARRAY') {
  534:         $data{'sequsage'} = scalar(@{$resdata->{'usage'}});
  535:         $data{'sequsage_list'} = join(',',@{$resdata->{'usage'}});
  536:     }
  537:     #
  538:     # Get the evaluation data
  539:     foreach my $type (qw/clear technical correct helpful depth/) {
  540:         my $count;
  541:         my $sum;
  542:         foreach my $evaluator (keys(%{$resdata->{'evaluation'}->{$type}})){
  543:             $sum += $resdata->{'evaluation'}->{$type}->{$evaluator};
  544:             $count++;
  545:         }
  546:         if ($count > 0) {
  547:             $data{$type}=$sum/$count;
  548:         }
  549:     }
  550:     #
  551:     # put together comments
  552:     my $comments = '<div class="LCevalcomments">';
  553:     foreach my $evaluator (keys(%{$resdata->{'evaluation'}->{'comments'}})){
  554:         $comments .= '<span class="author">'.$evaluator.'</span>'.
  555:             '<span class="comment">'.
  556:             $resdata->{'evaluation'}->{'comments'}->{$evaluator}.'</span>';
  557:     }
  558:     $comments .= '</div>';
  559:     #
  560:     return %data;
  561: }
  562: 
  563: ######################################################################
  564: ######################################################################
  565: ##
  566: ## The usual suspects, repeated here to reduce dependency hell
  567: ##
  568: ######################################################################
  569: ######################################################################
  570: sub unescape {
  571:     my $str=shift;
  572:     $str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg;
  573:     return $str;
  574: }
  575: 
  576: sub escape {
  577:     my $str=shift;
  578:     $str =~ s/(\W)/"%".unpack('H2',$1)/eg;
  579:     return $str;
  580: }
  581: 
  582: 
  583: 1;
  584: 
  585: __END__;
  586: 
  587: =pod
  588: 
  589: =back
  590: 
  591: =cut

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>