--- loncom/publisher/lonpublisher.pm 2002/01/08 21:14:53 1.68 +++ loncom/publisher/lonpublisher.pm 2002/08/09 19:49:30 1.92 @@ -1,7 +1,7 @@ # The LearningOnline Network with CAPA # Publication Handler # -# $Id: lonpublisher.pm,v 1.68 2002/01/08 21:14:53 albertel Exp $ +# $Id: lonpublisher.pm,v 1.92 2002/08/09 19:49:30 albertel Exp $ # # Copyright Michigan State University Board of Trustees # @@ -43,6 +43,9 @@ # 12/06,12/07 Gerd Kortemeyer # 12/15,12/16 Scott Harrison # 12/25 Gerd Kortemeyer +# YEAR=2002 +# 1/16,1/17 Scott Harrison +# 1/17 Gerd Kortemeyer # ### @@ -58,6 +61,43 @@ ## ## ############################################################################### + +###################################################################### +###################################################################### + +=pod + +=head1 Name + +lonpublisher - LON-CAPA publishing handler + +=head1 Synopsis + +lonpublisher takes the proper steps to add resources to the LON-CAPA +digital library. This includes updating the metadata table in the +LON-CAPA database. + +=head1 Description + +lonpublisher is many things to many people. +To all people it is woefully documented. +This documentation conforms to this standard. + +This module publishes a file. This involves gathering metadata, +versioning the file, copying file from construction space to +publication space, and copying metadata from construction space +to publication space. + +=head2 Internal Functions + +=over 4 + +=cut + +###################################################################### +###################################################################### + + package Apache::lonpublisher; # ------------------------------------------------- modules used by this module @@ -65,13 +105,14 @@ use strict; use Apache::File; use File::Copy; use Apache::Constants qw(:common :http :methods); -use HTML::TokeParser; +use HTML::LCParser; use Apache::lonxml; use Apache::lonhomework; use Apache::loncacc; use DBI; use Apache::lonnet(); use Apache::loncommon(); +use Apache::lonmysql; my %addid; my %nokey; @@ -84,11 +125,23 @@ my $docroot; my $cuname; my $cudom; -# ----------------------------------------------- Evaluate string with metadata +######################################### +######################################### + +=pod + +=item metaeval + +Evaluate string with metadata + +=cut + +######################################### +######################################### sub metaeval { my $metastring=shift; - my $parser=HTML::TokeParser->new(\$metastring); + my $parser=HTML::LCParser->new(\$metastring); my $token; while ($token=$parser->get_token) { if ($token->[0] eq 'S') { @@ -127,7 +180,19 @@ sub metaeval { } } -# -------------------------------------------------------- Read a metadata file +######################################### +######################################### + +=pod + +=item metaread + +Read a metadata file + +=cut + +######################################### +######################################### sub metaread { my ($logfile,$fn)=@_; unless (-e $fn) { @@ -144,16 +209,49 @@ sub metaread { return '
Processed file: '.$fn.''; } -# ---------------------------- convert 'time' format into a datetime sql format +######################################### +######################################### + +=pod + +=item sqltime + +Convert 'time' format into a datetime sql format + +=cut + +######################################### +######################################### sub sqltime { + my $timef=shift @_; my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = - localtime(@_[0]); + localtime($timef); $mon++; $year+=1900; return "$year-$mon-$mday $hour:$min:$sec"; } -# --------------------------------------------------------- Various form fields +######################################### +######################################### + +=pod + +=item Form field generating functions + +=over 4 + +=item textfield + +=item hiddenfield + +=item selectbox + +=back + +=cut + +######################################### +######################################### sub textfield { my ($title,$name,$value)=@_; return "\n

$title:
". @@ -180,13 +278,25 @@ sub selectbox { return $selout.''; } -# -------------------------------------------------------- Publication Step One +######################################### +######################################### + +=pod + +=item urlfixup +Fix up a url? First step of publication + +=cut + +######################################### +######################################### sub urlfixup { my ($url,$target)=@_; unless ($url) { return ''; } #javascript code needs no fixing if ($url =~ /^javascript:/i) { return $url; } + if ($url =~ /^mailto:/i) { return $url; } #internal document links need no fixing if ($url =~ /^\#/) { return $url; } my ($host)=($url=~/(?:http\:\/\/)*([^\/]+)/); @@ -198,6 +308,25 @@ sub urlfixup { } if ($url=~/^http\:\/\//) { return $url; } $url=~s/\~$cuname/res\/$cudom\/$cuname/; + return $url; +} + +######################################### +######################################### + +=pod + +=item absoluteurl + +Currently undocumented + +=cut + +######################################### +######################################### +sub absoluteurl { + my ($url,$target)=@_; + unless ($url) { return ''; } if ($target) { $target=~s/\/[^\/]+$//; $url=&Apache::lonnet::hreflocation($target,$url); @@ -205,6 +334,390 @@ sub urlfixup { return $url; } +######################################### +######################################### + +=pod + +=item set_allow + +Currently undocumented + +=cut + +######################################### +######################################### +sub set_allow { + my ($allow,$logfile,$target,$tag,$oldurl)=@_; + my $newurl=&urlfixup($oldurl,$target); + my $return_url=$oldurl; + print $logfile 'GUYURL: '.$tag.':'.$oldurl.' - '.$newurl."\n"; + if ($newurl ne $oldurl) { + $return_url=$newurl; + print $logfile 'URL: '.$tag.':'.$oldurl.' - '.$newurl."\n"; + } + if (($newurl !~ /^javascript:/i) && + ($newurl !~ /^mailto:/i) && + ($newurl !~ /^http:/i) && + ($newurl !~ /^\#/)) { + $$allow{&absoluteurl($newurl,$target)}=1; + } + return $return_url +} + +######################################### +######################################### + +=pod + +=item get_subscribed_hosts + +Currently undocumented + +=cut + +######################################### +######################################### +sub get_subscribed_hosts { + my ($target)=@_; + my @subscribed; + my $filename; + $target=~/(.*)\/([^\/]+)$/; + my $srcf=$2; + opendir(DIR,$1); + while ($filename=readdir(DIR)) { + if ($filename=~/$srcf\.(\w+)$/) { + my $subhost=$1; + if ($subhost ne 'meta' && $subhost ne 'subscription') { + push(@subscribed,$subhost); + } + } + } + closedir(DIR); + my $sh; + if ( $sh=Apache::File->new("$target.subscription") ) { + &Apache::lonnet::logthis("opened $target.subscription"); + while (my $subline=<$sh>) { + &Apache::lonnet::logthis("Trying $subline"); + if ($subline =~ /(^\w+):/) { push(@subscribed,$1); } else { + &Apache::lonnet::logthis("No Match for $subline"); + } + } + } else { + &Apache::lonnet::logthis("Un able to open $target.subscription"); + } + &Apache::lonnet::logthis("Got list of ".join(':',@subscribed)); + return @subscribed; +} + + +######################################### +######################################### + +=pod + +=item get_max_ids_indices + +Currently undocumented + +=cut + +######################################### +######################################### +sub get_max_ids_indices { + my ($content)=@_; + my $maxindex=10; + my $maxid=10; + my $needsfixup=0; + + my $parser=HTML::LCParser->new($content); + my $token; + while ($token=$parser->get_token) { + if ($token->[0] eq 'S') { + my $counter; + if ($counter=$addid{$token->[1]}) { + if ($counter eq 'id') { + if (defined($token->[2]->{'id'})) { + $maxid=($token->[2]->{'id'}>$maxid)?$token->[2]->{'id'}:$maxid; + } else { + $needsfixup=1; + } + } else { + if (defined($token->[2]->{'index'})) { + $maxindex=($token->[2]->{'index'}>$maxindex)?$token->[2]->{'index'}:$maxindex; + } else { + $needsfixup=1; + } + } + } + } + } + return ($needsfixup,$maxid,$maxindex); +} + +######################################### +######################################### + +=pod + +=item get_all_text_unbalanced + +Currently undocumented + +=cut + +######################################### +######################################### +sub get_all_text_unbalanced { + #there is a copy of this in lonxml.pm + my($tag,$pars)= @_; + my $token; + my $result=''; + $tag='<'.$tag.'>'; + while ($token = $$pars[-1]->get_token) { + if (($token->[0] eq 'T')||($token->[0] eq 'C')||($token->[0] eq 'D')) { + $result.=$token->[1]; + } elsif ($token->[0] eq 'PI') { + $result.=$token->[2]; + } elsif ($token->[0] eq 'S') { + $result.=$token->[4]; + } elsif ($token->[0] eq 'E') { + $result.=$token->[2]; + } + if ($result =~ /(.*)$tag(.*)/) { + #&Apache::lonnet::logthis('Got a winner with leftovers ::'.$2); + #&Apache::lonnet::logthis('Result is :'.$1); + $result=$1; + my $redo=$tag.$2; + push (@$pars,HTML::LCParser->new(\$redo)); + $$pars[-1]->xml_mode('1'); + last; + } + } + return $result +} + +######################################### +######################################### + +=pod + +=item fix_ids_and_indices + +Currently undocumented + +=cut + +######################################### +######################################### +#Arguably this should all be done as a lonnet::ssi instead +sub fix_ids_and_indices { + my ($logfile,$source,$target)=@_; + + my %allow; + my $content; + { + my $org=Apache::File->new($source); + $content=join('',<$org>); + } + + my ($needsfixup,$maxid,$maxindex)=&get_max_ids_indices(\$content); + + if ($needsfixup) { + print $logfile "Needs ID and/or index fixup\n". + "Max ID : $maxid (min 10)\n". + "Max Index: $maxindex (min 10)\n"; + } + my $outstring=''; + my @parser; + $parser[0]=HTML::LCParser->new(\$content); + $parser[-1]->xml_mode(1); + my $token; + while (@parser) { + while ($token=$parser[-1]->get_token) { + if ($token->[0] eq 'S') { + my $counter; + my $tag=$token->[1]; + my $lctag=lc($tag); + if ($lctag eq 'allow') { + $allow{$token->[2]->{'src'}}=1; + next; + } + my %parms=%{$token->[2]}; + $counter=$addid{$tag}; + if (!$counter) { $counter=$addid{$lctag}; } + if ($counter) { + if ($counter eq 'id') { + unless (defined($parms{'id'})) { + $maxid++; + $parms{'id'}=$maxid; + print $logfile 'ID: '.$tag.':'.$maxid."\n"; + } + } elsif ($counter eq 'index') { + unless (defined($parms{'index'})) { + $maxindex++; + $parms{'index'}=$maxindex; + print $logfile 'Index: '.$tag.':'.$maxindex."\n"; + } + } + } + foreach my $type ('src','href','background','bgimg') { + foreach my $key (keys(%parms)) { + if ($key =~ /^$type$/i) { + $parms{$key}=&set_allow(\%allow,$logfile, + $target,$tag, + $parms{$key}); + } + } + } + # probably a image type

Dependencies

'; my $allowstr=''; - foreach (keys %allow) { + foreach (sort(keys(%allow))) { my $thisdep=$_; + if ($thisdep !~ /[^\s]/) { next; } unless ($style eq 'rat') { $allowstr.="\n".''; } @@ -400,13 +777,16 @@ sub publish { => time); $thisdep=~/^\/res\/(\w+)\/(\w+)\//; if ((defined($1)) && (defined($2))) { - &Apache::lonnet::put('resevaldata',\%temphash,$1,$2); + &Apache::lonnet::put('nohist_resevaldata',\%temphash, + $1,$2); } } } } - $outstring=~s/(\<\/[^\>]+\>\s*)$/$allowstr$1/s; + $outstring=~s/\n*(\<\/[^\>]+\>)\s*$/$allowstr\n$1\n/s; + #Encode any High ASCII characters + $outstring=&HTML::Entities::encode($outstring,"\200-\377"); # ------------------------------------------------------------- Write modified { @@ -420,13 +800,6 @@ sub publish { } $content=$outstring; - if ($needsfixup) { - print $logfile "End of ID and/or index fixup\n". - "Max ID : $maxid (min 10)\n". - "Max Index: $maxindex (min 10)\n"; - } else { - print $logfile "Does not need ID and/or index fixup\n"; - } } # --------------------------------------------- Initial step done, now metadata @@ -437,7 +810,10 @@ sub publish { my %oldparmstores=(); - $scrout.='

Metadata Information

'; + + $scrout.='

Metadata Information ' . + Apache::loncommon::help_open_topic("Metadata_Description") + . '

'; # ------------------------------------------------ First, check out environment unless (-e $source.'.meta') { @@ -491,7 +867,7 @@ sub publish { my $oldenv=$ENV{'request.uri'}; $ENV{'request.uri'}=$target; - $allmeta=Apache::lonxml::xmlparse('meta',$content); + $allmeta=Apache::lonxml::xmlparse(undef,'meta',$content); $ENV{'request.uri'}=$oldenv; &metaeval($allmeta); @@ -514,7 +890,7 @@ sub publish { $chparms; } - my $chparms=''; + $chparms=''; foreach (sort keys %oldparmstores) { if (($_=~/^parameter/) || ($_=~/^stores/)) { unless (($metadatafields{$_.'.name'}) || @@ -532,7 +908,7 @@ sub publish { # ------------------------------------------------------- Now have all metadata $scrout.= - '
'. + ''. '

'. &hiddenfield('phase','two'). &hiddenfield('filename',$ENV{'form.filename'}). @@ -544,7 +920,27 @@ sub publish { # --------------------------------------------------- Scan content for keywords - my $keywordout='

Keywords:
'; + my $keywords_help = Apache::loncommon::help_open_topic("Publishing_Keywords"); + my $keywordout=<<"END"; + +

Keywords: $keywords_help + + +
+END + $keywordout.='

'; my $colcount=0; my %keywords=(); @@ -570,13 +966,13 @@ sub publish { } foreach (sort keys %keywords) { - $keywordout.='
'; if ($colcount>10) { @@ -604,7 +1000,7 @@ sub publish { $scrout.=&selectbox('Language','language', $metadatafields{'language'}, - \&{Apache::loncommon::languagedescription}, + \&Apache::loncommon::languagedescription, (&Apache::loncommon::languageids), ); @@ -619,33 +1015,48 @@ sub publish { $scrout.=&textfield('Publisher/Owner','owner', $metadatafields{'owner'}); # --------------------------------------------------- Correct copyright for rat + if ($style eq 'rat') { if ($metadatafields{'copyright'} eq 'public') { delete $metadatafields{'copyright'}; } $scrout.=&selectbox('Copyright/Distribution','copyright', $metadatafields{'copyright'}, - \&{Apache::loncommon::copyrightdescription}, + \&Apache::loncommon::copyrightdescription, (grep !/^public$/,(&Apache::loncommon::copyrightids))); } else { $scrout.=&selectbox('Copyright/Distribution','copyright', $metadatafields{'copyright'}, - \&{Apache::loncommon::copyrightdescription}, + \&Apache::loncommon::copyrightdescription, (&Apache::loncommon::copyrightids)); } + + my $copyright_help = Apache::loncommon::help_open_topic("Publishing_Copyright"); + $scrout =~ s/DISTRIBUTION:/'DISTRIBUTION: ' . $copyright_help/ge; return $scrout. '

'; } -# -------------------------------------------------------- Publication Step Two +######################################### +######################################### + +=pod + +=item phasetwo +Render second interface showing status of publication steps. +This is publication step two. + +=cut + +######################################### +######################################### sub phasetwo { my ($source,$target,$style,$distarget)=@_; my $logfile; my $scrout=''; - unless ($logfile=Apache::File->new('>>'.$source.'.log')) { return 'No write permission to user directory, FAIL'; @@ -672,9 +1083,10 @@ sub phasetwo { $metadatafields{'dependencies'}=$ENV{'form.dependencies'}; my $allkeywords=$ENV{'form.addkey'}; - foreach (keys %ENV) { - if ($_=~/^form\.key\.(\w+)/) { - $allkeywords.=','.$1; + if (exists($ENV{'form.keywords'}) && (ref($ENV{'form.keywords'}))) { + my @Keywords = @{$ENV{'form.keywords'}}; + foreach (@Keywords) { + $allkeywords.=','.$_; } } $allkeywords=~s/\W+/\,/; @@ -700,7 +1112,9 @@ sub phasetwo { $value=~s/\"/\'\'/g; print $mfh ' '.$_.'="'.$value.'"'; } - print $mfh '>'.$metadatafields{$unikey}.''; + print $mfh '>'. + &HTML::Entities::encode($metadatafields{$unikey}) + .''; } } $scrout.='

Wrote Metadata'; @@ -708,65 +1122,23 @@ sub phasetwo { } # -------------------------------- Synchronize entry with SQL metadata database - my $warning; - - unless ($metadatafields{'copyright'} eq 'priv') { - - my $dbh; - { - unless ( - $dbh = DBI->connect("DBI:mysql:loncapa","www", - $Apache::lonnet::perlvar{'lonSqlAccess'},{ RaiseError =>0,PrintError=>0}) - ) { - $warning='WARNING: Cannot connect to '. - 'database!'; - } - else { - my %sqldatafields; - $sqldatafields{'url'}=$distarget; - my $sth=$dbh->prepare( - 'delete from metadata where url like binary'. - '"'.$sqldatafields{'url'}.'"'); - $sth->execute(); - foreach ('title','author','subject','keywords','notes','abstract', - 'mime','language','creationdate','lastrevisiondate','owner', - 'copyright') { - my $field=$metadatafields{$_}; $field=~s/\"/\'\'/g; - $sqldatafields{$_}=$field; - } - - $sth=$dbh->prepare('insert into metadata values ('. - '"'.delete($sqldatafields{'title'}).'"'.','. - '"'.delete($sqldatafields{'author'}).'"'.','. - '"'.delete($sqldatafields{'subject'}).'"'.','. - '"'.delete($sqldatafields{'url'}).'"'.','. - '"'.delete($sqldatafields{'keywords'}).'"'.','. - '"'.'current'.'"'.','. - '"'.delete($sqldatafields{'notes'}).'"'.','. - '"'.delete($sqldatafields{'abstract'}).'"'.','. - '"'.delete($sqldatafields{'mime'}).'"'.','. - '"'.delete($sqldatafields{'language'}).'"'.','. - '"'. - sqltime(delete($sqldatafields{'creationdate'})) - .'"'.','. - '"'. - sqltime(delete( - $sqldatafields{'lastrevisiondate'})).'"'.','. - '"'.delete($sqldatafields{'owner'}).'"'.','. - '"'.delete( - $sqldatafields{'copyright'}).'"'.')'); - $sth->execute(); - $dbh->disconnect; - $scrout.='

Synchronized SQL metadata database'; - print $logfile "\nSynchronized SQL metadata database"; - } + my $warning; + $metadatafields{'url'} = $distarget; + $metadatafields{'version'} = 'current'; + unless ($metadatafields{'copyright'} eq 'priv') { + my ($error,$success) = &store_metadata(\%metadatafields); + if ($success) { + $scrout.='

Synchronized SQL metadata database'; + print $logfile "\nSynchronized SQL metadata database"; + } else { + $warning.=$error; + print $logfile "\n".$error; + } + } else { + $scrout.='

Private Publication - did not synchronize database'; + print $logfile "\nPrivate: Did not synchronize data into ". + "SQL metadata database"; } - -} else { - $scrout.='

Private Publication - did not synchronize database'; - print $logfile "\nPrivate: Did not synchronize data into ". - "SQL metadata database"; -} # ----------------------------------------------------------- Copy old versions if (-e $target) { @@ -861,56 +1233,26 @@ if (-e $target) { # --------------------------------------------------- Send update notifications -{ - - my $filename; - - $target=~/(.*)\/([^\/]+)$/; - my $srcf=$2; - opendir(DIR,$1); - while ($filename=readdir(DIR)) { - if ($filename=~/$srcf\.(\w+)$/) { - my $subhost=$1; - if ($subhost ne 'meta') { - $scrout.='

Notifying host '.$subhost.':'; - print $logfile "\nNotifying host '.$subhost.':'"; - my $reply=&Apache::lonnet::critical('update:'.$target,$subhost); - $scrout.=$reply; - print $logfile $reply; - } - } + my @subscribed=&get_subscribed_hosts($target); + foreach my $subhost (@subscribed) { + $scrout.='

Notifying host '.$subhost.':'; + print $logfile "\nNotifying host ".$subhost.':'; + my $reply=&Apache::lonnet::critical('update:'.$target,$subhost); + $scrout.=$reply; + print $logfile $reply; } - closedir(DIR); - -} # ---------------------------------------- Send update notifications, meta only -{ - - my $filename; - - $target=~/(.*)\/([^\/]+)$/; - my $srcf=$2.'.meta'; - opendir(DIR,$1); - while ($filename=readdir(DIR)) { - if ($filename=~/$srcf\.(\w+)$/) { - my $subhost=$1; - if ($subhost ne 'meta') { - $scrout.= - '

Notifying host for metadata only '.$subhost.':'; - print $logfile - "\nNotifying host for metadata only '.$subhost.':'"; - my $reply=&Apache::lonnet::critical( - 'update:'.$target.'.meta',$subhost); - $scrout.=$reply; - print $logfile $reply; - } - } + my @subscribedmeta=&get_subscribed_hosts("$target.meta"); + foreach my $subhost (@subscribedmeta) { + $scrout.='

Notifying host for metadata only '.$subhost.':'; + print $logfile "\nNotifying host for metadata only ".$subhost.':'; + my $reply=&Apache::lonnet::critical('update:'.$target.'.meta', + $subhost); + $scrout.=$reply; + print $logfile $reply; } - closedir(DIR); - -} # ------------------------------------------------ Provide link to new resource @@ -925,15 +1267,43 @@ if (-e $target) { return $warning.$scrout. - '


View Target'. + '
View Published Version'. '

Back to Source'. '

Back to Source Directory'; } -# ================================================================ Main Handler +######################################### +######################################### + +=pod + +=item handler + +A basic outline of the handler subroutine follows. + +=over 4 + +=item Get query string for limited number of parameters + +=item Check filename + +=item File is there and owned, init lookup tables + +=item Start page output + +=item Individual file + +=item publish from $thisfn to $thistarget with $thisembstyle + +=back + +=cut + +######################################### +######################################### sub handler { my $r=shift; @@ -945,17 +1315,8 @@ sub handler { # Get query string for limited number of parameters - foreach (split(/&/,$ENV{'QUERY_STRING'})) { - my ($name, $value) = split(/=/,$_); - $value =~ tr/+/ /; - $value =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg; - if ($name eq 'filename') { - unless ($ENV{'form.'.$name}) { - $ENV{'form.'.$name}=$value; - } - } - } - + &Apache::loncommon::get_unprocessed_cgi($ENV{'QUERY_STRING'}, + ['filename']); # -------------------------------------------------------------- Check filename @@ -1099,107 +1460,9 @@ unless ($ENV{'form.phase'} eq 'two') { 1; __END__ -=head1 NAME - -Apache::lonpublisher - Publication Handler - -=head1 SYNOPSIS - -Invoked by /etc/httpd/conf/srm.conf: - - - PerlAccessHandler Apache::lonacc - SetHandler perl-script - PerlHandler Apache::lonpublisher - ErrorDocument 403 /adm/login - ErrorDocument 404 /adm/notfound.html - ErrorDocument 406 /adm/unauthorized.html - ErrorDocument 500 /adm/errorhandler - - -=head1 INTRODUCTION - -This module publishes a file. This involves gathering metadata, -versioning the file, copying file from construction space to -publication space, and copying metadata from construction space -to publication space. - -This is part of the LearningOnline Network with CAPA project -described at http://www.lon-capa.org. - -=head1 HANDLER SUBROUTINE - -This routine is called by Apache and mod_perl. - -=over 4 - -=item * - -Get query string for limited number of parameters - -=item * - -Check filename - -=item * - -File is there and owned, init lookup tables - -=item * - -Start page output - -=item * - -Individual file - -=item * - -publish from $thisfn to $thistarget with $thisembstyle - -=back - -=head1 OTHER SUBROUTINES - -=over 4 - -=item * - -metaeval() : Evaluate string with metadata - -=item * - -metaread() : Read a metadata file - -=item * - -sqltime() : convert 'time' format into a datetime sql format - -=item * - -textfield() : form field - -=item * - -hiddenfield() : form field - -=item * - -selectbox() : form field - -=item * - -urlfixup() : fixup URL (Publication Step One) - -=item * - -publish() : publish (Publication Step One) - -=item * - -phasetwo() : render second interface showing status of publication steps -(Publication Step Two) +=pod =back =cut +