--- loncom/publisher/lonpublisher.pm 2002/01/08 21:14:53 1.68 +++ loncom/publisher/lonpublisher.pm 2021/06/04 03:06:15 1.299 @@ -1,7 +1,7 @@ # The LearningOnline Network with CAPA # Publication Handler # -# $Id: lonpublisher.pm,v 1.68 2002/01/08 21:14:53 albertel Exp $ +# $Id: lonpublisher.pm,v 1.299 2021/06/04 03:06:15 raeburn Exp $ # # Copyright Michigan State University Board of Trustees # @@ -25,25 +25,6 @@ # # http://www.lon-capa.org/ # -# -# (TeX Content Handler -# -# 05/29/00,05/30,10/11 Gerd Kortemeyer) -# -# 11/28,11/29,11/30,12/01,12/02,12/04,12/23 Gerd Kortemeyer -# 03/23 Guy Albertelli -# 03/24,03/29,04/03 Gerd Kortemeyer -# 04/16/2001 Scott Harrison -# 05/03,05/05,05/07 Gerd Kortemeyer -# 05/28/2001 Scott Harrison -# 06/23,08/07,08/11,8/13,8/17,8/18,8/24,9/26,10/16 Gerd Kortemeyer -# 12/04,12/05 Guy Albertelli -# 12/05 Gerd Kortemeyer -# 12/05 Guy Albertelli -# 12/06,12/07 Gerd Kortemeyer -# 12/15,12/16 Scott Harrison -# 12/25 Gerd Kortemeyer -# ### ############################################################################### @@ -58,6 +39,75 @@ ## ## ############################################################################### + +###################################################################### +###################################################################### + +=pod + +=head1 NAME + +lonpublisher - LON-CAPA publishing handler + +=head1 SYNOPSIS + +B is used by B inside B. This is the +invocation by F: + + + PerlAccessHandler Apache::lonacc + SetHandler perl-script + PerlHandler Apache::lonpublisher + ErrorDocument 403 /adm/login + ErrorDocument 404 /adm/notfound.html + ErrorDocument 406 /adm/unauthorized.html + ErrorDocument 500 /adm/errorhandler + + +=head1 OVERVIEW + +Authors can only write-access the C space. +They can copy resources into the resource area through the +publication step, and move them back through a recover step. +Authors do not have direct write-access to their resource space. + +During the publication step, several events will be +triggered. Metadata is gathered, where a wizard manages default +entries on a hierarchical per-directory base: The wizard imports the +metadata (including access privileges and royalty information) from +the most recent published resource in the current directory, and if +that is not available, from the next directory above, etc. The Network +keeps all previous versions of a resource and makes them available by +an explicit version number, which is inserted between the file name +and extension, for example C, while the most recent +version does not carry a version number (C). Servers +subscribing to a changed resource are notified that a new version is +available. + +=head1 DESCRIPTION + +B takes the proper steps to add resources to the LON-CAPA +digital library. This includes updating the metadata table in the +LON-CAPA database. + +B is many things to many people. + +This module publishes a file. This involves gathering metadata, +versioning the file, copying file from construction space to +publication space, and copying metadata from construction space +to publication space. + +=head2 SUBROUTINES + +Many of the undocumented subroutines implement various magical +parsing shortcuts. + +=cut + +###################################################################### +###################################################################### + + package Apache::lonpublisher; # ------------------------------------------------- modules used by this module @@ -65,139 +115,390 @@ use strict; use Apache::File; use File::Copy; use Apache::Constants qw(:common :http :methods); -use HTML::TokeParser; +use HTML::LCParser; +use HTML::Entities; +use Encode::Encoder; use Apache::lonxml; -use Apache::lonhomework; -use Apache::loncacc; use DBI; -use Apache::lonnet(); +use Apache::lonnet; use Apache::loncommon(); - -my %addid; -my %nokey; - -my %metadatafields; -my %metadatakeys; - +use Apache::lonhtmlcommon; +use Apache::lonmysql; +use Apache::lonlocal; +use Apache::loncfile; +use LONCAPA::lonmetadata; +use Apache::lonmsg; +use vars qw(%metadatafields %metadatakeys %addid $readit); +use LONCAPA qw(:DEFAULT :match); + my $docroot; my $cuname; my $cudom; -# ----------------------------------------------- Evaluate string with metadata +my $registered_cleanup; +my $modified_urls; + +my $lock; + +=pod + +=over 4 + +=item B + +Evaluates a string that contains metadata. This subroutine +stores values inside I<%metadatafields> and I<%metadatakeys>. +The hash key is a I<$unikey> corresponding to a unique id +that is descriptive of the parser location inside the XML tree. + +Parameters: + +=over 4 + +=item I<$metastring> + +A string that contains metadata. + +=back + +Returns: + +nothing + +=cut + +######################################### +######################################### +# +# Modifies global %metadatafields %metadatakeys +# + sub metaeval { - my $metastring=shift; + my ($metastring,$prefix)=@_; - my $parser=HTML::TokeParser->new(\$metastring); - my $token; - while ($token=$parser->get_token) { - if ($token->[0] eq 'S') { - my $entry=$token->[1]; - my $unikey=$entry; - if (defined($token->[2]->{'package'})) { - $unikey.='_package_'.$token->[2]->{'package'}; - } - if (defined($token->[2]->{'part'})) { - $unikey.='_'.$token->[2]->{'part'}; - } - if (defined($token->[2]->{'id'})) { - $unikey.='_'.$token->[2]->{'id'}; - } - if (defined($token->[2]->{'name'})) { - $unikey.='_'.$token->[2]->{'name'}; - } - foreach (@{$token->[3]}) { - $metadatafields{$unikey.'.'.$_}=$token->[2]->{$_}; - if ($metadatakeys{$unikey}) { - $metadatakeys{$unikey}.=','.$_; - } else { - $metadatakeys{$unikey}=$_; - } - } - if ($metadatafields{$unikey}) { - my $newentry=$parser->get_text('/'.$entry); - unless (($metadatafields{$unikey}=~/$newentry/) || - ($newentry eq '')) { - $metadatafields{$unikey}.=', '.$newentry; - } - } else { - $metadatafields{$unikey}=$parser->get_text('/'.$entry); - } - } - } + my $parser=HTML::LCParser->new(\$metastring); + my $token; + while ($token=$parser->get_token) { + if ($token->[0] eq 'S') { + my $entry=$token->[1]; + my $unikey=$entry; + next if ($entry =~ m/^(?:parameter|stores)_/); + if (defined($token->[2]->{'package'})) { + $unikey.="\0package\0".$token->[2]->{'package'}; + } + if (defined($token->[2]->{'part'})) { + $unikey.="\0".$token->[2]->{'part'}; + } + if (defined($token->[2]->{'id'})) { + $unikey.="\0".$token->[2]->{'id'}; + } + if (defined($token->[2]->{'name'})) { + $unikey.="\0".$token->[2]->{'name'}; + } + foreach my $item (@{$token->[3]}) { + $metadatafields{$unikey.'.'.$item}=$token->[2]->{$item}; + if ($metadatakeys{$unikey}) { + $metadatakeys{$unikey}.=','.$item; + } else { + $metadatakeys{$unikey}=$item; + } + } + my $newentry=$parser->get_text('/'.$entry); + if (($entry eq 'customdistributionfile') || + ($entry eq 'sourcerights')) { + $newentry=~s/^\s*//; + if ($newentry !~m|^/res|) { $newentry=$prefix.$newentry; } + } +# actually store + if ( $entry eq 'rule' && exists($metadatafields{$unikey})) { + $metadatafields{$unikey}.=','.$newentry; + } else { + $metadatafields{$unikey}=$newentry; + } + } + } } -# -------------------------------------------------------- Read a metadata file +######################################### +######################################### + +=pod + +=item B + +Read a metadata file + +Parameters: + +=over + +=item I<$logfile> + +File output stream to output errors and warnings to. + +=item I<$fn> + +File name (including path). + +=back + +Returns: + +=over 4 + +=item Scalar string (if successful) + +XHTML text that indicates successful reading of the metadata. + +=back + +=cut + +######################################### +######################################### sub metaread { - my ($logfile,$fn)=@_; + my ($logfile,$fn,$prefix)=@_; unless (-e $fn) { - print $logfile 'No file '.$fn."\n"; - return '
No file: '.$fn.''; + print($logfile 'No file '.$fn."\n"); + return '

' + .&mt('No file: [_1]',&Apache::loncfile::display($fn)) + .'

'; } - print $logfile 'Processing '.$fn."\n"; + print($logfile 'Processing '.$fn."\n"); my $metastring; { - my $metafh=Apache::File->new($fn); - $metastring=join('',<$metafh>); + my $metafh=Apache::File->new($fn); + $metastring=join('',<$metafh>); } - &metaeval($metastring); - return '
Processed file: '.$fn.''; + &metaeval($metastring,$prefix); + return '

' + .&mt('Processed file: [_1]',&Apache::loncfile::display($fn)) + .'

'; } -# ---------------------------- convert 'time' format into a datetime sql format -sub sqltime { - my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = - localtime(@_[0]); - $mon++; $year+=1900; - return "$year-$mon-$mday $hour:$min:$sec"; +######################################### +######################################### + +sub coursedependencies { + my $url=&Apache::lonnet::declutter(shift); + $url=~s/\.meta$//; + my ($adomain,$aauthor)=($url=~ m{^($match_domain)/($match_username)/}); + my $regexp=quotemeta($url); + $regexp='___'.$regexp.'___course'; + my %evaldata=&Apache::lonnet::dump('nohist_resevaldata',$adomain, + $aauthor,$regexp); + my %courses=(); + foreach my $item (keys(%evaldata)) { + if ($item=~/^([a-zA-Z0-9]+_[a-zA-Z0-9]+)___.+___course$/) { + $courses{$1}=1; + } + } + return %courses; } +######################################### +######################################### + + +=pod -# --------------------------------------------------------- Various form fields +=item Form-field-generating subroutines. +For input parameters, these subroutines take in values +such as I<$name>, I<$value> and other form field metadata. +The output (scalar string that is returned) is an XHTML +string which presents the form field (foreseeably inside +
tags). + +=over 4 + +=item B + +=item B + +=item B + +=item B + +=item B + +=back + +=cut + +######################################### +######################################### sub textfield { - my ($title,$name,$value)=@_; - return "\n

$title:
". - ''; + my ($title,$name,$value,$noline)=@_; + $value=~s/^\s+//gs; + $value=~s/\s+$//gs; + $value=~s/\s+/ /gs; + $title=&mt($title); + $env{'form.'.$name}=$value; + return "\n".&Apache::lonhtmlcommon::row_title($title) + .'' + .&Apache::lonhtmlcommon::row_closure($noline); +} + +sub text_with_browse_field { + my ($title,$name,$value,$restriction,$noline)=@_; + $value=~s/^\s+//gs; + $value=~s/\s+$//gs; + $value=~s/\s+/ /gs; + $title=&mt($title); + $env{'form.'.$name}=$value; + return "\n".&Apache::lonhtmlcommon::row_title($title) + .'' + .'
' + .'' + .&mt('Select') + .' ' + .'' + .&mt('Search') + .'' + .&Apache::lonhtmlcommon::row_closure($noline); } sub hiddenfield { my ($name,$value)=@_; - return "\n".''; + $env{'form.'.$name}=$value; + return "\n".''; +} + +sub checkbox { + my ($name,$text)=@_; + return "\n"; } sub selectbox { my ($title,$name,$value,$functionref,@idlist)=@_; - my $uctitle=uc($title); - my $selout="\n

$uctitle:". - "
".''; + foreach my $id (@idlist) { + $selout.='

+'; } -# -------------------------------------------------------- Publication Step One +######################################### +######################################### + +=pod + +=item B + +Fix up a url? First step of publication + +=cut +######################################### +######################################### sub urlfixup { my ($url,$target)=@_; unless ($url) { return ''; } #javascript code needs no fixing if ($url =~ /^javascript:/i) { return $url; } + if ($url =~ /^mailto:/i) { return $url; } #internal document links need no fixing if ($url =~ /^\#/) { return $url; } - my ($host)=($url=~/(?:http\:\/\/)*([^\/]+)/); - foreach (values %Apache::lonnet::hostname) { - if ($_ eq $host) { - $url=~s/^http\:\/\///; - $url=~s/^$host//; - } + my ($host)=($url=~m{(?:(?:http|https|ftp)://)*([^/]+)}); + my @lonids = &Apache::lonnet::machine_ids($host); + if (@lonids) { + $url=~s{^(?:http|https|ftp)://}{}; + $url=~s/^\Q$host\E//; } - if ($url=~/^http\:\/\//) { return $url; } - $url=~s/\~$cuname/res\/$cudom\/$cuname/; + if ($url=~m{^(?:http|https|ftp)://}) { return $url; } + $url=~s{\Q~$cuname\E}{res/$cudom/$cuname}; + return $url; +} + +######################################### +######################################### + +=pod + +=item B + +Currently undocumented. + +=cut + +######################################### +######################################### +sub absoluteurl { + my ($url,$target)=@_; + unless ($url) { return ''; } if ($target) { $target=~s/\/[^\/]+$//; $url=&Apache::lonnet::hreflocation($target,$url); @@ -205,24 +506,616 @@ sub urlfixup { return $url; } +######################################### +######################################### + +=pod + +=item B + +Currently undocumented + +=cut + +######################################### +######################################### +sub set_allow { + my ($allow,$logfile,$target,$tag,$oldurl,$type)=@_; + my $newurl=&urlfixup($oldurl,$target); + my $return_url=$oldurl; + print $logfile 'GUYURL: '.$tag.':'.$oldurl.' - '.$newurl."\n"; + if ($newurl ne $oldurl) { + $return_url=$newurl; + print $logfile 'URL: '.$tag.':'.$oldurl.' - '.$newurl."\n"; + } + if (($newurl !~ /^javascript:/i) && + ($newurl !~ /^mailto:/i) && + ($newurl !~ /^(?:http|https|ftp):/i) && + ($newurl !~ /^\#/)) { + if (($type eq 'src') || ($type eq 'href')) { + if ($newurl =~ /^([^?]+)\?[^?]*$/) { + $newurl = $1; + } + } + $$allow{&absoluteurl($newurl,$target)}=1; + } + return $return_url; +} + +######################################### +######################################### + +=pod + +=item B + +Currently undocumented + +=cut + +######################################### +######################################### +sub get_subscribed_hosts { + my ($target)=@_; + my @subscribed; + my $filename; + $target=~/(.*)\/([^\/]+)$/; + my $srcf=$2; + opendir(DIR,$1); + # cycle through listed files, subscriptions used to exist + # as "filename.lonid" + while ($filename=readdir(DIR)) { + if ($filename=~/\Q$srcf\E\.($match_lonid)$/) { + my $subhost=$1; + if (($subhost ne 'meta' + && $subhost ne 'subscription' + && $subhost ne 'meta.subscription' + && $subhost ne 'tmp') && + ($subhost ne $Apache::lonnet::perlvar{'lonHostID'})) { + push(@subscribed,$subhost); + } + } + } + closedir(DIR); + my $sh; + if ( $sh=Apache::File->new("$target.subscription") ) { + while (my $subline=<$sh>) { + if ($subline =~ /^($match_lonid):/) { + if ($1 ne $Apache::lonnet::perlvar{'lonHostID'}) { + push(@subscribed,$1); + } + } + } + } + return @subscribed; +} + + +######################################### +######################################### + +=pod + +=item B + +Currently undocumented + +=cut + +######################################### +######################################### +sub get_max_ids_indices { + my ($content)=@_; + my $maxindex=10; + my $maxid=10; + my $needsfixup=0; + my $duplicateids=0; + + my %allids; + my %duplicatedids; + + my $parser=HTML::LCParser->new($content); + $parser->xml_mode(1); + my $token; + while ($token=$parser->get_token) { + if ($token->[0] eq 'S') { + my $counter; + if ($counter=$addid{$token->[1]}) { + if ($counter eq 'id') { + if (defined($token->[2]->{'id'}) && + $token->[2]->{'id'} !~ /^\s*$/) { + $maxid=($token->[2]->{'id'}>$maxid)?$token->[2]->{'id'}:$maxid; + if (exists($allids{$token->[2]->{'id'}})) { + $duplicateids=1; + $duplicatedids{$token->[2]->{'id'}}=1; + } else { + $allids{$token->[2]->{'id'}}=1; + } + } else { + $needsfixup=1; + } + } else { + if (defined($token->[2]->{'index'}) && + $token->[2]->{'index'} !~ /^\s*$/) { + $maxindex=($token->[2]->{'index'}>$maxindex)?$token->[2]->{'index'}:$maxindex; + } else { + $needsfixup=1; + } + } + } + } + } + return ($needsfixup,$maxid,$maxindex,$duplicateids, + (keys(%duplicatedids))); +} + +######################################### +######################################### + +=pod + +=item B + +Currently undocumented + +=cut + +######################################### +######################################### +sub get_all_text_unbalanced { + #there is a copy of this in lonxml.pm + my($tag,$pars)= @_; + my $token; + my $result=''; + $tag='<'.$tag.'>'; + while ($token = $$pars[-1]->get_token) { + if (($token->[0] eq 'T')||($token->[0] eq 'C')||($token->[0] eq 'D')) { + $result.=$token->[1]; + } elsif ($token->[0] eq 'PI') { + $result.=$token->[2]; + } elsif ($token->[0] eq 'S') { + $result.=$token->[4]; + } elsif ($token->[0] eq 'E') { + $result.=$token->[2]; + } + if ($result =~ /\Q$tag\E/s) { + ($result,my $redo)=$result =~ /(.*)\Q$tag\E(.*)/is; + #&Apache::lonnet::logthis('Got a winner with leftovers ::'.$2); + #&Apache::lonnet::logthis('Result is :'.$1); + $redo=$tag.$redo; + push (@$pars,HTML::LCParser->new(\$redo)); + $$pars[-1]->xml_mode('1'); + last; + } + } + return $result +} + +######################################### +######################################### + +=pod + +=item B + +Currently undocumented + +=cut + +######################################### +######################################### +#Arguably this should all be done as a lonnet::ssi instead +sub fix_ids_and_indices { + my ($logfile,$source,$target)=@_; + + my %allow; + my $content; + { + my $org=Apache::File->new($source); + $content=join('',<$org>); + } + + my ($needsfixup,$maxid,$maxindex,$duplicateids,@duplicatedids)= + &get_max_ids_indices(\$content); + + print $logfile ("Got $needsfixup,$maxid,$maxindex,$duplicateids--". + join(', ',@duplicatedids)); + if ($duplicateids) { + print $logfile "Duplicate ID(s) exist, ".join(', ',@duplicatedids)."\n"; + my $outstring=''.&mt('Unable to publish file, it contains duplicated ID(s), ID(s) need to be unique. The duplicated ID(s) are').': '.join(', ',@duplicatedids).''; + return ($outstring,1); + } + if ($needsfixup) { + print $logfile "Needs ID and/or index fixup\n". + "Max ID : $maxid (min 10)\n". + "Max Index: $maxindex (min 10)\n"; + } + my $outstring=''; + my $responsecounter=1; + my @parser; + $parser[0]=HTML::LCParser->new(\$content); + $parser[-1]->xml_mode(1); + my $token; + while (@parser) { + while ($token=$parser[-1]->get_token) { + if ($token->[0] eq 'S') { + my $counter; + my $tag=$token->[1]; + my $lctag=lc($tag); + if ($lctag eq 'allow') { + $allow{$token->[2]->{'src'}}=1; + next; + } + if ($lctag eq 'base') { next; } + if (($lctag eq 'part') || ($lctag eq 'problem')) { + $responsecounter=0; + } + if ($lctag=~/response$/) { $responsecounter++; } + if ($lctag eq 'import') { $responsecounter++; } + my %parms=%{$token->[2]}; + $counter=$addid{$tag}; + if (!$counter) { $counter=$addid{$lctag}; } + if ($counter) { + if ($counter eq 'id') { + unless (defined($parms{'id'}) && + $parms{'id'}!~/^\s*$/) { + $maxid++; + $parms{'id'}=$maxid; + print $logfile 'ID(new) : '.$tag.':'.$maxid."\n"; + } else { + print $logfile 'ID(kept): '.$tag.':'.$parms{'id'}."\n"; + } + } elsif ($counter eq 'index') { + unless (defined($parms{'index'}) && + $parms{'index'}!~/^\s*$/) { + $maxindex++; + $parms{'index'}=$maxindex; + print $logfile 'Index: '.$tag.':'.$maxindex."\n"; + } + } + } + unless ($parms{'type'} eq 'zombie') { + foreach my $type ('src','href','background','bgimg') { + foreach my $key (keys(%parms)) { + if ($key =~ /^$type$/i) { + next if (($lctag eq 'img') && ($type eq 'src') && + ($parms{$key} =~ m{^data\:image/gif;base64,})); + $parms{$key}=&set_allow(\%allow,$logfile, + $target,$tag, + $parms{$key},$type); + } + } + } + } + # probably a image type