File:  [LON-CAPA] / loncom / publisher / loncleanup.pm
Revision 1.3: download - view: text, annotated - select for diffs
Sat May 28 18:53:50 2005 UTC (19 years, 1 month ago) by albertel
Branches: MAIN
CVS tags: HEAD
- I hate the Style police too, but they keep breaking into my house and committing cleaned up code...

    1: # The LearningOnline Network with CAPA
    2: # Handler to cleanup XML files
    3: #
    4: # $Id: loncleanup.pm,v 1.3 2005/05/28 18:53:50 albertel Exp $
    5: #
    6: # Copyright Michigan State University Board of Trustees
    7: #
    8: # This file is part of the LearningOnline Network with CAPA (LON-CAPA).
    9: #
   10: # LON-CAPA is free software; you can redistribute it and/or modify
   11: # it under the terms of the GNU General Public License as published by
   12: # the Free Software Foundation; either version 2 of the License, or
   13: # (at your option) any later version.
   14: #
   15: # LON-CAPA is distributed in the hope that it will be useful,
   16: # but WITHOUT ANY WARRANTY; without even the implied warranty of
   17: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   18: # GNU General Public License for more details.
   19: #
   20: # You should have received a copy of the GNU General Public License
   21: # along with LON-CAPA; if not, write to the Free Software
   22: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   23: #
   24: # /home/httpd/html/adm/gpl.txt
   25: #
   26: # http://www.lon-capa.org/
   27: #
   28: #
   29: ###
   30: 
   31: package Apache::loncleanup;
   32: 
   33: use strict;
   34: use Apache::File;
   35: use File::Copy;
   36: use Apache::Constants qw(:common :http :methods);
   37: use Apache::loncacc;
   38: use Apache::loncommon();
   39: use Apache::lonlocal;
   40: use Apache::lonnet;
   41: 
   # Translate text that was typeset in the Symbol font into LaTeX markup.
   #
   # Parameter: $symbolfont - the text found inside a <font face="symbol"> tag
   # Returns:   the text with every character that has an entry in the table
   #            below replaced by its LaTeX equivalent.  Characters without
   #            an entry, and complete &...; entities, are copied unchanged.
   sub latextrans {
       my $symbolfont=shift;
       # Symbol font character => LaTeX.  Characters above 0x7F are written as
       # \x escapes (their Latin-1 code points) so that the table does not
       # depend on the encoding this source file happens to be saved in.
       my %latexsymb=(
           "\xB1" => '\pm',
           "\xB4" => '\times',
           "\xB8" => '\div',
           "\xD2" => '(R)',
           "\xD3" => '\copy',
           "\xD8" => '\neg',
           "\xE2" => '(R)',
           "\xE3" => '\copy',
           "\xA6" => 'f',
           'A' => '\Alpha',
           'B' => '\Beta',
           'G' => '\Gamma',
           'D' => '\Delta',
           'E' => '\Epsilon',
           'Z' => '\Zeta',
           'H' => '\Eta',
           'Q' => '\Theta',
           'I' => '\Iota',
           'K' => '\Kappa',
           'L' => '\Lambda',
           'M' => '\Mu',
           'N' => '\Nu',
           'X' => '\Xi',
           'O' => '\Omicron',
           'P' => '\Pi',
           'R' => '\Rho',
           'S' => '\Sigma',
           'T' => '\Tau',
           'U' => 'Y',
           'F' => '\Phi',
           'C' => '\Chi',
           'Y' => '\Psi',
           'W' => '\Omega',
           'a' => '\alpha',
           'b' => '\beta',
           'g' => '\gamma',
           'd' => '\delta',
           'e' => '\epsilon',
           'z' => '\zeta',
           'h' => '\eta',
           'q' => '\theta',
           'i' => '\iota',
           'k' => '\kappa',
           'l' => '\lambda',
           'm' => '\mu',
           'n' => '\nu',
           'x' => '\xi',
           'o' => '\omicron',
           'p' => '\pi',
           'r' => '\rho',
           'V' => '\sigmaf',
           's' => '\sigma',
           't' => '\tau',
           'u' => '\upsilon',
           'f' => '\phi',
           'c' => '\chi',
           'y' => '\psi',
           'w' => '\omega',
           'J' => '\vartheta',
           'j' => '\varphi',
           'v' => '\varpi',
           "\xA1" => '\Upsilon',
           "\xA2" => "'",
           "\xA4" => '/',
           "\xB2" => '"',
           "\xBC" => '\ldots',
           "\xC0" => '\aleph',
           "\xC1" => '\Im',
           "\xC2" => '\Re',
           "\xC3" => '\wp',
           "\xD4" => '^{TM}',
           "\xE4" => '^{TM}',
           "\xF0" => 'EUR',
           "\xAB" => '\leftrightarrow',
           "\xAC" => '\leftarrow',
           "\xAD" => '\uparrow',
           "\xAE" => '\rightarrow',
           "\xAF" => '\downarrow',
           "\xBF" => '\hookleftarrow',
           "\xDB" => '\Leftrightarrow',
           "\xDC" => '\Leftarrow',
           "\xDD" => '\Uparrow',
           "\xDE" => '\Rightarrow',
           "\xDF" => '\Downarrow',
           '"' => '\forall',
           '$' => '\exists',
           "'" => '\ni',
           '*' => '\ast',
           '-' => '-',
           '@' => '\cong',
           '\\' => '\therefore',
           '^' => '\perp',
           '~' => '\sim',
           "\xA3" => '\leq',
           "\xA5" => '\infty',
           "\xB3" => '\geq',
           "\xB5" => '\propto',
           "\xB6" => '\partial',
           "\xB7" => '\cdot',
           "\xB9" => '\not=',
           "\xBA" => '\equiv',
           "\xBB" => '\approx',
           "\xC4" => '\otimes',
           "\xC5" => '\oplus',
           "\xC6" => '\emptyset',
           "\xC7" => '\cap',
           "\xC8" => '\cup',
           "\xC9" => '\supset',
           "\xCA" => '\supseteq',
           "\xCB" => '\not\subset',
           "\xCC" => '\subset',
           "\xCD" => '\subseteq',
           "\xCE" => '\in',
           "\xCF" => '\not\in',
           "\xD0" => '\angle',
           "\xD1" => '\nabla',
           "\xD5" => '\prod',
           "\xD6" => '\surd',
           "\xD7" => '\cdot',
           "\xD9" => '\wedge',
           "\xDA" => '\vee',
           "\xE5" => '\sum',
           "\xF2" => '\int',
           "\xE1" => '\langle',
           "\xF1" => '\rangle',
           "\xE0" => '\diamondsuit',
           "\xA7" => '\clubsuit',
           "\xA8" => '\diamondsuit',
           "\xA9" => '\heartsuit',
           "\xAA" => '\spadesuit'
           );
       my $output='';
       my $char='';       # lookup unit being collected (one character or one entity)
       my $entitymode=0;  # true while between an '&' and its closing ';'
       foreach my $newchar (split(//,$symbolfont)) {
           $char.=$newchar;
           if ($newchar eq '&') { $entitymode=1; }
           # Inside an entity keep collecting until the closing ';' arrives
           if (($entitymode) && ($newchar ne ';')) { next; }
           if (exists($latexsymb{$char})) {
               $output.=$latexsymb{$char};
           } else {
               $output.=$char;
           }
           $char='';
           $entitymode=0;
       }
       # An '&' that never met its ';' leaves text pending in $char; emit it
       # unchanged instead of silently dropping the rest of the input.
       $output.=$char;
       return $output;
   }
  195: 
  # Build the replacement text for one Symbol-font <font> tag and its content.
  #
  # Parameters: the attribute text that stood before the face attribute, the
  #             attribute text that stood after it, and the tag's content.
  # Returns:    a <font> start tag carrying only the remaining attributes,
  #             followed by the content translated by latextrans and wrapped
  #             in <m>$...$</m>.
  sub insidetrans {
      my ($before,$after,$content)=@_;
      my $latex=&latextrans($content);
      my $starttag='<font'.$before.$after.'>';
      return $starttag.'<m>$'.$latex.'$</m>';
  }
  200: 
  # Replace the content of every <font face="symbol"> element by LaTeX.
  #
  # Parameter: $text - markup to process
  # Returns:   the markup with each Symbol-font section rewritten through
  #            insidetrans.  Closing font tags come back as lowercase </font>
  #            because the text is split on them and rejoined.
  sub symbolfontreplace {
      my $text=shift;
      # Limit -1 keeps trailing empty fields; without it, input that ends in
      # </font> would lose that closing tag when the pieces are rejoined.
      my @fragments=split(/\<\/font\>/si,$text,-1);
      foreach my $fragment (@fragments) {
          # Everything from the symbol font tag to the end of the fragment
          # (i.e. up to where </font> stood) is handed to insidetrans.
          $fragment=~s/\<font([^\>]*)\s+face=[\"\']*symbol[\"\']*([^\>]*)\>(.*)$/&insidetrans($1,$2,$3)/gsie;
      }
      return join('</font>',@fragments);
  }
  209: 
  # Clean up HTML/XML markup.
  #
  # Parameters:
  #   $raw              - the markup to clean
  #   $full             - if true, <html>, <head> and <body> tags are kept
  #   $blocklinefeed    - if true, line endings are left alone
  #   $blockemptytags   - if true, empty tags are not rewritten and the
  #                       html/head/body tags are not removed
  #   $blocklowercasing - if true, tag names are not lowercased
  #   $blockdesymboling - if true, Symbol-font sections are not translated
  # Returns: the cleaned markup
  sub htmlclean {
      my ($raw,$full,$blocklinefeed,$blockemptytags,$blocklowercasing,$blockdesymboling)=@_;
  # Take care of CRLF etc
      unless ($blocklinefeed) {
          $raw=~s/\r\f/\n/gs;
          $raw=~s/\f\r/\n/gs;
          $raw=~s/\r\n/\n/gs;
          $raw=~s/\n\r/\n/gs;
          $raw=~s/\f/\n/gs;
          $raw=~s/\r/\n/gs;
          $raw=~s/\&\#10\;/\n/gs;
          $raw=~s/\&\#13\;/\n/gs;
      }
  # Generate empty tags, remove wrong end tags
      unless ($blockemptytags) {
          # \b keeps longer tag names that merely start with one of these
          # names (e.g. <metadata>) from being rewritten.  Attributes may
          # contain '/' (paths in src=...); only a tag that already ends in
          # "/>" is skipped, which the negative lookbehind takes care of.
          $raw=~s/\<(br|hr|img|meta|allow|basefont)\b([^\>]*?)(?<!\/)\>/\<$1$2 \/\>/gis;
          $raw=~s/\<\/(br|hr|img|meta|allow|basefont)\>//gis;
          unless ($full) {
              $raw=~s/\<[\/]*(body|head|html)\>//gis;
          }
      }
  # Make standard tags lowercase
      unless ($blocklowercasing) {
          foreach my $tag (qw(html body head meta h1 h2 h3 h4 b i m
                              table tr td th p br hr img embed font
                              a strong center title basefont li ol ul
                              input select form option script pre)) {
              # start tag without attributes, end tag, start tag with attributes
              $raw=~s/\<$tag\s*\>/\<$tag\>/gis;
              $raw=~s/\<\/$tag\s*\>/<\/$tag\>/gis;
              $raw=~s/\<$tag\s([^\>]*)\>/<$tag $1\>/gis;
          }
      }
  # Replace <font face="symbol">
      unless ($blockdesymboling) {
          $raw=&symbolfontreplace($raw);
      }
      return $raw;
  }
  244: 
  # First phase of the cleanup dialog; the default phase chosen by handler.
  # Not implemented yet: the arguments (request object, file name, user name,
  # user domain) are unpacked and nothing is printed.
  sub phaseone {
      my ($request,$file,$user,$domain)=@_;
  }
  248: 
  # Second phase of the cleanup dialog; handler selects it when the form
  # field "phase" is 'two'.
  # Not implemented yet: the arguments (request object, file name, user name,
  # user domain) are unpacked and nothing is printed.
  sub phasetwo {
      my ($request,$file,$user,$domain)=@_;
  }
  252: 
  # Third phase of the cleanup dialog; handler selects it when the form
  # field "phase" is 'three'.
  # Not implemented yet: the arguments (request object, file name, user name,
  # user domain) are unpacked and nothing is printed.
  sub phasethree {
      my ($request,$file,$user,$domain)=@_;
  }
  256: 
  257: # ---------------------------------------------------------------- Main Handler
  # Request handler for the cleanup page.
  #
  # Parameter: $r - the request object
  # Returns:   HTTP_NOT_FOUND if no file name was given,
  #            HTTP_NOT_ACCEPTABLE if constructaccess does not return a user
  #            name and domain for the file, OK after the page was printed.
  #
  # NOTE(review): %env is not declared in this file; presumably it is
  # exported by Apache::lonnet - confirm.
  sub handler {

      my $r=shift;

  # Get query string for limited number of parameters

      &Apache::loncommon::get_unprocessed_cgi($ENV{'QUERY_STRING'},
                                              ['filename']);

      # Must be declared here: this file runs under "use strict"
      my $fn;

      if ($env{'form.filename'}) {
          $fn=$env{'form.filename'};
          # Strip scheme and host when a complete URL was passed
          $fn=~s/^https?\:\/\/[^\/]+//;
      } else {
          $r->log_reason($env{'user.name'}.' at '.$env{'user.domain'}.
                         ' unspecified filename for cleanup', $r->filename);
          return HTTP_NOT_FOUND;
      }

      # Nothing left after removing the host part
      unless ($fn) {
          $r->log_reason($env{'user.name'}.' at '.$env{'user.domain'}.
                         ' trying to cleanup non-existing file', $r->filename);
          return HTTP_NOT_FOUND;
      }

  # ----------------------------------------------------------- Start page output
      my ($uname,$udom)=
          &Apache::loncacc::constructaccess($fn,$r->dir_config('lonDefDomain'));
      unless (($uname) && ($udom)) {
          # $uname/$udom are empty in this branch, so log who is logged in,
          # the same way the other log entries of this handler do
          $r->log_reason($env{'user.name'}.' at '.$env{'user.domain'}.
                         ' trying to cleanup file '.$env{'form.filename'}.
                         ' ('.$fn.') - not authorized',
                         $r->filename);
          return HTTP_NOT_ACCEPTABLE;
      }

      # Remove the first /~username component from the path
      $fn=~s/\/\~(\w+)//;

      &Apache::loncommon::content_type($r,'text/html');
      $r->send_http_header;

      $r->print('<html><head><title>LON-CAPA Construction Space</title></head>');

      $r->print(&Apache::loncommon::bodytag('Cleanup XML Document'));

      # Dispatch on the form field "phase"; anything else means phase one
      if ($env{'form.phase'} eq 'three') {
          &phasethree($r,$fn,$uname,$udom);
      } elsif ($env{'form.phase'} eq 'two') {
          &phasetwo($r,$fn,$uname,$udom);
      } else {
          &phaseone($r,$fn,$uname,$udom);
      }

      $r->print('</body></html>');
      return OK;
  }
  317: 
  318: 1;
  319: __END__

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>