File:  [LON-CAPA] / loncom / build / xfml_parse.pl
Revision 1.2: download - view: text, annotated - select for diffs
Fri Feb 1 10:56:41 2002 UTC (22 years, 5 months ago) by harris41
Branches: MAIN
CVS tags: stable_2002_spring, HEAD
a lot of cleaning up, debugging, and commenting

    1: #!/usr/bin/perl
    2: 
    3: # YEAR=2002
    4: # 1/26,1/27,1/28,1/29,1/30,1/31 - Scott Harrison
    5: #
    6: ###
    7: 
    8: # Read in 2 XML file; first is the filter specification, the second
    9: # is the XML file to be filtered
   10: 
   11: ###############################################################################
   12: ##                                                                           ##
   13: ## ORGANIZATION OF THIS PERL SCRIPT                                          ##
   14: ## 1. Notes                                                                  ##
   15: ## 2. Get command line arguments                                             ##
   16: ## 3. First pass through (grab distribution-specific information)            ##
   17: ## 4. Second pass through (parse out what is not necessary)                  ##
   18: ## 5. Third pass through (translate markup according to specified mode)      ##
   19: ## 6. Functions (most all just format contents of different markup tags)     ##
   20: ## 7. POD (plain old documentation, CPAN style)                              ##
   21: ##                                                                           ##
   22: ###############################################################################
   23: 
   24: # ----------------------------------------------------------------------- Notes
   25: #
   26: # I am using a multiple pass-through approach to parsing
   27: # the xfml file.  This saves memory and makes sure the server
   28: # will never be overloaded.
   29: #
   30: # This is meant to parse files meeting the piml document type.
   31: # See xfml.dtd.  XFML=XML Filtering Markup Language.
   32: 
   33: use HTML::TokeParser;
   34: use strict;
   35: 
   36: unless (@ARGV) {
   37:     print <<END;
   38: Incorrect invocation.
   39: Example usages:
   40: cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml
   41: perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml
   42: END
   43: }
   44: 
   45: my %eh;
   46: my %ih;
   47: my $tofilter=shift @ARGV;
   48: open IN,"<$tofilter";
   49: my @lines=<IN>; my $parsestring=join('',@lines); undef @lines;
   50: close IN;
   51: my $parser = HTML::TokeParser->new(\$parsestring) or
   52:     die('can\'t create TokeParser object');
   53: $parser->xml_mode('1');
   54: 
   55: # Define handling methods for mode-dependent text rendering
   56: 
   57: my %conditions; &cc;
   58: 
   59: $parser->{textify}={
   60:     xfml => \&format_xfml,
   61:     'when:name' => \&format_when_name,
   62:     'when:attribute' => \&format_when_attribute,
   63:     'when:cdata' => \&format_when_cdata,
   64:     'choice:include' => \&format_choice_include,
   65:     'choice:exclude' => \&format_choice_exclude,
   66:     };
   67: 
   68: my $text;
   69: my $xfml;
   70: my $wloc=0;
   71: my %eha;
   72: 
   73: while (my $token = $parser->get_tag('xfml')) {
   74:     &format_xfml(@{$token});
   75:     $text = $parser->get_text('/xfml');
   76:     $token = $parser->get_tag('/xfml');
   77: }
   78: 
   79: #open IN,"<$tofilter";
   80: my @lines2=<>; my $parsestring2=join('',@lines2); undef @lines2;
   81: $parser = HTML::TokeParser->new(\$parsestring2) or
   82:     die('can\'t create TokeParser object');
   83: $parser->xml_mode('1');
   84: 
   85: my $token;
   86: my $hloc=0;
   87: my %ts;
   88: my $tr;
   89: my $echild=0;
   90: my $exclude=0;
   91: my $excluden=0;
   92: my $excludea=0;
   93: my $et=0;
   94: my $cdata='';
   95: my $excludenold=0;
   96: my $ign=0;
   97: 
   98: while ($token = $parser->get_token()) {
   99:     if ($token->[0] eq 'D') {
  100: 	print $token->[1];
  101:     }
  102:     elsif ($token->[0] eq 'C') {
  103: 	print $token->[1];
  104:     }
  105:     elsif ($token->[0] eq 'S') {
  106: 	$cdata='';
  107: 	$hloc++;
  108: # if token can be excluded, then pretend it is until all conditions are
  109: # run (eha); then output during end tag processing
  110: # else, output
  111: 
  112: # a token can be excluded when it is an eh key, or a child node of
  113: # an eh key
  114: 
  115: 	if ($eh{$token->[1]}) {
  116: 	    $echild=$token->[1];
  117: 	}
  118: 	if ($echild) {
  119: 	    # run through names for echild
  120: 	    # then attributes and/or values and/or cdata
  121: 	    my $name=$token->[1];
  122: 	    my @attributes=@{$token->[3]};
  123: 	    my %atthash=%{$token->[2]};
  124: 	    foreach my $namemlist (@{$eha{$echild}->{'name'}}) {
  125: 		foreach my $namematch (@{$namemlist}) {
  126: 		    my $nm=$namematch; $nm=~s/^.//; $nm=~s/.$//;
  127: 		    if ($name=~/$nm/) {
  128: 			$excludenold=$excluden;
  129: 			$excluden++;
  130: 			foreach my $attributemlist
  131: 			    (@{$eha{$echild}->{'attribute'}}) {
  132: 				foreach my $attributematch 
  133: 				    (@{$attributemlist}) {
  134: 					my ($an,$am)=
  135: 					    split(/\=/,$attributematch,2);
  136: 					$am=~s/^.//;
  137: 					$am=~s/.$//;
  138: 					if ($atthash{$an}) {
  139: 					    if ($atthash{$an}=~/$am/) {
  140: 						$excludea++;
  141: 					    }
  142: 					}
  143: 				    }
  144: 			    }
  145: 		    }
  146: 		}
  147: 	    }
  148: 	    $tr.=$token->[4];
  149: 	}
  150: 	else {
  151: 	    print $token->[4];
  152: 	}
  153:     }
  154:     elsif ($token->[0] eq 'E') {
  155: 	if ($echild) {
  156: 	    $tr.=$token->[2];
  157: 	    if ($excluden) {
  158: 		my $i=0;
  159: 		CDATALOOP:
  160: 		foreach my $cdatamlist (@{$eha{$echild}->{'cdata'}}) {
  161: 		    $i++;
  162: 		    my $j;
  163: 		    foreach my $cdatamatch (@{$cdatamlist}) {
  164: 			$j++;
  165: #				print "CDATA: $cdatamatch, $cdata\n";
  166: 			my $cm=$cdatamatch;
  167: 			my $not=0;
  168: 			if ($cm=~/\!/) {
  169: 			    $not=1;
  170: 			    $cm=~s/^.//;
  171: 			}
  172: 			$cm=~s/^.//; $cm=~s/.$//;
  173: 			if ($not and $cdata=~/$cm/) {
  174: 			    $ign=1; $exclude=0;
  175: 			}
  176: 			if ((!$not and $cdata!~/$cm/)
  177: 			    or ($not and $cdata=~/$cm/)) {
  178: #				nothing happens
  179: #			    $exclude=0;
  180: 			}
  181: 			elsif (($not and $cdata!~/$cm/)
  182: 			       or (!$not and $cdata=~/$cm/)) {
  183: 			    $exclude++ unless $ign;
  184: 			}
  185: 		    }
  186: 		}
  187: 	    }
  188: 	}
  189: 	if ($eh{$token->[1]}) {
  190: 	    $ign=0;
  191: 	    $echild=0;
  192: 	    if (!$exclude and !$excludea) {
  193: 		print $tr;
  194: #		print $token->[2];
  195: 		$tr='';
  196: 	    }
  197: 	    elsif ($exclude>0 or $excludea>0) {
  198: #		print "EXCLUDING $token->[1] $exclude $excludea $excluden\n";
  199: 		$exclude=0; $excluden=0; $excludea=0;
  200: 		$tr='';
  201: 	    }
  202: 	    $exclude=0; $excluden=0; $excludea=0;
  203: 	}
  204: 	else {
  205: 	    if ($echild) {
  206: #		$tr.=$token->[2];
  207: 	    }
  208: 	    else {
  209: 		print $token->[2];
  210: 		$tr='';
  211: 	    }
  212: 	}
  213: 	$hloc--;
  214:     }
  215:     elsif ($token->[0] eq 'T') {
  216: 	if ($echild) {
  217: 	    $tr.=$token->[1];
  218: 	    $cdata=$token->[1];
  219: 	}
  220: 	else {
  221: 	    print $token->[1];
  222: 	    $tr='';
  223: 	}
  224:     }
  225: }
  226: 
  227: # ------------------------------------------------------------ clear conditions
  228: sub cc {
  229:     @{$conditions{'name'}}=(); pop @{$conditions{'name'}};
  230:     @{$conditions{'attribute'}}=(); pop @{$conditions{'attribute'}};
  231:     @{$conditions{'value'}}=(); pop @{$conditions{'value'}};
  232:     @{$conditions{'cdata'}}=(); pop @{$conditions{'cdata'}};
  233: }
  234: 
  235: # --------------------------------------- remove starting and ending whitespace
  236: sub trim {
  237:     my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s;
  238: }
  239: 
  240: 
  241: 
  242: # --------------------------------------------------------- Format xfml section
  243: sub format_xfml {
  244:     my (@tokeninfo)=@_;
  245:     return '';
  246: }
  247: 
  248: # ---------------------------------------------------- Format when:name section
  249: sub format_when_name {
  250:     my (@tokeninfo)=@_;
  251:     $wloc++;
  252:     my $att_match=$tokeninfo[2]->{'match'};
  253:     push @{$conditions{'name'}},$att_match;
  254:     my $text=&trim($parser->get_text('/when:name'));
  255:     $parser->get_tag('/when:name');
  256:     $wloc--;
  257:     &cc unless $wloc;
  258:     return '';
  259: }
  260: 
  261: # ----------------------------------------------- Format when:attribute section
  262: sub format_when_attribute {
  263:     my (@tokeninfo)=@_;
  264:     $wloc++;
  265:     my $att_match=$tokeninfo[2]->{'match'};
  266:     push @{$conditions{'attribute'}},$att_match;
  267:     my $text=&trim($parser->get_text('/when:attribute'));
  268:     $parser->get_tag('/when:attribute');
  269:     $wloc--;
  270:     &cc unless $wloc;
  271:     return '';
  272: }
  273: 
  274: # --------------------------------------------------- Format when:cdata section
  275: sub format_when_cdata {
  276:     my (@tokeninfo)=@_;
  277:     $wloc++;
  278:     my $att_match=$tokeninfo[2]->{'match'};
  279:     push @{$conditions{'cdata'}},$att_match;
  280:     my $text=&trim($parser->get_text('/when:cdata'));
  281:     $parser->get_tag('/when:cdata');
  282:     $wloc--;
  283:     &cc unless $wloc;
  284:     return '';
  285: }
  286: 
  287: # ----------------------------------------------- Format choice:include section
  288: sub format_choice_include {
  289:     my (@tokeninfo)=@_;
  290:     my $text=&trim($parser->get_text('/choice:include'));
  291:     $parser->get_tag('/choice:include');
  292:     $ih{$tokeninfo[2]->{'match'}}++;
  293:     return '';
  294: }
  295: 
  296: # ----------------------------------------------- Format choice:exclude section
  297: sub format_choice_exclude {
  298:     my (@tokeninfo)=@_;
  299:     my $text=&trim($parser->get_text('/choice:exclude'));
  300:     $parser->get_tag('/choice:exclude');
  301:     $eh{$tokeninfo[2]->{'nodename'}}++;
  302:     push @{$eha{$tokeninfo[2]->{'nodename'}}->{'name'}},
  303:          [@{$conditions{'name'}}];
  304:     push @{$eha{$tokeninfo[2]->{'nodename'}}->{'attribute'}},
  305:          [@{$conditions{'attribute'}}];
  306:     push @{$eha{$tokeninfo[2]->{'nodename'}}->{'value'}},
  307:          [@{$conditions{'value'}}];
  308:     push @{$eha{$tokeninfo[2]->{'nodename'}}->{'cdata'}},
  309:          [@{$conditions{'cdata'}}];
  310:     return '';
  311: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>