File:  [LON-CAPA] / loncom / build / xfml_parse.pl
Revision 1.1: download - view: text, annotated - select for diffs
Tue Jan 29 10:42:42 2002 UTC (22 years, 4 months ago) by harris41
Branches: MAIN
CVS tags: HEAD
parses xfml; there are some current limitations here (like I don't
match contextual conditions and am not yet supporting choice:include)

    1: #!/usr/bin/perl
    2: 
    3: # YEAR=2002
    4: # 1/26,1/27,1/28 - Scott Harrison
    5: 
    6: # Read in 2 XML file; first is the filter specification, the second
    7: # is the XML file to be filtered
    8: 
    9: use HTML::TokeParser;
   10: use strict;
   11: 
   12: unless (@ARGV) {
   13:     print <<END;
   14: Incorrect invocation.
   15: Example usages:
   16: cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml
   17: perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml
   18: END
   19: }
   20: 
   21: my %eh;
   22: my %ih;
   23: my $tofilter=shift @ARGV;
   24: my @lines=<>; my $parsestring=join('',@lines); undef @lines;
   25: my $parser = HTML::TokeParser->new(\$parsestring) or
   26:     die('can\'t create TokeParser object');
   27: $parser->xml_mode('1');
   28: 
   29: # Define handling methods for mode-dependent text rendering
   30: 
   31: my %conditions; &cc;
   32: 
   33: $parser->{textify}={
   34:     xfml => \&format_xfml,
   35:     'when:name' => \&format_when_name,
   36:     'when:attribute' => \&format_when_attribute,
   37:     'when:cdata' => \&format_when_cdata,
   38:     'choice:include' => \&format_choice_include,
   39:     'choice:exclude' => \&format_choice_exclude,
   40:     };
   41: 
   42: my $text;
   43: my $xfml;
   44: my $wloc=0;
   45: my %eha;
   46: 
   47: while (my $token = $parser->get_tag('xfml')) {
   48:     &format_xfml(@{$token});
   49:     $text = $parser->get_text('/xfml');
   50: #    print $xfml;
   51: #    print $text;
   52:     $token = $parser->get_tag('/xfml');
   53: }
   54: 
   55: open IN,"<$tofilter";
   56: my @lines2=<IN>; close IN; my $parsestring2=join('',@lines2); undef @lines2;
   57: $parser = HTML::TokeParser->new(\$parsestring2) or
   58:     die('can\'t create TokeParser object');
   59: $parser->xml_mode('1');
   60: 
   61: my $token;
   62: my $hloc=0;
   63: my %ts;
   64: my $tr;
   65: my $echild=0;
   66: my $exclude=0;
   67: my $excluden=0;
   68: my $excludea=0;
   69: my $et=0;
   70: my $cdata='';
   71: while ($token = $parser->get_token()) {
   72: # from HTML::TokeParser documentation:
   73: #             ["S",  $tag, %$attr, @$attrseq, $text]
   74: #             ["E",  $tag, $text]
   75: #             ["T",  $text, $is_data]
   76: #             ["C",  $text]
   77: #             ["D",  $text]
   78: #             ["PI", $token0, $text]
   79: #    push @{$eha{$tokeninfo[2]->{'nodename'}}->{'name'}},
   80: #         @{$conditions{'name'}};
   81: #    push @{$eha{$tokeninfo[2]->{'nodename'}}->{'attribute'}},
   82: #         @{$conditions{'attribute'}};
   83: #    push @{$eha{$tokeninfo[2]->{'nodename'}}->{'value'}},
   84: #         @{$conditions{'value'}};
   85: #    push @{$eha{$tokeninfo[2]->{'nodename'}}->{'cdata'}},
   86: #         @{$conditions{'cdata'}};
   87:     if ($token->[0] eq 'D') {
   88: 	print $token->[1];
   89:     }
   90:     elsif ($token->[0] eq 'C') {
   91: 	print $token->[1];
   92:     }
   93:     elsif ($token->[0] eq 'S') {
   94: 	$cdata='';
   95: 	$hloc++;
   96: # if token can be excluded, then pretend it is until all conditions are
   97: # run (eha); then output during end tag processing
   98: # else, output
   99: 
  100: # a token can be excluded when it is an eh key, or a child node of
  101: # an eh key
  102: 
  103: 	if ($eh{$token->[1]}) {
  104: 	    $echild=$token->[1];
  105: #	    print "ECHILD=$echild\n";
  106: 	}
  107: 	if ($echild) {
  108: 	    # run through names for echild
  109: 	    # then attributes and/or values and/or cdata
  110: 	    my $name=$token->[1];
  111: 	    my @attributes=@{$token->[3]};
  112: 	    my %atthash=%{$token->[2]};
  113: 	    foreach my $namemlist (@{$eha{$echild}->{'name'}}) {
  114: 		foreach my $namematch (@{$namemlist}) {
  115: 		    my $nm=$namematch; $nm=~s/^.//; $nm=~s/.$//;
  116: 		    if ($name=~/$nm/) {
  117: #			print "NMATCH: $nm ($name)\n";
  118: 			$excluden++;
  119: 			foreach my $attributemlist
  120: 			    (@{$eha{$echild}->{'attribute'}}) {
  121: 				foreach my $attributematch 
  122: 				    (@{$attributemlist}) {
  123: 					my ($an,$am)=
  124: 					    split(/\=/,$attributematch,2);
  125: 					$am=~s/^.//;
  126: 					$am=~s/.$//;
  127: #					print 'AM:'."($an,$am)\t";
  128: #					print 'ATT:'.join(',',%atthash)."\n";
  129: 					if ($atthash{$an}) {
  130: 					    if ($atthash{$an}=~/$am/) {
  131: 						$excludea++;
  132: #						print "AMATCH: $am (".
  133: #						    join(',',
  134: #							 @attributes)
  135: #							."\n";
  136: 					    }
  137: 					}
  138: 				    }
  139: 			    }
  140: 		    }
  141: 		}
  142: 	    }
  143: 	    $tr.=$token->[4];
  144: 	}
  145: 	else {
  146: 	    print $token->[4];
  147: 	}
  148:     }
  149:     elsif ($token->[0] eq 'E') {
  150: 	if ($echild) {
  151: 	    $tr.=$token->[2];
  152: 	    if ($excluden) {
  153: 		foreach my $cdatamlist (@{$eha{$echild}->{'cdata'}}) {
  154: 		    foreach my $cdatamatch (@{$cdatamlist}) {
  155: #				print "CDATA: $cdatamatch, $cdata\n";
  156: 			my $cm=$cdatamatch;
  157: 			my $not=0;
  158: 			if ($cm=~/\!/) {
  159: 			    $not=1;
  160: 			    $cm=~s/^.//;
  161: 			}
  162: 			$cm=~s/^.//; $cm=~s/.$//;
  163: 			if ((!$not and $cdata!~/$cm/)
  164: 			    or ($not and $cdata=~/$cm/)) {
  165: #				    print "CMISMATCH: $cm ($cdata)\n";
  166: 			}
  167: 			elsif (($not and $cdata!~/$cm/)
  168: 			       or (!$not and $cdata=~/$cm/)) {
  169: 			    $exclude++;
  170: 			}
  171: 		    }
  172: 		}
  173: 	    }
  174: 	}
  175: 	if ($eh{$token->[1]}) {
  176: 	    $echild=0;
  177: 	    if (!$exclude and !$excludea) {
  178: 		print $tr;
  179: #		print $token->[2];
  180: 		$tr='';
  181: 	    }
  182: 	    elsif ($exclude>0 or $excludea>0) {
  183: #		print "EXCLUDING $token->[1] $excludea $excluden\n";
  184: 		$exclude=0; $excluden=0; $excludea=0;
  185: 		$tr='';
  186: 	    }
  187: 	    $exclude=0; $excluden=0; $excludea=0;
  188: 	}
  189: 	else {
  190: 	    if ($echild) {
  191: #		$tr.=$token->[2];
  192: 	    }
  193: 	    else {
  194: 		print $token->[2];
  195: 		$tr='';
  196: 	    }
  197: 	}
  198: 	$hloc--;
  199:     }
  200:     elsif ($token->[0] eq 'T') {
  201: 	if ($echild) {
  202: 	    $tr.=$token->[1];
  203: 	    $cdata=$token->[1];
  204: 	}
  205: 	else {
  206: 	    print $token->[1];
  207: 	    $tr='';
  208: 	}
  209:     }
  210: }
  211: 
  212: # ------------------------------------------------------------ clear conditions
  213: sub cc {
  214:     @{$conditions{'name'}}=(); pop @{$conditions{'name'}};
  215:     @{$conditions{'attribute'}}=(); pop @{$conditions{'attribute'}};
  216:     @{$conditions{'value'}}=(); pop @{$conditions{'value'}};
  217:     @{$conditions{'cdata'}}=(); pop @{$conditions{'cdata'}};
  218: }
  219: 
  220: # --------------------------------------- remove starting and ending whitespace
  221: sub trim {
  222:     my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s;
  223: }
  224: 
  225: # --------------------------------------------------------- Format xfml section
  226: sub format_xfml {
  227:     my (@tokeninfo)=@_;
  228:     return '';
  229: }
  230: 
  231: # ---------------------------------------------------- Format when:name section
  232: sub format_when_name {
  233:     my (@tokeninfo)=@_;
  234:     $wloc++;
  235:     my $att_match=$tokeninfo[2]->{'match'};
  236:     push @{$conditions{'name'}},$att_match;
  237:     my $text=&trim($parser->get_text('/when:name'));
  238:     $parser->get_tag('/when:name');
  239: #    print 'Name Matching...'.$att_match;
  240:     $wloc--;
  241:     &cc unless $wloc;
  242:     return '';
  243: }
  244: 
  245: # ----------------------------------------------- Format when:attribute section
  246: sub format_when_attribute {
  247:     my (@tokeninfo)=@_;
  248:     $wloc++;
  249:     my $att_match=$tokeninfo[2]->{'match'};
  250:     push @{$conditions{'attribute'}},$att_match;
  251:     my $text=&trim($parser->get_text('/when:attribute'));
  252:     $parser->get_tag('/when:attribute');
  253: #    print 'Attribute Matching...'.$att_match;
  254:     $wloc--;
  255:     &cc unless $wloc;
  256:     return '';
  257: }
  258: 
  259: # --------------------------------------------------- Format when:cdata section
  260: sub format_when_cdata {
  261:     my (@tokeninfo)=@_;
  262:     $wloc++;
  263:     my $att_match=$tokeninfo[2]->{'match'};
  264: #    print 'Cdata Matching...'.$att_match;
  265:     push @{$conditions{'cdata'}},$att_match;
  266:     my $text=&trim($parser->get_text('/when:cdata'));
  267:     $parser->get_tag('/when:cdata');
  268:     $wloc--;
  269:     &cc unless $wloc;
  270:     return '';
  271: }
  272: 
  273: # ----------------------------------------------- Format choice:include section
  274: sub format_choice_include {
  275:     my (@tokeninfo)=@_;
  276:     my $text=&trim($parser->get_text('/choice:include'));
  277:     $parser->get_tag('/choice:include');
  278:     $ih{$tokeninfo[2]->{'match'}}++;
  279:     return '';
  280: }
  281: 
  282: # ----------------------------------------------- Format choice:exclude section
  283: sub format_choice_exclude {
  284:     my (@tokeninfo)=@_;
  285:     my $text=&trim($parser->get_text('/choice:exclude'));
  286:     $parser->get_tag('/choice:exclude');
  287:     $eh{$tokeninfo[2]->{'nodename'}}++;
  288:     push @{$eha{$tokeninfo[2]->{'nodename'}}->{'name'}},
  289:          [@{$conditions{'name'}}];
  290:     push @{$eha{$tokeninfo[2]->{'nodename'}}->{'attribute'}},
  291:          [@{$conditions{'attribute'}}];
  292:     push @{$eha{$tokeninfo[2]->{'nodename'}}->{'value'}},
  293:          [@{$conditions{'value'}}];
  294:     push @{$eha{$tokeninfo[2]->{'nodename'}}->{'cdata'}},
  295:          [@{$conditions{'cdata'}}];
  296:     return '';
  297: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>