loncom/build/xfml_parse.pl - view

File: [LON-CAPA] / loncom / build / xfml_parse.pl
Revision 1.1: download - view: text, annotated - select for diffs
Tue Jan 29 10:42:42 2002 UTC (22 years, 4 months ago) by harris41
Branches: MAIN
CVS tags: HEAD

parses xfml; there are some current limitations here (like I don't
match contextual conditions and am not yet supporting choice:include)

1: #!/usr/bin/perl 2: 3: # YEAR=2002 4: # 1/26,1/27,1/28 - Scott Harrison 5: 6: # Read in 2 XML file; first is the filter specification, the second 7: # is the XML file to be filtered 8: 9: use HTML::TokeParser; 10: use strict; 11: 12: unless (@ARGV) { 13: print <<END; 14: Incorrect invocation. 15: Example usages: 16: cat loncapafiles.lpml | perl xfml_parse.pl valid_hosts.xfml 17: perl xfml_parse.pl valid_hosts.xfml loncapafiles.lpml 18: END 19: } 20: 21: my %eh; 22: my %ih; 23: my $tofilter=shift @ARGV; 24: my @lines=<>; my $parsestring=join('',@lines); undef @lines; 25: my $parser = HTML::TokeParser->new(\$parsestring) or 26: die('can\'t create TokeParser object'); 27: $parser->xml_mode('1'); 28: 29: # Define handling methods for mode-dependent text rendering 30: 31: my %conditions; &cc; 32: 33: $parser->{textify}={ 34: xfml => \&format_xfml, 35: 'when:name' => \&format_when_name, 36: 'when:attribute' => \&format_when_attribute, 37: 'when:cdata' => \&format_when_cdata, 38: 'choice:include' => \&format_choice_include, 39: 'choice:exclude' => \&format_choice_exclude, 40: }; 41: 42: my $text; 43: my $xfml; 44: my $wloc=0; 45: my %eha; 46: 47: while (my $token = $parser->get_tag('xfml')) { 48: &format_xfml(@{$token}); 49: $text = $parser->get_text('/xfml'); 50: # print $xfml; 51: # print $text; 52: $token = $parser->get_tag('/xfml'); 53: } 54: 55: open IN,"<$tofilter"; 56: my @lines2=<IN>; close IN; my $parsestring2=join('',@lines2); undef @lines2; 57: $parser = HTML::TokeParser->new(\$parsestring2) or 58: die('can\'t create TokeParser object'); 59: $parser->xml_mode('1'); 60: 61: my $token; 62: my $hloc=0; 63: my %ts; 64: my $tr; 65: my $echild=0; 66: my $exclude=0; 67: my $excluden=0; 68: my $excludea=0; 69: my $et=0; 70: my $cdata=''; 71: while ($token = $parser->get_token()) { 72: # from HTML::TokeParser documentation: 73: # ["S", $tag, %$attr, @$attrseq, $text] 74: # ["E", $tag, $text] 75: # ["T", $text, $is_data] 76: # ["C", $text] 77: # ["D", $text] 78: # ["PI", $token0, $text] 79: # push @{$eha{$tokeninfo[2]->{'nodename'}}->{'name'}}, 80: # @{$conditions{'name'}}; 81: # push @{$eha{$tokeninfo[2]->{'nodename'}}->{'attribute'}}, 82: # @{$conditions{'attribute'}}; 83: # push @{$eha{$tokeninfo[2]->{'nodename'}}->{'value'}}, 84: # @{$conditions{'value'}}; 85: # push @{$eha{$tokeninfo[2]->{'nodename'}}->{'cdata'}}, 86: # @{$conditions{'cdata'}}; 87: if ($token->[0] eq 'D') { 88: print $token->[1]; 89: } 90: elsif ($token->[0] eq 'C') { 91: print $token->[1]; 92: } 93: elsif ($token->[0] eq 'S') { 94: $cdata=''; 95: $hloc++; 96: # if token can be excluded, then pretend it is until all conditions are 97: # run (eha); then output during end tag processing 98: # else, output 99: 100: # a token can be excluded when it is an eh key, or a child node of 101: # an eh key 102: 103: if ($eh{$token->[1]}) { 104: $echild=$token->[1]; 105: # print "ECHILD=$echild\n"; 106: } 107: if ($echild) { 108: # run through names for echild 109: # then attributes and/or values and/or cdata 110: my $name=$token->[1]; 111: my @attributes=@{$token->[3]}; 112: my %atthash=%{$token->[2]}; 113: foreach my $namemlist (@{$eha{$echild}->{'name'}}) { 114: foreach my $namematch (@{$namemlist}) { 115: my $nm=$namematch; $nm=~s/^.//; $nm=~s/.$//; 116: if ($name=~/$nm/) { 117: # print "NMATCH: $nm ($name)\n"; 118: $excluden++; 119: foreach my $attributemlist 120: (@{$eha{$echild}->{'attribute'}}) { 121: foreach my $attributematch 122: (@{$attributemlist}) { 123: my ($an,$am)= 124: split(/\=/,$attributematch,2); 125: $am=~s/^.//; 126: $am=~s/.$//; 127: # print 'AM:'."($an,$am)\t"; 128: # print 'ATT:'.join(',',%atthash)."\n"; 129: if ($atthash{$an}) { 130: if ($atthash{$an}=~/$am/) { 131: $excludea++; 132: # print "AMATCH: $am (". 133: # join(',', 134: # @attributes) 135: # ."\n"; 136: } 137: } 138: } 139: } 140: } 141: } 142: } 143: $tr.=$token->[4]; 144: } 145: else { 146: print $token->[4]; 147: } 148: } 149: elsif ($token->[0] eq 'E') { 150: if ($echild) { 151: $tr.=$token->[2]; 152: if ($excluden) { 153: foreach my $cdatamlist (@{$eha{$echild}->{'cdata'}}) { 154: foreach my $cdatamatch (@{$cdatamlist}) { 155: # print "CDATA: $cdatamatch, $cdata\n"; 156: my $cm=$cdatamatch; 157: my $not=0; 158: if ($cm=~/\!/) { 159: $not=1; 160: $cm=~s/^.//; 161: } 162: $cm=~s/^.//; $cm=~s/.$//; 163: if ((!$not and $cdata!~/$cm/) 164: or ($not and $cdata=~/$cm/)) { 165: # print "CMISMATCH: $cm ($cdata)\n"; 166: } 167: elsif (($not and $cdata!~/$cm/) 168: or (!$not and $cdata=~/$cm/)) { 169: $exclude++; 170: } 171: } 172: } 173: } 174: } 175: if ($eh{$token->[1]}) { 176: $echild=0; 177: if (!$exclude and !$excludea) { 178: print $tr; 179: # print $token->[2]; 180: $tr=''; 181: } 182: elsif ($exclude>0 or $excludea>0) { 183: # print "EXCLUDING $token->[1] $excludea $excluden\n"; 184: $exclude=0; $excluden=0; $excludea=0; 185: $tr=''; 186: } 187: $exclude=0; $excluden=0; $excludea=0; 188: } 189: else { 190: if ($echild) { 191: # $tr.=$token->[2]; 192: } 193: else { 194: print $token->[2]; 195: $tr=''; 196: } 197: } 198: $hloc--; 199: } 200: elsif ($token->[0] eq 'T') { 201: if ($echild) { 202: $tr.=$token->[1]; 203: $cdata=$token->[1]; 204: } 205: else { 206: print $token->[1]; 207: $tr=''; 208: } 209: } 210: } 211: 212: # ------------------------------------------------------------ clear conditions 213: sub cc { 214: @{$conditions{'name'}}=(); pop @{$conditions{'name'}}; 215: @{$conditions{'attribute'}}=(); pop @{$conditions{'attribute'}}; 216: @{$conditions{'value'}}=(); pop @{$conditions{'value'}}; 217: @{$conditions{'cdata'}}=(); pop @{$conditions{'cdata'}}; 218: } 219: 220: # --------------------------------------- remove starting and ending whitespace 221: sub trim { 222: my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s; 223: } 224: 225: # --------------------------------------------------------- Format xfml section 226: sub format_xfml { 227: my (@tokeninfo)=@_; 228: return ''; 229: } 230: 231: # ---------------------------------------------------- Format when:name section 232: sub format_when_name { 233: my (@tokeninfo)=@_; 234: $wloc++; 235: my $att_match=$tokeninfo[2]->{'match'}; 236: push @{$conditions{'name'}},$att_match; 237: my $text=&trim($parser->get_text('/when:name')); 238: $parser->get_tag('/when:name'); 239: # print 'Name Matching...'.$att_match; 240: $wloc--; 241: &cc unless $wloc; 242: return ''; 243: } 244: 245: # ----------------------------------------------- Format when:attribute section 246: sub format_when_attribute { 247: my (@tokeninfo)=@_; 248: $wloc++; 249: my $att_match=$tokeninfo[2]->{'match'}; 250: push @{$conditions{'attribute'}},$att_match; 251: my $text=&trim($parser->get_text('/when:attribute')); 252: $parser->get_tag('/when:attribute'); 253: # print 'Attribute Matching...'.$att_match; 254: $wloc--; 255: &cc unless $wloc; 256: return ''; 257: } 258: 259: # --------------------------------------------------- Format when:cdata section 260: sub format_when_cdata { 261: my (@tokeninfo)=@_; 262: $wloc++; 263: my $att_match=$tokeninfo[2]->{'match'}; 264: # print 'Cdata Matching...'.$att_match; 265: push @{$conditions{'cdata'}},$att_match; 266: my $text=&trim($parser->get_text('/when:cdata')); 267: $parser->get_tag('/when:cdata'); 268: $wloc--; 269: &cc unless $wloc; 270: return ''; 271: } 272: 273: # ----------------------------------------------- Format choice:include section 274: sub format_choice_include { 275: my (@tokeninfo)=@_; 276: my $text=&trim($parser->get_text('/choice:include')); 277: $parser->get_tag('/choice:include'); 278: $ih{$tokeninfo[2]->{'match'}}++; 279: return ''; 280: } 281: 282: # ----------------------------------------------- Format choice:exclude section 283: sub format_choice_exclude { 284: my (@tokeninfo)=@_; 285: my $text=&trim($parser->get_text('/choice:exclude')); 286: $parser->get_tag('/choice:exclude'); 287: $eh{$tokeninfo[2]->{'nodename'}}++; 288: push @{$eha{$tokeninfo[2]->{'nodename'}}->{'name'}}, 289: [@{$conditions{'name'}}]; 290: push @{$eha{$tokeninfo[2]->{'nodename'}}->{'attribute'}}, 291: [@{$conditions{'attribute'}}]; 292: push @{$eha{$tokeninfo[2]->{'nodename'}}->{'value'}}, 293: [@{$conditions{'value'}}]; 294: push @{$eha{$tokeninfo[2]->{'nodename'}}->{'cdata'}}, 295: [@{$conditions{'cdata'}}]; 296: return ''; 297: }