--- loncom/build/xfml_parse.pl	2002/01/29 10:42:42	1.1
+++ loncom/build/xfml_parse.pl	2002/02/01 10:56:41	1.2
@@ -1,11 +1,35 @@
 #!/usr/bin/perl
 
 # YEAR=2002
-# 1/26,1/27,1/28 - Scott Harrison
+# 1/26,1/27,1/28,1/29,1/30,1/31 - Scott Harrison
+#
+###
 
 # Read in 2 XML file; first is the filter specification, the second
 # is the XML file to be filtered
 
+###############################################################################
+##                                                                           ##
+## ORGANIZATION OF THIS PERL SCRIPT                                          ##
+## 1. Notes                                                                  ##
+## 2. Get command line arguments                                             ##
+## 3. First pass through (grab distribution-specific information)            ##
+## 4. Second pass through (parse out what is not necessary)                  ##
+## 5. Third pass through (translate markup according to specified mode)      ##
+## 6. Functions (most all just format contents of different markup tags)     ##
+## 7. POD (plain old documentation, CPAN style)                              ##
+##                                                                           ##
+###############################################################################
+
+# ----------------------------------------------------------------------- Notes
+#
+# I am using a multiple pass-through approach to parsing
+# the xfml file.  This saves memory and makes sure the server
+# will never be overloaded.
+#
+# This is meant to parse files meeting the piml document type.
+# See xfml.dtd.  XFML=XML Filtering Markup Language.
+
 use HTML::TokeParser;
 use strict;
 
@@ -21,7 +45,9 @@ END
 my %eh;
 my %ih;
 my $tofilter=shift @ARGV;
-my @lines=<>; my $parsestring=join('',@lines); undef @lines;
+open IN,"<$tofilter";
+my @lines=<IN>; my $parsestring=join('',@lines); undef @lines;
+close IN;
 my $parser = HTML::TokeParser->new(\$parsestring) or
     die('can\'t create TokeParser object');
 $parser->xml_mode('1');
@@ -47,13 +73,11 @@ my %eha;
 while (my $token = $parser->get_tag('xfml')) {
     &format_xfml(@{$token});
     $text = $parser->get_text('/xfml');
-#    print $xfml;
-#    print $text;
     $token = $parser->get_tag('/xfml');
 }
 
-open IN,"<$tofilter";
-my @lines2=<IN>; close IN; my $parsestring2=join('',@lines2); undef @lines2;
+#open IN,"<$tofilter";
+my @lines2=<>; my $parsestring2=join('',@lines2); undef @lines2;
 $parser = HTML::TokeParser->new(\$parsestring2) or
     die('can\'t create TokeParser object');
 $parser->xml_mode('1');
@@ -68,22 +92,10 @@ my $excluden=0;
 my $excludea=0;
 my $et=0;
 my $cdata='';
+my $excludenold=0;
+my $ign=0;
+
 while ($token = $parser->get_token()) {
-# from HTML::TokeParser documentation:
-#             ["S",  $tag, %$attr, @$attrseq, $text]
-#             ["E",  $tag, $text]
-#             ["T",  $text, $is_data]
-#             ["C",  $text]
-#             ["D",  $text]
-#             ["PI", $token0, $text]
-#    push @{$eha{$tokeninfo[2]->{'nodename'}}->{'name'}},
-#         @{$conditions{'name'}};
-#    push @{$eha{$tokeninfo[2]->{'nodename'}}->{'attribute'}},
-#         @{$conditions{'attribute'}};
-#    push @{$eha{$tokeninfo[2]->{'nodename'}}->{'value'}},
-#         @{$conditions{'value'}};
-#    push @{$eha{$tokeninfo[2]->{'nodename'}}->{'cdata'}},
-#         @{$conditions{'cdata'}};
     if ($token->[0] eq 'D') {
 	print $token->[1];
     }
@@ -102,7 +114,6 @@ while ($token = $parser->get_token()) {
 
 	if ($eh{$token->[1]}) {
 	    $echild=$token->[1];
-#	    print "ECHILD=$echild\n";
 	}
 	if ($echild) {
 	    # run through names for echild
@@ -114,7 +125,7 @@ while ($token = $parser->get_token()) {
 		foreach my $namematch (@{$namemlist}) {
 		    my $nm=$namematch; $nm=~s/^.//; $nm=~s/.$//;
 		    if ($name=~/$nm/) {
-#			print "NMATCH: $nm ($name)\n";
+			$excludenold=$excluden;
 			$excluden++;
 			foreach my $attributemlist
 			    (@{$eha{$echild}->{'attribute'}}) {
@@ -124,15 +135,9 @@ while ($token = $parser->get_token()) {
 					    split(/\=/,$attributematch,2);
 					$am=~s/^.//;
 					$am=~s/.$//;
-#					print 'AM:'."($an,$am)\t";
-#					print 'ATT:'.join(',',%atthash)."\n";
 					if ($atthash{$an}) {
 					    if ($atthash{$an}=~/$am/) {
 						$excludea++;
-#						print "AMATCH: $am (".
-#						    join(',',
-#							 @attributes)
-#							."\n";
 					    }
 					}
 				    }
@@ -150,8 +155,13 @@ while ($token = $parser->get_token()) {
 	if ($echild) {
 	    $tr.=$token->[2];
 	    if ($excluden) {
+		my $i=0;
+		CDATALOOP:
 		foreach my $cdatamlist (@{$eha{$echild}->{'cdata'}}) {
+		    $i++;
+		    my $j;
 		    foreach my $cdatamatch (@{$cdatamlist}) {
+			$j++;
 #				print "CDATA: $cdatamatch, $cdata\n";
 			my $cm=$cdatamatch;
 			my $not=0;
@@ -160,19 +170,24 @@ while ($token = $parser->get_token()) {
 			    $cm=~s/^.//;
 			}
 			$cm=~s/^.//; $cm=~s/.$//;
+			if ($not and $cdata=~/$cm/) {
+			    $ign=1; $exclude=0;
+			}
 			if ((!$not and $cdata!~/$cm/)
 			    or ($not and $cdata=~/$cm/)) {
-#				    print "CMISMATCH: $cm ($cdata)\n";
+#				nothing happens
+#			    $exclude=0;
 			}
 			elsif (($not and $cdata!~/$cm/)
 			       or (!$not and $cdata=~/$cm/)) {
-			    $exclude++;
+			    $exclude++ unless $ign;
 			}
 		    }
 		}
 	    }
 	}
 	if ($eh{$token->[1]}) {
+	    $ign=0;
 	    $echild=0;
 	    if (!$exclude and !$excludea) {
 		print $tr;
@@ -180,7 +195,7 @@ while ($token = $parser->get_token()) {
 		$tr='';
 	    }
 	    elsif ($exclude>0 or $excludea>0) {
-#		print "EXCLUDING $token->[1] $excludea $excluden\n";
+#		print "EXCLUDING $token->[1] $exclude $excludea $excluden\n";
 		$exclude=0; $excluden=0; $excludea=0;
 		$tr='';
 	    }
@@ -222,6 +237,8 @@ sub trim {
     my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s;
 }
 
+
+
 # --------------------------------------------------------- Format xfml section
 sub format_xfml {
     my (@tokeninfo)=@_;
@@ -236,7 +253,6 @@ sub format_when_name {
     push @{$conditions{'name'}},$att_match;
     my $text=&trim($parser->get_text('/when:name'));
     $parser->get_tag('/when:name');
-#    print 'Name Matching...'.$att_match;
     $wloc--;
     &cc unless $wloc;
     return '';
@@ -250,7 +266,6 @@ sub format_when_attribute {
     push @{$conditions{'attribute'}},$att_match;
     my $text=&trim($parser->get_text('/when:attribute'));
     $parser->get_tag('/when:attribute');
-#    print 'Attribute Matching...'.$att_match;
     $wloc--;
     &cc unless $wloc;
     return '';
@@ -261,7 +276,6 @@ sub format_when_cdata {
     my (@tokeninfo)=@_;
     $wloc++;
     my $att_match=$tokeninfo[2]->{'match'};
-#    print 'Cdata Matching...'.$att_match;
     push @{$conditions{'cdata'}},$att_match;
     my $text=&trim($parser->get_text('/when:cdata'));
     $parser->get_tag('/when:cdata');