loncom/build/piml_parse.pl - view

File: [LON-CAPA] / loncom / build / piml_parse.pl
Revision 1.4: download - view: text, annotated - select for diffs
Tue Feb 5 01:29:22 2002 UTC (23 years, 5 months ago) by harris41
Branches: MAIN
CVS tags: HEAD

fix hierarchy tracking in xml document

1: #!/usr/bin/perl 2: 3: # The LearningOnline Network with CAPA 4: # piml_parse.pl - Linux Packaging Markup Language parser 5: # 6: # $Id: piml_parse.pl,v 1.4 2002/02/05 01:29:22 harris41 Exp $ 7: # 8: # Written by Scott Harrison, harris41@msu.edu 9: # 10: # Copyright Michigan State University Board of Trustees 11: # 12: # This file is part of the LearningOnline Network with CAPA (LON-CAPA). 13: # 14: # LON-CAPA is free software; you can redistribute it and/or modify 15: # it under the terms of the GNU General Public License as published by 16: # the Free Software Foundation; either version 2 of the License, or 17: # (at your option) any later version. 18: # 19: # LON-CAPA is distributed in the hope that it will be useful, 20: # but WITHOUT ANY WARRANTY; without even the implied warranty of 21: # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 22: # GNU General Public License for more details. 23: # 24: # You should have received a copy of the GNU General Public License 25: # along with LON-CAPA; if not, write to the Free Software 26: # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 27: # 28: # /home/httpd/html/adm/gpl.txt 29: # 30: # http://www.lon-capa.org/ 31: # 32: # YEAR=2002 33: # 1/28,1/29,1/30,1/31 - Scott Harrison 34: # 35: ### 36: 37: ############################################################################### 38: ## ## 39: ## ORGANIZATION OF THIS PERL SCRIPT ## 40: ## 1. Notes ## 41: ## 2. Get command line arguments ## 42: ## 3. First pass through (grab distribution-specific information) ## 43: ## 4. Second pass through (parse out what is not necessary) ## 44: ## 5. Third pass through (translate markup according to specified mode) ## 45: ## 6. Functions (most all just format contents of different markup tags) ## 46: ## 7. POD (plain old documentation, CPAN style) ## 47: ## ## 48: ############################################################################### 49: 50: # ----------------------------------------------------------------------- Notes 51: # 52: # I am using a multiple pass-through approach to parsing 53: # the piml file. This saves memory and makes sure the server 54: # will never be overloaded. 55: # 56: # This is meant to parse files meeting the piml document type. 57: # See piml.dtd. PIML=Post Installation Markup Language. 58: 59: use HTML::TokeParser; 60: 61: my $usage=<<END; 62: **** ERROR ERROR ERROR ERROR **** 63: Usage is for piml file to come in through standard input. 64: 1st argument is the category permissions to use (runtime or development) 65: 2nd argument is the distribution (default,redhat6.2,debian2.2,redhat7.1,etc). 66: 3rd argument is to manually specify a targetroot 67: 68: Only the 1st argument is mandatory for the program to run. 69: 70: Example: 71: 72: cat ../../doc/loncapafiles.piml |\\ 73: perl piml_parse.pl html development default /home/sherbert/loncapa /tmp/install 74: END 75: 76: # ------------------------------------------------- Grab command line arguments 77: 78: my $mode; 79: if (@ARGV==3) { 80: $mode = shift @ARGV; 81: } 82: else { 83: @ARGV=();shift @ARGV; 84: while(<>){} # throw away the input to avoid broken pipes 85: print $usage; 86: exit -1; # exit with error status 87: } 88: 89: my $categorytype; 90: if (@ARGV) { 91: $categorytype = shift @ARGV; 92: } 93: 94: my $dist; 95: if (@ARGV) { 96: $dist = shift @ARGV; 97: } 98: 99: my $targetroot; 100: my $targetrootarg; 101: if (@ARGV) { 102: $targetroot = shift @ARGV; 103: } 104: $targetroot=~s/\/$//; 105: $targetrootarg=$targetroot; 106: 107: my $logcmd='| tee -a WARNINGS'; 108: 109: my $invocation; 110: # --------------------------------------------------- Record program invocation 111: if ($mode eq 'install' or $mode eq 'configinstall' or $mode eq 'build') { 112: $invocation=(<<END); 113: # Invocation: STDINPUT | piml_parse.pl 114: # 1st argument (category type) is: $categorytype 115: # 2nd argument (distribution) is: $dist 116: # 3rd argument (targetroot) is: described below 117: END 118: } 119: 120: # ---------------------------------------------------- Start first pass through 121: my @parsecontents = <>; 122: my $parsestring = join('',@parsecontents); 123: my $outstring; 124: 125: # Need to make a pass through and figure out what defaults are 126: # overrided. Top-down overriding strategy (leaves don't know 127: # about distant leaves). 128: 129: my @hierarchy; 130: $hierarchy[0]=0; 131: my $hloc=0; 132: my $token; 133: $parser = HTML::TokeParser->new(\$parsestring) or 134: die('can\'t create TokeParser object'); 135: $parser->xml_mode('1'); 136: my %hash; 137: my $key; 138: while ($token = $parser->get_token()) { 139: if ($token->[0] eq 'S') { 140: $hloc++; 141: $hierarchy[$hloc]++; 142: $key=$token->[1].join(',',@hierarchy[0..($hloc-1)]); 143: my $thisdist=' '.$token->[2]{'dist'}.' '; 144: if ($thisdist eq ' default ') { 145: $hash{$key}=1; # there is a default setting for this key 146: } 147: elsif ($dist && $hash{$key}==1 && $thisdist=~/\s$dist\s/) { 148: $hash{$key}=2; # disregard default setting for this key if 149: # there is a directly requested distribution match 150: } 151: } 152: if ($token->[0] eq 'E') { 153: $hloc--; 154: } 155: } 156: 157: # --------------------------------------------------- Start second pass through 158: undef $hloc; 159: undef @hierarchy; 160: undef $parser; 161: $hierarchy[0]=0; 162: $parser = HTML::TokeParser->new(\$parsestring) or 163: die('can\'t create TokeParser object'); 164: $parser->xml_mode('1'); 165: my $cleanstring; 166: while ($token = $parser->get_token()) { 167: if ($token->[0] eq 'S') { 168: $hloc++; 169: $hierarchy[$hloc]++; 170: $key=$token->[1].join(',',@hierarchy[0..($hloc-1)]); 171: my $thisdist=' '.$token->[2]{'dist'}.' '; 172: # This conditional clause is set up to ignore two sets 173: # of invalid conditions before accepting entry into 174: # the cleanstring. 175: if ($hash{$key}==2 and 176: !($thisdist eq ' ' or $thisdist =~/\s$dist\s/)) { 177: if ($token->[4]!~/\/>$/) { 178: $parser->get_tag('/'.$token->[1]); 179: $hloc--; 180: } 181: } 182: elsif ($thisdist ne ' ' and $thisdist!~/\s$dist\s/ and 183: !($thisdist eq ' default ' and $hash{$key}!=2)) { 184: if ($token->[4]!~/\/>$/) { 185: $parser->get_tag('/'.$token->[1]); 186: $hloc--; 187: } 188: } 189: else { 190: $cleanstring.=$token->[4]; 191: } 192: if ($token->[4]=~/\/>$/) { 193: # $hloc--; 194: } 195: } 196: if ($token->[0] eq 'E') { 197: $cleanstring.=$token->[2]; 198: $hloc--; 199: } 200: if ($token->[0] eq 'T') { 201: $cleanstring.=$token->[1]; 202: } 203: } 204: $cleanstring=&trim($cleanstring); 205: $cleanstring=~s/\>\s*\n\s*\</\>\</g; 206: 207: # ---------------------------------------------------- Start final pass through 208: 209: # storage variables 210: my $piml; 211: my $categories; 212: my @categorynamelist; 213: my $category; 214: my $category_att_name; 215: my $category_att_type; 216: my $chown; 217: my $chmod; 218: my $abbreviation; # space-free abbreviation; esp. for image names 219: my $categoryname; 220: my $description; 221: my $files; 222: my $file; 223: my $target; 224: my $note; 225: my $commands; 226: my $command; 227: my $dependencies; 228: my @links; 229: my %categoryhash; 230: my $dpathlength; 231: my %fab; # file category abbreviation 232: my $directory_count; 233: my $file_count; 234: my $link_count; 235: my $fileglob_count; 236: my $fileglobnames_count; 237: my %categorycount; 238: 239: my $mode; 240: 241: my @buildall; 242: my @buildinfo; 243: 244: my @configall; 245: 246: # Make new parser with distribution specific input 247: undef $parser; 248: $parser = HTML::TokeParser->new(\$cleanstring) or 249: die('can\'t create TokeParser object'); 250: $parser->xml_mode('1'); 251: 252: # Define handling methods for mode-dependent text rendering 253: 254: $parser->{textify}={ 255: specialnotices => \&format_specialnotices, 256: specialnotice => \&format_specialnotice, 257: targetroot => \&format_targetroot, 258: categories => \&format_categories, 259: category => \&format_category, 260: abbreviation => \&format_abbreviation, 261: chown => \&format_chown, 262: chmod => \&format_chmod, 263: categoryname => \&format_categoryname, 264: files => \&format_files, 265: file => \&format_file, 266: target => \&format_target, 267: note => \&format_note, 268: build => \&format_build, 269: dependencies => \&format_dependencies, 270: filenames => \&format_filenames, 271: perlscript => \&format_perlscript, 272: TARGET => \&format_TARGET, 273: }; 274: 275: my $text; 276: my $token; 277: undef $hloc; 278: undef @hierarchy; 279: my $hloc; 280: my @hierarchy2; 281: while ($token = $parser->get_tag('piml')) { 282: &format_piml(@{$token}); 283: $text = &trim($parser->get_text('/piml')); 284: $token = $parser->get_tag('/piml'); 285: print $piml; 286: print "\n"; 287: print $text; 288: print "\n"; 289: print &end(); 290: } 291: exit; 292: 293: # ---------- Functions (most all just format contents of different markup tags) 294: 295: # ------------------------ Final output at end of markup parsing and formatting 296: sub end { 297: 298: } 299: 300: # ----------------------- Take in string to parse and the separation expression 301: sub extract_array { 302: my ($stringtoparse,$sepexp) = @_; 303: my @a=split(/$sepexp/,$stringtoparse); 304: return \@a; 305: } 306: 307: # --------------------------------------------------------- Format piml section 308: sub format_piml { 309: my (@tokeninfo)=@_; 310: my $date=`date`; chop $date; 311: $piml=<<END; 312: #!/usr/bin/perl 313: 314: # Generated from a PIML (Post Installation Markup Language) document 315: 316: END 317: } 318: # --------------------------------------------------- Format targetroot section 319: sub format_targetroot { 320: my $text=&trim($parser->get_text('/targetroot')); 321: $text=$targetroot if $targetroot; 322: $parser->get_tag('/targetroot'); 323: return '# TARGET INSTALL LOCATION is "'.$targetroot."\"\n"; 324: } 325: # -------------------------------------------------- Format perl script section 326: sub format_perlscript { 327: my (@tokeninfo)=@_; 328: $mode=$tokeninfo->[2]{'mode'}; 329: my $text=$parser->get_text('/perlscript'); 330: $parser->get_tag('/perlscript'); 331: return $text; 332: } 333: # --------------------------------------------------------------- Format TARGET 334: sub format_TARGET { 335: my (@tokeninfo)=@_; 336: $parser->get_tag('/TARGET'); 337: return $target; 338: } 339: # --------------------------------------------------- Format categories section 340: sub format_categories { 341: my $text=&trim($parser->get_text('/categories')); 342: $parser->get_tag('/categories'); 343: return '# CATEGORIES'."\n".$text; 344: } 345: # --------------------------------------------------- Format categories section 346: sub format_category { 347: my (@tokeninfo)=@_; 348: $category_att_name=$tokeninfo[2]->{'name'}; 349: $category_att_type=$tokeninfo[2]->{'type'}; 350: $abbreviation=''; $chmod='';$chown=''; 351: $parser->get_text('/category'); 352: $parser->get_tag('/category'); 353: $fab{$category_att_name}=$abbreviation; 354: if ($category_att_type eq $categorytype) { 355: my ($user,$group)=split(/\:/,$chown); 356: $categoryhash{$category_att_name}='-o '.$user.' -g '.$group. 357: ' -m '.$chmod; 358: } 359: return ''; 360: } 361: # --------------------------------------------------- Format categories section 362: sub format_abbreviation { 363: my @tokeninfo=@_; 364: $abbreviation=''; 365: my $text=&trim($parser->get_text('/abbreviation')); 366: if ($text) { 367: $parser->get_tag('/abbreviation'); 368: $abbreviation=$text; 369: } 370: return ''; 371: } 372: # -------------------------------------------------------- Format chown section 373: sub format_chown { 374: my @tokeninfo=@_; 375: $chown=''; 376: my $text=&trim($parser->get_text('/chown')); 377: if ($text) { 378: $parser->get_tag('/chown'); 379: $chown=$text; 380: } 381: return ''; 382: } 383: # -------------------------------------------------------- Format chmod section 384: sub format_chmod { 385: my @tokeninfo=@_; 386: $chmod=''; 387: my $text=&trim($parser->get_text('/chmod')); 388: if ($text) { 389: $parser->get_tag('/chmod'); 390: $chmod=$text; 391: } 392: return ''; 393: } 394: # ------------------------------------------------- Format categoryname section 395: sub format_categoryname { 396: my @tokeninfo=@_; 397: $categoryname=''; 398: my $text=&trim($parser->get_text('/categoryname')); 399: if ($text) { 400: $parser->get_tag('/categoryname'); 401: $categoryname=$text; 402: } 403: return ''; 404: } 405: # -------------------------------------------------------- Format files section 406: sub format_files { 407: my $text=$parser->get_text('/files'); 408: $parser->get_tag('/files'); 409: return "\n".'# There are '.$file_count.' files this script works on'. 410: "\n\n".$text; 411: } 412: # --------------------------------------------------------- Format file section 413: sub format_file { 414: my @tokeninfo=@_; 415: $file=''; $source=''; $target=''; $categoryname=''; $description=''; 416: $note=''; $build=''; $status=''; $dependencies=''; 417: my $text=&trim($parser->get_text('/file')); 418: $file_count++; 419: $categorycount{$categoryname}++; 420: $parser->get_tag('/file'); 421: return "# File: $target\n". 422: "$text\n"; 423: return ''; 424: } 425: # ------------------------------------------------------- Format target section 426: sub format_target { 427: my @tokeninfo=@_; 428: $target=''; 429: my $text=&trim($parser->get_text('/target')); 430: if ($text) { 431: $parser->get_tag('/target'); 432: $target=$targetrootarg.$text; 433: } 434: return ''; 435: } 436: # --------------------------------------------------------- Format note section 437: sub format_note { 438: my @tokeninfo=@_; 439: $note=''; 440: my $aref; 441: my $text; 442: while ($aref=$parser->get_token()) { 443: if ($aref->[0] eq 'E' && $aref->[1] eq 'note') { 444: last; 445: } 446: elsif ($aref->[0] eq 'S') { 447: $text.=$aref->[4]; 448: } 449: elsif ($aref->[0] eq 'E') { 450: $text.=$aref->[2]; 451: } 452: else { 453: $text.=$aref->[1]; 454: } 455: } 456: if ($text) { 457: $note=$text; 458: } 459: return ''; 460: 461: } 462: # ------------------------------------------------- Format dependencies section 463: sub format_dependencies { 464: my @tokeninfo=@_; 465: $dependencies=''; 466: my $text=&trim($parser->get_text('/dependencies')); 467: if ($text) { 468: $parser->get_tag('/dependencies'); 469: $dependencies=join(';', 470: (map {s/^\s*//;s/\s$//;$_} split(/\;/,$text))); 471: } 472: return ''; 473: } 474: # ------------------------------------------------ Format specialnotice section 475: sub format_specialnotices { 476: $parser->get_tag('/specialnotices'); 477: return ''; 478: } 479: # ------------------------------------------------ Format specialnotice section 480: sub format_specialnotice { 481: $parser->get_tag('/specialnotice'); 482: return ''; 483: } 484: # ------------------------------------- Render less-than and greater-than signs 485: sub htmlsafe { 486: my $text=@_[0]; 487: $text =~ s/</</g; 488: $text =~ s/>/>/g; 489: return $text; 490: } 491: # --------------------------------------- remove starting and ending whitespace 492: sub trim { 493: my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s; 494: } 495: 496: # ----------------------------------- POD (plain old documentation, CPAN style) 497: 498: =head1 NAME 499: 500: piml_parse.pl - This is meant to parse files meeting the piml document type. 501: See piml.dtd. PIML=Post Installation Markup Language. 502: 503: =head1 SYNOPSIS 504: 505: Usage is for piml file to come in through standard input. 506: 507: =over 4 508: 509: =item * 510: 511: 1st argument is the category permissions to use (runtime or development) 512: 513: =item * 514: 515: 2nd argument is the distribution 516: (default,redhat6.2,debian2.2,redhat7.1,etc). 517: 518: =item * 519: 520: 3rd argument is to manually specify a targetroot. 521: 522: =back 523: 524: Only the 1st argument is mandatory for the program to run. 525: 526: Example: 527: 528: cat ../../doc/loncapafiles.piml |\\ 529: perl piml_parse.pl html default /home/sherbert/loncapa /tmp/install 530: 531: =head1 DESCRIPTION 532: 533: I am using a multiple pass-through approach to parsing 534: the piml file. This saves memory and makes sure the server 535: will never be overloaded. 536: 537: =head1 README 538: 539: I am using a multiple pass-through approach to parsing 540: the piml file. This saves memory and makes sure the server 541: will never be overloaded. 542: 543: =head1 PREREQUISITES 544: 545: HTML::TokeParser 546: 547: =head1 COREQUISITES 548: 549: =head1 OSNAMES 550: 551: linux 552: 553: =head1 SCRIPT CATEGORIES 554: 555: Packaging/Administrative 556: 557: =cut