#!/usr/bin/perl
# Scott Harrison
# May 2001
# 06/19/2001,06/20,06/24 - Scott Harrison
# I am using a multiple pass-through approach to parsing
# the lpml file. This saves memory and makes sure the server
# will never be overloaded.
use HTML::TokeParser;
my $usage=<<END;
**** ERROR ERROR ERROR ERROR ****
Usage is for lpml file to come in through standard input.
1st argument is the mode of parsing.
2nd argument is the distribution (default,redhat6.2,debian2.2,redhat7.1,etc).
3rd argument is to manually specify a sourceroot.
4th argument is to manually specify a targetroot.
Only the 1st argument is mandatory for the program to run.
Example:
cat ../../doc/loncapafiles.lpml |\\
perl lpml_parse.pl html default /home/sherbert/loncapa /tmp/install
END
# ------------------------------------------------- Grab command line arguments
my $mode;
if (@ARGV) {
$mode = shift @ARGV;
}
else {
while(<>){} # throw away the input to avoid broken pipes
print $usage;
exit -1; # exit with error status
}
my $dist;
if (@ARGV) {
$dist = shift @ARGV;
}
my $targetroot;
my $sourceroot;
if (@ARGV) {
$targetroot = shift @ARGV;
}
if (@ARGV) {
$sourceroot = shift @ARGV;
}
# ---------------------------------------------------- Start first pass through
my @parsecontents = <>;
my $parsestring = join('',@parsecontents);
my $outstring;
# Need to make a pass through and figure out what defaults are
# overrided. Top-down overriding strategy (leaves don't know
# about distant leaves).
my @hierarchy;
$hierarchy[0]=0;
my $hloc=0;
my $token;
$parser = HTML::TokeParser->new(\$parsestring) or
die('can\'t create TokeParser object');
$parser->xml_mode('1');
my %hash;
my $key;
while ($token = $parser->get_token()) {
if ($token->[0] eq 'S') {
$hloc++;
$hierarchy[$hloc]++;
$key=$token->[1].join(',',@hierarchy[0..($hloc-1)]);
my $thisdist=' '.$token->[2]{'dist'}.' ';
if ($thisdist eq ' default ') {
$hash{$key}=1; # there is a default setting for this key
}
elsif ($dist && $hash{$key}==1 && $thisdist=~/\s$dist\s/) {
$hash{$key}=2; # disregard default setting for this key if
# there is a directly requested distribution match
}
}
if ($token->[0] eq 'E') {
$hloc--;
}
}
# --------------------------------------------------- Start second pass through
undef $hloc;
undef @hierarchy;
undef $parser;
$hierarchy[0]=0;
$parser = HTML::TokeParser->new(\$parsestring) or
die('can\'t create TokeParser object');
$parser->xml_mode('1');
my $cleanstring;
while ($token = $parser->get_token()) {
if ($token->[0] eq 'S') {
$hloc++;
$hierarchy[$hloc]++;
$key=$token->[1].join(',',@hierarchy[0..($hloc-1)]);
my $thisdist=' '.$token->[2]{'dist'}.' ';
if ($hash{$key}==2 and
!($thisdist eq ' ' or $thisdist =~/\s$dist\s/)) {
if ($token->[4]!~/\/>$/) {
$parser->get_tag('/'.$token->[1]);
$hloc--;
}
}
elsif ($thisdist ne ' ' and $thisdist!~/\s$dist\s/ and
!($thisdist eq ' default ' and $hash{$key}!=2)) {
if ($token->[4]!~/\/>$/) {
$parser->get_tag('/'.$token->[1]);
$hloc--;
}
}
else {
$cleanstring.=$token->[4];
}
if ($token->[4]=~/\/>$/) {
$hloc--;
}
}
if ($token->[0] eq 'E') {
$cleanstring.=$token->[2];
$hloc--;
}
if ($token->[0] eq 'T') {
$cleanstring.=$token->[1];
}
}
$cleanstring=&trim($cleanstring);
# ---------------------------------------------------- Start final pass through
# storage variables
my $lpml;
my $categories;
my $category;
my $category_att_name;
my $category_att_type;
my $chown;
my $chmod;
my $rpm;
my $rpmSummary;
my $rpmName;
my $rpmVersion;
my $rpmRelease;
my $rpmVendor;
my $rpmBuildRoot;
my $rpmCopyright;
my $rpmGroup;
my $rpmSource;
my $rpmAutoReqProv;
my $rpmdescription;
my $rpmpre;
my $directories;
my $directory;
my $targetdirs;
my $targetdir;
my $categoryname;
my $description;
my $files;
my $fileglobs;
my $links;
my $file;
my $link;
my $fileglob;
my $sourcedir;
my $targets;
my $target;
my $source;
my $note;
my $build;
my $commands;
my $command;
my $status;
my $dependencies;
my $dependency;
# Make new parser with distribution specific input
undef $parser;
$parser = HTML::TokeParser->new(\$cleanstring) or
die('can\'t create TokeParser object');
$parser->xml_mode('1');
# Define handling methods for mode-dependent text rendering
$parser->{textify}={
targetroot => \&format_targetroot,
sourceroot => \&format_sourceroot,
categories => \&format_categories,
category => \&format_category,
targetdir => \&format_targetdir,
chown => \&format_chown,
chmod => \&format_chmod,
rpm => \&format_rpm,
rpmSummary => \&format_rpmSummary,
rpmName => \&format_rpmName,
rpmVersion => \&format_rpmVersion,
rpmRelease => \&format_rpmRelease,
rpmVendor => \&format_rpmVendor,
rpmBuildRoot => \&format_rpmBuildRoot,
rpmCopyright => \&format_rpmCopyright,
rpmGroup => \&format_rpmGroup,
rpmSource => \&format_rpmSource,
rpmAutoReqProv => \&format_rpmAutoReqProv,
rpmdescription => \&format_rpmdescription,
rpmpre => \&format_rpmpre,
directories => \&format_directories,
directory => \&format_directory,
categoryname => \&format_categoryname,
description => \&format_description,
files => \&format_files,
file => \&format_file,
fileglob => \&format_fileglob,
link => \&format_link,
linkto => \&format_linkto,
source => \&format_source,
target => \&format_target,
note => \&format_note,
build => \&format_build,
status => \&format_status,
dependencies => \&format_dependencies,
glob => \&format_glob,
sourcedir => \&format_sourcedir,
filenames => \&format_filenames,
};
my $text;
my $token;
undef $hloc;
undef @hierarchy;
my $hloc;
my @hierarchy2;
while ($token = $parser->get_tag('lpml')) {
&format_lpml(@{$token});
$text = &trim($parser->get_text('/lpml'));
$token = $parser->get_tag('/lpml');
print $lpml;
print "\n";
$text=~s/\s*\n\s*\n\s*/\n/g;
print $text;
print "\n";
print &end();
}
exit;
sub end {
if ($mode eq 'html') {
return "THE END\n";
}
}
# ----------------------- Take in string to parse and the separation expression
sub extract_array {
my ($stringtoparse,$sepexp) = @_;
my @a=split(/$sepexp/,$stringtoparse);
return \@a;
}
# --------------------------------------------------------- Format lpml section
sub format_lpml {
my (@tokeninfo)=@_;
my $date=`date`; chop $date;
if ($mode eq 'html') {
$lpml = "LPML BEGINNING: $date";
}
}
# --------------------------------------------------- Format targetroot section
sub format_targetroot {
my $text=&trim($parser->get_text('/targetroot'));
$text=$targetroot if $targetroot;
$parser->get_tag('/targetroot');
if ($mode eq 'html') {
return $targetroot="\nTARGETROOT: $text";
}
else {
return '';
}
}
# --------------------------------------------------- Format sourceroot section
sub format_sourceroot {
my $text=&trim($parser->get_text('/sourceroot'));
$text=$sourceroot if $sourceroot;
$parser->get_tag('/sourceroot');
if ($mode eq 'html') {
return $sourceroot="\nSOURCEROOT: $text";
}
else {
return '';
}
}
# --------------------------------------------------- Format categories section
sub format_categories {
my $text=&trim($parser->get_text('/categories'));
$parser->get_tag('/categories');
if ($mode eq 'html') {
return $categories="\nBEGIN CATEGORIES\n$text\nEND CATEGORIES\n";
}
else {
return '';
}
}
# --------------------------------------------------- Format categories section
sub format_category {
my (@tokeninfo)=@_;
$category_att_name=$tokeninfo[2]->{'name'};
$category_att_type=$tokeninfo[2]->{'type'};
$chmod='';$chown='';
$parser->get_text('/category');
$parser->get_tag('/category');
if ($mode eq 'html') {
return $category="\nCATEGORY $category_att_name $category_att_type ".
"$chmod $chown";
}
else {
return '';
}
}
# -------------------------------------------------------- Format chown section
sub format_chown {
my @tokeninfo=@_;
$chown='';
my $text=&trim($parser->get_text('/chown'));
if ($text) {
$parser->get_tag('/chown');
$chown=$text;
}
return '';
}
# -------------------------------------------------------- Format chmod section
sub format_chmod {
my @tokeninfo=@_;
$chmod='';
my $text=&trim($parser->get_text('/chmod'));
if ($text) {
$parser->get_tag('/chmod');
$chmod=$text;
}
return '';
}
# ---------------------------------------------------------- Format rpm section
sub format_rpm {
my $text=&trim($parser->get_text('/rpm'));
$parser->get_tag('/rpm');
if ($mode eq 'html') {
return $rpm="\nBEGIN RPM\n$text\nEND RPM";
}
else {
return '';
}
}
# --------------------------------------------------- Format rpmSummary section
sub format_rpmSummary {
my $text=&trim($parser->get_text('/rpmSummary'));
$parser->get_tag('/rpmSummary');
if ($mode eq 'html') {
return $rpmSummary="\nRPMSUMMARY $text";
}
else {
return '';
}
}
# ------------------------------------------------------ Format rpmName section
sub format_rpmName {
my $text=&trim($parser->get_text('/rpmName'));
$parser->get_tag('/rpmName');
if ($mode eq 'html') {
return $rpmName="\nRPMNAME $text";
}
else {
return '';
}
}
# --------------------------------------------------- Format rpmVersion section
sub format_rpmVersion {
my $text=$parser->get_text('/rpmVersion');
$parser->get_tag('/rpmVersion');
if ($mode eq 'html') {
return $rpmVersion="\nRPMVERSION $text";
}
else {
return '';
}
}
# --------------------------------------------------- Format rpmRelease section
sub format_rpmRelease {
my $text=$parser->get_text('/rpmRelease');
$parser->get_tag('/rpmRelease');
if ($mode eq 'html') {
return $rpmRelease="\nRPMRELEASE $text";
}
else {
return '';
}
}
# ---------------------------------------------------- Format rpmVendor section
sub format_rpmVendor {
my $text=$parser->get_text('/rpmVendor');
$parser->get_tag('/rpmVendor');
if ($mode eq 'html') {
return $rpmVendor="\nRPMVENDOR $text";
}
else {
return '';
}
}
# ------------------------------------------------- Format rpmBuildRoot section
sub format_rpmBuildRoot {
my $text=$parser->get_text('/rpmBuildRoot');
$parser->get_tag('/rpmBuildRoot');
if ($mode eq 'html') {
return $rpmBuildRoot="\nRPMBUILDROOT $text";
}
else {
return '';
}
}
# ------------------------------------------------- Format rpmCopyright section
sub format_rpmCopyright {
my $text=$parser->get_text('/rpmCopyright');
$parser->get_tag('/rpmCopyright');
if ($mode eq 'html') {
return $rpmCopyright="\nRPMCOPYRIGHT $text";
}
else {
return '';
}
}
# ----------------------------------------------------- Format rpmGroup section
sub format_rpmGroup {
my $text=$parser->get_text('/rpmGroup');
$parser->get_tag('/rpmGroup');
if ($mode eq 'html') {
return $rpmGroup="\nRPMGROUP $text";
}
else {
return '';
}
}
# ---------------------------------------------------- Format rpmSource section
sub format_rpmSource {
my $text=$parser->get_text('/rpmSource');
$parser->get_tag('/rpmSource');
if ($mode eq 'html') {
return $rpmSource="\nRPMSOURCE $text";
}
else {
return '';
}
}
# ----------------------------------------------- Format rpmAutoReqProv section
sub format_rpmAutoReqProv {
my $text=$parser->get_text('/rpmAutoReqProv');
$parser->get_tag('/rpmAutoReqProv');
if ($mode eq 'html') {
return $rpmAutoReqProv="\nRPMAUTOREQPROV $text";
}
else {
return '';
}
}
# ----------------------------------------------- Format rpmdescription section
sub format_rpmdescription {
my $text=$parser->get_text('/rpmdescription');
$parser->get_tag('/rpmdescription');
if ($mode eq 'html') {
return $rpmdescription="\nRPMDESCRIPTION $text";
}
else {
return '';
}
}
# ------------------------------------------------------- Format rpmpre section
sub format_rpmpre {
my $text=$parser->get_text('/rpmpre');
$parser->get_tag('/rpmpre');
if ($mode eq 'html') {
return $rpmpre="\nRPMPRE $text";
}
else {
return '';
}
}
# -------------------------------------------------- Format directories section
sub format_directories {
my $text=&trim($parser->get_text('/directories'));
$parser->get_tag('/directories');
if ($mode eq 'html') {
return $directories="\nBEGIN DIRECTORIES\n$text\nEND DIRECTORIES\n";
}
else {
return '';
}
}
# ---------------------------------------------------- Format directory section
sub format_directory {
my (@tokeninfo)=@_;
$targetdir='';$categoryname='';$description='';
$parser->get_text('/directory');
$parser->get_tag('/directory');
if ($mode eq 'html') {
return $directory="\nDIRECTORY $targetdir $categoryname $description";
}
else {
return '';
}
}
# ---------------------------------------------------- Format targetdir section
sub format_targetdir {
my @tokeninfo=@_;
$targetdir='';
my $text=&trim($parser->get_text('/targetdir'));
if ($text) {
$parser->get_tag('/targetdir');
$targetdir=$text;
}
return '';
}
# ------------------------------------------------- Format categoryname section
sub format_categoryname {
my @tokeninfo=@_;
$categoryname='';
my $text=&trim($parser->get_text('/categoryname'));
if ($text) {
$parser->get_tag('/categoryname');
$categoryname=$text;
}
return '';
}
# -------------------------------------------------- Format description section
sub format_description {
my @tokeninfo=@_;
$description='';
my $text=&trim($parser->get_text('/description'));
if ($text) {
$parser->get_tag('/description');
$description=$text;
}
return '';
}
# -------------------------------------------------------- Format files section
sub format_files {
my $text=&trim($parser->get_text('/files'));
$parser->get_tag('/files');
if ($mode eq 'html') {
return $directories="\nBEGIN FILES\n$text\nEND FILES\n";
}
else {
return '';
}
}
# ---------------------------------------------------- Format fileglobs section
sub format_fileglobs {
}
# -------------------------------------------------------- Format links section
sub format_links {
}
# --------------------------------------------------------- Format file section
sub format_file {
my @tokeninfo=@_;
$file=''; $source=''; $target=''; $categoryname=''; $description='';
$note=''; $build=''; $status=''; $dependencies='';
my $text=&trim($parser->get_text('/file'));
if ($source) {
$parser->get_tag('/file');
if ($mode eq 'html') {
return ($file="\nBEGIN FILE\n".
"$source $target $categoryname $description $note " .
"$build $status $dependencies" .
"\nEND FILE");
}
else {
return '';
}
}
return '';
}
# --------------------------------------------------------- Format link section
sub format_link {
my @tokeninfo=@_;
$link=''; $linkto=''; $target=''; $categoryname=''; $description='';
$note=''; $build=''; $status=''; $dependencies='';
my $text=&trim($parser->get_text('/link'));
if ($linkto) {
$parser->get_tag('/link');
if ($mode eq 'html') {
return $link="\nBEGIN LINK\n".
"$linkto $target $categoryname $description $note " .
"$build $status $dependencies" .
"\nEND LINK";
}
else {
return '';
}
}
return '';
}
# ----------------------------------------------------- Format fileglob section
sub format_fileglob {
my @tokeninfo=@_;
$fileglob=''; $glob=''; $sourcedir='';
$targetdir=''; $categoryname=''; $description='';
$note=''; $build=''; $status=''; $dependencies='';
$filenames='';
my $text=&trim($parser->get_text('/fileglob'));
if ($sourcedir) {
$parser->get_tag('/fileglob');
if ($mode eq 'html') {
return $fileglob="\nBEGIN FILEGLOB\n".
"$glob sourcedir $targetdir $categoryname $description $note ".
"$build $status $dependencies $filenames" .
"\nEND FILEGLOB";
}
else {
return '';
}
}
return '';
}
# ---------------------------------------------------- Format sourcedir section
sub format_sourcedir {
my @tokeninfo=@_;
$sourcedir='';
my $text=&trim($parser->get_text('/sourcedir'));
if ($text) {
$parser->get_tag('/sourcedir');
$sourcedir=$text;
}
return '';
}
# ------------------------------------------------------- Format target section
sub format_target {
my @tokeninfo=@_;
$target='';
my $text=&trim($parser->get_text('/target'));
if ($text) {
$parser->get_tag('/target');
$target=$text;
}
return '';
}
# ------------------------------------------------------- Format source section
sub format_source {
my @tokeninfo=@_;
$source='';
my $text=&trim($parser->get_text('/source'));
if ($text) {
$parser->get_tag('/source');
$source=$text;
}
return '';
}
# --------------------------------------------------------- Format note section
sub format_note {
my @tokeninfo=@_;
$note='';
my $text=&trim($parser->get_text('/note'));
if ($text) {
$parser->get_tag('/note');
$note=$text;
}
return '';
}
# -------------------------------------------------------- Format build section
sub format_build {
my @tokeninfo=@_;
$build='';
my $text=&trim($parser->get_text('/build'));
if ($text) {
$parser->get_tag('/build');
$build=$text;
}
return '';
}
# ------------------------------------------------------- Format status section
sub format_status {
my @tokeninfo=@_;
$status='';
my $text=&trim($parser->get_text('/status'));
if ($text) {
$parser->get_tag('/status');
$status=$text;
}
return '';
}
# ------------------------------------------------- Format dependencies section
sub format_dependencies {
my @tokeninfo=@_;
$dependencies='';
my $text=&trim($parser->get_text('/dependencies'));
if ($text) {
$parser->get_tag('/dependencies');
$dependencies=$text;
}
return '';
}
# --------------------------------------------------------- Format glob section
sub format_glob {
my @tokeninfo=@_;
$glob='';
my $text=&trim($parser->get_text('/glob'));
if ($text) {
$parser->get_tag('/glob');
$glob=$text;
}
return '';
}
# ---------------------------------------------------- Format filenames section
sub format_filenames {
my @tokeninfo=@_;
$glob='';
my $text=&trim($parser->get_text('/filenames'));
if ($text) {
$parser->get_tag('/filenames');
$filenames=$text;
}
return '';
}
# ------------------------------------------------------- Format linkto section
sub format_linkto {
my @tokeninfo=@_;
$glob='';
my $text=&trim($parser->get_text('/linkto'));
if ($text) {
$parser->get_tag('/linkto');
$linkto=$text;
}
return '';
}
# --------------------------------------- remove starting and ending whitespace
sub trim {
my ($s)=@_; $s=~s/^\s*//; $s=~s/\s*$//; return $s;
}
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>