version 1.1, 2001/10/26 00:18:54
|
version 1.4, 2001/11/16 20:06:08
|
Line 1
|
Line 1
|
#!/usr/bin/perl |
#!/usr/bin/perl |
|
|
# Scott Harrison |
# The LearningOnline Network with CAPA |
|
# |
|
# filecompare.pl - script used to help probe and compare file statistics |
|
# |
# YEAR=2001 |
# YEAR=2001 |
# 9/27, 10/24, 10/25 |
# 9/27, 10/24, 10/25, 11/4 Scott Harrison |
|
# 11/14 Guy Albertelli |
|
# 11/16 Scott Harrison |
|
# |
|
# $Id$ |
|
### |
|
|
my $invocation=<<END; |
|
# ------------------------------------------------------------------ Invocation |
# ------------------------------------------------------------------ Invocation |
# filecompare.pl FILE1 FILE2 |
my $invocation=<<END; |
# or |
filecompare.pl [ options ... ] [FILE1] [FILE2] [ restrictions ... ] |
# filecompare.pl DIR1 DIR2 |
or |
# |
filecompare.pl [ options ... ] [DIR1] [DIR2] [ restrictions ... ] |
# A list of space separated values (after the file/dir names) |
|
# can restrict the comparison. |
Restrictions: a list of space separated values (after the file/dir names) |
# These values can be: existence, age, md5sum, size, lines, and/or diffs. |
can restrict the comparison. |
# |
These values can be: existence, cvstime, age, md5sum, size, lines, |
# Flags (before file/dir names): |
and/or diffs. |
# -p show all files the same |
|
# -n show all files different |
Options (before file/dir names): |
# -a show all files (with comparisons) |
-p show all files that have the same comparison |
# -q only show file names (based on first file/dir) |
-n show all files that have different comparisons |
# -v verbose mode (default) |
-a show all files (with comparisons) |
|
-q only show file names (based on first file/dir) |
|
-v verbose mode (default) |
END |
END |
unless (@ARGV) { |
unless (@ARGV) { |
print $invocation; |
print $invocation; |
Line 32 unless (@ARGV) {
|
Line 41 unless (@ARGV) {
|
# |
# |
# Ways of comparison: |
# Ways of comparison: |
# existence similarity |
# existence similarity |
|
# cvs time similarity (first argument treated as CVS source) |
# age similarity (modification time) |
# age similarity (modification time) |
# md5sum similarity |
# md5sum similarity |
# size similarity (bytes) |
# size similarity (bytes) |
Line 40 unless (@ARGV) {
|
Line 50 unless (@ARGV) {
|
# |
# |
# Quantities of comparison: |
# Quantities of comparison: |
# existence (no,yes); other values become 'n/a' |
# existence (no,yes); other values become 'n/a' |
|
# cvstime in seconds |
# age in seconds |
# age in seconds |
# md5sum ("same" or "different") |
# md5sum ("same" or "different") |
# size similarity (byte difference) |
# size similarity (byte difference) |
Line 48 unless (@ARGV) {
|
Line 59 unless (@ARGV) {
|
# |
# |
# Text output of comparison: |
# Text output of comparison: |
# existence VALUE |
# existence VALUE |
|
# cvstime VALUE |
# age VALUE |
# age VALUE |
# md5sum VALUE |
# md5sum VALUE |
# size VALUE |
# size VALUE |
Line 57 unless (@ARGV) {
|
Line 69 unless (@ARGV) {
|
# Output of comparison: |
# Output of comparison: |
# exist |
# exist |
# if md5sum not same, then different |
# if md5sum not same, then different |
|
# if cvstime not 0, then older/newer |
# if age not 0, then older/newer |
# if age not 0, then older/newer |
# if size not 0, then bigger/smaller |
# if size not 0, then bigger/smaller |
# if lines not 0, then more lines of code/less lines of code |
# if lines not 0, then more lines of code/less lines of code |
Line 72 unless (@ARGV) {
|
Line 85 unless (@ARGV) {
|
# -a show all files (with comparisons) |
# -a show all files (with comparisons) |
# -q only show file names (based on first file/dir) |
# -q only show file names (based on first file/dir) |
# -v verbose mode (default) |
# -v verbose mode (default) |
|
# -b build/install mode (returns exitcode) |
my $verbose='1'; |
my $verbose='1'; |
my $show='all'; |
my $show='all'; |
|
my $buildmode=0; |
while (@ARGV) { |
while (@ARGV) { |
my $flag; |
my $flag; |
if ($ARGV[0]=~/^\-(\w)/) { |
if ($ARGV[0]=~/^\-(\w)/) { |
Line 85 while (@ARGV) {
|
Line 100 while (@ARGV) {
|
$show='same', last SWITCH if $flag eq 'p'; |
$show='same', last SWITCH if $flag eq 'p'; |
$show='different', last SWITCH if $flag eq 'n'; |
$show='different', last SWITCH if $flag eq 'n'; |
$show='all', last SWITCH if $flag eq 'a'; |
$show='all', last SWITCH if $flag eq 'a'; |
|
$buildmode=1, last SWITCH if $flag eq 'b'; |
|
$buildmode=2, last SWITCH if $flag eq 'B'; |
|
$buildmode=3, last SWITCH if $flag eq 'g'; |
|
$buildmode=4, last SWITCH if $flag eq 'G'; |
print($invocation), exit(1); |
print($invocation), exit(1); |
} |
} |
} |
} |
Line 92 while (@ARGV) {
|
Line 111 while (@ARGV) {
|
last; |
last; |
} |
} |
} |
} |
warn('Verbose: '.$verbose."\n"); |
dowarn('Verbose: '.$verbose."\n"); |
warn('Show: '.$show."\n"); |
dowarn('Show: '.$show."\n"); |
|
|
# FILE1 FILE2 or DIR1 DIR2 |
# FILE1 FILE2 or DIR1 DIR2 |
my $loc1=shift @ARGV; |
my $loc1=shift @ARGV; |
Line 116 else {
|
Line 135 else {
|
} |
} |
map {chomp; s/^$loc1\///; $_} @files; |
map {chomp; s/^$loc1\///; $_} @files; |
} |
} |
warn('Processing for mode: '.$dirmode."\n"); |
dowarn('Processing for mode: '.$dirmode."\n"); |
warn('Location #1: '.$loc1."\n"); |
dowarn('Location #1: '.$loc1."\n"); |
warn('Location #2: '.$loc2."\n"); |
dowarn('Location #2: '.$loc2."\n"); |
|
|
# A list of space separated values (after the file/dir names) |
# A list of space separated values (after the file/dir names) |
# can restrict the comparison. |
# can restrict the comparison. |
Line 126 my %restrict;
|
Line 145 my %restrict;
|
while (@ARGV) { |
while (@ARGV) { |
my $r=shift @ARGV; |
my $r=shift @ARGV; |
if ($r eq 'existence' or |
if ($r eq 'existence' or |
|
$r eq 'cvstime' or |
$r eq 'md5sum' or |
$r eq 'md5sum' or |
$r eq 'age' or |
$r eq 'age' or |
$r eq 'size' or |
$r eq 'size' or |
Line 143 if (%restrict) {
|
Line 163 if (%restrict) {
|
} |
} |
|
|
my %OUTPUT=( |
my %OUTPUT=( |
'existence'=>( |
'existence'=>( sub {print 'existence: '.@_[0]; return;}), |
sub { |
'md5sum'=>(sub {print 'md5sum: '.@_[0];return;}), |
print 'existence: '.@_[0]; |
'cvstime'=>(sub {print 'cvstime: '.@_[0];return;}), |
return; |
'age'=>(sub {print 'age: '.@_[0];return;}), |
} |
'size'=>(sub {print 'size: '.@_[0];return;}), |
), |
'lines'=>(sub {print 'lines: '.@_[0];return;}), |
'md5sum'=>( |
'diffs'=>(sub {print 'diffs: '.@_[0];return;}), |
sub { |
|
print 'md5sum: '.@_[0]; |
|
return; |
|
} |
|
), |
|
'age'=>( |
|
sub { |
|
print 'age: '.@_[0]; |
|
return; |
|
} |
|
), |
|
'size'=>( |
|
sub { |
|
print 'size: '.@_[0]; |
|
return; |
|
} |
|
), |
|
'lines'=>( |
|
sub { |
|
print 'lines: '.@_[0]; |
|
return; |
|
} |
|
), |
|
'diffs'=>( |
|
sub { |
|
print 'diffs: '.@_[0]; |
|
return; |
|
} |
|
), |
|
); |
); |
|
|
my %MEASURE=( |
my %MEASURE=( |
'existence' => ( |
'existence' => ( sub { my ($file1,$file2)=@_; |
sub { |
|
my ($file1,$file2)=@_; |
|
my $rv1=(-e $file1)?'yes':'no'; |
my $rv1=(-e $file1)?'yes':'no'; |
my $rv2=(-e $file2)?'yes':'no'; |
my $rv2=(-e $file2)?'yes':'no'; |
return ($rv1,$rv2); |
return ($rv1,$rv2); } ), |
} |
'md5sum'=>( sub { my ($file1,$file2)=@_; |
), |
my ($rv1)=split(/ /,`md5sum $file1`); chop $rv1; |
'md5sum'=>( |
my ($rv2)=split(/ /,`md5sum $file2`); chop $rv2; |
sub { |
return ($rv1,$rv2); } ), |
my ($file1,$file2)=@_; |
'cvstime'=>( sub { my ($file1,$file2)=@_; |
my $rv1=`md5sum $file1`; chop $rv1; |
my $rv1=&cvstime($file1); |
my $rv2=`md5sum $file2`; chop $rv2; |
my @a=stat($file2); my $gmt=gmtime($a[9]); |
return ($rv1,$rv2); |
my $rv2=&utctime($gmt); |
} |
return ($rv1,$rv2); } ), |
), |
'age'=>( sub { my ($file1,$file2)=@_; |
'age'=>( |
my @a=stat($file1); my $rv1=$a[9]; |
sub { |
@a=stat($file2); my $rv2=$a[9]; |
my ($file1,$file2)=@_; |
return ($rv1,$rv2); } ), |
my @a=stat($file1); my $rv1=$a[10]; |
'size'=>( sub { my ($file1,$file2)=@_; |
@a=stat($file2); my $rv2=$a[10]; |
|
return ($rv1,$rv2); |
|
} |
|
), |
|
'size'=>( |
|
sub { |
|
my ($file1,$file2)=@_; |
|
my @a=stat($file1); my $rv1=$a[7]; |
my @a=stat($file1); my $rv1=$a[7]; |
@a=stat($file2); my $rv2=$a[7]; |
@a=stat($file2); my $rv2=$a[7]; |
return ($rv1,$rv2); |
return ($rv1,$rv2); } ), |
} |
'lines'=>( sub { my ($file1,$file2)=@_; |
), |
|
'lines'=>( |
|
sub { |
|
my ($file1,$file2)=@_; |
|
my $rv1=`wc -l $file1`; chop $rv1; |
my $rv1=`wc -l $file1`; chop $rv1; |
my $rv2=`wc -l $file2`; chop $rv2; |
my $rv2=`wc -l $file2`; chop $rv2; |
return ($rv1,$rv2); |
return ($rv1,$rv2); } ), |
} |
'diffs'=>( sub { my ($file1,$file2)=@_; |
), |
|
'diffs'=>( |
|
sub { |
|
my ($file1,$file2)=@_; |
|
my $rv1=`diff $file1 $file2 | grep '^<' | wc -l`; |
my $rv1=`diff $file1 $file2 | grep '^<' | wc -l`; |
chop $rv1; $rv1=~s/^\s+//; $rv1=~s/\s+$//; |
chop $rv1; $rv1=~s/^\s+//; $rv1=~s/\s+$//; |
my $rv2=`diff $file1 $file2 | grep '^>' | wc -l`; |
my $rv2=`diff $file1 $file2 | grep '^>' | wc -l`; |
chop $rv2; $rv2=~s/^\s+//; $rv2=~s/\s+$//; |
chop $rv2; $rv2=~s/^\s+//; $rv2=~s/\s+$//; |
return ($rv1,$rv2); |
return ($rv1,$rv2); } ), |
} |
|
), |
|
); |
); |
|
|
FLOP: foreach my $file (@files) { |
FLOP: foreach my $file (@files) { |
Line 247 FLOP: foreach my $file (@files) {
|
Line 219 FLOP: foreach my $file (@files) {
|
} |
} |
my ($existence1,$existence2)=&{$MEASURE{'existence'}}($file1,$file2); |
my ($existence1,$existence2)=&{$MEASURE{'existence'}}($file1,$file2); |
my $existence=$existence1.':'.$existence2; |
my $existence=$existence1.':'.$existence2; |
my ($md5sum,$age,$size,$lines,$diffs); |
my ($cvstime,$md5sum,$age,$size,$lines,$diffs); |
if ($existence1 eq 'no' or $existence2 eq 'no') { |
if ($existence1 eq 'no' or $existence2 eq 'no') { |
$md5sum='n/a'; |
$md5sum='n/a'; |
$age='n/a'; |
$age='n/a'; |
|
$cvstime='n/a'; |
$size='n/a'; |
$size='n/a'; |
$lines='n/a'; |
$lines='n/a'; |
$diffs='n/a'; |
$diffs='n/a'; |
} |
} |
else { |
else { |
|
my ($cvstime1,$cvstime2)=&{$MEASURE{'cvstime'}}($file1,$file2); |
|
$cvstime=$cvstime1-$cvstime2; |
my ($age1,$age2)=&{$MEASURE{'age'}}($file1,$file2); |
my ($age1,$age2)=&{$MEASURE{'age'}}($file1,$file2); |
$age=$age1-$age2; |
$age=$age1-$age2; |
my ($md5sum1,$md5sum2)=&{$MEASURE{'md5sum'}}($file1,$file2); |
my ($md5sum1,$md5sum2)=&{$MEASURE{'md5sum'}}($file1,$file2); |
if ($md5sum1 == $md5sum2) { |
if ($md5sum1 eq $md5sum2) { |
$md5sum='same'; |
$md5sum='same'; |
$size=0; |
$size=0; |
$lines=0; |
$lines=0; |
$diffs=0; |
$diffs=0; |
} |
} |
elsif ($md5sum1 != $md5sum2) { |
elsif ($md5sum1 ne $md5sum2) { |
$md5sum='different'; |
$md5sum='different'; |
my ($size1,$size2)=&{$MEASURE{'size'}}($file1,$file2); |
my ($size1,$size2)=&{$MEASURE{'size'}}($file1,$file2); |
$size=$size1-$size2; |
$size=$size1-$size2; |
Line 282 FLOP: foreach my $file (@files) {
|
Line 257 FLOP: foreach my $file (@files) {
|
if ($show eq 'different') { |
if ($show eq 'different') { |
my @ks=(keys %restrict); |
my @ks=(keys %restrict); |
unless (@ks) { |
unless (@ks) { |
@ks=('existence','md5sum','age','size','lines','diffs'); |
@ks=('existence','cvstime','md5sum','age','size','lines','diffs'); |
} |
} |
FLOP2: for my $key (@ks) { |
FLOP2: for my $key (@ks) { |
if ($key eq 'existence') { |
if ($key eq 'existence') { |
Line 295 FLOP: foreach my $file (@files) {
|
Line 270 FLOP: foreach my $file (@files) {
|
$showflag=1; |
$showflag=1; |
} |
} |
} |
} |
|
elsif ($key eq 'cvstime') { |
|
if ($cvstime!=0) { |
|
$showflag=1; |
|
} |
|
} |
elsif ($key eq 'age') { |
elsif ($key eq 'age') { |
if ($age!=0) { |
if ($age!=0) { |
$showflag=1; |
$showflag=1; |
Line 323 FLOP: foreach my $file (@files) {
|
Line 303 FLOP: foreach my $file (@files) {
|
elsif ($show eq 'same') { |
elsif ($show eq 'same') { |
my @ks=(keys %restrict); |
my @ks=(keys %restrict); |
unless (@ks) { |
unless (@ks) { |
@ks=('existence','md5sum','age','size','lines','diffs'); |
@ks=('existence','md5sum','cvstime','age','size','lines','diffs'); |
} |
} |
my $showcount=length(@ks); |
my $showcount=length(@ks); |
FLOP3: for my $key (@ks) { |
FLOP3: for my $key (@ks) { |
Line 337 FLOP: foreach my $file (@files) {
|
Line 317 FLOP: foreach my $file (@files) {
|
$showcount--; |
$showcount--; |
} |
} |
} |
} |
|
elsif ($key eq 'cvstime') { |
|
if ($cvstime!=0) { |
|
$showcount--; |
|
} |
|
} |
elsif ($key eq 'age') { |
elsif ($key eq 'age') { |
if ($age!=0) { |
if ($age!=0) { |
$showcount--; |
$showcount--; |
Line 362 FLOP: foreach my $file (@files) {
|
Line 347 FLOP: foreach my $file (@files) {
|
$showflag=1; |
$showflag=1; |
} |
} |
} |
} |
|
if ($buildmode==1) { |
|
if ($md5sum eq 'same') { |
|
exit(1); |
|
} |
|
elsif ($cvstime<0) { |
|
exit(2); |
|
} |
|
else { |
|
exit(0); |
|
} |
|
} |
|
elsif ($buildmode==2) { |
|
if ($cvstime<0) { |
|
exit(2); |
|
} |
|
else { |
|
exit(0); |
|
} |
|
} |
|
elsif ($buildmode==3) { |
|
if ($md5sum eq 'same') { |
|
exit(1); |
|
} |
|
elsif ($age<0) { |
|
exit(2); |
|
} |
|
else { |
|
exit(0); |
|
} |
|
} |
|
elsif ($buildmode==4) { |
|
if ($cvstime>0) { |
|
exit(2); |
|
} |
|
else { |
|
exit(0); |
|
} |
|
} |
print "$file"; |
print "$file"; |
if ($verbose==1) { |
if ($verbose==1) { |
print "\t"; |
print "\t"; |
print &{$OUTPUT{'existence'}}($existence); |
print &{$OUTPUT{'existence'}}($existence); |
print "\t"; |
print "\t"; |
|
print &{$OUTPUT{'cvstime'}}($cvstime); |
|
print "\t"; |
print &{$OUTPUT{'age'}}($age); |
print &{$OUTPUT{'age'}}($age); |
print "\t"; |
print "\t"; |
print &{$OUTPUT{'md5sum'}}($md5sum); |
print &{$OUTPUT{'md5sum'}}($md5sum); |
Line 380 FLOP: foreach my $file (@files) {
|
Line 405 FLOP: foreach my $file (@files) {
|
print "\n"; |
print "\n"; |
} |
} |
|
|
|
# cvstime(FILE)
#
# Look up FILE in the CVS/Entries file that sits in FILE's own directory and
# return the timestamp recorded there, converted to seconds since the epoch
# (the output of `date --utc +%s`).
#
# Returns the string 'n/a' without touching CVS/Entries when the file-level
# $buildmode is 3.
# Dies with an '*** ERROR ***' message when no entry for FILE can be found
# (this includes the case where CVS/Entries cannot be read).
# Returns an empty string when `date` cannot parse the recorded timestamp.
sub cvstime {
    my ($f)=@_;

    # Split FILE into a directory prefix (keeping the trailing slash) and a
    # base name; a bare file name has an empty prefix.
    my ($path,$file)=($f=~/^(.*\/)(.*?)$/) ? ($1,$2) : ('',$f);

    return 'n/a' if $buildmode==3;

    # Read the Entries file directly and compare the name field literally.
    # Going through `grep` would treat the file name as a regular expression
    # (so "foo.pl" would also match "fooXpl") and would expose it to the shell.
    my $timestamp;
    if (open(my $entries,'<',$path.'CVS/Entries')) {
        while (my $line=<$entries>) {
            # File entries look like /name/revision/timestamp/options/tag;
            # the leading slash yields an empty first field.
            my @fields=split(/\//,$line);
            next unless defined($fields[1]);
            next unless $fields[0] eq '' and $fields[1] eq $file;
            $timestamp=defined($fields[3]) ? $fields[3] : '';
            last;
        }
        close($entries);
    }
    defined($timestamp) or
        die('*** ERROR *** cannot grep against '.$path.
            'CVS/Entries for ' .$file . "\n");

    # Convert the recorded timestamp with date(1). The list form of the pipe
    # open bypasses the shell, so quotes in the timestamp cannot break the
    # command line.
    my $cvstime;
    if (open(my $date,'-|','date','-d',"$timestamp UTC",'--utc','+%s')) {
        $cvstime=<$date>;
        close($date);
    }
    $cvstime='' unless defined($cvstime);
    chomp $cvstime;
    return $cvstime;
}
|
|
|
# utctime(TIMESTAMP)
#
# Convert a textual timestamp to seconds since the epoch by handing
# "TIMESTAMP UTC" to date(1) (`date -d ... --utc +%s`).
# Returns the number as a string without the trailing newline, or an empty
# string when date produces no output (for example an unparsable timestamp).
sub utctime {
    my ($f)=@_;
    my $utctime;
    # List-form pipe open: date is executed directly, without a shell, so
    # the timestamp text cannot alter the command line.
    if (open(my $date,'-|','date','-d',"$f UTC",'--utc','+%s')) {
        $utctime=<$date>;
        close($date);
    }
    $utctime='' unless defined($utctime);
    chomp $utctime;
    return $utctime;
}
|
|
|
# dowarn(MESSAGE)
#
# Emit MESSAGE with warn() unless the file-level $buildmode is set to a true
# value, in which case nothing is printed.
sub dowarn {
    my ($msg)=@_;
    return if $buildmode;
    warn($msg);
    return;
}
|
|
|
=head1 NAME |
|
|
|
filecompare.pl - script used to help probe and compare file statistics |
|
|
|
=head1 SYNOPSIS |
|
|
|
filecompare.pl [ options ... ] [FILE1] [FILE2] [ restrictions ... ] |
|
|
|
or |
|
|
|
filecompare.pl [ options ... ] [DIR1] [DIR2] [ restrictions ... ] |
|
|
|
Restrictions: a list of space separated values (after the file/dir names) |
|
can restrict the comparison. |
|
These values can be: existence, cvstime, age, md5sum, size, lines, |
|
and/or diffs. |
|
|
|
Options (before file/dir names): |
|
|
|
-p show all files that have the same comparison |
|
|
|
-n show all files that have different comparisons |
|
|
|
-a show all files (with comparisons) |
|
|
|
-q only show file names (based on first file/dir) |
|
|
|
-v verbose mode (default) |
|
|
|
=head1 DESCRIPTION |
|
|
|
filecompare.pl can work in two modes: file comparison mode, or directory |
|
comparison mode. |
|
|
|
Comparisons can be a function of: |
|
* existence similarity |
|
* cvs time similarity (first argument treated as CVS source) |
|
* age similarity (modification time) |
|
* md5sum similarity |
|
* size similarity (bytes) |
|
* line count difference |
|
* number of different lines |
|
|
|
filecompare.pl integrates smoothly with the LPML installation language |
|
(linux packaging markup language). filecompare.pl is a tool that can |
|
be used for safe CVS source-to-target installations. |
|
|
|
=head1 README |
|
|
|
filecompare.pl integrates smoothly with the LPML installation language |
|
(linux packaging markup language). filecompare.pl is a tool that can |
|
be used for safe CVS source-to-target installations. |
|
|
|
The unique identifier is considered to be the file name(s) independent |
|
of the directory path. |
|
|
|
=head1 PREREQUISITES |
|
|
|
=head1 COREQUISITES |
|
|
|
=head1 OSNAMES |
|
|
|
linux |
|
|
|
=head1 SCRIPT CATEGORIES |
|
|
|
Packaging/Administrative |
|
|
|
=cut |