version 1.4, 2001/11/16 20:06:08
|
version 1.9, 2001/12/06 00:23:16
|
Line 1
|
Line 1
|
#!/usr/bin/perl |
#!/usr/bin/perl |
|
|
# The LearningOnline Network witih CAPA |
# The LearningOnline Network with CAPA |
# |
# |
# filecompare.pl - script used to help probe and compare file statistics |
# filecompare.pl - script used to help probe and compare file statistics |
# |
# |
# YEAR=2001 |
# YEAR=2001 |
# 9/27, 10/24, 10/25, 11/4 Scott Harrison |
# 9/27, 10/24, 10/25, 11/4 Scott Harrison |
# 11/14 Guy Albertelli |
# 11/14 Guy Albertelli |
# 11/16 Scott Harrison |
# 11/16,11/17 Scott Harrison |
|
# 12/3,12/5 Scott Harrison |
# |
# |
# $Id$ |
# $Id$ |
### |
### |
|
|
|
############################################################################### |
|
## ## |
|
## ORGANIZATION OF THIS PERL SCRIPT ## |
|
## ## |
|
## 1. Invocation ## |
|
## 2. Notes ## |
|
## 3. Dependencies ## |
|
## 4. Process command line arguments ## |
|
## 5. Process file/dir location arguments ## |
|
## 6. Process comparison restrictions ## |
|
## 7. Define output and measure subroutines ## |
|
## 8. Loop through files and calculate differences ## |
|
## 9. Subroutines ## |
|
## 10. POD (plain old documentation, CPAN style) ## |
|
## ## |
|
############################################################################### |
|
|
# ------------------------------------------------------------------ Invocation |
# ------------------------------------------------------------------ Invocation |
my $invocation=<<END; |
my $invocation=<<END; |
filecompare.pl [ options ... ] [FILE1] [FILE2] [ restrictions ... ] |
filecompare.pl [ options ... ] [FILE1] [FILE2] [ restrictions ... ] |
or |
or |
filecompare.pl [ options ... ] [DIR1] [DIR2] [ restrictions ... ] |
filecompare.pl [ options ... ] [DIR1] [DIR2] [ restrictions ... ] |
|
or |
|
filecompare.pl [ options ... ] -s TARGET=[target] SOURCE=[source] MODE=[mode] |
|
LOC1 LOC2 |
|
|
Restrictions: a list of space separated values (after the file/dir names) |
Restrictions: a list of space separated values (after the file/dir names) |
can restrict the comparison. |
can restrict the comparison. |
Line 29 Options (before file/dir names):
|
Line 50 Options (before file/dir names):
|
-a show all files (with comparisons) |
-a show all files (with comparisons) |
-q only show file names (based on first file/dir) |
-q only show file names (based on first file/dir) |
-v verbose mode (default) |
-v verbose mode (default) |
|
-bN buildmode (controls exit code of this script; 0 unless...) |
|
N=1: md5sum=same --> 1; cvstime<0 --> 2 |
|
N=2: same as N=1 except without md5sum |
|
N=3: md5sum=same --> 1; age<0 --> 2 |
|
N=4: cvstime>0 --> 2 |
|
|
|
The third way to pass arguments is set by the -s flag. |
|
filecompare.pl -s SOURCE=[source] TARGET=[target] MODE=[mode] LOC1 LOC2 |
|
|
|
TARGET corresponds to the root path of LOC2. SOURCE corresponds to |
|
the root path of LOC1. MODE can either be file, directory, link, or fileglob. |
|
|
END |
END |
unless (@ARGV) { |
unless (@ARGV) { |
print $invocation; |
print $invocation; |
exit 1; |
exit 1; |
} |
} |
|
|
# ----------------------------------------------------------------------- Notes |
# ----------------------------------------------------------------------- Notes |
# |
# |
# What are all the different ways to compare two files and how to look |
# What are all the different ways to compare two files and how to look |
Line 41 unless (@ARGV) {
|
Line 75 unless (@ARGV) {
|
# |
# |
# Ways of comparison: |
# Ways of comparison: |
# existence similarity |
# existence similarity |
# cvs time similarity (first argument treated as CVS source) |
# cvs time similarity (1st arg treated as CVS source; only for buildmode) |
# age similarity (modification time) |
# age similarity (modification time) |
# md5sum similarity |
# md5sum similarity |
# size similarity (bytes) |
# size similarity (bytes) |
Line 56 unless (@ARGV) {
|
Line 90 unless (@ARGV) {
|
# size similarity (byte difference) |
# size similarity (byte difference) |
# line count difference (integer) |
# line count difference (integer) |
# number of different lines (integer) |
# number of different lines (integer) |
# |
|
# Text output of comparison: |
|
# existence VALUE |
|
# cvstime VALUE |
|
# age VALUE |
|
# md5sum VALUE |
|
# size VALUE |
|
# lines VALUE |
|
# diffs VALUE |
|
# |
|
# Output of comparison: |
|
# exist |
|
# if md5sum not same, then different |
|
# if cvstime not 0, then older/newer |
|
# if age not 0, then older/newer |
|
# if size not 0, then bigger/smaller |
|
# if lines not 0, then more lines of code/fewer lines of code |
|
# if diffs not 0, then subtracted lines/added lines/changed lines |
|
|
|
|
# ---------------------------------------------------------------- Dependencies |
# implementing from unix command line (assuming bash) |
# implementing from unix command line (assuming bash) |
# md5sum, diff, wc -l |
# md5sum, diff, wc -l |
|
|
Line 85 unless (@ARGV) {
|
Line 102 unless (@ARGV) {
|
# -a show all files (with comparisons) |
# -a show all files (with comparisons) |
# -q only show file names (based on first file/dir) |
# -q only show file names (based on first file/dir) |
# -v verbose mode (default) |
# -v verbose mode (default) |
# -b build/install mode (returns exitcode) |
# -bN build/install mode (returns exitcode) |
|
# -s status checking mode for lpml |
|
|
my $verbose='1'; |
my $verbose='1'; |
my $show='all'; |
my $show='all'; |
my $buildmode=0; |
my $buildmode=0; |
while (@ARGV) { |
my $statusmode=0; |
|
ALOOP: while (@ARGV) { |
my $flag; |
my $flag; |
if ($ARGV[0]=~/^\-(\w)/) { |
if ($ARGV[0]=~/^\-(\w)/) { |
$flag=$1; |
$flag=$1; |
|
if ($flag eq 'b') { |
|
$ARGV[0]=~/^\-\w(\d)/; |
|
$buildmode=$1; |
|
shift @ARGV; |
|
next ALOOP; |
|
} |
shift @ARGV; |
shift @ARGV; |
SWITCH: { |
SWITCH: { |
$verbose=0, last SWITCH if $flag eq 'q'; |
$verbose=0, last SWITCH if $flag eq 'q'; |
Line 100 while (@ARGV) {
|
Line 126 while (@ARGV) {
|
$show='same', last SWITCH if $flag eq 'p'; |
$show='same', last SWITCH if $flag eq 'p'; |
$show='different', last SWITCH if $flag eq 'n'; |
$show='different', last SWITCH if $flag eq 'n'; |
$show='all', last SWITCH if $flag eq 'a'; |
$show='all', last SWITCH if $flag eq 'a'; |
$buildmode=1, last SWITCH if $flag eq 'b'; |
$statusmode=1, last SWITCH if $flag eq 's'; |
$buildmode=2, last SWITCH if $flag eq 'B'; |
|
$buildmode=3, last SWITCH if $flag eq 'g'; |
|
$buildmode=4, last SWITCH if $flag eq 'G'; |
|
print($invocation), exit(1); |
print($invocation), exit(1); |
} |
} |
} |
} |
Line 114 while (@ARGV) {
|
Line 137 while (@ARGV) {
|
dowarn('Verbose: '.$verbose."\n"); |
dowarn('Verbose: '.$verbose."\n"); |
dowarn('Show: '.$show."\n"); |
dowarn('Show: '.$show."\n"); |
|
|
|
my @files; |
|
my $loc1; |
|
my $loc2; |
|
# ----------------------------------------- If status checking mode for lpml |
|
my ($sourceroot,$targetroot,$mode,$sourceglob,$targetglob); |
|
my ($source,$target); |
|
if ($statusmode==1) { |
|
($sourceroot,$targetroot,$mode,$sourceglob,$targetglob)=splice(@ARGV,0,5); |
|
$targetroot.='/' if $targetroot!~/\/$/; |
|
$sourceroot=~s/^SOURCE\=//; |
|
$targetroot=~s/^TARGET\=//; |
|
$source=$sourceroot.'/'.$sourceglob; |
|
$target=$targetroot.''.$targetglob; |
|
# print "SOURCE: $source\n"; |
|
# print "TARGET: $target\n"; |
|
if ($mode eq 'MODE=fileglob') { |
|
@files=glob($source); |
|
} |
|
} |
|
else { |
|
|
|
# ----------------------------------------- Process file/dir location arguments |
# FILE1 FILE2 or DIR1 DIR2 |
# FILE1 FILE2 or DIR1 DIR2 |
my $loc1=shift @ARGV; |
$loc1=shift @ARGV; |
my $loc2=shift @ARGV; |
$loc2=shift @ARGV; |
my $dirmode='directories'; |
my $dirmode='directories'; |
my @files; |
|
unless ($loc1 and $loc2) { |
unless ($loc1 and $loc2) { |
|
print "LOC1: $loc1\nLOC2: $loc2\n"; |
print($invocation), exit(1); |
print($invocation), exit(1); |
} |
} |
if (-f $loc1) { |
if (-f $loc1) { |
Line 138 else {
|
Line 183 else {
|
dowarn('Processing for mode: '.$dirmode."\n"); |
dowarn('Processing for mode: '.$dirmode."\n"); |
dowarn('Location #1: '.$loc1."\n"); |
dowarn('Location #1: '.$loc1."\n"); |
dowarn('Location #2: '.$loc2."\n"); |
dowarn('Location #2: '.$loc2."\n"); |
|
} |
|
# --------------------------------------------- Process comparison restrictions |
# A list of space separated values (after the file/dir names) |
# A list of space separated values (after the file/dir names) |
# can restrict the comparison. |
# can restrict the comparison. |
|
my %rhash=('existence'=>0,'cvstime'=>0,'md5sum'=>0,'age'=>0,'size'=>0, |
|
'lines'=>0,'diffs'=>0); |
my %restrict; |
my %restrict; |
while (@ARGV) { |
while (@ARGV) { |
my $r=shift @ARGV; |
my $r=shift @ARGV; |
if ($r eq 'existence' or |
if ($rhash{$r}==0) {$restrict{$r}=1;} |
$r eq 'cvstime' or |
else {print($invocation), exit(1);} |
$r eq 'md5sum' or |
|
$r eq 'age' or |
|
$r eq 'size' or |
|
$r eq 'lines' or |
|
$r eq 'diffs') { |
|
$restrict{$r}=1; |
|
} |
|
else { |
|
print($invocation), exit(1); |
|
} |
|
} |
} |
if (%restrict) { |
if (%restrict) { |
warn('Restricting comparison to: '. |
dowarn('Restricting comparison to: '. |
join(' ',keys %restrict)."\n"); |
join(' ',keys %restrict)."\n"); |
} |
} |
|
|
|
# --------------------------------------- Define output and measure subroutines |
my %OUTPUT=( |
my %OUTPUT=( |
'existence'=>( sub {print 'existence: '.@_[0]; return;}), |
'existence'=>( sub {print 'existence: '.@_[0]; return;}), |
'md5sum'=>(sub {print 'md5sum: '.@_[0];return;}), |
'md5sum'=>(sub {print 'md5sum: '.@_[0];return;}), |
Line 206 my %MEASURE=(
|
Line 245 my %MEASURE=(
|
return ($rv1,$rv2); } ), |
return ($rv1,$rv2); } ), |
); |
); |
|
|
FLOP: foreach my $file (@files) { |
FLOOP: foreach my $file (@files) { |
my $file1; |
my $file1; |
my $file2; |
my $file2; |
if ($dirmode eq 'directories') { |
if ($dirmode eq 'directories') { |
Line 229 FLOP: foreach my $file (@files) {
|
Line 268 FLOP: foreach my $file (@files) {
|
$diffs='n/a'; |
$diffs='n/a'; |
} |
} |
else { |
else { |
my ($cvstime1,$cvstime2)=&{$MEASURE{'cvstime'}}($file1,$file2); |
if ($buildmode) { |
$cvstime=$cvstime1-$cvstime2; |
my ($cvstime1,$cvstime2)=&{$MEASURE{'cvstime'}}($file1,$file2); |
|
$cvstime=$cvstime1-$cvstime2; |
|
} |
|
else { |
|
$cvstime='n/a'; |
|
} |
my ($age1,$age2)=&{$MEASURE{'age'}}($file1,$file2); |
my ($age1,$age2)=&{$MEASURE{'age'}}($file1,$file2); |
$age=$age1-$age2; |
$age=$age1-$age2; |
my ($md5sum1,$md5sum2)=&{$MEASURE{'md5sum'}}($file1,$file2); |
my ($md5sum1,$md5sum2)=&{$MEASURE{'md5sum'}}($file1,$file2); |
Line 238 FLOP: foreach my $file (@files) {
|
Line 282 FLOP: foreach my $file (@files) {
|
$md5sum='same'; |
$md5sum='same'; |
$size=0; |
$size=0; |
$lines=0; |
$lines=0; |
$diffs=0; |
$diffs='0:0'; |
} |
} |
elsif ($md5sum1 ne $md5sum2) { |
elsif ($md5sum1 ne $md5sum2) { |
$md5sum='different'; |
$md5sum='different'; |
Line 259 FLOP: foreach my $file (@files) {
|
Line 303 FLOP: foreach my $file (@files) {
|
unless (@ks) { |
unless (@ks) { |
@ks=('existence','cvstime','md5sum','age','size','lines','diffs'); |
@ks=('existence','cvstime','md5sum','age','size','lines','diffs'); |
} |
} |
FLOP2: for my $key (@ks) { |
FLOOP2: for my $key (@ks) { |
if ($key eq 'existence') { |
if ($key eq 'existence') { |
if ($existence ne 'yes:yes') { |
if ($existence ne 'yes:yes') { |
$showflag=1; |
$showflag=1; |
Line 270 FLOP: foreach my $file (@files) {
|
Line 314 FLOP: foreach my $file (@files) {
|
$showflag=1; |
$showflag=1; |
} |
} |
} |
} |
elsif ($key eq 'cvstime') { |
elsif ($key eq 'cvstime' and $buildmode) { |
if ($cvstime!=0) { |
if ($cvstime!=0) { |
$showflag=1; |
$showflag=1; |
} |
} |
Line 296 FLOP: foreach my $file (@files) {
|
Line 340 FLOP: foreach my $file (@files) {
|
} |
} |
} |
} |
if ($showflag) { |
if ($showflag) { |
last FLOP2; |
last FLOOP2; |
} |
} |
} |
} |
} |
} |
Line 306 FLOP: foreach my $file (@files) {
|
Line 350 FLOP: foreach my $file (@files) {
|
@ks=('existence','md5sum','cvstime','age','size','lines','diffs'); |
@ks=('existence','md5sum','cvstime','age','size','lines','diffs'); |
} |
} |
my $showcount=length(@ks); |
my $showcount=length(@ks); |
FLOP3: for my $key (@ks) { |
$showcount-- unless $buildmode; |
|
FLOOP3: for my $key (@ks) { |
if ($key eq 'existence') { |
if ($key eq 'existence') { |
if ($existence ne 'yes:yes') { |
if ($existence ne 'yes:yes') { |
$showcount--; |
$showcount--; |
Line 317 FLOP: foreach my $file (@files) {
|
Line 362 FLOP: foreach my $file (@files) {
|
$showcount--; |
$showcount--; |
} |
} |
} |
} |
elsif ($key eq 'cvstime') { |
elsif ($key eq 'cvstime' and $buildmode) { |
if ($cvstime!=0) { |
if ($cvstime!=0) { |
$showcount--; |
$showcount--; |
} |
} |
Line 378 FLOP: foreach my $file (@files) {
|
Line 423 FLOP: foreach my $file (@files) {
|
} |
} |
} |
} |
elsif ($buildmode==4) { |
elsif ($buildmode==4) { |
if ($cvstime>0) { |
if ($existence=~/no$/) { |
|
exit(3); |
|
} |
|
elsif ($cvstime>0) { |
exit(2); |
exit(2); |
} |
} |
|
elsif ($existence=~/^no/) { |
|
exit(1); |
|
} |
else { |
else { |
exit(0); |
exit(0); |
} |
} |
} |
} |
print "$file"; |
if ($showflag) { |
if ($verbose==1) { |
print "$file"; |
print "\t"; |
if ($verbose==1) { |
print &{$OUTPUT{'existence'}}($existence); |
print "\t"; |
print "\t"; |
print &{$OUTPUT{'existence'}}($existence); |
print &{$OUTPUT{'cvstime'}}($cvstime); |
print "\t"; |
print "\t"; |
print &{$OUTPUT{'cvstime'}}($cvstime); |
print &{$OUTPUT{'age'}}($age); |
print "\t"; |
print "\t"; |
print &{$OUTPUT{'age'}}($age); |
print &{$OUTPUT{'md5sum'}}($md5sum); |
print "\t"; |
print "\t"; |
print &{$OUTPUT{'md5sum'}}($md5sum); |
print &{$OUTPUT{'size'}}($size); |
print "\t"; |
print "\t"; |
print &{$OUTPUT{'size'}}($size); |
print &{$OUTPUT{'lines'}}($lines); |
print "\t"; |
print "\t"; |
print &{$OUTPUT{'lines'}}($lines); |
print &{$OUTPUT{'diffs'}}($diffs); |
print "\t"; |
|
print &{$OUTPUT{'diffs'}}($diffs); |
|
} |
|
print "\n"; |
} |
} |
print "\n"; |
|
} |
} |
|
|
|
# ----------------------------------------------------------------- Subroutines |
|
|
sub cvstime { |
sub cvstime { |
my ($f)=@_; |
my ($f)=@_; |
my $path; my $file; |
my $path; my $file; |
Line 442 sub dowarn {
|
Line 497 sub dowarn {
|
warn($msg) unless $buildmode; |
warn($msg) unless $buildmode; |
} |
} |
|
|
|
# ----------------------------------- POD (plain old documentation, CPAN style) |
|
|
=head1 NAME |
=head1 NAME |
|
|
filecompare.pl - script used to help probe and compare file statistics |
filecompare.pl - script used to help probe and compare file statistics |