File:  [LON-CAPA] / loncom / build / filecompare.pl
Revision 1.3: download - view: text, annotated - select for diffs
Wed Nov 14 13:19:36 2001 UTC (22 years, 7 months ago) by albertel
Branches: MAIN
CVS tags: HEAD
- used ==/!= to compare strings
- md5sum also outputs file name, need to strip that off

    1: #!/usr/bin/perl
    2: 
    3: # Scott Harrison
    4: # YEAR=2001
    5: # 9/27, 10/24, 10/25, 11/4
    6: 
    7: my $invocation=<<END;
    8: # ------------------------------------------------------------------ Invocation
    9: # filecompare.pl FILE1 FILE2
   10: # or
   11: # filecompare.pl DIR1 DIR2
   12: #
   13: # A list of space separated values (after the file/dir names)
   14: # can restrict the comparison.
   15: # These values can be: existence, cvstime, age, md5sum, size, lines,
   16: # and/or diffs.
   17: #
   18: # Flags (before file/dir names):
   19: # -p show all files the same
   20: # -n show all files different
   21: # -a show all files (with comparisons)
   22: # -q only show file names (based on first file/dir)
   23: # -v verbose mode (default)
   24: END
   25: unless (@ARGV) {
   26:     print $invocation;
   27:     exit 1;
   28: }
   29: # ----------------------------------------------------------------------- Notes
   30: #
   31: # What are all the different ways to compare two files and how to look
   32: # at the differences?
   33: #
   34: # Ways of comparison:
   35: #   existence similarity
   36: #   cvs time similarity (first argument treated as CVS source)
   37: #   age similarity (modification time)
   38: #   md5sum similarity
   39: #   size similarity (bytes)
   40: #   line count difference
   41: #   number of different lines
   42: #
   43: # Quantities of comparison:
   44: #   existence (no,yes); other values become 'n/a'
   45: #   cvstime in seconds
   46: #   age in seconds
   47: #   md5sum ("same" or "different")
   48: #   size similarity (byte difference)
   49: #   line count difference (integer)
   50: #   number of different lines (integer)
   51: #   
   52: # Text output of comparison:
   53: #   existence VALUE
   54: #   cvstime VALUE
   55: #   age VALUE
   56: #   md5sum VALUE
   57: #   size VALUE
   58: #   lines VALUE
   59: #   diffs VALUE
   60: #
   61: # Output of comparison:
   62: #   exist
   63: #   if md5sum not same, then different
   64: #   if cvstime not 0, then older/newer
   65: #   if age not 0, then older/newer
   66: #   if size not 0, then bigger/smaller
   67: #   if lines not 0, then more lines of code/less lines of code
   68: #   if diffs not 0, then subtracted lines/added lines/changed lines
   69: 
   70: # implementing from unix command line (assuming bash)
   71: # md5sum, diff, wc -l
   72: 
   73: # ---------------------------------------------- Process command line arguments
   74: # Flags (before file/dir names):
   75: # -p show all files the same
   76: # -n show all files different
   77: # -a show all files (with comparisons)
   78: # -q only show file names (based on first file/dir)
   79: # -v verbose mode (default)
   80: # -b build/install mode (returns exitcode)
   81: my $verbose='1';
   82: my $show='all';
   83: my $buildmode=0;
   84: while (@ARGV) {
   85:     my $flag;
   86:     if ($ARGV[0]=~/^\-(\w)/) {
   87: 	$flag=$1;
   88: 	shift @ARGV;
   89:       SWITCH: {
   90: 	  $verbose=0, last SWITCH if $flag eq 'q';
   91: 	  $verbose=1, last SWITCH if $flag eq 'v';
   92: 	  $show='same', last SWITCH if $flag eq 'p';
   93: 	  $show='different', last SWITCH if $flag eq 'n';
   94: 	  $show='all', last SWITCH if $flag eq 'a';
   95: 	  $buildmode=1, last SWITCH if $flag eq 'b';
   96: 	  $buildmode=2, last SWITCH if $flag eq 'B';
   97: 	  $buildmode=3, last SWITCH if $flag eq 'g';
   98: 	  $buildmode=4, last SWITCH if $flag eq 'G';
   99: 	  print($invocation), exit(1);
  100:       }
  101:     }
  102:     else {
  103: 	last;
  104:     }
  105: }
  106: dowarn('Verbose: '.$verbose."\n");
  107: dowarn('Show: '.$show."\n");
  108: 
  109: # FILE1 FILE2 or DIR1 DIR2
  110: my $loc1=shift @ARGV;
  111: my $loc2=shift @ARGV;
  112: my $dirmode='directories';
  113: my @files;
  114: unless ($loc1 and $loc2) {
  115:     print($invocation), exit(1);
  116: }
  117: if (-f $loc1) {
  118:     $dirmode='files';
  119:     @files=($loc1);
  120: }
  121: else {
  122:     if (-e $loc1) {
  123: 	@files=`find $loc1 -type f`;
  124:     }
  125:     else {
  126: 	@files=($loc1);
  127:     }
  128:     map {chomp; s/^$loc1\///; $_} @files;
  129: }
  130: dowarn('Processing for mode: '.$dirmode."\n");
  131: dowarn('Location #1: '.$loc1."\n");
  132: dowarn('Location #2: '.$loc2."\n");
  133: 
  134: # A list of space separated values (after the file/dir names)
  135: # can restrict the comparison.
  136: my %restrict;
  137: while (@ARGV) {
  138:     my $r=shift @ARGV;
  139:     if ($r eq 'existence' or
  140: 	$r eq 'cvstime' or
  141: 	$r eq 'md5sum' or
  142: 	$r eq 'age' or
  143: 	$r eq 'size' or
  144: 	$r eq 'lines' or
  145: 	$r eq 'diffs') {
  146: 	$restrict{$r}=1;
  147:     }
  148:     else {
  149: 	print($invocation), exit(1);
  150:     }
  151: }
  152: if (%restrict) {
  153:     warn('Restricting comparison to: '.
  154: 	 join(' ',keys %restrict)."\n");
  155: }
  156: 
  157: my %OUTPUT=(
  158: 	 'existence'=>(
  159: 		    sub {
  160: 			print 'existence: '.@_[0];
  161: 			return;
  162: 		    }
  163:          ),
  164: 	 'md5sum'=>(
  165: 		    sub {
  166: 			print 'md5sum: '.@_[0];
  167: 			return;
  168: 		    }
  169:          ),
  170:          'cvstime'=>(
  171:                     sub {
  172: 	                print 'cvstime: '.@_[0];
  173: 			return;
  174: 		    }
  175:          ),
  176:          'age'=>(
  177:                     sub {
  178: 	                print 'age: '.@_[0];
  179: 			return;
  180: 		    }
  181:          ),
  182:          'size'=>(
  183:                     sub {
  184: 			print 'size: '.@_[0];
  185: 			return;
  186: 		    }
  187:          ),
  188:          'lines'=>(
  189:                     sub {
  190: 			print 'lines: '.@_[0];
  191: 			return;
  192: 		    }
  193:          ),
  194:          'diffs'=>(
  195:                     sub {
  196: 			print 'diffs: '.@_[0];
  197: 			return;
  198: 		    }
  199:          ),
  200: );
  201: 
  202: my %MEASURE=(
  203: 	 'existence' => (
  204:                     sub {
  205: 			my ($file1,$file2)=@_;
  206: 		        my $rv1=(-e $file1)?'yes':'no';
  207: 			my $rv2=(-e $file2)?'yes':'no';
  208: 			return ($rv1,$rv2);
  209: 		    }
  210:          ),
  211: 	 'md5sum'=>(
  212: 		    sub {
  213: 			my ($file1,$file2)=@_;
  214: 			my ($rv1)=split(/ /,`md5sum $file1`); chop $rv1;
  215: 			my ($rv2)=split(/ /,`md5sum $file2`); chop $rv2;
  216: 			return ($rv1,$rv2);
  217: 		    }
  218:          ),
  219: 	 'cvstime'=>(
  220: 		    sub {
  221: 			my ($file1,$file2)=@_;
  222: 			my $rv1=&cvstime($file1);
  223: 			my @a=stat($file2); my $gmt=gmtime($a[9]);
  224: 			my $rv2=&utctime($gmt);
  225: 			return ($rv1,$rv2);
  226: 		    }
  227:          ),
  228:          'age'=>(
  229:                     sub {
  230: 			my ($file1,$file2)=@_;
  231: 			my @a=stat($file1); my $rv1=$a[9];
  232: 			@a=stat($file2); my $rv2=$a[9];
  233: 			return ($rv1,$rv2);
  234: 		    }
  235:          ),
  236:          'size'=>(
  237:                     sub {
  238: 			my ($file1,$file2)=@_;
  239: 			my @a=stat($file1); my $rv1=$a[7];
  240: 			@a=stat($file2); my $rv2=$a[7];
  241: 			return ($rv1,$rv2);
  242: 		    }
  243:          ),
  244:          'lines'=>(
  245:                     sub {
  246: 			my ($file1,$file2)=@_;
  247: 			my $rv1=`wc -l $file1`; chop $rv1;
  248: 			my $rv2=`wc -l $file2`; chop $rv2;
  249: 			return ($rv1,$rv2);
  250: 		    }
  251:          ),
  252:          'diffs'=>(
  253:                     sub {
  254: 			my ($file1,$file2)=@_;
  255: 			my $rv1=`diff $file1 $file2 | grep '^<' | wc -l`;
  256: 			chop $rv1; $rv1=~s/^\s+//; $rv1=~s/\s+$//;
  257: 			my $rv2=`diff $file1 $file2 | grep '^>' | wc -l`;
  258: 			chop $rv2; $rv2=~s/^\s+//; $rv2=~s/\s+$//;
  259: 			return ($rv1,$rv2);
  260: 		    }
  261:          ),
  262: );
  263: 
  264: FLOP: foreach my $file (@files) {
  265:     my $file1;
  266:     my $file2;
  267:     if ($dirmode eq 'directories') {
  268:         $file1=$loc1.'/'.$file;
  269:         $file2=$loc2.'/'.$file;
  270:     }
  271:     else {
  272:         $file1=$loc1;
  273:         $file2=$loc2;
  274:     }
  275:     my ($existence1,$existence2)=&{$MEASURE{'existence'}}($file1,$file2);
  276:     my $existence=$existence1.':'.$existence2;
  277:     my ($cvstime,$md5sum,$age,$size,$lines,$diffs);
  278:     if ($existence1 eq 'no' or $existence2 eq 'no') {
  279:         $md5sum='n/a';
  280:         $age='n/a';
  281:         $cvstime='n/a';
  282:         $size='n/a';
  283:         $lines='n/a';
  284:         $diffs='n/a';
  285:     }
  286:     else {
  287:         my ($cvstime1,$cvstime2)=&{$MEASURE{'cvstime'}}($file1,$file2);
  288:         $cvstime=$cvstime1-$cvstime2;
  289:         my ($age1,$age2)=&{$MEASURE{'age'}}($file1,$file2);
  290:         $age=$age1-$age2;
  291:         my ($md5sum1,$md5sum2)=&{$MEASURE{'md5sum'}}($file1,$file2);
  292:         if ($md5sum1 eq $md5sum2) {
  293:             $md5sum='same';
  294:             $size=0;
  295:             $lines=0;
  296:             $diffs=0;
  297: 	}
  298:         elsif ($md5sum1 ne $md5sum2) {
  299:             $md5sum='different';
  300:             my ($size1,$size2)=&{$MEASURE{'size'}}($file1,$file2);
  301:             $size=$size1-$size2;
  302:             my ($lines1,$lines2)=&{$MEASURE{'lines'}}($file1,$file2);
  303:             $lines=$lines1-$lines2;
  304:             my ($diffs1,$diffs2)=&{$MEASURE{'diffs'}}($file1,$file2);
  305:             $diffs=$diffs1.':'.$diffs2;
  306:         }
  307:     }
  308:     my $showflag=0;
  309:     if ($show eq 'all') {
  310:         $showflag=1;
  311:     }
  312:     if ($show eq 'different') {
  313:         my @ks=(keys %restrict);
  314:         unless (@ks) {
  315: 	    @ks=('existence','cvstime','md5sum','age','size','lines','diffs');
  316: 	}
  317:         FLOP2: for my $key (@ks) {
  318: 	    if ($key eq 'existence') {
  319: 		if ($existence ne 'yes:yes') {
  320: 		    $showflag=1;
  321: 		}
  322: 	    }
  323: 	    elsif ($key eq 'md5sum') {
  324: 		if ($md5sum ne 'same') {
  325: 		    $showflag=1;
  326: 		}
  327: 	    }
  328: 	    elsif ($key eq 'cvstime') {
  329: 		if ($cvstime!=0) {
  330: 		    $showflag=1;
  331: 		}
  332: 	    }
  333: 	    elsif ($key eq 'age') {
  334: 		if ($age!=0) {
  335: 		    $showflag=1;
  336: 		}
  337: 	    }
  338: 	    elsif ($key eq 'size') {
  339: 		if ($size!=0) {
  340: 		    $showflag=1;
  341: 		}
  342: 	    }
  343: 	    elsif ($key eq 'lines') {
  344: 		if ($lines!=0) {
  345: 		    $showflag=1;
  346: 		}
  347: 	    }
  348: 	    elsif ($key eq 'diffs') {
  349: 		if ($diffs ne '0:0') {
  350: 		    $showflag=1;
  351: 		}
  352: 	    }
  353: 	    if ($showflag) {
  354: 		last FLOP2;
  355: 	    }
  356:         }
  357:     }
  358:     elsif ($show eq 'same') {
  359:         my @ks=(keys %restrict);
  360:         unless (@ks) {
  361: 	    @ks=('existence','md5sum','cvstime','age','size','lines','diffs');
  362: 	}
  363:         my $showcount=length(@ks);
  364:         FLOP3: for my $key (@ks) {
  365: 	    if ($key eq 'existence') {
  366: 		if ($existence ne 'yes:yes') {
  367: 		    $showcount--;
  368: 		}
  369: 	    }
  370: 	    elsif ($key eq 'md5sum') {
  371: 		if ($md5sum ne 'same') {
  372: 		    $showcount--;
  373: 		}
  374: 	    }
  375: 	    elsif ($key eq 'cvstime') {
  376: 		if ($cvstime!=0) {
  377: 		    $showcount--;
  378: 		}
  379: 	    }
  380: 	    elsif ($key eq 'age') {
  381: 		if ($age!=0) {
  382: 		    $showcount--;
  383: 		}
  384: 	    }
  385: 	    elsif ($key eq 'size') {
  386: 		if ($size!=0) {
  387: 		    $showcount--;
  388: 		}
  389: 	    }
  390: 	    elsif ($key eq 'lines') {
  391: 		if ($lines!=0) {
  392: 		    $showcount--;
  393: 		}
  394: 	    }
  395: 	    elsif ($key eq 'diffs') {
  396: 		if ($diffs ne '0:0') {
  397: 		    $showcount--;
  398: 		}
  399: 	    }
  400:         }
  401:         if ($showcount==0) {
  402: 	    $showflag=1;
  403: 	}
  404:     }
  405:     if ($buildmode==1) {
  406:         if ($md5sum eq 'same') {
  407: 	    exit(1);
  408: 	}
  409:         elsif ($cvstime<0) {
  410: 	    exit(2);
  411: 	}
  412:         else {
  413: 	    exit(0);
  414: 	}
  415:     }
  416:     elsif ($buildmode==2) {
  417:         if ($cvstime<0) {
  418: 	    exit(2);
  419: 	}
  420:         else {
  421: 	    exit(0);
  422: 	}
  423:     }
  424:     elsif ($buildmode==3) {
  425:         if ($md5sum eq 'same') {
  426: 	    exit(1);
  427: 	}
  428:         elsif ($age<0) {
  429: 	    exit(2);
  430: 	}
  431:         else {
  432: 	    exit(0);
  433: 	}
  434:     }
  435:     elsif ($buildmode==4) {
  436:         if ($cvstime>0) {
  437: 	    exit(2);
  438: 	}
  439:         else {
  440: 	    exit(0);
  441: 	}
  442:     }
  443:     print "$file";
  444:     if ($verbose==1) {
  445:         print "\t";
  446: 	print &{$OUTPUT{'existence'}}($existence);
  447:         print "\t";
  448: 	print &{$OUTPUT{'cvstime'}}($cvstime);
  449:         print "\t";
  450: 	print &{$OUTPUT{'age'}}($age);
  451:         print "\t";
  452: 	print &{$OUTPUT{'md5sum'}}($md5sum);
  453:         print "\t";
  454: 	print &{$OUTPUT{'size'}}($size);
  455:         print "\t";
  456: 	print &{$OUTPUT{'lines'}}($lines);
  457:         print "\t";
  458: 	print &{$OUTPUT{'diffs'}}($diffs);
  459:     }
  460:     print "\n";
  461: }
  462: 
  463: sub cvstime {
  464:     my ($f)=@_;
  465:     my $path; my $file;
  466:     if ($f=~/^(.*\/)(.*?)$/) {
  467: 	$f=~/^(.*\/)(.*?)$/;
  468: 	($path,$file)=($1,$2);
  469:     }
  470:     else {
  471: 	$file=$f; $path='';
  472:     }
  473:     my $cvstime;
  474:     if ($buildmode!=3) {
  475: 	my $entry=`grep '^/$file/' ${path}CVS/Entries` or
  476: 	    die('*** ERROR *** cannot grep against '.${path}.
  477: 		'CVS/Entries for ' .$file . "\n");
  478:         my @fields=split(/\//,$entry);
  479:         $cvstime=`date -d '$fields[3] UTC' --utc +"%s"`;
  480:         chomp $cvstime;
  481:     }
  482:     else {
  483: 	$cvstime='n/a';
  484:     }
  485:     return $cvstime;
  486: }
  487: 
  488: sub utctime {
  489:     my ($f)=@_;
  490:     my $utctime=`date -d '$f UTC' --utc +"%s"`;
  491:     chomp $utctime;
  492:     return $utctime;
  493: }
  494: 
  495: sub dowarn {
  496:     my ($msg)=@_;
  497:     warn($msg) unless $buildmode;
  498: }

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>