File:  [LON-CAPA] / loncom / localize / localize / checksimilar_2files.pl
Revision 1.7: download - view: text, annotated - select for diffs
Wed Sep 25 13:22:42 2013 UTC (10 years, 9 months ago) by bisitz
Branches: MAIN
CVS tags: HEAD
Also ignore brackets for better matching of similar phrases.

    1: #!/usr/bin/perl
    2: # The LearningOnline Network with CAPA
    3: # $Id: checksimilar_2files.pl,v 1.7 2013/09/25 13:22:42 bisitz Exp $
    4: 
    5: use strict;
    6: use warnings;
    7: use utf8;
    8: use open ':utf8';
    9: 
   10: ####
   11: #### Checks, if there are similar keys in the two inputfiles.
   12: #### For example, check the current lang.pm (first input) and newphrases.
   13: #### So if there are similar keys you don't have to translate
   14: #### them again but use the old value and just modify it.
   15: #### IMPORTANT: Both inputfiles have to contain a hash %Lexicon (like lang.pm) !!!
   16: 
   17: 
   18: ####--------Subroutines--------#### 
   19: 
   20: sub read {
   21:     # Read file into memory
   22:     my $file = shift;
   23:     open(IN,$file) or die "Error: Could not open file: $file\n";
   24:     my %filecontent = ();
   25:     my $contents = join('',<IN>);
   26:     close(IN);
   27:     # Build hash with hash from file
   28:     my %Lexicon = ();
   29:     eval($contents.'; %filecontent=%Lexicon;');
   30:     if ($@ ne "") {
   31:         die "\nAn error occurred during the attempt to retrieve the translation hash.\n"
   32:              ."Error: ".$@."\n";
   33:     }
   34:     return %filecontent;
   35: }
   36: 
   37: sub similar_chars {
   38:     my $text = shift;
   39:     $text =~ s/\[_\d\]//g; # translation parameters
   40:     $text =~ s/[.,\_\-?!: \/\(\)]//g; # punctuation
   41:     return $text;
   42: }
   43: 
   44: 
   45: 
   46: sub similar_phrases {
   47:     
   48:     my $text1 = shift;
   49:     my $text2 = shift;
   50:     
   51:     my %phrases = (
   52:         'courses'     => 1,
   53:         'communities' => 1,
   54:         'course'      => 2,
   55:         'community'   => 2,
   56:         'member'      => 3,
   57:         'student'     => 3,
   58:         'students'    => 3,
   59:         'construction'=> 4,
   60:         'authoring'   => 4,
   61:     );
   62: 
   63:     foreach my $word (keys %phrases) {
   64:         $text1 =~ s/$word/X$phrases{$word}X/gi;
   65:         $text2 =~ s/$word/X$phrases{$word}X/gi;
   66:     }
   67: 
   68:     if (lc($text1) eq lc($text2)) {
   69:         return 1;
   70:     }
   71:     
   72:     return 0;
   73: }
   74: 
   75: 
   76: 
   77: ####--------Main Program--------####
   78: 
   79: if (!$ARGV[0] or !$ARGV[1]) {
   80:     die 'Error: Invalid files! Please specify two files which should be checked.'."\n";
   81: }
   82: 
   83: my $file1 = $ARGV[0];  # Old language.pm
   84: my $file2 = $ARGV[1];  # New Phrases
   85: 
   86: print("Checking for similar expressions in phrases in $file1 and $file2...\n");
   87: 
   88: my %langOLD = &read($file1); #Hash with old phrases
   89: my %langNEW = &read($file2); #Hash with new phrases
   90: my $dlm; 
   91: my $count = 0;
   92: 
   93: # For each new phrase, check if there is already a similar one
   94: while( my ($kNEW, $vNEW) = each %langNEW ) {
   95:     my $temp1 = $kNEW;
   96:     $temp1 = &similar_chars($temp1);
   97:    
   98:     while( my ($kOLD, $vOLD) = each %langOLD ) {
   99:         my $temp2 = $kOLD;
  100:         $temp2 = &similar_chars($temp2);
  101: 
  102:         #Check for similar punctuation (case insensitive) or
  103:         #similarity related to similar phrases 
  104:         if (lc($temp1) eq lc($temp2) || &similar_phrases($temp1,$temp2)) {
  105:             #Find delimiter for key and value
  106:             if (($kNEW=~/\'/) & ($kNEW=~/\"/)) {
  107:                 print " (Warning: Both, ' and \", occur!)";
  108:             }
  109:             if ($kNEW=~/\'/) {
  110: 	        $dlm = '"';
  111: 	    } else {
  112: 	        $dlm = "'";
  113: 	    }
  114:             print (<<ENDNEW);
  115: #   $kOLD #(Old key)
  116:    $dlm$kNEW$dlm
  117: => $dlm$vOLD$dlm,
  118: 
  119: ENDNEW
  120:             $count++;
  121: 
  122:         }
  123:     }
  124: }
  125: print("Finished. ".$count." similar expressions found!\n");
  126: 
  127: 

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>