loncom/localize/localize/checksimilar_2files.pl - annotate

Return to checksimilar_2files.pl CVS log
Up to [LON-CAPA] / loncom / localize / localize
Annotation of loncom/localize/localize/checksimilar_2files.pl, revision 1.3

1.1       wenzelju    1: #!/usr/bin/perl
                      2: # The LearningOnline Network with CAPA
1.3     ! bisitz      3: # $Id: checksimilar_2files.pl,v 1.2 2010/08/03 13:00:21 bisitz Exp $
1.1       wenzelju    4: 
                      5: use strict;
                      6: use warnings;
                      7: use utf8;
                      8: use open ':utf8';
                      9: 
                     10: ####
                     11: #### Checks whether the two input files contain similar keys.
                     12: #### For example, compare the current lang.pm (first input) with newphrases (second input).
                     13: #### If similar keys exist, you do not have to translate them from scratch:
                     14: #### reuse the old value and just adapt it.
                     15: #### IMPORTANT: Both input files must define a hash %Lexicon (like lang.pm)!
                     16: 
                     17: 
                     18: ####--------Subroutines--------#### 
                     19: 
                     # read($file)
                     #
                     # Loads a Perl source file that assigns to a hash named %Lexicon and
                     # returns that hash as a flat key/value list.
                     #
                     # Parameters:
                     #   $file - path of the file to load
                     # Returns:
                     #   the contents of %Lexicon after the file has been evaluated
                     # Dies:
                     #   if no file name is given, the file cannot be opened, or the file
                     #   contents fail to evaluate.
                     #
                     # NOTE: this sub shares its name with the builtin read(), so it has to
                     # be called as &read(...); a plain read(...) resolves to the builtin.
                     #
                     # SECURITY: the file is executed with a string eval. Only feed it
                     # trusted translation files - any code in the file is run.
                     sub read {
                         my $file = shift;
                         die "read: no input file given\n" unless defined $file;

                         # Three-argument open with a lexical handle: the file name can no
                         # longer be interpreted as an open mode, and the handle cannot
                         # clash with a global IN handle used elsewhere.
                         open(my $in, '<', $file)
                             or die "Cannot open '$file' for reading: $!\n";
                         my $contents = do { local $/; <$in> };
                         close($in);
                         $contents = '' unless defined $contents;

                         # Perl stops parsing at a line consisting of __END__ or __DATA__,
                         # which would also swallow the copy statement appended below.
                         # Drop such a trailer before evaluating.
                         $contents =~ s/^__(?:END|DATA)__\s*$.*//ms;

                         # Build hash with hash from file.
                         # %Lexicon is declared here so that the assignment inside the file
                         # fills this lexical; the appended statement copies it out.
                         # The leading newline keeps the appended statement alive when the
                         # file ends in a comment line without a final newline.
                         my %filecontent = ();
                         my %Lexicon = ();
                         eval($contents."\n".'; %filecontent=%Lexicon;');
                         my $error = $@;
                         if ($error ne "") {
                             print "\nAn error occurred during the attempt to retrieve the translation hash.\n"
                                  ."Error: ".$error."\n";
                             die "Aborting: could not evaluate '$file'.\n";
                         }
                         return %filecontent;
                     }
                     37: 
                     # similarities($text)
                     #
                     # Returns a copy of $text with the punctuation characters
                     # . , _ - ? ! : removed, so that keys differing only in those
                     # characters compare as equal. Everything else, including case and
                     # whitespace, is left untouched.
                     sub similarities {
                         my ($phrase) = @_;
                         # Delete the listed characters; nothing is substituted in.
                         $phrase =~ tr/.,_\-?!://d;
                         return $phrase;
                     }
                     43: 
                     44: 
                     45: 
                     # CourseCommunity($text1, $text2)
                     #
                     # Tells whether two phrases are the same apart from using
                     # "course(s)" where the other uses "community/communities".
                     # Both words are mapped to a shared placeholder (plural forms to
                     # X001X, singular forms to X002X, matched case-insensitively) and
                     # the results are compared ignoring case.
                     #
                     # Returns 1 if the phrases match after the mapping, 0 otherwise.
                     sub CourseCommunity {
                         my ($first, $second) = @_;

                         # Applied in this order: plural forms first, so that "courses"
                         # is not consumed as "course" followed by a stray "s".
                         my @placeholder_rules = (
                             [ qr/courses/i,     'X001X' ],
                             [ qr/communities/i, 'X001X' ],
                             [ qr/course/i,      'X002X' ],
                             [ qr/community/i,   'X002X' ],
                         );

                         # The loop variable aliases $first / $second, so the
                         # substitutions modify the local copies in place.
                         for my $phrase ($first, $second) {
                             for my $rule (@placeholder_rules) {
                                 my ($pattern, $placeholder) = @{$rule};
                                 $phrase =~ s/$pattern/$placeholder/g;
                             }
                         }

                         return (lc($first) eq lc($second)) ? 1 : 0;
                     }
                     66: 
                     67: 
                     68: 
                     69: ####--------Main Program--------####
                     70: 
                     # Usage: checksimilar_2files.pl <old lang.pm> <new phrases file>
                     # Both files are loaded with &read() and therefore must define %Lexicon.
                     if (@ARGV < 2) {
                         die "Usage: $0 <old_lang.pm> <new_phrases_file>\n";
                     }
                     my $file1 = $ARGV[0];  # Old language.pm
                     my $file2 = $ARGV[1];  # New Phrases

                     print("Checking for similar expressions in phrases in $file1 and $file2...\n");

                     my %langOLD = &read($file1); #Hash with old phrases
                     my %langNEW = &read($file2); #Hash with new phrases
                     my $count = 0;

                     # Keys are processed in sorted order so that two runs over the same
                     # input produce the same output (hash order differs between runs).
                     my @oldKeys = sort keys %langOLD;

                     # Strip the punctuation from every old key once, up front, instead of
                     # repeating that work for each new phrase.
                     my %strippedOLD = ();
                     for my $kOLD (@oldKeys) {
                         $strippedOLD{$kOLD} = &similarities($kOLD);
                     }

                     # For each new phrase, check if there is already a similar one
                     for my $kNEW (sort keys %langNEW) {
                         my $temp1 = &similarities($kNEW);

                         # Find delimiter for key and value. It depends on the new key
                         # only, so it is determined once per new phrase: use double
                         # quotes if the key contains a single quote, single quotes
                         # otherwise.
                         my $hasSingle  = ($kNEW =~ /\'/) ? 1 : 0;
                         my $hasDouble  = ($kNEW =~ /\"/) ? 1 : 0;
                         my $bothQuotes = $hasSingle && $hasDouble;
                         my $dlm        = $hasSingle ? '"' : "'";

                         for my $kOLD (@oldKeys) {
                             my $temp2 = $strippedOLD{$kOLD};
                             my $vOLD  = $langOLD{$kOLD};

                             # Check for similar punctuation (case insensitive) or
                             # similarity related to Course/Community
                             next unless lc($temp1) eq lc($temp2)
                                      || &CourseCommunity($temp1, $temp2);

                             # No quoting style is safe when both kinds of quote occur;
                             # the warning is prefixed to the "# old key" line.
                             if ($bothQuotes) {
                                 print " (Warning: Both, ' and \", occur!)";
                             }

                             # Emit a ready-to-edit lexicon entry: the new key paired
                             # with the translation of the similar old key.
                             print "#   $kOLD #(Old key)\n",
                                   "   $dlm$kNEW$dlm\n",
                                   "=> $dlm$vOLD$dlm,\n",
                                   "\n";
                             $count++;
                         }
                     }
                     print("Finished. ".$count." similar expressions found!\n");
                    114: 
                    115:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>