Annotation of loncom/localize/localize/checksimilar_2files.pl, revision 1.1

1.1     ! wenzelju    1: #!/usr/bin/perl
        !             2: # The LearningOnline Network with CAPA
        !             3: # $Id: checksimilar_2files.pl,v 1.1 2010/03/09 16:15:00 wenzelju Exp $
        !             4: 
        !             5: use strict;
        !             6: use warnings;
        !             7: use utf8;
        !             8: use open ':utf8';
        !             9: 
        !            10: ####
        !            11: #### Checks, if there are similar keys in the two inputfiles.
        !            12: #### For example, check the current lang.pm (first input) and newphrases.
        !            13: #### So if there are similar keys you don't have to translate
        !            14: #### them again but use the old value and just modify it.
        !            15: #### IMPORTANT: Both inputfiles have to contain a hash %Lexicon (like lang.pm) !!!
        !            16: 
        !            17: 
        !            18: ####--------Subroutines--------#### 
        !            19: 
        !            20: sub read {
        !            21:     # Read file into memory
        !            22:     my $file = shift;
        !            23:     open(IN,$file) or die;
        !            24:     my %filecontent = ();
        !            25:     my $contents = join('',<IN>);
        !            26:     close(IN);
        !            27:     # Build hash with hash from file
        !            28:     my %Lexicon = ();
        !            29:     eval($contents.'; %filecontent=%Lexicon;');
        !            30:     if ($@ ne "") {
        !            31:         print "\nAn error occurred during the attempt to retrieve the translation hash.\n"
        !            32:              ."Error: ".$@."\n";
        !            33:         die;
        !            34:     }
        !            35:     return %filecontent;
        !            36: }
        !            37: 
        !            38: sub similarities{
        !            39:     my $text = shift;
        !            40:     $text =~ s/[.,\_\-?!:]//g;
        !            41:     return $text;
        !            42: }
        !            43: 
        !            44: 
        !            45: 
        !            46: sub CourseCommunity {
        !            47:     
        !            48:     my $text1 = shift;
        !            49:     my $text2 = shift;
        !            50:     
        !            51:     $text1 =~ s/courses//gi;
        !            52:     $text1 =~ s/communities//gi;    
        !            53:     $text1 =~ s/course//gi;
        !            54:     $text1 =~ s/community//gi;
        !            55:     $text2 =~ s/courses//gi;
        !            56:     $text2 =~ s/communities//gi;
        !            57:     $text2 =~ s/course//gi;
        !            58:     $text2 =~ s/community//gi;
        !            59: 
        !            60:     if(lc($text1) eq lc($text2)) {
        !            61:         return 1;
        !            62:     }
        !            63:     
        !            64:     return 0;
        !            65: }
        !            66: 
        !            67: 
        !            68: 
        !            69: ####--------Main Program--------####
        !            70: 
        !            71: my $file1 = $ARGV[0];  # Old language.pm
        !            72: my $file2 = $ARGV[1];  # New Phrases
        !            73: my %langOLD = &read($file1); #Hash with old phrases
        !            74: my %langNEW = &read($file2); #Hash with new phrases
        !            75: my $dlm; 
        !            76: my $count = 1; #Counter
        !            77: 
        !            78: open(OUT,'>similarities.txt') or die;
        !            79: 
        !            80: # For each new phrase, check if there is already a similar one
        !            81: while( my ($kNEW, $vNEW) = each %langNEW ) {
        !            82:     my $temp1 = $kNEW;
        !            83:     $temp1 = &similarities($temp1);
        !            84:    
        !            85:     while( my ($kOLD, $vOLD) = each %langOLD ) {
        !            86:         my $temp2 = $kOLD;
        !            87:         $temp2 = &similarities($temp2);
        !            88: 
        !            89:         #Check for similar punctuation (case insensitive) or
        !            90:         #similarity related to Course/Community 
        !            91:         if(lc($temp1) eq lc($temp2) || &CourseCommunity($temp1,$temp2)){
        !            92:             #Find delimiter for key and value
        !            93:             if (($kNEW=~/\'/) & ($kNEW=~/\"/)) {
        !            94:                 print " (Warning: Both, ' and \", occur!)";
        !            95:             }
        !            96:             if ($kNEW=~/\'/) {
        !            97: 	        $dlm = '"';
        !            98: 	    } else {
        !            99: 	        $dlm = "'";
        !           100: 	    }
        !           101:             print OUT (<<ENDNEW);
        !           102: #Old key: $kOLD
        !           103:    $dlm$kNEW$dlm
        !           104: => $dlm$vOLD$dlm,
        !           105: 
        !           106: ENDNEW
        !           107:             $count++;
        !           108: 
        !           109:         }
        !           110:     }
        !           111: }
        !           112: print("Finished. ".$count." similar expressions found!\n");
        !           113: 
        !           114: 

FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>