Annotation of loncom/localize/localize/checksimilar_2files.pl, revision 1.1
1.1 ! wenzelju 1: #!/usr/bin/perl
! 2: # The LearningOnline Network with CAPA
! 3: # $Id: checksimilar_2files.pl,v 1.1 2010/03/09 16:15:00 wenzelju Exp $
! 4:
! 5: use strict;
! 6: use warnings;
! 7: use utf8;
! 8: use open ':utf8';
! 9:
! 10: ####
! 11: #### Checks, if there are similar keys in the two inputfiles.
! 12: #### For example, check the current lang.pm (first input) and newphrases.
! 13: #### So if there are similar keys you don't have to translate
! 14: #### them again but use the old value and just modify it.
! 15: #### IMPORTANT: Both inputfiles have to contain a hash %Lexicon (like lang.pm) !!!
! 16:
! 17:
! 18: ####--------Subroutines--------####
! 19:
! 20: sub read {
! 21: # Read file into memory
! 22: my $file = shift;
! 23: open(IN,$file) or die;
! 24: my %filecontent = ();
! 25: my $contents = join('',<IN>);
! 26: close(IN);
! 27: # Build hash with hash from file
! 28: my %Lexicon = ();
! 29: eval($contents.'; %filecontent=%Lexicon;');
! 30: if ($@ ne "") {
! 31: print "\nAn error occurred during the attempt to retrieve the translation hash.\n"
! 32: ."Error: ".$@."\n";
! 33: die;
! 34: }
! 35: return %filecontent;
! 36: }
! 37:
! 38: sub similarities{
! 39: my $text = shift;
! 40: $text =~ s/[.,\_\-?!:]//g;
! 41: return $text;
! 42: }
! 43:
! 44:
! 45:
! 46: sub CourseCommunity {
! 47:
! 48: my $text1 = shift;
! 49: my $text2 = shift;
! 50:
! 51: $text1 =~ s/courses//gi;
! 52: $text1 =~ s/communities//gi;
! 53: $text1 =~ s/course//gi;
! 54: $text1 =~ s/community//gi;
! 55: $text2 =~ s/courses//gi;
! 56: $text2 =~ s/communities//gi;
! 57: $text2 =~ s/course//gi;
! 58: $text2 =~ s/community//gi;
! 59:
! 60: if(lc($text1) eq lc($text2)) {
! 61: return 1;
! 62: }
! 63:
! 64: return 0;
! 65: }
! 66:
! 67:
! 68:
! 69: ####--------Main Program--------####
! 70:
! 71: my $file1 = $ARGV[0]; # Old language.pm
! 72: my $file2 = $ARGV[1]; # New Phrases
! 73: my %langOLD = &read($file1); #Hash with old phrases
! 74: my %langNEW = &read($file2); #Hash with new phrases
! 75: my $dlm;
! 76: my $count = 1; #Counter
! 77:
! 78: open(OUT,'>similarities.txt') or die;
! 79:
! 80: # For each new phrase, check if there is already a similar one
! 81: while( my ($kNEW, $vNEW) = each %langNEW ) {
! 82: my $temp1 = $kNEW;
! 83: $temp1 = &similarities($temp1);
! 84:
! 85: while( my ($kOLD, $vOLD) = each %langOLD ) {
! 86: my $temp2 = $kOLD;
! 87: $temp2 = &similarities($temp2);
! 88:
! 89: #Check for similar punctuation (case insensitive) or
! 90: #similarity related to Course/Community
! 91: if(lc($temp1) eq lc($temp2) || &CourseCommunity($temp1,$temp2)){
! 92: #Find delimiter for key and value
! 93: if (($kNEW=~/\'/) & ($kNEW=~/\"/)) {
! 94: print " (Warning: Both, ' and \", occur!)";
! 95: }
! 96: if ($kNEW=~/\'/) {
! 97: $dlm = '"';
! 98: } else {
! 99: $dlm = "'";
! 100: }
! 101: print OUT (<<ENDNEW);
! 102: #Old key: $kOLD
! 103: $dlm$kNEW$dlm
! 104: => $dlm$vOLD$dlm,
! 105:
! 106: ENDNEW
! 107: $count++;
! 108:
! 109: }
! 110: }
! 111: }
! 112: print("Finished. ".$count." similar expressions found!\n");
! 113:
! 114:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>