Annotation of loncom/localize/localize/checksimilar_2files.pl, revision 1.3
1.1 wenzelju 1: #!/usr/bin/perl
2: # The LearningOnline Network with CAPA
1.3 ! bisitz 3: # $Id: checksimilar_2files.pl,v 1.2 2010/08/03 13:00:21 bisitz Exp $
1.1 wenzelju 4:
5: use strict;
6: use warnings;
7: use utf8;
8: use open ':utf8';
9:
10: ####
11: #### Checks whether the two input files contain similar keys.
12: #### For example, compare the current lang.pm (first input) with newphrases (second input).
13: #### If similar keys exist, you do not have to translate them again:
14: #### reuse the old value and just modify it where necessary.
15: #### IMPORTANT: Both input files have to contain a hash %Lexicon (like lang.pm)!
16:
17:
18: ####--------Subroutines--------####
19:
# Load the translation hash %Lexicon defined by a Perl source file.
#
# Parameters:
#   $file - path of a file whose Perl code populates a hash named %Lexicon
#           (for example lang.pm or a newphrases file).
# Returns:
#   A key/value list holding a copy of the %Lexicon the file defined.
# Dies:
#   If no file name is given, the file cannot be opened, or its contents
#   fail to evaluate as Perl.
#
# NOTE(security): the file contents are executed via string eval, so only
# trusted files must be passed in.
# NOTE: this sub shares its name with the builtin read(); call it as
# &read(...) so that Perl dispatches to this sub.
sub read {
    my $file = shift;
    defined($file) or die "No input file given\n";

    # Three-argument open with a lexical handle: the file name can no longer
    # be interpreted as an open mode or a pipe command.
    open(my $in, '<:utf8', $file)
        or die "Cannot open '$file' for reading: $!\n";
    my $contents = join('', <$in>);
    close($in);

    # The evaluated code assigns to the lexical %Lexicon declared here;
    # its content is then copied into %filecontent.
    my %filecontent = ();
    my %Lexicon = ();
    # The newline before the appended statement keeps it from being swallowed
    # by a trailing comment when the file does not end with a newline.
    eval($contents . "\n" . '; %filecontent=%Lexicon;');
    if ($@ ne "") {
        print "\nAn error occurred during the attempt to retrieve the translation hash.\n"
             ."Error: ".$@."\n";
        die "Could not evaluate '$file' as Perl code\n";
    }
    return %filecontent;
}
37:
# Return a copy of the given phrase with the punctuation characters
# . , _ - ? ! : removed, so that phrases differing only in these
# characters compare as equal.
sub similarities {
    my ($phrase) = @_;
    # Pure character deletion, so transliteration with /d does the job.
    $phrase =~ tr/.,_\-?!://d;
    return $phrase;
}
43:
44:
45:
# Decide whether two phrases differ only in using "course(s)" versus
# "community/communities" (and letter case).
#
# Parameters:
#   first argument  - first phrase
#   second argument - second phrase
# Returns:
#   1 if both phrases are equal (case-insensitively) after every plural
#   form has been replaced by one placeholder and every singular form by
#   another; 0 otherwise.
sub CourseCommunity {
    my @phrases = @_[0, 1];

    # Order matters: the plural forms must be replaced before the singular
    # ones, because "course" is a prefix of "courses".
    my @rules = (
        [ qr/courses/i,     'X001X' ],
        [ qr/communities/i, 'X001X' ],
        [ qr/course/i,      'X002X' ],
        [ qr/community/i,   'X002X' ],
    );

    foreach my $phrase (@phrases) {
        foreach my $rule (@rules) {
            my ($pattern, $placeholder) = @{$rule};
            $phrase =~ s/$pattern/$placeholder/g;
        }
    }

    return (lc($phrases[0]) eq lc($phrases[1])) ? 1 : 0;
}
66:
67:
68:
69: ####--------Main Program--------####
70:
# Main program: for every key of the new phrases file, look for keys of the
# old language file that are similar (equal apart from punctuation and
# letter case, or differing only in course/community wording). Each hit is
# printed as a hash entry that pairs the new key with the old translation.
#
# Command line:
#   $ARGV[0] - old language file (e.g. lang.pm)
#   $ARGV[1] - file with the new phrases
if (@ARGV < 2) {
    die "Usage: $0 <old language file> <new phrases file>\n";
}

my $file1 = $ARGV[0]; # Old language.pm
my $file2 = $ARGV[1]; # New Phrases

print("Checking for similar expressions in phrases in $file1 and $file2...\n");

# Keep the "&" sigil on these calls: a plain read(...) would be parsed as
# Perl's builtin read() instead of the sub defined in this file.
my %langOLD = &read($file1); #Hash with old phrases
my %langNEW = &read($file2); #Hash with new phrases
my $count = 0;

# Normalize every old key once instead of once per new key.
# Each entry: [ original key, punctuation-free key, lower-cased variant ].
my @oldEntries = map {
    my $stripped = similarities($_);
    [ $_, $stripped, lc($stripped) ];
} keys %langOLD;

# For each new phrase, check if there is already a similar one
foreach my $kNEW (keys %langNEW) {
    my $strippedNEW = similarities($kNEW);
    my $lowerNEW    = lc($strippedNEW);

    # The delimiter depends on the new key only: use double quotes when the
    # key itself contains a single quote, single quotes otherwise.
    my $hasSingle = ($kNEW =~ /\'/) ? 1 : 0;
    my $hasDouble = ($kNEW =~ /\"/) ? 1 : 0;
    my $dlm       = $hasSingle ? '"' : "'";

    foreach my $entry (@oldEntries) {
        my ($kOLD, $strippedOLD, $lowerOLD) = @{$entry};

        #Check for similar punctuation (case insensitive) or
        #similarity related to Course/Community
        next unless $lowerNEW eq $lowerOLD
                 || CourseCommunity($strippedNEW, $strippedOLD);

        # With both quote characters present the printed entry is not
        # valid Perl and has to be fixed by hand - warn about it.
        if ($hasSingle && $hasDouble) {
            print " (Warning: Both, ' and \", occur!)";
        }

        my $vOLD = $langOLD{$kOLD};
        print (<<ENDNEW);
# $kOLD #(Old key)
$dlm$kNEW$dlm
=> $dlm$vOLD$dlm,

ENDNEW
        $count++;
    }
}
print("Finished. ".$count." similar expressions found!\n");
114:
115:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>