Annotation of loncom/localize/localize/checksimilar_2files.pl, revision 1.5
1.1 wenzelju 1: #!/usr/bin/perl
2: # The LearningOnline Network with CAPA
1.5 ! bisitz 3: # $Id: checksimilar_2files.pl,v 1.4 2013/01/10 18:07:52 bisitz Exp $
1.1 wenzelju 4:
5: use strict;
6: use warnings;
7: use utf8;
8: use open ':utf8';
9:
10: ####
11: #### Checks, if there are similar keys in the two inputfiles.
12: #### For example, check the current lang.pm (first input) and newphrases.
13: #### So if there are similar keys you don't have to translate
14: #### them again but use the old value and just modify it.
15: #### IMPORTANT: Both inputfiles have to contain a hash %Lexicon (like lang.pm) !!!
16:
17:
18: ####--------Subroutines--------####
19:
20: sub read {
21: # Read file into memory
22: my $file = shift;
23: open(IN,$file) or die;
24: my %filecontent = ();
25: my $contents = join('',<IN>);
26: close(IN);
27: # Build hash with hash from file
28: my %Lexicon = ();
29: eval($contents.'; %filecontent=%Lexicon;');
30: if ($@ ne "") {
31: print "\nAn error occurred during the attempt to retrieve the translation hash.\n"
32: ."Error: ".$@."\n";
33: die;
34: }
35: return %filecontent;
36: }
37:
1.4 bisitz 38: sub similar_chars {
1.1 wenzelju 39: my $text = shift;
1.4 bisitz 40: $text =~ s/\[_\d\]//g; # translation parameters
41: $text =~ s/[.,\_\-?!: \/]//g; # punctuation
1.1 wenzelju 42: return $text;
43: }
44:
45:
46:
1.4 bisitz 47: sub similar_phrases {
1.1 wenzelju 48:
49: my $text1 = shift;
50: my $text2 = shift;
51:
1.5 ! bisitz 52: my %phrases = (
! 53: 'courses' => 1,
! 54: 'communities' => 1,
! 55: 'course' => 2,
! 56: 'community' => 2,
! 57: 'member' => 3,
! 58: 'student' => 3,
! 59: 'students' => 3,
! 60: 'construction'=> 4,
! 61: 'authoring' => 4,
! 62: );
! 63:
! 64: foreach my $word (keys %phrases) {
! 65: $text1 =~ s/$phrase/X$phrases{$word}X/gi;
! 66: $text2 =~ s/$phrase/X$phrases{$word}X/gi;
! 67: }
1.1 wenzelju 68:
1.4 bisitz 69: if (lc($text1) eq lc($text2)) {
1.1 wenzelju 70: return 1;
71: }
72:
73: return 0;
74: }
75:
76:
77:
78: ####--------Main Program--------####
79:
80: my $file1 = $ARGV[0]; # Old language.pm
81: my $file2 = $ARGV[1]; # New Phrases
1.3 bisitz 82:
83: print("Checking for similar expressions in phrases in $file1 and $file2...\n");
84:
1.1 wenzelju 85: my %langOLD = &read($file1); #Hash with old phrases
86: my %langNEW = &read($file2); #Hash with new phrases
87: my $dlm;
1.3 bisitz 88: my $count = 0;
1.1 wenzelju 89:
90: # For each new phrase, check if there is already a similar one
91: while( my ($kNEW, $vNEW) = each %langNEW ) {
92: my $temp1 = $kNEW;
1.4 bisitz 93: $temp1 = &similar_chars($temp1);
1.1 wenzelju 94:
95: while( my ($kOLD, $vOLD) = each %langOLD ) {
96: my $temp2 = $kOLD;
1.4 bisitz 97: $temp2 = &similar_chars($temp2);
1.1 wenzelju 98:
99: #Check for similar punctuation (case insensitive) or
1.4 bisitz 100: #similarity related to similar phrases
101: if (lc($temp1) eq lc($temp2) || &similar_phrases($temp1,$temp2)) {
1.1 wenzelju 102: #Find delimiter for key and value
103: if (($kNEW=~/\'/) & ($kNEW=~/\"/)) {
104: print " (Warning: Both, ' and \", occur!)";
105: }
106: if ($kNEW=~/\'/) {
107: $dlm = '"';
108: } else {
109: $dlm = "'";
110: }
1.3 bisitz 111: print (<<ENDNEW);
112: # $kOLD #(Old key)
1.1 wenzelju 113: $dlm$kNEW$dlm
114: => $dlm$vOLD$dlm,
115:
116: ENDNEW
117: $count++;
118:
119: }
120: }
121: }
122: print("Finished. ".$count." similar expressions found!\n");
123:
124:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>