1: #!/usr/bin/perl
2: # The LearningOnline Network with CAPA
3: # $Id: checksimilar_2files.pl,v 1.7 2013/09/25 13:22:42 bisitz Exp $
4:
5: use strict;
6: use warnings;
7: use utf8;
8: use open ':utf8';
9:
10: ####
11: #### Checks, if there are similar keys in the two inputfiles.
12: #### For example, check the current lang.pm (first input) and newphrases.
13: #### So if there are similar keys you don't have to translate
14: #### them again but use the old value and just modify it.
15: #### IMPORTANT: Both inputfiles have to contain a hash %Lexicon (like lang.pm) !!!
16:
17:
18: ####--------Subroutines--------####
19:
20: sub read {
21: # Read file into memory
22: my $file = shift;
23: open(IN,$file) or die "Error: Could not open file: $file\n";
24: my %filecontent = ();
25: my $contents = join('',<IN>);
26: close(IN);
27: # Build hash with hash from file
28: my %Lexicon = ();
29: eval($contents.'; %filecontent=%Lexicon;');
30: if ($@ ne "") {
31: die "\nAn error occurred during the attempt to retrieve the translation hash.\n"
32: ."Error: ".$@."\n";
33: }
34: return %filecontent;
35: }
36:
37: sub similar_chars {
38: my $text = shift;
39: $text =~ s/\[_\d\]//g; # translation parameters
40: $text =~ s/[.,\_\-?!: \/\(\)]//g; # punctuation
41: return $text;
42: }
43:
44:
45:
46: sub similar_phrases {
47:
48: my $text1 = shift;
49: my $text2 = shift;
50:
51: my %phrases = (
52: 'courses' => 1,
53: 'communities' => 1,
54: 'course' => 2,
55: 'community' => 2,
56: 'member' => 3,
57: 'student' => 3,
58: 'students' => 3,
59: 'construction'=> 4,
60: 'authoring' => 4,
61: );
62:
63: foreach my $word (keys %phrases) {
64: $text1 =~ s/$word/X$phrases{$word}X/gi;
65: $text2 =~ s/$word/X$phrases{$word}X/gi;
66: }
67:
68: if (lc($text1) eq lc($text2)) {
69: return 1;
70: }
71:
72: return 0;
73: }
74:
75:
76:
77: ####--------Main Program--------####
78:
79: if (!$ARGV[0] or !$ARGV[1]) {
80: die 'Error: Invalid files! Please specify two files which should be checked.'."\n";
81: }
82:
83: my $file1 = $ARGV[0]; # Old language.pm
84: my $file2 = $ARGV[1]; # New Phrases
85:
86: print("Checking for similar expressions in phrases in $file1 and $file2...\n");
87:
88: my %langOLD = &read($file1); #Hash with old phrases
89: my %langNEW = &read($file2); #Hash with new phrases
90: my $dlm;
91: my $count = 0;
92:
93: # For each new phrase, check if there is already a similar one
94: while( my ($kNEW, $vNEW) = each %langNEW ) {
95: my $temp1 = $kNEW;
96: $temp1 = &similar_chars($temp1);
97:
98: while( my ($kOLD, $vOLD) = each %langOLD ) {
99: my $temp2 = $kOLD;
100: $temp2 = &similar_chars($temp2);
101:
102: #Check for similar punctuation (case insensitive) or
103: #similarity related to similar phrases
104: if (lc($temp1) eq lc($temp2) || &similar_phrases($temp1,$temp2)) {
105: #Find delimiter for key and value
106: if (($kNEW=~/\'/) & ($kNEW=~/\"/)) {
107: print " (Warning: Both, ' and \", occur!)";
108: }
109: if ($kNEW=~/\'/) {
110: $dlm = '"';
111: } else {
112: $dlm = "'";
113: }
114: print (<<ENDNEW);
115: # $kOLD #(Old key)
116: $dlm$kNEW$dlm
117: => $dlm$vOLD$dlm,
118:
119: ENDNEW
120: $count++;
121:
122: }
123: }
124: }
125: print("Finished. ".$count." similar expressions found!\n");
126:
127:
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>