version 1.3, 2013/01/07 15:13:26
|
version 1.4, 2013/01/10 18:07:52
|
Line 35 sub read {
|
Line 35 sub read {
|
return %filecontent; |
return %filecontent; |
} |
} |
|
|
sub similarities{ |
sub similar_chars { |
my $text = shift; |
my $text = shift; |
$text =~ s/[.,\_\-?!:]//g; |
$text =~ s/\[_\d\]//g; # translation parameters |
|
$text =~ s/[.,\_\-?!: \/]//g; # punctuation |
return $text; |
return $text; |
} |
} |
|
|
|
|
|
|
sub CourseCommunity { |
sub similar_phrases { |
|
|
my $text1 = shift; |
my $text1 = shift; |
my $text2 = shift; |
my $text2 = shift; |
Line 52 sub CourseCommunity {
|
Line 53 sub CourseCommunity {
|
$text1 =~ s/communities/X001X/gi; |
$text1 =~ s/communities/X001X/gi; |
$text1 =~ s/course/X002X/gi; |
$text1 =~ s/course/X002X/gi; |
$text1 =~ s/community/X002X/gi; |
$text1 =~ s/community/X002X/gi; |
|
$text1 =~ s/member/X003X/gi; |
|
$text1 =~ s/student/X003X/gi; |
|
$text1 =~ s/students/X003X/gi; |
|
|
$text2 =~ s/courses/X001X/gi; |
$text2 =~ s/courses/X001X/gi; |
$text2 =~ s/communities/X001X/gi; |
$text2 =~ s/communities/X001X/gi; |
$text2 =~ s/course/X002X/gi; |
$text2 =~ s/course/X002X/gi; |
$text2 =~ s/community/X002X/gi; |
$text2 =~ s/community/X002X/gi; |
|
$text2 =~ s/member/X003X/gi; |
|
$text2 =~ s/student/X003X/gi; |
|
$text2 =~ s/students/X003X/gi; |
|
|
if(lc($text1) eq lc($text2)) { |
if (lc($text1) eq lc($text2)) { |
return 1; |
return 1; |
} |
} |
|
|
Line 81 my $count = 0;
|
Line 89 my $count = 0;
|
# For each new phrase, check if there is already a similar one |
# For each new phrase, check if there is already a similar one |
while( my ($kNEW, $vNEW) = each %langNEW ) { |
while( my ($kNEW, $vNEW) = each %langNEW ) { |
my $temp1 = $kNEW; |
my $temp1 = $kNEW; |
$temp1 = &similarities($temp1); |
$temp1 = &similar_chars($temp1); |
|
|
while( my ($kOLD, $vOLD) = each %langOLD ) { |
while( my ($kOLD, $vOLD) = each %langOLD ) { |
my $temp2 = $kOLD; |
my $temp2 = $kOLD; |
$temp2 = &similarities($temp2); |
$temp2 = &similar_chars($temp2); |
|
|
#Check for similar punctuation (case insensitive) or |
#Check for similar punctuation (case insensitive) or |
#similarity related to Course/Community |
#similarity related to similar phrases |
if(lc($temp1) eq lc($temp2) || &CourseCommunity($temp1,$temp2)){ |
if (lc($temp1) eq lc($temp2) || &similar_phrases($temp1,$temp2)) { |
#Find delimiter for key and value |
#Find delimiter for key and value |
if (($kNEW=~/\'/) & ($kNEW=~/\"/)) { |
if (($kNEW=~/\'/) & ($kNEW=~/\"/)) { |
print " (Warning: Both, ' and \", occur!)"; |
print " (Warning: Both, ' and \", occur!)"; |