--- loncom/localize/localize/checksimilar_2files.pl 2013/01/07 15:13:26 1.3 +++ loncom/localize/localize/checksimilar_2files.pl 2013/01/10 18:07:52 1.4 @@ -1,6 +1,6 @@ #!/usr/bin/perl # The LearningOnline Network with CAPA -# $Id: checksimilar_2files.pl,v 1.3 2013/01/07 15:13:26 bisitz Exp $ +# $Id: checksimilar_2files.pl,v 1.4 2013/01/10 18:07:52 bisitz Exp $ use strict; use warnings; @@ -35,15 +35,16 @@ sub read { return %filecontent; } -sub similarities{ +sub similar_chars { my $text = shift; - $text =~ s/[.,\_\-?!:]//g; + $text =~ s/\[_\d\]//g; # translation parameters + $text =~ s/[.,\_\-?!: \/]//g; # punctuation return $text; } -sub CourseCommunity { +sub similar_phrases { my $text1 = shift; my $text2 = shift; @@ -52,12 +53,19 @@ sub CourseCommunity { $text1 =~ s/communities/X001X/gi; $text1 =~ s/course/X002X/gi; $text1 =~ s/community/X002X/gi; + $text1 =~ s/member/X003X/gi; + $text1 =~ s/student/X003X/gi; + $text1 =~ s/students/X003X/gi; + $text2 =~ s/courses/X001X/gi; $text2 =~ s/communities/X001X/gi; $text2 =~ s/course/X002X/gi; $text2 =~ s/community/X002X/gi; + $text2 =~ s/member/X003X/gi; + $text2 =~ s/student/X003X/gi; + $text2 =~ s/students/X003X/gi; - if(lc($text1) eq lc($text2)) { + if (lc($text1) eq lc($text2)) { return 1; } @@ -81,15 +89,15 @@ my $count = 0; # For each new phrase, check if there is already a similar one while( my ($kNEW, $vNEW) = each %langNEW ) { my $temp1 = $kNEW; - $temp1 = &similarities($temp1); + $temp1 = &similar_chars($temp1); while( my ($kOLD, $vOLD) = each %langOLD ) { my $temp2 = $kOLD; - $temp2 = &similarities($temp2); + $temp2 = &similar_chars($temp2); #Check for similar punctuation (case insensitive) or - #similarity related to Course/Community - if(lc($temp1) eq lc($temp2) || &CourseCommunity($temp1,$temp2)){ + #similarity related to similar phrases + if (lc($temp1) eq lc($temp2) || &similar_phrases($temp1,$temp2)) { #Find delimiter for key and value if (($kNEW=~/\'/) & ($kNEW=~/\"/)) { print " (Warning: Both, ' and \", occur!)";