#!/usr/bin/perl
# The LearningOnline Network with CAPA
# $Id: checksimilar_2files.pl,v 1.7 2013/09/25 13:22:42 bisitz Exp $
use strict;
use warnings;
use utf8;
use open ':utf8';
####
#### Checks, if there are similar keys in the two inputfiles.
#### For example, check the current lang.pm (first input) and newphrases.
#### So if there are similar keys you don't have to translate
#### them again but use the old value and just modify it.
#### IMPORTANT: Both inputfiles have to contain a hash %Lexicon (like lang.pm) !!!
####--------Subroutines--------####
sub read {
# Read file into memory
my $file = shift;
open(IN,$file) or die "Error: Could not open file: $file\n";
my %filecontent = ();
my $contents = join('',<IN>);
close(IN);
# Build hash with hash from file
my %Lexicon = ();
eval($contents.'; %filecontent=%Lexicon;');
if ($@ ne "") {
die "\nAn error occurred during the attempt to retrieve the translation hash.\n"
."Error: ".$@."\n";
}
return %filecontent;
}
sub similar_chars {
my $text = shift;
$text =~ s/\[_\d\]//g; # translation parameters
$text =~ s/[.,\_\-?!: \/\(\)]//g; # punctuation
return $text;
}
sub similar_phrases {
my $text1 = shift;
my $text2 = shift;
my %phrases = (
'courses' => 1,
'communities' => 1,
'course' => 2,
'community' => 2,
'member' => 3,
'student' => 3,
'students' => 3,
'construction'=> 4,
'authoring' => 4,
);
foreach my $word (keys %phrases) {
$text1 =~ s/$word/X$phrases{$word}X/gi;
$text2 =~ s/$word/X$phrases{$word}X/gi;
}
if (lc($text1) eq lc($text2)) {
return 1;
}
return 0;
}
####--------Main Program--------####
if (!$ARGV[0] or !$ARGV[1]) {
die 'Error: Invalid files! Please specify two files which should be checked.'."\n";
}
my $file1 = $ARGV[0]; # Old language.pm
my $file2 = $ARGV[1]; # New Phrases
print("Checking for similar expressions in phrases in $file1 and $file2...\n");
my %langOLD = &read($file1); #Hash with old phrases
my %langNEW = &read($file2); #Hash with new phrases
my $dlm;
my $count = 0;
# For each new phrase, check if there is already a similar one
while( my ($kNEW, $vNEW) = each %langNEW ) {
my $temp1 = $kNEW;
$temp1 = &similar_chars($temp1);
while( my ($kOLD, $vOLD) = each %langOLD ) {
my $temp2 = $kOLD;
$temp2 = &similar_chars($temp2);
#Check for similar punctuation (case insensitive) or
#similarity related to similar phrases
if (lc($temp1) eq lc($temp2) || &similar_phrases($temp1,$temp2)) {
#Find delimiter for key and value
if (($kNEW=~/\'/) & ($kNEW=~/\"/)) {
print " (Warning: Both, ' and \", occur!)";
}
if ($kNEW=~/\'/) {
$dlm = '"';
} else {
$dlm = "'";
}
print (<<ENDNEW);
# $kOLD #(Old key)
$dlm$kNEW$dlm
=> $dlm$vOLD$dlm,
ENDNEW
$count++;
}
}
}
print("Finished. ".$count." similar expressions found!\n");
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>