#!/usr/bin/perl
# The LearningOnline Network with CAPA
# $Id: checksimilar_1file.pl,v 1.2 2013/01/21 15:29:30 bisitz Exp $
use strict;
use warnings;
####
#### Checks whether the input file (for example de.pm) contains similar keys
####
####--------Configuration--------####
# Include check for similar phrases -> set to 1
# When set to a true value, similarities() additionally replaces certain
# phrases (e.g. "course"/"community", "member"/"student") by placeholder
# tokens, so keys that differ only in those phrases are reported as similar.
# With 0, only translation parameters and punctuation are ignored.
my $inclphrases = 0;
####--------Subroutines--------####
sub read {
    # Load a Perl lexicon file (for example de.pm) and return the contents
    # of the %Lexicon hash that the file fills.
    #
    # Parameter: $fn - path of the file to load.
    # Returns:   the key/value pairs of %Lexicon as a flat list
    #            (assign the result to a hash).
    # Dies:      if no file name is given, the file cannot be opened, or
    #            the code in the file fails to compile or run.
    #
    # NOTE: the file content is executed with a string eval. Only use this
    # on trusted lexicon files.
    #
    # NOTE: this sub shadows the built-in read(); call it as &read(...).
    my $fn = shift;

    # A three-argument open with an undefined name would silently open an
    # anonymous temporary file, so reject a missing name explicitly.
    die "No input file given\n" unless defined $fn && length $fn;

    # Read file into memory
    open(my $in, '<', $fn) or die "Cannot open '$fn' for reading: $!\n";
    my $contents = join('', <$in>);
    close($in);

    # Build hash with hash from file
    my %filecontent = ();
    my %Lexicon     = ();
    # The newline keeps the appended statement from being swallowed by a
    # trailing comment in a file that lacks a final newline.
    eval($contents . "\n" . '; %filecontent=%Lexicon;');
    die "Error while evaluating '$fn': $@" if $@;

    # If the file contains __END__, the appended copy statement is never
    # reached; the lexical %Lexicon filled by the file is still available.
    %filecontent = %Lexicon unless %filecontent;

    return %filecontent;
}
sub similarities {
    # Reduce a lexicon key to a normalized form used for similarity checks.
    #
    # Parameter: $text - the key to normalize.
    # Returns:   the key with translation parameters ([_1], [_12], ...) and
    #            the punctuation characters . , _ - ? ! : space / removed.
    #            If $inclphrases is true, certain phrases are additionally
    #            replaced by placeholder tokens.
    #
    # Case is left untouched; the caller compares case-insensitively.
    my $text = shift;

    # Translation parameters. \d+ also covers parameters with more than one
    # digit such as [_10], which would otherwise survive as "[10]".
    $text =~ s/\[_\d+\]//g;
    $text =~ s/[.,\_\-?!: \/]//g; # punctuation

    if ($inclphrases) {
        $text =~ s/course/X002X/gi;
        $text =~ s/community/X002X/gi;
        # NOTE(review): "communities" maps to a different token than
        # "community"; kept as in the original - confirm this is intended.
        $text =~ s/communities/X001X/gi;
        $text =~ s/member/X003X/gi;
        # The plural must be replaced before the singular: once "student"
        # has been substituted, "students" can no longer match.
        $text =~ s/students/X003X/gi;
        $text =~ s/student/X003X/gi;
    }
    return $text;
}
####--------Main program--------####
# Usage: checksimilar_1file.pl <lexicon file>
#
# Prints one line "###<key>###<similar key>###" for every key that has the
# same normalized form as another key, followed by a summary line.
my $file = $ARGV[0];
die "Usage: $0 <lexicon file>\n" unless defined $file;

# The & sigil is required: read(...) would call Perl's built-in read().
my %lang = &read($file);
my $count = 0;

# Group the keys by their normalized, case-insensitive form. Each key is
# normalized exactly once instead of once per pair of keys.
my %keys_for;
for my $key (sort keys %lang) {
    push @{ $keys_for{ lc(similarities($key)) } }, $key;
}

# Within each group, report every further key against the first one.
# Sorting makes the output reproducible between runs.
for my $normalized (sort keys %keys_for) {
    my ($first, @others) = @{ $keys_for{$normalized} };
    for my $other (@others) {
        print('###'.$first."###".$other."###\n");
        $count++;
    }
}
print("Finished. ".$count." similar keys found.\n");
# FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>