#!/usr/bin/perl
#
# The LearningOnline Network
#
# $Id: parse_activity_log.pl,v 1.11 2004/12/22 20:42:39 matthew Exp $
#
# Copyright Michigan State University Board of Trustees
#
# This file is part of the LearningOnline Network with CAPA (LON-CAPA).
#
# LON-CAPA is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# LON-CAPA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with LON-CAPA; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# /home/httpd/html/adm/gpl.txt
#
# http://www.lon-capa.org/
#
#--------------------------------------------------------------------
#
# Exit codes
# 0 Everything is okay
# 1 Another copy is running on this course
# 2 Activity log does not exist
# 3 Unable to connect to database
# 4 Unable to create database tables
# 5 Unable to open log file
# 6 Unable to get lock on activity log
#
#
# Notes:
#
# Logging is done via the $logthis variable, which may be the result of
# overcleverness. log via $logthis->('logtext'); Those are parentheses,
# not curly braces. If the -log command line parameter is set, the $logthis
# routine is set to a routine which writes to a file. If the command line
# parameter is not set $logthis is set to ¬hing, which does what you
# would expect.
#
use strict;
use DBI;
use lib '/home/httpd/lib/perl/Apache';
use lib '/home/httpd/lib/perl/';
use LONCAPA::Configuration();
use Apache::lonmysql();
use lonmysql();
use Time::HiRes();
use Getopt::Long();
use IO::File;
use File::Copy;
use Fcntl qw(:flock);
#
# Determine parameters
my ($help,$course,$domain,$drop,$file,$time_run,$nocleanup,$log,$backup);
&Getopt::Long::GetOptions( "course=s" => \$course,
"domain=s" => \$domain,
"backup" => \$backup,
"help" => \$help,
"logfile=s" => \$file,
"timerun" => \$time_run,
"nocleanup" => \$nocleanup,
"drop" => \$drop,
"log" => \$log);
if (! defined($course) || $help) {
print<<USAGE;
parse_activity_log.pl
Process a lon-capa activity log into a database.
Parameters:
course Required
domain Optional
backup optional if present, backup the activity log file
before processing it
drop optional if present, drop all course
specific activity log tables.
file optional Specify the file to parse, including path
time optional if present, print out timing data
nocleanup optional if present, do not remove old files
log optional if present, prepare log file of activity
Examples:
$0 -course=123456abcdef -domain=msu
$0 -course=123456abcdef -file=activity.log
USAGE
exit;
}
##
## Set up timing code
my $time_this = \¬hing;
if ($time_run) {
$time_this = \&time_action;
}
my $initial_time = Time::HiRes::time;
##
## Read in configuration parameters
##
my %perlvar = %{&LONCAPA::Configuration::read_conf('loncapa.conf')};
if (! defined($domain) || $domain eq '') {
$domain = $perlvar{'lonDefDomain'};
}
&update_process_name($course.'@'.$domain);
##
## Set up logging code
my $logthis = \¬hing;
if ($log) {
my $logfile = $perlvar{'lonDaemons'}.'/tmp/parse_activity_log.log.'.time;
print STDERR "$0: logging to $logfile".$/;
if (! open(LOGFILE,">$logfile")) {
warn("Unable to open $logfile for writing. Run aborted.");
exit 5;
} else {
$logthis = \&log_to_file;
}
}
##
## Determine filenames
##
my $sourcefilename; # activity log data
my $newfilename; # $sourcefilename will be renamed to this
my $gz_sql_filename; # the gzipped mysql backup data file name.
my $error_filename; # Errors in parsing the activity log will be written here
if ($file) {
$sourcefilename = $file;
} else {
$sourcefilename = &get_filename($course,$domain);
}
my $sql_filename = $sourcefilename;
$sql_filename =~ s|[^/]*$|activity.log.sql|;
$gz_sql_filename = $sql_filename.'.gz';
$error_filename = $sourcefilename;
$error_filename =~ s|[^/]*$|activity.log.errors|;
$logthis->('Beginning logging '.time);
#
# Wait for a lock on the lockfile to avoid collisions
my $lockfilename = $sourcefilename.'.lock';
open(LOCKFILE,'>'.$lockfilename);
if (!flock(LOCKFILE,LOCK_EX)) {
warn("Unable to lock $lockfilename. Aborting".$/);
exit 6;
}
##
## There will only be a $newfilename file if a copy of this program is already
## running.
my $newfilename = $sourcefilename.'.processing';
if (-e $newfilename) {
warn "$newfilename exists";
$logthis->($newfilename.' exists, so I cannot work on it.');
exit 2;
}
if (-e $sourcefilename) {
$logthis->('renaming '.$sourcefilename.' to '.$newfilename);
rename($sourcefilename,$newfilename);
Copy($newfilename,$newfilename.'.'.time) if ($backup);
$logthis->("renamed $sourcefilename to $newfilename");
} else {
my $command = 'touch '.$newfilename;
$logthis->($command);
system($command);
$logthis->('touch was completed');
}
close(LOCKFILE);
##
## Table definitions
##
my $prefix = $course.'_'.$domain.'_';
my $student_table = $prefix.'students';
my $student_table_def =
{ id => $student_table,
permanent => 'no',
columns => [
{ name => 'student_id',
type => 'MEDIUMINT UNSIGNED',
restrictions => 'NOT NULL',
auto_inc => 'yes', },
{ name => 'student',
type => 'VARCHAR(100) BINARY',
restrictions => 'NOT NULL', },
],
'PRIMARY KEY' => ['student_id',],
};
my $res_table = $prefix.'resource';
my $res_table_def =
{ id => $res_table,
permanent => 'no',
columns => [{ name => 'res_id',
type => 'MEDIUMINT UNSIGNED',
restrictions => 'NOT NULL',
auto_inc => 'yes', },
{ name => 'resource',
type => 'MEDIUMTEXT',
restrictions => 'NOT NULL'},
],
'PRIMARY KEY' => ['res_id'],
};
#my $action_table = $prefix.'actions';
#my $action_table_def =
#{ id => $action_table,
# permanent => 'no',
# columns => [{ name => 'action_id',
# type => 'MEDIUMINT UNSIGNED',
# restrictions => 'NOT NULL',
# auto_inc => 'yes', },
# { name => 'action',
# type => 'VARCHAR(100)',
# restrictions => 'NOT NULL'},
# ],
# 'PRIMARY KEY' => ['action_id',],
#};
my $machine_table = $prefix.'machine_table';
my $machine_table_def =
{ id => $machine_table,
permanent => 'no',
columns => [{ name => 'machine_id',
type => 'MEDIUMINT UNSIGNED',
restrictions => 'NOT NULL',
auto_inc => 'yes', },
{ name => 'machine',
type => 'VARCHAR(100)',
restrictions => 'NOT NULL'},
],
'PRIMARY KEY' => ['machine_id',],
};
my $activity_table = $prefix.'activity';
my $activity_table_def =
{ id => $activity_table,
permanent => 'no',
columns => [
{ name => 'res_id',
type => 'MEDIUMINT UNSIGNED',
restrictions => 'NOT NULL',},
{ name => 'time',
type => 'DATETIME',
restrictions => 'NOT NULL',},
{ name => 'student_id',
type => 'MEDIUMINT UNSIGNED',
restrictions => 'NOT NULL',},
{ name => 'action',
type => 'VARCHAR(10)',
restrictions => 'NOT NULL',},
{ name => 'idx', # This is here in case a student
type => 'MEDIUMINT UNSIGNED', # has multiple submissions during
restrictions => 'NOT NULL', # one second. It happens, trust
auto_inc => 'yes', }, # me.
{ name => 'machine_id',
type => 'MEDIUMINT UNSIGNED',
restrictions => 'NOT NULL',},
{ name => 'action_values',
type => 'MEDIUMTEXT', },
],
'PRIMARY KEY' => ['time','student_id','res_id','idx'],
'KEY' => [{columns => ['student_id']},
{columns => ['time']},],
};
my @Activity_Table = ($activity_table_def);
my @ID_Tables = ($student_table_def,$res_table_def,$machine_table_def);
##
## End of table definitions
##
$logthis->('Connectiong to mysql');
&Apache::lonmysql::set_mysql_user_and_password('www',
$perlvar{'lonSqlAccess'});
if (!&Apache::lonmysql::verify_sql_connection()) {
warn "Unable to connect to MySQL database.";
$logthis->("Unable to connect to MySQL database.");
exit 3;
}
$logthis->('SQL connection is up');
if ($drop) { &drop_tables(); $logthis->('dropped tables'); }
if (-s $gz_sql_filename) {
my $backup_modification_time = (stat($gz_sql_filename))[9];
$logthis->($gz_sql_filename.' was last modified '.
localtime($backup_modification_time).
'('.$backup_modification_time.')');
# Check for missing tables
my @Current_Tables = &Apache::lonmysql::tables_in_db();
$logthis->(join(',',@Current_Tables));
my %Found;
foreach my $tablename (@Current_Tables) {
foreach my $table (@Activity_Table,@ID_Tables) {
if ($tablename eq $table->{'id'}) {
$Found{$tablename}++;
}
}
}
$logthis->('Found tables '.join(',',keys(%Found)));
my $missing_a_table = 0;
foreach my $table (@Activity_Table,@ID_Tables) {
# Hmmm, should I dump the tables?
if (! $Found{$table->{'id'}}) {
$logthis->('Missing table '.$table->{'id'});
$missing_a_table = 1;
last;
}
}
if ($missing_a_table) {
my $table_modification_time = $backup_modification_time;
# If the backup happened prior to the last table modification,
foreach my $table (@Activity_Table,@ID_Tables) {
my %tabledata = &Apache::lonmysql::table_information($table->{'id'});
next if (! scalar(keys(%tabledata))); # table does not exist
if ($table_modification_time < $tabledata{'Update_time'}) {
$table_modification_time = $tabledata{'Update_time'};
}
}
$logthis->("Table modification time = ".$table_modification_time);
if ($table_modification_time > $backup_modification_time) {
# Save the current tables in case we need them another time.
my $backup_name = $gz_sql_filename.'.'.time;
$logthis->('Backing existing tables up in '.$backup_name);
&backup_tables($backup_name);
}
$time_this->();
&load_backup_tables($gz_sql_filename);
$time_this->('load backup tables');
}
}
##
## Ensure the tables we need exist
# create_tables does not complain if the tables already exist
$logthis->('creating tables');
if (! &create_tables()) {
warn "Unable to create tables";
$logthis->('Unable to create tables');
exit 4;
}
##
## Read the ids used for various tables
$logthis->('reading id tables');
&read_id_tables();
$logthis->('finished reading id tables');
##
## Set up the errors file
my $error_fh = IO::File->new(">>$error_filename");
##
## Parse the course log
$logthis->('processing course log');
if (-s $newfilename) {
my $result = &process_courselog($newfilename,$error_fh);
if (! defined($result)) {
# Something went wrong along the way...
$logthis->('process_courselog returned undef');
exit 5;
} elsif ($result > 0) {
$time_this->();
$logthis->('process_courselog returned '.$result.' backing up tables');
&backup_tables($gz_sql_filename);
$time_this->('write backup tables');
}
}
close($error_fh);
##
## Clean up the filesystem
&Apache::lonmysql::disconnect_from_db();
unlink($newfilename) if (-e $newfilename && ! $nocleanup);
##
## Print timing data
$logthis->('printing timing data');
if ($time_run) {
my $elapsed_time = Time::HiRes::time - $initial_time;
print "Overall time: ".$elapsed_time.$/;
print &outputtimes();
$logthis->("Overall time: ".$elapsed_time);
$logthis->(&outputtimes());
}
if ($log) {
close LOGFILE;
}
exit 0; # Everything is okay, so end here before it gets worse.
########################################################
########################################################
##
## Process Course Log
##
########################################################
########################################################
#
# Returns the number of lines in the activity.log file that were processed.
sub process_courselog {
my ($inputfile,$error_fh) = @_;
if (! open(IN,$inputfile)) {
warn "Unable to open '$inputfile' for reading";
$logthis->("Unable to open '$inputfile' for reading");
return undef;
}
my ($linecount,$insertcount);
my $dbh = &Apache::lonmysql::get_dbh();
#
# Timing variables
my @RowData;
while (my $line=<IN>){
# last if ($linecount > 1000);
#
# Bulk storage variables
$time_this->();
chomp($line);
$linecount++;
# print $linecount++.$/;
my ($timestamp,$host,$log)=split(/\:/,$line,3);
$time_this->('splitline');
#
# $log has the actual log entries; currently still escaped, and
# %26(timestamp)%3a(url)%3a(user)%3a(domain)
# then additionally
# %3aPOST%3a(name)%3d(value)%3a(name)%3d(value)
# or
# %3aCSTORE%3a(name)%3d(value)%26(name)%3d(value)
#
# get delimiter between timestamped entries to be &&&
$log=~s/\%26(\d{9,10})\%3a/\&\&\&$1\%3a/g;
$log = &unescape($log);
$time_this->('translate_and_unescape');
# now go over all log entries
if (! defined($host)) { $host = 'unknown'; }
my $machine_id = &get_id($machine_table,'machine',$host);
my $prevchunk = 'none';
foreach my $chunk (split(/\&\&\&/,$log)) {
my $warningflag = '';
$time_this->();
my ($time,$res,$uname,$udom,$action,@values)= split(/:/,$chunk);
my $student = $uname.':'.$udom;
if (! defined($res) || $res =~ /^\s*$/) {
$res = '/adm/roles';
$action = 'LOGIN';
}
if ($res =~ m|^/prtspool/|) {
$res = '/prtspool/';
}
if (! defined($action) || $action eq '') {
$action = 'VIEW';
}
if ($action !~ /^(LOGIN|VIEW|POST|CSTORE|STORE)$/) {
$warningflag .= 'action';
print $error_fh 'full log entry:'.$log.$/;
print $error_fh 'error on chunk:'.$chunk.$/;
$logthis->('(action) Unable to parse '.$/.$chunk.$/.
'got '.
'time = '.$time.$/.
'res = '.$res.$/.
'uname= '.$uname.$/.
'udom = '.$udom.$/.
'action='.$action.$/.
'@values = '.join('&',@values));
next; #skip it if we cannot understand what is happening.
}
if (! defined($student) || $student eq ':') {
$student = 'unknown';
$warningflag .= 'student';
}
if (! defined($res) || $res =~ /^\s*$/) {
$res = 'unknown';
$warningflag .= 'res';
}
if (! defined($action) || $action =~ /^\s*$/) {
$action = 'unknown';
$warningflag .= 'action';
}
if (! defined($time) || $time !~ /^\d+$/) {
$time = 0;
$warningflag .= 'time';
}
#
$time_this->('split_and_error_check');
my $student_id = &get_id($student_table,'student',$student);
my $res_id = &get_id($res_table,'resource',$res);
# my $action_id = &get_id($action_table,'action',$action);
my $sql_time = &Apache::lonmysql::sqltime($time);
#
if (! defined($student_id) || $student_id eq '') {
$warningflag.='student_id';
}
if (! defined($res_id) || $res_id eq '') {
$warningflag.='res_id';
}
# if (! defined($action_id) || $action_id eq '') {
# $warningflag.='action_id';
# }
if ($warningflag ne '') {
print $error_fh 'full log entry:'.$log.$/;
print $error_fh 'error on chunk:'.$chunk.$/;
$logthis->('warningflag ('.$warningflag.') on chunk '.
$/.$chunk.$/.'prevchunk = '.$/.$prevchunk);
$prevchunk .= $chunk;
next; # skip this chunk
}
#
my $store_values;
if ($action eq 'POST') {
$store_values =
$dbh->quote(join('&',map { &escape($_); } @values));
} else {
$store_values = $dbh->quote(join('&',@values));
}
$time_this->('get_ids');
#
my $row = [$res_id,
qq{'$sql_time'},
$student_id,
"'".$action."'",
# $action_id,
qq{''}, # idx
$machine_id,
$store_values];
push(@RowData,$row);
$time_this->('push_row');
$prevchunk = $chunk;
#
}
$time_this->();
if ((scalar(@RowData) > 0) && ($linecount % 100 == 0)) {
my $result = &Apache::lonmysql::bulk_store_rows($activity_table,
undef,
\@RowData);
# $logthis->('result = '.$result);
$time_this->('bulk_store_rows');
if (! defined($result)) {
my $error = &Apache::lonmysql::get_error();
warn "Error occured during insert.".$error;
$logthis->('error = '.$error);
}
undef(@RowData);
}
}
if (@RowData) {
$time_this->();
$logthis->('storing '.$linecount);
my $result = &Apache::lonmysql::bulk_store_rows($activity_table,
undef,
\@RowData);
$logthis->('result = '.$result);
$time_this->('bulk_store_rows');
if (! defined($result)) {
my $error = &Apache::lonmysql::get_error();
warn "Error occured during insert.".$error;
$logthis->('error = '.$error);
}
undef(@RowData);
}
close IN;
# print "Number of lines: ".$linecount.$/;
# print "Number of inserts: ".$insertcount.$/;
return $linecount;
}
##
## Somtimes, instead of doing something, doing nothing is appropriate.
sub nothing {
return;
}
##
## Logging routine
##
sub log_to_file {
my ($input)=@_;
print LOGFILE $input.$/;
}
##
## Timing routines
##
{
my %Timing;
my $starttime;
sub time_action {
my ($key) = @_;
if (defined($key)) {
$Timing{$key}+=Time::HiRes::time-$starttime;
$Timing{'count_'.$key}++;
}
$starttime = Time::HiRes::time;
}
sub outputtimes {
my $Str;
if ($time_run) {
$Str = "Timing Data:".$/;
while (my($k,$v) = each(%Timing)) {
next if ($k =~ /^count_/);
my $count = $Timing{'count_'.$k};
$Str .=
' '.sprintf("%25.25s",$k).
' '.sprintf('% 8d',$count).
' '.sprintf('%12.5f',$v).$/;
}
}
return $Str;
}
}
##
## Use mysqldump to store backups of the tables
##
sub backup_tables {
my ($gz_sql_filename) = @_;
my $command = qq{mysqldump --opt loncapa };
foreach my $table (@ID_Tables,@Activity_Table) {
my $tablename = $table->{'id'};
$command .= $tablename.' ';
}
$command .= '| gzip >'.$gz_sql_filename;
$logthis->($command);
system($command);
}
##
## Load in mysqldumped files
##
sub load_backup_tables {
my ($gz_sql_filename) = @_;
if (-s $gz_sql_filename) {
$logthis->('loading data from gzipped sql file');
my $command='gzip -dc '.$gz_sql_filename.' | mysql --database=loncapa';
system($command);
$logthis->('finished loading gzipped data');;
} else {
return undef;
}
}
##
##
##
sub update_process_name {
my ($text) = @_;
$0 = 'parse_activity_log.pl: '.$text;
}
sub get_filename {
my ($course,$domain) = @_;
my ($a,$b,$c,undef) = split('',$course,4);
return "$perlvar{'lonUsersDir'}/$domain/$a/$b/$c/$course/activity.log";
}
sub create_tables {
foreach my $table (@ID_Tables,@Activity_Table) {
my $table_id = &Apache::lonmysql::create_table($table);
# print STDERR "Unable to create table ".$table->{'id'}.$/;
# print STDERR join($/,&Apache::lonmysql::build_table_creation_request($table)).$/;
if (! defined($table_id)) {
warn "Unable to create table ".$table->{'id'}.$/;
warn join($/,&Apache::lonmysql::build_table_creation_request($table)).$/;
return 0;
}
}
return 1;
}
sub drop_tables {
foreach my $table (@ID_Tables,@Activity_Table) {
my $table_id = $table->{'id'};
&Apache::lonmysql::drop_table($table_id);
}
}
#################################################################
#################################################################
##
## Database item id code
##
#################################################################
#################################################################
{ # Scoping for ID lookup code
my %IDs;
sub read_id_tables {
foreach my $table (@ID_Tables) {
my @Data = &Apache::lonmysql::get_rows($table->{'id'});
my $count = 0;
foreach my $row (@Data) {
$IDs{$table->{'id'}}->{$row->[1]} = $row->[0];
}
}
return;
}
sub get_id {
my ($table,$fieldname,$value) = @_;
if (exists($IDs{$table}->{$value})) {
return $IDs{$table}->{$value};
} else {
# insert into the table - if the item already exists, that is
# okay.
my $result = &Apache::lonmysql::store_row($table,[undef,$value]);
if (! defined($result)) {
warn("Got error on id insert for $value\n".&Apache::lonmysql::get_error());
}
# get the id
my @Data =
&Apache::lonmysql::get_rows($table,qq{$fieldname='$value'});
if (@Data) {
$IDs{$table}->{$value}=$Data[0]->[0];
return $IDs{$table}->{$value};
} else {
$logthis->("Unable to retrieve id for $table $fieldname $value");
return undef;
}
}
}
} # End of ID scoping
###############################################################
###############################################################
##
## The usual suspects
##
###############################################################
###############################################################
sub escape {
my $str=shift;
$str =~ s/(\W)/"%".unpack('H2',$1)/eg;
return $str;
}
sub unescape {
my $str=shift;
$str =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C",hex($1))/eg;
return $str;
}
FreeBSD-CVSweb <freebsd-cvsweb@FreeBSD.org>