![]() ![]() | ![]() |
Invent a description if we do not have one.
#!/usr/local/bin/perl

#
# lon-capa.pl
# Parse the LON-CAPA metadata
#
# Andy Dong <adong@smete.org> 10/23/2002
#
# Contact Gerd Kortemeyer (korte@lite.msu.edu)
#
# For every server in @servers this script requests
# /cgi-bin/metadata_harvest.pl, splits the response into one record per
# line, filters the records, and writes one OAI Dublin Core XML file per
# accepted record to
#
#     $basepath/<domain>/<author>/<sanitized resource path>.xml
#
# Each record is a '|'-separated line.  Field positions, as read below:
#
#      0 title            7 abstract
#      1 author           8 type
#      2 subject          9 language
#      3 resource path   10 creation date
#      4 keywords        11 revision date
#      5 version         12 owner
#      6 notes           13 copyright
#
# Only fields 0-4, 7, 9, 11 and 13 influence the generated XML.
#

use strict;
use warnings;
use LWP::UserAgent;
use Getopt::Std;
use Digest::MD5 qw(md5_hex);
use IO::File;

my $basepath='/home/httpd/cgi-bin/OAI-XMLFile/XMLFile/nsdlexport/data';

# Number of '|'-separated fields expected in one metadata record.
my $field_count = 14;

# Configuration

# The list of servers is from the LON-CAPA CVS repository in /loncapa/loncom/production_hosts.tab
my @servers = (
    'newscience.westshore.cc.mi.us',
    's10.lite.msu.edu',
    's12.lite.msu.edu',
    'lon-capa.chem.sunysb.edu',
    'schubert.tmcc.edu',
    'dalton.chem.sfu.ca',
    'capa2.phy.ohiou.edu',
    'pollux.physics.fsu.edu',
    'loncapa.physics.sc.edu',
    'loncapa.math.ucf.edu',
    'zappa.ags.udel.edu',
    'loncapa.gwu.edu',
    'neptune.physics.ndsu.nodak.edu',
    'capa1.uwsp.edu',
    'natasha.it.fit.edu',
    'loncapa.Mines.EDU',
    'loncapa.chm.nau.edu');

# End Configuration

# One user agent serves every request; nothing about it depends on the server.
my $ua = LWP::UserAgent->new;
$ua->timeout(600);

SERVER:
foreach my $server (@servers) {
    my $url = 'http://' . $server . '/cgi-bin/metadata_harvest.pl';

    my $request = HTTP::Request->new(GET => $url);
    # NOTE(review): credentials are hard-coded in the source; consider moving
    # them to a protected configuration file.
    $request->authorization_basic('reaper', 'cat4u');

    my $response = $ua->request($request);

    unless ($response->is_success) {
        print 'LON-CAPA request failed: ' . $response->message . ' for ' . $url . "\n\n";
        next SERVER;
    }
    print 'SUCCESS: ' . $response->message . ' for ' . $url . "\n\n";

    my $content = $response->content;
    $content = '' unless defined $content;
    # Delete all blank lines
    $content =~ s/(?<!.)\n//g;
    # Replace all ^M with spaces.  The replacement has to be a literal
    # space: "\s" on the right-hand side of s/// is just the letter "s".
    $content =~ s/\r/ /g;
    # One record per line
    my @loncapa = split /\n/, $content;

    RECORD:
    foreach my $metadata (@loncapa) {
        chomp $metadata;
        # Blank out every character outside the small safe set.  This also
        # removes <, > and &, so the values can be interpolated into the XML
        # below without escaping, and turns "-" in dates into a space.
        $metadata =~ s/[^\w\d\s\.\;\:\,\|\/]/ /gs;

        # split() drops trailing empty fields; pad so every index is defined.
        my @tkline = split /\|/, $metadata;
        push @tkline, ('') x ($field_count - @tkline) if @tkline < $field_count;

        my ($title, $author, $subject, $respath, $keywords) = @tkline[0 .. 4];
        my $language      = $tkline[9];    # Look only for seniso
        my $revision_date = $tkline[11];
        my $copyright     = $tkline[13];   # public,domain,default,private

        next RECORD if $title  eq '';
        next RECORD if $author eq '';
        next RECORD if $subject eq 'Sample' || $subject eq 'Something';

        my $resourceurl = 'http://nsdl.lon-capa.org' . $respath;
        next RECORD if $resourceurl =~ m{/demo/};

        next RECORD if $language ne 'seniso';
        my $primary_language = 'en-US';

        # Public means no login required
        # Domain means restricted to a particular LON-CAPA domain
        # Defaults mean access open to any registered LON-CAPA user
        # Private means open only to author of material
        next RECORD if $copyright eq 'private';
        next RECORD if $copyright eq 'domain';

        # The output directory is derived from /res/<domain>/<author>/...
        # Without both parts there is no sensible place to write the file.
        my ($adom, $auname) = ($respath =~ m{^/res/(\w+)/(\w+)/});
        unless (defined $adom && defined $auname) {
            warn 'Skipping record with unrecognized resource path: ' . $respath . "\n";
            next RECORD;
        }

        # File name: the resource path with non-word characters replaced by
        # underscores and the leading "_res_" removed.
        my $baseid = $respath;
        $baseid =~ s/\W/\_/g;
        $baseid =~ s/^\_res\_//g;

        # Only the first two whitespace-separated words of the author field
        # are used (e.g. "Multimedia Physics").
        my ($author_fname, $author_lname) = split ' ', $author;
        $author_fname = '' unless defined $author_fname;
        $author_lname = '' unless defined $author_lname;

        # Invent a description if we do not have one.
        my $abstract = $tkline[7] || $subject || $title || $keywords;

        # After the clean-up above a date looks like "YYYY MM DD hh:mm:ss".
        # Emit <date> only when the revision date really parses; otherwise
        # the element would come out as the meaningless "--".
        my ($rev_year, $rev_month, $rev_day) =
            ($revision_date =~ /^(\d{4}) (\d{2}) (\d{2})\s(\d{2}):(\d{2}):(\d{2})$/);
        my $date_element = defined $rev_day
            ? "<date>$rev_year-$rev_month-$rev_day</date>\n"
            : '';

        #
        # Create path
        #
        my $domain_dir = $basepath . '/' . $adom;
        my $author_dir = $domain_dir . '/' . $auname;
        foreach my $dir ($domain_dir, $author_dir) {
            next if -e $dir;
            mkdir($dir) or die 'Could not create ' . $dir . ": $!";
        }

        my $xml_file = $author_dir . '/' . $baseid . '.xml';
        my $xml;
        unless (open($xml, '>', $xml_file)) {
            warn 'Could not write ' . $xml_file . ": $!\n";
            next RECORD;
        }
        # $date_element carries its own trailing newline (or is empty), which
        # is why it shares a line with the closing tag.
        print {$xml} <<"ENDMETA";
<?xml version="1.0" encoding="UTF-8"?>

<oaidc:dc xmlns="http://purl.org/dc/elements/1.1/"
 xmlns:oaidc="http://www.openarchives.org/OAI/2.0/oai_dc/"
 xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/oai_dc/
 http://www.openarchives.org/OAI/2.0/oai_dc.xsd"
>
<title>$title</title>
<creator>$author_fname $author_lname</creator>
<identifier>$resourceurl</identifier>
<subject>$keywords</subject>
<subject>$subject</subject>
<language>$primary_language</language>
<description>$abstract</description>
$date_element</oaidc:dc>
ENDMETA
        # Buffered write errors only surface at close.
        close($xml) or warn 'Could not finish writing ' . $xml_file . ": $!\n";
    }
}