log_notice "Year $year"
# Choose which league archives to fetch for this year.
# Before 1997, event files were distributed separately for each
# league (American League "al" and National League "nl").
# Starting in 1997, a single Major League ("ml") file is
# distributed instead.
if {$year >= 1997} {
    set league [list ml]
} else {
    set league [list al nl]
}
# Download the archive for every league of the year and extract its
# contents into the event files directory. The archives contain:
#   YEARTEA.EVA: Event files for American League home team (TEA).
#   YEARTEA.EVN: Event files for National League home team (TEA).
#   TEAYEAR.ROS: Roster files for each team (TEA).
#   TEAMYEAR:    Team file with the TEA designations, full team name
#                and whether the team is in the American or National League.
#
# Assume we will not need to update the files for the year. The flag is
# initialised once, before the loop, so that a changed archive for one
# league is not forgotten when a later league's archive is unchanged.
set update_year_p 0
foreach lg $league {
    log_notice "League $lg"
    # Zip files are available at
    # <retro_url>/YEAR/YEARLEAGUE.zip (for example .../1996/1996al.zip)
    set url ${retro_url}/${year}/${year}${lg}.zip

    # Attempt to get the zip file.
    log_notice "Attempting to get $url"
    if {[catch {set token [http::geturl $url -timeout $http_timeout]} errmsg]} {
        log_error "Error connecting to $url: $errmsg"
        continue
    }

    # A timeout or a connection closed early does not raise an error
    # from http::geturl and has no numeric code, so check the transfer
    # status first; otherwise an empty archive would be written.
    set http_status [http::status $token]
    if {$http_status ne "ok"} {
        log_error "Error fetching $url: transfer status $http_status"
        catch {http::cleanup $token}
        continue
    }

    # Client (4xx) and server (5xx) errors mean there is no archive.
    # Release the token before moving on so it is not leaked.
    set ncode [http::ncode $token]
    switch -glob -- $ncode {
        4* -
        5* {
            log_error "URL Error:($ncode), $url."
            catch {http::cleanup $token}
            continue
        }
    }

    set file [file join $zipfiles ${year}${lg}.zip]
    set tmp_file [file join $zipfiles "tmp_${year}${lg}.zip"]
    if {[catch {open $tmp_file "w"} fp]} {
        # On failure $fp holds the error text, not a channel, so we
        # must not fall through and try to write to it.
        log_error "Error, opening file temp file."
        catch {http::cleanup $token}
        continue
    }

    # File was available, get the data and write it to the temp file.
    # Since this is a binary file, the channel needs to be configured
    # correctly.
    set data [http::data $token]
    fconfigure $fp -translation binary -encoding binary
    puts -nonewline $fp $data
    close $fp
    catch {http::cleanup $token}

    # Check to see if the files differ. Yes, this is a
    # bit slow, but it works.
    if {[file exists $file] && ([md5::md5 -hex -file $file] eq [md5::md5 -hex -file $tmp_file])} {
        log_notice "File [file tail $file] has not changed, disregarding."
        catch {file delete $tmp_file}
        # Since the files are the same, we don't need
        # to extract new ones, continue to the next one.
        continue
    }

    # The file is either new or updated. All files
    # will need to be generated for that year.
    set update_year_p 1
    if {[catch {file copy -force $tmp_file $file} err]} {
        log_error "Error copying $file: $err"
        continue
    }
    log_notice "Wrote $file"
    catch {file delete -force $tmp_file}

    # Using VFS, mount the zip file and copy the files
    # in the archive to the event files directory.
    if {[catch {set fd [vfs::zip::Mount $file tmp]} errmsg]} {
        log_error "Could not unzip $file: $errmsg"
        continue
    }
    # -nocomplain: an empty archive must not raise an error here,
    # because that would skip the unmount below.
    foreach zfile [glob -nocomplain tmp/*] {
        if {[catch {file copy -force $zfile $eventfiles} errmsg]} {
            log_error "Error copying [file tail $zfile] to $eventfiles: $errmsg"
        } else {
            log_notice "Extracted [file tail $zfile]"
        }
    }
    # Make sure to unmount it.
    vfs::zip::Unmount $fd tmp
}
# If the year file(s) have not changed, we don't need to update.
if {!$update_year_p} {
    log_notice "File(s) for $year have not changed, keeping current files."
    continue
}

# Delete any current csv files; they are rebuilt below by appending
# the output of every event file for the year.
set eventscsv [file join $csvfiles ${year}-events.csv]
set gamescsv [file join $csvfiles ${year}-games.csv]
catch {file delete $eventscsv}
catch {file delete $gamescsv}

# The Retrosheet tools require you to be in the directory
# containing the event, roster, and team files. Save
# the working directory before running the commands.
# NOTE(review): the csv paths are used after the cd below; this
# presumably relies on $csvfiles being an absolute path - confirm.
set cur_dir [pwd]

# Retrosheet's tools need to be run on EACH file.
# -nocomplain: a year with no matching event files is skipped
# instead of raising an error from glob.
foreach efile [glob -nocomplain [file join $eventfiles "${year}*.EV*"]] {
    set ename [file tail $efile]
    log_notice "Processing $ename"
    set status 0

    # Change into the eventfiles directory and run BEVENT.EXE
    # and BGAME.EXE, appending their output to the respective
    # csv files.
    # NOTE: When running wine, Tcl's exec can be hard pressed
    #       to know if there is an error because of wine or the
    #       Retrosheet tools. Unfortunately, both programs write
    #       to stderr even if there is not a problem. We just
    #       hope for the best when these run. Error checking
    #       doesn't help much.
    cd $eventfiles

    # Each command is built as a single list on one logical line.
    # A bare newline inside [...] is a command separator, which would
    # execute the csv path as a command instead of redirecting to it.
    # ">>" makes exec append the tool's stdout to the csv file.
    if {$cfg(usewine)} {
        set cmd [list exec $wine $bevent -f 0-96 -y $year $ename >> $eventscsv]
    } else {
        set cmd [list exec $bevent -f 0-96 -y $year $ename >> $eventscsv]
    }
    if {[catch $cmd results]} {
        switch -exact -- [string tolower [lindex $::errorCode 0]] {
            childstatus { set status [lindex $::errorCode 2] }
            default { set status 0 }
        }
        log_debug "BEVENT.EXE:status:$status"
        log_debug "BEVENT.EXE:results:$results"
        log_notice "BEVENT.EXE run on $ename."
    }

    # NOTE(review): the wine invocation requests fields 0-83 while the
    # native one requests 0-81. This looks like an inconsistency, but
    # it is left unchanged because it alters the csv column count -
    # confirm which range is intended.
    if {$cfg(usewine)} {
        set cmd [list exec $wine $bgame -f 0-83 -y $year $ename >> $gamescsv]
    } else {
        set cmd [list exec $bgame -f 0-81 -y $year $ename >> $gamescsv]
    }
    if {[catch $cmd results]} {
        switch -exact -- [string tolower [lindex $::errorCode 0]] {
            childstatus { set status [lindex $::errorCode 2] }
            default { set status 0 }
        }
        log_debug "BGAME.EXE:status:$status"
        log_debug "BGAME.EXE:results:$results"
        log_notice "BGAME.EXE run on $ename."
    }

    # Go back home, collect $200 and a free dinner.
    cd $cur_dir
}
# Generate the roster csv file for the year from every TEAYEAR.ROS file.
# Each output row is the year followed by the fields of the roster line
# (the year is not included in the Retrosheet files, so we add it here).
# For years before 2002 the team code and a "NON" placeholder are
# appended as well; per the original author, the position field was
# not used in the older roster files.
set rfile [file join $csvfiles "${year}-rosters.csv"]
catch {file delete $rfile}
set roster_pattern "*${year}.ROS"
# -nocomplain: a year without roster files is skipped instead of
# raising an error from glob.
foreach ros [glob -nocomplain [file join $eventfiles $roster_pattern]] {
    set name [file tail $ros]
    # Only TEAYEAR.ROS names belonging to this year are processed.
    if {![regexp {(\w{3})(\d{4}).ROS} $name match team y]} {
        continue
    }
    if {$year != $y} {
        continue
    }
    if {[catch {open $rfile "a"} output]} {
        log_error "error opening $rfile"
        continue
    }
    log_notice "Working on roster $name."
    if {[catch {open $ros r} in]} {
        # $in holds the error text here; release the output channel
        # before skipping this roster.
        log_error "error opening $ros: $in"
        catch {close $output}
        continue
    }
    # gets returns a negative count only at end of file, so blank
    # lines are still read (and rejected by the player id check).
    while {[gets $in line] >= 0} {
        set data [::csv::split $line ","]
        # Keep only rows whose first field looks like a player id
        # (five word characters followed by three digits).
        if {[regexp {\w{5}\d{3}} [lindex $data 0] playerid]} {
            set newline [linsert $data 0 $year]
            if {$year < 2002} {
                lappend newline $team
                lappend newline "NON"
            }
            puts $output [::csv::join $newline ","]
        }
    }
    catch {close $in}
    catch {close $output}
}
# Generate a team csv file from the TEAMYEAR file. Basically this
# just prepends the specific year to every line of the file.
set teamfile [file join $eventfiles "TEAM${year}"]
if {[file exists $teamfile]} {
    set tfile [file join $csvfiles "${year}-teams.csv"]
    catch {file delete $tfile}
    if {[catch {open $tfile "a"} output]} {
        log_error "error opening $tfile"
        continue
    }
    log_notice "Working on TEAM${year}."
    if {[catch {open $teamfile r} in]} {
        # $in holds the error text here; release the output channel
        # before giving up on this year's team file.
        log_error "error opening $teamfile: $in"
        catch {close $output}
        continue
    }
    # gets returns a negative count only at end of file.
    while {[gets $in line] >= 0} {
        set data [::csv::split $line ","]
        puts $output [::csv::join [linsert $data 0 $year] ","]
    }
    # Close only the channels opened above; when the team file does
    # not exist there is nothing to close.
    catch {close $in}
    catch {close $output}
}