# UDF.pm -- utility functions for dealing with "UDF" # # UDF is a data format developed by Chris Gurgiolo . # It is being used on the MENA experiment. # # See for lots more information on # the UDF file layout, and tools available for working with UDF. # # $Id: UDF.pm,v 1.22 2001/01/16 19:06:42 esm Exp $ # package UDF; # Infrastructure require Exporter; use Carp; use Cwd; # For converting relative paths to absolute use File::Path; # For making temporary directories use strict; use vars qw($VERSION @ISA @EXPORT); # Housekeeping use vars qw($UDF_HOME $UDF_DATA $UDF_BIN); # Our data variables use vars qw($DEBUG $VERBOSE); $VERSION = 0.2; @ISA = qw(Exporter); @EXPORT = qw(udf_unpack udf_database); # # Make sure we have the UDF envariables loaded # require "/packages/lib/udf/.env.perl" unless exists $ENV{UDF_HOME}; # # Initialize paths to the data and to the UDF binaries # $UDF_DATA = $ENV{UDF_DATA} or croak "no value for UDF_DATA "; $UDF_HOME = $ENV{UDF_HOME} or croak "no value for UDF_HOME "; $UDF_BIN = $UDF_HOME . "/bin"; $VERBOSE = 0; $DEBUG = ($ENV{UDF_DEBUG} || 0) + 0; ############################################################################### # CODE STARTS HERE # ############################################################################### ################ # udf_unpack # Given a .tar.gz file, unpack it & move into place ################ # # Given a path to a .tar.gz file, we unpack it in an (empty) temporary # directory, and call special code to move it into place. PIDF and VIDF # files are processed differently than instrument data files, but we let # other code deal with that. Our mission here is: # # * extract the files from the .tar.gz file # # * call the appropriate handler(s) for this tarfile's type # # * make sure the temporary directory is empty when we leave. # sub udf_unpack { my $tarfile = shift; # # UDF filenames are of the form # # ..UDF...tar.gz # e.g., # IMAGE.IMAGE-1.UDF.MENA.199910714.tar.gz # # Let's make sure the given filename parses properly, then extract # the separate components. # # Note that might be PIDF or VIDF, in which case we # do some special postprocessing. # unless ($tarfile =~ m!^(.*/)?(\S+)\.(\S+)\.UDF(\w+)\.(\d+)\.tgz$!) { carp "'$tarfile' must be of the form XX.XX.UDFx.dddd.tgz"; return 0; } my ($project, $mission, $experiment, $date) = ($2,$3,$4,$5); # Define a path to the $UDF_DATA// subdirectory my $d = "$UDF_DATA/$project"; croak "no project dir '$d' " unless -d $d; $d .= "/$mission"; croak "no mission dir '$d' " unless -d $d; # If path to tar file is not absolute, make it so. my $cwd = getcwd; $tarfile = sprintf("%s/%s", $cwd, $tarfile) unless $tarfile =~ m!^/!; # Make a temporary directory in which to unpack the tar files. In order # to keep things simple, this dir MUST be on the same filesystem as the # destination data. my $tmpdir = "$d/tmp/$experiment.$date.$$"; mkpath($tmpdir, 0, 02775); chdir($tmpdir); # Unpack the tar file. This will dump a bunch of files into the current # directory, and possibly make further subdirectories (e.g., PIDF and VIDF). system("tar", "xzf", $tarfile); croak "tar failed with status $? " if $?; # Indicate what we're doing printf "Installing: %-8s %-8s %-8s %s\n",$project,$mission,$experiment,$date; # Postprocessing. Once we've unpacked the .tar.gz files, we need to # move the extracted files into place and run some actions based on # the file type. my (%POST_PROCESS) = (p => [ \&move_pvidf ], v => [ \&move_pvidf, \&vidf_to_idf ], DEFAULT => [ \&move_udf ]); my $post = $POST_PROCESS{$experiment} || $POST_PROCESS{"DEFAULT"}; foreach my $func (@$post) { &$func($project, $mission, $experiment); } # # Clean up. # First, remove "core". Certain bogus VIDF "releases" cause mk_idf # to barf, dumping core, and thereby preventing us from rmdir'ing # successfully. unlink "core"; # Next, remove our temporary directory. If it doesn't unlink, it's # because there are files left over (that is, files that weren't # in the XFER file). This is a serious enough bug, that we should # die... but since I'm no longer around to keep an eye on things, # let's just do a warning instead. chdir "/"; rmdir($tmpdir) or carp "rmdir( $tmpdir ): $! "; # Now should we keep the data file, or move it into a "done/" directory, # or perhaps keep a zero-size stub with the mtime? Maybe move into /tempest? # Move back to the directory where we started chdir($cwd); } ############################################################################### # MOVE # ############################################################################### # # The functions below will move the files extracted from a .tar.gz file to # their proper destination. There are two cases of interest: # # 1) PIDF and VIDF: these .tar.gz files unpack into deep directories # //PIDF/* and /VIDF/* respectively. # # 2) UDF data: these unpack flatly into the current directory, and # include a file, "UDFXFER", detailing where to move the new files. # # For more details, see each function's header comments. # # Note that our calling environment is that we're cd'ed to a temporary # directory containing the results of extracting the .tar.gz file. # ############################################################################### ################ # move_pvidf # move PIDF and VIDF files into their proper home ################ # # When we're invoked, we are cd'ed to a (temporary) directory in which # we have a //PIDFS|VIDFS subhierarchy. Our job is to # move the files to their real destinations. # # We do this pretty darn tooting carefully: each file gets rename()d # atomically into its destination, that is, from # # ./IMAGE/IMAGE-1/PIDFS/FOOBAR # to $UDF_HOME/IMAGE/IMAGE-1/PIDFS/FOOBAR # # This step-by-step process guarantees that the PIDF or VIDF directory # will be populated at all times. Even if it's temporarily inconsistent # between instruments, the directory will not be empty or nonexistent # at any instant. This gives us some peace of mind, since we can now # run this script without caring if anyone else is using the UDF tree. # # Note that we simply clobber the old contents of the existing PIDF # or VIDF directories. No saving, no checking, no diffs, no anything. # sub move_pvidf($$$) { my ($project, $mission, $experiment) = @_; # Sigh. The new moronic naming rules got rid of the PIDF/VIDF in filename. $experiment = uc $experiment . "IDF"; # Perform basic consistency checks. Note that the directory names for # PIDF and VIDF are PIDFS and VIDFS (with a trailing "S"), respectively. my $subdir = $project; croak "move_pvidf(): no project dir '$subdir' " unless -d $subdir; $subdir = "$subdir/$mission"; croak "move_pvidf(): no mission dir '$subdir' " unless -d $subdir; $subdir = "$subdir/$experiment" . "S"; croak "move_pvidf(): no PIDF/VIDF dir '$subdir' " unless -d $subdir; # Okay, we now suspect that the tar file unpacked okay. # Destination directory looks exactly like our subdirectory, but is # an absolute path (subdir is relative to pwd). my $dest = "$UDF_DATA/$subdir"; # First step is to get a list of all files in the new subdirectory. opendir(DDD, $subdir) or croak "move_pvidf(): opendir( $subdir ): $!"; my (@newfiles) = sort grep(-f "$subdir/$_", readdir DDD); closedir DDD; # Now iterate over that list, moving the files one by one to the destination foreach my $f (@newfiles) { chmod 0444, "$subdir/$f"; # Standard esm paranoia rename("$subdir/$f", "$dest/$f") or croak "rename( $f -> $dest ): $!"; } # Finally, look in the destination directory for files that may have # been there before, but are not in the new tarfile. We delete these. opendir(DDD, $dest) or croak "move_pvidf(): opendir( $dest ): $! "; foreach my $f (grep(-f "$dest/$_", readdir DDD)) { unless (grep($f eq $_, @newfiles)) { printf "[ No longer valid: removing '$experiment/$f' ]\n"; unlink "$dest/$f"; } } # Clean up: remove all the (presumably empty) extracted subdirectories. rmdir("$subdir") or croak "rmdir( $subdir ): $! "; rmdir("$project/$mission") or croak "rmdir( $project/$mission ): $! "; rmdir("$project") or croak "rmdir( $project ): $! "; } ############## # move_udf # move UDF data files into place ############## # # Just like move_pvidf(), we're invoked while cd'ed to a temporary directory # in which the tar file has been unpacked. We differ from move_pvidf() in # that we have (should have?) a flat directory: files only, no subdirectories. # In this directory there will be a file, UDFXFER, containing a list of # files and where they belong. For example: # # FOOBAR MENA/MENASCI # # tells us that the file "FOOBAR" belongs in the MENA/MENASCI directory. # # Note: we expect UDFXFER to contain an identical match to the rest of the # files in this directory. If it lists a file that doesn't exist, or if # there's an extracted file that isn't listed in UDFXFER, that is bad news. # sub move_udf($$$) { my ($project, $mission, $experiment) = @_; my (%NEWDATA); # Keep track of all the new files we've moved # Read the UDFXFER file, and move all the files to their respective homes # # The first line of the file "UDFXFER" must be "XFER", no more, no less. open(XFER, "UDFXFER") or croak "open( UDFXFER ): $! "; chop($_ = ); croak "first line of UDFXFER is '$_'; expected 'XFER' " if $_ ne "XFER"; # All subsequent lines are of the form " /" while () { chop; croak "in UDFXFER, illegal line '$_' " unless m!^(\S+)\s+(\S+)/(\S+)$!; my ($file, $e, $i) = ($1, $2, $3); my $destfile = "$UDF_DATA/$project/$mission/$e/$i/$file"; chmod 0444, $file; # First things first... make r--r--r-- # If destination file already exists, see if they differ. If they # do, move the older one into a backup file and warn the user. If # they are the same, just ignore the new file. if (-f $destfile) { system("cmp", "-s", $file, $destfile); if ($?) { printf "overwriting existing file '$e/$i/$file'\n"; my $bak = $destfile . ".BAK"; unlink $bak; rename $destfile, $bak; } else { unlink $file; # silently next; } } # Okey-doke, we don't have that file. Let's move it. printf "%s -> %s/%s...", $file, $e, $i if $VERBOSE & 0x01; rename($file, $destfile) or croak "rename( $file): $!"; print "\n" if $VERBOSE & 0x01; # Now keep a running track of the files we've moved push(@{$NEWDATA{$e}->{$i}}, $file); } close XFER; unlink "UDFXFER"; # # Done. All files have been moved from our tmp directory to their # proper homes. # # Now go over all those files again, and add them to the respective # databases for their instruments. # foreach my $experiment (sort keys %NEWDATA) { foreach my $instrument (sort keys %{$NEWDATA{$experiment}}) { udf_database($project, $mission, $experiment, $instrument, "D", 0, # do not redo @{$NEWDATA{$experiment}->{$instrument}}); } } } ################# # vidf_to_idf # convert VIDF files (ASCII) to IDF (binary) ################# # # When invoked, we expect a fully unpacked and extracted VIDFS # directory. This directory will contain a bunch of files # ending in "V". Our job is to convert those to binary ("I"), # move them into the appropriate experiment/instrument directory, # and regenerate the database files. # sub vidf_to_idf($$$) { my ($project, $mission, $trashme) = @_; # "trashme" is "VIDF" # Point to various directories we will need. my $base_dir = "$UDF_DATA/$project/$mission"; my $VIDF_dir = "$base_dir/VIDFS"; my $VIDF_list = "$VIDF_dir/VIDFLIST"; $VIDF_list .= ".A" if ! -f $VIDF_list; my %processed; # # The "VIDFLIST" file contains a list of files, along with their # respective experiment directories. Process each file in turn, # converting to "idf" and moving into place. # open(VLIST, $VIDF_list) or croak "open( $VIDF_list ): $! "; while () { my ($filebase, $experiment, $instrument) = split; my $vfile = $filebase . "V"; my $ifile = $filebase . "I"; if (! -f "$VIDF_dir/$vfile") { warn "no such VIDF file '$vfile' ($experiment/$instrument)\n"; next; } # Sigh. We can't always count on a clean distribution. if (-f "$VIDF_dir/$ifile") { print STDERR "I-file '$ifile' exists... removing!"; unlink "$VIDF_dir/$ifile"; } # run mk_idf. This generates an "I" file, which we then move # into the appropriate instrument/experiment directory. print "mk_idf $filebase\n" if $DEBUG & 0x04; system("$UDF_BIN/mk_idf", "$VIDF_dir/$filebase"); if ($?) { carp "mk_idf (VIDFS/$filebase) failed with status $?, skipping "; next; } my $destdir = "$base_dir/$experiment/$instrument"; mkpath($destdir, 0, 02775) unless -d $destdir; # Move "VIDFS/xxxxI" to "//xxxxI" my $srcfile = "$VIDF_dir/$ifile"; chmod 0444, $srcfile; my $destfile = "$destdir/$ifile"; rename($srcfile, $destfile) or croak "rename( -> $destfile ): $! "; push(@{$processed{$experiment}->{$instrument}}, $ifile); } close VLIST; # # Done. We have now converted all "V" files to "I", and moved them # into their respective experiment/instrument directories. # # We now have to regenerate the databases for each. # foreach my $experiment (sort keys %processed) { foreach my $instrument (sort keys %{$processed{$experiment}}) { udf_database($project, $mission, $experiment, $instrument, "I", 1, # redo @{$processed{$experiment}->{$instrument}}); } } } ################## # udf_database # main (public) interface to database functions ################## # # UDF keeps two databases for each instrument: one "I", for VIDF, # and one "HD", for data. Each database consists of two files, # an index (.NDX) and data (.DBF) file. Databases live in the # "Database" directory of each project/mission. For instance: # # $UDF_DATA/IMAGE/IMAGE-1/Database # |-- MENAHSKP.HD.DBF # |-- MENAHSKP.HD.NDX # |-- MENAHSKP.I.DBF # `-- MENAHSKP.I.NDX # # I think, but am not certain, that the databases contain the starting # and ending times for each file in the experiment/instrument directory. # # Databases are created and updated by tools in $UDF_BIN. Our job is # to make wrappers for those. The steps involved are: # # * If the "redo" option is set, remove any existing databases. # # * Obtain a list of files to be added to the database. We can # be called with that list, but if not, call someone to find # all files that match in the experiment/instrument directory. # # * Find out the "times" for each of those files (it's a different # procedure for "I" vs. "D" or "H" files). Write these times # out to an ASCII file. # # * Add the contents of the ASCII file to the binary database. # # Converted from AD_DodB.tcl # sub udf_database($$$$$$@) { my ($project,$mission, $experiment,$instrument, $options,$redo, @files) = @_; my $base_dir = "$UDF_DATA/$project/$mission"; my $data_dir = "$base_dir/$experiment/$instrument"; my $db_dir = "$base_dir/Database"; my (%db_asc, %db_dbf, %db_ndx); foreach my $type ("I", "D") { $db_asc{$type} = $db_dir . "/" . db_filename($instrument,$type,"ASC"); $db_dbf{$type} = $db_dir . "/" . db_filename($instrument,$type,"DBF"); $db_ndx{$type} = $db_dir . "/" . db_filename($instrument,$type,"NDX"); unlink $db_asc{$type}; unlink $db_dbf{$type} if $redo && $options =~ /$type/; unlink $db_ndx{$type} if $redo && $options =~ /$type/; } # # Main loop: for each file in TODO, find its "times" # @files = files_to_process($data_dir, $options) unless @files; foreach my $f (@files) { my $asc; my @times; my $head = $f; $head =~ s/\d{11}\w$//; if ($f =~ /I$/) { $asc = $db_asc{"I"}; # The "itimes" command gives us the times we want, as a space-separated # string of numbers. I don't know what the first field is, though. # The GUIU tosses it, so let's just do that. my $t = `$UDF_BIN/itimes $data_dir/$f`; chop $t; croak "itimes (data_dir/$f) died with code $?" if $?; @times = split(/\s+/, $t); shift @times; } elsif ($f =~ /D$/) { my $Hfile = $f; $Hfile =~ s/D$/H/; my $Ifile = find_Ifile($data_dir, $head); $asc = $db_asc{"D"}; # Run "udf_swap". I have no idea what this does, but it always # seems to return -1. Note that it returns it on stdout, not # as an execution status. Sigh. my $status = `$UDF_BIN/udf_swap $data_dir/$Ifile $data_dir/$f $data_dir/$Hfile`; croak "udf_swap (data_dir/$Ifile) died with code $?" if $?; if ($status < 0 && $status != -1) { warn "udf_swap($f) returned status $status\n"; } # the "udf_time" command is what gets us our times. my $t = `$UDF_BIN/udf_time $data_dir/$Ifile $data_dir/$f`; chop $t; croak "udf_time (data_dir/$f) died with code $?" if $?; @times = split(/\s+/, $t); } elsif ($f =~ /H$/) { # do nothing } else { croak "udf_database(): cannot grok '$f'"; } if ($asc) { asc_append($asc,$head,@times[0,1,6,7,8,13],-1," "," "," ","xxx",today()); } } # Done. We should now have an ASCII database file, which we then # convert to binary. foreach my $type (keys %db_asc) { my $asc = $db_asc{$type}; if (-s $asc) { db_create($db_dbf{$type}, $db_ndx{$type}) unless -f $db_dbf{$type}; db_append($db_dbf{$type}, $db_ndx{$type}, $db_asc{$type}); } unlink $asc; } } ################# # db_filename # Returns the name of the database for the desired instrument ################# sub db_filename { my ($instrument, $type, $ext) = @_; my $TYPE = uc $type; $TYPE = "HD" if $TYPE eq "D"; return sprintf("%s.%s.%s", $instrument, $TYPE, $ext); } ############### # db_create # Create a DBF and NDX file for a given database ############### sub db_create { my ($dbf, $ndx) = @_; printf "Creating: $dbf\n" if $VERBOSE & 0x01; printf "db_create $dbf\n" if $DEBUG & 0x04; system("$UDF_BIN/db_create", "$dbf"); croak "db_create ($dbf) barfed with error status $?" if $?; # Index printf "db_index $dbf $ndx\n" if $DEBUG & 0x04; system("$UDF_BIN/db_index", $dbf, $ndx, qw(V_INST B_YR B_DAY B_MSEC)); croak "db_index ($dbf) barfed with error status $?" if $?; } ############### # db_append # Append ASCII data to a database ############### sub db_append { my ($dbf, $ndx, $asc) = @_; # Sigh. db_append generates output no matter what. Let's try to gobble it. printf "db_append $dbf $asc $ndx\n" if $DEBUG & 0x04; my $output = `$UDF_BIN/db_append $dbf $asc $ndx`; croak "db_append ($dbf) barfed with error status $?" if $?; unless ($output =~ /^\s*\d+\s+recs,\s+\d+\s+appended$/) { chop $output; printf STDERR "db_index ($dbf): unexpected output:\n"; croak " '$output'\n"; } } ################ # asc_append # Append to an ASCII pre-database file ################ sub asc_append { my $f = shift; open(OUT, ">>$f") or croak "asc_append: open( >> $f ): $!"; print OUT join("|", @_), "\n"; close OUT; } ########### # today # returns today's date in YYYYMMDD format ########### sub today() { my (@t) = gmtime(time); return sprintf("%04d%02d%02d", $t[5]+1900, $t[4]+1, $t[3]); } ###################### # files_to_process # Look for files of the given type in a directory ###################### sub files_to_process($$) { my ($d, $options) = @_; my (@list); # If called with "D" or "I" as an option, look for files of the # desired type(s) in the instrument directory. if ($options =~ /[ID]/) { opendir(DDD, $d) or croak "files_to_process: opendir( $d ): $!"; foreach my $f (sort grep(-f "$d/$_", readdir DDD)) { next if $f =~ /^\./; # skip "dot" files if ($f =~ /([DI])$/ && $options =~ /$1/) { push(@list, $f); } } closedir DDD; } return @list; } ################ # find_Ifile # Given a file pattern (head), look for its "I" file ################ # # UDF data ("D") files must have an associated "I" (VIDF binary) file. # For instance, for the file "IMMHRSNG19991911641D", we look for a file # of the form "IMMHRSNG*I". # # As of November 2000, we now have multiple "VIDF" files for ImmImage. # This destroys the old theory that There Can Be Only One "I" file. # # I _think_ the right thing to do is to use the first one we find, # without complaining. At least, that's what I think Chris is doing # in $UDF_HOME/bin/Procs.tcl . # sub find_Ifile { my ($d, $pat) = @_; opendir(DDD, $d) or croak "find_Ifile: opendir( $d ): $!"; my (@match) = sort grep(/^$pat\d{11}I$/, readdir DDD); if (@match == 0) { croak "could not find I file '$d/$pat*I'"; } elsif (@match == 1) { return $match[0]; } else { # printf STDERR "too many matches for '$d/$pat*I':\n"; # foreach (@match) { # printf STDERR "\t%s\n", $_; # } # printf STDERR "...using first match\n"; return $match[0]; } } # We must return good status 1;