#! /usr/bin/env python
#
# Export regular CDF file to the Enstore via SAM
# F.Ratnikov (Rutgers)
#
# The code below deliberately avoids both the Python 2 print statement and
# "from __future__" imports so that the same source runs under Python 2.6+
# and Python 3.
#
import sys
import os
import string
import getopt
import pwd
import shutil
from time import gmtime, strftime
# from sam import sam


def _say(*parts):
    """Write `parts` separated by single spaces plus a newline to stdout.

    Produces the same text as the Python 2 statement ``print a, b, c`` and
    behaves identically under Python 2 and Python 3.
    """
    sys.stdout.write(' '.join([str(part) for part in parts]) + '\n')


def _echoResponse(line):
    """Echo one raw line of command output (the line keeps its own newline)."""
    sys.stdout.write('Response->  %s' % line)


def _pyQuote(value):
    """Return `value` as a Python string literal for the generated descriptor.

    For plain text this yields the same 'single quoted' form the descriptor
    always contained; text holding quotes or backslashes is escaped so that the
    generated file stays syntactically valid.
    """
    return repr(str(value))


def _currentUser():
    """Return the login name of the current uid, or the uid itself if unmapped."""
    uid = os.getuid()
    try:
        return pwd.getpwuid(uid)[0]
    except KeyError:
        # uid has no passwd entry (e.g. inside a container)
        return str(uid)


class CdfStoreError(Exception):
    """Raised when a processing step fails; `value` names the failing step."""

    def __init__(self, value):
        Exception.__init__(self, value)
        self.value = value

    def __str__(self):
        return repr(self.value)


class CdfStore(object):
    """Command-line driver: constructing an instance runs the whole procedure.

    For every file given on the command line the constructor calls, in order,
    getFileInfo, makeFileDescription, getDestination (only when no destination
    is set yet), storeFile and cleanup.  A CdfStoreError raised by any step
    terminates the process with exit status 1.
    """

    def usage(self):
        """Print the command-line help text."""
        for text in (
                'Store regular CDF files in SAM. Version 2.1',
                'Usage: samStoreCdfFile ',
                ' possible options are:',
                ' --help - this message',
                ' --file= - full name of the file to be stored - OBSOLETE',
                ' --dataset= - CDF dataset assigned to the file - this or "--requestId" is mandatory',
                ' --requestId= - Request ID associated with this data - this or "--dataset" is mandatory',
                ' --station= - local SAM station, may be set via $SAM_STATION - mandatory',
                ' --host= - hostname for the local SAM station, may be set via $SAM_HOST_NAME - mandatory',
                ' --dest= - file store destination',
                ' --suffix= - suffix to be added to the standard CDF file name, like .date.user',
                ' --description= - any description of the file',
                ' --html= - reference to the description of data',
                ' --storeoptions= - auxiliary options to be forwarded to the "sam store" command',
                ' --rename - rename file according CDF convention',
                ' --copydir= - copy file into directory before processing and remove copy afterwards',
                ' --linkdir= - link file into directory before processing and remove link afterwards',
                ' --parents= - comma separated list of parents declared for these files',
                ' --metadata_only - only metadata file is created',
                ' -v - verbose output',
                ' -t - test only, do not write data and metadata'):
            _say(text)

    def __init__(self):
        self.verbose = None
        self.rename = None
        self.filename = None
        self.filedir = None
        self.dataset = None
        self.pnfsDestination = None
        self.pyfilename = None
        self.description = None
        self.html = None
        self.storeoptions = None
        self.test = None
        self.files = []
        self.suffix = None
        self.requestId = None
        self.request = {}
        self.copydir = None
        self.linkdir = None
        self.parents = None
        self.metadata_only = None
        self._resetFileInfo()
        try:
            self.getArgs()
            for (dirname, basename) in self.files:
                self.filedir = dirname
                self.filename = basename
                self.getFileInfo()
                self.makeFileDescription()
                if not self.pnfsDestination:
                    self.getDestination()
                self.storeFile()
                self.cleanup()
        except CdfStoreError:
            # the failing step has already printed its own diagnostics
            sys.exit(1)

    def _resetFileInfo(self):
        """Forget per-file values so one file never inherits another's data."""
        self.filesize = None
        self.firstrun = None
        self.firstevent = None
        self.lastrun = None
        self.lastevent = None
        self.totalevents = None
        self.runsections = None

    def _runCommand(self, cmd):
        """Run `cmd` through os.popen and return its stdout lines; closes the pipe."""
        pipe = os.popen(cmd)
        try:
            return pipe.readlines()
        finally:
            pipe.close()

    def getArgs(self):
        """Parse sys.argv into attributes and validate the resulting settings.

        --station and --host are stored in the SAM_STATION / SAM_HOST_NAME
        environment variables.  Positional arguments are the files to store
        and are recorded in self.files as (directory, name) pairs.

        Raises CdfStoreError('getArgs') for the obsolete --file option, a
        missing SAM environment/station/host, or parents combined with more
        than one file.  Exits with status 2 on unknown options and status 1
        when no dataset can be determined.
        """
        try:
            opts, args = getopt.getopt(
                sys.argv[1:], "hvt",
                ["help", "file=", "dataset=", "dest=", "pnfs=", "station=",
                 "rename", "host=", "description=", "html=", "storeoptions=",
                 "requestId=", "suffix=", "copydir=", "linkdir=", "parent=",
                 "parents=", "metadata_only"])
        except getopt.GetoptError:
            # print help information and exit:
            self.usage()
            sys.exit(2)

        # switches that only raise a flag
        flags = {"-t": "test", "-v": "verbose", "--rename": "rename",
                 "--metadata_only": "metadata_only"}
        # options whose value is stored verbatim in an attribute
        valued = {"--requestId": "requestId", "--pnfs": "pnfsDestination",
                  "--dest": "pnfsDestination", "--suffix": "suffix",
                  "--storeoptions": "storeoptions", "--copydir": "copydir",
                  "--linkdir": "linkdir", "--parent": "parents",
                  "--parents": "parents"}
        # options that are stored and additionally produce a hint
        hinted = {"--dataset": ("dataset", "Dataset"),
                  "--description": ("description", "Description"),
                  "--html": ("html", "HTML")}
        # options that are exported into the environment
        exported = {"--station": "SAM_STATION", "--host": "SAM_HOST_NAME"}

        for key, value in opts:
            if key in flags:
                setattr(self, flags[key], 1)
            elif key in valued:
                setattr(self, valued[key], value)
            elif key in hinted:
                attribute, label = hinted[key]
                setattr(self, attribute, value)
                _say(' Hint: %s is better be specified by requst' % label)
            elif key in exported:
                os.environ[exported[key]] = value
            elif key in ("-h", "--help"):
                self.usage()
                sys.exit()
            elif key == "--file":
                _say("\nERROR option --file is not supported. Specify file as a command argument.\n")
                self.usage()
                raise CdfStoreError('getArgs')

        for path in args:
            (dirname, basename) = os.path.split(path)
            if not dirname:
                # bare file name: the file lives in the current directory
                dirname = '.'
            self.files.append((dirname, basename))

        # the request can only be consulted when a request id was given
        if self.requestId:
            self.getDataFromRequest(self.requestId)
            if not self.dataset:
                self.getDatasetFromRequest(self.requestId)

        # check consistency
        if self.dataset is None:
            _say("\nError: Dataset is not set directly or by request\n")
            self.usage()
            sys.exit(1)
        if self.linkdir and self.copydir:
            _say("\nwarning: Both 'copydir' and 'linkdir' options are used. Ignore 'copydir' option\n")
            self.copydir = None
        if 'SETUP_SAM' not in os.environ:
            _say("\nError: SAM environment is not set. 'setup sam' before running data export.\n")
            raise CdfStoreError('getArgs')
        if 'SAM_STATION' not in os.environ:
            _say("\nError: SAM station is not set\n")
            self.usage()
            raise CdfStoreError('getArgs')
        if 'SAM_HOST_NAME' not in os.environ:
            _say("\nError: SAM host is not set\n")
            self.usage()
            raise CdfStoreError('getArgs')
        if self.parents and len(self.files) > 1:
            _say("\nError: Same parents specified for multiple files. If this was desired, store files separately\n")
            raise CdfStoreError('getArgs')

        if self.verbose:
            _say('\nWill use settings:')
            _say('file: %s' % self.filename)
            _say('file directory: %s' % self.filedir)
            _say('requestId: %s' % self.requestId)
            _say('dataset: %s' % self.dataset)
            _say('pnfs destination: %s' % self.pnfsDestination)
            _say('SAM station: %s' % os.environ['SAM_STATION'])
            _say('SAM host: %s' % os.environ['SAM_HOST_NAME'])
            _say('description: %s' % self.description)
            _say('parent(s): %s' % self.parents)
            if self.copydir:
                _say('copy file temporary into %s' % self.copydir)
            if self.linkdir:
                _say('link file temporary into %s' % self.linkdir)
            if self.metadata_only:
                _say('create metadata file only')
            _say('\n')

    def _parseFileInfo(self, lines):
        """Extract file properties from "key: value" lines of getFileInfo output.

        Resets the per-file attributes, then fills filesize (in units of 1024
        bytes), firstrun/firstevent, lastrun/lastevent, totalevents and
        runsections from every line whose key contains the matching word.
        Lines without ':' and values that cannot be parsed are skipped; the
        caller detects the resulting gaps.

        Returns the value of the "name" line, or None if there was none.
        """
        self._resetFileInfo()
        cdfname = None
        for line in lines:
            if self.verbose:
                _echoResponse(line)
            if ':' not in line:
                continue
            (key, value) = line.split(':', 1)
            value = value.strip()
            try:
                if "size" in key:
                    # floor division: a file below 1024 bytes yields 0
                    self.filesize = int(value) // 1024
                if "name" in key:
                    cdfname = value
                if "First" in key:
                    (self.firstrun, self.firstevent) = value.split('/', 1)
                if "Last" in key:
                    (self.lastrun, self.lastevent) = value.split('/', 1)
                if "events" in key:
                    self.totalevents = value
                if "runsections" in key:
                    self.runsections = value
            except ValueError:
                # non-numeric size or run/event without '/': leave value unset
                continue
        return cdfname

    def _stageFile(self, source):
        """Copy and/or link `source` into copydir/linkdir and switch filedir.

        In metadata-only mode nothing is copied or linked, but filedir is
        still switched to the staging directory.
        Raises CdfStoreError('getFileInfo') when copying or linking fails.
        """
        if self.copydir:
            if self.verbose:
                _say('Copying file %s to %s directory...' % (source, self.copydir))
            if self.metadata_only:
                _say('Metadata only: skip copying file %s to %s directory...' % (source, self.copydir))
            else:
                try:
                    shutil.copy(source, self.copydir)
                except EnvironmentError:
                    _say('Can not copy %s into %s directory' % (source, self.copydir))
                    raise CdfStoreError('getFileInfo')
            self.filedir = self.copydir
        if self.linkdir:
            dest = os.path.join(self.linkdir, self.filename)
            if self.verbose:
                _say('Linking file %s to %s ...' % (dest, source))
            if self.metadata_only:
                _say('Metadata only: skip linking file %s to %s ...' % (dest, source))
            else:
                try:
                    # a relative target would be resolved relative to linkdir
                    # and dangle, so always link to the absolute path
                    os.symlink(os.path.abspath(source), dest)
                except EnvironmentError:
                    _say('Can not link %s to %s' % (dest, source))
                    raise CdfStoreError('getFileInfo')
            self.filedir = self.linkdir

    def _renameFile(self, cdfname):
        """Rename the current file to `cdfname` (+ suffix) inside filedir.

        In test mode without a staging directory the rename is only reported.
        Raises CdfStoreError('getFileInfo') if no name is known or the rename
        fails.
        """
        source = os.path.join(self.filedir, self.filename)
        if not cdfname:
            _say('Can not rename', source, ': no CDF name reported for the file')
            raise CdfStoreError('getFileInfo')
        if self.suffix:
            cdfname = cdfname + self.suffix
        dest = os.path.join(self.filedir, cdfname)
        if self.test and not (self.copydir or self.linkdir):
            _say('Would rename file', source, 'to', dest)
        else:
            if self.verbose:
                _say('Renaming file', source, 'to', dest)
            try:
                os.rename(source, dest)
            except EnvironmentError:
                _say('Can not rename', source, 'to', dest)
                raise CdfStoreError('getFileInfo')
        self.filename = cdfname

    def getFileInfo(self):
        """Query the file with the external getFileInfo tool and stage it.

        Runs "getFileInfo <file> -dsid <dataset>", parses its output, then
        applies the copydir/linkdir and rename options.

        Raises CdfStoreError('getFileInfo') when required information is
        missing from the tool output or staging/renaming fails.
        """
        source = os.path.join(self.filedir, self.filename)
        cmd = "getFileInfo " + source + " -dsid " + self.dataset
        if self.verbose:
            _say("executing", cmd, " ...")
        cdfname = self._parseFileInfo(self._runCommand(cmd))
        if not (self.filesize and self.firstrun and self.firstevent and
                self.lastrun and self.lastevent and
                self.totalevents and self.runsections):
            _say("\nERROR problem getting information from the file", source)
            raise CdfStoreError('getFileInfo')

        # now copy/rename file
        self._stageFile(source)
        if self.rename:
            self._renameFile(cdfname)

        if self.verbose:
            _say('\nInformation extracted from the file:')
            _say("name:", self.filename)
            _say("size:", self.filesize)
            _say("first run/event", self.firstrun, '/', self.firstevent)
            _say("last run/event", self.lastrun, '/', self.lastevent)
            _say("events:", self.totalevents)
            _say("sections:", self.runsections)

    def getDestination(self):
        """Ask DFCTapeLabelTool for a new label and derive the PNFS destination.

        The first output line that starts with the family prefix and has its
        first '.' at index 6 is used; the destination path is built from the
        first 2, 4, 6 and 8 characters of that line.  Skipped in metadata-only
        mode.  Returns self.pnfsDestination (unchanged if no line matched).
        """
        if self.metadata_only:
            _say('Metadata only: skip search for destination')
            return self.pnfsDestination
        family = 'SM'
        # NOTE(review): database login is embedded in the command line
        cmd = 'DFCTapeLabelTool -db bla -dbname OTL -dblogin DFC_SAM/SAM_DFC@cdfofprd -create -prefix %s' % family
        if self.verbose:
            _say("executing", cmd, " ...")
        for line in self._runCommand(cmd):
            if self.verbose:
                _echoResponse(line)
            if line.startswith(family) and line.find('.') == 6:
                self.pnfsDestination = '/pnfs/cdfen/filesets/%s/%s/%s/%s' % (
                    line[0:2], line[0:4], line[0:6], line[0:8])
                if self.verbose:
                    _say('Set PNFS destination:', self.pnfsDestination)
                break
        return self.pnfsDestination

    def getDatasetFromRequest(self, requestId):
        """Set self.dataset from the 'cdf#dataset' entry of the request."""
        self.getDataFromRequest(requestId)
        try:
            self.dataset = self.request['cdf#dataset']
        except KeyError:
            _say('Request', requestId, 'has no dataset definition')

    def getDataFromRequest(self, requestId):
        """Fill self.request from "sam get request details" output.

        Every output line containing ':' becomes one key/value entry.  Does
        nothing when self.request already holds data.
        """
        if self.request:
            return
        cmd = 'sam get request details --format=mdc --requestId=%s' % requestId
        if self.verbose:
            _say("executing", cmd, " ...")
        for line in self._runCommand(cmd):
            if self.verbose:
                _echoResponse(line)
            if ':' in line:
                (key, value) = line.split(':', 1)
                self.request[key] = value.strip()
                if self.verbose:
                    _say('Got data. Key: "%s", value: "%s"' % (key, self.request[key]))

    def makeFileDescription(self):
        """Write the descriptor 'file_descriptor_<filename>.py' in the cwd.

        Command-line values take precedence over values from the request,
        which take precedence over built-in defaults.  Free-text values are
        written as escaped Python literals so that quotes in e.g. the
        description cannot break the generated file.
        """
        self.pyfilename = 'file_descriptor_%s.py' % self.filename
        if self.verbose:
            _say('Creating file description python file:', self.pyfilename)

        user = _currentUser()
        request = self.request
        stamp = strftime("%m/%d/%Y %H:%M:%S", gmtime())

        out = []
        out.append("from import_classes import *\n")
        out.append("t = SAMMCFile(\n")
        out.append(" name = %s,\n" % _pyQuote(self.filename))
        out.append(" events = Events(%s,%s,%s),\n" % (self.firstevent, self.lastevent, self.totalevents))
        out.append(" tier = 'generated',\n")
        out.append(" appfamily = AppFamily ('generator', '1.00', 'SamUploader'),\n")
        out.append(" start_time = '%s',\n" % stamp)
        out.append(" end_time = '%s',\n" % stamp)
        out.append(" sizeK = %d,\n" % self.filesize)
        out.append(" keywordDict = { \n")
        out.append(" 'Global':\n")
        out.append(" { 'ProducedByName':%s,\n" % _pyQuote(user))
        out.append(" 'OriginName':'NA',\n")
        out.append(" 'FacilityName':'users data',\n")
        if self.requestId:
            out.append(" 'RequestId':%s,\n" % _pyQuote(self.requestId))
        # replicate metadata from request
        out.append(" 'ProducedForName':%s,\n" % _pyQuote(request.get('global#producedforname', user)))
        out.append(" 'GroupName':%s,\n" % _pyQuote(request.get('global#groupname', 'cdf')))
        out.append(" 'Stream':%s,\n" % _pyQuote(request.get('global#stream', 'unidentified')))
        description = self.description or request.get('global#description', 'not described')
        out.append(" 'Description':%s,\n" % _pyQuote(description))
        out.append(" 'RunType':%s,\n" % _pyQuote(request.get('global#runtype', 'other')))
        out.append(" 'Phase':%s,\n" % _pyQuote(request.get('global#phase', 'unspecified')))
        out.append(" },\n")
        out.append(" 'CDF':\n")
        out.append(" { 'runsections':%s,\n" % _pyQuote(self.runsections))
        dataset = self.dataset or request.get('cdf#dataset', 'undefined')
        out.append(" 'DataSet':%s,\n" % _pyQuote(dataset))
        html = self.html or request.get('cdf#html', 'undefined')
        out.append(" 'html':%s,\n" % _pyQuote(html))
        out.append(" },\n")
        out.append(" 'generated' :{\n")
        out.append(" 'AppFamily':'generator',\n")
        if self.parents:
            out.append(" 'parents':%s,\n" % _pyQuote(self.parents))
        out.append(" 'FirstEvent':%s,\n" % self.firstevent)
        out.append(" 'AppVersion':'1.00',\n")
        out.append(" 'LastEvent':%s,\n" % self.lastevent)
        out.append(" 'NumRecords':%s,\n" % self.totalevents)
        out.append(" 'AppName':'SamUploader',\n")
        out.append(" 'TotalEvents':%s,\n" % self.totalevents)
        out.append(" 'RunNumber':%s,}\n" % self.firstrun)
        out.append(" }\n")
        out.append(" )\n")  # end of t=SAMMCFile()

        with open(self.pyfilename, 'w') as pyfile:
            pyfile.writelines(out)

    def storeFile(self):
        """Run "sam store" for the current file and remove the descriptor.

        In metadata-only mode nothing is done and the descriptor is kept.  In
        test mode the command is only printed.  Otherwise the store counts as
        successful when the output contains "File store complete" and the
        file can afterwards be located.

        Raises CdfStoreError('storeFile') when the store fails.
        """
        if self.metadata_only:
            _say('Metadata only: skip File store')
            return None
        pieces = ['sam store',
                  '--descrip=%s' % self.pyfilename,
                  '--source=%s' % self.filedir,
                  '--dest=%s' % self.pnfsDestination]
        if self.storeoptions:
            pieces.append(self.storeoptions)
        cmd = ' '.join(pieces)
        if self.test:
            _say('Would execute command :', cmd)
        else:
            if self.verbose:
                _say('Executing command :', cmd)
            output = self._runCommand(cmd)
            result = None
            for line in output:
                if "File store complete" in line:
                    result = 1
            if not result or self.verbose:
                _say('\nCommand: ', cmd)
                for line in output:
                    _echoResponse(line)
            if result:
                result = self.checkFileOnDestination()
                if not result:
                    _say("\nERROR file %s has been lost on destination" % self.filename)
            if not result:
                _say("\nERROR storing file")
                raise CdfStoreError('storeFile')
        if self.verbose:
            _say('Deleting file description python file:', self.pyfilename)
        os.remove(self.pyfilename)

    def checkFileOnDestination(self):
        """Check with "sam locate" that the current file is known to SAM.

        Returns 1 when any output line contains '/', 0 otherwise and in test
        mode, None in metadata-only mode.
        """
        if self.metadata_only:
            _say('Metadata only: skip File check')
            return None
        cmd = 'sam locate %s' % self.filename
        if self.test:
            _say('Would execute command :', cmd)
            return 0
        if self.verbose:
            _say('Executing command :', cmd)
        output = self._runCommand(cmd)
        result = 0
        for line in output:
            if "/" in line:
                result = 1
                break
        if not result or self.verbose:
            _say('\nCommand: ', cmd)
            for line in output:
                _echoResponse(line)
        return result

    def cleanup(self):
        """Remove the temporary copy/link created for the current file.

        Raises CdfStoreError('cleanup') when the file cannot be removed.
        """
        if self.metadata_only or not (self.copydir or self.linkdir):
            return
        staged = os.path.join(self.filedir, self.filename)
        if self.verbose:
            _say('Removing %s...' % staged)
        try:
            os.remove(staged)
        except EnvironmentError:
            _say('Can not remove file %s' % staged)
            raise CdfStoreError('cleanup')


if __name__ == '__main__':
    obj = CdfStore()