#!/usr/bin/env sampy
#
# A script for generating the luminosity parentage files required
# by the lm_access package, starting from maps stored on disk which
# were generated with the makeLumTables.sh script (very slow).
#
import os
import sys
import getopt
import string
import gzip

# The SAM client is only needed to expand a dataset definition into file
# names, so tolerate its absence and let the rest of the module load.
try:
    from Sam import sam
except ImportError:
    sam = None

# Directory holding the tables written by the makeLumTables.sh script.
_TABLE_DIR = '/prj_root/500/com/parentage'

# Ordered (pattern, D0reco version, thumbnail version) rules.  The first
# pattern found inside the file type wins, so the three-version names must
# stay ahead of the two-version names they contain.
_SUBSTRING_VERSIONS = (
    ('p17.03.03_p17.07.00_p17.09.03', 'p17.03.03', 'p17.09.03'),
    ('p17.03.03_p17.07.01_p17.09.03', 'p17.03.03', 'p17.09.03'),
    # NOTE(review): a p17.05.01 name reported as D0reco p17.03.03, unlike
    # the two-version p17.05.01 rules below.  Kept as found; confirm that
    # this is intended and not a copy/paste slip.
    ('p17.05.01_p17.07.01_p17.09.03', 'p17.03.03', 'p17.09.03'),
    ('p17.03.03_p17.07.00', 'p17.03.03', 'p17.07.00'),
    ('p17.03.03_p17.07.01', 'p17.03.03', 'p17.07.01'),
    ('p17.05.01_p17.07.00', 'p17.05.01', 'p17.07.00'),
    ('p17.05.01_p17.07.01', 'p17.05.01', 'p17.07.01'),
    ('p14.05.02_p14.06.00', 'p14.05.02', 'p14.06.00'),
)

# File types which must match in full: type -> (D0reco, thumbnail).
_EXACT_VERSIONS = {
    'raw_p14.03.00_p14.fixtmb.01': ('p14.03.00', 'p14.fixtmb.01'),
    'raw_p14.03.01_p14.fixtmb.01': ('p14.03.01', 'p14.fixtmb.01'),
    'raw_p14.03.02_p14.fixtmb.01': ('p14.03.02', 'p14.fixtmb.01'),
    'raw_p14.05.00_p14.fixtmb.02': ('p14.05.00', 'p14.fixtmb.02'),
    'raw_p13.05.00_000_p14.05.02_prod_p14.fixtmb2.02':
        ('p14.05.02', 'p14.fixtmb2.02'),
    'raw_p13.06.01_000_p14.05.02_prod_p14.fixtmb2.02':
        ('p14.05.02', 'p14.fixtmb2.02'),
    'raw_r13.06.01_000_p14.05.02_prod_p14.fixtmb2.02':
        ('p14.05.02', 'p14.fixtmb2.02'),
}

# Leading 27 characters for which both versions are p14.05.02.
_P14_05_02_PREFIXES = (
    'raw_p13.05.00_000_p14.05.02',
    'raw_p13.06.01_000_p14.05.02',
    'raw_r13.06.01_000_p14.05.02',
)

# Extension of the per-input-file outputs, keyed by output type.
_PER_FILE_SUFFIX = {'NP': 'lum', 'LM': 'parentage', 'DB': 'lumiDB'}

# Banner line describing each output type.
_TYPE_BANNER = {
    'NP': ' use the np_tmb_stream format (first LBN, last LBN)\n',
    'LM': ' use the lm_access stage3 format (first LBN, last LBN, run number, stream, D0reco version, TMB version, number of streams)\n',
    'DB': ' use the lm_access lumiDB format\n',
    'DUP': ' use the duplication format (raw data file, first LBN, last LBN, run number, stream, D0reco version, TMB version, skimmed file)\n',
    'RUNS': ' just produce a list of all the runs\n',
}


# Convert the application name and version into the versions of D0reco
# and of the thumbnail which are used inside the stage 3 files.
def getStage3Version(fileName,applName,applVers):
    """Return [D0reco version, thumbnail version] for a file name.

    The versions are derived from the file type, i.e. the part of
    fileName starting at 'raw' in its first '.raw' (the whole name when
    '.raw' does not occur).  applName and applVers are accepted for
    interface compatibility but are not used.

    Rules are tried in this order: substring patterns, exact file types,
    any other 'p14.fixtmb2.02' type, the p14.05.02 prefixes, and finally
    characters 4-12 of the file type used for both versions.  Repeated
    (hence unreachable) copies of some patterns in the former if/elif
    chain have been dropped; results are unchanged.
    """
    ftype = fileName[fileName.find('.raw')+1:]

    # "> 0" rather than ">= 0": a pattern at position 0 has never counted.
    for pattern, d0reco, tmb in _SUBSTRING_VERSIONS:
        if ftype.find(pattern) > 0:
            return [d0reco, tmb]

    if ftype in _EXACT_VERSIONS:
        d0reco, tmb = _EXACT_VERSIONS[ftype]
        return [d0reco, tmb]

    if ftype.find('p14.fixtmb2.02') > 0:
        return [ftype[4:13], 'p14.fixtmb2.02']

    for prefix in _P14_05_02_PREFIXES:
        if ftype.startswith(prefix):
            return ['p14.05.02', 'p14.05.02']

    d0reco = ftype[4:13]
    return [d0reco, d0reco]


def _openFresh(path):
    """Remove any existing file at path and return it opened for writing."""
    if os.path.isfile(path):
        os.remove(path)
    return open(path, 'w')


# Main part of the script.
def generateFiles(skimName,outType,fileList,csgPass,format,tableDir=_TABLE_DIR):
    """Write luminosity parentage files from an LBN table stored on disk.

    The gzipped table is read line by line.  A line starting with '#'
    introduces an input file (name, data tier, application name,
    application version, number of files); every other non-blank line is
    an LBN entry belonging to the most recent '#' line.

    Arguments:
      skimName -- skim name; part of the table name and of the output
                  directory ('none' selects the 'not_skimmed' directory).
      outType  -- 'NP', 'LM' or 'DB': one output file per selected input
                  file (.lum, .parentage, .lumiDB), entries of partition
                  000 left out.  'OLD': one <skimName>.lbnTable.old file.
                  'DUP': one <skimName>.dupli file.  'RUNS': one
                  <skimName>.runlist file with the sorted run numbers of
                  every entry in the table.  Any other value writes no
                  entries.
      fileList -- input file names to process; empty means all of them.
      csgPass  -- data pass; characters 1-2 equal to '17' or '21' make
                  the LM format report the application version of the
                  entry as both D0reco and thumbnail version.
      format   -- data format; 'raw' selects rawData.lbnTable.gz and the
                  raw_data output directory.
      tableDir -- directory holding the tables and time stamp files.

    Output goes to a directory below the current working directory,
    which is created when missing.  Progress is reported on stdout.
    Raises IOError/OSError when the table cannot be read, and IndexError
    or ValueError on a malformed table line.
    """
    # Name of the original table generated by makeLumTables.sh script.
    if format=='raw':
        lbnTable = '%s/rawData.lbnTable.gz'%tableDir
    else:
        lbnTable = '%s/%s.%s.%s.lbnTable.gz'%(tableDir,csgPass,format,skimName)

    # Echo the time stamp left next to the tables, when there is one.
    timeStamp = '%s/%s.%s.makeLumTables.timeStamp'%(tableDir,csgPass,format)
    if os.path.isfile(timeStamp):
        stampFile = open(timeStamp,'r')
        try:
            for stampLine in stampFile:
                sys.stdout.write('%s\n'%stampLine.replace('\n',''))
        finally:
            stampFile.close()

    # Parse the user provided file list; a set keeps each lookup cheap.
    genAll = not fileList
    wanted = set(fileList)

    # Output directory.
    # NOTE(review): the skim banner is written for every non-raw format,
    # "-skim none" included -- confirm against the intended layout.
    cwd = os.getcwd()
    if format!='raw':
        if skimName!='none':
            outDir = '%s/%s'%(cwd,skimName)
        else:
            outDir = '%s/not_skimmed'%cwd
        sys.stdout.write('genLBNtables: create luminosity parentage files for the %s skim\n'%(skimName))
    else:
        outDir = '%s/raw_data'%cwd
        sys.stdout.write('genLBNtables: create luminosity parentage files for raw data\n')
    if not os.path.isdir(outDir):
        os.mkdir(outDir)

    # Write a welcome message (every line on stdout).
    sys.stdout.write(' reading information from %s\n'%(lbnTable))
    if genAll:
        sys.stdout.write(' for all the files available in SAM\n')
    else:
        sys.stdout.write(' for %d files specified in input\n'%(len(fileList)))
    if outType in _TYPE_BANNER:
        sys.stdout.write(_TYPE_BANNER[outType])

    # Status flags and counters.
    writeEntries = False
    outFile = None
    ngenFiles = 0
    runs = set()
    fileName = ''
    versionD0reco = ''
    versionTMB = ''
    useApplVersion = csgPass[1:3] in ('17','21')

    tableLBN = gzip.GzipFile(lbnTable)
    try:
        # A single output file is used when searching for duplicate
        # events within the skimming.
        if outType=='DUP':
            outFile = _openFresh('%s/%s.dupli'%(outDir,skimName))

        # Loop in the input LBN table.
        for line in tableLBN:
            # GzipFile yields byte strings on Python 3.
            if not isinstance(line,str):
                line = line.decode('latin-1')
            tokens = line.split()
            if not tokens:
                continue

            # A line indicating a new file.
            if line[0:1]=='#':
                fileName = tokens[1]
                applName = tokens[3]
                applVers = tokens[4]
                numFiles = int(tokens[5])
                versionD0reco,versionTMB = getStage3Version(fileName,applName,applVers)

                # Decide whether to generate the relevant file for
                # the user or not.
                writeEntries = genAll or fileName in wanted
                if not writeEntries:
                    continue

                if outType in _PER_FILE_SUFFIX:
                    # One output per input file: finish the previous
                    # one before starting the next.
                    if outFile is not None:
                        outFile.close()
                        outFile = None
                        ngenFiles = ngenFiles+1
                    outFile = _openFresh('%s/%s.%s'%(outDir,fileName,_PER_FILE_SUFFIX[outType]))
                elif outType=='OLD':
                    # OLD private parentage file (required for
                    # testing/comparison): one file, a header per input.
                    if outFile is None:
                        outFile = open('%s/%s.lbnTable.old'%(outDir,skimName),'w')
                    outFile.write('%s\t%s\t%s\t%s\t%s\n'%(fileName,versionTMB,versionD0reco,applVers,numFiles))
                elif outType!='DUP':
                    # Unknown format (or RUNS): nothing to write.
                    writeEntries = False
                continue

            # Dealing with an LBN entry. Just copy it to the output file
            # (if requested) in the appropriate format.
            rawData = tokens[0]
            runNum = int(tokens[1])
            numEvts = int(tokens[2])
            firstLBN = tokens[3]
            lastLBN = tokens[4]
            streamName = tokens[5]
            dataTier = tokens[6]
            runs.add(runNum)

            if not writeEntries or outFile is None:
                continue

            if outType=='OLD':
                outFile.write('%s\t%s\t%s\t%s\t%s\n'%(rawData,runNum,numEvts,firstLBN,lastLBN))
                continue
            if outType=='DUP':
                outFile.write('%s %s %s %s %s %s %s %s\n'%(rawData,firstLBN,lastLBN,runNum,streamName,versionD0reco,versionTMB,fileName))
                continue

            # The per-file formats leave out partition 000.
            partitionNum = rawData.split('.raw')[0][-3:]
            if partitionNum=='000':
                continue

            if outType=='NP':
                outFile.write('%s %s\n'%(firstLBN,lastLBN))
                continue

            # Column 10 (number of streams) is only read where it is
            # written out, so the other formats accept shorter lines.
            numStreams = tokens[9]
            if format!='raw':
                applName = tokens[7]
                applVers = tokens[8]
            else:
                applName = 'datalogger'
                applVers = 0.0

            if outType=='LM':
                if format=='raw':
                    outFile.write('%s %s %s %s dummy dummy %s\n'%(firstLBN,lastLBN,runNum,streamName,numStreams))
                else:
                    if useApplVersion:
                        versionD0reco = applVers
                        versionTMB = applVers
                    outFile.write('%s %s %s %s %s %s %s\n'%(firstLBN,lastLBN,runNum,streamName,versionD0reco,versionTMB,numStreams))
            elif outType=='DB':
                outFile.write('%s %s %s %s %s %s %s %s %s %s\n'%(firstLBN,lastLBN,rawData,streamName,runNum,applName,applVers,dataTier,numEvts,numStreams))

        # Close any output file which is still open.
        if outFile is not None:
            outFile.close()
            outFile = None
            ngenFiles = ngenFiles+1
    finally:
        # Reached on errors too, so that no handle is left behind.
        if outFile is not None:
            outFile.close()
        tableLBN.close()

    sys.stdout.write('genLBNtables: generated %d luminosity parentage files in %s\n'%(ngenFiles,outDir))

    # Write the list of runs if required.
    if outType=='RUNS':
        runFile = _openFresh('%s/%s.runlist'%(outDir,skimName))
        try:
            for run in sorted(runs):
                runFile.write('%d\n'%(run))
        finally:
            runFile.close()
    return


# Print a list of the valid skims for each data version.
def printValidSkims():
    """Write the table of valid skims per data version to stdout.

    Each row gives a skim name followed by its availability ('yes' or
    'no') for P14 pass 1, P14 pass 2, P17 and P21.  Columns are padded
    to 12 characters so that the five of them line up under the
    60-character rules.  Returns None.
    """
    columns = ('Skim Name','P14 pass 1','P14 pass 2','P17 pass *','P21 pass *')
    skims = (
        ('1EM2JET',     'yes', 'yes', 'no',  'no'),
        ('1EMloose',    'yes', 'yes', 'no',  'no'),
        ('1MU2JET',     'yes', 'yes', 'no',  'no'),
        ('1MUloose',    'yes', 'yes', 'no',  'no'),
        ('2EM',         'yes', 'yes', 'no',  'no'),
        ('2EMhighpt',   'yes', 'yes', 'yes', 'yes'),
        ('2MU',         'yes', 'yes', 'no',  'no'),
        ('2MUhighpt',   'yes', 'yes', 'yes', 'yes'),
        ('3JET',        'yes', 'yes', 'yes', 'yes'),
        ('3LEP',        'no',  'yes', 'no',  'no'),
        ('AA',          'yes', 'yes', 'no',  'no'),
        ('AA_JPSI',     'no',  'yes', 'no',  'no'),
        ('BID',         'yes', 'yes', 'no',  'no'),
        ('bMU',         'yes', 'no',  'no',  'no'),
        ('DIFF',        'yes', 'yes', 'yes', 'no'),
        ('EM1TRK',      'yes', 'yes', 'no',  'no'),
        ('EMinclusive', 'no',  'no',  'yes', 'yes'),
        ('EMMU',        'yes', 'yes', 'yes', 'yes'),
        ('Higgs',       'yes', 'yes', 'yes', 'yes'),
        ('JESB',        'yes', 'yes', 'no',  'no'),
        ('JPSI',        'yes', 'yes', 'yes', 'yes'),
        ('METTRK',      'yes', 'yes', 'no',  'no'),
        ('MU2TRK',      'yes', 'yes', 'no',  'no'),
        ('MUinclusive', 'no',  'no',  'yes', 'yes'),
        ('NP',          'yes', 'yes', 'yes', 'yes'),
        ('QCD',         'yes', 'yes', 'yes', 'yes'),
        ('TAU2TRK',     'yes', 'yes', 'no',  'no'),
        ('TAUTRIG',     'no',  'no',  'yes', 'yes'),
        ('TOPJETTRIG',  'yes', 'yes', 'yes', 'yes'),
        ('ZBMB',        'yes', 'yes', 'yes', 'yes'),
    )
    rule = '-'*60 + '\n'
    rowFormat = '%-12s'*len(columns) + '\n'

    emit = sys.stdout.write
    emit('Table with a list of the valid skims by D0 version code\n')
    emit(rule)
    emit(rowFormat%columns)
    for row in skims:
        emit(rowFormat%row)
    emit(rule)
    emit('Use -skim none for unskimmed data (available for all passes)\n')
    emit(rule)
    emit('\n')
    return


# Print a help message.
def usage():
    """Write the help text and the skim table to stdout, then exit.

    Never returns: it always ends with sys.exit(1).
    """
    for text in (
        '\ngenLBNtables: generate the luminosity parentage files for the Common Sample group skims\n\n',
        'usage: genLBNtables -format [raw|TMB|CAF] -pass [p**pass*] \n',
        ' -skim ..... [-filelist ..... | -defname .....]\n',
        ' where -format [raw|TMB|CAF] indicates the format of the data for which the user wants\n',
        ' the parentage tables (parentage files for CAF trees are \n',
        ' not available for p14pass1 and p14pass2 data\n',
        ' -pass ....... indicates the type of data for which the user is requesting\n',
        ' the generation of the parentage files (currently the valid\n',
        ' options are listed in /prj_root/500/com/parentage/validPasses.txt\n',
        ' -skim ....... name of the skim for which the user wants to generate the\n',
        ' the parentage files (see below for the complete list)\n',
        ' -type [LM|DB] indicates the format of the luminosity parentage files\n',
        ' the current default (LM) is used by the runrange_luminosity\n',
        ' program in the lm_access package, using input from stage3 files\n',
        ' the DB option can be used for the calculation of the luminosity\n',
        ' using input from the luminosity database\n',
        ' -filelist ..... name of the file containing a list of files for which the parentage\n',
        ' files will be created\n',
        ' -defname ...... name of the SAM dataset definition for which the parentage files will\n',
        ' be created\n',
        ' -help display this page\n\n',
    ):
        sys.stdout.write(text)
    printValidSkims()
    for text in (
        'Additional expert options are listed below:\n',
        ' -type [NP|RUNS|DUP] different formats used for special purposes (NP: old format,\n',
        ' RUNS: used for generating a list of runs, DUP: generates the tables\n',
        ' used by the program looking for duplicated LBNs)\n\n',
    ):
        sys.stdout.write(text)
    sys.exit(1)


def popOption(args, flag, default):
    """Remove "flag value" from args and return the value.

    args is modified in place; only the first occurrence of flag is
    consumed.  Returns default, leaving args untouched, when flag is
    not present.  Raises ValueError when flag is the last argument and
    therefore has no value.
    """
    if flag not in args:
        return default
    position = args.index(flag)
    if position+1 >= len(args):
        raise ValueError('option %s requires a value'%flag)
    value = args[position+1]
    del args[position:position+2]
    return value


def readFileList(path):
    """Return the base names of the files listed, one per line, in path.

    Surrounding white space is dropped and blank lines are skipped.
    """
    names = []
    listing = open(path,'r')
    try:
        for line in listing:
            entry = line.strip()
            if entry:
                names.append(os.path.basename(entry))
    finally:
        listing.close()
    return names


def readValidPasses(path):
    """Return the white-space separated fields of each line of path.

    Lines with fewer than two fields (blank lines included) are skipped,
    so every element provides at least a pass and a format.
    """
    passes = []
    table = open(path,'r')
    try:
        for line in table:
            fields = line.split()
            if len(fields) >= 2:
                passes.append(fields)
    finally:
        table.close()
    return passes


# Main part of the script.
def main(args):
    """Parse the command line in args, validate it and generate the files.

    args is the argument list without the program name.  The function
    does not return: it leaves through sys.exit(0) on success, or
    through sys.exit(1) (directly or via usage()) on any error.
    """
    # Do not execute the version from /home/mverzocc/scripts/genLBNtables
    allowedUids = (3782,11248,11879)
    if sys.argv[0]=='/home/mverzocc/scripts/genLBNtables' and os.getuid() not in allowedUids:
        sys.stdout.write('You are trying to use the development version of the genLBNtables script\n')
        sys.stdout.write('Please use the official version obtained by doing "setup lm_tools" instead\n')
        sys.exit(1)

    args = list(args)
    if "-help" in args or len(args)==0:
        usage()

    # List of common sample group skims.
    validCSskims = (
        '1EM2JET', '1EMloose', '1MU2JET', '1MUloose', '2EM', '2EMhighpt',
        '2MU', '2MUhighpt', '3JET', 'AA', 'EM1TRK', 'EMMU', 'Higgs', 'JESB',
        'JPSI', 'METTRK', 'MU2TRK', 'NP', 'QCD', 'TAU2TRK', 'TOPJETTRIG',
        'ZBMB', 'bMU', '3LEP', 'AA_JPSI', 'BID', 'DIFF', 'EMinclusive',
        'MUinclusive', 'TAUTRIG', 'none',
    )

    # Parse the command line options.
    try:
        skimName = popOption(args,'-skim','')
        dataFormat = popOption(args,'-format','CAF')
        outType = popOption(args,'-type','LM')
        defName = popOption(args,'-defname',None)
        listFiles = popOption(args,'-filelist',None)
        csgPass = popOption(args,'-pass','p17pass3')
    except ValueError:
        # sys.exc_info() keeps this readable by old and new interpreters.
        sys.stdout.write('\ngenLBNtables: %s\n'%sys.exc_info()[1])
        usage()

    fileList = []
    if defName is not None:
        sys.stdout.write('genLBNtables: executing sam translate constraints --dim="__set__ %s"\n'%defName)
        # Only the query itself is guarded; the exits stay outside the
        # try so that they can never be swallowed by the handler.
        try:
            samQuery = sam.translateConstraints(dimensions='__set__ %s'%defName)
            samFiles = [entry['fileName'] for entry in samQuery]
        except Exception:
            sys.stderr.write('\ngenLBNtables: failed to perform SAM query on requested dataset\n')
            sys.stderr.write(' abort execution\n')
            sys.exit(1)
        if len(samFiles)==0:
            sys.stderr.write('\ngenLBNtables: required SAM dataset is empty (no files satisfy the criteria)\n')
            sys.stderr.write(' abort execution\n')
            sys.exit(1)
        sys.stdout.write(' dataset definition contains %6d files\n'%len(samFiles))
        fileList.extend(samFiles)

    if listFiles is not None:
        listedFiles = readFileList(listFiles)
        # An empty selection would otherwise mean "all the files".
        if len(listedFiles)==0:
            sys.stderr.write('\ngenLBNtables: file list %s contains no file names\n'%listFiles)
            sys.stderr.write(' abort execution\n')
            sys.exit(1)
        fileList.extend(listedFiles)

    # Check the validity of the command line options.
    if dataFormat=='raw':
        skimName = 'none'
    if skimName=='':
        usage()
    if skimName not in validCSskims:
        sys.stdout.write('\ngenLBNtables: invalid skim name %s\n'%(skimName))
        sys.stdout.write('\ngenLBNtables: you must provide the name of the Common Sample group skim\n')
        usage()

    # The list of valid passes is only consulted for non-raw data.
    if dataFormat!='raw':
        validPass = readValidPasses("/prj_root/500/com/parentage/validPasses.txt")
        if [csgPass,dataFormat] not in validPass:
            sys.stdout.write('\ngenLBNtables: you must specify whether the data pass\n')
            sys.stdout.write(' you are looking at, valid options are:\n')
            for fields in validPass:
                sys.stdout.write(' -pass %s -format %s\n'%(fields[0],fields[1]))
            usage()

    if outType not in ('NP','LM','DUP','RUNS','DB','OLD'):
        sys.stdout.write('\ngenLBNtables: invalid output format %s\n'%(outType))
        usage()

    # Generate the parentage files.
    generateFiles(skimName,outType,fileList,csgPass,dataFormat)

    # End of the script.
    sys.exit(0)


if __name__=='__main__':
    main(sys.argv[1:])