#!/bin/sh
VERSION=mcimport.20070126
# TO DO
#   add foreground option, for use in cron
#
# Consolidate and archive MC import files
#
# usage : ./mcimport [-c] [-d] [-n] [-p secs] [-v] [-w] [-W] INDIR
#         (historical usage line: ./mcimport kordosky echo ;
#          only the first positional argument is read by this script)
#
#   -c       cron mode: run in the foreground, appending stdout to the log
#   -d       debug:   sets DEBUG, and ECHO/NOOP so that commands are echoed
#   -n       no-op:   sets ECHO/NOOP so that commands are echoed, not run
#   -p secs  pause this many seconds at the start of MAIN (for testing pid)
#   -v       verbose: run in the foreground with output on the terminal
#   -w       write only (skip the TAR step)
#   -W       tar only   (skip the WRITE step)
#   -f arg, -F, -r arg, -s arg
#            accepted and stored (FLUSH/RETIR, REL, SEL) but not referenced
#            anywhere else in this script
#
# OUTLINE
#   MC import files show up in /local/scratch26/mindata/${USER}
#   where USER is initially one of kordosky or howcroft
#
#   These files are combined into large tarfiles,
#   and moved into /pnfs/minos/mcimport/${USER}
#   NOTE(review): the code below actually writes to
#   /pnfs/minos/stage/${INDIR} - confirm which path is current.
#
#   log files for mc production will be rsync'd to
#   $MINOS_DATA/log_data/mcimport/${USER}
#

#######################################
# Print the size in bytes of a file, taken from column 5 of "ls -l".
# (ls is used rather than reading the file so that PNFS entries are
#  only stat'ed, exactly as the original inline pipelines did.)
# Arguments: $1 - path
# Outputs:   size on stdout, empty if ls fails
#######################################
filesize() {
  ls -l "$1" | tr -s ' ' | cut -f 5 -d ' '
}

#######################################
# Refuse to run if a previous mcimport for this INDIR is still alive.
# Globals:   PIFL (pid file), INDIR, MAILTO (read)
# Outputs:   status messages on stdout; mail to MAILTO on conflict
# Returns:   0 if there is no pid file or it was stale (and removed);
#            exits the script with status 1 if the recorded pid is running
#######################################
pid() {
  if [ -r "${PIFL}" ]; then
    echo " OOPS - found ${PIFL} "
    prepid=$(cat "${PIFL}")
    if ps -p "${prepid}"; then
      echo " OOPS - ${prepid} is still running "
      echo " BAILING OUT "
      date
      echo "mcimport ${INDIR} still running, pid ${prepid}" \
        | Mail -s "mcimport ${INDIR} conflict on $(hostname | cut -f1 -d.)" \
          "${MAILTO}"
      exit 1
    else
      echo " OK - stale pid file "
      rm -f "${PIFL}"
    fi
  fi
}

#######################################
# Group the *.tar.gz files in INPAT by CLASS (file name with the
# run/subrun characters 6-14 removed) and pack each class into tarfiles
# of at most about SLIM bytes under TAPAT.  Unless NOOP is set, each
# tarfile is unpacked into TWPAT and every member is checked against its
# md5sum before the originals are removed and an index file is written.
# Globals:   INPAT, TWPAT, TAPAT, SLIM, NOOP, ECHO, VERB (read)
# Outputs:   progress report on stdout
# Returns:   exits the script with status 1 on a checksum failure
#######################################
TAR() {
  cd "${INPAT}" || exit 1

  # Only files older than 10 minutes, so files still being copied in are
  # left alone.  Sorted, because find's order is unspecified and the
  # tarfile name is built from the first and last member.
  ALLFILES=$(find . -maxdepth 1 -name '*.tar.gz' -mmin +10 \
    | cut -f 2 -d / | sort)

  # set classes from file names stripping out run/subrun
  CLASSES=$(printf '%s\n' "${ALLFILES}" | cut -c 1-5,15- | sort -u)

  for CLASS in ${CLASSES}; do # # # C L A S S # # #

    CLASS1=$(echo "${CLASS}" | cut -c 1-5)
    CLASS2=$(echo "${CLASS}" | cut -c 6-)
    printf ' \n%s\nFor class %s %s \n ' "$(date)" "${CLASS1}" "${CLASS2}"

    # Quoted so the pattern reaches grep intact instead of being globbed.
    FILES=$(printf '%s\n' "${ALLFILES}" | grep "${CLASS1}.*${CLASS2}")
    FLAST=$(printf '%s\n' "${FILES}" | tail -n 1)

    ### for testing with 2001-10
    ### FILES=F00000892_0000.mdaq.root
    ### FLAST=F00000892_0000.mdaq.root

    NFILES=$(printf '%s\n' "${FILES}" | wc -w)
    NFILES=$((NFILES)) # normalise any padding from wc

    # ${FILES} is deliberately unquoted: echo flattens the list to one line
    TFILS=$(echo ${FILES} | cut -f 1 -d ' ') # members of the current tarfile
    SFILS=$(filesize "${INPAT}/${TFILS}")    # their accumulated size
    NFILS=1                                  # their count
    NTOTS=0                                  # files tarred so far
    NOUTS=0                                  # tarfiles written so far
    SNEXT=0                                  # size if the next file is added

    if [ "${NFILES}" -eq 0 ]; then # # # NFILES # # #
      printf " No data files to process \n"
    else
      printf ' %s files from \n' "${NFILES}"
      printf ' %s to\n' "${TFILS}"
      printf ' %s \n' "${FLAST}"
      printf "\n"

      # Prime the pump
      mkdir -p "${TWPAT}"
      rm -f "${TWPAT:?}"/*

      # make checksums as needed
      # these should be generated on the initial copy since Jan 2007
      if [ -z "${NOOP}" ]; then
        [ -r "md5/all.md5" ] || touch md5/all.md5
        for FIL in ${FILES}; do
          if ! grep -q -F -- "${FIL}" md5/all.md5; then
            printf 'md5sum %s\n' "${FIL}"
            md5sum "${FIL}" >> "md5/$$.md5"
          fi
        done
        if [ -r "md5/$$.md5" ]; then
          cat "md5/$$.md5" >> md5/all.md5 && rm "md5/$$.md5"
        fi
      fi

      # Tar them up, ${TFILS} into ${TOUT}
      # The first file is already in TFILS; EOF is a sentinel that forces
      # the final partial group to be written.
      SFILES=$(echo ${FILES} EOF | cut -f 2- -d ' ')
      for FIL in ${SFILES}; do # # # FILs # # #

        if [ -r "${INPAT}/${FIL}" ]; then
          SIZ=$(filesize "${INPAT}/${FIL}")
          SNEXT=$(echo "${SFILS} + ${SIZ}" | bc)
        fi

        if [ "${SNEXT}" -gt "${SLIM}" ] || [ "${FIL}" = "EOF" ]; then
          TFILL=$(echo ${TFILS} | tr ' ' '\n')
          # printf " OK, tarring ${TFILL}\n"
          TFIR=$(printf '%s\n' "${TFILL}" | head -n 1)
          TLAS=$(printf '%s\n' "${TFILL}" | tail -n 1)
          # printf " TFIR TLAS ${TFIR} ${TLAS} \n"
          TFIP=$(echo "${TFIR}" | cut -f 1 -d '.')
          TLAP=$(echo "${TLAS}" | cut -f 1 -d '.')
          # printf " TFIP TLAP ${TFIP} ${TLAP} \n"
          NOUTS=$((NOUTS + 1))
          TOUP="${TFIP}-${TLAP}"
          TOUT=${TOUP}.tar
          rm -f "${TWPAT}/${TOUT}"
          printf '%s %s\n' "${TOUT}" "${NFILS}"
          printf ' %s to\n' "${TFIR}"
          printf ' %s \n' "${TLAS}"

          # TAR
          printf "\n"
          # ${ECHO} and ${TFILS} are deliberately unquoted: ECHO is empty
          # or "echo", TFILS is a space separated list of member names.
          ${ECHO} tar cf "${TAPAT}/${TOUT}" ${TFILS}

          # UNTAR AND VERIFY COPIES
          #
          if [ -z "${NOOP}" ]; then
            rm -f "${TWPAT:?}"/*
            ${ECHO} tar xf "${TAPAT}/${TOUT}" -C "${TWPAT}"

            # VERIFY MD5SUM
            printf " "
            for FI in ${TFILS}; do
              printf "."
              MD5SUM=$(grep -F -- "${FI}" md5/all.md5 | cut -f 1 -d ' ')
              MD5TAR=$(md5sum < "${TWPAT}/${FI}" | cut -f 1 -d ' ')
              if [ "${VERB}" = "true" ]; then
                printf ' %s %s %s \n' "${FI}" "${MD5SUM}" "${MD5TAR}"
              fi
              # An empty recorded checksum is a failure too: otherwise a
              # missing checksum plus a failed extraction would compare
              # equal ("" = "") and the originals would be deleted.
              if [ -z "${MD5SUM}" ] || [ "${MD5SUM}" != "${MD5TAR}" ]; then
                printf ' OOPS - tar file corrupt %s \n\n' "${TAPAT}/${TOUT}"
                ls -l "${INPAT}/${FI}"
                printf ' md5sum %s\n' "${MD5SUM}"
                ls -l "${TWPAT}/${FI}"
                printf ' md5sum %s\n' "${MD5TAR}"
                exit 1
              fi
            done
            printf "\n"
            rm -f "${TWPAT:?}"/*

            # INDEX
            printf '%s\n' "${TFILL}" > "${INPAT}/index/${TOUP}.index"

            # REMOVE ORIGINALS
            for FI in ${TFILS}; do
              rm -f "${INPAT}/${FI}"
            done

            # PRINT SUMMARY OF OUTPUT
            STAR=$(filesize "${TAPAT}/${TOUT}")
            NDIF=$((STAR - SFILS))
            printf ' from %s files, %s bytes\n' "${NFILS}" "${SFILS}"
            printf ' tar %s files, %s bytes (%s)\n' \
              "${NFILS}" "${STAR}" "${NDIF}"

          fi # UNTAR AND VERIFY COPIES

          # Start the next group with the file that did not fit.
          NTOTS=$((NTOTS + NFILS))
          NFILS=1
          TFILS="${FIL}"
          SFILS=${SIZ}
        else
          NFILS=$((NFILS + 1))
          TFILS="${TFILS} ${FIL}"
          SFILS=${SNEXT}
        fi

      done

      printf '%s/%s TOTAL FILES \n' "${NTOTS}" "${NFILES}"
      du -sm "${TAPAT}"
      date

    fi # # # NFILES # # #

  done # # # C L A S S # # #
}

#######################################
# Copy every tarfile in TAPAT to ${SRMP} with srmcp; when the size seen
# under PNFS matches, move the tarfile to DCPAT and prestage it with
# dccp -P.  Then purge each file in DCPAT whose size and crc agree with
# the layer 4 information read from PNFS.
# Globals:   TAPAT, DCPAT, SRMP, DCHP, PNFS, NOOP, ECHO, VERB (read)
# Requires:  the UPS "setup" command and "ecrc" (set up by MAIN)
# Outputs:   progress report on stdout
#######################################
WRITE() {
  cd "${TAPAT}" || exit 1
  FILES=$(ls)
  NFILES=$(printf '%s\n' "${FILES}" | wc -w)
  NFILES=$((NFILES))
  printf ' OK - have %s tarfiles \n' "${NFILES}"
  date

  setup srmcp v1_21
  setup dcap v2_36_f0506 -q unsecured

  for FILE in ${FILES}; do
    ${ECHO} srmcp -streams_num=1 -server_mode=active \
      "file:///${FILE}" "${SRMP}/${FILE}"
    sleep 1
    if [ -z "${NOOP}" ]; then
      FSIZ=$(filesize "${FILE}")
      PSIZ=$(filesize "${PNFS}/${FILE}")
      if [ "${VERB}" = "true" ]; then
        printf ' %s %s %s \n' "${FILE}" "${FSIZ}" "${PSIZ}"
      fi
      # Quoted: an empty size must fail the test, not collapse it.
      if [ "${FSIZ}" -eq "${PSIZ}" ]; then
        printf 'SRMCPed %s \n' "${FILE}"
        mv "${FILE}" "${DCPAT}/"
        ${ECHO} dccp -P "${DCHP}/${FILE}"
      else
        printf " OOPS - file size mismatch \n"
        ls -l "${FILE}"
        ls -l "${PNFS}/${FILE}"
      fi
    fi
  done

  cd "${DCPAT}" || exit 1
  FILES=$(ls)
  # wc -w, as for the tarfile count above: wc -l reports 1 for an empty list
  NFILES=$(printf '%s\n' "${FILES}" | wc -w)
  NFILES=$((NFILES))
  printf ' OK - have %s dcache files to purge \n' "${NFILES}"

  for FILE in ${FILES}; do
    if [ -r "${FILE}" ] && [ -r "${PNFS}/${FILE}" ]; then
      if [ "${VERB}" = "true" ]; then
        printf 'PURGE WRITE %s \n' "${FILE}"
      fi
      # Layer 4 of the PNFS entry, joined into one tab separated record.
      PINFO=$(cd "${PNFS}" && tr '\n' '\t' < ".(use)(4)(${FILE})")
      PLINES=$(printf '%s' "${PINFO}" | tr '\t' '\n' | wc -l)
      if [ "$((PLINES))" -eq 11 ]; then # HAVE PINFO
        ECRC=$(printf '%s' "${PINFO}" | cut -f 11)
        ESIZ=$(printf '%s' "${PINFO}" | cut -f 3)
        WCRC=$(ecrc "${FILE}" | cut -f 2 -d ' ')
        WSIZ=$(filesize "${FILE}")
        if [ "${VERB}" = "true" ]; then
          printf ' ESIZ = %s \n' "${ESIZ}"
          printf ' ECRC = %s \n' "${ECRC}"
        fi
        if [ "${ESIZ}" -eq "${WSIZ}" ] && [ "${ECRC}" -eq "${WCRC}" ]; then
          ${ECHO} rm "${FILE}"
          printf 'PURGED dcache/%s\n' "${FILE}"
        else
          printf "OOPS - mismatched Enstore and local size/crc \n"
          printf ' SIZE %s/%s\n' "${ESIZ}" "${WSIZ}"
          printf ' CRC %s/%s\n' "${ECRC}" "${WCRC}"
          printf 'PINFO\n%s\n' "${PINFO}"
        fi
      else
        if [ "${VERB}" = "true" ]; then
          printf 'PINFO %s \n%s\n' "${FILE}" "${PINFO}"
        fi
      fi
    fi # PURGE dcache
  done
}

#######################################
# Record our pid, make sure the working directories exist, set up UPS
# and run the TAR and/or WRITE steps selected by ${WRITE}.
# Globals:   PIFL, PAUSE, INPAT, TWPAT, TAPAT, DCPAT, WRITE (read)
# Outputs:   progress report on stdout
# Returns:   exits the script with status 1 if a directory is missing
#######################################
MAIN() {
  printf "\n"
  date

  # Record the pid of the shell that is really running MAIN.  By default
  # MAIN runs in a background subshell, where $$ is still the pid of the
  # launching script, which exits immediately; a later pid() check would
  # then always see a stale lock.  The child sh reports its parent, which
  # is this (sub)shell in both foreground and background modes.
  sh -c 'echo $PPID' > "${PIFL}"

  [ -n "${PAUSE}" ] && sleep "${PAUSE}"

  if [ ! -d "${INPAT}" ]; then
    printf ' OOPS, no %s\n' "${INPAT}"
    exit 1
  fi

  # Create the working directories; report the directory that is
  # actually missing.
  for DIR in "${INPAT}/md5" "${INPAT}/index" \
    "${TWPAT}" "${TAPAT}" "${DCPAT}"; do
    mkdir -p "${DIR}"
    if [ ! -d "${DIR}" ]; then
      printf ' OOPS, no %s\n' "${DIR}"
      exit 1
    fi
  done

  printf ' OK - processing from %s \n' "${INPAT}"

  unset SETUPS_DIR
  unset SETUP_UPS
  . /afs/fnal.gov/ups/etc/setups.sh
  export PRODUCTS=/afs/fnal.gov/files/code/e875/general/ups/db

  setup encp # for ecrc

  case "${WRITE}" in
    only)
      printf " WRITE ONLY \n"
      WRITE
      ;;
    true)
      printf " TAR and WRITE \n"
      TAR
      WRITE
      ;;
    *)
      printf " TAR ONLY \n"
      TAR
      ;;
  esac

  # Disk usage of the input area itself, then its tar and dcache subdirs.
  for DIR in "" tar dcache; do
    du -sm "${INPAT}/${DIR}"
  done

  rm -f "${PIFL}"
  date
} # MAIN

#########
# ENTRY #
#########

WRITE=true

while getopts cdf:Fnp:r:s:vwW OPT; do
  case $OPT in
    c)
      CRON=true
      ;;
    d)
      DEBUG=debug
      ECHO=echo
      NOOP=echo
      ;;
    f)
      FLUSH=true
      RETIR="$OPTARG"
      ;;
    F)
      FLUSH=true
      RETIR="0"
      ;;
    n)
      NOOP=echo
      ECHO=echo
      ;;
    p)
      PAUSE="$OPTARG"
      ;;
    r)
      REL="$OPTARG"
      ;;
    s)
      SEL="$OPTARG"
      ;;
    v)
      VERB=true
      unset QUIET
      ;;
    w)
      WRITE=only
      ;;
    W)
      WRITE=''
      ;;
  esac
done
shift $((OPTIND - 1))

INDIR=${1}

if [ -z "${INDIR}" ]; then
  printf " OOPS - specify input directory\n"
  cd /local/scratch26/mindata
  ls
  exit 1
fi

[ -n "${VERB}" ] && printf ' mcimport version %s \n' "${VERSION}"

SLIM=1800000000 # size limit in bytes, 1.8 GBytes

# DCPOR=2811    # earlier value, superseded by the assignment below
DCPOR=24136

MCIN=/local/scratch26/mindata
INPAT=${MCIN}/${INDIR}
TWPAT=/var/tmp/mindata/MCTAR/${INDIR}
TAPAT=${INPAT}/tar
DCPAT=${INPAT}/dcache

SRMP=srm://fndca1.fnal.gov:8443/pnfs/fnal.gov/usr/minos/stage/${INDIR}
DCHP=dcap://fndca1.fnal.gov:${DCPOR}/pnfs/fnal.gov/usr/minos/stage/${INDIR}
PNFS=/pnfs/minos/stage/${INDIR}

export SRM_CONFIG=/home/mindata/.srmconfig/kreymer.xml

PIFL=${INPAT}/log/mcimport.pid
MAILTO=minos-data@fnal.gov

# The pid file and the log both live in ${INPAT}/log.  Create it only
# when INPAT already exists, so MAIN's "no INPAT" check still fires.
if [ -d "${INPAT}" ]; then
  mkdir -p "${INPAT}/log"
fi

pid || {
  printf ' PID status is %s' "$?"
  exit 1
}

if [ -n "${NOOP}" ] || [ -n "${VERB}" ]; then
  # dry run or verbose: foreground, output to the terminal
  MAIN
elif [ "${CRON}" = "true" ]; then
  # cron: foreground, stdout appended to the log
  MAIN >> "${INPAT}/log/mcimport.log"
else
  # default: background, stdout and stderr appended to the log
  MAIN >> "${INPAT}/log/mcimport.log" 2>&1 &
  printf ' OK, logging activity to %s \n' "${INPAT}/log/mcimport.log"
fi

exit 0

# CHANGE LOG
#
# 2007 01 26 kreymer
#   Grouping tarfiles by CLASS, where this string excludes run/sub,
#   so that similar configurations are tarred up together
#   Maybe I should call this CONFIG... ( beam/current/physics )
#
# 2007 01 25 kreymer
#   PID cleanup
#     Added -c cron option, runs with logging but in the foreground
#     Added -p # option, to pause in MAIN, for testing pid
#     Changed pid to use $$ rather than a filtered ps output, to get the pid
#     Removed the sleep before removing PIFL, now that pid is handled cleanly.
#     Added INDIR and pid number to email subject and content
#   TAR speedup
#     Finding files using -cmin +10 to ignore files newer than 10 minutes
#     Added md5 directory
#     Create the tarfile directly in TAPAT.
#     Removed file counting with tar tf
#     5 passes on data were
#       2 tar to /var/tmp
#       2 copy back
#       2 diff
#       2 untar
#       1 tar tf
#       2 diff
#     2 passes on data are
#       2 tar/md5
#       2 untar/md5
#
# 2007 01 18 kreymer
#   added dccp -P to prestage files to the read pools
#
# 2007 01 04 kreymer
#   Added pid check to prevent multiple runs
#
# 2006 12 22 kreymer
#   Forked mcimport from rawcopy
#   Adjusted input and output paths
#   Dropped local copy, already have files in hand.
#   Set file name of tarfile FLINT.tar to $FIRST-$LAST.tar
#   Created FLINT.index listing content of each file,
#   put this in index subdirectory.
#   Created FLINT.ecrc listing ecrc for the file.