#!/bin/sh
VERSION=mcimport.20070130

# TO DO
#   add foreground option, for use in cron
#
# Consolidate and archive MC import files
#
# usage : ./mcimport kordosky echo
#
# Options (parsed by the getopts loop in the ENTRY section):
#   -c       cron mode: run MAIN in the foreground, stdout appended to the log
#   -d       debug: set DEBUG, and set ECHO/NOOP so guarded commands are echoed
#   -n       no-op: set ECHO/NOOP so guarded commands are echoed
#   -f ARG   set FLUSH=true and RETIR=ARG (neither is read in this file)
#   -F       set FLUSH=true and RETIR=0   (neither is read in this file)
#   -p SEC   sleep SEC seconds at the start of MAIN (for testing pid)
#   -r ARG   set REL (not read in this file)
#   -s ARG   set SEL (not read in this file)
#   -v       verbose; MAIN runs in the foreground
#   -w       PURGE and WRITE only
#   -W       TAR only
#
# OUTLINE
#   MC import files show up in /local/scratch26/mindata/${USER}
#   where USER is initially one of kordosky or howcroft
#
#   These files are combined into large tarfiles,
#   and moved into /pnfs/minos/mcimport/${USER}
#
#   log files for mc production will be rsync'd to
#   $MINOS_DATA/log_data/mcimport/${USER}
#

# Print the size in bytes of file $1, taken from field 5 of `ls -l`.
file_size() {
  ls -l "$1" | tr -s ' ' | cut -f 5 -d ' '
}

# Refuse to run while a previous mcimport for the same INDIR is alive.
# Reads PIFL, INDIR, MAILTO.  If PIFL names a live process, mails MAILTO and
# exits 1; if the recorded process is gone, removes the stale pid file.
pid() {
  if [ -r "${PIFL}" ] ; then
    echo " OOPS - found ${PIFL} "
    prepid=$(cat "${PIFL}")
    # ps prints its table to stdout (as before); only the status is tested.
    if ps -p ${prepid} ; then
      echo " OOPS - ${prepid} is still running "
      echo " BAILING OUT "
      date
      echo "mcimport ${INDIR} still running, pid ${prepid}" \
        | Mail -s "mcimport ${INDIR} conflict on $(hostname | cut -f1 -d.)" ${MAILTO}
      exit 1
    else
      echo " OK - stale pid file "
      rm -f "${PIFL}"
    fi
  fi
}

# Print a low-space warning and mail the same text to MAILTO.
#   $1 area label used in the message and mail subject (scratch, /var/tmp)
#   $2 free MBytes found      $3 threshold in MBytes
space_alert() {
  date
  printf ' WARNING, minos26 %s space %s under %s MBytes \n' "$1" "$2" "$3"
  printf ' WARNING, minos26 %s space %s under %s MBytes \n' "$1" "$2" "$3" \
    | Mail -s "mcimport $1 space" ${MAILTO}
}

# Check free space (MBytes) on /local/scratch26 and /var/tmp.
# Reads FREEWARN, FREESTOP, MAILTO.  Warns when scratch is under FREEWARN;
# warns and exits 1 when scratch or /var/tmp is under FREESTOP.
SPACE() {
  # -P keeps each filesystem on one output line, so field 4 stays the free
  # space even when the device name is long enough to wrap.
  SFREE=$(df -Pm /local/scratch26 | grep /dev | tr -s ' ' | cut -f 4 -d ' ')
  VFREE=$(df -Pm /var/tmp | grep /dev | tr -s ' ' | cut -f 4 -d ' ')

  if [ "${SFREE}" -lt "${FREEWARN}" ] ; then
    space_alert scratch "${SFREE}" "${FREEWARN}"
  fi

  if [ "${SFREE}" -lt "${FREESTOP}" ] ; then
    space_alert scratch "${SFREE}" "${FREESTOP}"
    exit 1
  fi

  if [ "${VFREE}" -lt "${FREESTOP}" ] ; then
    space_alert /var/tmp "${VFREE}" "${FREESTOP}"
    exit 1
  fi
  return 0
}

# NOTE(review): FREE is not read anywhere else in this file.
FREE=$(df -m /local/scratch26/mindata | grep /local/scratch26 | tr -s ' ' | cut -f 4 -d ' ')

# Group the *.tar.gz files in INPAT older than MINAGE minutes by CONFIG
# (file name with characters 6-14 removed), and pack each group into
# tarfiles in TAPAT holding at most about SLIM bytes of input each.
# Unless NOOP is set, each tarfile is unpacked into TWPAT and every member
# is compared with its md5sum in md5/all.md5; then an index file is written
# and the originals are removed.
# Reads INPAT TAPAT TWPAT MINAGE SLIM NOOP ECHO VERB MAILTO.
TAR() {
  cd "${INPAT}" || { printf ' OOPS, cannot cd to %s\n' "${INPAT}" ; exit 1 ; }

  ALLFILES=$(find . -maxdepth 1 -name '*.tar.gz' -mmin "+${MINAGE}" | cut -f 2 -d / | sort)

  # set configs from file names stripping out run/subrun
  CONFIGS=$(printf '%s\n' "${ALLFILES}" | cut -c 1-5,15- | sort -u)

  for CONFIG in ${CONFIGS} ; do # # # C L A S S # # #

    CONFIG1=$(printf '%s\n' "${CONFIG}" | cut -c 1-5)
    CONFIG2=$(printf '%s\n' "${CONFIG}" | cut -c 6-)
    printf ' \n%s\nFor config %s %s \n ' "$(date)" "${CONFIG1}" "${CONFIG2}"

    # Quoted so the shell cannot glob-expand the pattern before grep sees it.
    FILES=$(printf '%s\n' "${ALLFILES}" | grep "${CONFIG1}.*${CONFIG2}")
    FLAST=$(printf '%s\n' "${FILES}" | tail -n 1)

    ### for testing with 2001-10
    ### FILES=F00000892_0000.mdaq.root
    ### FLAST=F00000892_0000.mdaq.root

    NFILES=$(( $(printf '%s\n' "${FILES}" | wc -w) ))

    # FILES is deliberately unquoted below: word splitting joins the
    # newline-separated names into one space-separated line.
    TFILS=$(echo ${FILES} | cut -f 1 -d ' ')    # names in the current group
    SFILS=$(file_size "${INPAT}/${TFILS}")      # bytes in the current group
    NFILS=1                                     # files in the current group
    NTOTS=0                                     # files tarred for this CONFIG
    NOUTS=0                                     # tarfiles made for this CONFIG
    SNEXT=0                                     # group bytes if FIL were added

    if [ "${NFILES}" -eq 0 ] ; then # # # NFILES # # #
      printf ' No data files to process \n'
    else
      printf ' %s files from \n' "${NFILES}"
      printf ' %s to\n' "${TFILS}"
      printf ' %s \n' "${FLAST}"
      printf '\n'

      # Prime the pump
      mkdir -p "${TWPAT}"
      rm -f "${TWPAT:?}"/*

      # make checksums as needed
      # these should be generated on the initial copy since Jan 2007
      if [ -z "${NOOP}" ] ; then
        [ -r md5/all.md5 ] || touch md5/all.md5
        for FIL in ${FILES} ; do
          if ! grep -q -- "${FIL}" md5/all.md5 ; then
            printf 'md5sum %s\n' "${FIL}"
            md5sum "${FIL}" >> "md5/$$.md5"
          fi
        done
        if [ -r "md5/$$.md5" ] ; then
          cat "md5/$$.md5" >> md5/all.md5 && rm "md5/$$.md5"
        fi
      fi

      # Tar them up, ${TFILS} into ${TOUT}
      # The first file already seeds the group; the EOF sentinel forces the
      # last group to be written.
      SFILES=$(echo ${FILES} EOF | cut -f 2- -d ' ')

      for FIL in ${SFILES} ; do # # # FILs # # #

        if [ -r "${INPAT}/${FIL}" ] ; then
          SIZ=$(file_size "${INPAT}/${FIL}")
          SNEXT=$(echo "${SFILS} + ${SIZ}" | bc)
        fi

        if [ "${SNEXT}" -gt "${SLIM}" ] || [ "${FIL}" = "EOF" ] ; then

          ISEC=$(date +%s)    # initial time, to calculate rates
          TFILL=$(echo ${TFILS} | tr ' ' '\n')
          [ -n "${VERB}" ] && printf ' OK, tarring %s\n' "${TFILL}"
          TFIR=$(printf '%s\n' "${TFILL}" | head -n 1)
          TLAS=$(printf '%s\n' "${TFILL}" | tail -n 1)
          [ -n "${VERB}" ] && printf ' TFIR TLAS %s %s \n' "${TFIR}" "${TLAS}"
          TFIP=$(printf '%s\n' "${TFIR}" | cut -f 1 -d '.')
          TLAP=$(printf '%s\n' "${TLAS}" | cut -f 1 -d '.')
          [ -n "${VERB}" ] && printf ' TFIP TLAP %s %s \n' "${TFIP}" "${TLAP}"

          NOUTS=$(( ${NOUTS} + 1 ))
          TOUP="${TFIP}-${TLAP}"
          TOUT="${TOUP}.tar"
          rm -f "${TWPAT}/${TOUT}"
          # The tarfile is built by appending (tar -r) in TAPAT, so a copy
          # left behind by an earlier failed run must go first, or its
          # members would be duplicated.
          ${ECHO} rm -f "${TAPAT}/${TOUT}"

          printf '%s %s\n' "${TOUT}" "${NFILS}"
          printf ' %s to\n' "${TFIR}"
          printf ' %s \n' "${TLAS}"

          # TAR
          printf '\n'
          # ${ECHO} tar cf ${TAPAT}/${TOUT} ${TFILS}
          for FILE in ${TFILS} ; do
            [ -n "${VERB}" ] && printf 'tar input %s\n' "${FILE}"
            if ${ECHO} gunzip -t "${FILE}" ; then
              ${ECHO} tar -rf "${TAPAT}/${TOUT}" "${FILE}"
            else
              printf '\n\n OOPS, %s is not a gzip file, bailing \n\n' "${FILE}"
              rm -f "${TAPAT}/${TOUT}"
              printf 'mcinput %s %s is not a gzip file, bailing \n' "${INPAT}" "${FILE}" \
                | Mail -s "mcimport ${INPAT} ${FILE} gzip error " ${MAILTO}
              exit 1
            fi
          done

          # UNTAR AND VERIFY COPIES #
          if [ -z "${NOOP}" ] ; then

            rm -f "${TWPAT:?}"/*
            ${ECHO} tar xf "${TAPAT}/${TOUT}" -C "${TWPAT}"

            # VERIFY MD5SUM
            printf ' '
            for FI in ${TFILS} ; do
              printf '.'
              # last match wins, to handle duplicate entries in all.md5
              MD5SUM=$(grep -- "${FI}" md5/all.md5 | tail -n 1 | cut -f 1 -d ' ')
              MD5TAR=$(md5sum < "${TWPAT}/${FI}" | cut -f 1 -d ' ')
              [ "${VERB}" = "true" ] && printf ' %s %s %s \n' "${FI}" "${MD5SUM}" "${MD5TAR}"
              if [ "${MD5SUM}" != "${MD5TAR}" ] ; then
                # The tarfile lives in TAPAT; TWPAT only holds the unpacked copy.
                printf ' OOPS - tar file corrupt %s \n\n' "${TAPAT}/${TOUT}"
                ls -l "${INPAT}/${FI}"
                printf ' md5sum %s\n' "${MD5SUM}"
                ls -l "${TWPAT}/${FI}"
                printf ' md5sum %s\n' "${MD5TAR}"
                exit 1
              fi
            done
            printf '\n'
            rm -f "${TWPAT:?}"/*

            # INDEX
            printf '%s\n' "${TFILL}" > "${INPAT}/index/${TOUP}.index"

            # REMOVE ORIGINALS
            for FI in ${TFILS} ; do
              rm -f "${INPAT}/${FI}"
            done

            # PRINT SUMMARY OF OUTPUT
            OSEC=$(date +%s)
            STAR=$(file_size "${TAPAT}/${TOUT}")
            NDIF=$(( ${STAR:-0} - ${SFILS:-0} ))
            # A group finished within the same second would divide by zero.
            ELAPSED=$(( ${OSEC} - ${ISEC} ))
            [ "${ELAPSED}" -gt 0 ] || ELAPSED=1
            RATE=$(( ( ${SFILS:-0} / 1000000 ) / ${ELAPSED} ))
            printf ' from %s files, %s bytes\n' "${NFILS}" "${SFILS}"
            printf ' tar %s files, %s bytes (%s)\n' "${NFILS}" "${STAR}" "${NDIF}"
            printf ' rate %s MB/sec\n' "${RATE}"

          fi # UNTAR AND VERIFY COPIES

          # start the next group with the file that did not fit
          NTOTS=$(( ${NTOTS} + ${NFILS} ))
          NFILS=1
          TFILS="${FIL}"
          SFILS=${SIZ}

        else
          NFILS=$(( ${NFILS} + 1 ))
          TFILS="${TFILS} ${FIL}"
          SFILS=${SNEXT}
        fi

      done

      printf '%s/%s TOTAL FILES \n' "${NTOTS}" "${NFILES}"
      du -sm "${TAPAT}"
      date

    fi # # # NFILES # # #

  done # # # C L A S S # # #
}

# Copy every file found in TAPAT to ${SRMP} with srmcp.  Unless NOOP is set,
# compare the local size with the size seen under ${PNFS}; on a match move
# the file to DCPAT and request a dccp -P on ${DCHP}/file.
# Reads TAPAT DCPAT SRMP DCHP PNFS NOOP ECHO VERB.
WRITE() {
  cd "${TAPAT}" || { printf ' OOPS, cannot cd to %s\n' "${TAPAT}" ; return 1 ; }

  FILES=$(ls)
  NFILES=$(( $(printf '%s\n' "${FILES}" | wc -w) ))

  printf ' OK - writing %s tarfiles \n' "${NFILES}"
  date

  setup srmcp v1_21
  setup dcap v2_36_f0506 -q unsecured

  for FILE in ${FILES} ; do

    ${ECHO} srmcp -streams_num=1 -server_mode=active "file:///${FILE}" "${SRMP}/${FILE}"
    sleep 1

    if [ -z "${NOOP}" ] ; then
      FSIZ=$(file_size "${FILE}")
      PSIZ=$(file_size "${PNFS}/${FILE}")
      [ "${VERB}" = "true" ] && printf ' %s %s %s \n' "${FILE}" "${FSIZ}" "${PSIZ}"
      if [ "${FSIZ}" -eq "${PSIZ}" ] ; then
        printf 'SRMCPed %s \n' "${FILE}"
        mv "${FILE}" "${DCPAT}/"
        ${ECHO} dccp -P "${DCHP}/${FILE}"
      else
        printf ' OOPS - file size mismatch \n'
        ls -l "${FILE}"
        ls -l "${PNFS}/${FILE}"
      fi
    fi

  done
}

# Remove files from DCPAT once the pnfs layer-4 record ".(use)(4)(file)"
# has 11 lines and its size (line 3) and crc (line 11) match the local file
# (crc from ecrc; skipped when NOOP is set).  Files whose record is not yet
# 11 lines long are reported as PENDING.
# Reads DCPAT PNFS NOOP ECHO VERB.
PURGE() {
  cd "${DCPAT}" || { printf ' OOPS, cannot cd to %s\n' "${DCPAT}" ; return 1 ; }

  FILES=$(ls)
  # wc -w, as in WRITE: wc -l would report 1 for an empty directory.
  NFILES=$(( $(printf '%s\n' "${FILES}" | wc -w) ))

  printf '\n OK - purging %s files ?\n' "${NFILES}"
  date

  for FILE in ${FILES} ; do

    if [ -r "${FILE}" ] && [ -r "${PNFS}/${FILE}" ] ; then

      [ "${VERB}" = "true" ] && printf 'PURGE WRITE %s \n' "${FILE}"
      # Join the record's lines with tabs so cut -f can pick them by number.
      PINFO=$(cd "${PNFS}" && cat ".(use)(4)(${FILE})" | tr '\n' '\t')
      NPINFO=$(( $(printf '%s' "${PINFO}" | tr '\t' '\n' | wc -l) ))

      if [ "${NPINFO}" -eq 11 ] ; then # HAVE PINFO

        ECRC=$(printf '%s' "${PINFO}" | cut -f 11)
        ESIZ=$(printf '%s' "${PINFO}" | cut -f 3)
        if [ -z "${NOOP}" ] ; then
          WCRC=$(ecrc "${FILE}" | cut -f 2 -d ' ')
        else
          WCRC=${ECRC}
        fi
        WSIZ=$(file_size "${FILE}")

        if [ "${VERB}" = "true" ] ; then
          printf ' ESIZ = %s \n' "${ESIZ}"
          printf ' ECRC = %s \n' "${ECRC}"
        fi

        if [ "${ESIZ}" -eq "${WSIZ}" ] && [ "${ECRC}" -eq "${WCRC}" ] ; then
          ${ECHO} rm "${FILE}"
          printf 'PURGED %s\n' "${FILE}"
        else
          printf 'OOPS - mismatched Enstore and local size/crc \n'
          printf ' SIZE %s/%s\n' "${ESIZ}" "${WSIZ}"
          printf ' CRC %s/%s\n' "${ECRC}" "${WCRC}"
          printf 'PINFO\n%s\n' "${PINFO}"
        fi

      else
        printf 'PENDING %s\n' "${FILE}"
        [ "${VERB}" = "true" ] && printf 'PINFO %s \n%s\n' "${FILE}" "${PINFO}"
      fi

    else
      printf ' OOPS %s not in PNFS\n' "${FILE}"
    fi # PURGE dcache

  done
}

# Exit 1 with a message naming $1 unless $1 is an existing directory.
require_dir() {
  [ -d "$1" ] && return 0
  printf ' OOPS, no %s\n' "$1"
  exit 1
}

# Record the pid file, make sure the working directories exist, set up the
# UPS products, then run the stages selected by WRITE:
#   only -> PURGE, WRITE      true -> PURGE, TAR, WRITE      else -> TAR
# Removes the pid file on normal completion.
MAIN() {
  printf '\n'
  date

  # In background mode the launcher writes $! to PIFL itself and sets
  # PIDBYPARENT: inside a backgrounded function $$ is still the pid of the
  # launcher, which exits immediately, so the pid check could never see a
  # run in progress.
  [ -n "${PIDBYPARENT}" ] || echo $$ > "${PIFL}"

  [ -n "${PAUSE}" ] && sleep ${PAUSE}

  require_dir "${INPAT}"

  # Each message now names the directory that was actually tested
  # (md5 and index live under INPAT, not TAPAT).
  mkdir -p "${INPAT}/md5"
  require_dir "${INPAT}/md5"

  mkdir -p "${INPAT}/index"
  require_dir "${INPAT}/index"

  mkdir -p "${TWPAT}"
  require_dir "${TWPAT}"

  mkdir -p "${TAPAT}"
  require_dir "${TAPAT}"

  mkdir -p "${DCPAT}"
  require_dir "${DCPAT}"

  printf ' OK - processing from %s \n' "${INPAT}"

  unset SETUPS_DIR
  unset SETUP_UPS
  . /afs/fnal.gov/ups/etc/setups.sh
  export PRODUCTS=/afs/fnal.gov/files/code/e875/general/ups/db

  setup encp # for ecrc

  if [ "${WRITE}" = "only" ] ; then
    printf ' PURGE, WRITE \n'
    PURGE ; WRITE
  elif [ "${WRITE}" = "true" ] ; then
    printf ' PURGE, TAR, WRITE \n'
    PURGE ; TAR ; WRITE
  else
    printf ' TAR \n'
    TAR
  fi

  for DIR in "${INPAT}/" "${INPAT}/tar" "${INPAT}/dcache" ; do
    du -sm "${DIR}"
  done

  rm -f "${PIFL}"
  date
} # MAIN

#########
# ENTRY #
#########

# The second assignment of MAILTO (and of DCPOR below) is the one in effect.
MAILTO=minos-data@fnal.gov
MAILTO=kreymer@fnal.gov

FREEWARN=20000
FREESTOP=1000

MINAGE=30 # minimum age in minutes for consideration

if SPACE ; then
  true
else
  printf ' SPACE status is %s' "$?"
  exit 1
fi

WRITE=true

while getopts cdf:Fnp:r:s:vwW OPT ; do
  case $OPT in
    c) CRON=true
       ;;
    d) DEBUG=debug
       ECHO=echo
       NOOP=echo
       ;;
    f) FLUSH=true
       RETIR="$OPTARG"
       ;;
    F) FLUSH=true
       RETIR="0"
       ;;
    n) NOOP=echo
       ECHO=echo
       ;;
    p) PAUSE="$OPTARG"
       ;;
    r) REL="$OPTARG"
       ;;
    s) SEL="$OPTARG"
       ;;
    v) VERB=true
       unset QUIET
       ;;
    w) WRITE=only
       ;;
    W) WRITE=''
       ;;
  esac
done
shift $(( OPTIND - 1 ))

INDIR=${1}

if [ -z "${INDIR}" ] ; then
  printf ' OOPS - specify input directory\n'
  cd /local/scratch26/mindata
  ls
  exit 1
fi

[ -n "${VERB}" ] && printf ' mcimport version %s \n' "${VERSION}"

SLIM=1800000000 # size limit in bytes, 1.8 GBytes

DCPOR=2811
DCPOR=24136

export SRM_CONFIG=/home/mindata/.srmconfig/kreymer.xml

MCIN=/local/scratch26/mindata
INPAT=${MCIN}/${INDIR}

if [ ! -d "${INPAT}" ] ; then
  printf ' OOPS - %s not a directory\n' "${INPAT}"
  cd /local/scratch26/mindata
  ls
  exit 1
fi

TWPAT=/var/tmp/mindata/MCTAR/${INDIR}
TAPAT=${INPAT}/tar
DCPAT=${INPAT}/dcache
SRMP=srm://fndca1.fnal.gov:8443/pnfs/fnal.gov/usr/minos/stage/${INDIR}
DCHP=dcap://fndca1.fnal.gov:${DCPOR}/pnfs/fnal.gov/usr/minos/stage/${INDIR}
PNFS=/pnfs/minos/stage/${INDIR}

# The pid file and the log both live here; nothing else creates it.
mkdir -p "${INPAT}/log"
PIFL=${INPAT}/log/mcimport.pid

if pid ; then
  true
else
  printf ' PID status is %s' "$?"
  exit 1
fi

if [ -n "${NOOP}" ] || [ -n "${VERB}" ] ; then
  MAIN
elif [ "${CRON}" = "true" ] ; then
  MAIN >> "${INPAT}/log/mcimport.log"
else
  PIDBYPARENT=true
  MAIN >> "${INPAT}/log/mcimport.log" 2>&1 &
  # $! is the pid of the backgrounded MAIN, which is what pid() must test.
  echo $! > "${PIFL}"
  printf ' OK, logging activity to %s \n' "${INPAT}/log/mcimport.log"
fi

exit 0

# CHANGE LOG
# (kept as comments so that syntax checkers such as `sh -n` accept the file)
#
# 2007 01 30 kreymer
#   Added test for valid .gz file, gunzip -t
#      this required tar -r , 1 file at a time
#   Added test for free disk space in TAR
#   Added test for existence of INPAT directory
#   Take final all.md5sum match, not first, to handle duplicates
#   Added sort of file ALLFILES
#   Added rate report for TAR
#   Added PURGE ahead of TAR
#   Added log message for PURGE files not in PNFS
#   Added MINAGE variable to set minimum file age, changed from 10 to 30
#   Changed CLASS variable name to CONFIG
#   Do not do ecrc in PURGE when NOOP is set
#   For next version :
#   + Added ALL users, using MCIMPORT to control activity
#   + NOIMPORT, TRIGTIME, TRIGSIZE trigger concatenation in generic running
#
# 2007 01 26 kreymer
#   Grouping tarfiles by CONFIG, where this string excludes run/sub,
#   so that similar configurations are tarred up together
#   Maybe I should call this CONFIG... ( beam/current/physics )
#
# 2007 01 25 kreymer
#   PID cleanup
#      Added -c cron option, runs with logging but in the foreground
#      Added -p # option, to pause in MAIN, for testing pid
#      Changed pid to use $$ rather than a filtered ps output, to get the pid
#      Removed the sleep before removing PIFL, now that pid is handled cleanly.
#      Added INDIR and pid number to email subject and content
#   TAR speedup
#      Finding files using -cmin +10 to ignore files newer than 10 minutes
#      Added md5 directory
#      Create the tarfile directly in TAPAT.
#      Removed file counting with tar tf
#      5 passes on data were
#         2 tar to /var/tmp
#         2 copy back
#         2 diff
#         2 untar
#         1 tar tf
#         2 diff
#      2 passes on data are
#         2 tar/md5
#         2 untar/md5
#
# 2007 01 18 kreymer
#   added dccp -P to prestage files to the read pools
#
# 2007 01 04 kreymer
#   Added pid check to prevent multiple runs
#
# 2006 12 22 kreymer
#   Forked mcimport from rawcopy
#   Adjusted input and output paths
#   Dropped local copy, already have files in hand.
#   Set file name of tarfile FLINT.tar to $FIRST-$LAST.tar
#   Created FLINT.index listing content of each file,
#      put this in index subdirectory.
#   Created FLINT.ecrc listing ecrc for the file.