#!/bin/bash
#
# Fills the RunInfo table from the zipped raw files of the last nights and
# makes sure that the per-night status tables contain an entry for each night.
#
# This script has been written to run on La Palma on the machine data,
# i.e. the paths are only working on this machine.
#
# The script starts from the zipped files. This causes a delay until files
# are in the database, because they have to be rsynced and zipped first
# (see RsyncRawData.sh, ZipRawData.sh).
#
# missing:
#   entry in status table
#
# External names used here but not defined in this file: printprocesslog,
# sendquery, finish, $factpath and $HEADAS. sendquery is always called
# without arguments and takes its SQL from the global variable $query.
# NOTE(review): presumably all of them come from Sourcefile.sh - confirm.

# options (the last uncommented assignment of each variable is the default):
#skipmd5sum="no"       # fill md5 sums in any case
skipmd5sum="iffilled"  # fill md5 sums only if they are not yet in db (default)
#skipmd5sum="yes"      # do not fill md5 sums in any case
#doupdate="yes"        # update all entries
doupdate="no"          # fill only entries which are not yet existing (default)

source "$(dirname "$0")/../Sourcefile.sh"
printprocesslog "INFO starting $0 with options doupdate=$doupdate and skipmd5sum=$skipmd5sum"

# setup to use ftools
source "$HEADAS/headas-init.sh"

# ---------------------------------------------------------------------------
# helpers
# ---------------------------------------------------------------------------

# Reads the output of fverify on stdin and prints the number of errors taken
# from its "... N error(s)" summary. Prints nothing if there is no summary.
# The whole number is extracted: matching a single digit would turn
# "10 error(s)" into 0 and let a corrupted file pass as clean.
fverify_error_count() {
  grep -o '[0-9][0-9]* error(s)' | tail -n 1 | grep -E -o '^[0-9]+'
}

# Prints the lines of the cached header ($eventsheader) matching pattern $1.
eventsheader_grep() {
  printf '%s\n' "$eventsheader" | grep -- "$1"
}

# Appends ", <column>=<value>" to the global $query if <value> is not empty.
#   $1 - column name
#   $2 - value (inserted unquoted, i.e. numeric)
append_field() {
  if [ -n "$2" ]; then
    query="$query, $1=$2"
  fi
}

# Same as append_field, but wraps the value in single quotes.
append_quoted_field() {
  if [ -n "$2" ]; then
    query="$query, $1='$2'"
  fi
}

# timestamp as written in the DATE-OBS, DATE-END, TSTART, TSTOP and DATE keys
tsregex='20[1-9][0-9][\-][01][0-9][\-][0-3][0-9]T[0-2][0-9]:[0-6][0-9]:[0-6][0-9][.][0-9]{6}'

# ---------------------------------------------------------------------------
# checks
# ---------------------------------------------------------------------------

# check if software is available
if ! ls "$factpath/fitsdump" >/dev/null 2>&1; then
  printprocesslog "ERROR $factpath/fitsdump is not available."
  finish
fi

# check if paths are available
for requiredpath in /daq/raw /loc_data/raw /loc_data/zipraw; do
  if ! ls "$requiredpath" >/dev/null 2>&1; then
    printprocesslog "ERROR $requiredpath is not available."
    finish
  fi
done

# get last 3, 6 or 9 nights
dates=(
  "$(date +%Y/%m/%d --date="-12hour")"
  "$(date +%Y/%m/%d --date="-36hour")"
  "$(date +%Y/%m/%d --date="-60hour")"
  # "$(date +%Y/%m/%d --date="-84hour")"
  # "$(date +%Y/%m/%d --date="-108hour")"
  # "$(date +%Y/%m/%d --date="-132hour")"
  # "$(date +%Y/%m/%d --date="-156hour")"
  # "$(date +%Y/%m/%d --date="-180hour")"
  # "$(date +%Y/%m/%d --date="-204hour")"
)
# all available dates in /loc_data/zipraw:
#dates=( `find /loc_data/zipraw/ -mindepth 3 -type d | sort | sed -e 's/\/loc_data\/zipraw\///g'` )
# all available dates in /loc_data/aux:
#dates=( `find /loc_data/aux/ -mindepth 3 -type d | sort | sed -e 's/\/loc_data\/aux\///g'` )

# ---------------------------------------------------------------------------
# do check for rawfiles of these dates
# ---------------------------------------------------------------------------
for date in "${dates[@]}"; do
  # night as YYYYMMDD; this is also the run number expected in the file names
  date2=${date//\//}
  runnumber=$date2

  # make sure that each status table has an entry for this night
  for statustable in AuxFilesAvailISDCStatus DriveFileAvailISDCStatus AuxDataInsertStatus; do
    query="SELECT fNight FROM $statustable WHERE fNight=$date2"
    night=$(sendquery)
    if [ "$night" == "" ]; then
      printprocesslog "INFO insert $date2 to $statustable"
      query="INSERT $statustable SET fNight=$date2, fPriority=$date2"
      sendquery >/dev/null 2>&1
    fi
  done

  ziprawdir=/loc_data/zipraw/$date
  # check if data are available from that night
  if ! [ -d "$ziprawdir" ]; then
    printprocesslog "INFO $ziprawdir does not exist."
    continue
  else
    printprocesslog "INFO processing $ziprawdir..."
  fi

  # find all fits.gz files starting with the oldest file
  printprocesslog "INFO finding files to be checked in $ziprawdir..."
  fitsgzfiles=$(find "$ziprawdir" -type f -name '*.fits.gz' | sort)

  # loop to check files
  # The list is split on whitespace on purpose; the loop relies on the raw
  # data paths not containing any whitespace.
  # shellcheck disable=SC2086
  for file in $fitsgzfiles; do
    printprocesslog "INFO checking file $file"

    # Reset everything that is collected per file. compiletime and revnum in
    # particular are only filled for files without fits errors but are written
    # for every file, so without the reset a corrupted file would inherit the
    # values of the file processed before it.
    runtype= numdrsfiles= step= compiletime= revnum= compiled=
    roi= roitm= numevents= numphys= numext1= numext2=
    numelp= numilp= numoth= numped= numtime=
    runstart= runstart2= runstop= runstop2=

    # raw and original file
    rawfile=$(echo "$file" | sed -e 's/zipraw/raw/' -e 's/fits[.]gz/fits/')
    origfile=$(echo "$rawfile" | sed -e 's/loc_data/daq/')

    # check if it is drs file
    if [[ "$file" == *drs* ]]; then
      printprocesslog "INFO $file is a drs file. -> continue"
      continue
    fi

    # check if file is already finished
    # original file on daq (if data was taken on daq)
    if [ -e "$origfile" ]; then
      # check if raw file was changed in the last 30 minutes
      isnew=$(find "$origfile" -cmin -30)
      if [ "$isnew" != "" ]; then
        printprocesslog "WARN $origfile is not older than 30 min. -> continue"
        continue
      fi
      # get time of last modification as seconds since Epoch for both files
      timeorig=$(stat -c %Y "$origfile")
      timecopy=$(stat -c %Y "$rawfile")
      # compare times (as strings, so that a missing copy simply differs)
      if [ "$timeorig" != "$timecopy" ]; then
        # if times are not the same, the file is still open => no check
        printprocesslog "INFO file $rawfile not yet closed. -> continue"
        continue
      fi
    else
      # if the origfile doesn't exist, the data was probably written not on
      # daq but on data
      printprocesslog "INFO file $rawfile was probably taken on data and not daq."
    fi

    # get run and file number from filename
    runnumbererror="no"
    numbererror="no"
    numberfromname=$(echo "$file" | grep -E -o '20[1-9][0-9][01][0-9][0-3][0-9]_[0-9]{3}')
    runnumberfromname=$(echo "$numberfromname" | cut -d_ -f1)
    # strip up to two leading zeros (001 -> 1, 000 -> 0)
    filenumberfromname=$(echo "$numberfromname" | cut -d_ -f2 | sed -e 's/^0//g' -e 's/^0//g')
    if [ "$runnumber" != "$runnumberfromname" ]; then
      runnumbererror="yes"
      printprocesslog "ERROR for file $file: runnumber from date ($runnumber) and filename ($runnumberfromname) don't agree."
    fi

    # check if entry already exists
    query="SELECT fNight FROM RunInfo WHERE Concat(fNight, '_', LPAD(fRunID, 3, 0))='$numberfromname'"
    printprocesslog "DEBUG check if entry already exists in DB. QUERY: $query"
    result3=$(sendquery)

    # only proceed with file
    # if information is not yet in database
    # and no update is wished ($doupdate)
    if [ "$result3" != "" ] && [ "$doupdate" == "no" ]; then
      printprocesslog "INFO $file has been inserted already. -> continue "
      continue
    fi

    # check if fits file is corrupted
    numfitserrors=$(fverify "$rawfile" 2>/dev/null | fverify_error_count)
    if [ -z "$numfitserrors" ]; then
      # Without a number the statement below would end in "fFitsFileErrors="
      # and be rejected by the database anyway, so skip the file explicitly.
      printprocesslog "WARN could not determine the number of fitserrors of $rawfile. -> continue"
      continue
    fi
    if [ "$numfitserrors" -gt 0 ]; then
      printprocesslog "WARN $rawfile has $numfitserrors fitserror(s). "
    fi

    if [ "$numfitserrors" -eq 0 ]; then
      # read the header of the Events table once and pick all keys from it
      eventsheader=$("$factpath/fitsdump" -h -t Events "$file" 2>/dev/null)

      # get run and file number from file
      runnumberfromfile=$(eventsheader_grep NIGHT | grep -E -o '20[1-9][0-9][01][0-9][0-3][0-9]')
      filenumberfromfileorig=$(eventsheader_grep RUNID | grep -E -o '[0-9]{1,3}')
      if [ "$runnumberfromfile" = "" ] || [ "$filenumberfromfileorig" = "" ]; then
        printprocesslog "ERROR couldn't get run or file number from file name ($file)."
      fi
      # unquoted on purpose: an empty value has to end up as 000
      # shellcheck disable=SC2086
      numberfromfile="${runnumberfromfile}_$(printf %03d $filenumberfromfileorig)"
      # compare numbers
      if [ "$numberfromfile" != "$numberfromname" ]; then
        numbererror="yes"
        printprocesslog "ERROR for file $file: number from filename ($numberfromname) and file ($numberfromfile) don't agree."
      fi

      # check if this run has drs file
      # in case file is available, get STEP from header
      # in the very beginning only drs-files were existing
      # in the beginning the keywords DRSCALIB and STEP were not existing
      drsfile=$(echo "$file" | sed -e 's/fits/drs.fits/')
      if [ -e "$drsfile" ]; then
        numdrsfiles=1
      else
        numdrsfiles=0
      fi
      drscalib=$(eventsheader_grep DRSCALIB | grep -E -o "['][TF][']" | sed -e "s/'//g")
      if [ "$drscalib" == "T" ]; then
        step=$("$factpath/fitsdump" -h -t Events "$drsfile" 2>/dev/null \
          | grep STEP | grep -E -o "['][012][']" | sed -e "s/'//g")
        if ! [ "$numdrsfiles" -eq 1 ]; then
          printprocesslog "ERROR for file $file number of drsfiles ($numdrsfiles) and information from header ($drscalib) don't agree."
        fi
      else
        if ! [ "$drscalib" == "F" ]; then
          printprocesslog "WARN for file $file DRSCALIB is neither T nor F."
        fi
      fi

      # get other variables from header
      runtype=$(eventsheader_grep RUNTYPE | grep -E -o "['][a-z0-9._-]+[']" \
        | sed -e "s/'//g" -e "s/_/-/g" -e "s/[.]//g")
      roi=$(eventsheader_grep NROI | grep -v NROITM | grep -E -o '[0-9]{1,4}')
      roitm=$(eventsheader_grep NROITM | grep -E -o '[0-9]{1,4}')
      numevents=$(eventsheader_grep Events | grep -E -o '[0-9]+')
      numphys=$(eventsheader_grep 'NTRG ' | grep -E -o '[0-9]+')
      numext1=$(eventsheader_grep 'NTRGEXT1' | grep -E -o '[ ][0-9]+[ ]' | sed -e 's/\ //g')
      numext2=$(eventsheader_grep 'NTRGEXT2' | grep -E -o '[ ][0-9]+[ ]' | sed -e 's/\ //g')
      numelp=$(eventsheader_grep 'NTRGLPE' | grep -E -o '[0-9]+')
      numilp=$(eventsheader_grep 'NTRGLPI' | grep -E -o '[0-9]+')
      numoth=$(eventsheader_grep 'NTRGMISC' | grep -E -o '[0-9]+')
      numped=$(eventsheader_grep 'NTRGPED' | grep -E -o '[0-9]+')
      numtime=$(eventsheader_grep 'NTRGTIM' | grep -E -o '[0-9]+')
      compiled=$(eventsheader_grep 'COMPILED' \
        | grep -E -o "['][a-zA-Z]+[ ][ 12][0-9][ ]20[0-9][0-9][ ][0-2][0-9]:[0-5][0-9]:[0-5][0-9][']" \
        | sed -e "s/'//g")
      if ! [ "$compiled" == "" ]; then
        compiletime=$(date +'%F %H:%M:%S' --date="${compiled}")
      else
        compiletime=
      fi
      revnum=$(eventsheader_grep 'REVISION' | grep -E -o "['][0-9]+[:]?[0-9]*[MSP]*[']" | sed -e "s/'//g")

      # in newest data start time is in DATE-OBS
      # in older data start time is in TSTART
      # in the beginning TSTART was empty
      runstart=$(eventsheader_grep DATE-OBS | grep -E -o "$tsregex")
      runstart2=$(eventsheader_grep TSTART | grep -E -o "$tsregex")
      if [ "$runstart" == "" ]; then
        if [ "$runstart2" == "" ]; then
          runstart=$(eventsheader_grep DATE | grep -v 'DATE-' | grep -E -o "$tsregex")
        else
          runstart=$runstart2
        fi
      fi

      # in newest data stop time is in DATE-END
      # in older data stop time is in TSTOP
      # in the beginning TSTOP was empty
      runstop=$(eventsheader_grep DATE-END | grep -E -o "$tsregex")
      runstop2=$(eventsheader_grep TSTOP | grep -E -o "$tsregex")
      if [ "$runstop" == "" ]; then
        if [ "$runstop2" == "" ]; then
          # fall back to the modification time of the zipped file
          runstop=$(stat "$file" 2>/dev/null | grep Modify \
            | grep -E -o '20[1-9][0-9][\-][01][0-9][\-][0-3][0-9][ ][0-2][0-9]:[0-6][0-9]:[0-6][0-9][.][0-9]{9}')
        else
          runstop=$runstop2
        fi
      fi
    fi

    # set runtype to 'n/a', if no runtype could be retrieved from file
    if [ "$runtype" == "" ]; then
      runtype="n/a"
    fi

    # on 15.11.2011 the runtypes had different names
    if [ "$date" == "2011/11/15" ]; then
      case "$runtype" in
        drs-calib)      runtype="drs-gain" ;;
        drs-time-calib) runtype="drs-time" ;;
        pedestal)       runtype="drs-pedestal" ;;
        light-pulser)   runtype="light-pulser-ext" ;;
        pedestal-on)    runtype="pedestal" ;;
      esac
    fi

    # get runtype
    query="SELECT fRunTypeKEY FROM RunType WHERE fRunTypeName='$runtype'"
    printprocesslog "DEBUG get run type from DB. QUERY:$query"
    # split into words on purpose; only the first word is used below
    # shellcheck disable=SC2207
    result2=( $(sendquery) )
    if [ ${#result2[@]} -eq 0 ]; then
      printprocesslog "ERROR $numberfromname: Could not query fRunTypeKey for runtype $runtype ."
      continue
    fi

    # check if entry has already checksums
    query="SELECT fNight FROM RunInfo WHERE Concat(fNight, '_', LPAD(fRunID, 3, 0))='$numberfromname'"
    query="$query AND NOT ISNULL(fMd5sumRaw) AND NOT ISNULL(fMd5sumRawZip)"
    printprocesslog "DEBUG check if md5sums are alreay in DB. QUERY:$query"
    result5=$(sendquery)

    # get md5sums of raw and zip file
    # to save time for tests and update this can be skipped ($skipmd5sum):
    # "no" always calculates them, "iffilled" only if the db has none yet
    md5raw=
    md5zip=
    if [ "$skipmd5sum" == "no" ] \
      || { [ "$skipmd5sum" == "iffilled" ] && [ "$result5" == "" ]; }; then
      # only do the md5sum if the zipfile is already available
      if ls "$file" >/dev/null 2>&1; then
        printprocesslog "INFO calculation md5sum for rawfile $rawfile"
        md5raw=$(md5sum "$rawfile" | cut -d' ' -f1)
        printprocesslog "INFO calculation md5sum for zipfile $file"
        md5zip=$(md5sum "$file" | cut -d' ' -f1)
      fi
    fi

    # insert or update depending on whether run exists
    if [ "$result3" == "" ]; then
      query="INSERT"
      querymid=" fNight=$runnumber, fRunID=$filenumberfromname, "
      querystop=
    else
      query="UPDATE"
      querymid=
      querystop=" WHERE fNight=$runnumber AND fRunID=$filenumberfromname"
    fi
    query="$query RunInfo SET $querymid fRunTypeKey=${result2[0]}"

    # header information is only available for files without fits errors
    if [ "$numfitserrors" -eq 0 ]; then
      query="$query, fRunStart='$runstart', fRunStop='$runstop'"
      append_field fNumEvents "$numevents"
      append_field fROI "$roi"
      append_field fROITimeMarker "$roitm"
      append_field fNumPhysicsTrigger "$numphys"
      append_field fNumExt1Trigger "$numext1"
      append_field fNumExt2Trigger "$numext2"
      append_field fNumELPTrigger "$numelp"
      append_field fNumILPTrigger "$numilp"
      append_field fNumPedestalTrigger "$numped"
      append_field fNumTimeTrigger "$numtime"
      append_field fNumOtherTrigger "$numoth"
    fi
    if [ "$md5raw" != "" ]; then
      query="$query, fMd5sumRaw='$md5raw', fMd5sumRawZip='$md5zip'"
    fi
    append_field fHasDrsFile "$numdrsfiles"
    append_field fDrsStep "$step"
    append_quoted_field fCompileTime "$compiletime"
    append_quoted_field fRevisionNumber "$revnum"
    query="$query, fFitsFileErrors=$numfitserrors"
    query="$query $querystop"

    # send query to DB
    sendquery >/dev/null
  done
done

finish