Index: trunk/DataCheck/Processing/CheckRawData.sh
===================================================================
--- trunk/DataCheck/Processing/CheckRawData.sh	(revision 13039)
+++ trunk/DataCheck/Processing/CheckRawData.sh	(revision 13039)
@@ -0,0 +1,414 @@
+#!/bin/bash
+
+# fills the information on the raw data files of the last nights into the database
+# this script has been written to run on La Palma on the machine data
+#   i.e. paths are only working on this machine
+# the script starts from the zipped files, which causes a delay until files are in
+#   the database: they have to be rsynced and zipped first (see RsyncRawData.sh, ZipRawData.sh)
+
+# TODO: still missing is the entry in the status table
+
+# options (the last uncommented assignment of each variable is the one in effect):
+skipmd5sum="no" # fill md5 sums in any case
+skipmd5sum="iffilled" # fill md5 sum only if they are not yet in db # default
+#skipmd5sum="yes" # do not fill md5 sums in any case
+
+doupdate="yes" # update all entries
+doupdate="no" # fill only entries which are not yet existing #default
+
+# Sourcefile.sh sits next to this script (quoted: the path may contain blanks)
+source "$(dirname "$0")/Sourcefile.sh"
+printprocesslog "INFO starting $0 with options doupdate=$doupdate and skipmd5sum=$skipmd5sum"
+
+# set up the environment for the ftools (fverify is used further down)
+source $HEADAS/headas-init.sh
+
+# without fitsdump the headers cannot be read -> stop
+ls $factpath/fitsdump >/dev/null 2>&1 || {
+   printprocesslog "ERROR "$factpath"/fitsdump is not available."
+   finish
+}
+
+# the original files, their copy and the zipped copy have to be reachable -> stop otherwise
+ls /daq/raw >/dev/null 2>&1 || {
+   printprocesslog "ERROR /daq/raw is not available."
+   finish
+}
+ls /loc_data/raw >/dev/null 2>&1 || {
+   printprocesslog "ERROR /loc_data/raw is not available."
+   finish
+}
+ls /loc_data/zipraw >/dev/null 2>&1 || {
+   printprocesslog "ERROR /loc_data/zipraw is not available."
+   finish
+}
+
+# nights to be checked: the dates (YYYY/MM/DD) 12, 36 and 60 hours ago, i.e. the last 3 nights
+dates=( $(date +%Y/%m/%d --date="-12hour") $(date +%Y/%m/%d --date="-36hour") $(date +%Y/%m/%d --date="-60hour") )
+#dates=( `find /loc_data/zipraw/ -mindepth 3 -type d | sort | sed -e 's/\/loc_data\/zipraw\///g'` ) #all available dates in /loc_data/zipraw
+
+# do check for rawfiles of these dates
+for date in ${dates[@]}
+do 
+   # directory with the zipped raw files of that night
+   ziprawdir=/loc_data/zipraw/$date
+   # nights without data are skipped
+   if [ -d $ziprawdir ]
+   then
+      printprocesslog "INFO processing "$ziprawdir"..."
+   else
+      printprocesslog "INFO "$ziprawdir" does not exist."
+      continue
+   fi
+
+   # collect all fits.gz files of the night, sorted so that the oldest file comes first
+   printprocesslog "INFO finding files to be checked in $ziprawdir..."
+   fitsgzfiles=$(find $ziprawdir -type f -name '*.fits.gz' | sort)
+   # the runnumber is the date without the slashes (YYYYMMDD)
+   runnumber=${date//\//}
+   
+   # loop to check files
+   for file in $fitsgzfiles
+   do
+      printprocesslog "INFO checking file "$file
+      #echo "INFO checking file "$file
+
+      # unzipped copy (below /loc_data/raw) and original file (below /daq/raw) of the zipped file
+      rawfile=${file/zipraw/raw}
+      rawfile=${rawfile/fits.gz/fits}
+      origfile=${rawfile/loc_data/daq}
+
+      # drs files are not checked here: skip every file with 'drs' in its path
+      if ls $file | grep -q drs
+      then
+         printprocesslog "INFO "$file" is a drs file. -> continue"
+         continue
+      fi
+
+      # check if file is already finished
+      #   (only possible if the original file on daq exists, i.e. if data was taken on daq)
+      if [ -e $origfile ]
+      then
+         # check if original file was changed in the last 30 minutes (find -cmin: status change time)
+         isnew=`find $origfile -cmin -30`
+         if [ "$isnew" != "" ]
+         then
+            printprocesslog "WARN "$origfile" is not older than 30 min. -> continue"
+            continue
+         fi
+
+         # get time of last modification as seconds since Epoch for both files
+         timeorig=`stat -c %Y $origfile`
+         timecopy=`stat -c %Y $rawfile`
+         # compare times (the file is also skipped if one of the times could not be retrieved)
+         if ! [ $timeorig -eq $timecopy ]
+         then
+            # if times are not the same, the file is still open => no check
+            printprocesslog "INFO file "$rawfile" not yet closed. -> continue"
+            continue
+         fi
+      else
+         # if the origfile doesn't exist, the data was probably written not on daq but on data
+         printprocesslog "INFO file "$rawfile" was probably taken on data and not daq."
+      fi
+      
+      # get run and file number from filename (the part of the path matching YYYYMMDD_NNN)
+      runnumbererror="no" # set to yes below, but not read again in this script
+      numbererror="no" # set to yes when comparing with the header, not read again in this script
+      numberfromname=`echo $file | grep -E -o '20[1-9][0-9][01][0-9][0-3][0-9]_[0-9]{3}'`
+      runnumberfromname=`echo $numberfromname | cut -d_ -f1`
+      filenumberfromname=`echo $numberfromname | cut -d_ -f2 | sed -e 's/^0//g' -e 's/^0//g'` # up to two leading zeros removed
+      if [ "$runnumber" != "$runnumberfromname" ]
+      then
+         runnumbererror="yes"
+         printprocesslog "ERROR for file "$file": runnumber from date ("$runnumber") and filename ("$runnumberfromname") don't agree."
+      fi
+      
+      # check if entry already exists (an empty result3 is treated below as 'run not yet in RunInfo')
+      query="SELECT fNight FROM RunInfo WHERE Concat(fNight, '_', LPAD(fRunID, 3, 0))='"$numberfromname"'"
+      printprocesslog "DEBUG check if entry already exists in DB. QUERY: "$query
+      #result3=`/usr/bin/mysql -u operator --host=fact01.fact.local --password=$password FactData -e "$query3"`
+      result3=`sendquery`
+
+      # skip the file
+      #  if its information is already in the database
+      #  and no update is wished ($doupdate)
+      if [ "$result3" != "" ] && [ "$doupdate" == "no" ]
+      then 
+         printprocesslog "INFO "$file" has been inserted already. -> continue "
+         continue
+      fi
+      
+      # reset the values which are filled per file, so that nothing is left over from the previous file
+      runtype=; numdrsfiles=; step=; compiletime=; revnum=
+      # check if fits file is corrupted: take the complete number of errors from the
+      #   fverify summary (matching a single digit would e.g. turn 10 errors into 0)
+      numfitserrors=`fverify "$rawfile" 2>/dev/null | grep -E -o '[0-9]+[ ]error[(]s[)]' | grep -E -o '[0-9]+'`
+      # no summary at all (fverify failed or rawfile missing) is counted as one error
+      numfitserrors=${numfitserrors:-1}
+      if [ "$numfitserrors" -gt 0 ]
+      then
+         printprocesslog "WARN "$rawfile" has "$numfitserrors" fitserror(s). "
+      fi
+
+      if [ $numfitserrors -eq 0 ]
+      then 
+         # read the header of the Events table only once and take all values from this copy
+         eventsheader=`$factpath/fitsdump -h -t Events $file 2>/dev/null`
+         # get run and file number from the header
+         runnumberfromfile=`grep NIGHT <<<"$eventsheader" | grep -E -o '20[1-9][0-9][01][0-9][0-3][0-9]'`
+         filenumberfromfileorig=`grep RUNID <<<"$eventsheader" | grep -E -o '[0-9]{1,3}'`
+         if [ "$runnumberfromfile" = "" ] || [ "$filenumberfromfileorig" = "" ]
+         then
+            printprocesslog "ERROR couldn't get run or file number from header of file ("$file")."
+         fi
+         numberfromfile=$runnumberfromfile"_"`printf %03d $filenumberfromfileorig`
+         # compare numbers
+         if [ "$numberfromfile" != "$numberfromname" ]
+         then
+            numbererror="yes"
+            printprocesslog "ERROR for file "$file": number from filename ("$numberfromname") and file ("$numberfromfile") don't agree."
+         fi
+      
+         # check if this run has drs file
+         #   in case file is available, get STEP from header
+         # in the very beginning only drs-files were existing
+         # in the beginning the keywords DRSCALIB and STEP were not existing
+         numdrsfiles=
+         step=
+         drsfile=`echo $file | sed -e 's/fits/drs.fits/'`
+         numdrsfiles=`ls $drsfile 2>/dev/null | wc -l`
+         drscalib=`grep DRSCALIB <<<"$eventsheader" | grep -E -o "['][TF][']" | sed -e "s/'//g"`
+         if [ "$drscalib" == "T" ]
+         then 
+            step=`$factpath/fitsdump -h -t Events $drsfile  2>/dev/null | grep STEP | grep -E -o "['][012][']" | sed -e "s/'//g"`
+            if ! [ $numdrsfiles -eq 1 ]
+            then 
+               printprocesslog "ERROR for file "$file" number of drsfiles ("$numdrsfiles") and information from header ("$drscalib") don't agree."
+            fi
+         else
+            if ! [ "$drscalib" == "F" ]
+            then
+               printprocesslog "WARN for file "$file" DRSCALIB is neither T nor F."
+            fi
+         fi
+         
+         # get other variables from header (runtype: '_' is replaced by '-' and '.' is removed)
+         runtype=`grep RUNTYPE <<<"$eventsheader" | grep -E -o "['][a-z0-9._-]+[']" | sed -e "s/'//g" -e "s/_/-/g" -e "s/[.]//g"`
+         roi=`grep NROI <<<"$eventsheader" | grep -v NROITM | grep -E -o '[0-9]{1,4}'`
+         roitm=`grep NROITM <<<"$eventsheader" | grep -E -o '[0-9]{1,4}'`
+         numevents=`grep Events <<<"$eventsheader" | grep -E -o '[0-9]+'`
+         numphys=`grep 'NTRG ' <<<"$eventsheader" | grep -E -o '[0-9]+'`
+         numext1=`grep 'NTRGEXT1' <<<"$eventsheader" | grep -E -o '[ ][0-9]+[ ]' | sed -e 's/\ //g'`
+         numext2=`grep 'NTRGEXT2' <<<"$eventsheader" | grep -E -o '[ ][0-9]+[ ]' | sed -e 's/\ //g'`
+         numelp=`grep 'NTRGLPE' <<<"$eventsheader" | grep -E -o '[0-9]+'`
+         numilp=`grep 'NTRGLPI' <<<"$eventsheader" | grep -E -o '[0-9]+'`
+         numoth=`grep 'NTRGMISC' <<<"$eventsheader" | grep -E -o '[0-9]+'`
+         numped=`grep 'NTRGPED' <<<"$eventsheader" | grep -E -o '[0-9]+'`
+         numtime=`grep 'NTRGTIM' <<<"$eventsheader" | grep -E -o '[0-9]+'`
+         compiled=`grep 'COMPILED' <<<"$eventsheader" | grep -E -o "['][a-zA-Z]+[ ][ 12][0-9][ ]20[0-9][0-9][ ][0-2][0-9]:[0-5][0-9]:[0-5][0-9][']" | sed -e "s/'//g"`
+         if ! [ "$compiled" == "" ]
+         then 
+            compiletime=`date +'%F %H:%M:%S' --date="${compiled}" `
+         else
+            compiletime=
+         fi
+         revnum=`grep 'REVISION' <<<"$eventsheader" | grep -E -o "['][0-9]+[:]?[0-9]*[MSP]*[']" | sed -e "s/'//g"`
+         # in newest data start time is in DATE-OBS
+         # in older data start time is in TSTART
+         # in the beginning TSTART was empty (DATE is used in this case)
+         runstart=`grep DATE-OBS <<<"$eventsheader" | grep -E -o '20[1-9][0-9][\-][01][0-9][\-][0-3][0-9]T[0-2][0-9]:[0-6][0-9]:[0-6][0-9][.][0-9]{6}'`
+         runstart2=`grep TSTART <<<"$eventsheader" | grep -E -o '20[1-9][0-9][\-][01][0-9][\-][0-3][0-9]T[0-2][0-9]:[0-6][0-9]:[0-6][0-9][.][0-9]{6}'`
+         if [ "$runstart" == ""  ]
+         then
+            if [ "$runstart2" == ""  ]
+            then
+               runstart=`grep DATE <<<"$eventsheader" | grep -v 'DATE-' | grep -E -o '20[1-9][0-9][\-][01][0-9][\-][0-3][0-9]T[0-2][0-9]:[0-6][0-9]:[0-6][0-9][.][0-9]{6}'`
+            else
+               runstart=$runstart2
+            fi
+         fi
+         # in newest data stop time is in DATE-END, in older data it is in TSTOP
+         # in the beginning TSTOP was empty (modification time of the zipped file is used in this case)
+         runstop=`grep DATE-END <<<"$eventsheader" | grep -E -o '20[1-9][0-9][\-][01][0-9][\-][0-3][0-9]T[0-2][0-9]:[0-6][0-9]:[0-6][0-9][.][0-9]{6}'`
+         runstop2=`grep TSTOP <<<"$eventsheader" | grep -E -o '20[1-9][0-9][\-][01][0-9][\-][0-3][0-9]T[0-2][0-9]:[0-6][0-9]:[0-6][0-9][.][0-9]{6}'`
+         if [ "$runstop" == ""  ]
+         then
+            if [ "$runstop2" == ""  ]
+            then
+               runstop=`stat $file  2>/dev/null | grep Modify | grep -E -o '20[1-9][0-9][\-][01][0-9][\-][0-3][0-9][ ][0-2][0-9]:[0-6][0-9]:[0-6][0-9][.][0-9]{9}'`
+            else
+               runstop=$runstop2
+            fi
+         fi
+      fi
+      
+      # a file for which no runtype could be retrieved gets the runtype 'n/a'
+      #   (the header is e.g. not read for files with fits errors)
+      if [ -z "$runtype" ]
+      then
+         runtype="n/a"
+      fi
+      # on 15.11.2011 the runtypes had different names
+      #   -> translate them (each name at most once: pedestal-on must not end up as drs-pedestal)
+      if [ "$date" == "2011/11/15" ]
+      then
+         case "$runtype" in
+            drs-calib)
+               runtype="drs-gain"
+               ;;
+            drs-time-calib)
+               runtype="drs-time"
+               ;;
+            pedestal)
+               runtype="drs-pedestal"
+               ;;
+            light-pulser)
+               runtype="light-pulser-ext"
+               ;;
+            pedestal-on)
+               runtype="pedestal"
+               ;;
+            # any other runtype is kept as it is
+         esac
+      fi
+      # look up the key of the runtype in the database
+      query="SELECT fRunTypeKEY FROM RunType WHERE fRunTypeName='$runtype'"
+      printprocesslog "DEBUG get run type from DB. QUERY:"$query
+      #result2=( `/usr/bin/mysql -u operator --host=fact01.fact.local --password=$password FactData -e "$query2"` )
+      result2=( $(sendquery) )
+      if [ -z "${result2[0]}" ]
+      then 
+         printprocesslog "ERROR "$numberfromname": Could not query fRunTypeKey for runtype "$runtype" ."
+         continue
+      fi
+
+      # check if entry has already both checksums (result5 is compared with the empty string below)
+      query="SELECT fNight FROM RunInfo WHERE Concat(fNight, '_', LPAD(fRunID, 3, 0))='"$numberfromname"'"
+      query=$query" AND NOT ISNULL(fMd5sumRaw) AND NOT ISNULL(fMd5sumRawZip)"
+      printprocesslog "DEBUG check if md5sums are alreay in DB. QUERY:"$query
+      #result5=`/usr/bin/mysql -u operator --host=fact01.fact.local --password=$password FactData -e "$query5"`
+      result5=`sendquery`
+
+      # get md5sums of raw and zip file
+      #   to save time for tests and update this can be skipped ($skipmd5sum)
+      md5sum=
+      md5sumzip=
+      if [ "$skipmd5sum" == "no" ] || [ "$skipmd5sum" == "iffilled" ]
+      then
+         # skipmd5sum=no:       calculate the md5 sums in any case
+         # skipmd5sum=iffilled: calculate them only if the query above returned nothing
+         #if [ "$skipmd5sum" == "no" ] || [ "$result5$skipmd5sum" == "iffilled" ] 
+         if [ "$skipmd5sum" == "no" ] || [ "$result5" == "" ] 
+         then 
+            #zipfile=`echo $file | sed -e 's/raw/zipraw/' -e 's/fits/fits.gz/'`
+            # only do the md5sum if the zipfile is already available
+            if ls $file >/dev/null 2>&1
+            then 
+               printprocesslog "INFO calculation md5sum for rawfile "$rawfile
+               md5sum=`md5sum $rawfile | cut -d' ' -f1`
+               printprocesslog "INFO calculation md5sum for zipfile "$file
+               md5sumzip=`md5sum $file | cut -d' ' -f1`
+            fi
+         fi
+      fi
+
+      # insert or update depending on whether run exists (result3 is the result of the existence query)
+      if [ "$result3" == "" ]
+      then # run not yet in RunInfo: insert it into the three status tables and prepare the INSERT
+         query="INSERT RawFileAvailISDCStatus SET fNight="$runnumber", fRunID="$filenumberfromname
+         sendquery >/dev/null
+         query="INSERT RawFileRsyncedISDCStatus SET fNight="$runnumber", fRunID="$filenumberfromname
+         sendquery >/dev/null
+         query="INSERT RawFileAvailWueStatus SET fNight="$runnumber", fRunID="$filenumberfromname
+         sendquery >/dev/null
+         query="INSERT"
+         querymid=" fNight="$runnumber", fRunID="$filenumberfromname", "
+         querystop=
+      else # run exists already: prepare the UPDATE of exactly this run
+         query="UPDATE"
+         querymid=
+         querystop=" WHERE fNight="$runnumber" AND fRunID="$filenumberfromname
+      fi
+      query=$query" RunInfo SET "$querymid" fRunTypeKey="${result2[0]}
+      if [ $numfitserrors -eq 0 ]
+      then # the following values are only read from the header for files without fits errors
+         query=$query", fRunStart='"$runstart"', fRunStop='"$runstop"'"
+         if [ "$numevents" != "" ]
+         then
+            query=$query", fNumEvents="$numevents
+         fi
+         if [ "$roi" != "" ]
+         then
+            query=$query", fROI="$roi
+         fi
+         if [ "$roitm" != "" ]
+         then
+            query=$query", fROITimeMarker="$roitm
+         fi
+         if [ "$numphys" != "" ]
+         then
+            query=$query", fNumPhysicsTrigger="$numphys
+         fi
+         if [ "$numext1" != "" ]
+         then
+            query=$query", fNumExt1Trigger="$numext1
+         fi
+         if [ "$numext2" != "" ]
+         then
+            query=$query", fNumExt2Trigger="$numext2
+         fi
+         if [ "$numelp" != "" ]
+         then
+            query=$query", fNumELPTrigger="$numelp
+         fi
+         if [ "$numilp" != "" ]
+         then
+            query=$query", fNumILPTrigger="$numilp
+         fi
+         if [ "$numped" != "" ]
+         then
+            query=$query", fNumPedestalTrigger="$numped
+         fi
+         if [ "$numtime" != "" ]
+         then
+            query=$query", fNumTimeTrigger="$numtime
+         fi
+         if [ "$numoth" != "" ]
+         then
+            query=$query", fNumOtherTrigger="$numoth
+         fi
+      fi
+      if [ "$md5sum" != "" ]
+      then # both md5 sums are filled together, depending only on the sum of the raw file
+         query=$query", fMd5sumRaw='"$md5sum"', fMd5sumRawZip='"$md5sumzip"'"
+      fi
+      if [ "$numdrsfiles" != "" ]
+      then
+         query=$query", fHasDrsFile="$numdrsfiles
+      fi
+      if [ "$step" != "" ]
+      then
+         query=$query", fDrsStep="$step
+      fi
+      if [ "$compiletime" != "" ]
+      then
+         query=$query", fCompileTime='"$compiletime"'"
+      fi
+      if [ "$revnum" != "" ]
+      then
+         query=$query", fRevisionNumber='"$revnum"'"
+      fi
+      query=$query", fFitsFileErrors="$numfitserrors
+      query=$query" "$querystop
+      # send the assembled INSERT or UPDATE query to DB
+      sendquery >/dev/null
+   done
+done
+
+finish
+
