Index: /trunk/DataCheck/CheckRawData.sh
===================================================================
--- /trunk/DataCheck/CheckRawData.sh	(revision 12787)
+++ /trunk/DataCheck/CheckRawData.sh	(revision 12788)
@@ -1,7 +1,8 @@
 #!/bin/bash
 
-today=`date +%F`
-logfile=/home/`whoami`/DataCheck/log/CheckRaw$today.log
-
+# missing
+# entry in status table
+
+# options: 
 skipmd5sum="no" # fill md5 sums in any case
 skipmd5sum="iffilled" # fill md5 sum only if they are not yet in db
@@ -11,61 +12,64 @@
 doupdate="no" # fill only entries which are not yet existing
 
-fillonly="ifzipped" # fill only entries which are not yet existing + where zipped file is availabl
-
-echo ""  >> $logfile 2>&1
-echo ""  >> $logfile 2>&1
-echo ""  >> $logfile 2>&1
-echo `date`"executing "$0"..." >> $logfile 2>&1
-echo "=====> doupdate: "$doupdate >> $logfile 2>&1
-echo "=====> skipmd5sum: "$skipmd5sum >> $logfile 2>&1
-echo "=====> fillonly: "$fillonly >> $logfile 2>&1
-
-password=`cat /home/fact/DataCheck/.pw`
+source `dirname $0`/Sourcefile.sh
+printprocesslog "INFO starting $0 with options doupdate="$doupdate" and skipmd5sum="$skipmd5sum
+
+# setup to use ftools
+source $HEADAS/headas-init.sh
+
+pwfile=`dirname $0`/.pw
+password=`cat $pwfile 2>/dev/null`
 if [ "$password" == "" ]
 then
-   echo "please insert password in .pw file"
+   echo "please insert password in $pwfile"
+   printprocesslog "ERROR password for DB access in $pwfile missing"
+   finish
 fi
 
-# path to FACT++ version
-factpath=/home/fact/FACT++.2012.01.19
-
-# setup to use ftools
-export HEADAS=/opt/heasoft-6.11/x86_64-unknown-linux-gnu-libc2.13-0/
-source $HEADAS/headas-init.sh
-
-# get last 2 nights
+# check if software is available
+if ! ls $factpath/fitsdump >/dev/null 2>&1
+then 
+   printprocesslog "ERROR "$factpath"/fitsdump is not available."
+   finish
+fi
+
+# get last 2 nights
 dates=( `date +%Y/%m/%d --date="-12hour"` `date +%Y/%m/%d --date="-36hour"` )
-
-# do rsync for rawfiles of these dates
+#dates=( `find /loc_data/zipraw/ -mindepth 3 -type d | sort | sed -e 's/\/loc_data\/zipraw\///g'` ) #all available dates in /loc_data/zipraw
+
+# do check for rawfiles of these dates
 for date in ${dates[@]}
 do 
-   echo "" >> $logfile 2>&1
-   echo "" >> $logfile 2>&1
-   echo "" >> $logfile 2>&1
-   rawdir=/loc_data/raw/$date
+   ziprawdir=/loc_data/zipraw/$date
+   # check if data are available from that night
+   if ! [ -d $ziprawdir ]
+   then
+      printprocesslog "INFO "$ziprawdir" does not exist."
+      continue
+   else
+      printprocesslog "INFO processing "$ziprawdir"..."
+   fi
+
+   # find all fits.gz files starting with the oldest file
+   printprocesslog "INFO finding files to be checked in $ziprawdir..."
+   fitsgzfiles=`find $ziprawdir -type f -name '*.fits.gz'| sort `
+
+   # get runnumber from date
    runnumber=`echo $date | sed -e 's/\///g'`
-   echo `date`": processing files in "$rawdir >> $logfile 2>&1
-   # check if data are available from that night
-   if ! [ -d $rawdir ]
-   then
-      echo `date`": no data available in "$rawdir >> $logfile 2>&1
-      continue
-   fi
-
-   # find all fits-files starting with the oldest file
-   echo `date`": finding files to be zipped in $rawdir..." >> $logfile 2>&1
-   fitsfiles=`find $rawdir -type f -name '*.fits'| sort `
-
-   # loop to zip files
-   echo `date`": checking files in $rawdir..." >> $logfile 2>&1
-   for file in $fitsfiles
+   
+   # loop to check files
+   for file in $fitsgzfiles
    do
-      echo "" >> $logfile 2>&1
-      echo "checking file "$file >> $logfile 2>&1
+      printprocesslog "INFO checking file "$file
+
+      # raw and original file
+      rawfile=`echo $file | sed -e 's/zipraw/raw/' -e 's/fits[.]gz/fits/'`
+      origfile=`echo $rawfile | sed -e 's/loc_data/daq/'`
+
       # check if raw file was changed in the last 30 minutes
       isnew=`find $file -cmin -30`
       if [ "$isnew" != "" ]
       then
-         echo $file" is not older than 30 min => continue" >> $logfile 2>&1
+         printprocesslog "INFO "$file" is not older than 30 min. -> continue"
          continue
       fi
@@ -75,5 +79,5 @@
       if [ "$isnew" != "" ]
       then
-         echo $file" is a drs file => continue" >> $logfile 2>&1
+         printprocesslog "INFO "$file" is a drs file. -> continue"
          continue
       fi
@@ -81,20 +85,19 @@
       # check if file is already finished
       # original file on daq (if data was taken on daq
-      origfile=`echo $file | sed -e 's/loc_data/daq/'`
       if [ -e $origfile ]
       then
          # get time of last modification as seconds since Epoch for both files
          timeorig=`stat -c %Y $origfile`
-         timecopy=`stat -c %Y $file`
+         timecopy=`stat -c %Y $rawfile`
          # compare times
          if ! [ $timeorig -eq $timecopy ]
          then
-            # if times are not the same, the file is still open => no zip
-            echo `date`": file "$file" not yet closed => continue" >> $logfile 2>&1
+            # if times are not the same, the file is still open => no check
+            printprocesslog "INFO file "$rawfile" not yet closed. -> continue"
             continue
          fi
       else
          # if the origfile doesn't exist, the data was probably written not on daq but on data
-         echo `date`": file "$file" was probably taken on data and not daq " >> $logfile 2>&1
+         printprocesslog "INFO file "$rawfile" was probably taken on data and not daq."
       fi
       
@@ -108,12 +111,11 @@
       then
          runnumbererror="yes"
-         echo "ERROR: for file "$file" runnumber from date and filename don't agree ("$runnumber" - "$runnumberfromname")"
-         echo "ERROR: for file "$file" runnumber from date and filename don't agree ("$runnumber" - "$runnumberfromname")" >> $logfile 2>&1
+         printprocesslog "ERROR for file "$file": runnumber from date ("$runnumber") and filename ("$runnumberfromname") don't agree."
       fi
       
       # check if entry already exists
       query3="SELECT fRunNumber FROM RunInfo WHERE Concat(fRunNumber, '_', LPAD(fFileNumber, 3, 0))='"$numberfromname"'"
-      echo "Q3:"$query3 >> $logfile 2>&1 2>&1
-      result3=`/usr/bin/mysql -u operator --host=fact01.fact.local --password=$password FactData -e "$query3" 2>> $logfile`
+      printprocesslog "DEBUG check if entry already exists in DB. QUERY: "$query3
+      result3=`/usr/bin/mysql -u operator --host=fact01.fact.local --password=$password FactData -e "$query3"`
 
       # only proceed with file 
@@ -122,46 +124,12 @@
       if [ "$result3" != "" ] && [ "$doupdate" == "no" ]
       then 
-         echo $file" has been inserted already => continue " >> $logfile 2>&1
+         printprocesslog "INFO "$file" has been inserted already. -> continue "
          continue
       fi
       
-      # check if entry has already checksums
-      query5="SELECT fRunNumber FROM RunInfo WHERE Concat(fRunNumber, '_', LPAD(fFileNumber, 3, 0))='"$numberfromname"'"
-      query5=$query5" AND NOT ISNULL(fMd5sumRaw) AND NOT ISNULL(fMd5sumRawZip)"
-      echo "Q5:"$query5 >> $logfile 2>&1 
-      result5=`/usr/bin/mysql -u operator --host=fact01.fact.local --password=$password FactData -e "$query5" 2>> $logfile`
-
-      # get md5sums of raw and zip file
-      #   to safe time for tests and update this can be skipped ($skipmd5sum)
-      md5sum=
-      md5sumzip=
-      if [ "$skipmd5sum" == "no" ] || [ "$skipmd5sum" == "iffilled" ]
-      then
-         #echo "skip: "$skipmd5sum >> $logfile 2>&1
-         #echo "res5: -"$result5"-" >> $logfile 2>&1
-         #if [ "$skipmd5sum" == "no" ] || [ "$result5$skipmd5sum" == "iffilled" ] 
-         if [ "$skipmd5sum" == "no" ] || [ "$result5" == "" ] 
-         then 
-            zipfile=`echo $file | sed -e 's/raw/zipraw/' -e 's/fits/fits.gz/'`
-            # only do the md5sum if the zipfile is already available
-            if ls $zipfile >/dev/null >> $logfile 2>&1 
-            then 
-               echo "calculation md5sum for raw..." >> $logfile 2>&1
-               md5sum=`md5sum $file | cut -d' ' -f1`
-               echo "calculation md5sum for zipraw..." >> $logfile 2>&1
-               md5sumzip=`md5sum $zipfile | cut -d' ' -f1`
-            fi
-         fi
-      fi
-      if [ "$result3" == "" ] && [ "$md5sum" == "" ] && [ "$fillonly" == "ifzipped" ]
-      then 
-         echo $file" is still missing zip => do not insert yet => continue " >> $logfile 2>&1
-         continue
-      fi
-
       runtype=
       # check if fits file is corrupted
       numfitserrors=0
-      checkfitsfile=`fverify $file  2>> $logfile | grep '0 error(s)'`
+      checkfitsfile=`fverify $rawfile | grep '0 error(s)'`
       if [ "$checkfitsfile" == "" ]
       then
@@ -172,6 +140,10 @@
       then 
          # get run and file number from file
-         runnumberfromfile=`$factpath/fitsdump -h -t Events $file  2>/dev/null | grep NIGHT | grep -E -o '20[1-9][0-9][01][0-9][0-3][0-9]'`
-         filenumberfromfileorig=`$factpath/fitsdump -h -t Events $file  2>/dev/null | grep RUNID | grep -E -o '[0-9]{1,3}'`
+         runnumberfromfile=`$factpath/fitsdump -h -t Events $file 2>/dev/null | grep NIGHT | grep -E -o '20[1-9][0-9][01][0-9][0-3][0-9]'`
+         filenumberfromfileorig=`$factpath/fitsdump -h -t Events $file 2>/dev/null | grep RUNID | grep -E -o '[0-9]{1,3}'`
+         if [ "$runnumberfromfile" = "" ] || [ "$filenumberfromfileorig" = "" ] # both values are parsed from the fitsdump header output above
+         then
+            printprocesslog "ERROR couldn't get run or file number from header of file ("$file")."
+         fi
          numberfromfile=$runnumberfromfile"_"`printf %03d $filenumberfromfileorig`
          # compare numbers
@@ -179,14 +151,27 @@
          then
             numbererror="yes"
-            echo "ERROR: for file "$file" number from filename and file don't agree ("$numberfromname" -"$numberfromfile")"
-            echo "ERROR: for file "$file" number from filename and file don't agree ("$numberfromname" -"$numberfromfile")" >> $logfile 2>&1
+            printprocesslog "ERROR for file "$file": number from filename ("$numberfromname") and file ("$numberfromfile") don't agree."
          fi
       
          # check if this run has drs file
+         #   in case file is available, get STEP from header
+         # in the very beginning only drs-files were existing
+         # in the beginning the keywords DRSCALIB and STEP were not existing
+         step=
          drsfile=`echo $file | sed -e 's/fits/drs.fits/'`
          numdrsfiles=`ls $drsfile 2>/dev/null | wc -l`
+         drscalib=`$factpath/fitsdump -h -t Events $file  2>/dev/null | grep DRSCALIB | grep -E -o "['][TF][']" | sed -e "s/'//g"`
+         if [ "$drscalib" == "T" ]
+         then 
+            step=`$factpath/fitsdump -h -t Events $drsfile  2>/dev/null | grep STEP | grep -E -o "['][012][']" | sed -e "s/'//g"`
+            if ! [ $numdrsfiles -eq 1 ]
+            then 
+               printprocesslog "ERROR for file "$file" number of drsfiles ("$numdrsfiles") and information from header ("$drscalib") don't agree."
+            fi
+         fi
          
          # get other variables from header 
-         runtype=`$factpath/fitsdump -h -t Events $file  2>/dev/null | grep RUNTYPE | grep -E -o "['][a-z-]+[']" | sed -e "s/'//g"`
+         runtype=`$factpath/fitsdump -h -t Events $file  2>/dev/null | grep RUNTYPE | grep -E -o "['][a-z0-9._-]+[']" | sed -e "s/'//g" -e "s/_/-/g" -e "s/[.]//g"`
+         #echo "runtype for file "$file": "$runtype
          roi=`$factpath/fitsdump -h -t Events $file  2>/dev/null | grep NROI | grep -v NROITM | grep -E -o '[0-9]{1,4}'`
          roitm=`$factpath/fitsdump -h -t Events $file  2>/dev/null | grep NROITM | grep -E -o '[0-9]{1,4}'`
@@ -230,5 +215,5 @@
       fi
       
-      # set runtype to 'unknown' if no runtype could be queried
+      # set runtype to 'unknown', if no runtype could be retrieved from file
       if [ "$runtype" == "" ]
       then
@@ -261,11 +246,39 @@
       # get runtype
       query2="SELECT fRunTypeKEY FROM RunType WHERE fRunTypeName='"$runtype"'"
-      echo "Q2:"$query2 >> $logfile 2>&1
-      result2=( `/usr/bin/mysql -u operator --host=fact01.fact.local --password=$password FactData -e "$query2" 2>> $logfile` )
+      printprocesslog "DEBUG get run type from DB. QUERY:"$query2
+      result2=( `/usr/bin/mysql -u operator --host=fact01.fact.local --password=$password FactData -e "$query2"` )
       if [ ${#result2} -eq 0 ]
       then 
-         echo $numberfromname": Could not query fRunTypeKey for runtype "$runtype
-         echo $numberfromname": Could not query fRunTypeKey for runtype "$runtype >> $logfile 2>&1
+         printprocesslog "ERROR "$numberfromname": Could not query fRunTypeKey for runtype "$runtype" ."
          continue
+      fi
+
+      # check if entry has already checksums
+      query5="SELECT fRunNumber FROM RunInfo WHERE Concat(fRunNumber, '_', LPAD(fFileNumber, 3, 0))='"$numberfromname"'"
+      query5=$query5" AND NOT ISNULL(fMd5sumRaw) AND NOT ISNULL(fMd5sumRawZip)"
+      printprocesslog "DEBUG check if md5sums are alreay in DB. QUERY:"$query5
+      result5=`/usr/bin/mysql -u operator --host=fact01.fact.local --password=$password FactData -e "$query5"`
+
+      # get md5sums of raw and zip file
+      #   to save time for tests and update this can be skipped ($skipmd5sum)
+      md5sum=
+      md5sumzip=
+      if [ "$skipmd5sum" == "no" ] || [ "$skipmd5sum" == "iffilled" ]
+      then
+         #echo "skip: "$skipmd5sum >> $logfile 2>&1
+         #echo "res5: -"$result5"-" >> $logfile 2>&1
+         #if [ "$skipmd5sum" == "no" ] || [ "$result5$skipmd5sum" == "iffilled" ] 
+         if [ "$skipmd5sum" == "no" ] || [ "$result5" == "" ] 
+         then 
+            #zipfile=`echo $file | sed -e 's/raw/zipraw/' -e 's/fits/fits.gz/'`
+            # only do the md5sum if the zipfile is already available
+            if ls $file >/dev/null 2>&1
+            then 
+               printprocesslog "INFO calculation md5sum for rawfile "$rawfile
+               md5sum=`md5sum $rawfile | cut -d' ' -f1`
+               printprocesslog "INFO calculation md5sum for zipfile "$file
+               md5sumzip=`md5sum $file | cut -d' ' -f1`
+            fi
+         fi
       fi
 
@@ -284,6 +297,17 @@
       if [ $numfitserrors -eq 0 ]
       then 
-         query4=$query4", fNumEvents="$numevents", fROI="$roi", fROITimeMarker="$roitm
          query4=$query4", fRunStart='"$runstart"', fRunStop='"$runstop"'"
+         if [ "$numevents" != "" ] # only add fNumEvents when a value is available
+         then
+            query4=$query4", fNumEvents="$numevents
+         fi
+         if [ "$roi" != "" ]
+         then
+            query4=$query4", fROI="$roi
+         fi
+         if [ "$roitm" != "" ]
+         then
+            query4=$query4", fROITimeMarker="$roitm
+         fi
          if [ "$numphys" != "" ]
          then
@@ -324,33 +348,19 @@
          query4=$query4", fHasDrsFile="$numdrsfiles
       fi
+      if [ "$step" != "" ]
+      then
+         query4=$query4", fDrsStep="$step
+      fi
       query4=$query4", fFitsFileErrors="$numfitserrors
       query4=$query4" "$querystop
-      echo "Q4:"$query4 >> $logfile 2>&1
-      if ! /usr/bin/mysql -u operator --host=fact01.fact.local --password=$password FactData -e "$query4" >> $logfile 2>&1
-      then
-         echo "insert/update of "$numfromfile" to mysql failed" >> $logfile 2>&1
-      fi
-
-      # print summary for debug
-      echo "*******"  >> $logfile 2>&1
-      echo "summary for file "$file >> $logfile 2>&1
-      echo "*******"  >> $logfile 2>&1
-      echo "  errors: "$numfitserrors" (1 means errors exist)" >> $logfile 2>&1
-      echo "  number of drs files: "$numdrsfiles >> $logfile 2>&1
-      echo "  runnumber error: "$runnumbererror >> $logfile 2>&1
-      echo "  number error: "$numbererror >> $logfile 2>&1
-      echo "  roi: "$roi >> $logfile 2>&1
-      echo "  roitm: "$roitm >> $logfile 2>&1
-      echo "  runtype: "$runtype >> $logfile 2>&1
-      echo "  numevents: "$numevents >> $logfile 2>&1
-      echo "  md5sum: "$md5sum >> $logfile 2>&1
-      echo "  md5sum(zip): "$md5sumzip >> $logfile 2>&1
-      echo "  start: "$runstart >> $logfile 2>&1
-      echo "  stop: "$runstop >> $logfile 2>&1
-
-      # missing
-      # check wether entry has been made (status table)
+      printprocesslog "INFO insert/update entry in DB. QUERY: "$query4
+      if ! /usr/bin/mysql -u operator --host=fact01.fact.local --password=$password FactData -e "$query4"
+      then
+         printprocesslog "ERROR insert/update of "$numberfromname" to mysql failed." # numberfromname is the key used in the RunInfo lookups
+         finish
+      fi
    done
 done
 
-
+finish
+
