Changeset 14838 for trunk/DataCheck


Ignore:
Timestamp:
02/04/13 06:00:02 (12 years ago)
Author:
Daniela Dorner
Message:
adapted to new structure in LP (newdaq): removed md5sum, changed method for verifying that file is finished (check if file was closed via TSTOP), create directories only if data available
File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/DataCheck/Processing/CheckRawData.sh

    r14155 r14838  
    99# starting from 2012/06/05 the keywords CHECKSUM and DATASUM are
    1010#   available in the fits header of the rawfile
    11 #   starting from this time the md5sums are not calculated anymore
     11#   starting from this time the md5sums are not calculated anymore (not true, only from 2012/12/17)
    1212#
    1313
     
    1515# tempfile for fitsdump -h output
    1616
    17 # options:
    18 skipmd5sum="no" # fill md5 sums in any case
    19 skipmd5sum="iffilled" # fill md5 sum only if they are not yet in db # default
    20 skipmd5sum="yes" # do not fill md5 sums in any case #new default since 2012/06/05 when the checksum is available in heaser
     17## options:
     18#skipmd5sum="no" # fill md5 sums in any case
     19#skipmd5sum="iffilled" # fill md5 sum only if they are not yet in db # default
     20#skipmd5sum="yes" # do not fill md5 sums in any case #new default since 2012/06/05 when the checksum is available in heaser
    2121
    2222doupdate="yes" # update all entries
    2323doupdate="no" # fill only entries which are not yet existing #default
    2424
     25root=/opt/root_svn/bin/thisroot.sh
     26
    2527source `dirname $0`/../Sourcefile.sh
    26 printprocesslog "INFO starting $0 with options doupdate="$doupdate" and skipmd5sum="$skipmd5sum
     28printprocesslog "INFO starting $0 with options doupdate="$doupdate #" and skipmd5sum="$skipmd5sum
    2729
    2830# setup to use ftools
     
    4244   finish
    4345fi
    44 if ! ls /loc_data/raw >/dev/null 2>&1
     46if ! ls /newdaq/raw >/dev/null 2>&1
    4547then
    46    printprocesslog "ERROR /loc_data/raw is not available."
     48   printprocesslog "ERROR /newdaq/raw is not available."
    4749   finish
    4850fi
     
    5557# get last 3, 6 or 9 nights
    5658dates=( `date +%Y/%m/%d --date="-12hour"` `date +%Y/%m/%d --date="-36hour"` `date +%Y/%m/%d --date="-60hour"` \
    57 #        `date +%Y/%m/%d --date="-84hour"` `date +%Y/%m/%d --date="-108hour"` `date +%Y/%m/%d --date="-132hour"` \
     59        `date +%Y/%m/%d --date="-84hour"` `date +%Y/%m/%d --date="-108hour"` `date +%Y/%m/%d --date="-132hour"` \
    5860#        `date +%Y/%m/%d --date="-156hour"` `date +%Y/%m/%d --date="-180hour"` `date +%Y/%m/%d --date="-204hour"` \
    5961        )
     
    6870   query="SELECT fNight FROM AuxFilesAvailISDCStatus WHERE fNight="$date2
    6971   night=`sendquery`
    70    if [ "$night" == "" ]
     72   if [ "$night" == "" ] && ls /loc_data/zipraw/$date >/dev/null 2>&1
    7173   then
    7274      printprocesslog "INFO insert "$date2" to AuxFilesAvailISDCStatus"
     
    7779   query="SELECT fNight FROM DriveFileAvailISDCStatus WHERE fNight="$date2
    7880   night=`sendquery`
    79    if [ "$night" == "" ]
     81   if [ "$night" == "" ] && ls /loc_data/zipraw/$date >/dev/null 2>&1
    8082   then
    8183      printprocesslog "INFO insert "$date2" to DriveFileAvailISDCStatus"
     
    8688   query="SELECT fNight FROM AuxDataInsertStatus WHERE fNight="$date2
    8789   night=`sendquery`
    88    if [ "$night" == "" ]
     90   if [ "$night" == "" ] && ls /loc_data/zipraw/$date >/dev/null 2>&1
    8991   then
    9092      printprocesslog "INFO insert "$date2" to AuxDataInsertStatus"
     
    118120      # raw and original file
    119121      # file: /loc_data/zipraw
    120       # rawfile: /loc_data/raw
    121       # origfile: /daq/raw
    122       rawfile=`echo $file | sed -e 's/zipraw/raw/' -e 's/fits[.]gz/fits/'`
    123       origfile=`echo $rawfile | sed -e 's/loc_data/daq/'`
     122      # rawfile: /daq/raw
     123      # origfile: /newdaq/raw
     124      rawfile=`echo $file | sed -e 's/loc_data/daq/' -e 's/zipraw/raw/' -e 's/fits[.]gz/fits/'`
     125      rawfile2=`echo $file | sed -e 's/zipraw/raw/' -e 's/fits[.]gz/fits/'`
     126      origfile=`echo $rawfile | sed -e 's/daq/newdaq/'`
    124127
    125128      # check if it is drs file
     
    131134      fi
    132135
    133       # check if file is already finished
    134       # original file on daq (if data was taken on daq
    135       if [ -e $origfile ]
    136       then
    137          # check if raw file was changed in the last 30 minutes
    138          isnew=`find $origfile -cmin -30`
    139          if [ "$isnew" != "" ]
    140          then
    141             printprocesslog "WARN "$origfile" is not older than 30 min. -> continue"
    142             continue
    143          fi
    144 
    145          # get time of last modification as seconds since Epoch for both files
    146          timeorig=`stat -c %Y $origfile`
    147          timecopy=`stat -c %Y $rawfile`
    148          # compare times
    149          if ! [ $timeorig -eq $timecopy ]
    150          then
    151             # if times are not the same, the file is still open => no check
    152             printprocesslog "INFO file "$rawfile" not yet closed. -> continue"
    153             continue
    154          fi
    155       else
    156          # if the origfile doesn't exist, the data was probably written not on daq but on data
    157          printprocesslog "INFO file "$rawfile" was probably taken on data and not daq."
    158       fi
     136# not needed anymore as files are only rsynced from newdaq to daq when they are closed
     137#      # check if file is already finished
     138#      # original file on daq (if data was taken on daq
     139#      if [ -e $origfile ]
     140#      then
     141#         # check if raw file was changed in the last 30 minutes
     142#         isnew=`find $origfile -cmin -30`
     143#         if [ "$isnew" != "" ]
     144#         then
     145#            printprocesslog "WARN "$origfile" is not older than 30 min. -> continue"
     146#            continue
     147#         fi
     148#
     149#         # get time of last modification as seconds since Epoch for both files
     150#         timeorig=`stat -c %Y $origfile`
     151#         timecopy=`stat -c %Y $rawfile`
     152#         # compare times
     153#         if ! [ $timeorig -eq $timecopy ]
     154#         then
     155#            # if times are not the same, the file is still open => no check
     156#            printprocesslog "INFO file "$rawfile" not yet closed. -> continue"
     157#            continue
     158#         fi
     159#      else
     160#         # if the origfile doesn't exist, the data was probably written not on daq but on data
     161#         printprocesslog "INFO file "$rawfile" was probably taken on data and not daq."
     162#      fi
    159163     
    160164      # get run and file number form filename
     
    189193      numfitserrors=0
    190194      #checkfitsfile=`fverify $rawfile 2>/dev/null | grep '0 error(s)'`
    191       numfitserrors=`fverify $rawfile 2>/dev/null | grep -o '[0-9][ ]error(s)'  | grep -E -o '[0-9]'`
     195      #numfitserrors=`fverify $rawfile 2>/dev/null | grep -o '[0-9][ ]error(s)'  | grep -E -o '[0-9]'`
     196# ftools missing on daq
     197#      numfitserrors=`ssh daq "export HEADAS=/opt/heasoft-6.11/x86_64-unknown-linux-gnu-libc2.13-0/ ; source $HEADAS/headas-init.sh ; fverify $rawfile2 2>/dev/null | grep -o '[0-9][ ]error(s)'  | grep -E -o '[0-9]'"`
     198#      echo $rawfile2" ------ "$numfitserrors
    192199      #if [ "$checkfitsfile" == "" ]
    193200      if [ $numfitserrors -gt 0 ]
    194201      then
    195          printprocesslog "WARN "$rawfile" has "$numfitserrors" fitserror(s). "
     202         printprocesslog "WARN "$rawfile2" has "$numfitserrors" fitserror(s). "
    196203         fitsdumperrors=`$factpath/fitsdump -h -t Events $file 2>&1 | grep corrupted`
    197204         if [ "$fitsdumperrors" != "" ]
     
    200207         fi
    201208         #numfitserrors=1
     209      fi
     210
     211      # check if file was closed properly
     212      if [ "`echo $file | grep -o drs`" == "drs" ]
     213      then
     214         nondrsfile=`echo $file | sed -e 's/[.]drs//g'`
     215         tstop=`ssh newdaq "source $root ; $factpath/fitsdump -h $nondrsfile  2>/dev/null | grep TSTOPI | grep -E -o '[0-9]+'"`
     216      else
     217         tstop=`ssh newdaq "source $root ; $factpath/fitsdump -h $file  2>/dev/null | grep TSTOPI | grep -E -o '[0-9]+'"`
     218      fi
     219      if [ "$tstop" == "0" ]
     220      then
     221         echo $file" not yet closed."
     222         echo "WARN: $file has empty TSTOP"
     223         filecorrupt="yes"
    202224      fi
    203225
     
    361383      fi
    362384
    363       # check if entry has already checksums
    364       query="SELECT fNight FROM RunInfo WHERE Concat(fNight, '_', LPAD(fRunID, 3, 0))='"$numberfromname"'"
    365       query=$query" AND NOT ISNULL(fMd5sumRaw) AND NOT ISNULL(fMd5sumRawZip)"
    366       printprocesslog "DEBUG check if md5sums are alreay in DB. QUERY:"$query
    367       #result5=`/usr/bin/mysql -u operator --host=fact01.fact.local --password=$password FactData -e "$query5"`
    368       result5=`sendquery`
    369 
    370       # get md5sums of raw and zip file
    371       #   to safe time for tests and update this can be skipped ($skipmd5sum)
    372       md5sum=
    373       md5sumzip=
    374       if [ "$skipmd5sum" == "no" ] || [ "$skipmd5sum" == "iffilled" ]
    375       then
    376          #echo "skip: "$skipmd5sum >> $logfile 2>&1
    377          #echo "res5: -"$result5"-" >> $logfile 2>&1
    378          #if [ "$skipmd5sum" == "no" ] || [ "$result5$skipmd5sum" == "iffilled" ]
    379          if [ "$skipmd5sum" == "no" ] || [ "$result5" == "" ]
    380          then
    381             #zipfile=`echo $file | sed -e 's/raw/zipraw/' -e 's/fits/fits.gz/'`
    382             # only do the md5sum if the zipfile is already available
    383             if ls $file >/dev/null 2>&1
    384             then
    385                printprocesslog "INFO calculation md5sum for rawfile "$rawfile
    386                md5sum=`md5sum $rawfile | cut -d' ' -f1`
    387                printprocesslog "INFO calculation md5sum for zipfile "$file
    388                md5sumzip=`md5sum $file | cut -d' ' -f1`
    389             fi
    390          fi
    391       fi
     385#      # check if entry has already checksums
     386#      query="SELECT fNight FROM RunInfo WHERE Concat(fNight, '_', LPAD(fRunID, 3, 0))='"$numberfromname"'"
     387#      query=$query" AND NOT ISNULL(fMd5sumRaw) AND NOT ISNULL(fMd5sumRawZip)"
     388#      printprocesslog "DEBUG check if md5sums are alreay in DB. QUERY:"$query
     389#      #result5=`/usr/bin/mysql -u operator --host=fact01.fact.local --password=$password FactData -e "$query5"`
     390#      result5=`sendquery`
     391#
     392#      # get md5sums of raw and zip file
     393#      #   to safe time for tests and update this can be skipped ($skipmd5sum)
     394#      md5sum=
     395#      md5sumzip=
     396#      if [ "$skipmd5sum" == "no" ] || [ "$skipmd5sum" == "iffilled" ]
     397#      then
     398#         #echo "skip: "$skipmd5sum >> $logfile 2>&1
     399#         #echo "res5: -"$result5"-" >> $logfile 2>&1
     400#         #if [ "$skipmd5sum" == "no" ] || [ "$result5$skipmd5sum" == "iffilled" ]
     401#         if [ "$skipmd5sum" == "no" ] || [ "$result5" == "" ]
     402#         then
     403#            #zipfile=`echo $file | sed -e 's/raw/zipraw/' -e 's/fits/fits.gz/'`
     404#            # only do the md5sum if the zipfile is already available
     405#            if ls $file >/dev/null 2>&1
     406#            then
     407#               printprocesslog "INFO calculation md5sum for rawfile "$rawfile
     408#               md5sum=`md5sum $rawfile | cut -d' ' -f1`
     409#               printprocesslog "INFO calculation md5sum for zipfile "$file
     410#               md5sumzip=`md5sum $file | cut -d' ' -f1`
     411#            fi
     412#         fi
     413#      fi
    392414
    393415      # insert or update depending on whether run exists
     
    460482         query=$query", fDataSum='"$datasum"'"
    461483      fi
    462       if [ "$md5sum" != "" ]
    463       then
    464          query=$query", fMd5sumRaw='"$md5sum"', fMd5sumRawZip='"$md5sumzip"'"
    465       fi
     484#      if [ "$md5sum" != "" ]
     485#      then
     486#         query=$query", fMd5sumRaw='"$md5sum"', fMd5sumRawZip='"$md5sumzip"'"
     487#      fi
    466488      if [ "$numdrsfiles" != "" ]
    467489      then
Note: See TracChangeset for help on using the changeset viewer.