Ignore:
Timestamp:
11/22/11 06:18:09 (13 years ago)
Author:
Daniela Dorner
Message:
added new information and checks
File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/DataCheck/CheckRawData.sh

    r12583 r12609  
    33today=`date +%F`
    44logfile=/home/`whoami`/DataCheck/log/CheckRaw$today.log
     5
     6skipmd5sum="no" # fill md5 sums in any case
     7skipmd5sum="iffilled" # fill md5 sum only if they are not yet in db
     8#skipmd5sum="yes" # do not fill md5 sums in any case
     9
     10doupdate="yes" # update all entries
     11#doupdate="no" # fill only entries which are not yet existing
     12
     13echo ""  >> $logfile 2>&1
     14echo ""  >> $logfile 2>&1
     15echo ""  >> $logfile 2>&1
     16echo `date`"executing "$0"..." >> $logfile 2>&1
     17echo "=====> doupdate: "$doupdate >> $logfile 2>&1
     18echo "=====> skipmd5sum: "$skipmd5sum >> $logfile 2>&1
    519
    620password=`cat /home/fact/DataCheck/.pw`
     
    1630# get last 3 nights
    1731dates=( `date +%Y/%m/%d` `date +%Y/%m/%d --date="-1day"` `date +%Y/%m/%d --date="-2day"` `date +%Y/%m/%d --date="-3day"` )
    18 dates=( `date +%Y/%m/%d --date="-1day"` )
     32dates=( `date +%Y/%m/%d --date="-2day"` )
    1933
    2034# do rsync for rawfiles of these dates
     
    2640   rawdir=/loc_data/raw/$date
    2741   runnumber=`echo $date | sed -e 's/\///g'`
    28    echo `date`": processing files in "$rawdir >> $logfile
     42   echo `date`": processing files in "$rawdir >> $logfile 2>&1
    2943   # check if data are available from that night
    3044   if ! [ -d $rawdir ]
    3145   then
    32       echo `date`": no data available in "$rawdir >> $logfile
     46      echo `date`": no data available in "$rawdir >> $logfile 2>&1
    3347      continue
    3448   fi
    3549
    3650   # find all fits-files starting with the oldest file
    37    echo `date`": finding files to be zipped in $rawdir..." >> $logfile
     51   echo `date`": finding files to be zipped in $rawdir..." >> $logfile 2>&1
    3852   fitsfiles=`find $rawdir -type f -name '*.fits'| sort `
    3953
    4054   # loop to zip files
    41    echo `date`": zipping files in $rawdir..." >> $logfile
     55   echo `date`": checking files in $rawdir..." >> $logfile 2>&1
    4256   for file in $fitsfiles
    4357   do
    44       # check if raw file was accessed in the last 30 minutes
    45       isnew=`find $file -amin -30`
     58      echo "" >> $logfile 2>&1
     59      echo "checking file "$file >> $logfile 2>&1
     60      # check if raw file was changed in the last 30 minutes
     61      isnew=`find $file -cmin -30`
    4662      if [ "$isnew" != "" ]
    4763      then
    48          echo $file" is not older than 30 min => continue" >> $logfile
     64         echo $file" is not older than 30 min => continue" >> $logfile 2>&1
    4965         continue
    5066      fi
     
    5470      if [ "$isnew" != "" ]
    5571      then
    56          echo $file" is a drs file => continue" >> $logfile
     72         echo $file" is a drs file => continue" >> $logfile 2>&1
    5773         continue
    5874      fi
     
    7086         then
    7187            # if times are not the same, the file is still open => no zip
    72             echo `date`": file "$file" not yet closed => continue" >> $logfile
     88            echo `date`": file "$file" not yet closed => continue" >> $logfile 2>&1
    7389            continue
    7490         fi
    7591      else
    7692         # if the origfile doesn't exist, the data was probably written not on daq but on data
    77          echo `date`": file "$file" was probably taken on data and not daq " >> $logfile
    78       fi
    79      
    80       # get md5sum
    81       md5sum=`md5sum $file | cut -d' ' -f1`
    82       zipfile=`echo $file | sed -e 's/raw/zipraw/' -e 's/fits/fits.gz/'`
    83       md5sumzip=`md5sum $zipfile | cut -d' ' -f1`
     93         echo `date`": file "$file" was probably taken on data and not daq " >> $logfile 2>&1
     94      fi
    8495     
    8596      # get and check run and file number
     
    101112      fi
    102113     
     114      # check if entry already exists
     115      query3="SELECT fRunNumber FROM RunInfo WHERE fRunNumber="$runnumber" AND fFileNumber="$filenumberfromfileorig
     116      echo "Q3:"$query3 >> $logfile 2>&1 2>&1
     117      result3=`/usr/bin/mysql -u operator --host=fact01.fact.local --password=$password FactData -e "$query3"`
     118
     119      # only proceed with file
     120      #  if information is not yet in database
     121      #  or an update is wished ($doupdate)
     122      if [ "$result3" != "" ] && [ "$doupdate" == "no" ]
     123      then
     124         echo $file" has been inserted already => continue " >> $logfile 2>&1
     125         continue
     126      fi
     127     
     128      # check if entry has already checksums
     129      query5="SELECT fRunNumber FROM RunInfo WHERE fRunNumber="$runnumber" AND fFileNumber="$filenumberfromfileorig
     130      query5=$query5" AND NOT ISNULL(fMd5sumRaw) AND NOT ISNULL(fMd5sumRawZip)"
     131      echo "Q5:"$query5 >> $logfile 2>&1 2>&1
     132      result5=`/usr/bin/mysql -u operator --host=fact01.fact.local --password=$password FactData -e "$query5"`
     133
     134      # get md5sums of raw and zip file
     135      #   to save time for tests and updates, this can be skipped ($skipmd5sum)
     136      md5sum=
     137      md5sumzip=
     138      if [ "$skipmd5sum" == "no" ] || [ "$skipmd5sum" == "iffilled" ]
     139      then
     140         #echo "skip: "$skipmd5sum >> $logfile 2>&1
     141         #echo "res5: -"$result5"-" >> $logfile 2>&1
     142         #if [ "$skipmd5sum" == "no" ] || [ "$result5$skipmd5sum" == "iffilled" ]
     143         if [ "$skipmd5sum" == "no" ] || [ "$result5" == "" ]
     144         then
     145            echo "calculation md5sum for raw..." >> $logfile 2>&1
     146            md5sum=`md5sum $file | cut -d' ' -f1`
     147            zipfile=`echo $file | sed -e 's/raw/zipraw/' -e 's/fits/fits.gz/'`
     148            echo "calculation md5sum for zipraw..." >> $logfile 2>&1
     149            md5sumzip=`md5sum $zipfile | cut -d' ' -f1`
     150         fi
     151      fi
     152     
     153      # check if this run has drs file
     154      drsfile=`echo $file | sed -e 's/fits/drs.fits/'`
     155      numdrsfiles=`ls $drsfile 2>/dev/null | wc -l`
     156     
    103157      # get other variables from header
    104158      runtype=`/home/fact/FACT++/fitsdump -h -t Events $file  2>/dev/null | grep RUNTYPE | grep -E -o "['][a-z-]+[']" | sed -e "s/'//g"`
    105159      roi=`/home/fact/FACT++/fitsdump -h -t Events $file  2>/dev/null | grep NROI | grep -v NROITM | grep -E -o '[0-9]{1,4}'`
    106160      numevents=`/home/fact/FACT++/fitsdump -h -t Events $file  2>/dev/null | grep Events | grep -E -o '[0-9]+'`
     161      runstart=`/home/fact/FACT++/fitsdump -h -t Events $file  2>/dev/null | grep TSTART | grep -E -o '20[1-9][0-9][\-][01][0-9][\-][0-3][0-9]T[0-2][0-9]:[0-6][0-9]:[0-6][0-9][.][0-9]{6}'`
     162      runstop=`/home/fact/FACT++/fitsdump -h -t Events $file  2>/dev/null | grep TSTOP | grep -E -o '20[1-9][0-9][\-][01][0-9][\-][0-3][0-9]T[0-2][0-9]:[0-6][0-9]:[0-6][0-9][.][0-9]{6}'`
     163      if [ "$runstart" == ""  ]
     164      then
     165         runstart=`/home/fact/FACT++/fitsdump -h -t Events $file  2>/dev/null | grep DATE | grep -E -o '20[1-9][0-9][\-][01][0-9][\-][0-3][0-9]T[0-2][0-9]:[0-6][0-9]:[0-6][0-9][.][0-9]{6}'`
     166      fi
     167      if [ "$runstop" == ""  ]
     168      then
     169         runstop=`stat $file  2>/dev/null | grep Modify | grep -E -o '20[1-9][0-9][\-][01][0-9][\-][0-3][0-9][ ][0-2][0-9]:[0-6][0-9]:[0-6][0-9][.][0-9]{9}'`
     170      fi
    107171
    108172      # check if fits file is corrupted
    109       fitsfileerror="no"
     173      numfitserrors=0
    110174      checkfitsfile=`fverify $file | grep '0 error(s)'`
    111175      if [ "$checkfitsfile" == "" ]
    112176      then
    113          fitsfileerror="yes"
    114       fi
    115       echo "summary for file"$file
    116       echo "  errors: "$fitsfileerror
    117       echo "  runnumber error: "$runnumbererror
    118       echo "  number error: "$numbererror
    119       echo "  roi: "$roi
    120       echo "  runtype: "$runtype
    121       echo "  numevents: "$numevents
    122       echo "  md5sum: "$md5sum
    123       echo "  md5sum(zip): "$md5sumzip
    124       # check whether entry has been made (status table)
     177         numfitserrors=1
     178      fi
    125179      # get runtype
    126180      query2="SELECT fRunTypeKEY FROM RunType WHERE fRunTypeName='"$runtype"'"
    127       echo "Q2:"$query2
     181      echo "Q2:"$query2 >> $logfile 2>&1
    128182      result2=( `/usr/bin/mysql -u operator --host=fact01.fact.local --password=$password FactData -e "$query2"` )
    129183      if [ ${#result2} -eq 0 ]
    130184      then
    131185         echo "Could not query fRunTypeKey for runtype "$runtype
     186         echo "Could not query fRunTypeKey for runtype "$runtype >> $logfile 2>&1
    132187         exit
    133188      fi
    134       # check if entry already exists
    135       query3="SELECT fRunNumber FROM RunInfo WHERE fRunNumber="$runnumber" AND fFileNumber="$filenumberfromfileorig
    136       echo "Q3:"$query3
    137       resul3=`/usr/bin/mysql -u operator --host=fact01.fact.local --password=$password FactData -e "$query3"`
     189      # insert or update depending on whether run exists
    138190      if [ "$result3" == "" ]
    139191      then
    140192         query4="INSERT"
    141193         querymid=" fRunNumber="$runnumber", fFileNumber="$filenumberfromfileorig", "
     194         querystop=""
    142195      else
    143196         query4="UPDATE"
     
    145198      fi
    146199      query4=$query4" RunInfo SET "$querymid" fRunTypeKey="${result2[1]}", fNumEvents="$numevents", fROI="$roi
    147       query4=$query4", fMd5sumRaw='"$md5sum"', fMd5sumRawZip='"$md5sumzip"'"
    148       echo "Q4:"$query4
     200      if [ "$md5sum" != "" ]
     201      then
     202         query4=$query4", fMd5sumRaw='"$md5sum"', fMd5sumRawZip='"$md5sumzip"'"
     203      fi
     204      query4=$query4", fHasDrsFile="$numdrsfiles
     205      query4=$query4", fFitsFileErrors="$numfitserrors
     206      query4=$query4", fRunStart='"$runstart"', fRunStop='"$runstop"'"
     207      query4=$query4" "$querystop
     208      echo "Q4:"$query4 >> $logfile 2>&1
    149209      if ! /usr/bin/mysql -u operator --host=fact01.fact.local --password=$password FactData -e "$query4"
    150210      then
    151          echo "insert/update of "$numfromfile" to mysql"
    152       fi
    153       exit
     211         echo "insert/update of "$numfromfile" to mysql" >> $logfile 2>&1
     212      fi
     213
     214      # print summary for debug
     215      echo "*******"  >> $logfile 2>&1
     216      echo "summary for file "$file >> $logfile 2>&1
     217      echo "*******"  >> $logfile 2>&1
     218      echo "  errors: "$numfitserrors" (1 means errors exist)" >> $logfile 2>&1
     219      echo "  number of drs files: "$numdrsfiles >> $logfile 2>&1
     220      echo "  runnumber error: "$runnumbererror >> $logfile 2>&1
     221      echo "  number error: "$numbererror >> $logfile 2>&1
     222      echo "  roi: "$roi >> $logfile 2>&1
     223      echo "  runtype: "$runtype >> $logfile 2>&1
     224      echo "  numevents: "$numevents >> $logfile 2>&1
     225      echo "  md5sum: "$md5sum >> $logfile 2>&1
     226      echo "  md5sum(zip): "$md5sumzip >> $logfile 2>&1
     227      echo "  start: "$runstart >> $logfile 2>&1
     228      echo "  stop: "$runstop >> $logfile 2>&1
     229
     230      # missing
     231      # check whether entry has been made (status table)
    154232   done
    155233done
Note: See TracChangeset for help on using the changeset viewer.