Changeset 12788 for trunk/DataCheck


Ignore:
Timestamp:
01/30/12 19:56:29 (13 years ago)
Author:
Daniela Dorner
Message:
included further checks and fill more information; common logfile; start from zipped files now
File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/DataCheck/CheckRawData.sh

    r12761 r12788  
    11#!/bin/bash
    22
    3 today=`date +%F`
    4 logfile=/home/`whoami`/DataCheck/log/CheckRaw$today.log
    5 
     3# missing
     4# entry in status table
     5
     6# options:
    67skipmd5sum="no" # fill md5 sums in any case
    78skipmd5sum="iffilled" # fill md5 sum only if they are not yet in db
     
    1112doupdate="no" # fill only entries which are not yet existing
    1213
    13 fillonly="ifzipped" # fill only entries which are not yet existing + where zipped file is available
    14 
    15 echo ""  >> $logfile 2>&1
    16 echo ""  >> $logfile 2>&1
    17 echo ""  >> $logfile 2>&1
    18 echo `date`"executing "$0"..." >> $logfile 2>&1
    19 echo "=====> doupdate: "$doupdate >> $logfile 2>&1
    20 echo "=====> skipmd5sum: "$skipmd5sum >> $logfile 2>&1
    21 echo "=====> fillonly: "$fillonly >> $logfile 2>&1
    22 
    23 password=`cat /home/fact/DataCheck/.pw`
     14source `dirname $0`/Sourcefile.sh
     15printprocesslog "INFO starting $0 with options doupdate="$doupdate" and skipmd5sum="$skipmd5sum
     16
     17# setup to use ftools
     18source $HEADAS/headas-init.sh
     19
     20pwfile=`dirname $0`/.pw
     21password=`cat $pwfile 2>/dev/null`
    2422if [ "$password" == "" ]
    2523then
    26    echo "please insert password in .pw file"
     24   echo "please insert password in $pwfile"
     25   printprocesslog "ERROR password for DB access in $pwfile missing"
     26   finish
    2727fi
    2828
    29 # path to FACT++ version
    30 factpath=/home/fact/FACT++.2012.01.19
    31 
    32 # setup to use ftools
    33 export HEADAS=/opt/heasoft-6.11/x86_64-unknown-linux-gnu-libc2.13-0/
    34 source $HEADAS/headas-init.sh
    35 
    36 # get last 2 nights
     29# check if software is available
     30if ! ls $factpath/fitsdump >/dev/null 2>&1
     31then
     32   printprocesslog "ERROR "$factpath"/fitsdump is not available."
     33   finish
     34fi
     35
     36# get last 2 nights
    3737dates=( `date +%Y/%m/%d --date="-12hour"` `date +%Y/%m/%d --date="-36hour"` )
    38 
    39 # do rsync for rawfiles of these dates
     38#dates=( `find /loc_data/zipraw/ -mindepth 3 -type d | sort | sed -e 's/\/loc_data\/zipraw\///g'` ) #all available dates in /loc_data/zipraw
     39
     40# do check for rawfiles of these dates
    4041for date in ${dates[@]}
    4142do
    42    echo "" >> $logfile 2>&1
    43    echo "" >> $logfile 2>&1
    44    echo "" >> $logfile 2>&1
    45    rawdir=/loc_data/raw/$date
     43   ziprawdir=/loc_data/zipraw/$date
     44   # check if data are available from that night
     45   if ! [ -d $ziprawdir ]
     46   then
     47      printprocesslog "INFO "$ziprawdir" does not exist."
     48      continue
     49   else
     50      printprocesslog "INFO processing "$ziprawdir"..."
     51   fi
     52
     53   # find all fits.gz files starting with the oldest file
     54   printprocesslog "INFO finding files to be checked in $ziprawdir..."
     55   fitsgzfiles=`find $ziprawdir -type f -name '*.fits.gz'| sort `
     56
     57   # get runnumber from date
    4658   runnumber=`echo $date | sed -e 's/\///g'`
    47    echo `date`": processing files in "$rawdir >> $logfile 2>&1
    48    # check if data are available from that night
    49    if ! [ -d $rawdir ]
    50    then
    51       echo `date`": no data available in "$rawdir >> $logfile 2>&1
    52       continue
    53    fi
    54 
    55    # find all fits-files starting with the oldest file
    56    echo `date`": finding files to be zipped in $rawdir..." >> $logfile 2>&1
    57    fitsfiles=`find $rawdir -type f -name '*.fits'| sort `
    58 
    59    # loop to zip files
    60    echo `date`": checking files in $rawdir..." >> $logfile 2>&1
    61    for file in $fitsfiles
     59   
     60   # loop to check files
     61   for file in $fitsgzfiles
    6262   do
    63       echo "" >> $logfile 2>&1
    64       echo "checking file "$file >> $logfile 2>&1
     63      printprocesslog "INFO checking file "$file
     64
     65      # raw and original file
     66      rawfile=`echo $file | sed -e 's/zipraw/raw/' -e 's/fits[.]gz/fits/'`
     67      origfile=`echo $rawfile | sed -e 's/loc_data/daq/'`
     68
    6569      # check if raw file was changed in the last 30 minutes
    6670      isnew=`find $file -cmin -30`
    6771      if [ "$isnew" != "" ]
    6872      then
    69          echo $file" is not older than 30 min => continue" >> $logfile 2>&1
     73         printprocesslog "INFO "$file" is not older than 30 min. -> continue"
    7074         continue
    7175      fi
     
    7579      if [ "$isnew" != "" ]
    7680      then
    77          echo $file" is a drs file => continue" >> $logfile 2>&1
     81         printprocesslog "INFO "$file" is a drs file. -> continue"
    7882         continue
    7983      fi
     
    8185      # check if file is already finished
    8286      # original file on daq (if data was taken on daq)
    83       origfile=`echo $file | sed -e 's/loc_data/daq/'`
    8487      if [ -e $origfile ]
    8588      then
    8689         # get time of last modification as seconds since Epoch for both files
    8790         timeorig=`stat -c %Y $origfile`
    88          timecopy=`stat -c %Y $file`
     91         timecopy=`stat -c %Y $rawfile`
    8992         # compare times
    9093         if ! [ $timeorig -eq $timecopy ]
    9194         then
    92             # if times are not the same, the file is still open => no zip
    93             echo `date`": file "$file" not yet closed => continue" >> $logfile 2>&1
     95            # if times are not the same, the file is still open => no check
     96            printprocesslog "INFO file "$rawfile" not yet closed. -> continue"
    9497            continue
    9598         fi
    9699      else
    97100         # if the origfile doesn't exist, the data was probably written not on daq but on data
    98          echo `date`": file "$file" was probably taken on data and not daq " >> $logfile 2>&1
     101         printprocesslog "INFO file "$rawfile" was probably taken on data and not daq."
    99102      fi
    100103     
     
    108111      then
    109112         runnumbererror="yes"
    110          echo "ERROR: for file "$file" runnumber from date and filename don't agree ("$runnumber" - "$runnumberfromname")"
    111          echo "ERROR: for file "$file" runnumber from date and filename don't agree ("$runnumber" - "$runnumberfromname")" >> $logfile 2>&1
     113         printprocesslog "ERROR for file "$file": runnumber from date ("$runnumber") and filename ("$runnumberfromname") don't agree."
    112114      fi
    113115     
    114116      # check if entry already exists
    115117      query3="SELECT fRunNumber FROM RunInfo WHERE Concat(fRunNumber, '_', LPAD(fFileNumber, 3, 0))='"$numberfromname"'"
    116       echo "Q3:"$query3 >> $logfile 2>&1 2>&1
    117       result3=`/usr/bin/mysql -u operator --host=fact01.fact.local --password=$password FactData -e "$query3" 2>> $logfile`
     118      printprocesslog "DEBUG check if entry already exists in DB. QUERY: "$query3
     119      result3=`/usr/bin/mysql -u operator --host=fact01.fact.local --password=$password FactData -e "$query3"`
    118120
    119121      # only proceed with file
     
    122124      if [ "$result3" != "" ] && [ "$doupdate" == "no" ]
    123125      then
    124          echo $file" has been inserted already => continue " >> $logfile 2>&1
     126         printprocesslog "INFO "$file" has been inserted already. -> continue "
    125127         continue
    126128      fi
    127129     
    128       # check if entry has already checksums
    129       query5="SELECT fRunNumber FROM RunInfo WHERE Concat(fRunNumber, '_', LPAD(fFileNumber, 3, 0))='"$numberfromname"'"
    130       query5=$query5" AND NOT ISNULL(fMd5sumRaw) AND NOT ISNULL(fMd5sumRawZip)"
    131       echo "Q5:"$query5 >> $logfile 2>&1
    132       result5=`/usr/bin/mysql -u operator --host=fact01.fact.local --password=$password FactData -e "$query5" 2>> $logfile`
    133 
    134       # get md5sums of raw and zip file
    135       #   to save time for tests and updates, this can be skipped ($skipmd5sum)
    136       md5sum=
    137       md5sumzip=
    138       if [ "$skipmd5sum" == "no" ] || [ "$skipmd5sum" == "iffilled" ]
    139       then
    140          #echo "skip: "$skipmd5sum >> $logfile 2>&1
    141          #echo "res5: -"$result5"-" >> $logfile 2>&1
    142          #if [ "$skipmd5sum" == "no" ] || [ "$result5$skipmd5sum" == "iffilled" ]
    143          if [ "$skipmd5sum" == "no" ] || [ "$result5" == "" ]
    144          then
    145             zipfile=`echo $file | sed -e 's/raw/zipraw/' -e 's/fits/fits.gz/'`
    146             # only do the md5sum if the zipfile is already available
    147             if ls $zipfile >/dev/null >> $logfile 2>&1
    148             then
    149                echo "calculation md5sum for raw..." >> $logfile 2>&1
    150                md5sum=`md5sum $file | cut -d' ' -f1`
    151                echo "calculation md5sum for zipraw..." >> $logfile 2>&1
    152                md5sumzip=`md5sum $zipfile | cut -d' ' -f1`
    153             fi
    154          fi
    155       fi
    156       if [ "$result3" == "" ] && [ "$md5sum" == "" ] && [ "$fillonly" == "ifzipped" ]
    157       then
    158          echo $file" is still missing zip => do not insert yet => continue " >> $logfile 2>&1
    159          continue
    160       fi
    161 
    162130      runtype=
    163131      # check if fits file is corrupted
    164132      numfitserrors=0
    165       checkfitsfile=`fverify $file  2>> $logfile | grep '0 error(s)'`
     133      checkfitsfile=`fverify $rawfile | grep '0 error(s)'`
    166134      if [ "$checkfitsfile" == "" ]
    167135      then
     
    172140      then
    173141         # get run and file number from file
    174          runnumberfromfile=`$factpath/fitsdump -h -t Events $file  2>/dev/null | grep NIGHT | grep -E -o '20[1-9][0-9][01][0-9][0-3][0-9]'`
    175          filenumberfromfileorig=`$factpath/fitsdump -h -t Events $file  2>/dev/null | grep RUNID | grep -E -o '[0-9]{1,3}'`
     142         runnumberfromfile=`$factpath/fitsdump -h -t Events $file 2>/dev/null | grep NIGHT | grep -E -o '20[1-9][0-9][01][0-9][0-3][0-9]'`
     143         filenumberfromfileorig=`$factpath/fitsdump -h -t Events $file 2>/dev/null | grep RUNID | grep -E -o '[0-9]{1,3}'`
     144         if [ "$runnumberfromfile" = "" ] || [ "$filenumberfromfileorig" = "" ]
     145         then
     146            printprocesslog "ERROR couldn't get run or file number from file name ("$file")."
     147         fi
    176148         numberfromfile=$runnumberfromfile"_"`printf %03d $filenumberfromfileorig`
    177149         # compare numbers
     
    179151         then
    180152            numbererror="yes"
    181             echo "ERROR: for file "$file" number from filename and file don't agree ("$numberfromname" -"$numberfromfile")"
    182             echo "ERROR: for file "$file" number from filename and file don't agree ("$numberfromname" -"$numberfromfile")" >> $logfile 2>&1
     153            printprocesslog "ERROR for file "$file": number from filename ("$numberfromname") and file ("$numberfromfile") don't agree."
    183154         fi
    184155     
    185156         # check if this run has drs file
     157         #   in case file is available, get STEP from header
     158         # in the very beginning only drs-files were existing
     159         # in the beginning the keywords DRSCALIB and STEP were not existing
     160         step=
    186161         drsfile=`echo $file | sed -e 's/fits/drs.fits/'`
    187162         numdrsfiles=`ls $drsfile 2>/dev/null | wc -l`
     163         drscalib=`$factpath/fitsdump -h -t Events $file  2>/dev/null | grep DRSCALIB | grep -E -o "['][TF][']" | sed -e "s/'//g"`
     164         if [ "$drscalib" == "T" ]
     165         then
     166            step=`$factpath/fitsdump -h -t Events $drsfile  2>/dev/null | grep STEP | grep -E -o "['][012][']" | sed -e "s/'//g"`
     167            if ! [ $numdrsfiles -eq 1 ]
     168            then
     169               printprocesslog "ERROR for file "$file" number of drsfiles ("$numdrsfiles") and information from header ("$drscalib") don't agree."
     170            fi
     171         fi
    188172         
    189173         # get other variables from header
    190          runtype=`$factpath/fitsdump -h -t Events $file  2>/dev/null | grep RUNTYPE | grep -E -o "['][a-z-]+[']" | sed -e "s/'//g"`
     174         runtype=`$factpath/fitsdump -h -t Events $file  2>/dev/null | grep RUNTYPE | grep -E -o "['][a-z0-9._-]+[']" | sed -e "s/'//g" -e "s/_/-/g" -e "s/[.]//g"`
     175         #echo "runtype for file "$file": "$runtype
    191176         roi=`$factpath/fitsdump -h -t Events $file  2>/dev/null | grep NROI | grep -v NROITM | grep -E -o '[0-9]{1,4}'`
    192177         roitm=`$factpath/fitsdump -h -t Events $file  2>/dev/null | grep NROITM | grep -E -o '[0-9]{1,4}'`
     
    230215      fi
    231216     
    232       # set runtype to 'unknown' if no runtype could be queried
     217      # set runtype to 'unknown', if no runtype could be retrieved from file
    233218      if [ "$runtype" == "" ]
    234219      then
     
    261246      # get runtype
    262247      query2="SELECT fRunTypeKEY FROM RunType WHERE fRunTypeName='"$runtype"'"
    263       echo "Q2:"$query2 >> $logfile 2>&1
    264       result2=( `/usr/bin/mysql -u operator --host=fact01.fact.local --password=$password FactData -e "$query2" 2>> $logfile` )
     248      printprocesslog "DEBUG get run type from DB. QUERY:"$query2
     249      result2=( `/usr/bin/mysql -u operator --host=fact01.fact.local --password=$password FactData -e "$query2"` )
    265250      if [ ${#result2} -eq 0 ]
    266251      then
    267          echo $numberfromname": Could not query fRunTypeKey for runtype "$runtype
    268          echo $numberfromname": Could not query fRunTypeKey for runtype "$runtype >> $logfile 2>&1
     252         printprocesslog "ERROR "$numberfromname": Could not query fRunTypeKey for runtype "$runtype" ."
    269253         continue
     254      fi
     255
     256      # check if entry has already checksums
     257      query5="SELECT fRunNumber FROM RunInfo WHERE Concat(fRunNumber, '_', LPAD(fFileNumber, 3, 0))='"$numberfromname"'"
     258      query5=$query5" AND NOT ISNULL(fMd5sumRaw) AND NOT ISNULL(fMd5sumRawZip)"
     259      printprocesslog "DEBUG check if md5sums are alreay in DB. QUERY:"$query5
     260      result5=`/usr/bin/mysql -u operator --host=fact01.fact.local --password=$password FactData -e "$query5"`
     261
     262      # get md5sums of raw and zip file
     263      #   to save time for tests and updates, this can be skipped ($skipmd5sum)
     264      md5sum=
     265      md5sumzip=
     266      if [ "$skipmd5sum" == "no" ] || [ "$skipmd5sum" == "iffilled" ]
     267      then
     268         #echo "skip: "$skipmd5sum >> $logfile 2>&1
     269         #echo "res5: -"$result5"-" >> $logfile 2>&1
     270         #if [ "$skipmd5sum" == "no" ] || [ "$result5$skipmd5sum" == "iffilled" ]
     271         if [ "$skipmd5sum" == "no" ] || [ "$result5" == "" ]
     272         then
     273            #zipfile=`echo $file | sed -e 's/raw/zipraw/' -e 's/fits/fits.gz/'`
     274            # only do the md5sum if the zipfile is already available
     275            if ls $file >/dev/null 2>&1
     276            then
     277               printprocesslog "INFO calculation md5sum for rawfile "$rawfile
     278               md5sum=`md5sum $rawfile | cut -d' ' -f1`
     279               printprocesslog "INFO calculation md5sum for zipfile "$file
     280               md5sumzip=`md5sum $file | cut -d' ' -f1`
     281            fi
     282         fi
    270283      fi
    271284
     
    284297      if [ $numfitserrors -eq 0 ]
    285298      then
    286          query4=$query4", fNumEvents="$numevents", fROI="$roi", fROITimeMarker="$roitm
    287299         query4=$query4", fRunStart='"$runstart"', fRunStop='"$runstop"'"
     300         if [ "$numevents" != "" ]
     301         then
     302            query4=$query4", fNumEvents="$numevent
     303         fi
     304         if [ "$roi" != "" ]
     305         then
     306            query4=$query4", fROI="$roi
     307         fi
     308         if [ "$roitm" != "" ]
     309         then
     310            query4=$query4", fROITimeMarker="$roitm
     311         fi
    288312         if [ "$numphys" != "" ]
    289313         then
     
    324348         query4=$query4", fHasDrsFile="$numdrsfiles
    325349      fi
     350      if [ "$step" != "" ]
     351      then
     352         query4=$query4", fDrsStep="$step
     353      fi
    326354      query4=$query4", fFitsFileErrors="$numfitserrors
    327355      query4=$query4" "$querystop
    328       echo "Q4:"$query4 >> $logfile 2>&1
    329       if ! /usr/bin/mysql -u operator --host=fact01.fact.local --password=$password FactData -e "$query4" >> $logfile 2>&1
    330       then
    331          echo "insert/update of "$numfromfile" to mysql failed" >> $logfile 2>&1
    332       fi
    333 
    334       # print summary for debug
    335       echo "*******"  >> $logfile 2>&1
    336       echo "summary for file "$file >> $logfile 2>&1
    337       echo "*******"  >> $logfile 2>&1
    338       echo "  errors: "$numfitserrors" (1 means errors exist)" >> $logfile 2>&1
    339       echo "  number of drs files: "$numdrsfiles >> $logfile 2>&1
    340       echo "  runnumber error: "$runnumbererror >> $logfile 2>&1
    341       echo "  number error: "$numbererror >> $logfile 2>&1
    342       echo "  roi: "$roi >> $logfile 2>&1
    343       echo "  roitm: "$roitm >> $logfile 2>&1
    344       echo "  runtype: "$runtype >> $logfile 2>&1
    345       echo "  numevents: "$numevents >> $logfile 2>&1
    346       echo "  md5sum: "$md5sum >> $logfile 2>&1
    347       echo "  md5sum(zip): "$md5sumzip >> $logfile 2>&1
    348       echo "  start: "$runstart >> $logfile 2>&1
    349       echo "  stop: "$runstop >> $logfile 2>&1
    350 
    351       # missing
    352       # check whether entry has been made (status table)
     356      printprocesslog "INFO insert/update entry in DB. QUERY: "$query4
     357      if ! /usr/bin/mysql -u operator --host=fact01.fact.local --password=$password FactData -e "$query4"
     358      then
     359         printprocesslog "ERROR insert/update of "$numfromfile" to mysql failed."
     360         finish
     361      fi
    353362   done
    354363done
    355364
    356 
     365finish
     366
Note: See TracChangeset for help on using the changeset viewer.