Changeset 12788 for trunk/DataCheck
- Timestamp: 01/30/12 19:56:29 (13 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/DataCheck/CheckRawData.sh
r12761 r12788 1 1 #!/bin/bash 2 2 3 today=`date +%F` 4 logfile=/home/`whoami`/DataCheck/log/CheckRaw$today.log 5 3 # missing 4 # entry in status table 5 6 # options: 6 7 skipmd5sum="no" # fill md5 sums in any case 7 8 skipmd5sum="iffilled" # fill md5 sum only if they are not yet in db … … 11 12 doupdate="no" # fill only entries which are not yet existing 12 13 13 fillonly="ifzipped" # fill only entries which are not yet existing + where zipped file is availabl 14 15 echo "" >> $logfile 2>&1 16 echo "" >> $logfile 2>&1 17 echo "" >> $logfile 2>&1 18 echo `date`"executing "$0"..." >> $logfile 2>&1 19 echo "=====> doupdate: "$doupdate >> $logfile 2>&1 20 echo "=====> skipmd5sum: "$skipmd5sum >> $logfile 2>&1 21 echo "=====> fillonly: "$fillonly >> $logfile 2>&1 22 23 password=`cat /home/fact/DataCheck/.pw` 14 source `dirname $0`/Sourcefile.sh 15 printprocesslog "INFO starting $0 with options doupdate="$doupdate" and skipmd5sum="$skipmd5sum 16 17 # setup to use ftools 18 source $HEADAS/headas-init.sh 19 20 pwfile=`dirname $0`/.pw 21 password=`cat $pwfile 2>/dev/null` 24 22 if [ "$password" == "" ] 25 23 then 26 echo "please insert password in .pw file" 24 echo "please insert password in $pwfile" 25 printprocesslog "ERROR password for DB access in $pwfile missing" 26 finish 27 27 fi 28 28 29 # path to FACT++ version30 factpath=/home/fact/FACT++.2012.01.19 31 32 # setup to use ftools 33 export HEADAS=/opt/heasoft-6.11/x86_64-unknown-linux-gnu-libc2.13-0/ 34 source $HEADAS/headas-init.sh 35 36 # get last 2nights29 # check if software is available 30 if ! ls $factpath/fitsdump >/dev/null 2>&1 31 then 32 printprocesslog "ERROR "$factpath"/fitsdump is not available." 
33 finish 34 fi 35 36 # get last 3 nights 37 37 dates=( `date +%Y/%m/%d --date="-12hour"` `date +%Y/%m/%d --date="-36hour"` ) 38 39 # do rsync for rawfiles of these dates 38 #dates=( `find /loc_data/zipraw/ -mindepth 3 -type d | sort | sed -e 's/\/loc_data\/zipraw\///g'` ) #all available dates in /loc_data/zipraw 39 40 # do check for rawfiles of these dates 40 41 for date in ${dates[@]} 41 42 do 42 echo "" >> $logfile 2>&1 43 echo "" >> $logfile 2>&1 44 echo "" >> $logfile 2>&1 45 rawdir=/loc_data/raw/$date 43 ziprawdir=/loc_data/zipraw/$date 44 # check if data are available from that night 45 if ! [ -d $ziprawdir ] 46 then 47 printprocesslog "INFO "$ziprawdir" does not exist." 48 continue 49 else 50 printprocesslog "INFO processing "$ziprawdir"..." 51 fi 52 53 # find all fits.gz files starting with the oldest file 54 printprocesslog "INFO finding files to be checked in $ziprawdir..." 55 fitsgzfiles=`find $ziprawdir -type f -name '*.fits.gz'| sort ` 56 57 # get runnumber from date 46 58 runnumber=`echo $date | sed -e 's/\///g'` 47 echo `date`": processing files in "$rawdir >> $logfile 2>&1 48 # check if data are available from that night 49 if ! [ -d $rawdir ] 50 then 51 echo `date`": no data available in "$rawdir >> $logfile 2>&1 52 continue 53 fi 54 55 # find all fits-files starting with the oldest file 56 echo `date`": finding files to be zipped in $rawdir..." >> $logfile 2>&1 57 fitsfiles=`find $rawdir -type f -name '*.fits'| sort ` 58 59 # loop to zip files 60 echo `date`": checking files in $rawdir..." 
>> $logfile 2>&1 61 for file in $fitsfiles 59 60 # loop to check files 61 for file in $fitsgzfiles 62 62 do 63 echo "" >> $logfile 2>&1 64 echo "checking file "$file >> $logfile 2>&1 63 printprocesslog "INFO checking file "$file 64 65 # raw and original file 66 rawfile=`echo $file | sed -e 's/zipraw/raw/' -e 's/fits[.]gz/fits/'` 67 origfile=`echo $rawfile | sed -e 's/loc_data/daq/'` 68 65 69 # check if raw file was changed in the last 30 minutes 66 70 isnew=`find $file -cmin -30` 67 71 if [ "$isnew" != "" ] 68 72 then 69 echo $file" is not older than 30 min => continue" >> $logfile 2>&173 printprocesslog "INFO "$file" is not older than 30 min. -> continue" 70 74 continue 71 75 fi … … 75 79 if [ "$isnew" != "" ] 76 80 then 77 echo $file" is a drs file => continue" >> $logfile 2>&181 printprocesslog "INFO "$file" is a drs file. -> continue" 78 82 continue 79 83 fi … … 81 85 # check if file is already finished 82 86 # original file on daq (if data was taken on daq 83 origfile=`echo $file | sed -e 's/loc_data/daq/'`84 87 if [ -e $origfile ] 85 88 then 86 89 # get time of last modification as seconds since Epoch for both files 87 90 timeorig=`stat -c %Y $origfile` 88 timecopy=`stat -c %Y $ file`91 timecopy=`stat -c %Y $rawfile` 89 92 # compare times 90 93 if ! [ $timeorig -eq $timecopy ] 91 94 then 92 # if times are not the same, the file is still open => no zip93 echo `date`": file "$file" not yet closed => continue" >> $logfile 2>&195 # if times are not the same, the file is still open => no check 96 printprocesslog "INFO file "$rawfile" not yet closed. -> continue" 94 97 continue 95 98 fi 96 99 else 97 100 # if the origfile doesn't exist, the data was probably written not on daq but on data 98 echo `date`": file "$file" was probably taken on data and not daq " >> $logfile 2>&1101 printprocesslog "INFO file "$rawfile" was probably taken on data and not daq." 
99 102 fi 100 103 … … 108 111 then 109 112 runnumbererror="yes" 110 echo "ERROR: for file "$file" runnumber from date and filename don't agree ("$runnumber" - "$runnumberfromname")" 111 echo "ERROR: for file "$file" runnumber from date and filename don't agree ("$runnumber" - "$runnumberfromname")" >> $logfile 2>&1 113 printprocesslog "ERROR for file "$file": runnumber from date ("$runnumber") and filename ("$runnumberfromname") don't agree." 112 114 fi 113 115 114 116 # check if entry already exists 115 117 query3="SELECT fRunNumber FROM RunInfo WHERE Concat(fRunNumber, '_', LPAD(fFileNumber, 3, 0))='"$numberfromname"'" 116 echo "Q3:"$query3 >> $logfile 2>&1 2>&1117 result3=`/usr/bin/mysql -u operator --host=fact01.fact.local --password=$password FactData -e "$query3" 2>> $logfile`118 printprocesslog "DEBUG check if entry already exists in DB. QUERY: "$query3 119 result3=`/usr/bin/mysql -u operator --host=fact01.fact.local --password=$password FactData -e "$query3"` 118 120 119 121 # only proceed with file … … 122 124 if [ "$result3" != "" ] && [ "$doupdate" == "no" ] 123 125 then 124 echo $file" has been inserted already => continue " >> $logfile 2>&1126 printprocesslog "INFO "$file" has been inserted already. 
-> continue " 125 127 continue 126 128 fi 127 129 128 # check if entry has already checksums129 query5="SELECT fRunNumber FROM RunInfo WHERE Concat(fRunNumber, '_', LPAD(fFileNumber, 3, 0))='"$numberfromname"'"130 query5=$query5" AND NOT ISNULL(fMd5sumRaw) AND NOT ISNULL(fMd5sumRawZip)"131 echo "Q5:"$query5 >> $logfile 2>&1132 result5=`/usr/bin/mysql -u operator --host=fact01.fact.local --password=$password FactData -e "$query5" 2>> $logfile`133 134 # get md5sums of raw and zip file135 # to safe time for tests and update this can be skipped ($skipmd5sum)136 md5sum=137 md5sumzip=138 if [ "$skipmd5sum" == "no" ] || [ "$skipmd5sum" == "iffilled" ]139 then140 #echo "skip: "$skipmd5sum >> $logfile 2>&1141 #echo "res5: -"$result5"-" >> $logfile 2>&1142 #if [ "$skipmd5sum" == "no" ] || [ "$result5$skipmd5sum" == "iffilled" ]143 if [ "$skipmd5sum" == "no" ] || [ "$result5" == "" ]144 then145 zipfile=`echo $file | sed -e 's/raw/zipraw/' -e 's/fits/fits.gz/'`146 # only do the md5sum if the zipfile is already available147 if ls $zipfile >/dev/null >> $logfile 2>&1148 then149 echo "calculation md5sum for raw..." >> $logfile 2>&1150 md5sum=`md5sum $file | cut -d' ' -f1`151 echo "calculation md5sum for zipraw..." 
>> $logfile 2>&1152 md5sumzip=`md5sum $zipfile | cut -d' ' -f1`153 fi154 fi155 fi156 if [ "$result3" == "" ] && [ "$md5sum" == "" ] && [ "$fillonly" == "ifzipped" ]157 then158 echo $file" is still missing zip => do not insert yet => continue " >> $logfile 2>&1159 continue160 fi161 162 130 runtype= 163 131 # check if fits file is corrupted 164 132 numfitserrors=0 165 checkfitsfile=`fverify $ file 2>> $logfile | grep '0 error(s)'`133 checkfitsfile=`fverify $rawfile | grep '0 error(s)'` 166 134 if [ "$checkfitsfile" == "" ] 167 135 then … … 172 140 then 173 141 # get run and file number from file 174 runnumberfromfile=`$factpath/fitsdump -h -t Events $file 2>/dev/null | grep NIGHT | grep -E -o '20[1-9][0-9][01][0-9][0-3][0-9]'` 175 filenumberfromfileorig=`$factpath/fitsdump -h -t Events $file 2>/dev/null | grep RUNID | grep -E -o '[0-9]{1,3}'` 142 runnumberfromfile=`$factpath/fitsdump -h -t Events $file 2>/dev/null | grep NIGHT | grep -E -o '20[1-9][0-9][01][0-9][0-3][0-9]'` 143 filenumberfromfileorig=`$factpath/fitsdump -h -t Events $file 2>/dev/null | grep RUNID | grep -E -o '[0-9]{1,3}'` 144 if [ "$runnumberfromfile" = "" ] || [ "$filenumberfromfileorig" = "" ] 145 then 146 printprocesslog "ERROR couldn't get run or file number from file name ("$file")." 147 fi 176 148 numberfromfile=$runnumberfromfile"_"`printf %03d $filenumberfromfileorig` 177 149 # compare numbers … … 179 151 then 180 152 numbererror="yes" 181 echo "ERROR: for file "$file" number from filename and file don't agree ("$numberfromname" -"$numberfromfile")" 182 echo "ERROR: for file "$file" number from filename and file don't agree ("$numberfromname" -"$numberfromfile")" >> $logfile 2>&1 153 printprocesslog "ERROR for file "$file": number from filename ("$numberfromname") and file ("$numberfromfile") don't agree." 
183 154 fi 184 155 185 156 # check if this run has drs file 157 # in case file is available, get STEP from header 158 # in the very beginning only drs-files were existing 159 # in the beginning the keywords DRSCALIB and STEP were not existing 160 step= 186 161 drsfile=`echo $file | sed -e 's/fits/drs.fits/'` 187 162 numdrsfiles=`ls $drsfile 2>/dev/null | wc -l` 163 drscalib=`$factpath/fitsdump -h -t Events $file 2>/dev/null | grep DRSCALIB | grep -E -o "['][TF][']" | sed -e "s/'//g"` 164 if [ "$drscalib" == "T" ] 165 then 166 step=`$factpath/fitsdump -h -t Events $drsfile 2>/dev/null | grep STEP | grep -E -o "['][012][']" | sed -e "s/'//g"` 167 if ! [ $numdrsfiles -eq 1 ] 168 then 169 printprocesslog "ERROR for file "$file" number of drsfiles ("$numdrsfiles") and information from header ("$drscalib") don't agree." 170 fi 171 fi 188 172 189 173 # get other variables from header 190 runtype=`$factpath/fitsdump -h -t Events $file 2>/dev/null | grep RUNTYPE | grep -E -o "['][a-z-]+[']" | sed -e "s/'//g"` 174 runtype=`$factpath/fitsdump -h -t Events $file 2>/dev/null | grep RUNTYPE | grep -E -o "['][a-z0-9._-]+[']" | sed -e "s/'//g" -e "s/_/-/g" -e "s/[.]//g"` 175 #echo "runtype for file "$file": "$runtype 191 176 roi=`$factpath/fitsdump -h -t Events $file 2>/dev/null | grep NROI | grep -v NROITM | grep -E -o '[0-9]{1,4}'` 192 177 roitm=`$factpath/fitsdump -h -t Events $file 2>/dev/null | grep NROITM | grep -E -o '[0-9]{1,4}'` … … 230 215 fi 231 216 232 # set runtype to 'unknown' if no runtype could be queried217 # set runtype to 'unknown', if no runtype could be retrieved from file 233 218 if [ "$runtype" == "" ] 234 219 then … … 261 246 # get runtype 262 247 query2="SELECT fRunTypeKEY FROM RunType WHERE fRunTypeName='"$runtype"'" 263 echo "Q2:"$query2 >> $logfile 2>&1264 result2=( `/usr/bin/mysql -u operator --host=fact01.fact.local --password=$password FactData -e "$query2" 2>> $logfile` )248 printprocesslog "DEBUG get run type from DB. 
QUERY:"$query2 249 result2=( `/usr/bin/mysql -u operator --host=fact01.fact.local --password=$password FactData -e "$query2"` ) 265 250 if [ ${#result2} -eq 0 ] 266 251 then 267 echo $numberfromname": Could not query fRunTypeKey for runtype "$runtype 268 echo $numberfromname": Could not query fRunTypeKey for runtype "$runtype >> $logfile 2>&1 252 printprocesslog "ERROR "$numberfromname": Could not query fRunTypeKey for runtype "$runtype" ." 269 253 continue 254 fi 255 256 # check if entry has already checksums 257 query5="SELECT fRunNumber FROM RunInfo WHERE Concat(fRunNumber, '_', LPAD(fFileNumber, 3, 0))='"$numberfromname"'" 258 query5=$query5" AND NOT ISNULL(fMd5sumRaw) AND NOT ISNULL(fMd5sumRawZip)" 259 printprocesslog "DEBUG check if md5sums are alreay in DB. QUERY:"$query5 260 result5=`/usr/bin/mysql -u operator --host=fact01.fact.local --password=$password FactData -e "$query5"` 261 262 # get md5sums of raw and zip file 263 # to safe time for tests and update this can be skipped ($skipmd5sum) 264 md5sum= 265 md5sumzip= 266 if [ "$skipmd5sum" == "no" ] || [ "$skipmd5sum" == "iffilled" ] 267 then 268 #echo "skip: "$skipmd5sum >> $logfile 2>&1 269 #echo "res5: -"$result5"-" >> $logfile 2>&1 270 #if [ "$skipmd5sum" == "no" ] || [ "$result5$skipmd5sum" == "iffilled" ] 271 if [ "$skipmd5sum" == "no" ] || [ "$result5" == "" ] 272 then 273 #zipfile=`echo $file | sed -e 's/raw/zipraw/' -e 's/fits/fits.gz/'` 274 # only do the md5sum if the zipfile is already available 275 if ls $file >/dev/null 2>&1 276 then 277 printprocesslog "INFO calculation md5sum for rawfile "$rawfile 278 md5sum=`md5sum $rawfile | cut -d' ' -f1` 279 printprocesslog "INFO calculation md5sum for zipfile "$file 280 md5sumzip=`md5sum $file | cut -d' ' -f1` 281 fi 282 fi 270 283 fi 271 284 … … 284 297 if [ $numfitserrors -eq 0 ] 285 298 then 286 query4=$query4", fNumEvents="$numevents", fROI="$roi", fROITimeMarker="$roitm287 299 query4=$query4", fRunStart='"$runstart"', fRunStop='"$runstop"'" 300 if 
[ "$numevents" != "" ] 301 then 302 query4=$query4", fNumEvents="$numevent 303 fi 304 if [ "$roi" != "" ] 305 then 306 query4=$query4", fROI="$roi 307 fi 308 if [ "$roitm" != "" ] 309 then 310 query4=$query4", fROITimeMarker="$roitm 311 fi 288 312 if [ "$numphys" != "" ] 289 313 then … … 324 348 query4=$query4", fHasDrsFile="$numdrsfiles 325 349 fi 350 if [ "$step" != "" ] 351 then 352 query4=$query4", fDrsStep="$step 353 fi 326 354 query4=$query4", fFitsFileErrors="$numfitserrors 327 355 query4=$query4" "$querystop 328 echo "Q4:"$query4 >> $logfile 2>&1 329 if ! /usr/bin/mysql -u operator --host=fact01.fact.local --password=$password FactData -e "$query4" >> $logfile 2>&1 330 then 331 echo "insert/update of "$numfromfile" to mysql failed" >> $logfile 2>&1 332 fi 333 334 # print summary for debug 335 echo "*******" >> $logfile 2>&1 336 echo "summary for file "$file >> $logfile 2>&1 337 echo "*******" >> $logfile 2>&1 338 echo " errors: "$numfitserrors" (1 means errors exist)" >> $logfile 2>&1 339 echo " number of drs files: "$numdrsfiles >> $logfile 2>&1 340 echo " runnumber error: "$runnumbererror >> $logfile 2>&1 341 echo " number error: "$numbererror >> $logfile 2>&1 342 echo " roi: "$roi >> $logfile 2>&1 343 echo " roitm: "$roitm >> $logfile 2>&1 344 echo " runtype: "$runtype >> $logfile 2>&1 345 echo " numevents: "$numevents >> $logfile 2>&1 346 echo " md5sum: "$md5sum >> $logfile 2>&1 347 echo " md5sum(zip): "$md5sumzip >> $logfile 2>&1 348 echo " start: "$runstart >> $logfile 2>&1 349 echo " stop: "$runstop >> $logfile 2>&1 350 351 # missing 352 # check wether entry has been made (status table) 356 printprocesslog "INFO insert/update entry in DB. QUERY: "$query4 357 if ! /usr/bin/mysql -u operator --host=fact01.fact.local --password=$password FactData -e "$query4" 358 then 359 printprocesslog "ERROR insert/update of "$numfromfile" to mysql failed." 360 finish 361 fi 353 362 done 354 363 done 355 364 356 365 finish 366
Note: See TracChangeset for help on using the changeset viewer.