source: trunk/DataCheck/Processing/CheckRawData.sh@ 14632

Last change on this file since 14632 was 14155, checked in by Daniela Dorner, 12 years ago
bugfix for drsstep; better handling of corrupted files; included new version of checksums
  • Property svn:executable set to *
File size: 19.7 KB
Line 
1#!/bin/bash
2
# This script was written to run on La Palma on the machine "data",
# i.e. the hard-coded paths only work on that machine.
# The script starts from the zipped files.
# This causes a delay until the files are in the database,
# because they have to be rsynced and zipped first (see RsyncRawData.sh, ZipRawData.sh).
#
# Starting from 2012/06/05 the keywords CHECKSUM and DATASUM are
# available in the FITS header of the raw file.
# From that date on the md5sums are not calculated anymore.
#

# TODO:
# use a tempfile for the fitsdump -h output
16
# options:
# skipmd5sum controls whether md5 sums of the raw and the zipped file are calculated
#   "no"       fill md5 sums in any case
#   "iffilled" fill md5 sums only if they are not yet in the db (former default)
#   "yes"      do not fill md5 sums in any case
#              (default since 2012/06/05, when the checksum became available in the header)
skipmd5sum="yes"

# doupdate controls what happens with runs which are already in the db
#   "yes" update all entries
#   "no"  fill only entries which are not yet existing (default)
doupdate="no"

# load the common setup
# printprocesslog, sendquery and finish are not defined in this script;
# presumably they are provided by Sourcefile.sh, so stop right away
# (on stderr, as no log function exists yet) if that file cannot be read
sourcefile="$(dirname "$0")/../Sourcefile.sh"
if ! [ -r "$sourcefile" ]
then
   echo "ERROR $sourcefile is not readable." >&2
   exit 1
fi
source "$sourcefile"
printprocesslog "INFO starting $0 with options doupdate=$doupdate and skipmd5sum=$skipmd5sum"

# setup to use ftools
source "$HEADAS/headas-init.sh"
30
# check if software is available
if ! ls $factpath/fitsdump >/dev/null 2>&1
then
   printprocesslog "ERROR "$factpath"/fitsdump is not available."
   finish
fi
# fverify is called further down to count the fits errors of each file;
# without it the error count stays empty and the final query is malformed
if ! command -v fverify >/dev/null 2>&1
then
   printprocesslog "ERROR fverify is not available."
   finish
fi

# check if paths are available
# (ls is used on purpose: it also fails if the directory cannot be read)
for neededdir in /daq/raw /loc_data/raw /loc_data/zipraw
do
   if ! ls $neededdir >/dev/null 2>&1
   then
      printprocesslog "ERROR $neededdir is not available."
      finish
   fi
done
54
# get last 3, 6 or 9 nights (set numnights accordingly)
# the first night is the date 12 hours ago, i.e. the date changes at noon;
# each further night lies another 24 hours back (-36hour, -60hour, ...)
numnights=3
dates=()
for (( nightidx = 0; nightidx < numnights; nightidx++ ))
do
   dates+=( "$(date +%Y/%m/%d --date="-$(( 12 + 24 * nightidx ))hour")" )
done
#dates=( `find /loc_data/zipraw/ -mindepth 3 -type d | sort | sed -e 's/\/loc_data\/zipraw\///g'` ) #all available dates in /loc_data/zipraw
#dates=( `find /loc_data/aux/ -mindepth 3 -type d | sort | sed -e 's/\/loc_data\/aux\///g'` ) #all available dates in /loc_data/aux
62
63# do check for rawfiles of these dates
64for date in ${dates[@]}
65do
# night as number, i.e. the date without slashes
date2=${date//\//}
# make sure that the night has an entry in each of these status tables
# (sendquery is called without arguments; presumably it takes the statement from $query)
for statustable in AuxFilesAvailISDCStatus DriveFileAvailISDCStatus AuxDataInsertStatus
do
   night=
   query="SELECT fNight FROM $statustable WHERE fNight=$date2"
   night=$(sendquery)
   if [ -z "$night" ]
   then
      printprocesslog "INFO insert $date2 to $statustable"
      query="INSERT $statustable SET fNight=$date2, fPriority=$date2"
      sendquery >/dev/null 2>&1
   fi
done
ziprawdir=/loc_data/zipraw/$date
# nights without zipped raw data are skipped
if ! [ -d $ziprawdir ]
then
   printprocesslog "INFO "$ziprawdir" does not exist."
   continue
fi
printprocesslog "INFO processing "$ziprawdir"..."

# list all fits.gz files of the night, sorted by path
# (according to the original comment this starts with the oldest file)
printprocesslog "INFO finding files to be checked in $ziprawdir..."
fitsgzfiles=$(find $ziprawdir -type f -name '*.fits.gz' | sort)

# the runnumber is the date without slashes
runnumber=${date//\//}
110
111 # loop to check files
112 for file in $fitsgzfiles
113 do
filecorrupt="no"
printprocesslog "INFO checking file $file"
#echo "INFO checking file "$file" at "`date`

# names of the unzipped copy and of the original file
#  file:     /loc_data/zipraw
#  rawfile:  /loc_data/raw
#  origfile: /daq/raw
rawfile=${file/zipraw/raw}
rawfile=${rawfile/fits.gz/fits}
origfile=${rawfile/loc_data/daq}

# drs files are not handled here
isdrs=$(ls $file | grep drs)
if [ -n "$isdrs" ]
then
   printprocesslog "INFO $file is a drs file. -> continue"
   continue
fi
132
# check if the file is already finished
# this is only possible if the original file on daq exists
if ! [ -e $origfile ]
then
   # without the origfile, the data was probably written not on daq but on data
   printprocesslog "INFO file "$rawfile" was probably taken on data and not daq."
else
   # files changed within the last 30 minutes are not touched
   isnew=$(find $origfile -cmin -30)
   if [ -n "$isnew" ]
   then
      printprocesslog "WARN "$origfile" is not older than 30 min. -> continue"
      continue
   fi

   # modification time (seconds since Epoch) of the original and of the copy
   timeorig=$(stat -c %Y $origfile)
   timecopy=$(stat -c %Y $rawfile)
   # differing times mean that the file is still open => no check
   if ! [ $timeorig -eq $timecopy ]
   then
      printprocesslog "INFO file "$rawfile" not yet closed. -> continue"
      continue
   fi
fi
159
# get run and file number from the filename (<night>_<3-digit file number>)
runnumbererror="no"
numbererror="no"
numberfromname=$(echo $file | grep -E -o '20[1-9][0-9][01][0-9][0-3][0-9]_[0-9]{3}')
# part in front of the underscore
runnumberfromname=${numberfromname%%_*}
# part behind the underscore, with up to two leading zeros removed
filenumberfromname=${numberfromname#*_}
filenumberfromname=${filenumberfromname#0}
filenumberfromname=${filenumberfromname#0}
if [ "$runnumber" != "$runnumberfromname" ]
then
   runnumbererror="yes"
   printprocesslog "ERROR for file $file: runnumber from date ($runnumber) and filename ($runnumberfromname) don't agree."
fi

# look for an existing entry of this run
query="SELECT fNight FROM RunInfo WHERE Concat(fNight, '_', LPAD(fRunID, 3, 0))='$numberfromname'"
printprocesslog "DEBUG check if entry already exists in DB. QUERY: "$query
result3=$(sendquery)

# a file which is already in the database is only
# processed again if an update is wished ($doupdate)
if [ -n "$result3" ] && [ "$doupdate" == "no" ]
then
   printprocesslog "INFO $file has been inserted already. -> continue "
   continue
fi
186
runtype=
# check if fits file is corrupted

# Read fverify output on stdin and print the number in front of "error(s)".
# All digits are taken, so that counts of 10 and more are not cut down
# to their last digit; if several matches are found, the last one is used.
# Prints nothing if no such number is found.
# (defined at this place to keep this section self-contained)
getnumfitserrors()
{
   grep -E -o '[0-9]+[ ]error\(s\)' | grep -E -o '^[0-9]+' | tail -n 1
}

numfitserrors=`fverify $rawfile 2>/dev/null | getnumfitserrors`
if [ "$numfitserrors" == "" ]
then
   # the value is left empty on purpose: the number of errors is unknown
   printprocesslog "WARN could not get number of fitserrors for "$rawfile"."
fi
# an unknown number of errors is treated like 0 in this comparison
if [ "${numfitserrors:-0}" -gt 0 ]
then
   printprocesslog "WARN "$rawfile" has "$numfitserrors" fitserror(s). "
   # the file only counts as corrupt if fitsdump complains as well
   fitsdumperrors=`$factpath/fitsdump -h -t Events $file 2>&1 | grep corrupted`
   if [ "$fitsdumperrors" != "" ]
   then
      filecorrupt="yes"
   fi
fi
203
numdrsfiles=
step=
# also reset the values which are added to the query even for corrupted
# files; otherwise a corrupted file inherits them from the previous file
checksum=
datasum=
compiletime=
revnum=
#if [ $numfitserrors -eq 0 ]
if [ "$filecorrupt" == "no" ]
then
   # read the header of the Events table only once and reuse it for all keywords
   fitsheader=$($factpath/fitsdump -h -t Events $file 2>/dev/null)

   # get run and file number from file
   runnumberfromfile=$(grep NIGHT <<< "$fitsheader" | grep -E -o '20[1-9][0-9][01][0-9][0-3][0-9]')
   filenumberfromfileorig=$(grep RUNID <<< "$fitsheader" | grep -E -o '[0-9]{1,3}')
   if [ "$runnumberfromfile" = "" ] || [ "$filenumberfromfileorig" = "" ]
   then
      printprocesslog "ERROR couldn't get run or file number from header of file ("$file")."
   fi
   numberfromfile=$runnumberfromfile"_"$(printf %03d $filenumberfromfileorig)
   # compare numbers
   if [ "$numberfromfile" != "$numberfromname" ]
   then
      numbererror="yes"
      printprocesslog "ERROR for file "$file": number from filename ("$numberfromname") and file ("$numberfromfile") don't agree."
   fi

   # get checksums from header
   checksum=$(grep CHECKSUM <<< "$fitsheader" | grep -E -o '[a-zA-Z0-9]{16}')
   if [ "$checksum" == "" ]
   then
      printprocesslog "WARN checksum for file "$file" is empty."
   fi
   datasum=$(grep DATASUM <<< "$fitsheader" | grep -E -o '[0-9]{1,10}')
   if [ "$datasum" == "" ]
   then
      printprocesslog "WARN datasum for file "$file" is empty."
   fi

   # check if this run has drs file
   # in case file is available, get STEP from header
   # in the very beginning only drs-files were existing
   # in the beginning the keywords DRSCALIB and STEP were not existing
   drsfile=$(echo $file | sed -e 's/fits/drs.fits/')
   numdrsfiles=$(ls $drsfile 2>/dev/null | wc -l)
   drscalib=$(grep DRSCALIB <<< "$fitsheader" | grep -E -o "['][TF][']" | sed -e "s/'//g")
   if [ "$drscalib" == "T" ]
   then
      step=$(grep DRSSTEP <<< "$fitsheader" | grep -E -o "['][012][']" | sed -e "s/'//g")
      stepfromdrs=$($factpath/fitsdump -h -t Events $drsfile 2>/dev/null | grep STEP | grep -E -o "['][012][']" | sed -e "s/'//g")
      if [ "$stepfromdrs" != "$step" ]
      then
         printprocesslog "ERROR for file "$file" step from drsfile ("$stepfromdrs") and from file ("$step") do not agree."
         # fall back to the value of the drs file if the file itself has none
         if [ "$stepfromdrs" != "" ] && [ "$step" == "" ]
         then
            step=$stepfromdrs
            printprocesslog "WARN setting drsstep from drsfile ("$stepfromdrs") although value differs from the one in file "$file"."
         fi
      fi
      if ! [ $numdrsfiles -eq 1 ]
      then
         printprocesslog "ERROR for file "$file" number of drsfiles ("$numdrsfiles") and information from header ("$drscalib") don't agree."
      fi
      if [ "$step" = "" ]
      then
         printprocesslog "ERROR file "$file" has drsfiles ("$numdrsfiles"), but step ("$step") is empty."
      fi
   else
      if ! [ "$drscalib" == "F" ]
      then
         printprocesslog "WARN for file "$file" DRSCALIB is neither T nor F."
      fi
   fi

   # get other variables from header
   # runtype: quotes are removed, '_' is replaced by '-' and '.' is removed
   runtype=$(grep RUNTYPE <<< "$fitsheader" | grep -E -o "['][a-z0-9._-]+[']" | sed -e "s/'//g" -e "s/_/-/g" -e "s/[.]//g")
   #echo "runtype for file "$file": "$runtype
   roi=$(grep NROI <<< "$fitsheader" | grep -v NROITM | grep -E -o '[0-9]{1,4}')
   roitm=$(grep NROITM <<< "$fitsheader" | grep -E -o '[0-9]{1,4}')
   numevents=$(grep Events <<< "$fitsheader" | grep -E -o '[0-9]+')
   numphys=$(grep 'NTRG ' <<< "$fitsheader" | grep -E -o '[0-9]+')
   numext1=$(grep 'NTRGEXT1' <<< "$fitsheader" | grep -E -o '[ ][0-9]+[ ]' | sed -e 's/\ //g')
   numext2=$(grep 'NTRGEXT2' <<< "$fitsheader" | grep -E -o '[ ][0-9]+[ ]' | sed -e 's/\ //g')
   numelp=$(grep 'NTRGLPE' <<< "$fitsheader" | grep -E -o '[0-9]+')
   numilp=$(grep 'NTRGLPI' <<< "$fitsheader" | grep -E -o '[0-9]+')
   numoth=$(grep 'NTRGMISC' <<< "$fitsheader" | grep -E -o '[0-9]+')
   numped=$(grep 'NTRGPED' <<< "$fitsheader" | grep -E -o '[0-9]+')
   numtime=$(grep 'NTRGTIM' <<< "$fitsheader" | grep -E -o '[0-9]+')
   # the day of the month may start with a blank, 1, 2 or 3 (e.g. ' 5', '17', '30')
   compiled=$(grep 'COMPILED' <<< "$fitsheader" | grep -E -o "['][a-zA-Z]+[ ][ 123][0-9][ ]20[0-9][0-9][ ][0-2][0-9]:[0-5][0-9]:[0-5][0-9][']" | sed -e "s/'//g")
   if ! [ "$compiled" == "" ]
   then
      compiletime=$(date +'%F %H:%M:%S' --date="${compiled}")
   else
      compiletime=
   fi
   revnum=$(grep 'REVISION' <<< "$fitsheader" | grep -E -o "['][0-9]+[:]?[0-9]*[MSP]*[']" | sed -e "s/'//g")

   # pattern of the time stamps in the header (microsecond precision)
   timeregex='20[1-9][0-9][\-][01][0-9][\-][0-3][0-9]T[0-2][0-9]:[0-6][0-9]:[0-6][0-9][.][0-9]{6}'
   # in newest data start time is in DATE-OBS
   # in older data start time is in TSTART
   # in the beginning TSTART was empty
   runstart=$(grep DATE-OBS <<< "$fitsheader" | grep -E -o "$timeregex")
   runstart2=$(grep TSTART <<< "$fitsheader" | grep -E -o "$timeregex")
   if [ "$runstart" == "" ]
   then
      if [ "$runstart2" == "" ]
      then
         runstart=$(grep DATE <<< "$fitsheader" | grep -v 'DATE-' | grep -E -o "$timeregex")
      else
         runstart=$runstart2
      fi
   fi
   # in newest data stop time is in DATE-END
   # in older data stop time is in TSTOP
   # in the beginning TSTOP was empty
   runstop=$(grep DATE-END <<< "$fitsheader" | grep -E -o "$timeregex")
   runstop2=$(grep TSTOP <<< "$fitsheader" | grep -E -o "$timeregex")
   if [ "$runstop" == "" ]
   then
      if [ "$runstop2" == "" ]
      then
         # last resort: modification time of the zipped file
         runstop=$(stat $file 2>/dev/null | grep Modify | grep -E -o '20[1-9][0-9][\-][01][0-9][\-][0-3][0-9][ ][0-2][0-9]:[0-6][0-9]:[0-6][0-9][.][0-9]{9}')
      else
         runstop=$runstop2
      fi
   fi
fi
322
# use 'n/a' as runtype, if no runtype could be retrieved from file
if [ -z "$runtype" ]
then
   runtype="n/a"
fi
# on 15.11.2011 the runtypes had different names
# which are translated to the current ones here
if [ "$date" == "2011/11/15" ]
then
   case "$runtype" in
      drs-calib)
         runtype="drs-gain"
         ;;
      drs-time-calib)
         runtype="drs-time"
         ;;
      pedestal)
         runtype="drs-pedestal"
         ;;
      light-pulser)
         runtype="light-pulser-ext"
         ;;
      pedestal-on)
         runtype="pedestal"
         ;;
   esac
fi
# get the key of the runtype from the database
query="SELECT fRunTypeKEY FROM RunType WHERE fRunTypeName='$runtype'"
printprocesslog "DEBUG get run type from DB. QUERY:"$query
result2=( $(sendquery) )
# without a key the run cannot be inserted
if [ -z "${result2[0]}" ]
then
   printprocesslog "ERROR $numberfromname: Could not query fRunTypeKey for runtype $runtype ."
   continue
fi
362
# check if the entry already has both md5 sums
query="SELECT fNight FROM RunInfo WHERE Concat(fNight, '_', LPAD(fRunID, 3, 0))='$numberfromname'"
query="$query AND NOT ISNULL(fMd5sumRaw) AND NOT ISNULL(fMd5sumRawZip)"
printprocesslog "DEBUG check if md5sums are alreay in DB. QUERY:"$query
result5=$(sendquery)

# get md5sums of raw and zip file
# to save time for tests and updates this can be skipped ($skipmd5sum):
#  no       -> always calculate
#  iffilled -> calculate only if the sums are not yet in the db
#  other    -> never calculate
md5sum=
md5sumzip=
calcmd5="no"
case "$skipmd5sum" in
   no)
      calcmd5="yes"
      ;;
   iffilled)
      if [ -z "$result5" ]
      then
         calcmd5="yes"
      fi
      ;;
esac
# only do the md5sum if the zipfile is already available
if [ "$calcmd5" == "yes" ] && ls $file >/dev/null 2>&1
then
   printprocesslog "INFO calculation md5sum for rawfile $rawfile"
   md5sum=$(md5sum $rawfile | awk '{print $1}')
   printprocesslog "INFO calculation md5sum for zipfile $file"
   md5sumzip=$(md5sum $file | awk '{print $1}')
fi
392
# insert or update depending on whether run exists
if [ -z "$result3" ]
then
   querystart="INSERT"
   querymid=" fNight=$runnumber, fRunID=$filenumberfromname, "
   querystop=
else
   querystart="UPDATE"
   querymid=
   querystop=" WHERE fNight=$runnumber AND fRunID=$filenumberfromname"
fi
query="$querystart RunInfo SET $querymid fRunTypeKey=${result2[0]}"

# values which are only available if the file is not corrupted
if [ "$filecorrupt" == "no" ]
then
   query="$query, fRunStart='$runstart', fRunStop='$runstop'"
   # numeric columns, given as <column>:<variable>;
   # a column is only set if its variable is not empty
   for qfield in fNumEvents:numevents fROI:roi fROITimeMarker:roitm \
                 fNumPhysicsTrigger:numphys fNumExt1Trigger:numext1 \
                 fNumExt2Trigger:numext2 fNumELPTrigger:numelp \
                 fNumILPTrigger:numilp fNumPedestalTrigger:numped \
                 fNumTimeTrigger:numtime fNumOtherTrigger:numoth
   do
      qcol=${qfield%%:*}
      qvar=${qfield#*:}
      if [ -n "${!qvar}" ]
      then
         query="$query, $qcol=${!qvar}"
      fi
   done
fi

# values which are added whenever they are not empty
if [ -n "$checksum" ]
then
   query="$query, fCheckSum='$checksum'"
fi
if [ -n "$datasum" ]
then
   query="$query, fDataSum='$datasum'"
fi
# both md5 sums are added together, depending on the one of the raw file
if [ -n "$md5sum" ]
then
   query="$query, fMd5sumRaw='$md5sum', fMd5sumRawZip='$md5sumzip'"
fi
if [ -n "$numdrsfiles" ]
then
   query="$query, fHasDrsFile=$numdrsfiles"
fi
if [ -n "$step" ]
then
   query="$query, fDrsStep=$step"
fi
if [ -n "$compiletime" ]
then
   query="$query, fCompileTime='$compiletime'"
fi
if [ -n "$revnum" ]
then
   query="$query, fRevisionNumber='$revnum'"
fi

# the number of fits errors is always added
query="$query, fFitsFileErrors=$numfitserrors"
query="$query $querystop"
# send query to DB
#echo $query
sendquery >/dev/null
487 done
488done
489
490finish
491
Note: See TracBrowser for help on using the repository browser.