source: trunk/DataCheck/Processing/CheckRawData.sh@ 14098

Last change on this file since 14098 was 13601, checked in by Daniela Dorner, 13 years ago
changed time range for raw files on LP from 3 to 9 days
  • Property svn:executable set to *
File size: 17.5 KB
Line 
#!/bin/bash

# this script has been written to run on La Palma on the machine data
# i.e. paths are only working on this machine
# the script starts from the zipped files
# this causes a delay until files are in the database
# because they have to be rsynced and zipped first (see RsyncRawData.sh, ZipRawData.sh)

# missing
# entry in status table

# options (for each variable the last uncommented assignment wins):
skipmd5sum="no" # fill md5 sums in any case
skipmd5sum="iffilled" # fill md5 sums only if they are not yet in db # default
#skipmd5sum="yes" # do not fill md5 sums in any case

doupdate="yes" # update all entries
doupdate="no" # fill only entries which are not yet existing # default

numnights=3 # number of most recent nights to check (3, 6 or 9 have been used)

# printprocesslog, sendquery, finish and $factpath are not defined in this
# script; presumably they are provided by Sourcefile.sh -- verify there
source "$(dirname "$0")"/../Sourcefile.sh
printprocesslog "INFO starting $0 with options doupdate=$doupdate and skipmd5sum=$skipmd5sum"

# setup to use ftools
source "$HEADAS"/headas-init.sh

# check if software is available
if ! ls "$factpath"/fitsdump >/dev/null 2>&1; then
  printprocesslog "ERROR $factpath/fitsdump is not available."
  finish
fi

# check if paths are available
for requiredpath in /daq/raw /loc_data/raw /loc_data/zipraw; do
  if ! ls "$requiredpath" >/dev/null 2>&1; then
    printprocesslog "ERROR $requiredpath is not available."
    finish
  fi
done

# get the last $numnights nights as YYYY/MM/DD
# the first entry is the date 12 hours ago, every further entry is 24 hours
# earlier (-12hour, -36hour, -60hour, ...)
dates=()
for (( nightidx = 0; nightidx < numnights; nightidx++ )); do
  dates+=( "$(date +%Y/%m/%d --date="-$(( 12 + 24 * nightidx ))hour")" )
done
#dates=( `find /loc_data/zipraw/ -mindepth 3 -type d | sort | sed -e 's/\/loc_data\/zipraw\///g'` ) #all available dates in /loc_data/zipraw
#dates=( `find /loc_data/aux/ -mindepth 3 -type d | sort | sed -e 's/\/loc_data\/aux\///g'` ) #all available dates in /loc_data/aux

# do check for rawfiles of these dates
#
# For every night in ${dates[@]}:
#   - make sure the night has a row in the three status tables
#   - for every zipped raw file of that night that is closed and not a drs file:
#     verify it with fverify, read the header keywords with fitsdump,
#     optionally compute md5 sums, and INSERT or UPDATE its row in RunInfo
# sendquery is called without arguments after $query has been set, so it
# presumably executes the statement in the global $query -- verify in Sourcefile.sh

# Print the error count from the fverify summary read on stdin
# ("... and <N> error(s)"); print nothing if no such text is found.
# The complete number is extracted, so 10 or 20 errors are not read as 0.
# If several summaries are present, the last one is used.
getnumfitserrors() {
  grep -E -o '[0-9]+ error\(s\)' | tail -n 1 | grep -E -o '^[0-9]+'
}

# Append ", <column>=<value>" to the global $query unless <value> is empty.
#   $1 - column name
#   $2 - value (inserted unquoted)
addnumfield() {
  [ "$2" == "" ] || query="$query, $1=$2"
}

# Append ", <column>='<value>'" to the global $query unless <value> is empty.
#   $1 - column name
#   $2 - value (inserted in single quotes)
addtextfield() {
  [ "$2" == "" ] || query="$query, $1='$2'"
}

# timestamp as written in the fits header (microsecond precision)
headertimestamp='20[1-9][0-9][\-][01][0-9][\-][0-3][0-9]T[0-2][0-9]:[0-6][0-9]:[0-6][0-9][.][0-9]{6}'

for date in "${dates[@]}"; do
  # night as YYYYMMDD; the file names are expected to start with this number
  date2=${date//\//}
  runnumber=$date2

  # insert the night to the status tables if it is not yet there
  for statustable in AuxFilesAvailISDCStatus DriveFileAvailISDCStatus AuxDataInsertStatus; do
    query="SELECT fNight FROM $statustable WHERE fNight=$date2"
    night=$(sendquery)
    if [ "$night" == "" ]; then
      printprocesslog "INFO insert $date2 to $statustable"
      query="INSERT $statustable SET fNight=$date2, fPriority=$date2"
      sendquery >/dev/null 2>&1
    fi
  done

  ziprawdir=/loc_data/zipraw/$date
  # check if data are available from that night
  if ! [ -d "$ziprawdir" ]; then
    printprocesslog "INFO $ziprawdir does not exist."
    continue
  fi
  printprocesslog "INFO processing $ziprawdir..."

  # find all fits.gz files starting with the oldest file
  printprocesslog "INFO finding files to be checked in $ziprawdir..."
  fitsgzfiles=$(find "$ziprawdir" -type f -name '*.fits.gz' | sort)

  # loop to check files
  # $fitsgzfiles is split on whitespace on purpose (one path per word); the
  # loop does not read the list from stdin, so stdin of the commands inside
  # the loop stays untouched
  for file in $fitsgzfiles; do
    printprocesslog "INFO checking file $file"

    # raw file (unzipped copy) and original file (on daq)
    rawfile=${file/zipraw/raw}
    rawfile=${rawfile/fits.gz/fits}
    origfile=${rawfile/loc_data/daq}

    # check if it is drs file
    if [[ "$file" == *drs* ]]; then
      printprocesslog "INFO $file is a drs file. -> continue"
      continue
    fi

    # check if file is already finished
    # original file on daq (if data was taken on daq)
    if [ -e "$origfile" ]; then
      # check if raw file was changed in the last 30 minutes
      isnew=$(find "$origfile" -cmin -30)
      if [ "$isnew" != "" ]; then
        printprocesslog "WARN $origfile is not older than 30 min. -> continue"
        continue
      fi

      # get time of last modification as seconds since Epoch for both files
      timeorig=$(stat -c %Y "$origfile")
      timecopy=$(stat -c %Y "$rawfile")
      # if times are not the same (or could not be determined),
      # the file is still open => no check
      if [ -z "$timeorig" ] || [ "$timeorig" != "$timecopy" ]; then
        printprocesslog "INFO file $rawfile not yet closed. -> continue"
        continue
      fi
    else
      # if the origfile doesn't exist, the data was probably written not on daq but on data
      printprocesslog "INFO file $rawfile was probably taken on data and not daq."
    fi

    # get run and file number from filename
    runnumbererror="no"
    numbererror="no"
    numberfromname=$(echo "$file" | grep -E -o '20[1-9][0-9][01][0-9][0-3][0-9]_[0-9]{3}')
    runnumberfromname=$(echo "$numberfromname" | cut -d_ -f1)
    # strip up to two leading zeros from the three-digit file number
    filenumberfromname=$(echo "$numberfromname" | cut -d_ -f2 | sed -e 's/^0//g' -e 's/^0//g')
    if [ "$runnumber" != "$runnumberfromname" ]; then
      runnumbererror="yes"
      printprocesslog "ERROR for file $file: runnumber from date ($runnumber) and filename ($runnumberfromname) don't agree."
    fi

    # check if entry already exists
    query="SELECT fNight FROM RunInfo WHERE Concat(fNight, '_', LPAD(fRunID, 3, 0))='$numberfromname'"
    printprocesslog "DEBUG check if entry already exists in DB. QUERY: $query"
    result3=$(sendquery)

    # only proceed with file
    # if information is not yet in database
    # or an update is wished ($doupdate)
    if [ "$result3" != "" ] && [ "$doupdate" == "no" ]; then
      printprocesslog "INFO $file has been inserted already. -> continue "
      continue
    fi

    # reset everything that is written to the DB outside of the
    # "no fits errors" branch, so that no value of the previous file is reused
    runtype=
    numdrsfiles=
    step=
    compiletime=
    revnum=

    # check if fits file is corrupted
    numfitserrors=$(fverify "$rawfile" 2>/dev/null | getnumfitserrors)
    if [ -z "$numfitserrors" ]; then
      # without a number neither the checks below nor the SQL statement are
      # valid => skip the file, it is tried again in the next run
      printprocesslog "ERROR could not get number of fits errors for $rawfile. -> continue"
      continue
    fi
    if [ "$numfitserrors" -gt 0 ]; then
      printprocesslog "WARN $rawfile has $numfitserrors fitserror(s). "
    fi

    if [ "$numfitserrors" -eq 0 ]; then
      # read the header of the Events table once and reuse it for all keywords
      fitsheader=$("$factpath"/fitsdump -h -t Events "$file" 2>/dev/null)

      # get run and file number from file
      runnumberfromfile=$(grep NIGHT <<<"$fitsheader" | grep -E -o '20[1-9][0-9][01][0-9][0-3][0-9]')
      filenumberfromfileorig=$(grep RUNID <<<"$fitsheader" | grep -E -o '[0-9]{1,3}')
      if [ "$runnumberfromfile" = "" ] || [ "$filenumberfromfileorig" = "" ]; then
        printprocesslog "ERROR couldn't get run or file number from file name ($file)."
      fi
      # argument intentionally unquoted: an empty value yields 000
      numberfromfile=$runnumberfromfile"_"$(printf %03d $filenumberfromfileorig)
      # compare numbers
      if [ "$numberfromfile" != "$numberfromname" ]; then
        numbererror="yes"
        printprocesslog "ERROR for file $file: number from filename ($numberfromname) and file ($numberfromfile) don't agree."
      fi

      # check if this run has drs file
      # in case file is available, get STEP from header
      # in the very beginning only drs-files were existing
      # in the beginning the keywords DRSCALIB and STEP were not existing
      drsfile=${file/fits/drs.fits}
      numdrsfiles=$(ls "$drsfile" 2>/dev/null | wc -l)
      drscalib=$(grep DRSCALIB <<<"$fitsheader" | grep -E -o "['][TF][']" | sed -e "s/'//g")
      if [ "$drscalib" == "T" ]; then
        step=$("$factpath"/fitsdump -h -t Events "$drsfile" 2>/dev/null | grep STEP | grep -E -o "['][012][']" | sed -e "s/'//g")
        if ! [ "$numdrsfiles" -eq 1 ]; then
          printprocesslog "ERROR for file $file number of drsfiles ($numdrsfiles) and information from header ($drscalib) don't agree."
        fi
      elif ! [ "$drscalib" == "F" ]; then
        printprocesslog "WARN for file $file DRSCALIB is neither T nor F."
      fi

      # get other variables from header
      runtype=$(grep RUNTYPE <<<"$fitsheader" | grep -E -o "['][a-z0-9._-]+[']" | sed -e "s/'//g" -e "s/_/-/g" -e "s/[.]//g")
      roi=$(grep NROI <<<"$fitsheader" | grep -v NROITM | grep -E -o '[0-9]{1,4}')
      roitm=$(grep NROITM <<<"$fitsheader" | grep -E -o '[0-9]{1,4}')
      numevents=$(grep Events <<<"$fitsheader" | grep -E -o '[0-9]+')
      numphys=$(grep 'NTRG ' <<<"$fitsheader" | grep -E -o '[0-9]+')
      numext1=$(grep 'NTRGEXT1' <<<"$fitsheader" | grep -E -o '[ ][0-9]+[ ]' | sed -e 's/\ //g')
      numext2=$(grep 'NTRGEXT2' <<<"$fitsheader" | grep -E -o '[ ][0-9]+[ ]' | sed -e 's/\ //g')
      numelp=$(grep 'NTRGLPE' <<<"$fitsheader" | grep -E -o '[0-9]+')
      numilp=$(grep 'NTRGLPI' <<<"$fitsheader" | grep -E -o '[0-9]+')
      numoth=$(grep 'NTRGMISC' <<<"$fitsheader" | grep -E -o '[0-9]+')
      numped=$(grep 'NTRGPED' <<<"$fitsheader" | grep -E -o '[0-9]+')
      numtime=$(grep 'NTRGTIM' <<<"$fitsheader" | grep -E -o '[0-9]+')
      compiled=$(grep 'COMPILED' <<<"$fitsheader" | grep -E -o "['][a-zA-Z]+[ ][ 12][0-9][ ]20[0-9][0-9][ ][0-2][0-9]:[0-5][0-9]:[0-5][0-9][']" | sed -e "s/'//g")
      if ! [ "$compiled" == "" ]; then
        compiletime=$(date +'%F %H:%M:%S' --date="${compiled}")
      fi
      revnum=$(grep 'REVISION' <<<"$fitsheader" | grep -E -o "['][0-9]+[:]?[0-9]*[MSP]*[']" | sed -e "s/'//g")

      # in newest data start time is in DATE-OBS
      # in older data start time is in TSTART
      # in the beginning TSTART was empty => fall back to DATE
      runstart=$(grep DATE-OBS <<<"$fitsheader" | grep -E -o "$headertimestamp")
      if [ "$runstart" == "" ]; then
        runstart=$(grep TSTART <<<"$fitsheader" | grep -E -o "$headertimestamp")
      fi
      if [ "$runstart" == "" ]; then
        runstart=$(grep DATE <<<"$fitsheader" | grep -v 'DATE-' | grep -E -o "$headertimestamp")
      fi

      # in newest data stop time is in DATE-END
      # in older data stop time is in TSTOP
      # in the beginning TSTOP was empty => fall back to modification time of the file
      runstop=$(grep DATE-END <<<"$fitsheader" | grep -E -o "$headertimestamp")
      if [ "$runstop" == "" ]; then
        runstop=$(grep TSTOP <<<"$fitsheader" | grep -E -o "$headertimestamp")
      fi
      if [ "$runstop" == "" ]; then
        runstop=$(stat "$file" 2>/dev/null | grep Modify | grep -E -o '20[1-9][0-9][\-][01][0-9][\-][0-3][0-9][ ][0-2][0-9]:[0-6][0-9]:[0-6][0-9][.][0-9]{9}')
      fi
    fi

    # set runtype to 'n/a', if no runtype could be retrieved from file
    if [ "$runtype" == "" ]; then
      runtype="n/a"
    fi
    # on 15.11.2011 the runtypes had different names
    if [ "$date" == "2011/11/15" ]; then
      case "$runtype" in
        drs-calib)      runtype="drs-gain" ;;
        drs-time-calib) runtype="drs-time" ;;
        pedestal)       runtype="drs-pedestal" ;;
        light-pulser)   runtype="light-pulser-ext" ;;
        pedestal-on)    runtype="pedestal" ;;
      esac
    fi
    # get key of runtype
    query="SELECT fRunTypeKEY FROM RunType WHERE fRunTypeName='$runtype'"
    printprocesslog "DEBUG get run type from DB. QUERY:$query"
    result2=( $(sendquery) )
    if [ ${#result2[@]} -eq 0 ]; then
      printprocesslog "ERROR $numberfromname: Could not query fRunTypeKey for runtype $runtype ."
      continue
    fi

    # check if entry has already checksums
    query="SELECT fNight FROM RunInfo WHERE Concat(fNight, '_', LPAD(fRunID, 3, 0))='$numberfromname'"
    query="$query AND NOT ISNULL(fMd5sumRaw) AND NOT ISNULL(fMd5sumRawZip)"
    printprocesslog "DEBUG check if md5sums are alreay in DB. QUERY:$query"
    result5=$(sendquery)

    # get md5sums of raw and zip file
    # to save time for tests and update this can be skipped ($skipmd5sum):
    # "no" always calculates them, "iffilled" only if they are not yet in the DB
    md5sum=
    md5sumzip=
    if [ "$skipmd5sum" == "no" ] || { [ "$skipmd5sum" == "iffilled" ] && [ "$result5" == "" ]; }; then
      # only do the md5sum if the zipfile is already available
      if ls "$file" >/dev/null 2>&1; then
        printprocesslog "INFO calculation md5sum for rawfile $rawfile"
        md5sum=$(md5sum "$rawfile" | cut -d' ' -f1)
        printprocesslog "INFO calculation md5sum for zipfile $file"
        md5sumzip=$(md5sum "$file" | cut -d' ' -f1)
      fi
    fi

    # insert or update depending on whether run exists
    if [ "$result3" == "" ]; then
      query="INSERT RunInfo SET fNight=$runnumber, fRunID=$filenumberfromname, fRunTypeKey=${result2[0]}"
      querystop=
    else
      query="UPDATE RunInfo SET fRunTypeKey=${result2[0]}"
      querystop=" WHERE fNight=$runnumber AND fRunID=$filenumberfromname"
    fi
    # header values are only available (and only written) for files without fits errors
    if [ "$numfitserrors" -eq 0 ]; then
      query="$query, fRunStart='$runstart', fRunStop='$runstop'"
      addnumfield fNumEvents "$numevents"
      addnumfield fROI "$roi"
      addnumfield fROITimeMarker "$roitm"
      addnumfield fNumPhysicsTrigger "$numphys"
      addnumfield fNumExt1Trigger "$numext1"
      addnumfield fNumExt2Trigger "$numext2"
      addnumfield fNumELPTrigger "$numelp"
      addnumfield fNumILPTrigger "$numilp"
      addnumfield fNumPedestalTrigger "$numped"
      addnumfield fNumTimeTrigger "$numtime"
      addnumfield fNumOtherTrigger "$numoth"
    fi
    if [ "$md5sum" != "" ]; then
      query="$query, fMd5sumRaw='$md5sum', fMd5sumRawZip='$md5sumzip'"
    fi
    addnumfield fHasDrsFile "$numdrsfiles"
    addnumfield fDrsStep "$step"
    addtextfield fCompileTime "$compiletime"
    addtextfield fRevisionNumber "$revnum"
    query="$query, fFitsFileErrors=$numfitserrors"
    query="$query $querystop"
    # send query to DB
    sendquery >/dev/null
  done
done

finish

Note: See TracBrowser for help on using the repository browser.