source: trunk/DataCheck/Processing/CheckRawData.sh@ 13158

Last change on this file since 13158 was 13158, checked in by Daniela Dorner, 13 years ago
added insert to nightly status tables, removed insert for status tables for runs (done with mysql trigger now)
  • Property svn:executable set to *
File size: 16.9 KB
Line 
#!/bin/bash

# this script has been written to run on La Palma on the machine data
# i.e. paths are only working on this machine
# the script starts from the zipped files
# this causes a delay until files are in the database
# because they have to be rsynced and zipped first (see RsyncRawData.sh, ZipRawData.sh)

# missing
# entry in status table

# options:
# Exactly one assignment per option is active; to switch, comment out the
# active line and uncomment the wanted alternative.
#
# skipmd5sum - when to calculate and fill the md5 sums of raw and zipped file
#skipmd5sum="no" # fill md5 sums in any case
skipmd5sum="iffilled" # fill md5 sum only if they are not yet in db # default
#skipmd5sum="yes" # do not fill md5 sums in any case

# doupdate - what to do with runs that already have an entry in the database
#doupdate="yes" # update all entries
doupdate="no" # fill only entries which are not yet existing #default

# Sourcefile.sh is expected next to the parent directory of this script.
# NOTE(review): printprocesslog, sendquery and finish are not defined in this
# file, so they presumably come from Sourcefile.sh - confirm.
# The path is quoted so that an installation directory containing whitespace
# does not break the lookup.
source "$(dirname "$0")/../Sourcefile.sh"
printprocesslog "INFO starting $0 with options doupdate=$doupdate and skipmd5sum=$skipmd5sum"

# setup to use ftools
# (HEADAS has to be set in the environment; the path is quoted so that a
# value containing whitespace is passed to 'source' as one word)
source "$HEADAS/headas-init.sh"

# check if software is available
# 'ls' is used only for its exit status: it fails when the path does not
# exist or cannot be listed.
if ! ls "$factpath/fitsdump" >/dev/null 2>&1; then
  printprocesslog "ERROR $factpath/fitsdump is not available."
  finish
fi

# check if paths are available
#   /daq/raw         original raw files
#   /loc_data/raw    local copy of the raw files
#   /loc_data/zipraw zipped raw files, the starting point of this script
for requireddir in /daq/raw /loc_data/raw /loc_data/zipraw; do
  if ! ls "$requireddir" >/dev/null 2>&1; then
    printprocesslog "ERROR $requireddir is not available."
    finish
  fi
done

# get last 3 nights
# An offset of 12 hours is applied so that the date only changes at noon,
# i.e. data taken after midnight is still assigned to the date on which the
# night started. Requires GNU date (--date).
dates=(
  "$(date +%Y/%m/%d --date="-12hour")"
  "$(date +%Y/%m/%d --date="-36hour")"
  "$(date +%Y/%m/%d --date="-60hour")"
)
# alternatives for reprocessing (uncomment exactly one to override the list above):
#dates=( `find /loc_data/zipraw/ -mindepth 3 -type d | sort | sed -e 's/\/loc_data\/zipraw\///g'` ) #all available dates in /loc_data/zipraw
#dates=( `find /loc_data/aux/ -mindepth 3 -type d | sort | sed -e 's/\/loc_data\/aux\///g'` ) #all available dates in /loc_data/aux

# do check for rawfiles of these dates
#
# For every night in ${dates[@]}:
#   1. make sure the night has a row in the nightly status tables
#   2. for every zipped raw file of the night (drs files excluded) that is
#      completely written: verify the fits file, read the run information
#      from its header, calculate the md5 sums and INSERT/UPDATE the row of
#      the run in the table RunInfo
#
# NOTE(review): sendquery is always called without arguments right after
# $query has been set, so it presumably executes the statement in $query and
# prints the result - confirm in Sourcefile.sh.

# ---------------------------------------------------------------------------
# helpers
# ---------------------------------------------------------------------------

# ERE of the time stamps in the header keys DATE-OBS, TSTART, DATE, DATE-END, TSTOP
isotime_re='20[1-9][0-9][\-][01][0-9][\-][0-3][0-9]T[0-2][0-9]:[0-6][0-9]:[0-6][0-9][.][0-9]{6}'
# ERE of the quoted compile time in the header key COMPILED, e.g. 'Nov 15 2011 10:20:30'
# (the day is blank-padded, its first character may be ' ', 1, 2 or 3)
compiled_re="['][a-zA-Z]+[ ][ 1-3][0-9][ ]20[0-9][0-9][ ][0-2][0-9]:[0-5][0-9]:[0-5][0-9][']"

# Reads the output of fverify on stdin and prints the number of errors taken
# from its "... N error(s)" summary. Prints nothing when there is no summary.
count_fits_errors() {
  grep -o '[0-9][0-9]*[ ]error(s)' | grep -E -o '[0-9]+' | tail -n 1
}

# Prints every match of the ERE $2 on those lines of $header which match the
# basic regular expression $1.
# Globals: header (read) - header dump of the file which is currently checked
header_field() {
  printf '%s\n' "$header" | grep -- "$1" | grep -E -o -- "$2"
}

# Prints the current name of the run type $1 as it was written on 15.11.2011;
# unknown names are printed unchanged.
legacy_runtype() {
  case "$1" in
    drs-calib)      echo "drs-gain" ;;
    drs-time-calib) echo "drs-time" ;;
    pedestal)       echo "drs-pedestal" ;;
    light-pulser)   echo "light-pulser-ext" ;;
    pedestal-on)    echo "pedestal" ;;
    *)              printf '%s\n' "$1" ;;
  esac
}

# Appends ", $1=$2" to $query unless the value $2 is empty.
add_field() {
  if [ -n "$2" ]; then
    query="$query, $1=$2"
  fi
}

# Same as add_field, but the value is put in single quotes.
add_text_field() {
  if [ -n "$2" ]; then
    query="$query, $1='$2'"
  fi
}

# ---------------------------------------------------------------------------
# loop over the nights
# ---------------------------------------------------------------------------
for date in "${dates[@]}"; do
  # night as number, e.g. 2011/11/15 -> 20111115
  date2=${date//\//}

  # make sure the night is known in the nightly status tables
  for table in AuxFilesAvailISDCStatus AuxDataInsertStatus; do
    query="SELECT fNight FROM $table WHERE fNight=$date2"
    night=$(sendquery)
    if [ -z "$night" ]; then
      printprocesslog "INFO insert $date2 to $table"
      query="INSERT $table SET fNight=$date2, fPriority=$date2"
      sendquery >/dev/null 2>&1
    fi
  done

  # check if data are available from that night
  ziprawdir=/loc_data/zipraw/$date
  if ! [ -d "$ziprawdir" ]; then
    printprocesslog "INFO $ziprawdir does not exist."
    continue
  fi
  printprocesslog "INFO processing $ziprawdir..."

  # find all fits.gz files starting with the oldest file
  printprocesslog "INFO finding files to be checked in $ziprawdir..."
  fitsgzfiles=$(find "$ziprawdir" -type f -name '*.fits.gz' | sort)

  # get runnumber from date
  runnumber=$date2

  # loop to check files
  # (the list is split on whitespace on purpose; the file names below
  # /loc_data/zipraw are expected not to contain blanks)
  for file in $fitsgzfiles; do
    printprocesslog "INFO checking file $file"

    # raw file (unzipped copy) and original file belonging to the zipped file
    rawfile=${file/zipraw/raw}
    rawfile=${rawfile/fits.gz/fits}
    origfile=${rawfile/loc_data/daq}

    # check if it is drs file
    if [[ "$file" == *drs* ]]; then
      printprocesslog "INFO $file is a drs file. -> continue"
      continue
    fi

    # check if file is already finished
    # original file on daq (if data was taken on daq)
    if [ -e "$origfile" ]; then
      # check if raw file was changed in the last 30 minutes
      isnew=$(find "$origfile" -cmin -30)
      if [ -n "$isnew" ]; then
        printprocesslog "WARN $origfile is not older than 30 min. -> continue"
        continue
      fi

      # get time of last modification as seconds since Epoch for both files
      timeorig=$(stat -c %Y "$origfile")
      timecopy=$(stat -c %Y "$rawfile")
      # if times are not the same (or the copy does not exist yet),
      # the file is still open => no check
      if [ "$timeorig" != "$timecopy" ]; then
        printprocesslog "INFO file $rawfile not yet closed. -> continue"
        continue
      fi
    else
      # if the origfile doesn't exist, the data was probably written not on daq but on data
      printprocesslog "INFO file $rawfile was probably taken on data and not daq."
    fi

    # get run and file number from filename (<night>_<3-digit file number>)
    runnumbererror="no"
    numbererror="no"
    numberfromname=$(echo "$file" | grep -E -o '20[1-9][0-9][01][0-9][0-3][0-9]_[0-9]{3}')
    runnumberfromname=${numberfromname%%_*}
    # file number without (up to two) leading zeros
    filenumberfromname=${numberfromname#*_}
    filenumberfromname=${filenumberfromname#0}
    filenumberfromname=${filenumberfromname#0}
    if [ "$runnumber" != "$runnumberfromname" ]; then
      runnumbererror="yes"
      printprocesslog "ERROR for file $file: runnumber from date ($runnumber) and filename ($runnumberfromname) don't agree."
    fi

    # check if entry already exists
    query="SELECT fNight FROM RunInfo WHERE Concat(fNight, '_', LPAD(fRunID, 3, 0))='$numberfromname'"
    printprocesslog "DEBUG check if entry already exists in DB. QUERY: $query"
    result3=$(sendquery)

    # only proceed with file
    # if information is not yet in database
    # or an update is wished ($doupdate)
    if [ -n "$result3" ] && [ "$doupdate" == "no" ]; then
      printprocesslog "INFO $file has been inserted already. -> continue "
      continue
    fi

    # forget everything which was read from the header of the previous file,
    # so that a file which cannot be read never inherits values of another run
    runtype=
    roi=
    roitm=
    numevents=
    numphys=
    numext1=
    numext2=
    numelp=
    numilp=
    numoth=
    numped=
    numtime=
    numdrsfiles=
    step=
    compiletime=
    revnum=
    runstart=
    runstop=

    # check if fits file is corrupted
    numfitserrors=$(fverify "$rawfile" 2>/dev/null | count_fits_errors)
    if [ -z "$numfitserrors" ]; then
      # without a number no valid statement can be built for this file
      printprocesslog "WARN could not get number of fitserrors for $rawfile from fverify. -> continue"
      continue
    fi
    if [ "$numfitserrors" -gt 0 ]; then
      printprocesslog "WARN $rawfile has $numfitserrors fitserror(s). "
    fi

    if [ "$numfitserrors" -eq 0 ]; then
      # dump the header only once, all values below are taken from this dump
      header=$("$factpath"/fitsdump -h -t Events "$file" 2>/dev/null)

      # get run and file number from file
      runnumberfromfile=$(header_field NIGHT '20[1-9][0-9][01][0-9][0-3][0-9]')
      filenumberfromfileorig=$(header_field RUNID '[0-9]{1,3}')
      if [ -z "$runnumberfromfile" ] || [ -z "$filenumberfromfileorig" ]; then
        printprocesslog "ERROR couldn't get run or file number from header of file ($file)."
      fi
      # the file number is left unquoted on purpose: an empty value then gives 000
      numberfromfile=${runnumberfromfile}_$(printf %03d $filenumberfromfileorig)
      # compare numbers
      if [ "$numberfromfile" != "$numberfromname" ]; then
        numbererror="yes"
        printprocesslog "ERROR for file $file: number from filename ($numberfromname) and file ($numberfromfile) don't agree."
      fi

      # check if this run has drs file
      # in case file is available, get STEP from header
      # in the very beginning only drs-files were existing
      # in the beginning the keywords DRSCALIB and STEP were not existing
      drsfile=${file/fits/drs.fits}
      if [ -e "$drsfile" ]; then
        numdrsfiles=1
      else
        numdrsfiles=0
      fi
      drscalib=$(header_field DRSCALIB "['][TF][']" | sed -e "s/'//g")
      if [ "$drscalib" == "T" ]; then
        step=$("$factpath"/fitsdump -h -t Events "$drsfile" 2>/dev/null | grep STEP | grep -E -o "['][012][']" | sed -e "s/'//g")
        if ! [ "$numdrsfiles" -eq 1 ]; then
          printprocesslog "ERROR for file $file number of drsfiles ($numdrsfiles) and information from header ($drscalib) don't agree."
        fi
      elif [ "$drscalib" != "F" ]; then
        printprocesslog "WARN for file $file DRSCALIB is neither T nor F."
      fi

      # get other variables from header
      runtype=$(header_field RUNTYPE "['][a-z0-9._-]+[']" | sed -e "s/'//g" -e "s/_/-/g" -e "s/[.]//g")
      roi=$(printf '%s\n' "$header" | grep NROI | grep -v NROITM | grep -E -o '[0-9]{1,4}')
      roitm=$(header_field NROITM '[0-9]{1,4}')
      numevents=$(header_field Events '[0-9]+')
      numphys=$(header_field 'NTRG ' '[0-9]+')
      numext1=$(header_field NTRGEXT1 '[ ][0-9]+[ ]' | sed -e 's/\ //g')
      numext2=$(header_field NTRGEXT2 '[ ][0-9]+[ ]' | sed -e 's/\ //g')
      numelp=$(header_field NTRGLPE '[0-9]+')
      numilp=$(header_field NTRGLPI '[0-9]+')
      numoth=$(header_field NTRGMISC '[0-9]+')
      numped=$(header_field NTRGPED '[0-9]+')
      numtime=$(header_field NTRGTIM '[0-9]+')
      compiled=$(header_field COMPILED "$compiled_re" | sed -e "s/'//g")
      if [ -n "$compiled" ]; then
        compiletime=$(date +'%F %H:%M:%S' --date="${compiled}")
      else
        compiletime=
      fi
      revnum=$(header_field REVISION "['][0-9]+[:]?[0-9]*[MSP]*[']" | sed -e "s/'//g")

      # in newest data start time is in DATE-OBS
      # in older data start time is in TSTART
      # in the beginning TSTART was empty
      runstart=$(header_field DATE-OBS "$isotime_re")
      runstart2=$(header_field TSTART "$isotime_re")
      if [ -z "$runstart" ]; then
        if [ -z "$runstart2" ]; then
          runstart=$(printf '%s\n' "$header" | grep DATE | grep -v 'DATE-' | grep -E -o "$isotime_re")
        else
          runstart=$runstart2
        fi
      fi

      # in newest data stop time is in DATE-END
      # in older data stop time is in TSTOP
      # in the beginning TSTOP was empty
      runstop=$(header_field DATE-END "$isotime_re")
      runstop2=$(header_field TSTOP "$isotime_re")
      if [ -z "$runstop" ]; then
        if [ -z "$runstop2" ]; then
          # last resort: time of the last modification of the zipped file
          runstop=$(stat "$file" 2>/dev/null | grep Modify | grep -E -o '20[1-9][0-9][\-][01][0-9][\-][0-3][0-9][ ][0-2][0-9]:[0-6][0-9]:[0-6][0-9][.][0-9]{9}')
        else
          runstop=$runstop2
        fi
      fi
    fi

    # set runtype to 'n/a', if no runtype could be retrieved from file
    if [ -z "$runtype" ]; then
      runtype="n/a"
    fi
    # on 15.11.2011 the runtypes had different names
    if [ "$date" == "2011/11/15" ]; then
      runtype=$(legacy_runtype "$runtype")
    fi

    # get key of the runtype
    query="SELECT fRunTypeKEY FROM RunType WHERE fRunTypeName='$runtype'"
    printprocesslog "DEBUG get run type from DB. QUERY:$query"
    result2=( $(sendquery) )
    if [ ${#result2[@]} -eq 0 ]; then
      printprocesslog "ERROR $numberfromname: Could not query fRunTypeKey for runtype $runtype ."
      continue
    fi

    # check if entry has already checksums
    query="SELECT fNight FROM RunInfo WHERE Concat(fNight, '_', LPAD(fRunID, 3, 0))='$numberfromname'"
    query="$query AND NOT ISNULL(fMd5sumRaw) AND NOT ISNULL(fMd5sumRawZip)"
    printprocesslog "DEBUG check if md5sums are alreay in DB. QUERY:$query"
    result5=$(sendquery)

    # get md5sums of raw and zip file
    # to save time for tests and update this can be skipped ($skipmd5sum):
    #   no       -> always calculate
    #   iffilled -> calculate only if the sums are not yet in the database
    #   yes      -> never calculate
    md5sum=
    md5sumzip=
    if [ "$skipmd5sum" == "no" ] || { [ "$skipmd5sum" == "iffilled" ] && [ -z "$result5" ]; }; then
      # only do the md5sum if the zipfile is (still) available
      if [ -e "$file" ]; then
        printprocesslog "INFO calculation md5sum for rawfile $rawfile"
        md5sum=$(md5sum "$rawfile" | cut -d' ' -f1)
        printprocesslog "INFO calculation md5sum for zipfile $file"
        md5sumzip=$(md5sum "$file" | cut -d' ' -f1)
      fi
    fi

    # insert or update depending on whether run exists
    if [ -z "$result3" ]; then
      query="INSERT RunInfo SET fNight=$runnumber, fRunID=$filenumberfromname, fRunTypeKey=${result2[0]}"
      querystop=
    else
      query="UPDATE RunInfo SET fRunTypeKey=${result2[0]}"
      querystop=" WHERE fNight=$runnumber AND fRunID=$filenumberfromname"
    fi
    # values from the header are only available for files without fits errors
    if [ "$numfitserrors" -eq 0 ]; then
      query="$query, fRunStart='$runstart', fRunStop='$runstop'"
      add_field fNumEvents "$numevents"
      add_field fROI "$roi"
      add_field fROITimeMarker "$roitm"
      add_field fNumPhysicsTrigger "$numphys"
      add_field fNumExt1Trigger "$numext1"
      add_field fNumExt2Trigger "$numext2"
      add_field fNumELPTrigger "$numelp"
      add_field fNumILPTrigger "$numilp"
      add_field fNumPedestalTrigger "$numped"
      add_field fNumTimeTrigger "$numtime"
      add_field fNumOtherTrigger "$numoth"
    fi
    if [ -n "$md5sum" ]; then
      query="$query, fMd5sumRaw='$md5sum', fMd5sumRawZip='$md5sumzip'"
    fi
    add_field fHasDrsFile "$numdrsfiles"
    add_field fDrsStep "$step"
    add_text_field fCompileTime "$compiletime"
    add_text_field fRevisionNumber "$revnum"
    query="$query, fFitsFileErrors=$numfitserrors$querystop"

    # send query to DB
    sendquery >/dev/null
  done
done

finish

Note: See TracBrowser for help on using the repository browser.