source: trunk/DataCheck/CheckRawData.sh@ 12980

Last change on this file since 12980 was 12980, checked in by Daniela Dorner, 13 years ago
unified method to get dates to be processed
  • Property svn:executable set to *
File size: 16.6 KB
Line 
#!/bin/bash

# This script has been written to run on La Palma on the machine "data",
# i.e. the paths are only working on this machine.
# The script starts from the zipped files.
# This causes a delay until the files are in the database,
# because they have to be rsynced and zipped first
# (see RsyncRawData.sh, ZipRawData.sh).

# missing:
# entry in status table

# options
# (each option is assigned twice; the last uncommented assignment wins,
#  so switching the behaviour means commenting out the default line)
skipmd5sum="no" # fill md5 sums in any case
skipmd5sum="iffilled" # fill md5 sum only if they are not yet in db # default
#skipmd5sum="yes" # do not fill md5 sums in any case

doupdate="yes" # update all entries
doupdate="no" # fill only entries which are not yet existing #default

# NOTE(review): printprocesslog, finish and sendquery are not defined in this
# file; presumably they come from Sourcefile.sh -- confirm.
source $(dirname $0)/Sourcefile.sh
printprocesslog "INFO starting $0 with options doupdate="$doupdate" and skipmd5sum="$skipmd5sum

# setup to use ftools
source $HEADAS/headas-init.sh

# the password file has to be located next to this script
pwfile=$(dirname $0)/.pw
password=$(cat $pwfile 2>/dev/null)
if [ "$password" == "" ]
then
   echo "please insert password in $pwfile"
   printprocesslog "ERROR password for DB access in $pwfile missing"
   finish
fi

# check if software is available
if ! ls $factpath/fitsdump >/dev/null 2>&1
then
   printprocesslog "ERROR "$factpath"/fitsdump is not available."
   finish
fi

# check if paths are available (same order as the data flow: daq -> raw -> zipraw)
for neededpath in /daq/raw /loc_data/raw /loc_data/zipraw
do
   if ! ls $neededpath >/dev/null 2>&1
   then
      printprocesslog "ERROR $neededpath is not available."
      finish
   fi
done

# get last 3 nights
# (offsets of 12, 36 and 60 hours, formatted as YYYY/MM/DD like the directory layout)
dates=( $(date +%Y/%m/%d --date="-12hour") $(date +%Y/%m/%d --date="-36hour") $(date +%Y/%m/%d --date="-60hour") )
#dates=( `find /loc_data/zipraw/ -mindepth 3 -type d | sort | sed -e 's/\/loc_data\/zipraw\///g'` ) #all available dates in /loc_data/zipraw

# Extract the number of errors from an fverify report.
# Input:   fverify output on stdin
# Output:  the (possibly multi-digit) number of the last "<N> error(s)"
#          summary on stdout; nothing if the report contains no summary
fits_error_count()
{
   grep -E -o '[0-9]+ error\(s\)' | tail -n 1 | grep -E -o '^[0-9]+'
}

# Append ", <column>=<value>" to the global variable $query.
# Arguments: $1 - column name, $2 - unquoted value
# Nothing is appended if the value is empty, so missing header keywords
# simply leave the column untouched.
append_to_query()
{
   if [ "$2" != "" ]
   then
      query=$query", $1=$2"
   fi
}

# timestamp format of the header keywords DATE-OBS, DATE-END, TSTART, TSTOP and DATE
headertime='20[1-9][0-9][\-][01][0-9][\-][0-3][0-9]T[0-2][0-9]:[0-6][0-9]:[0-6][0-9][.][0-9]{6}'

# do check for rawfiles of these dates
for date in "${dates[@]}"
do
   ziprawdir=/loc_data/zipraw/$date
   # check if data are available from that night
   if ! [ -d $ziprawdir ]
   then
      printprocesslog "INFO "$ziprawdir" does not exist."
      continue
   else
      printprocesslog "INFO processing "$ziprawdir"..."
   fi

   # find all fits.gz files starting with the oldest file
   printprocesslog "INFO finding files to be checked in $ziprawdir..."
   fitsgzfiles=$(find $ziprawdir -type f -name '*.fits.gz' | sort)

   # get runnumber from date (YYYY/MM/DD -> YYYYMMDD)
   runnumber=$(echo $date | sed -e 's/\///g')

   # loop to check files
   for file in $fitsgzfiles
   do
      printprocesslog "INFO checking file "$file
      #echo "INFO checking file "$file

      # reset everything that is filled per file, so that a file for which
      # the header is not read (fits errors) cannot inherit the values of
      # the file processed before
      runtype=
      roi=
      roitm=
      numevents=
      numphys=
      numext1=
      numext2=
      numelp=
      numilp=
      numoth=
      numped=
      numtime=
      compiled=
      compiletime=
      revnum=
      runstart=
      runstop=
      numdrsfiles=
      step=
      drscalib=
      md5sum=
      md5sumzip=

      # raw file (unzipped copy on this machine) and original file (on daq)
      rawfile=$(echo $file | sed -e 's/zipraw/raw/' -e 's/fits[.]gz/fits/')
      origfile=$(echo $rawfile | sed -e 's/loc_data/daq/')

      # check if the zipped file was changed in the last 30 minutes
      isnew=$(find $file -cmin -30)
      if [ "$isnew" != "" ]
      then
         printprocesslog "INFO "$file" is not older than 30 min. -> continue"
         continue
      fi

      # check if it is drs file (these are only evaluated together with their run)
      case "$file" in
         *drs*)
            printprocesslog "INFO "$file" is a drs file. -> continue"
            continue
            ;;
      esac

      # check if file is already finished
      # original file on daq (if data was taken on daq)
      if [ -e $origfile ]
      then
         # get time of last modification as seconds since Epoch for both files
         timeorig=$(stat -c %Y $origfile)
         timecopy=$(stat -c %Y $rawfile)
         # compare times
         if ! [ $timeorig -eq $timecopy ]
         then
            # if times are not the same, the file is still open => no check
            printprocesslog "INFO file "$rawfile" not yet closed. -> continue"
            continue
         fi
      else
         # if the origfile doesn't exist, the data was probably written not on daq but on data
         printprocesslog "INFO file "$rawfile" was probably taken on data and not daq."
      fi

      # get run and file number from filename (YYYYMMDD_NNN)
      # NOTE(review): runnumbererror and numbererror are set but not read in this file
      runnumbererror="no"
      numbererror="no"
      numberfromname=$(echo $file | grep -E -o '20[1-9][0-9][01][0-9][0-3][0-9]_[0-9]{3}')
      runnumberfromname=$(echo $numberfromname | cut -d_ -f1)
      # strip up to two leading zeros to get a plain number for the database
      filenumberfromname=$(echo $numberfromname | cut -d_ -f2 | sed -e 's/^0//g' -e 's/^0//g')
      if [ "$runnumber" != "$runnumberfromname" ]
      then
         runnumbererror="yes"
         printprocesslog "ERROR for file "$file": runnumber from date ("$runnumber") and filename ("$runnumberfromname") don't agree."
      fi

      # check if entry already exists
      query="SELECT fNight FROM RunInfo WHERE Concat(fNight, '_', LPAD(fRunID, 3, 0))='"$numberfromname"'"
      printprocesslog "DEBUG check if entry already exists in DB. QUERY: "$query
      #result3=`/usr/bin/mysql -u operator --host=fact01.fact.local --password=$password FactData -e "$query3"`
      result3=$(sendquery)

      # only proceed with file
      # if information is not yet in database
      # and no update is wished ($doupdate)
      if [ "$result3" != "" ] && [ "$doupdate" == "no" ]
      then
         printprocesslog "INFO "$file" has been inserted already. -> continue "
         continue
      fi

      # check if fits file is corrupted
      numfitserrors=$(fverify $rawfile 2>/dev/null | fits_error_count)
      if [ "$numfitserrors" == "" ]
      then
         # without a number neither the comparisons below nor the final
         # query are valid, so skip the file and retry it in the next run
         printprocesslog "ERROR could not get number of fits errors for "$rawfile" from fverify. -> continue"
         continue
      fi
      if [ $numfitserrors -gt 0 ]
      then
         printprocesslog "WARN "$rawfile" has "$numfitserrors" fitserror(s). "
      fi

      if [ $numfitserrors -eq 0 ]
      then
         # read the header of the Events table once and evaluate it below
         header=$($factpath/fitsdump -h -t Events $file 2>/dev/null)

         # get run and file number from file
         runnumberfromfile=$(grep NIGHT <<<"$header" | grep -E -o '20[1-9][0-9][01][0-9][0-3][0-9]')
         filenumberfromfileorig=$(grep RUNID <<<"$header" | grep -E -o '[0-9]{1,3}')
         if [ "$runnumberfromfile" = "" ] || [ "$filenumberfromfileorig" = "" ]
         then
            printprocesslog "ERROR couldn't get run or file number from file name ("$file")."
         fi
         numberfromfile=$runnumberfromfile"_"$(printf %03d $filenumberfromfileorig)
         # compare numbers
         if [ "$numberfromfile" != "$numberfromname" ]
         then
            numbererror="yes"
            printprocesslog "ERROR for file "$file": number from filename ("$numberfromname") and file ("$numberfromfile") don't agree."
         fi

         # check if this run has drs file
         # in case file is available, get STEP from header
         # in the very beginning only drs-files were existing
         # in the beginning the keywords DRSCALIB and STEP were not existing
         drsfile=$(echo $file | sed -e 's/fits/drs.fits/')
         numdrsfiles=$(ls $drsfile 2>/dev/null | wc -l)
         drscalib=$(grep DRSCALIB <<<"$header" | grep -E -o "['][TF][']" | sed -e "s/'//g")
         if [ "$drscalib" == "T" ]
         then
            step=$($factpath/fitsdump -h -t Events $drsfile 2>/dev/null | grep STEP | grep -E -o "['][012][']" | sed -e "s/'//g")
            if ! [ $numdrsfiles -eq 1 ]
            then
               printprocesslog "ERROR for file "$file" number of drsfiles ("$numdrsfiles") and information from header ("$drscalib") don't agree."
            fi
         else
            if ! [ "$drscalib" == "F" ]
            then
               printprocesslog "WARN for file "$file" DRSCALIB is neither T nor F."
            fi
         fi

         # get other variables from header
         # (runtype: '_' is replaced by '-' and '.' is removed)
         runtype=$(grep RUNTYPE <<<"$header" | grep -E -o "['][a-z0-9._-]+[']" | sed -e "s/'//g" -e "s/_/-/g" -e "s/[.]//g")
         #echo "runtype for file "$file": "$runtype
         roi=$(grep NROI <<<"$header" | grep -v NROITM | grep -E -o '[0-9]{1,4}')
         roitm=$(grep NROITM <<<"$header" | grep -E -o '[0-9]{1,4}')
         numevents=$(grep Events <<<"$header" | grep -E -o '[0-9]+')
         numphys=$(grep 'NTRG ' <<<"$header" | grep -E -o '[0-9]+')
         # for NTRGEXT1/2 only a number surrounded by blanks is the value
         # (the keyword itself ends with a digit)
         numext1=$(grep 'NTRGEXT1' <<<"$header" | grep -E -o '[ ][0-9]+[ ]' | sed -e 's/\ //g')
         numext2=$(grep 'NTRGEXT2' <<<"$header" | grep -E -o '[ ][0-9]+[ ]' | sed -e 's/\ //g')
         numelp=$(grep 'NTRGLPE' <<<"$header" | grep -E -o '[0-9]+')
         numilp=$(grep 'NTRGLPI' <<<"$header" | grep -E -o '[0-9]+')
         numoth=$(grep 'NTRGMISC' <<<"$header" | grep -E -o '[0-9]+')
         numped=$(grep 'NTRGPED' <<<"$header" | grep -E -o '[0-9]+')
         numtime=$(grep 'NTRGTIM' <<<"$header" | grep -E -o '[0-9]+')
         compiled=$(grep 'COMPILED' <<<"$header" | grep -E -o "['][a-zA-Z]+[ ][ 12][0-9][ ]20[0-9][0-9][ ][0-2][0-9]:[0-5][0-9]:[0-5][0-9][']" | sed -e "s/'//g")
         if ! [ "$compiled" == "" ]
         then
            # convert to the date format of the database
            compiletime=$(date +'%F %H:%M:%S' --date="${compiled}")
         else
            compiletime=
         fi
         revnum=$(grep 'REVISION' <<<"$header" | grep -E -o "['][0-9]+[:]?[0-9]*[MSP]*[']" | sed -e "s/'//g")

         # in newest data start time is in DATE-OBS
         # in older data start time is in TSTART
         # in the beginning TSTART was empty
         runstart=$(grep DATE-OBS <<<"$header" | grep -E -o "$headertime")
         runstart2=$(grep TSTART <<<"$header" | grep -E -o "$headertime")
         if [ "$runstart" == "" ]
         then
            if [ "$runstart2" == "" ]
            then
               # last resort: keyword DATE (but neither DATE-OBS nor DATE-END)
               runstart=$(grep DATE <<<"$header" | grep -v 'DATE-' | grep -E -o "$headertime")
            else
               runstart=$runstart2
            fi
         fi
         # in newest data stop time is in DATE-END
         # in older data stop time is in TSTOP
         # in the beginning TSTOP was empty
         runstop=$(grep DATE-END <<<"$header" | grep -E -o "$headertime")
         runstop2=$(grep TSTOP <<<"$header" | grep -E -o "$headertime")
         if [ "$runstop" == "" ]
         then
            if [ "$runstop2" == "" ]
            then
               # last resort: time of last modification of the zipped file
               runstop=$(stat $file 2>/dev/null | grep Modify | grep -E -o '20[1-9][0-9][\-][01][0-9][\-][0-3][0-9][ ][0-2][0-9]:[0-6][0-9]:[0-6][0-9][.][0-9]{9}')
            else
               runstop=$runstop2
            fi
         fi
      fi

      # set runtype to 'n/a', if no runtype could be retrieved from file
      if [ "$runtype" == "" ]
      then
         runtype="n/a"
      fi
      # on 15.11.2011 the runtypes had different names
      if [ "$date" == "2011/11/15" ]
      then
         case "$runtype" in
            drs-calib)      runtype="drs-gain" ;;
            drs-time-calib) runtype="drs-time" ;;
            pedestal)       runtype="drs-pedestal" ;;
            light-pulser)   runtype="light-pulser-ext" ;;
            pedestal-on)    runtype="pedestal" ;;
         esac
      fi
      # get key of the runtype
      query="SELECT fRunTypeKEY FROM RunType WHERE fRunTypeName='"$runtype"'"
      printprocesslog "DEBUG get run type from DB. QUERY:"$query
      #result2=( `/usr/bin/mysql -u operator --host=fact01.fact.local --password=$password FactData -e "$query2"` )
      result2=( $(sendquery) )
      if [ ${#result2} -eq 0 ]
      then
         printprocesslog "ERROR "$numberfromname": Could not query fRunTypeKey for runtype "$runtype" ."
         continue
      fi

      # check if entry has already checksums
      query="SELECT fNight FROM RunInfo WHERE Concat(fNight, '_', LPAD(fRunID, 3, 0))='"$numberfromname"'"
      query=$query" AND NOT ISNULL(fMd5sumRaw) AND NOT ISNULL(fMd5sumRawZip)"
      printprocesslog "DEBUG check if md5sums are alreay in DB. QUERY:"$query
      #result5=`/usr/bin/mysql -u operator --host=fact01.fact.local --password=$password FactData -e "$query5"`
      result5=$(sendquery)

      # get md5sums of raw and zip file
      # to save time for tests and updates this can be skipped ($skipmd5sum):
      # "no" calculates always, "iffilled" only if the DB has no checksums yet
      if [ "$skipmd5sum" == "no" ] || [ "$skipmd5sum" == "iffilled" ]
      then
         if [ "$skipmd5sum" == "no" ] || [ "$result5" == "" ]
         then
            # only do the md5sum if the zipfile is already available
            if ls $file >/dev/null 2>&1
            then
               printprocesslog "INFO calculation md5sum for rawfile "$rawfile
               md5sum=$(md5sum $rawfile | cut -d' ' -f1)
               printprocesslog "INFO calculation md5sum for zipfile "$file
               md5sumzip=$(md5sum $file | cut -d' ' -f1)
            fi
         fi
      fi

      # insert or update depending on whether run exists
      if [ "$result3" == "" ]
      then
         query="INSERT"
         querymid=" fNight="$runnumber", fRunID="$filenumberfromname", "
         querystop=
      else
         query="UPDATE"
         querymid=
         querystop=" WHERE fNight="$runnumber" AND fRunID="$filenumberfromname
      fi
      query=$query" RunInfo SET "$querymid" fRunTypeKey="${result2[0]}
      # the header information is only available for files without fits errors
      if [ $numfitserrors -eq 0 ]
      then
         query=$query", fRunStart='"$runstart"', fRunStop='"$runstop"'"
         append_to_query fNumEvents "$numevents"
         append_to_query fROI "$roi"
         append_to_query fROITimeMarker "$roitm"
         append_to_query fNumPhysicsTrigger "$numphys"
         append_to_query fNumExt1Trigger "$numext1"
         append_to_query fNumExt2Trigger "$numext2"
         append_to_query fNumELPTrigger "$numelp"
         append_to_query fNumILPTrigger "$numilp"
         append_to_query fNumPedestalTrigger "$numped"
         append_to_query fNumTimeTrigger "$numtime"
         append_to_query fNumOtherTrigger "$numoth"
      fi
      if [ "$md5sum" != "" ]
      then
         query=$query", fMd5sumRaw='"$md5sum"', fMd5sumRawZip='"$md5sumzip"'"
      fi
      append_to_query fHasDrsFile "$numdrsfiles"
      append_to_query fDrsStep "$step"
      if [ "$compiletime" != "" ]
      then
         query=$query", fCompileTime='"$compiletime"'"
      fi
      if [ "$revnum" != "" ]
      then
         query=$query", fRevisionNumber='"$revnum"'"
      fi
      query=$query", fFitsFileErrors="$numfitserrors
      query=$query" "$querystop
      printprocesslog "INFO insert/update entry in DB. QUERY: "$query
      # send query to DB
      sendquery >/dev/null
      #if ! /usr/bin/mysql -u operator --host=fact01.fact.local --password=$password FactData -e "$query4"
      #then
      #   printprocesslog "ERROR insert/update of "$numfromfile" to mysql failed."
      #   finish
      #fi
   done
done

finish

Note: See TracBrowser for help on using the repository browser.