source: trunk/DataCheck/CheckRawData.sh@ 12969

Last change on this file since 12969 was 12933, checked in by Daniela Dorner, 13 years ago
added new variables and further checks
  • Property svn:executable set to *
File size: 15.8 KB
Line 
#!/bin/bash
#
# Checks the zipped raw files found for the dates selected further below and
# inserts or updates one row per file in the RunInfo table of the database.
# Relies on printprocesslog and finish, which are not defined in this file
# (presumably provided by Sourcefile.sh -- confirm).

# missing
# entry in status table

# options:
# exactly one value per option is active, the alternatives stay commented out
#skipmd5sum="no" # fill md5 sums in any case
skipmd5sum="iffilled" # fill md5 sum only if they are not yet in db # default
#skipmd5sum="yes" # do not fill md5 sums in any case

doupdate="yes" # update all entries
#doupdate="no" # fill only entries which are not yet existing #default

# quoted so that the script also works when its path contains whitespace
source "$(dirname "$0")/Sourcefile.sh"
printprocesslog "INFO starting $0 with options doupdate=$doupdate and skipmd5sum=$skipmd5sum"

# setup to use ftools
source "$HEADAS/headas-init.sh"

# read the password for the DB access from the file .pw next to this script
pwfile="$(dirname "$0")/.pw"
# a missing or unreadable file is handled below via the empty password
password=$(cat "$pwfile" 2>/dev/null)
if [ -z "$password" ]
then
  echo "please insert password in $pwfile"
  printprocesslog "ERROR password for DB access in $pwfile missing"
  finish
fi

# check if software is available
# (-x instead of parsing ls: fitsdump has to be executable to be of any use)
if ! [ -x "$factpath/fitsdump" ]
then
  printprocesslog "ERROR $factpath/fitsdump is not available."
  finish
fi

# get last 2 nights
# (the offsets of 12 and 36 hours yield the dates of 12 and 36 hours ago,
#  formatted as YYYY/MM/DD like the directories below /loc_data/zipraw)
dates=( "$(date +%Y/%m/%d --date="-12hour")" "$(date +%Y/%m/%d --date="-36hour")" )
#dates=( `find /loc_data/zipraw/ -mindepth 3 -type d | sort | sed -e 's/\/loc_data\/zipraw\///g'` ) #all available dates in /loc_data/zipraw
#dates=( "2012/02/19" )

# Print the number of errors reported in an fverify summary read from stdin
# (text of the form "... N error(s)"). The complete number is extracted, not
# only its last digit. If several summaries are present the last one is used;
# nothing is printed if there is none.
countfitserrors()
{
  grep -E -o '[0-9]+[ ]error\(s\)' | grep -E -o '^[0-9]+' | tail -n 1
}

# patterns of the time stamps searched for in the fits header and in the output of stat
headertimeregex='20[1-9][0-9][\-][01][0-9][\-][0-3][0-9]T[0-2][0-9]:[0-6][0-9]:[0-6][0-9][.][0-9]{6}'
stattimeregex='20[1-9][0-9][\-][01][0-9][\-][0-3][0-9][ ][0-2][0-9]:[0-6][0-9]:[0-6][0-9][.][0-9]{9}'

# mysql call used for all queries
# NOTE(review): the password is handed over on the command line and therefore
# shows up in the process list -- consider using a client option file instead.
mysqlcmd=( /usr/bin/mysql -u operator --host=fact01.fact.local "--password=$password" FactData )

# do check for rawfiles of these dates
for date in "${dates[@]}"
do
  ziprawdir=/loc_data/zipraw/$date
  # check if data are available from that night
  if ! [ -d "$ziprawdir" ]
  then
    printprocesslog "INFO $ziprawdir does not exist."
    continue
  fi
  printprocesslog "INFO processing $ziprawdir..."

  # find all fits.gz files, sorted by name
  printprocesslog "INFO finding files to be checked in $ziprawdir..."
  fitsgzfiles=$(find "$ziprawdir" -type f -name '*.fits.gz' | sort)

  # get runnumber from date (YYYY/MM/DD -> YYYYMMDD)
  runnumber=${date//\//}

  # loop to check files
  # (the list is split on whitespace on purpose; assumes that the paths
  #  below /loc_data/zipraw contain no whitespace)
  for file in $fitsgzfiles
  do
    printprocesslog "INFO checking file $file"

    # raw and original file
    rawfile=$(echo "$file" | sed -e 's/zipraw/raw/' -e 's/fits[.]gz/fits/')
    origfile=$(echo "$rawfile" | sed -e 's/loc_data/daq/')

    # check if the zipped file was changed in the last 30 minutes
    isnew=$(find "$file" -cmin -30)
    if [ "$isnew" != "" ]
    then
      printprocesslog "INFO $file is not older than 30 min. -> continue"
      continue
    fi

    # check if it is drs file
    if [[ "$file" == *drs* ]]
    then
      printprocesslog "INFO $file is a drs file. -> continue"
      continue
    fi

    # check if file is already finished
    # original file on daq (if data was taken on daq)
    if [ -e "$origfile" ]
    then
      # get time of last modification as seconds since Epoch for both files
      timeorig=$(stat -c %Y "$origfile")
      timecopy=$(stat -c %Y "$rawfile")
      # compare times
      # (if one of the times could not be retrieved, the test fails as well
      #  and the file is skipped)
      if ! [ "$timeorig" -eq "$timecopy" ]
      then
        # if times are not the same, the file is still open => no check
        printprocesslog "INFO file $rawfile not yet closed. -> continue"
        continue
      fi
    else
      # if the origfile doesn't exist, the data was probably written not on daq but on data
      printprocesslog "INFO file $rawfile was probably taken on data and not daq."
    fi

    # get run and file number from filename
    runnumbererror="no"
    numbererror="no"
    numberfromname=$(echo "$file" | grep -E -o '20[1-9][0-9][01][0-9][0-3][0-9]_[0-9]{3}')
    if [ "$numberfromname" == "" ]
    then
      # without the numbers no valid query can be built
      # -> skip this file instead of failing later in the insert
      printprocesslog "ERROR couldn't get run and file number from file name ($file). -> continue"
      continue
    fi
    runnumberfromname=$(echo "$numberfromname" | cut -d_ -f1)
    # file number without leading zeros
    filenumberfromname=$(echo "$numberfromname" | cut -d_ -f2 | sed -e 's/^0//g' -e 's/^0//g')
    if [ "$runnumber" != "$runnumberfromname" ]
    then
      runnumbererror="yes"
      printprocesslog "ERROR for file $file: runnumber from date ($runnumber) and filename ($runnumberfromname) don't agree."
    fi

    # check if entry already exists
    query3="SELECT fNight FROM RunInfo WHERE Concat(fNight, '_', LPAD(fRunID, 3, 0))='$numberfromname'"
    printprocesslog "DEBUG check if entry already exists in DB. QUERY: $query3"
    result3=$("${mysqlcmd[@]}" -e "$query3")

    # only proceed with file
    # if information is not yet in database
    # or an update is wished ($doupdate)
    if [ "$result3" != "" ] && [ "$doupdate" == "no" ]
    then
      printprocesslog "INFO $file has been inserted already. -> continue "
      continue
    fi

    # reset everything which is read from the file, so that no values of the
    # previous file end up in the query in case this file can't be read
    runtype=
    numdrsfiles=
    step=
    compiletime=
    revnum=
    roi=
    roitm=
    numevents=
    numphys=
    numext1=
    numext2=
    numelp=
    numilp=
    numoth=
    numped=
    numtime=
    runstart=
    runstop=

    # check if fits file is corrupted
    numfitserrors=$(fverify "$rawfile" 2>/dev/null | countfitserrors)
    if [ "$numfitserrors" == "" ]
    then
      # no summary from fverify (e.g. raw file missing or fverify failed)
      # -> treat the file as corrupted
      printprocesslog "WARN could not get number of fits errors for $rawfile from fverify. Assuming 1 error."
      numfitserrors=1
    elif [ "$numfitserrors" -gt 0 ]
    then
      printprocesslog "WARN $rawfile has $numfitserrors fitserror(s). "
    fi

    if [ "$numfitserrors" -eq 0 ]
    then
      # read the header only once and extract all values from this copy
      header=$("$factpath/fitsdump" -h -t Events "$file" 2>/dev/null)

      # get run and file number from file
      runnumberfromfile=$(printf '%s\n' "$header" | grep NIGHT | grep -E -o '20[1-9][0-9][01][0-9][0-3][0-9]')
      filenumberfromfileorig=$(printf '%s\n' "$header" | grep RUNID | grep -E -o '[0-9]{1,3}')
      if [ "$runnumberfromfile" = "" ] || [ "$filenumberfromfileorig" = "" ]
      then
        printprocesslog "ERROR couldn't get run or file number from header of file ($file)."
      fi
      numberfromfile=$runnumberfromfile"_"$(printf %03d $filenumberfromfileorig)
      # compare numbers
      if [ "$numberfromfile" != "$numberfromname" ]
      then
        numbererror="yes"
        printprocesslog "ERROR for file $file: number from filename ($numberfromname) and file ($numberfromfile) don't agree."
      fi

      # check if this run has drs file
      # in case file is available, get STEP from header
      # in the very beginning only drs-files were existing
      # in the beginning the keywords DRSCALIB and STEP were not existing
      drsfile=$(echo "$file" | sed -e 's/fits/drs.fits/')
      numdrsfiles=$(ls $drsfile 2>/dev/null | wc -l)
      drscalib=$(printf '%s\n' "$header" | grep DRSCALIB | grep -E -o "['][TF][']" | sed -e "s/'//g")
      if [ "$drscalib" == "T" ]
      then
        step=$("$factpath/fitsdump" -h -t Events "$drsfile" 2>/dev/null | grep STEP | grep -E -o "['][012][']" | sed -e "s/'//g")
        if ! [ "$numdrsfiles" -eq 1 ]
        then
          printprocesslog "ERROR for file $file number of drsfiles ($numdrsfiles) and information from header ($drscalib) don't agree."
        fi
      elif ! [ "$drscalib" == "F" ]
      then
        printprocesslog "WARN for file $file DRSCALIB is neither T nor F."
      fi

      # get other variables from header
      runtype=$(printf '%s\n' "$header" | grep RUNTYPE | grep -E -o "['][a-z0-9._-]+[']" | sed -e "s/'//g" -e "s/_/-/g" -e "s/[.]//g")
      roi=$(printf '%s\n' "$header" | grep NROI | grep -v NROITM | grep -E -o '[0-9]{1,4}')
      roitm=$(printf '%s\n' "$header" | grep NROITM | grep -E -o '[0-9]{1,4}')
      numevents=$(printf '%s\n' "$header" | grep Events | grep -E -o '[0-9]+')
      numphys=$(printf '%s\n' "$header" | grep 'NTRG ' | grep -E -o '[0-9]+')
      numext1=$(printf '%s\n' "$header" | grep 'NTRGEXT1' | grep -E -o '[ ][0-9]+[ ]' | sed -e 's/\ //g')
      numext2=$(printf '%s\n' "$header" | grep 'NTRGEXT2' | grep -E -o '[ ][0-9]+[ ]' | sed -e 's/\ //g')
      numelp=$(printf '%s\n' "$header" | grep 'NTRGLPE' | grep -E -o '[0-9]+')
      numilp=$(printf '%s\n' "$header" | grep 'NTRGLPI' | grep -E -o '[0-9]+')
      numoth=$(printf '%s\n' "$header" | grep 'NTRGMISC' | grep -E -o '[0-9]+')
      numped=$(printf '%s\n' "$header" | grep 'NTRGPED' | grep -E -o '[0-9]+')
      numtime=$(printf '%s\n' "$header" | grep 'NTRGTIM' | grep -E -o '[0-9]+')
      compiled=$(printf '%s\n' "$header" | grep 'COMPILED' | grep -E -o "['][a-zA-Z]+[ ][ 12][0-9][ ]20[0-9][0-9][ ][0-2][0-9]:[0-5][0-9]:[0-5][0-9][']" | sed -e "s/'//g")
      if [ "$compiled" != "" ]
      then
        # convert to the format 'YYYY-MM-DD hh:mm:ss'
        compiletime=$(date +'%F %H:%M:%S' --date="${compiled}")
      fi
      revnum=$(printf '%s\n' "$header" | grep 'REVISION' | grep -E -o "['][0-9]+[:]?[0-9]*[MSP]*[']" | sed -e "s/'//g")

      # in newest data start time is in DATE-OBS
      # in older data start time is in TSTART
      # in the beginning TSTART was empty
      runstart=$(printf '%s\n' "$header" | grep DATE-OBS | grep -E -o "$headertimeregex")
      runstart2=$(printf '%s\n' "$header" | grep TSTART | grep -E -o "$headertimeregex")
      if [ "$runstart" == "" ]
      then
        if [ "$runstart2" == "" ]
        then
          # last resort: keyword DATE (but none of the DATE-* keywords)
          runstart=$(printf '%s\n' "$header" | grep DATE | grep -v 'DATE-' | grep -E -o "$headertimeregex")
        else
          runstart=$runstart2
        fi
      fi

      # in newest data stop time is in DATE-END
      # in older data stop time is in TSTOP
      # in the beginning TSTOP was empty
      runstop=$(printf '%s\n' "$header" | grep DATE-END | grep -E -o "$headertimeregex")
      runstop2=$(printf '%s\n' "$header" | grep TSTOP | grep -E -o "$headertimeregex")
      if [ "$runstop" == "" ]
      then
        if [ "$runstop2" == "" ]
        then
          # last resort: time of the last modification of the zipped file
          runstop=$(stat "$file" 2>/dev/null | grep Modify | grep -E -o "$stattimeregex")
        else
          runstop=$runstop2
        fi
      fi
    fi

    # set runtype to 'n/a', if no runtype could be retrieved from file
    if [ "$runtype" == "" ]
    then
      runtype="n/a"
    fi
    # on 15.11.2011 the runtypes had different names
    if [ "$date" == "2011/11/15" ]
    then
      case "$runtype" in
        drs-calib)      runtype="drs-gain" ;;
        drs-time-calib) runtype="drs-time" ;;
        pedestal)       runtype="drs-pedestal" ;;
        light-pulser)   runtype="light-pulser-ext" ;;
        pedestal-on)    runtype="pedestal" ;;
      esac
    fi

    # get key of the runtype
    query2="SELECT fRunTypeKEY FROM RunType WHERE fRunTypeName='$runtype'"
    printprocesslog "DEBUG get run type from DB. QUERY:$query2"
    # the words of the answer are split into an array:
    # [0] is the column name, [1] the key
    result2=( $("${mysqlcmd[@]}" -e "$query2") )
    if [ "${#result2[@]}" -eq 0 ]
    then
      printprocesslog "ERROR $numberfromname: Could not query fRunTypeKey for runtype $runtype ."
      continue
    fi

    # check if entry has already checksums
    query5="SELECT fNight FROM RunInfo WHERE Concat(fNight, '_', LPAD(fRunID, 3, 0))='$numberfromname'"
    query5="$query5 AND NOT ISNULL(fMd5sumRaw) AND NOT ISNULL(fMd5sumRawZip)"
    printprocesslog "DEBUG check if md5sums are alreay in DB. QUERY:$query5"
    result5=$("${mysqlcmd[@]}" -e "$query5")

    # get md5sums of raw and zip file
    # to save time for tests and update this can be skipped ($skipmd5sum)
    md5sum=
    md5sumzip=
    if [ "$skipmd5sum" == "no" ] || [ "$skipmd5sum" == "iffilled" ]
    then
      # 'iffilled': calculate the sums only if they are not yet in the DB
      if [ "$skipmd5sum" == "no" ] || [ "$result5" == "" ]
      then
        # only do the md5sum if the zipfile is already available
        if ls $file >/dev/null 2>&1
        then
          printprocesslog "INFO calculation md5sum for rawfile $rawfile"
          md5sum=$(md5sum "$rawfile" | cut -d' ' -f1)
          printprocesslog "INFO calculation md5sum for zipfile $file"
          md5sumzip=$(md5sum "$file" | cut -d' ' -f1)
        fi
      fi
    fi

    # insert or update depending on whether run exists
    if [ "$result3" == "" ]
    then
      query4="INSERT"
      querymid=" fNight=$runnumber, fRunID=$filenumberfromname, "
      querystop=
    else
      query4="UPDATE"
      querymid=
      querystop=" WHERE fNight=$runnumber AND fRunID=$filenumberfromname"
    fi
    query4="$query4 RunInfo SET $querymid fRunTypeKey=${result2[1]}"
    if [ "$numfitserrors" -eq 0 ]
    then
      query4="$query4, fRunStart='$runstart', fRunStop='$runstop'"
      # numerical columns which are only set if a value was found in the header
      # (list of column:variable pairs)
      for pair in fNumEvents:numevents fROI:roi fROITimeMarker:roitm \
        fNumPhysicsTrigger:numphys fNumExt1Trigger:numext1 fNumExt2Trigger:numext2 \
        fNumELPTrigger:numelp fNumILPTrigger:numilp fNumPedestalTrigger:numped \
        fNumTimeTrigger:numtime fNumOtherTrigger:numoth
      do
        column=${pair%%:*}
        varname=${pair##*:}
        value=${!varname}
        if [ "$value" != "" ]
        then
          query4="$query4, $column=$value"
        fi
      done
    fi
    if [ "$md5sum" != "" ]
    then
      query4="$query4, fMd5sumRaw='$md5sum', fMd5sumRawZip='$md5sumzip'"
    fi
    if [ "$numdrsfiles" != "" ]
    then
      query4="$query4, fHasDrsFile=$numdrsfiles"
    fi
    if [ "$step" != "" ]
    then
      query4="$query4, fDrsStep=$step"
    fi
    if [ "$compiletime" != "" ]
    then
      query4="$query4, fCompileTime='$compiletime'"
    fi
    if [ "$revnum" != "" ]
    then
      query4="$query4, fRevisionNumber='$revnum'"
    fi
    query4="$query4, fFitsFileErrors=$numfitserrors"
    query4="$query4 $querystop"
    printprocesslog "INFO insert/update entry in DB. QUERY: $query4"
    if ! "${mysqlcmd[@]}" -e "$query4"
    then
      printprocesslog "ERROR insert/update of $numberfromname to mysql failed."
      finish
    fi
  done
done

finish

Note: See TracBrowser for help on using the repository browser.