source: trunk/DataCheck/CheckRawData.sh@ 12923

Last change on this file since 12923 was 12788, checked in by Daniela Dorner, 13 years ago
included further checks and fill more information; common logfile; start from zipped files now
  • Property svn:executable set to *
File size: 14.6 KB
Line 
#!/bin/bash

# Checks the zipped raw data files (*.fits.gz) below /loc_data/zipraw for
# the most recent nights and inserts/updates the corresponding rows of the
# RunInfo table in the FactData database.
#
# Requirements visible from this script:
#   - Sourcefile.sh next to this script (presumably provides
#     printprocesslog, finish and $factpath -- confirm there)
#   - $HEADAS pointing to an ftools installation (fverify is used)
#   - a file .pw next to this script containing the DB password

# TODO: still missing: entry in status table

# options:
# For each option the last uncommented assignment wins; (un)comment the
# lines to switch the behaviour.
skipmd5sum="no" # fill md5 sums in any case
skipmd5sum="iffilled" # fill md5 sum only if they are not yet in db
#skipmd5sum="yes" # do not fill md5 sums in any case

doupdate="yes" # update all entries
doupdate="no" # fill only entries which are not yet existing

scriptdir=$(dirname "$0")

source "$scriptdir/Sourcefile.sh"
printprocesslog "INFO starting $0 with options doupdate=$doupdate and skipmd5sum=$skipmd5sum"

# setup to use ftools
source "$HEADAS/headas-init.sh"

# read the DB password; a missing or empty file is treated the same way
pwfile=$scriptdir/.pw
password=$(cat "$pwfile" 2>/dev/null)
if [ "$password" == "" ]
then
  echo "please insert password in $pwfile"
  printprocesslog "ERROR password for DB access in $pwfile missing"
  finish
fi

# check if software is available
if ! [ -e "$factpath/fitsdump" ]
then
  printprocesslog "ERROR $factpath/fitsdump is not available."
  finish
fi

# get the dates (YYYY/MM/DD) of 12 and 36 hours ago, i.e. two dates
dates=( "$(date +%Y/%m/%d --date="-12hour")" "$(date +%Y/%m/%d --date="-36hour")" )
#dates=( `find /loc_data/zipraw/ -mindepth 3 -type d | sort | sed -e 's/\/loc_data\/zipraw\///g'` ) #all available dates in /loc_data/zipraw

# Extended regular expression for the timestamps (date, 'T', time with
# six fractional digits) extracted from the FITS header keywords below.
timestampregex='20[1-9][0-9][\-][01][0-9][\-][0-3][0-9]T[0-2][0-9]:[0-6][0-9]:[0-6][0-9][.][0-9]{6}'

# do check for rawfiles of these dates
for date in "${dates[@]}"
do
  ziprawdir=/loc_data/zipraw/$date
  # check if data are available from that night
  if ! [ -d "$ziprawdir" ]
  then
    printprocesslog "INFO $ziprawdir does not exist."
    continue
  else
    printprocesslog "INFO processing $ziprawdir..."
  fi

  # find all fits.gz files starting with the oldest file
  printprocesslog "INFO finding files to be checked in $ziprawdir..."
  fitsgzfiles=$(find "$ziprawdir" -type f -name '*.fits.gz' | sort)

  # get runnumber from date (YYYY/MM/DD -> YYYYMMDD)
  runnumber=${date//\//}

  # loop to check files
  # NOTE: the list is split on whitespace on purpose; this relies on the
  # paths below $ziprawdir not containing blanks.
  for file in $fitsgzfiles
  do
    printprocesslog "INFO checking file $file"

    # These are only assigned on some code paths further down but are used
    # when building the final query; reset them so that no value leaks in
    # from the previously processed file.
    step=
    runtype=
    md5sum=
    md5sumzip=

    # raw and original file
    rawfile=$(echo "$file" | sed -e 's/zipraw/raw/' -e 's/fits[.]gz/fits/')
    origfile=$(echo "$rawfile" | sed -e 's/loc_data/daq/')

    # check if zipped file was changed in the last 30 minutes
    isnew=$(find "$file" -cmin -30)
    if [ "$isnew" != "" ]
    then
      printprocesslog "INFO $file is not older than 30 min. -> continue"
      continue
    fi

    # check if it is drs file (any path containing 'drs' is skipped)
    if [[ "$file" == *drs* ]]
    then
      printprocesslog "INFO $file is a drs file. -> continue"
      continue
    fi

    # check if file is already finished
    # original file on daq (if data was taken on daq)
    if [ -e "$origfile" ]
    then
      # get time of last modification as seconds since Epoch for both files
      timeorig=$(stat -c %Y "$origfile")
      timecopy=$(stat -c %Y "$rawfile")
      # compare times (also differs if one of the stat calls failed)
      if [ "$timeorig" != "$timecopy" ]
      then
        # if times are not the same, the file is still open => no check
        printprocesslog "INFO file $rawfile not yet closed. -> continue"
        continue
      fi
    else
      # if the origfile doesn't exist, the data was probably written not on daq but on data
      printprocesslog "INFO file $rawfile was probably taken on data and not daq."
    fi

    # get run and file number from filename (YYYYMMDD_NNN)
    runnumbererror="no"
    numbererror="no"
    numberfromname=$(echo "$file" | grep -E -o '20[1-9][0-9][01][0-9][0-3][0-9]_[0-9]{3}')
    runnumberfromname=$(echo "$numberfromname" | cut -d_ -f1)
    # strip up to two leading zeros so that the number is a plain integer
    filenumberfromname=$(echo "$numberfromname" | cut -d_ -f2 | sed -e 's/^0//g' -e 's/^0//g')
    if [ "$runnumber" != "$runnumberfromname" ]
    then
      runnumbererror="yes"
      printprocesslog "ERROR for file $file: runnumber from date ($runnumber) and filename ($runnumberfromname) don't agree."
    fi

    # check if entry already exists
    query3="SELECT fRunNumber FROM RunInfo WHERE Concat(fRunNumber, '_', LPAD(fFileNumber, 3, 0))='$numberfromname'"
    printprocesslog "DEBUG check if entry already exists in DB. QUERY: $query3"
    result3=$(/usr/bin/mysql -u operator --host=fact01.fact.local --password="$password" FactData -e "$query3")

    # only proceed with file
    #  if information is not yet in database
    #  or an update is wished ($doupdate)
    if [ "$result3" != "" ] && [ "$doupdate" == "no" ]
    then
      printprocesslog "INFO $file has been inserted already. -> continue "
      continue
    fi

    # check if this run has drs file
    # Done before the FITS check because the result is written to the DB
    # even for files with FITS errors.
    drsfile=$(echo "$file" | sed -e 's/fits/drs.fits/')
    numdrsfiles=$(ls "$drsfile" 2>/dev/null | wc -l)

    # check if fits file is corrupted
    numfitserrors=0
    checkfitsfile=$(fverify "$rawfile" | grep '0 error(s)')
    if [ "$checkfitsfile" == "" ]
    then
      numfitserrors=1
    fi

    if [ "$numfitserrors" -eq 0 ]
    then
      # dump the header of the Events table once and extract all keywords
      # from this copy instead of running fitsdump for every keyword
      header=$("$factpath/fitsdump" -h -t Events "$file" 2>/dev/null)

      # get run and file number from file
      runnumberfromfile=$(grep NIGHT <<< "$header" | grep -E -o '20[1-9][0-9][01][0-9][0-3][0-9]')
      filenumberfromfileorig=$(grep RUNID <<< "$header" | grep -E -o '[0-9]{1,3}')
      if [ "$runnumberfromfile" = "" ] || [ "$filenumberfromfileorig" = "" ]
      then
        # NOTE(review): the numbers are read from the file header here, the
        # message text says "file name" -- wording kept unchanged.
        printprocesslog "ERROR couldn't get run or file number from file name ($file)."
      fi
      # intentionally unquoted: an empty value must yield 000
      # shellcheck disable=SC2086
      numberfromfile=${runnumberfromfile}_$(printf %03d $filenumberfromfileorig)
      # compare numbers
      if [ "$numberfromfile" != "$numberfromname" ]
      then
        numbererror="yes"
        printprocesslog "ERROR for file $file: number from filename ($numberfromname) and file ($numberfromfile) don't agree."
      fi

      # in case a drs file is announced in the header, get STEP from its header
      # in the very beginning only drs-files were existing
      # in the beginning the keywords DRSCALIB and STEP were not existing
      drscalib=$(grep DRSCALIB <<< "$header" | grep -E -o "['][TF][']" | sed -e "s/'//g")
      if [ "$drscalib" == "T" ]
      then
        step=$("$factpath/fitsdump" -h -t Events "$drsfile" 2>/dev/null | grep STEP | grep -E -o "['][012][']" | sed -e "s/'//g")
        if ! [ "$numdrsfiles" -eq 1 ]
        then
          printprocesslog "ERROR for file $file number of drsfiles ($numdrsfiles) and information from header ($drscalib) don't agree."
        fi
      fi

      # get other variables from header
      # runtype: '_' is replaced by '-' and '.' is removed
      runtype=$(grep RUNTYPE <<< "$header" | grep -E -o "['][a-z0-9._-]+[']" | sed -e "s/'//g" -e "s/_/-/g" -e "s/[.]//g")
      #echo "runtype for file "$file": "$runtype
      roi=$(grep NROI <<< "$header" | grep -v NROITM | grep -E -o '[0-9]{1,4}')
      roitm=$(grep NROITM <<< "$header" | grep -E -o '[0-9]{1,4}')
      numevents=$(grep Events <<< "$header" | grep -E -o '[0-9]+')
      numphys=$(grep 'NTRG ' <<< "$header" | grep -E -o '[0-9]+')
      numext1=$(grep 'NTRGEXT1' <<< "$header" | grep -E -o '[ ][0-9]+[ ]' | sed -e 's/\ //g')
      numext2=$(grep 'NTRGEXT2' <<< "$header" | grep -E -o '[ ][0-9]+[ ]' | sed -e 's/\ //g')
      numelp=$(grep 'NTRGLPE' <<< "$header" | grep -E -o '[0-9]+')
      numilp=$(grep 'NTRGLPI' <<< "$header" | grep -E -o '[0-9]+')
      numoth=$(grep 'NTRGMISC' <<< "$header" | grep -E -o '[0-9]+')
      numped=$(grep 'NTRGPED' <<< "$header" | grep -E -o '[0-9]+')
      numtime=$(grep 'NTRGTIM' <<< "$header" | grep -E -o '[0-9]+')

      # in newest data start time is in DATE-OBS
      # in older data start time is in TSTART
      # in the beginning TSTART was empty (fall back to DATE)
      runstart=$(grep DATE-OBS <<< "$header" | grep -E -o "$timestampregex")
      runstart2=$(grep TSTART <<< "$header" | grep -E -o "$timestampregex")
      if [ "$runstart" == "" ]
      then
        if [ "$runstart2" == "" ]
        then
          runstart=$(grep DATE <<< "$header" | grep -v 'DATE-' | grep -E -o "$timestampregex")
        else
          runstart=$runstart2
        fi
      fi

      # in newest data stop time is in DATE-END
      # in older data stop time is in TSTOP
      # in the beginning TSTOP was empty (fall back to the modification
      # time of the zipped file)
      runstop=$(grep DATE-END <<< "$header" | grep -E -o "$timestampregex")
      runstop2=$(grep TSTOP <<< "$header" | grep -E -o "$timestampregex")
      if [ "$runstop" == "" ]
      then
        if [ "$runstop2" == "" ]
        then
          runstop=$(stat "$file" 2>/dev/null | grep Modify | grep -E -o '20[1-9][0-9][\-][01][0-9][\-][0-3][0-9][ ][0-2][0-9]:[0-6][0-9]:[0-6][0-9][.][0-9]{9}')
        else
          runstop=$runstop2
        fi
      fi
    fi

    # set runtype to 'n/a', if no runtype could be retrieved from file
    if [ "$runtype" == "" ]
    then
      runtype="n/a"
    fi
    # on 15.11.2011 the runtypes had different names
    if [ "$date" == "2011/11/15" ]
    then
      case "$runtype" in
        drs-calib)
          runtype="drs-gain"
          ;;
        drs-time-calib)
          runtype="drs-time"
          ;;
        pedestal)
          runtype="drs-pedestal"
          ;;
        light-pulser)
          runtype="light-pulser-ext"
          ;;
        pedestal-on)
          runtype="pedestal"
          ;;
      esac
    fi

    # get key of the runtype
    # The output is split into words; element 0 is the column header
    # printed by mysql, element 1 the key itself.
    query2="SELECT fRunTypeKEY FROM RunType WHERE fRunTypeName='$runtype'"
    printprocesslog "DEBUG get run type from DB. QUERY:$query2"
    result2=( $(/usr/bin/mysql -u operator --host=fact01.fact.local --password="$password" FactData -e "$query2") )
    if [ ${#result2[@]} -eq 0 ]
    then
      printprocesslog "ERROR $numberfromname: Could not query fRunTypeKey for runtype $runtype ."
      continue
    fi

    # check if entry has already checksums
    query5="SELECT fRunNumber FROM RunInfo WHERE Concat(fRunNumber, '_', LPAD(fFileNumber, 3, 0))='$numberfromname'"
    query5="$query5 AND NOT ISNULL(fMd5sumRaw) AND NOT ISNULL(fMd5sumRawZip)"
    printprocesslog "DEBUG check if md5sums are alreay in DB. QUERY:$query5"
    result5=$(/usr/bin/mysql -u operator --host=fact01.fact.local --password="$password" FactData -e "$query5")

    # get md5sums of raw and zip file
    # to save time for tests and update this can be skipped ($skipmd5sum):
    #  "no"       -> always calculate
    #  "iffilled" -> calculate only if the DB has no checksums yet
    if [ "$skipmd5sum" == "no" ] || [ "$skipmd5sum" == "iffilled" ]
    then
      if [ "$skipmd5sum" == "no" ] || [ "$result5" == "" ]
      then
        # only do the md5sum if the zipfile is already available
        if ls "$file" >/dev/null 2>&1
        then
          printprocesslog "INFO calculation md5sum for rawfile $rawfile"
          md5sum=$(md5sum "$rawfile" | cut -d' ' -f1)
          printprocesslog "INFO calculation md5sum for zipfile $file"
          md5sumzip=$(md5sum "$file" | cut -d' ' -f1)
        fi
      fi
    fi

    # insert or update depending on whether run exists
    if [ "$result3" == "" ]
    then
      query4="INSERT"
      querymid=" fRunNumber=$runnumber, fFileNumber=$filenumberfromname, "
      querystop=
    else
      query4="UPDATE"
      querymid=
      querystop=" WHERE fRunNumber=$runnumber AND fFileNumber=$filenumberfromname"
    fi
    query4="$query4 RunInfo SET $querymid fRunTypeKey=${result2[1]}"
    if [ "$numfitserrors" -eq 0 ]
    then
      query4="$query4, fRunStart='$runstart', fRunStop='$runstop'"
      # append every numeric column for which a value was found in the header
      for assignment in \
        "fNumEvents=$numevents" \
        "fROI=$roi" \
        "fROITimeMarker=$roitm" \
        "fNumPhysicsTrigger=$numphys" \
        "fNumExt1Trigger=$numext1" \
        "fNumExt2Trigger=$numext2" \
        "fNumELPTrigger=$numelp" \
        "fNumILPTrigger=$numilp" \
        "fNumPedestalTrigger=$numped" \
        "fNumTimeTrigger=$numtime" \
        "fNumOtherTrigger=$numoth"
      do
        # skip columns whose value (the part after the first '=') is empty
        if [ "${assignment#*=}" != "" ]
        then
          query4="$query4, $assignment"
        fi
      done
    fi
    if [ "$md5sum" != "" ]
    then
      query4="$query4, fMd5sumRaw='$md5sum', fMd5sumRawZip='$md5sumzip'"
      query4="$query4, fHasDrsFile=$numdrsfiles"
    fi
    if [ "$step" != "" ]
    then
      query4="$query4, fDrsStep=$step"
    fi
    query4="$query4, fFitsFileErrors=$numfitserrors"
    query4="$query4 $querystop"
    printprocesslog "INFO insert/update entry in DB. QUERY: $query4"
    if ! /usr/bin/mysql -u operator --host=fact01.fact.local --password="$password" FactData -e "$query4"
    then
      printprocesslog "ERROR insert/update of $numberfromname to mysql failed."
      finish
    fi
  done
done

finish

Note: See TracBrowser for help on using the repository browser.