source: trunk/DataCheck/CheckRawData.sh@ 12632

Last change on this file since 12632 was 12632, checked in by Daniela Dorner, 13 years ago
added new values, rearranged checks
  • Property svn:executable set to *
File size: 14.3 KB
Line 
#!/bin/bash
#
# Checks the raw fits files of the selected nights and fills the results
# (fits errors, values from the fits header, md5 sums) into the table RunInfo
# of the database FactData. All steps are protocolled in $logfile.

# logfile of today in the home directory of the user running the script
today=$(date +%F)
logfile=/home/$(whoami)/DataCheck/log/CheckRaw$today.log

# handling of the md5 sums: exactly one of the following lines is active
#skipmd5sum="no" # fill md5 sums in any case
skipmd5sum="iffilled" # fill md5 sum only if they are not yet in db
#skipmd5sum="yes" # do not fill md5 sums in any case

# handling of runs which are already in the database
doupdate="yes" # update all entries
#doupdate="no" # fill only entries which are not yet existing

# separate this execution from the previous one in the logfile
# and protocol the settings used
{
   echo ""
   echo ""
   echo ""
   echo "$(date)executing $0..."
   echo "=====> doupdate: $doupdate"
   echo "=====> skipmd5sum: $skipmd5sum"
} >> "$logfile" 2>&1

# read the database password used for all mysql queries of this script
password=$(cat /home/fact/DataCheck/.pw)
if [ "$password" == "" ]
then
   echo "please insert password in .pw file"
   # no query can work without the password => stop before touching any file
   exit 1
fi

# setup to use ftools (fverify is used to check the fits files)
export HEADAS=/opt/heasoft-6.11/x86_64-unknown-linux-gnu-libc2.13-0/
source "$HEADAS/headas-init.sh"

# nights to be checked (format YYYY/MM/DD): exactly one of the following
# lines is active
# today and the 3 previous days
#dates=( `date +%Y/%m/%d` `date +%Y/%m/%d --date="-1day"` `date +%Y/%m/%d --date="-2day"` `date +%Y/%m/%d --date="-3day"` )
# only the day 6 days ago
#dates=( `date +%Y/%m/%d --date="-6day"` )
# one fixed night
dates=( "2011/11/23" )

# check the raw files of the selected nights and fill the results into the
# table RunInfo of the database
# uses from the part of the script above:
#  $dates $logfile $password $doupdate $skipmd5sum

# append the arguments as one line to the logfile
print_log()
{
   echo "$@" >> "$logfile" 2>&1
}

# send an SQL statement to the database FactData, result goes to stdout
#  $1: SQL statement
# NOTE: the password is handed over as command line argument, i.e. it is
#       visible in the process list while the query is running
send_query()
{
   /usr/bin/mysql -u operator --host=fact01.fact.local --password="$password" FactData -e "$1"
}

# succeeds if the fverify output read from stdin reports exactly 0 errors
# (the 0 must not be the last digit of a larger number like '10 error(s)')
reports_no_errors()
{
   grep -E '(^|[^0-9])0 error\(s\)' > /dev/null
}

# print the lines of the fits header (cached in $header) matching a pattern
#  $1: pattern for grep
header_lines()
{
   printf '%s\n' "$header" | grep -e "$1"
}

# print the present name of a runtype which was written on 2011/11/15,
# when the runtypes had different names; other names are printed unchanged
#  $1: runtype as found in the fits header
translate_runtype()
{
   case "$1" in
      drs-calib)      echo "drs-gain" ;;
      drs-time-calib) echo "drs-time" ;;
      pedestal)       echo "drs-pedestal" ;;
      light-pulser)   echo "light-pulser-ext" ;;
      pedestal-on)    echo "pedestal" ;;
      *)              echo "$1" ;;
   esac
}

# format of the time stamps in the fits header
timeregex='20[1-9][0-9][\-][01][0-9][\-][0-3][0-9]T[0-2][0-9]:[0-6][0-9]:[0-6][0-9][.][0-9]{6}'

for date in "${dates[@]}"
do
   print_log ""
   print_log ""
   print_log ""
   rawdir=/loc_data/raw/$date
   # the run number is the date of the night without the slashes
   runnumber=$(echo "$date" | sed -e 's/\///g')
   print_log "$(date): processing files in $rawdir"
   # check if data are available from that night
   if ! [ -d "$rawdir" ]
   then
      print_log "$(date): no data available in $rawdir"
      continue
   fi

   # without fverify each file would be filled as corrupted into the database
   if ! command -v fverify > /dev/null 2>&1
   then
      echo "ERROR: fverify not available (check the setup of the ftools) => exit"
      print_log "ERROR: fverify not available (check the setup of the ftools) => exit"
      exit 1
   fi

   # find all fits-files starting with the oldest file
   print_log "$(date): finding files to be zipped in $rawdir..."
   fitsfiles=$(find "$rawdir" -type f -name '*.fits' | sort)

   # loop over the files to check them
   print_log "$(date): checking files in $rawdir..."
   # word splitting of $fitsfiles is wanted here (one file per word), i.e.
   # this assumes that the paths of the raw files contain no whitespace
   for file in $fitsfiles
   do
      print_log ""
      print_log "checking file $file"

      # forget everything which was determined for the previous file, so that
      # no old value ends up in the query or in the summary of this file
      header=
      querymid=
      numdrsfiles=
      runtype=
      roi=
      numevents=
      numphys=
      numext1=
      numext2=
      numelp=
      numilp=
      numoth=
      numped=
      numtime=
      runstart=
      runstop=

      # check if raw file was changed in the last 30 minutes
      isnew=$(find "$file" -cmin -30)
      if [ "$isnew" != "" ]
      then
         print_log "$file is not older than 30 min => continue"
         continue
      fi

      # check if it is drs file (i.e. if 'drs' is part of the path)
      if [[ "$file" == *drs* ]]
      then
         print_log "$file is a drs file => continue"
         continue
      fi

      # check if file is already finished
      # original file on daq (if data was taken on daq)
      origfile=$(echo "$file" | sed -e 's/loc_data/daq/')
      if [ -e "$origfile" ]
      then
         # get time of last modification as seconds since Epoch for both files
         timeorig=$(stat -c %Y "$origfile")
         timecopy=$(stat -c %Y "$file")
         # compare times
         # (a failing stat makes the test fail, so the file is skipped as well)
         if ! [ "$timeorig" -eq "$timecopy" ]
         then
            # if times are not the same, the file is still open => skip it
            print_log "$(date): file $file not yet closed => continue"
            continue
         fi
      else
         # if the origfile doesn't exist, the data was probably written not on daq but on data
         print_log "$(date): file $file was probably taken on data and not daq "
      fi

      # get run and file number from filename (<YYYYMMDD>_<NNN>)
      runnumbererror="no"
      numbererror="no"
      numberfromname=$(echo "$file" | grep -E -o '20[1-9][0-9][01][0-9][0-3][0-9]_[0-9]{3}')
      runnumberfromname=$(echo "$numberfromname" | cut -d_ -f1)
      # remove up to two leading zeros from the file number
      filenumberfromname=$(echo "$numberfromname" | cut -d_ -f2 | sed -e 's/^0//g' -e 's/^0//g')
      if [ "$runnumber" != "$runnumberfromname" ]
      then
         runnumbererror="yes"
         echo "ERROR: for file $file runnumber from date and filename don't agree ($runnumber - $runnumberfromname)"
         print_log "ERROR: for file $file runnumber from date and filename don't agree ($runnumber - $runnumberfromname)"
      fi

      # check if entry already exists
      query3="SELECT fRunNumber FROM RunInfo WHERE Concat(fRunNumber, '_', LPAD(fFileNumber, 3, 0))='$numberfromname'"
      print_log "Q3:$query3"
      result3=$(send_query "$query3" 2>> "$logfile")

      # only proceed with file
      # if information is not yet in database
      # or if an update is wished ($doupdate)
      if [ "$result3" != "" ] && [ "$doupdate" == "no" ]
      then
         print_log "$file has been inserted already => continue "
         continue
      fi

      # check if entry has already checksums
      query5="SELECT fRunNumber FROM RunInfo WHERE Concat(fRunNumber, '_', LPAD(fFileNumber, 3, 0))='$numberfromname'"
      query5="$query5 AND NOT ISNULL(fMd5sumRaw) AND NOT ISNULL(fMd5sumRawZip)"
      print_log "Q5:$query5"
      result5=$(send_query "$query5" 2>> "$logfile")

      # get md5sums of raw and zip file
      # to save time for tests and update this can be skipped ($skipmd5sum)
      md5sum=
      md5sumzip=
      if [ "$skipmd5sum" == "no" ] || [ "$skipmd5sum" == "iffilled" ]
      then
         # for 'iffilled' calculate them only if they are not yet in the db
         if [ "$skipmd5sum" == "no" ] || [ "$result5" == "" ]
         then
            zipfile=$(echo "$file" | sed -e 's/raw/zipraw/' -e 's/fits/fits.gz/')
            # only do the md5sum if the zipfile is already available
            # (the output of ls is kept in the logfile)
            if ls "$zipfile" >> "$logfile" 2>&1
            then
               print_log "calculation md5sum for raw..."
               md5sum=$(md5sum "$file" | cut -d' ' -f1)
               print_log "calculation md5sum for zipraw..."
               md5sumzip=$(md5sum "$zipfile" | cut -d' ' -f1)
            fi
         fi
      fi

      # check if fits file is corrupted (0: no errors, 1: errors exist)
      if fverify "$file" 2>> "$logfile" | reports_no_errors
      then
         numfitserrors=0
      else
         numfitserrors=1
      fi

      if [ $numfitserrors -eq 0 ]
      then
         # read the header only once, all values below are taken from it
         header=$(/home/fact/FACT++/fitsdump -h -t Events "$file" 2>/dev/null)

         # get run and file number from file
         runnumberfromfile=$(header_lines NIGHT | grep -E -o '20[1-9][0-9][01][0-9][0-3][0-9]')
         filenumberfromfileorig=$(header_lines RUNID | grep -E -o '[0-9]{1,3}')
         numberfromfile=$runnumberfromfile"_"$(printf %03d $filenumberfromfileorig)
         # compare numbers
         if [ "$numberfromfile" != "$numberfromname" ]
         then
            numbererror="yes"
            echo "ERROR: for file $file number from filename and file don't agree ($numberfromname -$numberfromfile)"
            print_log "ERROR: for file $file number from filename and file don't agree ($numberfromname -$numberfromfile)"
         fi

         # check if this run has drs file
         drsfile=$(echo "$file" | sed -e 's/fits/drs.fits/')
         numdrsfiles=$(ls "$drsfile" 2>/dev/null | wc -l)

         # get other variables from header
         runtype=$(header_lines RUNTYPE | grep -E -o "['][a-z-]+[']" | sed -e "s/'//g")
         roi=$(header_lines NROI | grep -v NROITM | grep -E -o '[0-9]{1,4}')
         numevents=$(header_lines Events | grep -E -o '[0-9]+')
         numphys=$(header_lines 'NTRG ' | grep -E -o '[0-9]+')
         numext1=$(header_lines 'NTRGEXT1' | grep -E -o '[ ][0-9]+[ ]' | sed -e 's/\ //g')
         numext2=$(header_lines 'NTRGEXT2' | grep -E -o '[ ][0-9]+[ ]' | sed -e 's/\ //g')
         numelp=$(header_lines 'NTRGLPE' | grep -E -o '[0-9]+')
         numilp=$(header_lines 'NTRGLPI' | grep -E -o '[0-9]+')
         numoth=$(header_lines 'NTRGMISC' | grep -E -o '[0-9]+')
         numped=$(header_lines 'NTRGPED' | grep -E -o '[0-9]+')
         numtime=$(header_lines 'NTRGTIM' | grep -E -o '[0-9]+')

         # in newest data start time is in DATE-OBS
         # in older data start time is in TSTART
         # in the beginning TSTART was empty (then DATE is used)
         runstart=$(header_lines DATE-OBS | grep -E -o "$timeregex")
         runstart2=$(header_lines TSTART | grep -E -o "$timeregex")
         if [ "$runstart" == "" ]
         then
            if [ "$runstart2" == "" ]
            then
               runstart=$(header_lines DATE | grep -v 'DATE-' | grep -E -o "$timeregex")
            else
               runstart=$runstart2
            fi
         fi

         # in newest data stop time is in DATE-END
         # in older data stop time is in TSTOP
         # in the beginning TSTOP was empty (then the time of the last
         # modification of the file is used)
         runstop=$(header_lines DATE-END | grep -E -o "$timeregex")
         runstop2=$(header_lines TSTOP | grep -E -o "$timeregex")
         if [ "$runstop" == "" ]
         then
            if [ "$runstop2" == "" ]
            then
               runstop=$(stat "$file" 2>/dev/null | grep Modify | grep -E -o '20[1-9][0-9][\-][01][0-9][\-][0-3][0-9][ ][0-2][0-9]:[0-6][0-9]:[0-6][0-9][.][0-9]{9}')
            else
               runstop=$runstop2
            fi
         fi
      fi

      # set runtype to 'n/a' if no runtype could be determined
      if [ "$runtype" == "" ]
      then
         runtype="n/a"
      fi
      # on 15.11.2011 the runtypes had different names
      if [ "$date" == "2011/11/15" ]
      then
         runtype=$(translate_runtype "$runtype")
      fi

      # get key of the runtype
      query2="SELECT fRunTypeKEY FROM RunType WHERE fRunTypeName='$runtype'"
      print_log "Q2:$query2"
      # the output consists of the column name [0] and the key [1]
      result2=( $(send_query "$query2" 2>> "$logfile") )
      if [ ${#result2[@]} -lt 2 ]
      then
         echo "$numberfromname: Could not query fRunTypeKey for runtype $runtype"
         print_log "$numberfromname: Could not query fRunTypeKey for runtype $runtype"
         # stop the whole script and signal the failure to the caller
         exit 1
      fi

      # insert or update depending on whether run exists
      if [ "$result3" == "" ]
      then
         query4="INSERT"
         querymid=" fRunNumber=$runnumber, fFileNumber=$filenumberfromname, "
         querystop=""
      else
         query4="UPDATE"
         # run and file number are only used to select the row
         querymid=""
         querystop=" WHERE fRunNumber=$runnumber AND fFileNumber=$filenumberfromname"
      fi
      query4="$query4 RunInfo SET $querymid fRunTypeKey=${result2[1]}"
      # the values from the header are available only for files without errors
      if [ $numfitserrors -eq 0 ]
      then
         query4="$query4, fNumEvents=$numevents, fROI=$roi"
         query4="$query4, fRunStart='$runstart', fRunStop='$runstop'"
         # the numbers of triggers are filled only if they were in the header
         if [ "$numphys" != "" ]; then query4="$query4, fNumPhysicsTrigger=$numphys"; fi
         if [ "$numext1" != "" ]; then query4="$query4, fNumExt1Trigger=$numext1"; fi
         if [ "$numext2" != "" ]; then query4="$query4, fNumExt2Trigger=$numext2"; fi
         if [ "$numelp" != "" ]; then query4="$query4, fNumELPTrigger=$numelp"; fi
         if [ "$numilp" != "" ]; then query4="$query4, fNumILPTrigger=$numilp"; fi
         if [ "$numped" != "" ]; then query4="$query4, fNumPedestalTrigger=$numped"; fi
         if [ "$numtime" != "" ]; then query4="$query4, fNumTimeTrigger=$numtime"; fi
         if [ "$numoth" != "" ]; then query4="$query4, fNumOtherTrigger=$numoth"; fi
      fi
      if [ "$md5sum" != "" ]
      then
         query4="$query4, fMd5sumRaw='$md5sum', fMd5sumRawZip='$md5sumzip'"
         # the number of drs files is known only for files without errors
         if [ "$numdrsfiles" != "" ]
         then
            query4="$query4, fHasDrsFile=$numdrsfiles"
         fi
      fi
      query4="$query4, fFitsFileErrors=$numfitserrors"
      query4="$query4 $querystop"
      print_log "Q4:$query4"
      if ! send_query "$query4" >> "$logfile" 2>&1
      then
         print_log "insert/update of $numberfromname to mysql failed"
      fi

      # print summary for debug
      print_log "*******"
      print_log "summary for file $file"
      print_log "*******"
      print_log " errors: $numfitserrors (1 means errors exist)"
      print_log " number of drs files: $numdrsfiles"
      print_log " runnumber error: $runnumbererror"
      print_log " number error: $numbererror"
      print_log " roi: $roi"
      print_log " runtype: $runtype"
      print_log " numevents: $numevents"
      print_log " md5sum: $md5sum"
      print_log " md5sum(zip): $md5sumzip"
      print_log " start: $runstart"
      print_log " stop: $runstop"

      # missing
      # check whether entry has been made (status table)
   done
done
Note: See TracBrowser for help on using the repository browser.