source: trunk/DataCheck/CheckRawData.sh@ 12622

Last change on this file since 12622 was 12609, checked in by Daniela Dorner, 13 years ago
added new information and checks
  • Property svn:executable set to *
File size: 9.6 KB
Line 
#!/bin/bash
#
# CheckRawData.sh
#
# Checks the raw fits files of the selected nights and inserts/updates the
# corresponding rows of the RunInfo table in the FactData database.
# All progress messages are appended to a per-day log file.

today=$(date +%F)
logfile=/home/$(whoami)/DataCheck/log/CheckRaw$today.log

# Handling of the md5 sums (exactly one line should be active):
#skipmd5sum="no"       # fill md5 sums in any case
skipmd5sum="iffilled"  # fill md5 sum only if they are not yet in db
#skipmd5sum="yes"      # do not fill md5 sums in any case

# Handling of entries that already exist in the database:
doupdate="yes"  # update all entries
#doupdate="no"  # fill only entries which are not yet existing

# Write the log header with a single redirection, so that an unwritable log
# file produces one error message instead of one per line.
{
  echo ""
  echo ""
  echo ""
  echo "$(date): executing $0..."
  echo "=====> doupdate: $doupdate"
  echo "=====> skipmd5sum: $skipmd5sum"
} >> "$logfile" 2>&1

# Database password.
# NOTE(review): the log file lives below /home/$(whoami), but the password is
# always read from the home directory of user 'fact' - confirm this is intended.
password=$(cat /home/fact/DataCheck/.pw)
if [ -z "$password" ]; then
  # Without a password every database query below would fail, so stop here
  # instead of running through all files for nothing.
  echo "please insert password in .pw file" >&2
  echo "$(date): no password found in .pw file => exit" >> "$logfile" 2>&1
  exit 1
fi
25
# setup to use ftools (provides fverify, which is used to check the fits files)
export HEADAS=/opt/heasoft-6.11/x86_64-unknown-linux-gnu-libc2.13-0/
if [ -r "$HEADAS/headas-init.sh" ]; then
  source "$HEADAS/headas-init.sh"
else
  # Keep going (as before), but say clearly what is wrong.
  echo "warning: cannot read $HEADAS/headas-init.sh - ftools are not set up" >&2
fi

# Nights to be checked, as YYYY/MM/DD (requires GNU date for --date).
# Only the night of two days ago is processed at the moment; to check the
# last nights instead, activate the first assignment and disable the second:
#dates=( $(date +%Y/%m/%d) $(date +%Y/%m/%d --date="-1day") $(date +%Y/%m/%d --date="-2day") $(date +%Y/%m/%d --date="-3day") )
dates=( $(date +%Y/%m/%d --date="-2day") )
34# do rsync for rawfiles of these dates
# ---------------------------------------------------------------------------
# helpers
# ---------------------------------------------------------------------------

# Append one line to the log file.
# Globals:   logfile (read)
# Arguments: the text to be logged
log() {
  printf '%s\n' "$*" >> "$logfile" 2>&1
}

# Execute one SQL statement in the FactData database.
# Globals:   password (read)
# Arguments: $1 - SQL statement
# Outputs:   output of the mysql client on stdout
# Returns:   exit status of the mysql client
# NOTE(review): the password is passed on the command line and is therefore
# visible in the process list; consider a --defaults-extra-file instead.
run_query() {
  /usr/bin/mysql -u operator --host=fact01.fact.local --password="$password" FactData -e "$1"
}

# Translate the output of fverify into the value stored in fFitsFileErrors.
# Arguments: $1 - output of fverify
# Outputs:   0 if the output reports exactly "0 error(s)", otherwise 1
#            (also 1 for empty output, e.g. when fverify could not be run)
fits_error_flag() {
  # The digit 0 must not be preceded by another digit; otherwise
  # "10 error(s)" would be accepted as "0 error(s)".
  if grep -E -q '(^|[^0-9])0 error\(s\)' <<<"$1"; then
    echo 0
  else
    echo 1
  fi
}

# Build the INSERT or UPDATE statement for one file in RunInfo.
# Arguments:
#   $1  - result of the "entry exists" query (empty => INSERT, else UPDATE)
#   $2  - run number (night)        $3  - file number
#   $4  - run type key              $5  - number of events
#   $6  - roi                       $7  - md5 sum of raw file (may be empty)
#   $8  - md5 sum of zipped file (may be empty)
#   $9  - number of drs files       $10 - fits error flag
#   $11 - run start                 $12 - run stop
# Outputs:   the SQL statement on stdout
build_runinfo_query() {
  local exists=$1 run=$2 filenum=$3 typekey=$4 nevents=$5 roi=$6
  local md5raw=$7 md5zip=$8 ndrs=$9 nerrors=${10} start=${11} stop=${12}
  local verb fields where

  # Everything is local and rebuilt for each call, so nothing from the
  # statement of a previous file can leak into this one.
  fields="fRunTypeKey=$typekey, fNumEvents=$nevents, fROI=$roi"
  where=""
  if [ -z "$exists" ]; then
    verb="INSERT"
    fields="fRunNumber=$run, fFileNumber=$filenum, $fields"
  else
    verb="UPDATE"
    where=" WHERE fRunNumber=$run AND fFileNumber=$filenum"
  fi

  # Write only check sums which were really calculated: an empty string in
  # the database counts as "not NULL" and would prevent a later filling.
  if [ -n "$md5raw" ]; then
    fields="$fields, fMd5sumRaw='$md5raw'"
  fi
  if [ -n "$md5zip" ]; then
    fields="$fields, fMd5sumRawZip='$md5zip'"
  fi

  fields="$fields, fHasDrsFile=$ndrs"
  fields="$fields, fFitsFileErrors=$nerrors"
  fields="$fields, fRunStart='$start', fRunStop='$stop'"

  printf '%s RunInfo SET %s%s\n' "$verb" "$fields" "$where"
}

# ---------------------------------------------------------------------------
# main loop: check the raw files of all selected nights
# ---------------------------------------------------------------------------

fitsdump=/home/fact/FACT++/fitsdump
# time stamp as it is searched for in the header (TSTART, TSTOP, DATE)
timestamp_re='20[1-9][0-9][\-][01][0-9][\-][0-3][0-9]T[0-2][0-9]:[0-6][0-9]:[0-6][0-9][.][0-9]{6}'

for date in "${dates[@]}"; do
  log ""
  log ""
  log ""
  rawdir=/loc_data/raw/$date
  # night as number: YYYY/MM/DD -> YYYYMMDD
  runnumber=${date//\//}
  log "$(date): processing files in $rawdir"

  # check if data are available from that night
  if ! [ -d "$rawdir" ]; then
    log "$(date): no data available in $rawdir"
    continue
  fi

  # find all fits-files starting with the oldest file
  log "$(date): finding files to be zipped in $rawdir..."
  fitsfiles=$(find "$rawdir" -type f -name '*.fits' | sort)

  log "$(date): checking files in $rawdir..."
  # $fitsfiles is split on whitespace on purpose (one path per word);
  # the raw data paths do not contain blanks.
  for file in $fitsfiles; do
    log ""
    log "checking file $file"

    # skip files which were changed in the last 30 minutes
    isnew=$(find "$file" -cmin -30)
    if [ -n "$isnew" ]; then
      log "$file is not older than 30 min => continue"
      continue
    fi

    # skip drs files ('drs' anywhere in the path, as before)
    case $file in
      *drs*)
        log "$file is a drs file => continue"
        continue
        ;;
    esac

    # check if file is already finished:
    # compare with the original file on daq (if data was taken on daq)
    origfile=${file/loc_data/daq}
    if [ -e "$origfile" ]; then
      # time of last modification as seconds since Epoch for both files
      timeorig=$(stat -c %Y "$origfile")
      timecopy=$(stat -c %Y "$file")
      if ! [ "$timeorig" -eq "$timecopy" ]; then
        # if times are not the same, the file is still open
        log "$(date): file $file not yet closed => continue"
        continue
      fi
    else
      # if the origfile doesn't exist, the data was probably written not on daq but on data
      log "$(date): file $file was probably taken on data and not daq "
    fi

    # read the header of the Events table only once per file
    header=$("$fitsdump" -h -t Events "$file" 2>/dev/null)

    # get and check run and file number
    runnumbererror="no"
    numbererror="no"
    numberfromname=$(grep -E -o '20[1-9][0-9][01][0-9][0-3][0-9]_[0-9]{3}' <<<"$file")
    runnumberfromname=$(grep -E -o '20[1-9][0-9][01][0-9][0-3][0-9]' <<<"$file")
    runnumberfromfile=$(grep NIGHT <<<"$header" | grep -E -o '20[1-9][0-9][01][0-9][0-3][0-9]')
    filenumberfromfileorig=$(grep RUNID <<<"$header" | grep -E -o '[0-9]{1,3}')
    filenumberfromfile=$(printf %03d "${filenumberfromfileorig:-0}")
    numberfromfile=${runnumberfromfile}_${filenumberfromfile}
    if [ "$runnumber" != "$runnumberfromname" ]; then
      runnumbererror="yes"
    fi
    if [ "$numberfromfile" != "$numberfromname" ]; then
      numbererror="yes"
    fi

    # check if entry already exists
    query3="SELECT fRunNumber FROM RunInfo WHERE fRunNumber=$runnumber AND fFileNumber=$filenumberfromfileorig"
    log "Q3:$query3"
    result3=$(run_query "$query3")

    # only proceed with file
    # if information is not yet in database
    # or an update is wished ($doupdate)
    if [ -n "$result3" ] && [ "$doupdate" == "no" ]; then
      log "$file has been inserted already => continue "
      continue
    fi

    # check if entry has already checksums
    query5="SELECT fRunNumber FROM RunInfo WHERE fRunNumber=$runnumber AND fFileNumber=$filenumberfromfileorig"
    query5="$query5 AND NOT ISNULL(fMd5sumRaw) AND NOT ISNULL(fMd5sumRawZip)"
    log "Q5:$query5"
    result5=$(run_query "$query5")

    # get md5sums of raw and zip file
    # to save time for tests and updates this can be skipped ($skipmd5sum)
    md5raw=
    md5zip=
    if [ "$skipmd5sum" == "no" ] ||
       { [ "$skipmd5sum" == "iffilled" ] && [ -z "$result5" ]; }; then
      log "calculation md5sum for raw..."
      md5raw=$(md5sum "$file" | cut -d' ' -f1)
      # /loc_data/raw/.../x.fits -> /loc_data/zipraw/.../x.fits.gz
      zipfile=${file/raw/zipraw}
      zipfile=${zipfile/fits/fits.gz}
      log "calculation md5sum for zipraw..."
      if [ -f "$zipfile" ]; then
        md5zip=$(md5sum "$zipfile" | cut -d' ' -f1)
      else
        log "$zipfile does not exist => md5sum for zipraw is not filled"
      fi
    fi

    # check if this run has drs file
    drsfile=${file/fits/drs.fits}
    if [ -e "$drsfile" ]; then
      numdrsfiles=1
    else
      numdrsfiles=0
    fi

    # get other variables from header
    runtype=$(grep RUNTYPE <<<"$header" | grep -E -o "['][a-z-]+[']" | sed -e "s/'//g")
    roi=$(grep NROI <<<"$header" | grep -v NROITM | grep -E -o '[0-9]{1,4}')
    numevents=$(grep Events <<<"$header" | grep -E -o '[0-9]+')
    runstart=$(grep TSTART <<<"$header" | grep -E -o "$timestamp_re")
    runstop=$(grep TSTOP <<<"$header" | grep -E -o "$timestamp_re")
    if [ -z "$runstart" ]; then
      # fall back to the DATE keyword of the header
      runstart=$(grep DATE <<<"$header" | grep -E -o "$timestamp_re")
    fi
    if [ -z "$runstop" ]; then
      # fall back to the modification time of the file
      runstop=$(stat "$file" 2>/dev/null | grep Modify | grep -E -o '20[1-9][0-9][\-][01][0-9][\-][0-3][0-9][ ][0-2][0-9]:[0-6][0-9]:[0-6][0-9][.][0-9]{9}')
    fi

    # check if fits file is corrupted (0: no errors, 1: errors)
    numfitserrors=$(fits_error_flag "$(fverify "$file")")

    # get runtype key; mysql prints the column name first and then the value,
    # i.e. the key is the second word of the output
    query2="SELECT fRunTypeKEY FROM RunType WHERE fRunTypeName='$runtype'"
    log "Q2:$query2"
    result2=( $(run_query "$query2") )
    if [ "${#result2[@]}" -lt 2 ]; then
      echo "Could not query fRunTypeKey for runtype $runtype" >&2
      log "Could not query fRunTypeKey for runtype $runtype"
      exit 1
    fi

    # insert or update depending on whether run exists
    query4=$(build_runinfo_query "$result3" "$runnumber" "$filenumberfromfileorig" \
      "${result2[1]}" "$numevents" "$roi" "$md5raw" "$md5zip" \
      "$numdrsfiles" "$numfitserrors" "$runstart" "$runstop")
    log "Q4:$query4"
    if ! run_query "$query4"; then
      log "insert/update of $numberfromfile to mysql failed"
    fi

    # print summary for debug
    log "*******"
    log "summary for file $file"
    log "*******"
    log " errors: $numfitserrors (1 means errors exist)"
    log " number of drs files: $numdrsfiles"
    log " runnumber error: $runnumbererror"
    log " number error: $numbererror"
    log " roi: $roi"
    log " runtype: $runtype"
    log " numevents: $numevents"
    log " md5sum: $md5raw"
    log " md5sum(zip): $md5zip"
    log " start: $runstart"
    log " stop: $runstop"

    # missing
    # check whether entry has been made (status table)
  done
done
234
235
Note: See TracBrowser for help on using the repository browser.