source: trunk/DataCheck/Transfer/ZipRawData.sh@ 19400

Last change on this file since 19400 was 18872, checked in by Daniela Dorner, 7 years ago
implemented option to use /data1 instead of /scratch
  • Property svn:executable set to *
File size: 8.2 KB
Line 
#!/bin/bash

# Zips the raw fits files of the selected nights to /data2/zipraw
# (files already ending in fz are only copied there).
#
# this script has been written to run on La Palma on the machine newdata
# i.e. paths are only working on this machine
#
# to use script with /data1 instead of /scratch
# (e.g. when /scratch is full)
#   export data1=yes
# before executing the script
#
# Helpers and settings used below (printprocesslog, finish, getdates, makedir,
# $runlogpath, $datetime, $limitpigz, $numprocpigz) are not defined in this
# file; they are expected to come from Sourcefile.sh.
source "$(dirname "$0")/../Sourcefile.sh"
printprocesslog "INFO starting $0"

logfile="$runlogpath/ZipRawLP-$datetime.log"
date >> "$logfile"

# NOTE(review): /scratch/raw is required even when data1=yes is set -- confirm
# that this is intended.
if ! ls /scratch/raw >/dev/null 2>&1
then
  printprocesslog "ERROR /scratch/raw is not available."
  finish
fi

# get dates
if [ -n "$certaindate" ]
then
  # $certaindate is left unquoted on purpose: the argument handling of
  # getdates is not visible here, so the original word-splitting is kept
  # shellcheck disable=SC2086
  getdates $certaindate
else
  # get all night
  #getdates "all"
  # get last 3 nights if hour between 7 and 19h, else only current night
  getdates 3 7 19
fi

# temporary directory handed to rsync (-T) for the copy /scratch -> /data1
rsynctempdir=/data1/rsync_tmp
if ! [ -d "$rsynctempdir" ]
then
  if ! mkdir -p "$rsynctempdir"
  then
    printprocesslog "WARN could not create $rsynctempdir"
  fi
fi

# not needed anymore as ftools are not used anymore
## setup to use ftools
#source $HEADAS/headas-init.sh

# files younger than $delaytime minutes are not processed
delaytime=5
45
# Stop the run (via finish) when less than ~10 GB are left on a disk.
# Arguments: $1 - mount point to check (e.g. /data1)
# Globals:   logfile (read)
# Uses column 4 ("Available", in 1024-byte blocks) of the 'df -P' output.
check_free_space()
{
  local mountpoint=$1
  local -a diskusage
  # word-splitting is intended here: split the df line into its columns
  # shellcheck disable=SC2207
  diskusage=( $(df -P "$mountpoint" | grep "$mountpoint") )
  if ! [[ ${diskusage[3]} =~ ^[0-9]+$ ]]
  then
    # df gave no usable line; do not fail with a '[' syntax error
    printprocesslog "WARN could not determine free space on $mountpoint"
    return
  fi
  if [ "${diskusage[3]}" -lt 10000000 ]
  then
    echo "WARN less than 10 GB left on $mountpoint on newdata (${diskusage[3]})" >> "$logfile" 2>&1
    printprocesslog "WARN less than 10 GB left on $mountpoint on newdata (${diskusage[3]})"
    finish
  fi
}

# do rsync for rawfiles of these dates
# ${dates[@]} is left unquoted on purpose: it is filled by getdates (not
# visible here) and the original relied on word-splitting
# shellcheck disable=SC2068
for date in ${dates[@]}
do
  echo "" >> "$logfile" 2>&1
  # srcroot is the path component that gets replaced when building target names
  if [ "$data1" = "yes" ]
  then
    srcroot=data1
    rawdir=/data1/raw/$date
    printprocesslog "INFO using $rawdir as input"
  else
    srcroot=scratch
    rawdir=/scratch/raw/$date
  fi
  echo "$(date): processing files in $rawdir" >> "$logfile" 2>&1
  printprocesslog "INFO processing files in $rawdir"
  #echo "INFO processing files in "$rawdir
  # check if data are available from that night
  if ! [ -d "$rawdir" ]
  then
    echo "$(date): no data available in $rawdir" >> "$logfile" 2>&1
    printprocesslog "INFO no data available in $rawdir"
    continue
  fi

  # find all fits-files starting with the oldest file
  echo "$(date): finding files to be zipped in $rawdir..." >> "$logfile" 2>&1
  printprocesslog "INFO finding files to be zipped in $rawdir..."
  fitsfiles=$(find "$rawdir" -type f -regex '.*[.]fits[.]?[g]?[f]?[z]?' | sort)

  # fitsfiles is a plain string, not an array: test the string itself
  # (an element count would always be 1, even if nothing was found)
  if [ -z "$fitsfiles" ]
  then
    echo "$(date): no files to be zipped in $rawdir..." >> "$logfile" 2>&1
    printprocesslog "INFO no files to be zipped in $rawdir..."
    continue
  fi

  rawdir2=/data1/raw/$date
  zipdir=/data2/zipraw/$date
  # create output directories
  makedir "$rawdir2" >> "$logfile" 2>&1
  makedir "$zipdir" >> "$logfile" 2>&1

  # loop to zip files
  echo "$(date): zipping files in $rawdir..." >> "$logfile" 2>&1
  printprocesslog "INFO zipping files in $rawdir..."
  # word-splitting of the newline-separated list is intended
  # (file names are assumed to contain no whitespace)
  for file in $fitsfiles
  do
    # check if more than ~ 10 GB are left on output directories
    check_free_space /data1
    check_free_space /data2

    # check if raw file was accessed in the last $delaytime minutes
    isnew=$(find "$file" -amin "-$delaytime")
    if [ -n "$isnew" ]
    then
      echo "$file is not older than $delaytime min => continue" >> "$logfile" 2>&1
      printprocesslog "INFO $file is not older than $delaytime min => continue"
      continue
    fi

    printprocesslog "INFO process file $file"
    if [ -n "$certaindate" ]
    then
      echo "INFO process file $file"
    fi
    # first rsync from /scratch to /data1
    if [ "$data1" != "yes" ]
    then
      file2=${file/scratch/data1}
      if ! rsync -au -T "$rsynctempdir" "$file" "$file2"
      then
        printprocesslog "ERROR something went wrong with rsync of $file"
        rm -f -- "$file2"
        continue
      fi
    fi

    # common part of the target name on /data2:
    # /<srcroot>/raw/... -> /data2/zipraw/...
    zipbase=${file//$srcroot/data2}
    zipbase=${zipbase/raw/zipraw}

    # copying to /data2
    # decide by the file name suffix, not by "fz" appearing anywhere in the path
    if [[ $file == *.fz ]]
    then
      # treat already compressed files
      printprocesslog "DEBUG treat compressed file $file"

      # filename for file on data
      zipfile=$zipbase
      zipfiletmp=${zipbase/fits.fz/fits.fz.tmp}

      # check if file on data already exists
      if [ -e "$zipfile" ]
      then
        continue
      fi

      echo "$(date): copying $file to $zipfile ..." >> "$logfile" 2>&1
      printprocesslog "INFO copying $file to $zipfile ..."
      #echo "INFO zipping "$file" to "$zipfile" ..."
      # read setup again to allow for updates of variables defining transfer
      source "$(dirname "$0")/../Sourcefile.sh"
      # copy file rate-limited to the temporary outputfile
      echo "pv --rate-limit $limitpigz $file > $zipfiletmp" >> "$logfile" 2>&1
      pv --rate-limit "$limitpigz" "$file" > "$zipfiletmp"
      statuspv=$?
      printprocesslog "DEBUG pvstatus:$statuspv"
      echo " pvstatus:$statuspv" >> "$logfile" 2>&1
      if [ "$statuspv" -eq 0 ]
      then
        # if successful, move temporary to final zipfile
        printprocesslog "INFO move $zipfiletmp to $zipfile ..."
        mv -v "$zipfiletmp" "$zipfile" >> "$logfile" 2>&1
      else
        # if not successful, remove temporary zipfile
        printprocesslog "WARN pv failed (statuspv:$statuspv)."
        echo "WARN pv failed (PIPESTATUS:$statuspv)." >> "$logfile" 2>&1
        printprocesslog "INFO remove $zipfiletmp..."
        rm -v "$zipfiletmp" >> "$logfile" 2>&1
        finish
      fi
    else
      # treat uncompressed files
      # NOTE(review): the find pattern above also matches names such as
      # *.fits.gz; those end up in this branch and are gzipped again -- confirm
      printprocesslog "DEBUG treat uncompressed file $file"

      # filename for temporary and final zipfile
      zipfile=${zipbase/fits/fits.gz}
      zipfiletmp=${zipbase/fits/fits.tmp.gz}

      # check if zipped file already exists
      if [ -e "$zipfile" ]
      then
        continue
      fi

      echo "$(date): zipping $file to $zipfile ..." >> "$logfile" 2>&1
      printprocesslog "INFO zipping $file to $zipfile ..."
      #echo "INFO zipping "$file" to "$zipfile" ..."
      # read setup again to allow for updates of variables defining transfer
      source "$(dirname "$0")/../Sourcefile.sh"
      # zip file to stdout and pipe it to outputfile
      echo "pv --rate-limit $limitpigz $file | pigz -1 -c -f -p $numprocpigz > $zipfiletmp" >> "$logfile" 2>&1
      pv --rate-limit "$limitpigz" "$file" | pigz -1 -c -f -p "$numprocpigz" > "$zipfiletmp"
      # must be saved immediately: any further command overwrites PIPESTATUS
      statuspigz=( "${PIPESTATUS[@]}" )
      printprocesslog "DEBUG PIPESTATUS:${statuspigz[*]}"
      echo " PIPESTATUS:${statuspigz[*]}" >> "$logfile" 2>&1
      if [ "${statuspigz[0]}" -eq 0 ] && [ "${statuspigz[1]}" -eq 0 ]
      then
        # if successful, move temporary to final zipfile
        printprocesslog "INFO move $zipfiletmp to $zipfile ..."
        mv -v "$zipfiletmp" "$zipfile" >> "$logfile" 2>&1
      else
        # if not successful, remove temporary zipfile
        printprocesslog "WARN pigz or pv failed (PIPESTATUS:${statuspigz[*]})."
        echo "WARN pigz or pv failed (PIPESTATUS:${statuspigz[*]})." >> "$logfile" 2>&1
        printprocesslog "INFO remove $zipfiletmp..."
        rm -v "$zipfiletmp" >> "$logfile" 2>&1
        finish
      fi
    fi
  done
done
echo "finished zipping..." >> "$logfile" 2>&1
finish
Note: See TracBrowser for help on using the repository browser.