source: trunk/MagicSoft/Mars/datacenter/scripts/jobmanager@ 8690

Last change on this file since 8690 was 8527, checked in by Daniela Dorner, 17 years ago
*** empty log message ***
  • Property svn:executable set to *
File size: 6.0 KB
Line 
#!/bin/bash
#
# ========================================================================
#
# *
# * This file is part of MARS, the MAGIC Analysis and Reconstruction
# * Software. It is distributed to you in the hope that it can be a useful
# * and timesaving tool in analysing Data of imaging Cerenkov telescopes.
# * It is distributed WITHOUT ANY WARRANTY.
# *
# * Permission to use, copy, modify and distribute this software and its
# * documentation for any purpose is hereby granted without fee,
# * provided that the above copyright notice appear in all copies and
# * that both that copyright notice and this permission notice appear
# * in supporting documentation. It is provided "as is" without express
# * or implied warranty.
# *
#
#
# Author(s): Daniela Dorner 05/2006 <mailto:dorner@astro.uni-wuerzburg.de>
#
# Copyright: MAGIC Software Development, 2000-2007
#
#
# ========================================================================
#
# This is a script that launches other scripts (all scripts that are run
# on a primary basis).
#
# Usage: jobmanager [mc]
#   (no argument)  work on the scripts listed in datascripts
#   mc             work on the scripts listed in mcscripts
# Any other argument is logged as a wrong option and 'finish' is called.
#
# The interpreter is bash (not plain sh) because the script relies on
# arrays and (( )) arithmetic.
#

# NOTE(review): sourcefile is expected to provide the configuration
# variables (jmscriptlog, datascripts, mcscripts, ...) and the helper
# functions (printprocesslog, finish) used below; none of them is defined
# in this file - confirm against sourcefile.
source "$(dirname "$0")/sourcefile"
printprocesslog "INFO starting $0"

# noclobber: a plain '>' must not overwrite an existing file; the logs
# below are only ever appended to with '>>'
set -C

echo "" >> "$jmscriptlog" 2>&1
echo "starting jobmanager ($(date))" >> "$jmscriptlog" 2>&1

# select the list of scripts (and the matching column names) to manage
if [ -z "$1" ]
then
   echo "running jobmanager for data" >> "$jmscriptlog" 2>&1
   scripts=( "${datascripts[@]}" )
   scriptscolname=( "${datascriptscolname[@]}" )
elif [ "$1" = "mc" ]
then
   echo "running jobmanager for mc" >> "$jmscriptlog" 2>&1
   scripts=( "${mcscripts[@]}" )
   scriptscolname=( "${mcscriptscolname[@]}" )
else
   echo "$1 is a wrong commandline option for jobmanager -> exit" >> "$jmscriptlog" 2>&1
   printprocesslog "WARN $1 is wrong commandline option for jobmanager"
   # NOTE(review): finish presumably terminates the script - confirm
   finish >> "$jmscriptlog" 2>&1
fi
54
# Main scheduling loop: for every managed script, check whether there is
# work left, look at the condor queue, and submit at most one further job
# per pass if the configured limits allow it.
#
# NOTE(review): getstatus, cont, printprocesslog and the variables max,
# sleeptime, sleeptimelimit, errorsleeptime, errorsleeptimedefault,
# pnototal, pnoswe, pnosweek, scriptspath, runlogpath, jmscriptlog and
# jmerrorlog are not defined in this file; they are assumed to come from
# sourcefile. 'cont' is presumably what skips to the next script, and
# 'getstatus' is presumably what sets numproc - confirm.

# prev == 0 marks that the previously handled script had nothing to do
prev=$max
user=$(whoami)
# endless loop: notcount is never changed, so the condition stays true
notcount=0
nothingtodocount=0
nothingtodosleeptime=0
while (( notcount < 100 ))
do
   for (( i = 0; i < ${#scripts[@]}; i++ ))
   do
      date >> "$jmscriptlog" 2>&1
      # the configuration is re-read on every pass
      source "$(dirname "$0")/sourcefile"
      echo "script: ${scripts[$i]}" >> "$jmscriptlog" 2>&1

      # check if there's something to do
      getstatus >> "$jmscriptlog" 2>&1
      echo " $numproc ${scripts[$i]} still do to" >> "$jmscriptlog" 2>&1
      if [ -z "$numproc" ]
      then
         prev=0
         nothingtodocount=$(( nothingtodocount + 1 ))
         if [ "$nothingtodocount" -lt "${#scripts[@]}" ]
         then
            cont >> "$jmscriptlog" 2>&1
         else
            # every script was idle in a row: back off, with a sleep time
            # that stops growing once it has reached sleeptimelimit
            if [ "$nothingtodosleeptime" -lt "$sleeptimelimit" ]
            then
               nothingtodosleeptime=$(echo " $nothingtodocount * $sleeptime " | bc)
            fi
            echo "sleeping $nothingtodosleeptime" >> "$jmscriptlog" 2>&1
            sleep "$nothingtodosleeptime"
            cont >> "$jmscriptlog" 2>&1
         fi
      else
         nothingtodocount=0
         nothingtodosleeptime=0
      fi

      echo "sleeping $sleeptime..." >> "$jmscriptlog" 2>&1
      sleep "$sleeptime"

      # get processes in queue
      # (word splitting of the output into array elements is intended)
      q=( $(/usr/local/bin/condor_q -global -format "Owner%s " Owner -format "%s" CMD -format "Jobstatus%s\n" Jobstatus 2>&1) )
      # Inspect the complete output: a bare $q would only expand to the
      # first array element. The matching text goes to the log, not stdout.
      if echo "${q[*]}" | grep -E '(Error|failed)' >> "$jmscriptlog" 2>&1
      then
         echo "$(date) WARN condor_q failed" >> "$jmscriptlog" 2>&1
         printprocesslog "WARN condor_q failed"
         echo "$(date) WARN condor_q failed" >> "$jmerrorlog"
         cont >> "$jmscriptlog" 2>&1
      fi
      # get processes of user in queue
      # (grep -o prints one match per line, so the array length is the count)
      q1=( $(echo "${q[*]}" | grep -Eo -e "Owner$user") )
      queued=${#q1[@]}
      # get scripts in queue
      q2=( $(echo "${q[*]}" | grep -Eo -e "${scripts[$i]}") )
      queuedscript=${#q2[@]}
      # get running scripts (the condor_q output is matched on "Jobstatus2")
      q3=( $(echo "${q[*]}" | grep -Eo -e "${scripts[$i]}Jobstatus2") )
      runningscript=${#q3[@]}
      stillinqueue=$(( queuedscript - runningscript ))

      # get total number of allowed process for current time
      # (%k is the hour without a leading zero, so it is safe to use as an
      # array index and in arithmetic - "08" would be read as invalid octal)
      hour=$(date +%k)
      totalpno=${pnototal[$hour]}
      # choose array according to the day of the week
      # (%u counts 1..7 with Monday = 1, so the weekend is 6 and 7)
      dayofweek=$(date +%u)
      case "$dayofweek" in
         6 | 7) pnos=( "${pnoswe[@]}" ) ;;
         *)     pnos=( "${pnosweek[@]}" ) ;;
      esac
      # get number of allowed scripts for current time
      # (index = 24 entries per script plus the hour; same value as the
      # former (((i+1)*24)+(hour+1))-24-1)
      num=$(( i * 24 + hour ))
      pnoscript=${pnos[$num]}
      # if there was nothing to do for previous script, more scripts can be allowed
      if [ "$prev" -eq 0 ]
      then
         echo " prev=0 => resetting pnoscript [$pnoscript] to max [$max]" >> "$jmscriptlog" 2>&1
         pnoscript=$max
      fi
      echo " found $queued jobs in the queue (incl. running jobs) [allowed $totalpno]" >> "$jmscriptlog" 2>&1
      echo " found $queuedscript ${scripts[$i]} in the queue (incl. running jobs [$runningscript]) [allowed $pnoscript] - not running: $stillinqueue" >> "$jmscriptlog" 2>&1

      # continue if there are already enough processes or scripts in the queue
      if [ "$queued" -ge "$totalpno" ] || [ "$queuedscript" -ge "$pnoscript" ]
      then
         cont >> "$jmscriptlog" 2>&1
      fi
      # continue if the number of scripts in the queue is larger than (or
      # equal to) the number which still has to be done
      if [ "$numproc" -le "$stillinqueue" ]
      then
         echo " numproc($numproc) -le stillinqueue($stillinqueue)" >> "$jmscriptlog" 2>&1
         cont >> "$jmscriptlog" 2>&1
      fi

      # reset prev
      prev=$max

      # submit 1 script to condor
      date=$(date +%Y-%m-%d)
      echo " committing 1 ${scripts[$i]} to condor" >> "$jmscriptlog" 2>&1
      if ! /usr/local/bin/condor_submit -a "path=$scriptspath" -a "prog=${scripts[$i]}" -a "date=$date" -a "dir=$runlogpath" "$scriptspath/run.condor" 2>> "$jmerrorlog"
      then
         echo "$(date) WARN condor_submit failed" >> "$jmerrorlog"
         echo "condor is not working -> sleeping $errorsleeptime" >> "$jmscriptlog" 2>&1
         printprocesslog "WARN submitting ${scripts[$i]} to condor failed"
         # lengthen the pause after each failure until sleeptimelimit is reached
         if [ "$errorsleeptime" -lt "$sleeptimelimit" ]
         then
            errorsleeptime=$(echo " $errorsleeptime + $errorsleeptimedefault " | bc)
         fi
         sleep "$errorsleeptime"
      else
         errorsleeptime=$errorsleeptimedefault
      fi
      date >> "$jmscriptlog" 2>&1
      echo "" >> "$jmscriptlog" 2>&1
   done
done
172
Note: See TracBrowser for help on using the repository browser.