source: tags/Mars-V0.10.3/datacenter/scripts/jobmanager

Last change on this file was 8118, checked in by Daniela Dorner, 18 years ago
*** empty log message ***
  • Property svn:executable set to *
File size: 4.7 KB
Line 
1#!/bin/sh
2#
3# ========================================================================
4#
5# *
6# * This file is part of MARS, the MAGIC Analysis and Reconstruction
7# * Software. It is distributed to you in the hope that it can be a useful
8# * and timesaving tool in analysing Data of imaging Cerenkov telescopes.
9# * It is distributed WITHOUT ANY WARRANTY.
10# *
11# * Permission to use, copy, modify and distribute this software and its
12# * documentation for any purpose is hereby granted without fee,
13# * provided that the above copyright notice appear in all copies and
14# * that both that copyright notice and this permission notice appear
15# * in supporting documentation. It is provided "as is" without express
16# * or implied warranty.
17# *
18#
19#
20# Author(s): Daniela Dorner 05/2006 <mailto:dorner@astro.uni-wuerzburg.de>
21#
22# Copyright: MAGIC Software Development, 2000-2006
23#
24#
25# ========================================================================
26#
27# This is a script which launches other scripts (all scripts that are run
28# on a primary basis).
29#
30
# Start-up: load the settings file that lives next to this script and
# announce the start in the process log and in the jobmanager log.
#
# Names used here but not defined in this file (presumably provided by
# sourcefile -- TODO confirm): printprocesslog, jmscriptlog, max.
#
# NOTE(review): this script relies on bash features (source, arrays,
# (( )) arithmetic) although its shebang is /bin/sh.
source "$(dirname "$0")/sourcefile"
printprocesslog "INFO starting $0"

# noclobber: a plain '>' redirection must not overwrite an existing file;
# all log output below is appended with '>>'.
set -C

echo "" >> "$jmscriptlog" 2>&1
# $(date) is left unquoted on purpose: word splitting collapses the padding
# blanks in date's output, so the log line keeps its previous format.
echo "starting jobmanager ("$(date)")" >> "$jmscriptlog" 2>&1

# prev == 0 means "the previous script had nothing to do"; start with max.
prev=$max
user=$(whoami)
# endless loop
#
# Main scheduling loop. For every entry of the array 'scripts' it
#   1. re-sources sourcefile and asks getstatus how much work is left,
#   2. sleeps $sleeptime seconds,
#   3. reads the global condor queue and counts the jobs of the current
#      user, the jobs of the current script and the running jobs of the
#      current script (Jobstatus2),
#   4. looks up the limits for the current hour (pnototal) and for the
#      current script/hour (pnosweek on working days, pnoswe on weekends),
#   5. submits one more job of the script to condor unless a limit is
#      reached or enough jobs are already waiting in the queue.
#
# Names used here but not defined in this file: scripts, jmscriptlog,
# jmerrorlog, sleeptime, errorsleeptime, max, pnototal, pnoswe, pnosweek,
# scriptspath, runlogpath, numproc, getstatus, cont, printprocesslog.
# Variable names are kept unchanged because those helpers may read them.
#
# notcount is only initialised here; this file never changes it, so the
# loop ends only if code outside this file raises it to 100 or more.
#
# NOTE(review): 'cont' is presumably a helper that skips to the next
# script (the code following each call relies on that) -- TODO confirm.
notcount=0
while (( $notcount < 100 ))
do
   for (( i = 0; i < ${#scripts[@]}; i++ ))
   do
      date >> "$jmscriptlog" 2>&1
      source "$(dirname "$0")/sourcefile"
      echo "script: ${scripts[$i]}" >> "$jmscriptlog" 2>&1

      # check if there's something to do
      getstatus >> "$jmscriptlog" 2>&1
      echo " $numproc ${scripts[$i]} still do to" >> "$jmscriptlog" 2>&1
      if [ "$numproc" = "" ]
      then
         prev=0
         cont >> "$jmscriptlog" 2>&1
      fi

      echo "sleeping $sleeptime..." >> "$jmscriptlog" 2>&1
      sleep "$sleeptime"

      # get processes in queue; every job is printed as
      # "Owner<user> <cmd>Jobstatus<n>" and the output is deliberately
      # word-split into the array q
      q=($(/usr/local/bin/condor_q -global -format "Owner%s " Owner -format "%s" CMD -format "Jobstatus%s\n" Jobstatus 2>&1))
      # Check the complete output for an error message. A bare $q would
      # expand to the first array element only, i.e. only the first word
      # of the output would be inspected.
      if echo "${q[*]}" | grep -E '(Error|failed)'
      then
         echo "WARN condor_q failed" >> "$jmscriptlog" 2>&1
         printprocesslog "WARN condor_q failed"
         # $(date) unquoted on purpose, see the log format of earlier entries
         echo $(date)"ERROR condor_q failed" >> "$jmerrorlog"
         cont >> "$jmscriptlog" 2>&1
      fi
      # get processes of user in queue
      q1=($(echo "${q[@]}" | grep -E -o "Owner$user"))
      queued=${#q1[@]}
      # get scripts in queue
      q2=($(echo "${q[@]}" | grep -E -o "${scripts[$i]}"))
      queuedscript=${#q2[@]}
      # get running scripts
      q3=($(echo "${q[@]}" | grep -E -o "${scripts[$i]}Jobstatus2"))
      runningscript=${#q3[@]}
      stillinqueue=$(( queuedscript - runningscript ))

      # get total number of allowed processes for current time
      # (%k prints the hour without a leading zero, so it is never taken
      # for an octal number when used as index or in arithmetic)
      hour=$(date +%k)
      totalpno=${pnototal[$hour]}
      # choose array according to the day of the week
      # (%u counts 1 = Monday ... 7 = Sunday, so the weekend is 6 and 7)
      dayofweek=$(date +%u)
      case "$dayofweek" in
         6 | 7) pnos=( "${pnoswe[@]}" ) ;;
             *) pnos=( "${pnosweek[@]}" ) ;;
      esac
      # get number of allowed scripts for current time: the index formula
      # addresses 24 consecutive hourly values per script
      num=$(( i * 24 + hour ))
      pnoscript=${pnos[$num]}
      # if there was nothing to do for previous script, more scripts can be allowed
      if [ "$prev" -eq 0 ]
      then
         echo " prev=0 => resetting pnoscript [$pnoscript] to max [$max]" >> "$jmscriptlog" 2>&1
         pnoscript=$max
      fi
      echo " found $queued jobs in the queue (incl. running jobs) [allowed $totalpno]" >> "$jmscriptlog" 2>&1
      echo " found $queuedscript ${scripts[$i]} in the queue (incl. running jobs [$runningscript]) [allowed $pnoscript] - not running: $stillinqueue" >> "$jmscriptlog" 2>&1

      # continue if there are already enough processes or scripts in the queue
      if [ "$queued" -ge "$totalpno" ] || [ "$queuedscript" -ge "$pnoscript" ]
      then
         cont >> "$jmscriptlog" 2>&1
      fi
      # continue if the number of scripts waiting in the queue is larger than
      # (or equal to) the number which still has to be done
      if [ "$numproc" -le "$stillinqueue" ]
      then
         echo " numproc($numproc) -le stillinqueue($stillinqueue)" >> "$jmscriptlog" 2>&1
         cont >> "$jmscriptlog" 2>&1
      fi

      # reset prev
      prev=$max

      # submit 1 script to condor
      date=$(date +%Y-%m-%d)
      echo " committing 1 ${scripts[$i]} to condor" >> "$jmscriptlog" 2>&1
      if ! /usr/local/bin/condor_submit -a "path=$scriptspath" -a "prog=${scripts[$i]}" -a "date=$date" -a "dir=$runlogpath" "$scriptspath/run.condor" 2>> "$jmerrorlog"
      then
         echo $(date)"ERROR condor_submit failed" >> "$jmerrorlog"
         echo "condor is not working -> sleeping $errorsleeptime" >> "$jmscriptlog" 2>&1
         printprocesslog "WARN submitting ${scripts[$i]} to condor failed"
         sleep "$errorsleeptime"
      fi
      date >> "$jmscriptlog" 2>&1
      echo "" >> "$jmscriptlog" 2>&1
   done
done
133
Note: See TracBrowser for help on using the repository browser.