source: trunk/MagicSoft/Mars/datacenter/scripts/jobmanager@ 8006

Last change on this file since 8006 was 7959, checked in by Daniela Dorner, 18 years ago
*** empty log message ***
  • Property svn:executable set to *
File size: 4.7 KB
Line 
1#!/bin/sh
2#
3# ========================================================================
4#
5# *
6# * This file is part of MARS, the MAGIC Analysis and Reconstruction
7# * Software. It is distributed to you in the hope that it can be a useful
8# * and timesaving tool in analysing Data of imaging Cerenkov telescopes.
9# * It is distributed WITHOUT ANY WARRANTY.
10# *
11# * Permission to use, copy, modify and distribute this software and its
12# * documentation for any purpose is hereby granted without fee,
13# * provided that the above copyright notice appear in all copies and
14# * that both that copyright notice and this permission notice appear
15# * in supporting documentation. It is provided "as is" without express
16# * or implied warranty.
17# *
18#
19#
20# Author(s): Daniela Dorner 05/2006 <mailto:dorner@astro.uni-wuerzburg.de>
21#
22# Copyright: MAGIC Software Development, 2000-2006
23#
24#
25# ========================================================================
26#
27# This is a script which launches other scripts (all scripts that are run
28# on primary basis).
29#
30
# Load the shared configuration and helpers. The functions called below
# (printprocesslog, makedir) and the variables read below (jmscriptlog,
# jmlogpath, listpath, lockpath, max) are not defined in this script;
# presumably they all come from sourcefile.
source "$(dirname "$0")/sourcefile"
printprocesslog "INFO starting $0"

# noclobber: a plain '>' redirection refuses to overwrite an existing file
set -C

echo "" >> "$jmscriptlog" 2>&1
# $(date) is deliberately left unquoted: runs of blanks in the date output
# are collapsed to single blanks in the log line.
echo "starting jobmanager ("$(date)")" >> "$jmscriptlog" 2>&1
makedir "$jmlogpath"
makedir "$listpath" >> "$jmscriptlog" 2>&1
makedir "$lockpath" >> "$jmscriptlog" 2>&1

# prev: set to 0 by the main loop when the previous script had nothing to do,
#       otherwise it holds $max
prev=$max
# user: owner name used to pick this account's jobs out of the condor queue
user=$(whoami)
# notcount: guard variable of the outer main loop
notcount=0
# Main loop: walk over all entries of the scripts array again and again and
# submit at most one condor job per script and pass, as long as the limits
# for the current hour and day allow it.
#
# Requires bash (arrays, (( )) arithmetic, C-style for loop).
#
# notcount is never modified in this script, so unless a sourced helper
# changes it, the loop only ends when the process is stopped from outside.
#
# Helpers used but not defined here (presumably provided by sourcefile):
#   makedir, printprocesslog, getstatus, cont
# NOTE(review): cont is always called right where the rest of the iteration
# must be skipped, so it presumably performs a 'continue' - confirm.
while (( notcount < 100 ))
do
  for (( i = 0; i < ${#scripts[@]}; i++ ))
  do
    makedir "$jmlogpath"
    date >> "$jmscriptlog" 2>&1
    echo "sleeping $sleeptime..." >> "$jmscriptlog" 2>&1
    sleep "$sleeptime"
    # %u: day of week 1..7 with Monday = 1 (so Saturday = 6, Sunday = 7)
    dayofweek=$(date +%u)
    # %k: hour 0..23, blank padded (no leading zero, so it is safe to use
    # in shell arithmetic and as an array index)
    hour=$(date +%k)

    # sourcefile is read again on every pass
    source "$(dirname "$0")/sourcefile"
    echo "script: ${scripts[$i]}" >> "$jmscriptlog" 2>&1
#    echo " day: $dayofweek hour: $hour" >> "$jmscriptlog" 2>&1

    # NOTE(review): grep -c prints "0" even when its input is empty, so the
    # three empty-string checks below presumably never fire when condor_q
    # itself fails - confirm and consider checking condor_q's exit status.

    # all jobs of this user in the queue (idle and running)
    queued=$(/usr/local/bin/condor_q -global -format "%s\n" Owner | grep -c "$user")
    if [ "$queued" = "" ]
    then
      printprocesslog "WARN condor_q failed"
      echo $(date)"ERROR condor_q failed" >> "$jmerrorlog"
      cont >> "$jmscriptlog" 2>&1
    fi
    # jobs of this user that belong to the current script (idle and running)
    queuedscript=$(/usr/local/bin/condor_q -global | grep "$user" | grep -c "${scripts[$i]}")
    if [ "$queuedscript" = "" ]
    then
      printprocesslog "WARN condor_q failed"
      echo $(date)"ERROR condor_q failed" >> "$jmerrorlog"
      cont >> "$jmscriptlog" 2>&1
    fi
    # jobs of the current script that are in state R (running)
    runningscript=$(/usr/local/bin/condor_q -global | grep "$user" | grep ' R ' | grep -c "${scripts[$i]}")
    if [ "$runningscript" = "" ]
    then
      printprocesslog "WARN condor_q failed"
      echo $(date)"ERROR condor_q failed" >> "$jmerrorlog"
      cont >> "$jmscriptlog" 2>&1
    fi
    # jobs of the current script that are queued but not yet running
    stillinqueue=$(( queuedscript - runningscript ))

    # overall job limit for the current hour
    totalpno=${pnototal[$hour]}

    # choose array according to the day of the week
    # (date +%u never returns 0: the weekend is 6 = Saturday and 7 = Sunday)
    # The array expansions stay unquoted: how the elements are laid out is
    # decided in sourcefile, so word splitting is kept as is.
    case $dayofweek in
      6 | 7) pnos=( ${pnoswe[@]} ) ;;
      *)     pnos=( ${pnosweek[@]} ) ;;
    esac
    # the limits are stored as 24 consecutive hourly values per script
    num=$(( i * 24 + hour ))
    pnoscript=${pnos[$num]}
    # the previous script had nothing to do: allow this one up to max jobs
    if [ "$prev" -eq 0 ]
    then
      echo " prev=0 => resetting pnoscript [$pnoscript] to max [$max]" >> "$jmscriptlog" 2>&1
      pnoscript=$max
    fi
    echo " found $queued jobs in the queue (incl. running jobs) [allowed $totalpno]" >> "$jmscriptlog" 2>&1
    echo " found $queuedscript ${scripts[$i]} in the queue (incl. running jobs [$runningscript]) [allowed $pnoscript] - not running: $stillinqueue" >> "$jmscriptlog" 2>&1

    # overall limit exceeded
    if [ "$queued" -gt "$totalpno" ]
    then
      cont >> "$jmscriptlog" 2>&1
    fi
    # limit for this script exceeded
    if [ "$queuedscript" -gt "$pnoscript" ]
    then
      cont >> "$jmscriptlog" 2>&1
    fi

    # check if there's something to do
    # (numproc is not set in this script; presumably getstatus sets it)
    getstatus >> "$jmscriptlog" 2>&1
    echo " $numproc ${scripts[$i]} still do to" >> "$jmscriptlog" 2>&1
    if [ "$numproc" = "" ]
    then
      # nothing to do: remember it so that the next script may use max
      prev=0
      cont >> "$jmscriptlog" 2>&1
    fi
    prev=$max
    # fewer open tasks than jobs already waiting: do not submit another one
    if [ "$numproc" -lt "$stillinqueue" ]
    then
      echo " numproc($numproc) -lt stillinqueue($stillinqueue)" >> "$jmscriptlog" 2>&1
      cont >> "$jmscriptlog" 2>&1
    fi

    date=$(date +%Y-%m-%d)
    echo " committing 1 ${scripts[$i]} to condor" >> "$jmscriptlog" 2>&1
    if ! /usr/local/bin/condor_submit -a "path=$scriptspath" -a "prog=${scripts[$i]}" -a "date=$date" -a "dir=$runlogpath" "$scriptspath/run.condor" 2>> "$jmerrorlog"
    then
      echo $(date)"ERROR condor_submit failed" >> "$jmerrorlog"
      echo "condor is not working -> sleeping $errorsleeptime" >> "$jmscriptlog" 2>&1
      printprocesslog "WARN submitting ${scripts[$i]} to condor failed"
      sleep "$errorsleeptime"
    fi
    date >> "$jmscriptlog" 2>&1
    echo "" >> "$jmscriptlog" 2>&1
  done
done
137
Note: See TracBrowser for help on using the repository browser.