Changeset 8118
- Timestamp:
- 10/18/06 13:10:32 (18 years ago)
- Location:
- trunk/MagicSoft/Mars
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/MagicSoft/Mars/Changelog
r8117 r8118 39 39 - moved getstatus to beginning of loop and moved sleep behind 40 40 getstatus to avoid unneed waiting and calling of condor_q 41 - replaced -gt by -ge and -lt by -le, to make sure that logging 42 output makes sense 43 44 * datacenter/scripts/setup: 45 - updated default values for jobmanager 41 46 42 47 -
trunk/MagicSoft/Mars/datacenter/scripts/jobmanager
r8115 r8118 39 39 prev=$max 40 40 user=`whoami` 41 # endless loop 41 42 notcount=0 42 43 while (( $notcount < 100 )) … … 56 57 cont >> $jmscriptlog 2>&1 57 58 fi 58 prev=$max 59 59 60 60 echo "sleeping $sleeptime..." >> $jmscriptlog 2>&1 61 61 sleep $sleeptime 62 62 63 q=(`/usr/local/bin/condor_q -global -format "Owner%s " Owner -format "%s" CMD -format "Jobstatus%s\n" Jobstatus`) 63 # get processes in queue 64 q=(`/usr/local/bin/condor_q -global -format "Owner%s " Owner -format "%s" CMD -format "Jobstatus%s\n" Jobstatus 2>&1 `) 64 65 if echo $q | egrep \(Error\|failed\) 65 66 then 67 echo "WARN condor_q failed" >> $jmscriptlog 2>&1 66 68 printprocesslog "WARN condor_q failed" 67 69 echo `date`"ERROR condor_q failed" >> $jmerrorlog 68 70 cont >> $jmscriptlog 2>&1 69 71 fi 72 # get processes of user in queue 70 73 q1=(`echo ${q[@]} | egrep -o Owner$user`) 71 74 queued=${#q1[@]} 72 75 # get scripts in queue 73 76 q2=(`echo ${q[@]} | egrep -o ${scripts[$i]}`) 74 77 queuedscript=${#q2[@]} 75 78 # get running scripts 76 79 q3=(`echo ${q[@]} | egrep -o ${scripts[$i]}Jobstatus2`) 77 80 runningscript=${#q3[@]} … … 81 84 hour=`date +%k` 82 85 totalpno=${pnototal[$hour]} 83 84 86 #choose array according to the day of the week 85 87 dayofweek=`date +%u` … … 88 90 *) pnos=( ${pnosweek[@]} ) ;; 89 91 esac 92 # get number of allowed scripts for current time 90 93 num=`echo "((( $i + 1 ) * 24 ) + ( $hour + 1 ) ) - 24 - 1 " | bc ` 91 94 pnoscript=${pnos[$num]} 95 # if there was nothing to do for previous script, more scripts can be allowed 92 96 if [ $prev -eq 0 ] 93 97 then … … 98 102 echo " found $queuedscript ${scripts[$i]} in the queue (incl. 
running jobs [$runningscript]) [allowed $pnoscript] - not running: $stillinqueue" >> $jmscriptlog 2>&1 99 103 100 if [ "$queued" -gt "$totalpno" ] || [ "$queuedscript" -gt "$pnoscript" ] 104 # continue if there are already enough processes or scripts in the queue 105 if [ "$queued" -ge "$totalpno" ] || [ "$queuedscript" -ge "$pnoscript" ] 101 106 then 102 107 cont >> $jmscriptlog 2>&1 103 108 fi 104 105 if [ $numproc -lt $stillinqueue ] 109 # continue if the number of script is the queue is larger (or equal) than the number which still has to be done 110 if [ $numproc -le $stillinqueue ] 106 111 then 107 echo " numproc($numproc) -lt stillinqueue($stillinqueue)" >> $jmscriptlog 2>&1 112 echo " numproc($numproc) -le stillinqueue($stillinqueue)" >> $jmscriptlog 2>&1 108 113 cont >> $jmscriptlog 2>&1 109 114 fi 115 116 # reset prev 117 prev=$max 110 118 119 # submit 1 script to condor 111 120 date=`date +%Y-%m-%d` 112 121 echo " committing 1 ${scripts[$i]} to condor" >> $jmscriptlog 2>&1 -
trunk/MagicSoft/Mars/datacenter/scripts/setup
r7982 r8118 62 62 #setup for jobmanager 63 63 sleeptime=30 #30 64 errorsleeptime=180 #180 65 max=16 #maximum number of processes 64 errorsleeptime=60 #60 65 max=18 #maximum number of processes 66 66 67 67 scripts=( "runcallisto" "runstar" "runganymed" "dodatacheck" ) … … 69 69 70 70 #hour: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 71 pnototal=( 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16) 72 pnototalwe=( 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16) 71 pnototal=( 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 ) 72 pnototalwe=( 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 18 ) 73 73 74 74 pnocallisto=( 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 ) 75 pnostar=( 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5) 76 pnoganymed=( 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2) 75 pnostar=( 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 ) 76 pnoganymed=( 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 ) 77 77 pnodatacheck=( 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 ) 78 78 79 79 pnocallistowe=( 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 12 ) 80 pnostarwe=( 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5) 81 pnoganymedwe=( 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2) 80 pnostarwe=( 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 ) 81 pnoganymedwe=( 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 ) 82 82 pnodatacheckwe=( 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 16 ) 83 83
Note:
See TracChangeset
for help on using the changeset viewer.