#!/bin/bash
# bash is required: this script relies on arrays, (( )) arithmetic and C-style for loops
#
# ========================================================================
#
# *
# * This file is part of MARS, the MAGIC Analysis and Reconstruction
# * Software. It is distributed to you in the hope that it can be a useful
# * and timesaving tool in analysing Data of imaging Cerenkov telescopes.
# * It is distributed WITHOUT ANY WARRANTY.
# *
# * Permission to use, copy, modify and distribute this software and its
# * documentation for any purpose is hereby granted without fee,
# * provided that the above copyright notice appear in all copies and
# * that both that copyright notice and this permission notice appear
# * in supporting documentation. It is provided "as is" without express
# * or implied warranty.
# *
#
#
#   Author(s): Daniela Dorner  05/2006 <mailto:dorner@astro.uni-wuerzburg.de>
#
#   Copyright: MAGIC Software Development, 2000-2006
#
#
# ========================================================================
#
# This is a script which launches other scripts (all scripts that are run
# on a primary basis).
#

# Read the sourcefile stored next to this script. It presumably provides the
# helpers and settings used below (printprocesslog, $jmscriptlog, $max, ...)
# -- their definitions are not part of this file.
source $(dirname $0)/sourcefile
printprocesslog "INFO starting $0"

# noclobber: a plain '>' redirection must not overwrite an existing file
set -o noclobber

# Mark the start of this run in the jobmanager log: blank line + timestamp.
# $(date) is deliberately left unquoted so echo receives it word by word.
echo "" >> $jmscriptlog 2>&1
echo "starting jobmanager ("$(date)")" >> $jmscriptlog 2>&1

# State read by the main loop
user=$(whoami)
prev=$max
# counters for consecutive "nothing to do" passes and the resulting sleep time
nothingtodosleeptime=0
nothingtodocount=0
# loop guard: the main loop runs while this stays below 100 (endless loop)
notcount=0
# Main scheduling loop.
#
# $notcount is never changed inside this loop, so the condition stays true
# and the loop runs until the process is stopped from outside.
# Each pass walks over all entries of the scripts array and, per entry:
#   1. re-reads the sourcefile (presumably so that configuration changes are
#      picked up while the jobmanager is running),
#   2. calls getstatus and then inspects $numproc (presumably set by
#      getstatus: number of processes still to be done -- confirm),
#   3. counts the jobs of this user / of this script in the condor queue,
#   4. submits exactly one more job unless one of the limits is reached.
#
# Names expected from outside this block (not defined in this file):
#   scripts, getstatus, cont, printprocesslog, jmscriptlog, jmerrorlog,
#   sleeptime, sleeptimelimit, errorsleeptime, max, pnototal, pnoswe,
#   pnosweek, scriptspath, runlogpath
# NOTE(review): cont is assumed to skip to the next entry of the for loop;
# its definition is not visible here -- confirm.
while (( notcount < 100 ))
do
   for (( i = 0; i < ${#scripts[@]}; i++ ))
   do
      date >> "$jmscriptlog" 2>&1
      source "$(dirname "$0")/sourcefile"
      echo "script: ${scripts[$i]}" >> "$jmscriptlog" 2>&1

      # check if there's something to do
      getstatus >> "$jmscriptlog" 2>&1
      echo " $numproc ${scripts[$i]} still do to" >> "$jmscriptlog" 2>&1
      if [ -z "$numproc" ]
      then
         prev=0
         nothingtodocount=$(( nothingtodocount + 1 ))
         if [ "$nothingtodocount" -lt "${#scripts[@]}" ]
         then
            cont >> "$jmscriptlog" 2>&1
         else
            # nothing to do for a whole round of scripts: sleep, and let the
            # sleep time grow with every further idle pass until it reaches
            # $sleeptimelimit
            if [ "$nothingtodosleeptime" -lt "$sleeptimelimit" ]
            then
               # bc is kept here so that the product is computed exactly as
               # before, whatever number format $sleeptime uses
               nothingtodosleeptime=$(echo " $nothingtodocount * $sleeptime " | bc)
            fi
            echo "sleeping $nothingtodosleeptime" >> "$jmscriptlog" 2>&1
            sleep "$nothingtodosleeptime"
            cont >> "$jmscriptlog" 2>&1
         fi
      else
         nothingtodocount=0
         nothingtodosleeptime=0
      fi

      echo "sleeping $sleeptime..." >> "$jmscriptlog" 2>&1
      sleep "$sleeptime"

      # get processes in queue: one word per job of the form
      # "Owner<user>" followed by "<cmd>Jobstatus<n>"
      q=( $(/usr/local/bin/condor_q -global -format "Owner%s " Owner -format "%s" CMD -format "Jobstatus%s\n" Jobstatus 2>&1) )
      # Inspect the complete output: a bare $q would expand to the first
      # array element only and miss error text further down.
      if echo "${q[@]}" | grep -Eq '(Error|failed)'
      then
         echo "WARN condor_q failed" >> "$jmscriptlog" 2>&1
         printprocesslog "WARN condor_q failed"
         # $(date) unquoted on purpose: keeps the timestamp format used so far
         echo $(date)"ERROR condor_q failed" >> "$jmerrorlog"
         cont >> "$jmscriptlog" 2>&1
      fi
      # get processes of user in queue
      q1=( $(echo "${q[@]}" | grep -Eo -- "Owner$user") )
      queued=${#q1[@]}
      # get scripts in queue
      q2=( $(echo "${q[@]}" | grep -Eo -- "${scripts[$i]}") )
      queuedscript=${#q2[@]}
      # get running scripts (the code matches Jobstatus 2 for this)
      q3=( $(echo "${q[@]}" | grep -Eo -- "${scripts[$i]}Jobstatus2") )
      runningscript=${#q3[@]}
      stillinqueue=$(( queuedscript - runningscript ))

      # get total number of allowed processes for current time
      # %k gives the hour without a leading zero (a value like "08" would be
      # rejected as invalid octal in arithmetic); $(( )) strips the padding blank
      hour=$(( $(date +%k) ))
      totalpno=${pnototal[$hour]}
      # choose array according to the day of the week
      # date +%u counts 1 (Monday) .. 7 (Sunday), so the weekend is 6 and 7
      dayofweek=$(date +%u)
      case "$dayofweek" in
         6 | 7)  pnos=( ${pnoswe[@]} ) ;;    # unquoted: word splitting kept as before
             *)  pnos=( ${pnosweek[@]} ) ;;
      esac
      # get number of allowed scripts for current time:
      # the table holds 24 hourly values per script, one script after the other
      num=$(( i * 24 + hour ))
      pnoscript=${pnos[$num]}
      # if there was nothing to do for previous script, more scripts can be allowed
      if [ "$prev" -eq 0 ]
      then
         echo " prev=0 => resetting pnoscript [$pnoscript] to max [$max]" >> "$jmscriptlog" 2>&1
         pnoscript=$max
      fi
      echo " found $queued jobs in the queue (incl. running jobs) [allowed $totalpno]" >> "$jmscriptlog" 2>&1
      echo " found $queuedscript ${scripts[$i]} in the queue (incl. running jobs [$runningscript]) [allowed $pnoscript] - not running: $stillinqueue" >> "$jmscriptlog" 2>&1

      # continue if there are already enough processes or scripts in the queue
      if [ "$queued" -ge "$totalpno" ] || [ "$queuedscript" -ge "$pnoscript" ]
      then
         cont >> "$jmscriptlog" 2>&1
      fi
      # continue if the number of scripts waiting in the queue is larger than
      # (or equal to) the number which still has to be done
      if [ "$numproc" -le "$stillinqueue" ]
      then
         echo " numproc($numproc) -le stillinqueue($stillinqueue)" >> "$jmscriptlog" 2>&1
         cont >> "$jmscriptlog" 2>&1
      fi

      # reset prev
      prev=$max

      # submit 1 script to condor
      date=$(date +%Y-%m-%d)
      echo " committing 1 ${scripts[$i]} to condor" >> "$jmscriptlog" 2>&1
      if ! /usr/local/bin/condor_submit -a "path=$scriptspath" -a "prog=${scripts[$i]}" -a "date=$date" -a "dir=$runlogpath" "$scriptspath/run.condor" 2>> "$jmerrorlog"
      then
         echo $(date)"ERROR condor_submit failed" >> "$jmerrorlog"
         echo "condor is not working -> sleeping $errorsleeptime" >> "$jmscriptlog" 2>&1
         printprocesslog "WARN submitting ${scripts[$i]} to condor failed"
         sleep "$errorsleeptime"
      fi
      date >> "$jmscriptlog" 2>&1
      echo "" >> "$jmscriptlog" 2>&1
   done
done

