Index: /trunk/MagicSoft/Mars/Changelog
===================================================================
--- /trunk/MagicSoft/Mars/Changelog	(revision 9487)
+++ /trunk/MagicSoft/Mars/Changelog	(revision 9488)
@@ -18,4 +18,12 @@
 
                                                  -*-*- END OF LINE -*-*-
+
+ 2009/08/12 Daniela Dorner
+
+   * datacenter/scripts/jobmanager:
+     - improved logging (new layout and more information)
+     - moved sleep to end of loop (function nextscript())
+
+
 
  2009/08/12 Daniel Hoehne-Moench
Index: /trunk/MagicSoft/Mars/datacenter/scripts/jobmanager
===================================================================
--- /trunk/MagicSoft/Mars/datacenter/scripts/jobmanager	(revision 9487)
+++ /trunk/MagicSoft/Mars/datacenter/scripts/jobmanager	(revision 9488)
@@ -34,6 +34,16 @@
 set -C
 
+# nextscript NAME SECONDS: log the sleep (variable name $1, duration $2), sleep $2 seconds, then go on to the next script in the calling loop
+function nextscript()
+{
+   echo `date +%F\ %T`" sleeping \$$1 = $2 seconds... " >> $jmscriptlog 2>&1
+   sleep $2
+   echo "" >> $jmscriptlog 2>&1
+   continue   # NOTE(review): relies on 'continue' inside a function reaching the caller's loop - verify on the bash version in use
+}
+
 echo "" >> $jmscriptlog 2>&1
-echo "starting jobmanager ("`date`")" >> $jmscriptlog 2>&1
+echo "" >> $jmscriptlog 2>&1
+echo -n `date +%F\ %T`" starting jobmanager for setup " >> $jmscriptlog 2>&1
 
 # decide which jobmanager you want to run
@@ -41,5 +51,5 @@
 #  the number of jobs are defined in the file setup
 case $1 in
-   data) echo "running jobmanager for data" >> $jmscriptlog 2>&1
+   data) echo -n "'data'" >> $jmscriptlog 2>&1
          scripts=( "runganymed" "runstar" "runcallisto" ) # not used: "dodatacheck" "cutslices"
          scriptscolname=( "fGanymed" "fStar" "fCallisto" ) # not used: "fDataCheckDone" "fCompmux" 
@@ -48,5 +58,5 @@
          break
          ;;
-     mc) echo "running jobmanager for mc" >> $jmscriptlog 2>&1
+     mc) echo -n "'mc'" >> $jmscriptlog 2>&1
          scripts=( "runcorsika" "runreflector" "runcamera" )
          scriptscolname=( "fCorsikaFileAvail" "fReflectorFileAvail" "fCameraFileAvail" )
@@ -55,5 +65,5 @@
          break
          ;;
-  ctamc) echo "running jobmanager for cta mc" >> $jmscriptlog 2>&1
+  ctamc) echo -n "'cta mc'" >> $jmscriptlog 2>&1
          scripts=( "runsimtel" )
          scriptscolname=( "fCorsikaSimTelarray" )
@@ -62,5 +72,6 @@
          break
          ;;
-      *) echo "'$1' is a wrong commandline option for jobmanager -> exit" >> $jmscriptlog 2>&1
+      *) echo "" >> $jmscriptlog 2>&1
+         echo "'$1' is a wrong commandline option for jobmanager -> exit" >> $jmscriptlog 2>&1
          echo "'$1' is a wrong commandline option for jobmanager -> exit" 
          printprocesslog "WARN '$1' is a wrong commandline option for jobmanager"
@@ -72,5 +83,5 @@
 # choose commands according to queueing system (defined in setup)
 case $queuesys in
-      sge)  echo "setting commands for sun grid engine" >> $jmscriptlog 2>&1
+      sge)  echo " on queuing system 'sun grid engine'" >> $jmscriptlog 2>&1
             alias 'queuesubmit'='/opt/gridengine/bin/lx26-amd64/qsub -b y -e `echo $runlogpath`/error-`echo $date`.log -o `echo $runlogpath`/log-`echo $date`.log `echo $scriptspath`/`echo ${scripts[$i]}` '
 #            alias 'queuesubmit'='/opt/gridengine/bin/lx26-amd64/qsub -sc runlogpath=`echo $runlogpath` -sc date=`echo $date` -sc scriptspath=`echo $scriptspath` -sc script=`echo ${scripts[$i]}` `echo $scriptspath`/job.sge '
@@ -78,12 +89,15 @@
             break
             ;;
-   condor)  echo "setting commands for condor" >> $jmscriptlog 2>&1
+   condor)  echo " on queuing system 'condor'" >> $jmscriptlog 2>&1
             alias 'queuesubmit'='/usr/local/bin/condor_submit -a path=`echo $scriptspath` -a prog=`echo ${scripts[$i]}` -a date=`echo $date` -a dir=`echo $runlogpath` `echo $scriptspath`/run.condor'
             alias 'checkqueue'='/usr/local/bin/condor_q -global -format "Owner%s " Owner -format "%s" CMD -format "Jobstatus%s\n" Jobstatus '
             break
             ;;
-        *)  finish >> $jmscriptlog 2>&1
+        *)  echo "" >> $jmscriptlog 2>&1
+            finish >> $jmscriptlog 2>&1
             ;;
 esac
+
+echo "" >> $jmscriptlog 2>&1
 
 prev=$max
@@ -98,12 +112,11 @@
    for (( i=0 ; i < ${#scripts[@]} ; i++ ))
    do 
-      date  >> $jmscriptlog 2>&1
       source `dirname $0`/sourcefile
-      echo "script: ${scripts[$i]}" >> $jmscriptlog 2>&1
+      echo `date +%F\ %T`" Evaluating processing status for script '${scripts[$i]}'" >> $jmscriptlog 2>&1
 
       # check if there's something to do
       column=${scriptscolname[$i]}
       getstatus >> $jmscriptlog 2>&1
-      echo " $numproc ${scripts[$i]} still to do" >> $jmscriptlog 2>&1
+      echo `date +%F\ %T`" Database: $numproc ${scripts[$i]} still to be done (incl. idle jobs) [DB/table/column $db/$table/$column]" >> $jmscriptlog 2>&1
       if [ "$numproc" = "" ]
       then 
@@ -112,5 +125,5 @@
          if [ $nothingtodocount -lt ${#scripts[@]} ]
          then 
-            cont >> $jmscriptlog 2>&1
+            nextscript 0 0
          else
             if [ $nothingtodosleeptime -lt $sleeptimelimit ]
@@ -118,7 +131,5 @@
                nothingtodosleeptime=`echo " $nothingtodocount * $sleeptime " | bc`
             fi
-            echo "sleeping $nothingtodosleeptime" >> $jmscriptlog 2>&1
-            sleep $nothingtodosleeptime
-            cont >> $jmscriptlog 2>&1
+            nextscript nothingtodosleeptime $nothingtodosleeptime
          fi
       else
@@ -126,7 +137,4 @@
          nothingtodosleeptime=0
       fi
-
-      echo "sleeping $sleeptime..." >> $jmscriptlog 2>&1
-      sleep $sleeptime
 
       # get processes in queue
@@ -136,8 +144,8 @@
       if echo $q | egrep \(Error\|failed\)
       then 
-         echo `date`" WARN checking query ($queuesys) failed" >> $jmscriptlog 2>&1
+         echo `date +%F\ %T`" WARN checking query ($queuesys) failed" >> $jmscriptlog 2>&1
          printprocesslog "WARN checking query ($queuesys) failed"
-         echo `date`" WARN checking query ($queuesys) failed" >> $jmerrorlog
-         cont >> $jmscriptlog 2>&1
+         echo `date +%F\ %T`" WARN checking query ($queuesys) failed" >> $jmerrorlog
+         nextscript sleeptime $sleeptime
       fi
       # get processes of user in queue
@@ -154,5 +162,4 @@
       #get total number of allowed process for current time
       hour=`date +%k`
-      #totalpno=${pnototal[$hour]}
       if [ ${pnototal[$hour]} -lt $totalmax ]
       then
@@ -174,20 +181,20 @@
       if [ $prev -eq 0 ]
       then
-         echo " prev=0 => resetting pnoscript [$pnoscript] to max [$max]" >> $jmscriptlog 2>&1
+         echo `date +%F\ %T`" \$prev=0 => resetting \$pnoscript from $pnoscript to $max [\$max]" >> $jmscriptlog 2>&1
          pnoscript=$max
       fi
-      echo " found $queued jobs in the queue (incl. running jobs) [allowed $totalpno]" >> $jmscriptlog 2>&1
-      echo " found $queuedscript ${scripts[$i]} in the queue (incl. running jobs [$runningscript]) [allowed $pnoscript] - not running: $stillinqueue" >> $jmscriptlog 2>&1
+      echo `date +%F\ %T`" queue for user '$user': total: $queued queued jobs [allowed \$totalpno = $totalpno]" >> $jmscriptlog 2>&1
+      echo `date +%F\ %T`" queue for user '$user': ${scripts[$i]}: $queuedscript queued, $runningscript running, $stillinqueue idle [allowed \$pnoscript = $pnoscript]" >> $jmscriptlog 2>&1
       
       # continue if there are already enough processes or scripts in the queue
       if [ "$queued" -ge "$totalpno" ] || [ "$queuedscript" -ge "$pnoscript" ]
       then
-         cont >> $jmscriptlog 2>&1
+         nextscript sleeptime $sleeptime
       fi
       # continue if the number of script is the queue is larger (or equal) than the number which still has to be done
       if [ $numproc -le $stillinqueue ]
       then 
-         echo " numproc($numproc) -le stillinqueue($stillinqueue)" >> $jmscriptlog 2>&1
-         cont >> $jmscriptlog 2>&1
+         echo `date +%F\ %T`" \$numproc ($numproc) <= \$stillinqueue ($stillinqueue) " >> $jmscriptlog 2>&1
+         nextscript sleeptime $sleeptime
       fi
       
@@ -197,5 +204,5 @@
       # submit 1 script to queuing system
       date=`date +%Y-%m-%d`
-      echo " committing 1 ${scripts[$i]} to $queuesys" >> $jmscriptlog 2>&1
+      echo `date +%F\ %T`" committing 1 ${scripts[$i]} to $queuesys" >> $jmscriptlog 2>&1
 #      if ! /usr/local/bin/condor_submit -a path=$scriptspath -a prog=${scripts[$i]} -a date=$date -a dir=$runlogpath $scriptspath/run.condor 2>> $jmerrorlog
 #      if ! /opt/gridengine/bin/lx26-amd64/qsub -e $runlogpath/error-$date.log -o $runlogpath/log-$(date).log 2>> $jmerrorlog
@@ -203,5 +210,5 @@
       then 
          echo `date`" WARN submitting job ($queuesys) failed" >> $jmerrorlog
-         echo "$queuesys is not working -> sleeping $errorsleeptime" >> $jmscriptlog 2>&1
+         echo `date +%F\ %T`" WARN $queuesys is not working -> sleeping $errorsleeptime [\$errorsleeptime]" >> $jmscriptlog 2>&1
          printprocesslog "WARN submitting ${scripts[$i]} ($queuesys) failed"
          if [ $errorsleeptime -lt $sleeptimelimit ]
@@ -209,10 +216,9 @@
             errorsleeptime=`echo " $errorsleeptime + $errorsleeptimedefault " | bc`
          fi
-         sleep $errorsleeptime
+         nextscript errorsleeptime $errorsleeptime
       else
          errorsleeptime=$errorsleeptimedefault
       fi
-      date >> $jmscriptlog 2>&1
-      echo "" >> $jmscriptlog 2>&1
+      nextscript sleeptime $sleeptime
    done
 done
