Index: /trunk/MagicSoft/Mars/Changelog
===================================================================
--- /trunk/MagicSoft/Mars/Changelog	(revision 9491)
+++ /trunk/MagicSoft/Mars/Changelog	(revision 9492)
@@ -18,4 +18,31 @@
 
                                                  -*-*- END OF LINE -*-*-
+
+ 2009/08/14 Daniela Dorner
+
+   * datacenter/scripts/sourcefile:
+     - function getstatus: set variable $numproc to 0 if it is empty
+     - moved call of getdbsetup() to function getstepinfo where possible
+     - enhanced function getstepinfo: implemented retrieving variable
+       NodeRestricted from steps.rc
+       In steps.rc a line 'Table.Column.NodeRestricted: yes' has to be
+       added when a step has to be executed on a certain node (stored 
+       in the database in MCRunProcessStatus.fProductionHostKEY)
+
+   * datacenter/scripts/jobmanager:
+     - implemented possibility to send certain jobs to certain nodes
+       of the cluster (currently implemented only for sun grid engine)
+     - fix for grepping the scriptname from the qstat output (needed
+       for scripts with commandline option)
+     - bugfix: pass environment variable AUTOMATIONSETUP to sun grid
+       engine
+
+   * datacenter/scripts/setup.isdc.cta:
+     - added analysis scripts for jobmanager
+     - added variables concerning the cluster: number of nodes and 
+       excluded nodes (needed in the jobmanager for the changes 
+       above)
+
+
 
  2009/08/14 Daniel Hoehne-Moench
Index: /trunk/MagicSoft/Mars/datacenter/scripts/jobmanager
===================================================================
--- /trunk/MagicSoft/Mars/datacenter/scripts/jobmanager	(revision 9491)
+++ /trunk/MagicSoft/Mars/datacenter/scripts/jobmanager	(revision 9492)
@@ -50,7 +50,11 @@
 case $queuesys in
       sge)  echo " on queuing system 'sun grid engine'" >> $jmscriptlog 2>&1
-            alias 'queuesubmit'='/opt/gridengine/bin/lx26-amd64/qsub -b y -e `echo $runlogpath`/error-`echo $date`.log -o `echo $runlogpath`/log-`echo $date`.log `echo $scriptspath`/`echo ${scripts[$i]}` '
+            # (-hard) -l hostname=compute-*
+            #   for qstat this returns the jobs running on that node + all jobs in the queue
+            alias 'queuesubmit'='/opt/gridengine/bin/lx26-amd64/qsub -b y -v AUTOMATIONSETUP=$AUTOMATIONSETUP -e `echo $runlogpath`/error-`echo $date`.log -o `echo $runlogpath`/log-`echo $date`.log `echo $noderequirementsub` `echo $scriptspath`/`echo ${scripts[$i]}` '
+#            alias 'queuesubmit'='/opt/gridengine/bin/lx26-amd64/qsub -b y -v AUTOMATIONSETUP=$AUTOMATIONSETUP -e `echo $runlogpath`/error-`echo $date`.log -o `echo $runlogpath`/log-`echo $date`.log `echo $scriptspath`/`echo ${scripts[$i]}` '
 #            alias 'queuesubmit'='/opt/gridengine/bin/lx26-amd64/qsub -sc runlogpath=`echo $runlogpath` -sc date=`echo $date` -sc scriptspath=`echo $scriptspath` -sc script=`echo ${scripts[$i]}` `echo $scriptspath`/job.sge '
-            alias 'checkqueue'="/opt/gridengine/bin/lx26-amd64/qstat | awk ' { print \"Owner\"\$4\" \" \$3\"Jobstatus\"\$5 } '"
+            # FIXME: get complete scriptname (including command line option), needed for runstereo
+            alias 'checkqueue'="/opt/gridengine/bin/lx26-amd64/qstat \`echo \$noderequirementstat\`  | awk ' { print \"Owner\"\$4\" \" \$3\"Jobstatus\"\$5 } '"
             break
             ;;
@@ -69,4 +73,6 @@
 prev=$max
 user=`whoami`
+currentnode=$minnode
+numevaluated=0
 # endless loop
 notcount=0
@@ -83,5 +89,41 @@
       # check if there's something to do
       column=${scriptscolname[$i]}
-      getstatus >> $jmscriptlog 2>&1
+      getstepinfo
+      if [ "$noderestricted" = "yes" ]
+      then
+         # get number of next node 
+         if [ $numevaluated -ge $numrestrictedscripts ]
+         then 
+            currentnode=`echo $currentnode + 1 | bc -l`
+            numevaluated=1
+         else
+            numevaluated=`echo $numevaluated + 1 | bc -l`
+         fi
+         if [ $currentnode -gt $maxnode ]
+         then 
+            currentnode=$minnode
+         fi
+         # check if node is excluded
+         for excludednode in ${excludednodes[@]}
+         do
+            if [ $currentnode -eq $excludednode ]
+            then
+               echo `date +%F\ %T`" Node compute-0-$currentnode is currently excluded." >> $jmscriptlog 2>&1
+               continue 2
+            fi
+         done
+         # define requirement for submission 
+         # FIXME: currently only for sge at isdc
+         echo `date +%F\ %T`" Checking for node $currentnode. " >> $jmscriptlog 2>&1
+         noderequirementsub=" -hard -l hostname=compute-0-${currentnode}"
+         noderequirementstat=" -l hostname=compute-0-${currentnode}"
+         getstatus $currentnode >> $jmscriptlog 2>&1
+      else
+         noderequirementsub=""
+         noderequirementstat=""
+         getstatus >> $jmscriptlog 2>&1
+      fi
+      
+      # check number of processes to be done
       echo `date +%F\ %T`" Database: $numproc ${scripts[$i]} still to be done (incl. idle jobs) [DB/table/column $db/$table/$column]" >> $jmscriptlog 2>&1
       if [ "$numproc" = "" ]
@@ -105,6 +147,4 @@
 
       # get processes in queue
-#      q=(`/usr/local/bin/condor_q -global -format "Owner%s " Owner -format "%s" CMD -format "Jobstatus%s\n" Jobstatus 2>&1 `)
-#      q=(`/opt/gridengine/bin/lx26-amd64/qstat | awk ' { print "Owner"$4" " $3"Jobstatus"$5 } ' 2>&1 `)
       q=(`checkqueue 2>&1 `)
       if echo $q | egrep \(Error\|failed\)
@@ -115,12 +155,13 @@
          nextscript sleeptime $sleeptime
       fi
+      # FIXME: sge cuts scriptname to 8 characters in qstat
       # get processes of user in queue
-      q1=(`echo ${q[@]} | egrep -o Owner$user`)
+      q1=( `echo ${q[@]} | egrep -o "Owner$user"`)
       queued=${#q1[@]}
       # get scripts in queue
-      q2=(`echo ${q[@]} | egrep -o ${scripts[$i]}`)
+      q2=( `echo ${q[@]} | egrep -o "${scripts[$i]}"`)
       queuedscript=${#q2[@]}
       # get running scripts
-      q3=( `echo ${q[@]} | egrep -o \(${scripts[$i]}Jobstatus2\|${scripts[$i]}Jobstatusr\)` )
+      q3=( `echo ${q[@]} | egrep -o \("${scripts[$i]}"Jobstatus2\|"${scripts[$i]}"Jobstatusr\)` )
       runningscript=${#q3[@]}
       stillinqueue=`echo $queuedscript - $runningscript | bc `
@@ -171,6 +212,4 @@
       date=`date +%Y-%m-%d`
       echo `date +%F\ %T`" committing 1 ${scripts[$i]} to $queuesys" >> $jmscriptlog 2>&1
-#      if ! /usr/local/bin/condor_submit -a path=$scriptspath -a prog=${scripts[$i]} -a date=$date -a dir=$runlogpath $scriptspath/run.condor 2>> $jmerrorlog
-#      if ! /opt/gridengine/bin/lx26-amd64/qsub -e $runlogpath/error-$date.log -o $runlogpath/log-$(date).log 2>> $jmerrorlog
       if ! queuesubmit 2>> $jmerrorlog
       then 
Index: /trunk/MagicSoft/Mars/datacenter/scripts/setup.isdc.cta
===================================================================
--- /trunk/MagicSoft/Mars/datacenter/scripts/setup.isdc.cta	(revision 9491)
+++ /trunk/MagicSoft/Mars/datacenter/scripts/setup.isdc.cta	(revision 9492)
@@ -89,6 +89,5 @@
 sleeptimelimit=360 #360
 errorsleeptimedefault=60 #60
-max=500 #maximum number of processes
-max=25 #maximum number of processes for one script in case there are more than one and the others do not have anything to do
+max=50 #maximum number of processes for one script in case there are more than one and the others do not have anything to do
 totalmax=1600 #maximum number of processes (total) overwrites pnototal(we) in case it is smaller
 
@@ -106,11 +105,37 @@
 pnototalwe=(     1500 1500 1500 1500 1500 1500 1500 1500 1500 1500 1500 1500 1500 1500 1500 1500 1500 1500 1500 1500 1500 1500 1500 1500 )
 
-pnosimtel=(       65 65 65 65 65 65 65 65 65 65 65 65 65 65 65 65 65 65 65 65 65 65 65 65 )
-pnosimtelwe=(     65 65 65 65 65 65 65 65 65 65 65 65 65 65 65 65 65 65 65 65 65 65 65 65 )
+pnosimtel=(       50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 )
+pnosimtelwe=(     50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 )
+
+# restricted to nodes => numbers per node 
+pnochimp=(         2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 ) 
+pnochimpwe=(       2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 ) 
+pnoctastar=(       2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 ) 
+pnoctastarwe=(     2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 ) 
+pnostereob=(       2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 ) 
+pnostereobwe=(     2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 ) 
+pnostereoc=(       2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 ) 
+pnostereocwe=(     2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 ) 
+pnostereog=(       2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 ) 
+pnostereogwe=(     2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 ) 
 
 # set variables for jobmanager 
-scripts=( "runsimtel" )
-scriptscolname=( "fCorsikaSimTelarray" )
-pnosweek=( ${pnosimtel[@]} )
-pnoswe=( ${pnosimtelwe[@]} )
+#scripts=( "runsimtel" "runchimp" "runctastar" "runstereo 2" "runstereo 3" "runstereo 7" )
+#scriptscolname=( "fCorsikaSimTelarray" "fChimp" "fCTAStar" "fStereoB" "fStereoC" "fStereoG" )
+#pnosweek=( ${pnosimtel[@]}   ${pnochimp[@]}   ${pnoctastar[@]}   ${pnostereob[@]}   ${pnostereoc[@]}   ${pnostereog[@]} )
+#pnoswe=( ${pnosimtelwe[@]} ${pnochimpwe[@]} ${pnoctastarwe[@]} ${pnostereobwe[@]} ${pnostereocwe[@]} ${pnostereogwe[@]} )
+scripts=( "runsimtel" "runchimp" "runctastar" )
+scriptscolname=( "fCorsikaSimTelarray" "fChimp" "fCTAStar" )
+pnosweek=( ${pnosimtel[@]}   ${pnochimp[@]}   ${pnoctastar[@]} )
+pnoswe=( ${pnosimtelwe[@]} ${pnochimpwe[@]} ${pnoctastarwe[@]} )
 
+# number of scripts that are restricted to a certain node
+#numrestrictedscripts=5
+numrestrictedscripts=2
+
+# set up for nodes 
+#  for processes that can run only on a certain node
+#  db: fProductionHostKEY
+minnode=0
+maxnode=26
+excludednodes=( 7 14 23 )
Index: /trunk/MagicSoft/Mars/datacenter/scripts/sourcefile
===================================================================
--- /trunk/MagicSoft/Mars/datacenter/scripts/sourcefile	(revision 9491)
+++ /trunk/MagicSoft/Mars/datacenter/scripts/sourcefile	(revision 9492)
@@ -274,7 +274,9 @@
 function getstepinfo()
 {
+   getdbsetup
    table=`grep "$column:" $steps | sed -e "s/[.]$column://" -e 's/#//' -e 's/ //g'`
    coltab=`grep "$column:" $steps | sed -e 's/://' -e 's/#//' -e 's/ //g'`
    needs=`grep "$coltab[.]Needs:" $steps | sed -e "s/$coltab[.]Needs://"`
+   noderestricted=`grep "$coltab[.]NodeRestricted:" $steps | sed -e "s/$coltab[.]NodeRestricted://" -e 's/ //g'`
    influences=`grep "$coltab[.]Influences:" $steps | sed -e "s/$coltab[.]Influences://"`
    prims=( `grep "$table[.]Primary:" $steps | sed -e "s/$table[.]Primary://"` )
@@ -282,4 +284,5 @@
 #   echo " needs: $needs"
 #   echo " influences: $influences"
+#   echo " noderestricted: $noderestricted"
 #   echo " prims: ${prims[@]}"
 }
@@ -290,5 +293,4 @@
    process=
    printprocesslog "INFO getting todo..."
-   getdbsetup
    getstepinfo
    # get query
@@ -340,5 +342,4 @@
 {
    numproc=
-   getdbsetup
    getstepinfo
    # get query
@@ -366,4 +367,8 @@
       continue
    fi
+   if [ "$numproc" = "" ]
+   then
+      numproc=0
+   fi
 }
 
@@ -377,5 +382,4 @@
    resetstatusvalues
    evalstatus $@
-   getdbsetup
    getstepinfo
    # get query
