Ignore:
Timestamp:
08/21/08 18:49:35 (16 years ago)
Author:
Daniela Dorner
Message:
*** empty log message ***
File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/MagicSoft/Mars/datacenter/scripts/jobmanager

    r9122 r9129  
    37 37 echo "starting jobmanager ("`date`")" >> $jmscriptlog 2>&1
    38 38
    39 if [ "$1" == "" ]
    40 then
    41    echo "running jobmanager for data" >> $jmscriptlog 2>&1
    42    scripts=( ${datascripts[@]} )
    43    scriptscolname=( ${datascriptscolname[@]} )
    44 elif [ "$1" = "mc" ]
    45 then
    46    echo "running jobmanager for mc" >> $jmscriptlog 2>&1
    47    scripts=( ${mcscripts[@]} )
    48    scriptscolname=( ${mcscriptscolname[@]} )
    49 else
    50    echo "$1 is awrong commandline option for jobmanager -> exit" >> $jmscriptlog 2>&1
    51    printprocesslog "WARN $1 is wrong commandline option for jobmanager"
    52    finish >> $jmscriptlog 2>&1
    53 fi
     39# decide which jobmanager you want to run
     40# setup of the different jobmanagers (which scripts they start)
     41#  the number of jobs are defined in the file setup
     42case $1 in
     43   data) echo "running jobmanager for data" >> $jmscriptlog 2>&1
     44         scripts=( "runganymed" "runstar" "runcallisto" ) # not used: "dodatacheck" "cutslices"
     45         scriptscolname=( "fGanymed" "fStar" "fCallisto" ) # not used: "fDataCheckDone" "fCompmux"
     46         pnosweek=( ${pnoganymed[@]} ${pnostar[@]} ${pnocallisto[@]} ) # not used: ${pnodatacheck[@]} ${pnocutslices[@]}
     47         pnoswe=( ${pnoganymedwe[@]} ${pnostarwe[@]} ${pnocallistowe[@]} ) # not used: ${pnodatacheckwe[@]} ${pnocutsliceswe[@]}
     48         break
     49         ;;
     50     mc) echo "running jobmanager for mc" >> $jmscriptlog 2>&1
     51         scripts=( "runcorsika" "runreflector" "runcamera" )
     52         scriptscolname=( "fCorsikaFileAvail" "fReflectorFileAvail" "fCameraFileAvail" )
     53         pnosweek=( ${pnocorsika[@]} ${pnoreflector[@]} ${pnocamera[@]} )
     54         pnoswe=( ${pnocorsikawe[@]} ${pnoreflectorwe[@]} ${pnocamerawe[@]} )
     55         break
     56         ;;
     57      *) echo "$1 is a wrong commandline option for jobmanager -> exit" >> $jmscriptlog 2>&1
     58         printprocesslog "WARN $1 is wrong commandline option for jobmanager"
     59         finish >> $jmscriptlog 2>&1
     60         break
     61         ;;
     62esac
     63
     64# choose commands according to queueing system (defined in setup)
     65case $queuesys in
     66      sge)  echo "setting commands for sun grid engine" >> $jmscriptlog 2>&1
     67            alias 'queuesubmit'='/opt/gridengine/bin/lx26-amd64/qsub -b y -e `echo $runlogpath`/error-`echo $date`.log -o `echo $runlogpath`/log-`echo $date`.log `echo $scriptspath`/`echo ${scripts[$i]}` '
     68#            alias 'queuesubmit'='/opt/gridengine/bin/lx26-amd64/qsub -sc runlogpath=`echo $runlogpath` -sc date=`echo $date` -sc scriptspath=`echo $scriptspath` -sc script=`echo ${scripts[$i]}` `echo $scriptspath`/job.sge '
     69            alias 'checkqueue'="/opt/gridengine/bin/lx26-amd64/qstat | awk ' { print \"Owner\"\$4\" \" \$3\"Jobstatus\"\$5 } '"
     70            break
     71            ;;
     72   condor)  echo "setting commands for condor" >> $jmscriptlog 2>&1
     73            alias 'queuesubmit'='/usr/local/bin/condor_submit -a path=`echo $scriptspath` -a prog=`echo ${scripts[$i]}` -a date=`echo $date` -a dir=`echo $runlogpath` `echo $scriptspath`/run.condor'
     74            alias 'checkqueue'='/usr/local/bin/condor_q -global -format "Owner%s " Owner -format "%s" CMD -format "Jobstatus%s\n" Jobstatus '
     75            break
     76            ;;
     77        *)  finish >> $jmscriptlog 2>&1
     78            ;;
     79esac
    54 80
    55 81 prev=$max
     
    59 85 nothingtodocount=0
    60 86 nothingtodosleeptime=0
     87errorsleeptime=$errorsleeptimedefault
    61 88 while (( $notcount < 100 ))
    62 89 do
     
    96 123
    97 124      # get processes in queue
    98       q=(`/usr/local/bin/condor_q -global -format "Owner%s " Owner -format "%s" CMD -format "Jobstatus%s\n" Jobstatus 2>&1 `)
     125#      q=(`/usr/local/bin/condor_q -global -format "Owner%s " Owner -format "%s" CMD -format "Jobstatus%s\n" Jobstatus 2>&1 `)
     126#      q=(`/opt/gridengine/bin/lx26-amd64/qstat | awk ' { print "Owner"$4" " $3"Jobstatus"$5 } ' 2>&1 `)
     127      q=(`checkqueue 2>&1 `)
    99 128      if echo $q | egrep \(Error\|failed\)
    100 129      then
    101          echo `date`" WARN condor_q failed" >> $jmscriptlog 2>&1
    102          printprocesslog "WARN condor_q failed"
    103          echo `date`" WARN condor_q failed" >> $jmerrorlog
     130         echo `date`" WARN checking query ($queuesys) failed" >> $jmscriptlog 2>&1
     131         printprocesslog "WARN checking query ($queuesys) failed"
     132         echo `date`" WARN checking query ($queuesys) failed" >> $jmerrorlog
    104 133         cont >> $jmscriptlog 2>&1
    105 134      fi
     
    111 140      queuedscript=${#q2[@]}
    112 141      # get running scripts
    113       q3=(`echo ${q[@]} | egrep -o ${scripts[$i]}Jobstatus2`)
     142      q3=( `echo ${q[@]} | egrep -o \(${scripts[$i]}Jobstatus2\|${scripts[$i]}Jobstatusr\)` )
    114 143      runningscript=${#q3[@]}
    115 144      stillinqueue=`echo $queuedscript - $runningscript | bc `
     
    151 180      prev=$max
    152 181
    153       # submit 1 script to condor
     182      # submit 1 script to queuing system
    154 183      date=`date +%Y-%m-%d`
    155       echo " committing 1 ${scripts[$i]} to condor" >> $jmscriptlog 2>&1
    156       if ! /usr/local/bin/condor_submit -a path=$scriptspath -a prog=${scripts[$i]} -a date=$date -a dir=$runlogpath $scriptspath/run.condor 2>> $jmerrorlog
    157       then
    158          echo `date`" WARN condor_submit failed" >> $jmerrorlog
    159          echo "condor is not working -> sleeping $errorsleeptime" >> $jmscriptlog 2>&1
    160          printprocesslog "WARN submitting ${scripts[$i]} to condor failed"
     184      echo " committing 1 ${scripts[$i]} to $queuesys" >> $jmscriptlog 2>&1
     185#      if ! /usr/local/bin/condor_submit -a path=$scriptspath -a prog=${scripts[$i]} -a date=$date -a dir=$runlogpath $scriptspath/run.condor 2>> $jmerrorlog
     186#      if ! /opt/gridengine/bin/lx26-amd64/qsub -e $runlogpath/error-$date.log -o $runlogpath/log-$(date).log 2>> $jmerrorlog
     187      if ! queuesubmit 2>> $jmerrorlog
     188      then
     189         echo `date`" WARN submitting job ($queuesys) failed" >> $jmerrorlog
     190         echo "$queuesys is not working -> sleeping $errorsleeptime" >> $jmscriptlog 2>&1
     191         printprocesslog "WARN submitting ${scripts[$i]} ($queuesys) failed"
    161 192         if [ $errorsleeptime -lt $sleeptimelimit ]
    162 193         then
Note: See TracChangeset for help on using the changeset viewer.