Index: trunk/DataCheck/Monitoring/CheckStatus.sh
===================================================================
--- trunk/DataCheck/Monitoring/CheckStatus.sh	(revision 13064)
+++ trunk/DataCheck/Monitoring/CheckStatus.sh	(revision 13064)
@@ -0,0 +1,95 @@
+#!/bin/bash
+#
+# This script checks the status tables in the DB for failed and crashed jobs
+# and mails a list of the affected rows to $erradrs.
+#
+# The helpers printprocesslog, getstepinfo and sendquery as well as $prims
+# and $erradrs are not defined here; presumably they are provided by
+# Sourcefile.sh (to be confirmed there).
+#
+
+source "$(dirname "$0")/../Sourcefile.sh"
+printprocesslog "INFO starting $0"
+
+tables=( RawFileAvailISDC RawFileAvailWue RawFileRsyncedISDC SequenceFileAvailISDC AuxFilesAvailISDC Callisto Star )
+
+for step in "${tables[@]}"
+do
+   # getstepinfo reads $step; it is expected to fill the array $prims with
+   # the key columns of this step (TODO confirm in Sourcefile.sh)
+   getstepinfo
+
+   # build the beginning of the expression which identifies a row:
+   #   CONCAT(<1st key>, '_', LPAD(<2nd key>, 3, 0), ...
+   # it is completed below, once the column shown in brackets is known
+   selstart=""
+   for prim in "${prims[@]}"
+   do
+      # drop a trailing comma, if any, so that the arguments of CONCAT are
+      # always separated by exactly one comma
+      prim=${prim%,}
+      if [ -z "$selstart" ]
+      then
+         selstart=" CONCAT($prim"
+      else
+         selstart="$selstart, '_', LPAD($prim, 3, 0)"
+      fi
+   done
+   if [ -z "$selstart" ]
+   then
+      # without key columns no valid CONCAT(...) can be built
+      printprocesslog "WARN no key columns found for $step - skipping it."
+      continue
+   fi
+
+   # check table for failed jobs
+   #   i.e. rows with a return code and a stop time in the last 25 hours
+   where=" WHERE NOT ISNULL(fReturnCode) "
+   where="$where AND fStopTime > ADDDATE(NOW(), INTERVAL -25 HOUR)"
+   query="SELECT Count(*) FROM ${step}Status $where"
+   num=$(sendquery)
+   # remove whitespace around the number before testing it
+   num=${num//[[:space:]]/}
+   if ! [[ $num =~ ^[0-9]+$ ]]
+   then
+      # an empty or non-numeric answer would break the numeric test below
+      printprocesslog "WARN could not count rows with errors in $step (got '$num')."
+   elif [ "$num" -gt 0 ]
+   then
+      printprocesslog "WARN found in $step $num rows with errors."
+      sel="$selstart, '(', fReturnCode, ')')"
+      query="SELECT $sel FROM ${step}Status $where"
+      # $erradrs stays unquoted in case it holds several addresses
+      sendquery | mail -s "found failed jobs in $step" $erradrs
+      printprocesslog "INFO sent mail about failed jobs in $step to $erradrs"
+   else
+      # nothing found is not worth a warning
+      printprocesslog "INFO found in $step $num rows with errors."
+   fi
+
+   # check table for crashed jobs
+   #   i.e. jobs which are running > 2 hours
+   # check the last 27 hours
+   where=" WHERE NOT ISNULL(fStartTime) AND ISNULL(fStopTime) "
+   where="$where AND fStartTime < ADDDATE(NOW(), INTERVAL -2 HOUR)"
+   where="$where AND fStartTime > ADDDATE(NOW(), INTERVAL -27 HOUR)"
+   query="SELECT Count(*) FROM ${step}Status $where"
+   num=$(sendquery)
+   num=${num//[[:space:]]/}
+   if ! [[ $num =~ ^[0-9]+$ ]]
+   then
+      printprocesslog "WARN could not count crashed jobs in $step (got '$num')."
+   elif [ "$num" -gt 0 ]
+   then
+      printprocesslog "WARN found in $step $num crashed jobs."
+      sel="$selstart, '(', fStartTime, ')')"
+      query="SELECT $sel FROM ${step}Status $where"
+      sendquery | mail -s "found crashed jobs in $step" $erradrs
+      printprocesslog "INFO sent mail about crashed jobs in $step to $erradrs"
+   else
+      printprocesslog "INFO found in $step $num crashed jobs."
+   fi
+done
+
+printprocesslog "INFO finished $0"
+
