Index: trunk/DataCheck/Monitoring/CheckStatus.sh
===================================================================
--- trunk/DataCheck/Monitoring/CheckStatus.sh	(revision 19425)
+++ trunk/DataCheck/Monitoring/CheckStatus.sh	(revision 19433)
@@ -7,5 +7,16 @@
 printprocesslog "INFO starting $0"
 
-tables=( RawFileAvailISDC RawFileAvailWue RawFileRsyncedISDC SequenceFileAvailISDC AuxFilesAvailISDC DriveFileAvailISDC Callisto Star )
+tables=( RawFileAvailISDC RawFileAvailWue RawFileRsyncedISDC SequenceFileAvailISDC AuxFilesAvailISDC DriveFileAvailISDC Callisto Star StarEventsFilledStatus )
+
+printjobs()   # print the mail body for $2 jobs in state $1 (e.g. "failed"); reads globals query, step, where and sets updquery
+{
+   echo "The following $2 jobs are $1: "
+   sendquery   # helper defined outside this block; presumably executes the statement held in $query and prints the matching jobs
+   echo ""
+   echo "to view: $query"   # quoted so the statement is shown verbatim (no word splitting, no filename expansion)
+   echo ""
+   updquery="UPDATE ${step}Status SET fStartTime=NULL, fStopTime=NULL, fReturnCode=NULL, fAvailable=NULL, fProcessingSiteKEY=NULL $where"   # only printed as a hint, never executed here
+   echo "to reset: $updquery"   # quoted for the same reason as the view statement
+}
 
 for step in ${tables[@]}
@@ -33,5 +44,8 @@
    printprocesslog "INFO checking "$step" for failed jobs."
    where=" WHERE NOT ISNULL(fReturnCode) "
-   where=$where" AND fStopTime > ADDDATE(NOW(), INTERVAL -25 HOUR)"              
+   # check only the last 25 hours (disabled)
+   #where=$where" AND fStopTime > ADDDATE(NOW(), INTERVAL -25 HOUR)"              
+   # last 3 days
+   where=$where" AND fStopTime > ADDDATE(NOW(), INTERVAL -72 HOUR)"              
    query="SELECT Count(*) FROM "$step"Status "$where
    num=`sendquery`
@@ -45,5 +59,5 @@
          sel=$selstart", '(', fReturnCode, ')')"
          query="SELECT "$sel" FROM "$step"Status "$where
-         sendquery | mail -s 'found failed jobs in '$step $erradrs
+         printjobs "failed" $num | mail -s 'found '$num' failed jobs in '$step $erradrs
          printprocesslog "INFO sent mail about failed jobs in "$step" to "$erradrs
       fi
@@ -55,6 +69,10 @@
    printprocesslog "INFO checking "$step" for crashed jobs."
    where=" WHERE NOT ISNULL(fStartTime) AND ISNULL(fStopTime) "
-   where=$where" AND fStartTime < ADDDATE(NOW(), INTERVAL -2 HOUR)"
-   where=$where" AND fStartTime > ADDDATE(NOW(), INTERVAL -27 HOUR)"
+   # all crashed jobs (started more than 5 hours ago and never stopped)
+   where=$where" AND fStartTime < ADDDATE(NOW(), INTERVAL -5 HOUR)"
+   where=$where" AND fStartTime > '1971-01-01 01:01:01' "
+   # only the crashed jobs of the last day (started between 2 and 27 hours ago; disabled)
+   #where=$where" AND fStartTime < ADDDATE(NOW(), INTERVAL -2 HOUR)"
+   #where=$where" AND fStartTime > ADDDATE(NOW(), INTERVAL -27 HOUR)"
    query="SELECT Count(*) FROM "$step"Status "$where
    num=`sendquery`
@@ -68,6 +86,27 @@
          sel=$selstart", '(', fStartTime, ')')"
          query="SELECT "$sel" FROM "$step"Status "$where
-         sendquery | mail -s 'found crashed jobs in '$step $erradrs
+         printjobs "crashed" $num | mail -s 'found '$num' crashed jobs in '$step $erradrs
          printprocesslog "INFO sent mail about crashed jobs in "$step" to "$erradrs
+      fi
+   fi
+   
+   # check for jobs with strange status
+   #  i.e. jobs with fStartTime NULL but a valid fStopTime
+   #  (probably they have been reset while a job was running)
+   printprocesslog "INFO checking "$step" for alien jobs."
+   where=" WHERE ISNULL(fStartTime) AND NOT ISNULL(fStopTime) "
+   query="SELECT Count(*) FROM "$step"Status "$where
+   num=`sendquery`
+   if [ "$num" == "" ]
+   then 
+      printprocesslog "WARN could not get number of alien jobs from the DB."
+   else
+      if [ $num -gt 0 ]
+      then
+         printprocesslog "WARN found in "$step" "$num" alien jobs."
+         sel=$selstart")"
+         query="SELECT "$sel" FROM "$step"Status "$where
+         printjobs "alien" $num | mail -s 'found '$num' alien jobs in '$step $erradrs
+         printprocesslog "INFO sent mail about alien jobs in "$step" to "$erradrs
       fi
    fi
