Index: Evidence/Alarm.cc
===================================================================
--- Evidence/Alarm.cc	(revision 224)
+++ Evidence/Alarm.cc	(revision 227)
@@ -4,11 +4,15 @@
 
   - Checks periodically if all required servers are up
-    (later it should try to start them if not)
-  - Listens to the 'Status' service of each server.
-  - A text describing the state of all servers is published as DIM service.
-    The states are described in StateString[].
+  - Listens to the 'Message' service of each server and generates a new service for
+    each observed server indicating the maximum severity seen so far.
+  - Maximum severity may be reset by a command 'Alarm/ResetAlarm' for a server.
+  - A text describing the current state of all servers is published as DIM service.
+    The states are described in LevelStr[].
   - A master alarm (indicating most severe of individual alarms) is published. 
     
-  Oliver Grimm, January 2010
+  A mutex is used because UpdateAlarmSummary() may be called from DIM handler thread and
+  from main thread.
+
+  Oliver Grimm, June 2010
 
 \********************************************************************/
@@ -17,16 +21,23 @@
 #include "Evidence.h"
 
-#define SUMMARYSIZE 10000	// Bytes for alarm summary text
-
-const char* StateString[] = {"OK", "WARN", "ERROR", "FATAL", "UNAVAILABLE"};
+#include <sstream>
+
+using namespace std;
+
+const char* LevelStr[] = {"OK", "WARN", "ERROR", "FATAL", "UNAVAILABLE"}; // Text for alarm levels 0..4 (index = level)
 
 //
 // Data handling class
 //
-class AlarmHandler : public DimClient, public EvidenceServer {
-    
-    DimStampedInfo **StatusService;
+class AlarmHandler: public DimClient, public EvidenceServer {
+    
+	DimCommand *Command;
+	DimService *Summary, *Master;
+	char *AlarmText;
+	int MasterAlarm;
+	pthread_mutex_t Mutex;
 
     void infoHandler();
+	void commandHandler();
 
   public:
@@ -34,14 +45,14 @@
     ~AlarmHandler();
 
-	DimService *Summary, *Master;
-	
-	char *AlarmSummary;
-	int MasterAlarm;
-	int *State;
-	bool *Warned;    
-    char **Server;
-    unsigned int NumServers;
-    char *ServerList; 
-	
+	struct Item {                        // One entry per observed server
+	  string Server;                     // Server name taken from the 'servers' configuration entry
+	  string Email;                      // Address given after ':' in the configuration token; empty if none
+	  DimStampedInfo *Subscription;      // Subscription to '<Server>/Message'; NULL for DIS_DNS
+	  DimService *AlarmLevel;            // Published service '<Server>/AlarmLevel'
+	  int WarnedLevel;                   // Level for which an alarm email was last sent
+	  int Level;                         // Current alarm level: -1 initially, 0..4 index LevelStr[]
+	};
+	vector<struct Item> List;
+
 	void UpdateAlarmSummary();
 }; 
@@ -50,53 +61,72 @@
 AlarmHandler::AlarmHandler(): EvidenceServer(SERVER_NAME) {
 
-  AlarmSummary = new char [SUMMARYSIZE];
+  struct Item N; // Scratch entry, filled for each configuration token and copied into List
+  static int InitLevel = -1; // static so that it stays valid for the AlarmLevel services created below
+
+  // Initialise members; AlarmText stays NULL until the first summary is built
   MasterAlarm = 0;
+  AlarmText = NULL;
+  
+  if (pthread_mutex_init(&Mutex, NULL) != 0) {
+    Message(FATAL, "pthread_mutex_init failed");
+  }
+
+  // Handling of services will only start after start()
+  autoStartOff();
 
   // Create DIM services
-  Summary = new DimService(SERVER_NAME"/Summary", AlarmSummary);
+  Summary = new DimService(SERVER_NAME"/Summary", (char *) "not yet available");
   Master = new DimService(SERVER_NAME"/MasterAlarm", MasterAlarm);
 
-  // Copy original list of servers to observe
-  char *ServerNames = GetConfig("servers");
-  ServerList = new char [strlen(ServerNames)+1];
-  strcpy(ServerList, ServerNames);
-  
-  // Extract DIM servers to observe
-  Server = new char* [strlen(ServerNames)];
-  NumServers = 0;
-  char *NextToken = strtok(ServerNames, " \t");
-  while (NextToken != NULL) {
-    Server[NumServers++] = NextToken; // Subscribe with handler
-    NextToken = strtok(NULL, " \t");     
-  }
-
-  // Subscribe with handler to 'Message' service of all servers
-  StatusService = new DimStampedInfo* [NumServers];
-  State = new int [NumServers];
-  Warned = new bool [NumServers];
-
-  for (int i=0; i<NumServers; i++) {
-    char *Buffer = new char [strlen(Server[i])+10];
-    strcpy(Buffer, Server[i]);
-    strcat(Buffer, "/Message");
-    StatusService[i] = new DimStampedInfo(Buffer, NO_LINK, this);
-    delete[] Buffer;
-	
-	State[i] = 0;
-  }
-}
+  // Get DIM servers to observe: whitespace-separated tokens of the form 'server' or 'server:email'
+  char *Token = strtok(GetConfig("servers"), " \t");
+  int Pos;
+  while (Token != NULL) {
+	// Extract server name and email (split at the first ':')
+	N.Server = Token;
+	Pos = N.Server.find(':');
+	if (Pos > 0 && Pos < N.Server.size()-2) { // NOTE(review): a token like "srv:x" fails this test and keeps the ':' in Server - confirm intended
+	  N.Email = N.Server.substr(Pos+1, string::npos);
+	  N.Server = N.Server.substr(0, Pos);	
+	}
+	else N.Email = string();
+
+	// DIS_DNS has no Message service
+	if (N.Server == "DIS_DNS") N.Subscription = NULL;
+	else N.Subscription = new DimStampedInfo((N.Server+"/Message").c_str(), NO_LINK, this);
+
+	// Alarm service for server (reference to variable will be updated in UpdateAlarmSummary())
+	N.WarnedLevel = 0;
+	N.Level = -1; // Outside 0..4, so it is shown as "unknown" in the summary
+	N.AlarmLevel = new DimService((N.Server+"/AlarmLevel").c_str(), InitLevel);
+
+	List.push_back(N);
+    Token = strtok(NULL, " \t");     
+  }
+
+  // Provide command to reset Level (handled in commandHandler())
+  Command = new DimCommand("Alarm/ResetAlarm", (char *) "C", this);
+  
+  // List set up, can start handling
+  start(SERVER_NAME);
+}
+
 
 // Destructor
 AlarmHandler::~AlarmHandler() {
 
-  for (int i=0; i<NumServers; i++) delete StatusService[i];
-  delete[] StatusService;
+  delete Command;
+
+  for (int i=0; i<List.size(); i++) { // Release per-server subscription and alarm level service
+    delete List[i].Subscription; // NULL for DIS_DNS; deleting NULL is a no-op
+    delete List[i].AlarmLevel;
+  }	
   delete Master;
   delete Summary;
-  delete[] State;
-  delete[] Server;
-  delete[] ServerList;
-  delete[] AlarmSummary;
-}
+  delete[] AlarmText; // Summary text buffer allocated in UpdateAlarmSummary() (NULL if never built)
+  
+  pthread_mutex_destroy(&Mutex);
+}
+
 
 // Print messages of status changes to screen and update status string
@@ -104,22 +134,32 @@
 
   // Identify status service
-  for (int i=0; i<NumServers; i++) if (getInfo() == StatusService[i]) {
-
-	// Ignore DIS_DNS (has no status service)
-	if (strcmp(getInfo()->getName(),"DIS_DNS/Message") == 0) return;
-	
-	// Update State: unavailable or current severity of status  
-	if (!ServiceOK(getInfo())) State[i] = 4;
-	else {
-	  State[i] = *(getInfo()->getString()+getInfo()->getSize()-1);
-
-	  // Print message
-	  time_t RawTime = getInfo()->getTimestamp();
-	  struct tm *TM = localtime(&RawTime);
-	  printf("%s (%02d:%02d:%02d): %s\n", getInfo()->getName(), TM->tm_hour,
-		TM->tm_min, TM->tm_sec, getInfo()->getString());	  
-	}
-	UpdateAlarmSummary();
-  }  
+  for (int i=0; i<List.size(); i++) if (getInfo() == List[i].Subscription) {
+	// Update level: unavailable or current severity of status  
+	if (!ServiceOK(getInfo())) List[i].Level = 4;
+	else if (getInfo()->getInt() > List[i].Level) List[i].Level = getInfo()->getInt();
+  }
+
+  UpdateAlarmSummary();
+}
+
+
+// Handler for the 'Alarm/ResetAlarm' command: resets the alarm level of the server named in the command string
+void AlarmHandler::commandHandler() {
+
+  DimCommand *C = getCommand();
+
+  // Check for valid command parameter
+  if (C != Command) return; // Not the reset command created in the constructor
+  if (C->getSize() == 0) return; // No payload
+  if (*((char *) C->getData() + C->getSize() - 1) != '\0') return; // Payload must be NUL-terminated before it is used as a string
+ 
+  // Reset alarm level and publish/log action (every list entry with a matching server name)
+  for (int i=0; i<List.size(); i++) if (List[i].Server == C->getString()) {
+    Message(INFO, "Alarm level of server %s reset by %s (ID %d)", C->getString(), getClientName(), getClientId());
+	List[i].Level = 0;
+	List[i].WarnedLevel = 0; // Allows a new email when the level rises again
+  }
+  
+  UpdateAlarmSummary();
 }
 
@@ -127,15 +167,56 @@
 // Update alarm status summary
 void AlarmHandler::UpdateAlarmSummary() {
-  
-  int Offset = 0;
-  MasterAlarm = 0;
-   
-  for (int i=0; i<NumServers; i++) {
-    snprintf(AlarmSummary+Offset, SUMMARYSIZE-Offset, "%s: %s (%d)\n", Server[i], State[i]<=4 ? StateString[State[i]] : "unknown", State[i]);
-	Offset += strlen(AlarmSummary+Offset);
-	if (State[i] > MasterAlarm) MasterAlarm = State[i];
-  }
-  Summary->updateService();
+  // Rebuilds summary text, per-server AlarmLevel services and master alarm; sends one email per raised level
+  ostringstream Buf;
+  int Alarm = 0, Ret;
+  // Alarm must start at 0 (as MasterAlarm did before): it is the running maximum compared against below
+  // Lock because access can be from main thread and DIM handler thread
+  if ((Ret = pthread_mutex_lock(&Mutex)) != 0) {
+	Message(FATAL, "pthread_mutex_lock() failed (%s)", strerror(Ret));
+  }
+ 
+  for (int i=0; i<List.size(); i++) {
+	// Alarm level description: "<server>: <text> (<level>)", levels outside 0..4 shown as "unknown"
+	Buf << List[i].Server << ": " << (List[i].Level>=0 && List[i].Level<=4 ? LevelStr[List[i].Level] : "unknown");
+	Buf << " (" << List[i].Level << ")" << endl;
+
+	// Adjust master alarm and update server alarm level
+	if (List[i].Level > Alarm) Alarm = List[i].Level;
+	List[i].AlarmLevel->updateService(List[i].Level);
+
+	// Check if alarm level raised, then send alarm message once
+	if (List[i].WarnedLevel < List[i].Level && !List[i].Email.empty()) {
+	  List[i].WarnedLevel = List[i].Level;
+	  // NOTE(review): Server and Email come from the configuration and reach the shell unescaped
+	  // Prepare email message
+	  char *Text;
+	  time_t Time = time(NULL);
+	  if (asprintf(&Text, "echo \"Server alarm level '%s' at %s\"|"
+			"mail -s \"Evidence Alarm for '%s'\" %s",
+			List[i].Level>=0 && List[i].Level<=4 ? LevelStr[List[i].Level] : "unknown",
+	  		ctime(&Time), List[i].Server.c_str(), List[i].Email.c_str()) != -1) {
+		system(Text); // Return value depending on OS
+		free(Text);
+	  }
+	  else Message(ERROR, "Could not send alarm email, asprintf() failed");
+	}
+  }
+  
+  // Update master alarm services (most severe level of all servers, at least 0)
+  MasterAlarm = Alarm;
   Master->updateService();
+  
+  // Update alarm description (DIM requires variables to be valid until update)
+  char *Tmp = new char[Buf.str().size()+1];
+  strcpy(Tmp, Buf.str().c_str());
+  Summary->updateService(Tmp);
+  // Previous text is released only after the service has been switched to the new buffer
+  delete[] AlarmText;
+  AlarmText = Tmp;
+  
+  // Unlock
+  if ((Ret = pthread_mutex_unlock(&Mutex)) != 0) {
+	Message(FATAL, "pthread_mutex_unlock() failed (%s)", strerror(Ret));
+  }
 }
 
@@ -146,6 +227,6 @@
     
   DimBrowser Browser;
-  char *ServerName, *Node;
-  bool Exists;
+  char *Server, *Node;
+  bool Exist;
   
   // Static declaration ensures calling of destructor by exit()
@@ -154,28 +235,13 @@
   // Check periodically if servers are up
   while(!Alarm.ExitRequest) {
-    for (int i=0; i<Alarm.NumServers; i++) {
-      Exists = false;
+
+    for (int i=0; i<Alarm.List.size(); i++) {
+      Exist = false;
       Browser.getServers();
-      while (Browser.getNextServer(ServerName, Node) == 1) {
-        if (strcmp(ServerName, Alarm.Server[i]) == 0) Exists = true;
+      while (Browser.getNextServer(Server, Node) == 1) {
+        if (Alarm.List[i].Server == Server) Exist = true;
       }
-
-      if (Exists) {
-		Alarm.Warned[i] = false;
-		continue;
-	  }
-
-	  Alarm.State[i] = 4;
-
-	  // If server unavailable, send alarm message once
-	  if (Alarm.Warned[i] == false) {
-		Alarm.Warned[i] = true;
-		char *Message;
-		time_t Time = time(NULL);
-		if (asprintf(&Message, "echo \"Server unavailable at %s\"|mail -s \"Evidence Alarm for '%s'\" %s", ctime(&Time), Alarm.Server[i], Alarm.GetConfig("email","")) != -1) {
-		  system(Message);
-		  free(Message);
-		}
-	  }
+	  if (!Exist) Alarm.List[i].Level = 4; // Server not listed by the DIM browser: level 4 (UNAVAILABLE)
+	  else if (Alarm.List[i].Level == -1) Alarm.List[i].Level = 0; // Only the initial -1 becomes 0; '=' here would reset every level on each poll
     }
     
Index: Evidence/readme.txt
===================================================================
--- Evidence/readme.txt	(revision 224)
+++ Evidence/readme.txt	(revision 227)
@@ -41,4 +41,8 @@
 17/6/2010	Added SendToLog() method. Changed severity encoding of Message service to
 			use standard DIM structure of format "I:1;C"
+18/6/2010	Alarm server configuration now accepts one email address per server. A new
+			service for each observed server, SERVERNAME/AlarmLevel, contains the highest
+			level that occurred in the past. The alarm level can be reset only via a DIM command.
+
 
 
