/********************************************************************\

  Alarm handler of the Evidence Control System

  - Checks periodically if all required servers are up
  - Listens to the 'Message' service of each server and publishes, for each observed
    server, a service holding the maximum severity seen so far.
  - Maximum severity may be reset by a command 'Alarm/ResetAlarm' for a server.
  - A text describing the current state of all servers is published as DIM service.
  - A master alarm (indicating most severe of individual alarms) is published. 
  - The server can be switched on/off with the command 'Alarm/Switch'.
      
  A mutex is used because UpdateAlarmSummary() may be called from DIM handler thread and
  from main thread.

  Oliver Grimm, February 2011

\********************************************************************/

#define SERVER_NAME "Alarm"
#include "Evidence.h"

#include <sstream>

using namespace std;

const int MIN_PERIOD = 5;	// Lower bound in seconds for the sleep between server checks in main() (used if the configured 'period' is smaller)
const int UNAVA = 255;		// Alarm level assigned to a server that is unavailable (shown as "UNAVAILABLE" in the summary text)

//
// Class declaration
//
// Alarm handler object: acts as DIM client (handler for the subscribed 'Message'
// services) and as Evidence server (publishes the alarm services, accepts commands).
class AlarmHandler: public DimClient, public EvidenceServer {
    
	DimCommand *ResetCommand;		// Command 'Alarm/ResetAlarm' (argument: server name)
	DimCommand *SwitchCommand;		// Command 'Alarm/Switch' (argument "off" deactivates, anything else activates)
	DimService *Summary, *Master;	// Services 'Alarm/Summary' (text) and 'Alarm/MasterAlarm' (integer)
	char *AlarmText;				// Buffer last handed to the Summary service, NULL before first update
	int MasterAlarm;				// Value published by Master: highest Level in List at last summary update

    // DIM callback for updates of the subscribed services
    void infoHandler();
	// DIM callback for received commands
	void commandHandler();

  public:
    AlarmHandler();
    ~AlarmHandler();

	// State kept for every observed server
	struct Item {
	  string Server;					// Server name as given in configuration
	  string Email;						// Address for alarm emails, empty if none configured
	  DimStampedInfo *Subscription;		// Subscription to '<Server>/Message', NULL for DIS_DNS
	  DimService *AlarmLevel;			// Service '<Server>/AlarmLevel' publishing Level
	  int WarnedLevel;					// Level for which an alarm email was sent last (0 after reset)
	  int Level;						// Current alarm level: -1 if not yet checked, UNAVA if unavailable
	};
	vector<struct Item> List;	// One entry per observed server
	bool Active;				// False while the alarm server is switched off

	// Publishes summary text, per-server alarm levels and master alarm
	void UpdateAlarmSummary();
}; 

// Constructor
//
// Creates the services 'Alarm/Summary' and 'Alarm/MasterAlarm', then reads the
// configuration item 'servers'. Each entry has the form 'Server' or 'Server:Email'.
// For every entry a subscription to '<Server>/Message' (not for DIS_DNS) and a
// service '<Server>/AlarmLevel' are created. Finally the commands 'Alarm/ResetAlarm'
// and 'Alarm/Switch' are registered and serving is started.
AlarmHandler::AlarmHandler(): EvidenceServer(SERVER_NAME) {

  struct Item N;
  static int InitLevel = -1; // static for DIM service below

  // Initialise
  MasterAlarm = 0;
  AlarmText = NULL;
  Active = true;

  // Handling of services will only start after start()
  autoStartOff();

  // Create DIM services
  Summary = new DimService(SERVER_NAME"/Summary", (char *) "not yet available");
  Master = new DimService(SERVER_NAME"/MasterAlarm", MasterAlarm);

  // Get DIM servers to observe
  vector<string> Token = Tokenize(GetConfig("servers"));

  for (size_t i=0; i<Token.size(); i++) {
	// Extract server name and email
	vector<string> A = Tokenize(Token[i], ":");

	// Guard against an entry that yields no fields at all: accessing A[0] would be
	// out of range (assumes Tokenize() may return an empty vector, e.g. for ":")
	if (A.empty()) {
	  Message(WARN, "Ignoring malformed entry '%s' in configuration item 'servers'", Token[i].c_str());
	  continue;
	}

	N.Server = A[0];
	if (A.size() == 2) N.Email = A[1];
	else N.Email = string();

	// DIS_DNS has no Message service
	if (N.Server == "DIS_DNS") N.Subscription = NULL;
	else N.Subscription = new DimStampedInfo((N.Server+"/Message").c_str(), NO_LINK, this);

	// Alarm service for server (reference to variable will be updated in UpdateAlarmSummary())
	N.WarnedLevel = 0;
	N.Level = -1;
	N.AlarmLevel = new DimService((N.Server+"/AlarmLevel").c_str(), InitLevel);

	List.push_back(N);
  }

  // Provide commands to reset Level and to switch the alarm server on/off
  ResetCommand = new DimCommand(SERVER_NAME"/ResetAlarm", (char *) "C", this);
  SwitchCommand = new DimCommand(SERVER_NAME"/Switch", (char *) "C", this);
  
  // List set up, can start handling
  start(SERVER_NAME);
}


// Destructor: releases the DIM commands, the per-server subscriptions and
// services, the global services and the buffer of the summary text
AlarmHandler::~AlarmHandler() {

  // Commands
  delete SwitchCommand;
  delete ResetCommand;

  // Objects belonging to the individual servers
  for (vector<struct Item>::iterator Entry = List.begin(); Entry != List.end(); ++Entry) {
    delete Entry->Subscription;
    delete Entry->AlarmLevel;
  }

  // Global services, then the text buffer last handed to the summary service
  delete Master;
  delete Summary;
  delete[] AlarmText;
}


// Print messages of status changes to screen and update status string
void AlarmHandler::infoHandler() {

  // Check if alarm server active
  if (!Active) return;

  // Identify status service
  for (int i=0; i<List.size(); i++) if (getInfo() == List[i].Subscription) {
	// Update level: unavailable or current severity of status (safely extracted)  
	if (!ServiceOK(getInfo())) List[i].Level = UNAVA;
	else {
	  int Severity = atoi(ToString(getInfo()->getFormat(), getInfo()->getData(), getInfo()->getSize()).c_str());
	  if ((Severity>List[i].Level) || (List[i].Level==UNAVA && Severity==0)) List[i].Level = Severity;
	}
  }

  UpdateAlarmSummary();
}


// Handle commands
void AlarmHandler::commandHandler() {

  string Text = ToString((char *) "C", getCommand()->getData(), getCommand()->getSize());

  // Reset alarm level, publish/log action and reset server message severity
  if (getCommand() == ResetCommand) {
	for (int i=0; i<List.size(); i++) if (List[i].Server == Text) {
      Message(INFO, "Alarm level of server %s reset by %s (ID %d)", Text.c_str(), getClientName(), getClientId());
	  List[i].Level = 0;
	  List[i].WarnedLevel = 0;
	  sendCommandNB((Text+"/ResetMessage").c_str(), (int) 0);
	}
  }

  // Switch Alarm server on/off and publish/log action
  if (getCommand() == SwitchCommand) {
    if (Text == "off") Active = false;
	else Active = true;

    Message(INFO, "Alarm server switched %s by %s (ID %d)", Active ? "ON":"OFF", getClientName(), getClientId());
  }
  
  UpdateAlarmSummary();
}


// Update alarm status summary (locking since access can be from main thread and DIM handler threads)
void AlarmHandler::UpdateAlarmSummary() {

  ostringstream Buf;
  string Desc;
  int Alarm = -1, Ret;  

  Lock();

  if (!Active) Buf << "Alarm server inactive";
  else for (int i=0; i<List.size(); i++) {
	// Alarm level description
	Buf << List[i].Server << ": ";
	switch (List[i].Level) {
	case INFO:	Desc = "OK"; break;
	case WARN:	Desc = "WARN"; break;
	case ERROR:	Desc = "ERROR"; break;
	case FATAL:	Desc = "FATAL"; break;
	case UNAVA:	Desc = "UNAVAILABLE"; break;
	default:	Desc = "?"; break;
	}
	Buf << Desc << " (" << List[i].Level << ")" << endl;

	// Adjust master alarm and update server alarm level
	if (List[i].Level > Alarm) Alarm = List[i].Level;
	List[i].AlarmLevel->updateService(List[i].Level);

	// Check if alarm level raised, then send alarm message once
	if (List[i].WarnedLevel < List[i].Level && !List[i].Email.empty()) {
	  List[i].WarnedLevel = List[i].Level;
	  
	  // Prepare email message
	  char *Text;
	  time_t Time = time(NULL);
	  if (asprintf(&Text, "echo \"Server alarm level '%s' (%d) at %s\"|"
			"mail -s \"Evidence Alarm for '%s'\" %s",
			Desc.c_str(), List[i].Level, ctime(&Time), List[i].Server.c_str(), List[i].Email.c_str()) != -1) {
		system(Text); // Return value depending on OS
		free(Text);
	  }
	  else Message(ERROR, "Could not send alarm email, asprintf() failed");
	}
  }
  
  // Update master alarm services
  MasterAlarm = Alarm;   
  Master->updateService();
  
  // Update alarm description (DIM requires variables to be valid until update)
  char *Tmp = new char[Buf.str().size()+1];
  strcpy(Tmp, Buf.str().c_str());  
  Summary->updateService(Tmp);

  delete[] AlarmText;
  AlarmText = Tmp;
  
  Unlock();
}

//	    
// Main program
//
int main() {
    
  DimBrowser B;
  char *Server, *Node;
  bool Exist;
  
  // Static declaration ensures calling of destructor by exit()
  static AlarmHandler A; 

  // Verify periodically that servers exist (if Alarm is active)  
  while(!A.ExitRequest) {
    for (int i=0; i<A.List.size() && A.Active; i++) {
	  // Check if server exists
      Exist = false;
      B.getServers();
      while (B.getNextServer(Server, Node) == 1) {
        if (A.List[i].Server == Server) Exist = true;
      }
	  if (!Exist) A.List[i].Level = UNAVA;

	  // Check if standard service available in case server not yet checked (Level is -1)
	  if (B.getServices((A.List[i].Server+"/VERSION_NUMBER").c_str())>0 && A.List[i].Level==-1) A.List[i].Level = 0;
	}
    
	A.UpdateAlarmSummary();
	sleep(max(atoi(A.GetConfig("period").c_str()), MIN_PERIOD));
  }
}
