Changeset 227
- Timestamp:
- 06/18/10 13:25:48 (14 years ago)
- Location:
- Evidence
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
Evidence/Alarm.cc
r216 r227 4 4 5 5 - Checks periodically if all required servers are up 6 (later it should try to start them if not) 7 - Listens to the 'Status' service of each server. 8 - A text describing the state of all servers is published as DIM service. 9 The states are described in StateString[]. 6 - Listens to the 'Message' service of each server and generates new service for 7 each observed server indicating the maximum Severity in the past. 8 - Maximum severity may be reset by a command 'Alarm/ResetAlarm' for a server. 9 - A text describing the current state of all servers is published as DIM service. 10 The states are described in LevelStr[]. 10 11 - A master alarm (indicating most severe of individual alarms) is published. 11 12 12 Oliver Grimm, January 2010 13 A mutex is used because UpdateAlarmSummary() may be called from DIM handler thread and 14 from main thread. 15 16 Oliver Grimm, June 2010 13 17 14 18 \********************************************************************/ … … 17 21 #include "Evidence.h" 18 22 19 #define SUMMARYSIZE 10000 // Bytes for alarm summary text 20 21 const char* StateString[] = {"OK", "WARN", "ERROR", "FATAL", "UNAVAILABLE"}; 23 #include <sstream> 24 25 using namespace std; 26 27 const char* LevelStr[] = {"OK", "WARN", "ERROR", "FATAL", "UNAVAILABLE"}; 22 28 23 29 // 24 30 // Data handling class 25 31 // 26 class AlarmHandler : public DimClient, public EvidenceServer { 27 28 DimStampedInfo **StatusService; 32 class AlarmHandler: public DimClient, public EvidenceServer { 33 34 DimCommand *Command; 35 DimService *Summary, *Master; 36 char *AlarmText; 37 int MasterAlarm; 38 pthread_mutex_t Mutex; 29 39 30 40 void infoHandler(); 41 void commandHandler(); 31 42 32 43 public: … … 34 45 ~AlarmHandler(); 35 46 36 DimService *Summary, *Master;37 38 char *AlarmSummary;39 int MasterAlarm;40 int *State;41 bool *Warned;42 char **Server;43 unsigned int NumServers;44 char *ServerList; 45 47 struct Item { 48 string Server; 49 string Email; 50 
DimStampedInfo *Subscription; 51 DimService *AlarmLevel; 52 int WarnedLevel; 53 int Level; 54 }; 55 vector<struct Item> List; 56 46 57 void UpdateAlarmSummary(); 47 58 }; … … 50 61 AlarmHandler::AlarmHandler(): EvidenceServer(SERVER_NAME) { 51 62 52 AlarmSummary = new char [SUMMARYSIZE]; 63 struct Item N; 64 static int InitLevel = -1; // static for DIM service below 65 66 // Initialise 53 67 MasterAlarm = 0; 68 AlarmText = NULL; 69 70 if (pthread_mutex_init(&Mutex, NULL) != 0) { 71 Message(FATAL, "pthread_mutex_init failed"); 72 } 73 74 // Handling of servies will only start after start() 75 autoStartOff(); 54 76 55 77 // Create DIM services 56 Summary = new DimService(SERVER_NAME"/Summary", AlarmSummary);78 Summary = new DimService(SERVER_NAME"/Summary", (char *) "not yet available"); 57 79 Master = new DimService(SERVER_NAME"/MasterAlarm", MasterAlarm); 58 80 59 // Copy original list of servers to observe 60 char *ServerNames = GetConfig("servers"); 61 ServerList = new char [strlen(ServerNames)+1]; 62 strcpy(ServerList, ServerNames); 63 64 // Extract DIM servers to observe 65 Server = new char* [strlen(ServerNames)]; 66 NumServers = 0; 67 char *NextToken = strtok(ServerNames, " \t"); 68 while (NextToken != NULL) { 69 Server[NumServers++] = NextToken; // Subscribe with handler 70 NextToken = strtok(NULL, " \t"); 71 } 72 73 // Subscribe with handler to 'Message' service of all servers 74 StatusService = new DimStampedInfo* [NumServers]; 75 State = new int [NumServers]; 76 Warned = new bool [NumServers]; 77 78 for (int i=0; i<NumServers; i++) { 79 char *Buffer = new char [strlen(Server[i])+10]; 80 strcpy(Buffer, Server[i]); 81 strcat(Buffer, "/Message"); 82 StatusService[i] = new DimStampedInfo(Buffer, NO_LINK, this); 83 delete[] Buffer; 84 85 State[i] = 0; 86 } 87 } 81 // Get DIM servers to observe 82 char *Token = strtok(GetConfig("servers"), " \t"); 83 int Pos; 84 while (Token != NULL) { 85 // Extract server name and email 86 N.Server = Token; 87 Pos = 
N.Server.find(':'); 88 if (Pos > 0 && Pos < N.Server.size()-2) { 89 N.Email = N.Server.substr(Pos+1, string::npos); 90 N.Server = N.Server.substr(0, Pos); 91 } 92 else N.Email = string(); 93 94 // DIS_DNS has no Message service 95 if (N.Server == "DIS_DNS") N.Subscription = NULL; 96 else N.Subscription = new DimStampedInfo((N.Server+"/Message").c_str(), NO_LINK, this); 97 98 // Alarm service for server (reference to variable will be updated in UpdateAlarmSummary()) 99 N.WarnedLevel = 0; 100 N.Level = -1; 101 N.AlarmLevel = new DimService((N.Server+"/AlarmLevel").c_str(), InitLevel); 102 103 List.push_back(N); 104 Token = strtok(NULL, " \t"); 105 } 106 107 // Provide command to reset Level 108 Command = new DimCommand("Alarm/ResetAlarm", (char *) "C", this); 109 110 // List set up, can start handling 111 start(SERVER_NAME); 112 } 113 88 114 89 115 // Destructor 90 116 AlarmHandler::~AlarmHandler() { 91 117 92 for (int i=0; i<NumServers; i++) delete StatusService[i]; 93 delete[] StatusService; 118 delete Command; 119 120 for (int i=0; i<List.size(); i++) { 121 delete List[i].Subscription; 122 delete List[i].AlarmLevel; 123 } 94 124 delete Master; 95 125 delete Summary; 96 delete[] State;97 delete[] Server;98 delete[] ServerList;99 delete[] AlarmSummary; 100 } 126 delete[] AlarmText; 127 128 pthread_mutex_destroy(&Mutex); 129 } 130 101 131 102 132 // Print messages of status changes to screen and update status string … … 104 134 105 135 // Identify status service 106 for (int i=0; i<NumServers; i++) if (getInfo() == StatusService[i]) { 107 108 // Ignore DIS_DNS (has no status service) 109 if (strcmp(getInfo()->getName(),"DIS_DNS/Message") == 0) return; 110 111 // Update State: unavailable or current severity of status 112 if (!ServiceOK(getInfo())) State[i] = 4; 113 else { 114 State[i] = *(getInfo()->getString()+getInfo()->getSize()-1); 115 116 // Print message 117 time_t RawTime = getInfo()->getTimestamp(); 118 struct tm *TM = localtime(&RawTime); 119 printf("%s 
(%02d:%02d:%02d): %s\n", getInfo()->getName(), TM->tm_hour, 120 TM->tm_min, TM->tm_sec, getInfo()->getString()); 121 } 122 UpdateAlarmSummary(); 123 } 136 for (int i=0; i<List.size(); i++) if (getInfo() == List[i].Subscription) { 137 // Update level: unavailable or current severity of status 138 if (!ServiceOK(getInfo())) List[i].Level = 4; 139 else if (getInfo()->getInt() > List[i].Level) List[i].Level = getInfo()->getInt(); 140 } 141 142 UpdateAlarmSummary(); 143 } 144 145 146 // Reset alarm level of given server 147 void AlarmHandler::commandHandler() { 148 149 DimCommand *C = getCommand(); 150 151 // Check for valid command parameter 152 if (C != Command) return; 153 if (C->getSize() == 0) return; 154 if (*((char *) C->getData() + C->getSize() - 1) != '\0') return; 155 156 // Reset alarm level and publish/log action 157 for (int i=0; i<List.size(); i++) if (List[i].Server == C->getString()) { 158 Message(INFO, "Alarm level of server %s reset by %s (ID %d)", C->getString(), getClientName(), getClientId()); 159 List[i].Level = 0; 160 List[i].WarnedLevel = 0; 161 } 162 163 UpdateAlarmSummary(); 124 164 } 125 165 … … 127 167 // Update alarm status summary 128 168 void AlarmHandler::UpdateAlarmSummary() { 129 130 int Offset = 0; 131 MasterAlarm = 0; 132 133 for (int i=0; i<NumServers; i++) { 134 snprintf(AlarmSummary+Offset, SUMMARYSIZE-Offset, "%s: %s (%d)\n", Server[i], State[i]<=4 ? StateString[State[i]] : "unknown", State[i]); 135 Offset += strlen(AlarmSummary+Offset); 136 if (State[i] > MasterAlarm) MasterAlarm = State[i]; 137 } 138 Summary->updateService(); 169 170 ostringstream Buf; 171 int Alarm, Ret; 172 173 // Lock because access can be from main thread and DIM handler thread 174 if ((Ret = pthread_mutex_lock(&Mutex)) != 0) { 175 Message(FATAL, "pthread_mutex_lock() failed (%s)", strerror(Ret)); 176 } 177 178 for (int i=0; i<List.size(); i++) { 179 // Alarm level description 180 Buf << List[i].Server << ": " << (List[i].Level>=0 && List[i].Level<=4 ? 
LevelStr[List[i].Level] : "unknown"); 181 Buf << " (" << List[i].Level << ")" << endl; 182 183 // Adjust master alarm and update server alarm level 184 if (List[i].Level > Alarm) Alarm = List[i].Level; 185 List[i].AlarmLevel->updateService(List[i].Level); 186 187 // Check if alarm level raised, then send alarm message once 188 if (List[i].WarnedLevel < List[i].Level && !List[i].Email.empty()) { 189 List[i].WarnedLevel = List[i].Level; 190 191 // Prepare email message 192 char *Text; 193 time_t Time = time(NULL); 194 if (asprintf(&Text, "echo \"Server alarm level '%s' at %s\"|" 195 "mail -s \"Evidence Alarm for '%s'\" %s", 196 List[i].Level>=0 && List[i].Level<=4 ? LevelStr[List[i].Level] : "unknown", 197 ctime(&Time), List[i].Server.c_str(), List[i].Email.c_str()) != -1) { 198 system(Text); // Return value depending on OS 199 free(Text); 200 } 201 else Message(ERROR, "Could not send alarm email, asprintf() failed"); 202 } 203 } 204 205 // Update master alarm services 206 MasterAlarm = Alarm; 139 207 Master->updateService(); 208 209 // Update alarm description (DIM requires variables to be valid until update) 210 char *Tmp = new char[Buf.str().size()+1]; 211 strcpy(Tmp, Buf.str().c_str()); 212 Summary->updateService(Tmp); 213 214 delete[] AlarmText; 215 AlarmText = Tmp; 216 217 // Unlock 218 if ((Ret = pthread_mutex_unlock(&Mutex)) != 0) { 219 Message(FATAL, "pthread_mutex_unlock() failed (%s)", strerror(Ret)); 220 } 140 221 } 141 222 … … 146 227 147 228 DimBrowser Browser; 148 char *Server Name, *Node;149 bool Exist s;229 char *Server, *Node; 230 bool Exist; 150 231 151 232 // Static declaration ensures calling of destructor by exit() … … 154 235 // Check periodically if servers are up 155 236 while(!Alarm.ExitRequest) { 156 for (int i=0; i<Alarm.NumServers; i++) { 157 Exists = false; 237 238 for (int i=0; i<Alarm.List.size(); i++) { 239 Exist = false; 158 240 Browser.getServers(); 159 while (Browser.getNextServer(Server Name, Node) == 1) {160 if ( 
strcmp(ServerName, Alarm.Server[i]) == 0) Exists= true;241 while (Browser.getNextServer(Server, Node) == 1) { 242 if (Alarm.List[i].Server == Server) Exist = true; 161 243 } 162 163 if (Exists) { 164 Alarm.Warned[i] = false; 165 continue; 166 } 167 168 Alarm.State[i] = 4; 169 170 // If server unavailable, send alarm message once 171 if (Alarm.Warned[i] == false) { 172 Alarm.Warned[i] = true; 173 char *Message; 174 time_t Time = time(NULL); 175 if (asprintf(&Message, "echo \"Server unavailable at %s\"|mail -s \"Evidence Alarm for '%s'\" %s", ctime(&Time), Alarm.Server[i], Alarm.GetConfig("email","")) != -1) { 176 system(Message); 177 free(Message); 178 } 179 } 244 if (!Exist) Alarm.List[i].Level = 4; 245 else if (Alarm.List[i].Level = -1) Alarm.List[i].Level = 0; 180 246 } 181 247 -
Evidence/readme.txt
r224 r227 41 41 17/6/2010 Added SendToLog() method. Changed severity encoding of Message service to 42 42 use standard DIM structure of format "I:1;C" 43 18/6/2010 Alarm server configuration now accepts one email address per server. A new 44 service for each observed server SERVERNAME/AlarmLevel contains the highest 45 level that occurred in the past. The alarm level can be reset only via a DIM command. 46 43 47 44 48
Note:
See TracChangeset
for help on using the changeset viewer.