source: Evidence/Alarm.cc@ 192

Last change on this file since 192 was 191, checked in by ogrimm, 14 years ago
Implemented automatic email alarm if server down
File size: 4.8 KB
Line 
1/********************************************************************\
2
3 Alarm handler of the Evidence Control System
4
5 - Checks periodically if all required servers are up
6 (later it should try to start them if not)
7 - Listens to the 'Status' service of each server.
8 - A text describing the state of all servers is published as DIM service.
9 The states are described in StateString[].
10 - A master alarm (indicating most severe of individual alarms) is published.
11
12 Oliver Grimm, January 2010
13
14\********************************************************************/
15
16#define SERVER_NAME "Alarm"
17#include "Evidence.h"
18
19#define SUMMARYSIZE 10000 // Bytes for alarm summary text
20
// Human-readable names of the alarm states; the array index is the numeric
// state value stored in AlarmHandler::State[] and published as master alarm.
const char* StateString[] = {
  "OK",           // 0
  "WARN",         // 1
  "ERROR",        // 2
  "FATAL",        // 3
  "UNAVAILABLE"   // 4
};
22
23//
24// Data handling class
25//
26class AlarmHandler : public DimClient, public EvidenceServer {
27
28 DimStampedInfo **StatusService;
29
30 void infoHandler();
31
32 public:
33 AlarmHandler();
34 ~AlarmHandler();
35
36 DimService *Summary, *Master;
37
38 char *AlarmSummary;
39 int MasterAlarm;
40 int *State;
41 bool *Warned;
42 char **Server;
43 unsigned int NumServers;
44 char *ServerList;
45
46 void UpdateAlarmSummary();
47};
48
49// Constructor
50AlarmHandler::AlarmHandler(): EvidenceServer(SERVER_NAME) {
51
52 AlarmSummary = new char [SUMMARYSIZE];
53 MasterAlarm = 0;
54
55 // Create DIM services
56 Summary = new DimService(SERVER_NAME"/Summary", AlarmSummary);
57 Master = new DimService(SERVER_NAME"/MasterAlarm", MasterAlarm);
58
59 // Copy original list of servers to observe
60 char *ServerNames = GetConfig("servers");
61 ServerList = new char [strlen(ServerNames)+1];
62 strcpy(ServerList, ServerNames);
63
64 // Extract DIM servers to observe
65 Server = new char* [strlen(ServerNames)];
66 NumServers = 0;
67 char *NextToken = strtok(ServerNames, " \t");
68 while (NextToken != NULL) {
69 Server[NumServers++] = NextToken; // Subscribe with handler
70 NextToken = strtok(NULL, " \t");
71 }
72
73 // Subscribe with handler to 'Status' service of all servers
74 StatusService = new DimStampedInfo* [NumServers];
75 State = new int [NumServers];
76 Warned = new bool [NumServers];
77
78 for (int i=0; i<NumServers; i++) {
79 char *Buffer = new char [strlen(Server[i])+10];
80 strcpy(Buffer, Server[i]);
81 strcat(Buffer, "/Status");
82 StatusService[i] = new DimStampedInfo(Buffer, NO_LINK, this);
83 delete[] Buffer;
84
85 State[i] = 0;
86 }
87}
88
89// Destructor
90AlarmHandler::~AlarmHandler() {
91
92 for (int i=0; i<NumServers; i++) delete StatusService[i];
93 delete[] StatusService;
94 delete Master;
95 delete Summary;
96 delete[] State;
97 delete[] Server;
98 delete[] ServerList;
99 delete[] AlarmSummary;
100}
101
102// Print messages of status changes to screen and update status string
103void AlarmHandler::infoHandler() {
104
105 // Identify status service
106 for (int i=0; i<NumServers; i++) if (getInfo() == StatusService[i]) {
107
108 // Ignore DIS_DNS (has no status service)
109 if (strcmp(getInfo()->getName(),"DIS_DNS/Status") == 0) return;
110
111 // Update State: unavailable or current severity of status
112 if (!ServiceOK(getInfo())) State[i] = 4;
113 else {
114 State[i] = *(getInfo()->getString()+getInfo()->getSize()-1);
115
116 // Print message
117 time_t RawTime = getInfo()->getTimestamp();
118 struct tm *TM = localtime(&RawTime);
119 printf("%s (%02d:%02d:%02d): %s\n", getInfo()->getName(), TM->tm_hour,
120 TM->tm_min, TM->tm_sec, getInfo()->getString());
121 }
122 UpdateAlarmSummary();
123 }
124}
125
126
127// Update alarm status summary
128void AlarmHandler::UpdateAlarmSummary() {
129
130 int Offset = 0;
131 MasterAlarm = 0;
132
133 for (int i=0; i<NumServers; i++) {
134 snprintf(AlarmSummary+Offset, SUMMARYSIZE-Offset, "%s: %s (%d)\n", Server[i], State[i]<=4 ? StateString[State[i]] : "unknown", State[i]);
135 Offset += strlen(AlarmSummary+Offset);
136 if (State[i] > MasterAlarm) MasterAlarm = State[i];
137 }
138 Summary->updateService();
139 Master->updateService();
140}
141
142//
143// Main program
144//
145int main() {
146
147 DimBrowser Browser;
148 char *ServerName, *Node;
149 bool Exists;
150
151 // Static declaration ensures calling of destructor by exit()
152 static AlarmHandler Alarm;
153
154 // Check periodically if servers are up
155 while(!Alarm.ExitRequest) {
156 for (int i=0; i<Alarm.NumServers; i++) {
157 Exists = false;
158 Browser.getServers();
159 while (Browser.getNextServer(ServerName, Node) == 1) {
160 if (strcmp(ServerName, Alarm.Server[i]) == 0) Exists = true;
161 }
162
163 if (Exists) {
164 Alarm.Warned[i] = false;
165 continue;
166 }
167
168 Alarm.State[i] = 4;
169
170 // If server unavailable, send alarm message once
171 if (Alarm.Warned[i] == false) {
172 Alarm.Warned[i] = true;
173 char *Message;
174 time_t Time = time(NULL);
175 if (asprintf(&Message, "echo \"Server unavailable at %s\"|mail -s \"Evidence Alarm for '%s'\" %s", ctime(&Time), Alarm.Server[i], Alarm.GetConfig("email","")) != -1) {
176 system(Message);
177 free(Message);
178 }
179 }
180 }
181
182 Alarm.UpdateAlarmSummary();
183 sleep(atoi(Alarm.GetConfig("period")));
184 }
185}
Note: See TracBrowser for help on using the repository browser.