/********************************************************************\

  Alarm handler of the Evidence Control System

  - Checks periodically if all required servers are up
    (later it should try to start them if not)
  - Listens to the 'Status' service of each server.
  - A text describing the state of all servers is published as a DIM service.
    The states are described in StateString[].
  - A master alarm (indicating the most severe of the individual alarms)
    is published.

  Oliver Grimm, January 2010

\********************************************************************/
15 |
|
---|
16 | #define SERVER_NAME "Alarm"
|
---|
17 | #include "Evidence.h"
|
---|
18 |
|
---|
#define SUMMARYSIZE 10000	// Bytes for alarm summary text

// Text for each alarm state. The array index is the numeric state value kept
// in AlarmHandler::State[]; index 4 is used for servers that are unavailable.
const char* StateString[] = {
  "OK",           // 0
  "WARN",         // 1
  "ERROR",        // 2
  "FATAL",        // 3
  "UNAVAILABLE"   // 4
};
22 |
|
---|
23 | //
|
---|
24 | // Data handling class
|
---|
25 | //
|
---|
26 | class AlarmHandler : public DimClient, public EvidenceServer {
|
---|
27 |
|
---|
28 | DimStampedInfo **StatusService;
|
---|
29 |
|
---|
30 | void infoHandler();
|
---|
31 |
|
---|
32 | public:
|
---|
33 | AlarmHandler();
|
---|
34 | ~AlarmHandler();
|
---|
35 |
|
---|
36 | DimService *Summary, *Master;
|
---|
37 |
|
---|
38 | char *AlarmSummary;
|
---|
39 | int MasterAlarm;
|
---|
40 | int *State;
|
---|
41 | bool *Warned;
|
---|
42 | char **Server;
|
---|
43 | unsigned int NumServers;
|
---|
44 | char *ServerList;
|
---|
45 |
|
---|
46 | void UpdateAlarmSummary();
|
---|
47 | };
|
---|
48 |
|
---|
49 | // Constructor
|
---|
50 | AlarmHandler::AlarmHandler(): EvidenceServer(SERVER_NAME) {
|
---|
51 |
|
---|
52 | AlarmSummary = new char [SUMMARYSIZE];
|
---|
53 | MasterAlarm = 0;
|
---|
54 |
|
---|
55 | // Create DIM services
|
---|
56 | Summary = new DimService(SERVER_NAME"/Summary", AlarmSummary);
|
---|
57 | Master = new DimService(SERVER_NAME"/MasterAlarm", MasterAlarm);
|
---|
58 |
|
---|
59 | // Copy original list of servers to observe
|
---|
60 | char *ServerNames = GetConfig("servers");
|
---|
61 | ServerList = new char [strlen(ServerNames)+1];
|
---|
62 | strcpy(ServerList, ServerNames);
|
---|
63 |
|
---|
64 | // Extract DIM servers to observe
|
---|
65 | Server = new char* [strlen(ServerNames)];
|
---|
66 | NumServers = 0;
|
---|
67 | char *NextToken = strtok(ServerNames, " \t");
|
---|
68 | while (NextToken != NULL) {
|
---|
69 | Server[NumServers++] = NextToken; // Subscribe with handler
|
---|
70 | NextToken = strtok(NULL, " \t");
|
---|
71 | }
|
---|
72 |
|
---|
73 | // Subscribe with handler to 'Status' service of all servers
|
---|
74 | StatusService = new DimStampedInfo* [NumServers];
|
---|
75 | State = new int [NumServers];
|
---|
76 | Warned = new bool [NumServers];
|
---|
77 |
|
---|
78 | for (int i=0; i<NumServers; i++) {
|
---|
79 | char *Buffer = new char [strlen(Server[i])+10];
|
---|
80 | strcpy(Buffer, Server[i]);
|
---|
81 | strcat(Buffer, "/Status");
|
---|
82 | StatusService[i] = new DimStampedInfo(Buffer, NO_LINK, this);
|
---|
83 | delete[] Buffer;
|
---|
84 |
|
---|
85 | State[i] = 0;
|
---|
86 | }
|
---|
87 | }
|
---|
88 |
|
---|
89 | // Destructor
|
---|
90 | AlarmHandler::~AlarmHandler() {
|
---|
91 |
|
---|
92 | for (int i=0; i<NumServers; i++) delete StatusService[i];
|
---|
93 | delete[] StatusService;
|
---|
94 | delete Master;
|
---|
95 | delete Summary;
|
---|
96 | delete[] State;
|
---|
97 | delete[] Server;
|
---|
98 | delete[] ServerList;
|
---|
99 | delete[] AlarmSummary;
|
---|
100 | }
|
---|
101 |
|
---|
102 | // Print messages of status changes to screen and update status string
|
---|
103 | void AlarmHandler::infoHandler() {
|
---|
104 |
|
---|
105 | // Identify status service
|
---|
106 | for (int i=0; i<NumServers; i++) if (getInfo() == StatusService[i]) {
|
---|
107 |
|
---|
108 | // Ignore DIS_DNS (has no status service)
|
---|
109 | if (strcmp(getInfo()->getName(),"DIS_DNS/Status") == 0) return;
|
---|
110 |
|
---|
111 | // Update State: unavailable or current severity of status
|
---|
112 | if (!ServiceOK(getInfo())) State[i] = 4;
|
---|
113 | else {
|
---|
114 | State[i] = *(getInfo()->getString()+getInfo()->getSize()-1);
|
---|
115 |
|
---|
116 | // Print message
|
---|
117 | time_t RawTime = getInfo()->getTimestamp();
|
---|
118 | struct tm *TM = localtime(&RawTime);
|
---|
119 | printf("%s (%02d:%02d:%02d): %s\n", getInfo()->getName(), TM->tm_hour,
|
---|
120 | TM->tm_min, TM->tm_sec, getInfo()->getString());
|
---|
121 | }
|
---|
122 | UpdateAlarmSummary();
|
---|
123 | }
|
---|
124 | }
|
---|
125 |
|
---|
126 |
|
---|
127 | // Update alarm status summary
|
---|
128 | void AlarmHandler::UpdateAlarmSummary() {
|
---|
129 |
|
---|
130 | int Offset = 0;
|
---|
131 | MasterAlarm = 0;
|
---|
132 |
|
---|
133 | for (int i=0; i<NumServers; i++) {
|
---|
134 | snprintf(AlarmSummary+Offset, SUMMARYSIZE-Offset, "%s: %s (%d)\n", Server[i], State[i]<=4 ? StateString[State[i]] : "unknown", State[i]);
|
---|
135 | Offset += strlen(AlarmSummary+Offset);
|
---|
136 | if (State[i] > MasterAlarm) MasterAlarm = State[i];
|
---|
137 | }
|
---|
138 | Summary->updateService();
|
---|
139 | Master->updateService();
|
---|
140 | }
|
---|
141 |
|
---|
142 | //
|
---|
143 | // Main program
|
---|
144 | //
|
---|
145 | int main() {
|
---|
146 |
|
---|
147 | DimBrowser Browser;
|
---|
148 | char *ServerName, *Node;
|
---|
149 | bool Exists;
|
---|
150 |
|
---|
151 | // Static declaration ensures calling of destructor by exit()
|
---|
152 | static AlarmHandler Alarm;
|
---|
153 |
|
---|
154 | // Check periodically if servers are up
|
---|
155 | while(!Alarm.ExitRequest) {
|
---|
156 | for (int i=0; i<Alarm.NumServers; i++) {
|
---|
157 | Exists = false;
|
---|
158 | Browser.getServers();
|
---|
159 | while (Browser.getNextServer(ServerName, Node) == 1) {
|
---|
160 | if (strcmp(ServerName, Alarm.Server[i]) == 0) Exists = true;
|
---|
161 | }
|
---|
162 |
|
---|
163 | if (Exists) {
|
---|
164 | Alarm.Warned[i] = false;
|
---|
165 | continue;
|
---|
166 | }
|
---|
167 |
|
---|
168 | Alarm.State[i] = 4;
|
---|
169 |
|
---|
170 | // If server unavailable, send alarm message once
|
---|
171 | if (Alarm.Warned[i] == false) {
|
---|
172 | Alarm.Warned[i] = true;
|
---|
173 | char *Message;
|
---|
174 | time_t Time = time(NULL);
|
---|
175 | if (asprintf(&Message, "echo \"Server unavailable at %s\"|mail -s \"Evidence Alarm for '%s'\" %s", ctime(&Time), Alarm.Server[i], Alarm.GetConfig("email","")) != -1) {
|
---|
176 | system(Message);
|
---|
177 | free(Message);
|
---|
178 | }
|
---|
179 | }
|
---|
180 | }
|
---|
181 |
|
---|
182 | Alarm.UpdateAlarmSummary();
|
---|
183 | sleep(atoi(Alarm.GetConfig("period")));
|
---|
184 | }
|
---|
185 | }
|
---|