source: Evidence/Alarm.cc@ 257

Last change on this file since 257 was 255, checked in by ogrimm, 15 years ago
Added special handling of code 0 in exitHandler(), fixed recursion bug in Lock()/Unlock() by not calling Message()
File size: 6.4 KB
Line 
1/********************************************************************\
2
3 Alarm handler of the Evidence Control System
4
5 - Checks periodically if all required servers are up
6 - Listens to the 'Message' service of each server and generates new service for
7 each observed server indicating the maximum Severity in the past.
8 - Maximum severity may be reset by a command 'Alarm/ResetAlarm' for a server.
9 - A text describing the current state of all servers is published as DIM service.
10 The states are described in LevelStr[].
11 - A master alarm (indicating most severe of individual alarms) is published.
12
13 A mutex is used because UpdateAlarmSummary() may be called from DIM handler thread and
14 from main thread.
15
16 Oliver Grimm, June 2010
17
18\********************************************************************/
19
20#define SERVER_NAME "Alarm"
21#include "Evidence.h"
22
23#include <sstream>
24
25using namespace std;
26
// Textual names of the alarm levels, indexed by level 0..4. Code in this file
// only indexes the table after checking that the level lies in 0..4, and uses
// level 4 ("UNAVAILABLE") for servers that are not reachable.
27const char* LevelStr[] = {"OK", "WARN", "ERROR", "FATAL", "UNAVAILABLE"};
28
29//
30// Data handling class
31//
32class AlarmHandler: public DimClient, public EvidenceServer {
33
34 DimCommand *Command;
35 DimService *Summary, *Master;
36 char *AlarmText;
37 int MasterAlarm;
38
39 void infoHandler();
40 void commandHandler();
41
42 public:
43 AlarmHandler();
44 ~AlarmHandler();
45
46 struct Item {
47 string Server;
48 string Email;
49 DimStampedInfo *Subscription;
50 DimService *AlarmLevel;
51 int WarnedLevel;
52 int Level;
53 };
54 vector<struct Item> List;
55
56 void UpdateAlarmSummary();
57};
58
59// Constructor
60AlarmHandler::AlarmHandler(): EvidenceServer(SERVER_NAME) {
61
62 struct Item N;
63 static int InitLevel = -1; // static for DIM service below
64
65 // Initialise
66 MasterAlarm = 0;
67 AlarmText = NULL;
68
69 // Handling of servies will only start after start()
70 autoStartOff();
71
72 // Create DIM services
73 Summary = new DimService(SERVER_NAME"/Summary", (char *) "not yet available");
74 Master = new DimService(SERVER_NAME"/MasterAlarm", MasterAlarm);
75
76 // Get DIM servers to observe
77 vector<string> Token = Tokenize(GetConfig("servers"));
78
79 for (int i=0; i<Token.size(); i++) {
80 // Extract server name and email
81 vector<string> A = Tokenize(Token[i], ":");
82 N.Server = A[0];
83 if (A.size() == 2) N.Email = A[1];
84 else N.Email = string();
85
86 // DIS_DNS has no Message service
87 if (N.Server == "DIS_DNS") N.Subscription = NULL;
88 else N.Subscription = new DimStampedInfo((N.Server+"/Message").c_str(), NO_LINK, this);
89
90 // Alarm service for server (reference to variable will be updated in UpdateAlarmSummary())
91 N.WarnedLevel = 0;
92 N.Level = -1;
93 N.AlarmLevel = new DimService((N.Server+"/AlarmLevel").c_str(), InitLevel);
94
95 List.push_back(N);
96 }
97
98 // Provide command to reset Level
99 Command = new DimCommand("ResetAlarm", (char *) "C", this);
100
101 // List set up, can start handling
102 start(SERVER_NAME);
103}
104
105
106// Destructor
107AlarmHandler::~AlarmHandler() {
108
109 delete Command;
110
111 for (int i=0; i<List.size(); i++) {
112 delete List[i].Subscription;
113 delete List[i].AlarmLevel;
114 }
115 delete Master;
116 delete Summary;
117 delete[] AlarmText;
118}
119
120
121// Print messages of status changes to screen and update status string
122void AlarmHandler::infoHandler() {
123
124 // Identify status service
125 for (int i=0; i<List.size(); i++) if (getInfo() == List[i].Subscription) {
126 // Update level: unavailable or current severity of status (safely extracted)
127 if (!ServiceOK(getInfo())) List[i].Level = 4;
128 else {
129 int Severity = atoi(ToString(getInfo()->getFormat(), getInfo()->getData(), getInfo()->getSize()).c_str());
130 if (Severity > List[i].Level) List[i].Level = Severity;
131 }
132 }
133
134 UpdateAlarmSummary();
135}
136
137
138// Reset alarm level of given server
139void AlarmHandler::commandHandler() {
140
141 // Safety check
142 string Server = ToString((char *) "C", getCommand()->getData(), getCommand()->getSize());
143 if (getCommand() != Command || Server.empty()) return;
144
145 // Reset alarm level, publish/log action and reset server message severity
146 for (int i=0; i<List.size(); i++) if (List[i].Server == Server) {
147 Message(INFO, "Alarm level of server %s reset by %s (ID %d)", Server.c_str(), getClientName(), getClientId());
148 List[i].Level = 0;
149 List[i].WarnedLevel = 0;
150 if (Server != "DIS_DNS") sendCommandNB((Server+"/EXIT").c_str(), (int) 0);
151 }
152
153 UpdateAlarmSummary();
154}
155
156
157// Update alarm status summary (locking since access can be from main thread and DIM handler threads)
158void AlarmHandler::UpdateAlarmSummary() {
159
160 ostringstream Buf;
161 int Alarm = 0, Ret;
162
163 Lock();
164
165 for (int i=0; i<List.size(); i++) {
166 // Alarm level description
167 Buf << List[i].Server << ": " << (List[i].Level>=0 && List[i].Level<=4 ? LevelStr[List[i].Level] : "unknown");
168 Buf << " (" << List[i].Level << ")" << endl;
169
170 // Adjust master alarm and update server alarm level
171 if (List[i].Level > Alarm) Alarm = List[i].Level;
172 List[i].AlarmLevel->updateService(List[i].Level);
173
174 // Check if alarm level raised, then send alarm message once
175 if (List[i].WarnedLevel < List[i].Level && !List[i].Email.empty()) {
176 List[i].WarnedLevel = List[i].Level;
177
178 // Prepare email message
179 char *Text;
180 time_t Time = time(NULL);
181 if (asprintf(&Text, "echo \"Server alarm level '%s' at %s\"|"
182 "mail -s \"Evidence Alarm for '%s'\" %s",
183 List[i].Level>=0 && List[i].Level<=4 ? LevelStr[List[i].Level] : "unknown",
184 ctime(&Time), List[i].Server.c_str(), List[i].Email.c_str()) != -1) {
185 system(Text); // Return value depending on OS
186 free(Text);
187 }
188 else Message(ERROR, "Could not send alarm email, asprintf() failed");
189 }
190 }
191
192 // Update master alarm services
193 MasterAlarm = Alarm;
194 Master->updateService();
195
196 // Update alarm description (DIM requires variables to be valid until update)
197 char *Tmp = new char[Buf.str().size()+1];
198 strcpy(Tmp, Buf.str().c_str());
199 Summary->updateService(Tmp);
200
201 delete[] AlarmText;
202 AlarmText = Tmp;
203
204 Unlock();
205}
206
207//
208// Main program
209//
210int main() {
211
212 DimBrowser Browser;
213 char *Server, *Node;
214 bool Exist;
215
216 // Static declaration ensures calling of destructor by exit()
217 static AlarmHandler Alarm;
218
219 // Check periodically if servers are up
220 while(!Alarm.ExitRequest) {
221
222 for (int i=0; i<Alarm.List.size(); i++) {
223 Exist = false;
224 Browser.getServers();
225 while (Browser.getNextServer(Server, Node) == 1) {
226 if (Alarm.List[i].Server == Server) Exist = true;
227 }
228 if (!Exist) Alarm.List[i].Level = 4;
229 else if (Alarm.List[i].Level == -1) Alarm.List[i].Level = 0;
230 }
231
232 Alarm.UpdateAlarmSummary();
233 sleep(atoi(Alarm.GetConfig("period").c_str()));
234 }
235}
Note: See TracBrowser for help on using the repository browser.