source: Evidence/Alarm.cc@ 254

Last change on this file since 254 was 253, checked in by ogrimm, 14 years ago
Added command ResetAlarm, Evidence servers now always safely translate a DIM string into a C string, added documentation, replaced several vectors by maps
File size: 6.2 KB
Line 
1/********************************************************************\
2
3 Alarm handler of the Evidence Control System
4
5 - Checks periodically if all required servers are up
6 - Listens to the 'Message' service of each server and generates new service for
7 each observed server indicating the maximum Severity in the past.
8 - Maximum severity may be reset by a command 'Alarm/ResetAlarm' for a server.
9 - A text describing the current state of all servers is published as DIM service.
10 The states are described in LevelStr[].
11 - A master alarm (indicating most severe of individual alarms) is published.
12
13 A mutex is used because UpdateAlarmSummary() may be called from DIM handler thread and
14 from main thread.
15
16 Oliver Grimm, June 2010
17
18\********************************************************************/
19
20#define SERVER_NAME "Alarm"
21#include "Evidence.h"
22
23#include <sstream>
24
25using namespace std;
26
27const char* LevelStr[] = {"OK", "WARN", "ERROR", "FATAL", "UNAVAILABLE"};
28
29//
30// Data handling class
31//
32class AlarmHandler: public DimClient, public EvidenceServer {
33
34 DimCommand *Command;
35 DimService *Summary, *Master;
36 char *AlarmText;
37 int MasterAlarm;
38
39 void infoHandler();
40 void commandHandler();
41
42 public:
43 AlarmHandler();
44 ~AlarmHandler();
45
46 struct Item {
47 string Server;
48 string Email;
49 DimStampedInfo *Subscription;
50 DimService *AlarmLevel;
51 int WarnedLevel;
52 int Level;
53 };
54 vector<struct Item> List;
55
56 void UpdateAlarmSummary();
57};
58
59// Constructor
60AlarmHandler::AlarmHandler(): EvidenceServer(SERVER_NAME) {
61
62 struct Item N;
63 static int InitLevel = -1; // static for DIM service below
64
65 // Initialise
66 MasterAlarm = 0;
67 AlarmText = NULL;
68
69 // Handling of servies will only start after start()
70 autoStartOff();
71
72 // Create DIM services
73 Summary = new DimService(SERVER_NAME"/Summary", (char *) "not yet available");
74 Master = new DimService(SERVER_NAME"/MasterAlarm", MasterAlarm);
75
76 // Get DIM servers to observe
77 vector<string> Token = Tokenize(GetConfig("servers"));
78
79 for (int i=0; i<Token.size(); i++) {
80 // Extract server name and email
81 vector<string> A = Tokenize(Token[i], ":");
82 N.Server = A[0];
83 if (A.size() == 2) N.Email = A[1];
84 else N.Email = string();
85
86 // DIS_DNS has no Message service
87 if (N.Server == "DIS_DNS") N.Subscription = NULL;
88 else N.Subscription = new DimStampedInfo((N.Server+"/Message").c_str(), NO_LINK, this);
89
90 // Alarm service for server (reference to variable will be updated in UpdateAlarmSummary())
91 N.WarnedLevel = 0;
92 N.Level = -1;
93 N.AlarmLevel = new DimService((N.Server+"/AlarmLevel").c_str(), InitLevel);
94
95 List.push_back(N);
96 }
97
98 // Provide command to reset Level
99 Command = new DimCommand("ResetAlarm", (char *) "C", this);
100
101 // List set up, can start handling
102 start(SERVER_NAME);
103}
104
105
106// Destructor
107AlarmHandler::~AlarmHandler() {
108
109 delete Command;
110
111 for (int i=0; i<List.size(); i++) {
112 delete List[i].Subscription;
113 delete List[i].AlarmLevel;
114 }
115 delete Master;
116 delete Summary;
117 delete[] AlarmText;
118}
119
120
121// Print messages of status changes to screen and update status string
122void AlarmHandler::infoHandler() {
123
124 // Identify status service
125 for (int i=0; i<List.size(); i++) if (getInfo() == List[i].Subscription) {
126 // Update level: unavailable or current severity of status
127 if (!ServiceOK(getInfo())) List[i].Level = 4;
128 else if (getInfo()->getInt() > List[i].Level) List[i].Level = getInfo()->getInt();
129 }
130
131 UpdateAlarmSummary();
132}
133
134
135// Reset alarm level of given server
136void AlarmHandler::commandHandler() {
137
138 // Safety check
139 string Server = ToString((char *) "C", getCommand()->getData(), getCommand()->getSize());
140 if (getCommand() != Command || Server.empty()) return;
141
142 // Reset alarm level and publish/log action
143 for (int i=0; i<List.size(); i++) if (List[i].Server == Server) {
144 Message(INFO, "Alarm level of server %s reset by %s (ID %d)", Server.c_str(), getClientName(), getClientId());
145 List[i].Level = 0;
146 List[i].WarnedLevel = 0;
147 }
148
149 UpdateAlarmSummary();
150}
151
152
153// Update alarm status summary (locking since access can be from main thread and DIM handler threads)
154void AlarmHandler::UpdateAlarmSummary() {
155
156 ostringstream Buf;
157 int Alarm, Ret;
158
159 Lock();
160
161 for (int i=0; i<List.size(); i++) {
162 // Alarm level description
163 Buf << List[i].Server << ": " << (List[i].Level>=0 && List[i].Level<=4 ? LevelStr[List[i].Level] : "unknown");
164 Buf << " (" << List[i].Level << ")" << endl;
165
166 // Adjust master alarm and update server alarm level
167 if (List[i].Level > Alarm) Alarm = List[i].Level;
168 List[i].AlarmLevel->updateService(List[i].Level);
169
170 // Check if alarm level raised, then send alarm message once
171 if (List[i].WarnedLevel < List[i].Level && !List[i].Email.empty()) {
172 List[i].WarnedLevel = List[i].Level;
173
174 // Prepare email message
175 char *Text;
176 time_t Time = time(NULL);
177 if (asprintf(&Text, "echo \"Server alarm level '%s' at %s\"|"
178 "mail -s \"Evidence Alarm for '%s'\" %s",
179 List[i].Level>=0 && List[i].Level<=4 ? LevelStr[List[i].Level] : "unknown",
180 ctime(&Time), List[i].Server.c_str(), List[i].Email.c_str()) != -1) {
181 system(Text); // Return value depending on OS
182 free(Text);
183 }
184 else Message(ERROR, "Could not send alarm email, asprintf() failed");
185 }
186 }
187
188 // Update master alarm services
189 MasterAlarm = Alarm;
190 Master->updateService();
191
192 // Update alarm description (DIM requires variables to be valid until update)
193 char *Tmp = new char[Buf.str().size()+1];
194 strcpy(Tmp, Buf.str().c_str());
195 Summary->updateService(Tmp);
196
197 delete[] AlarmText;
198 AlarmText = Tmp;
199
200 Unlock();
201}
202
203//
204// Main program
205//
206int main() {
207
208 DimBrowser Browser;
209 char *Server, *Node;
210 bool Exist;
211
212 // Static declaration ensures calling of destructor by exit()
213 static AlarmHandler Alarm;
214
215 // Check periodically if servers are up
216 while(!Alarm.ExitRequest) {
217
218 for (int i=0; i<Alarm.List.size(); i++) {
219 Exist = false;
220 Browser.getServers();
221 while (Browser.getNextServer(Server, Node) == 1) {
222 if (Alarm.List[i].Server == Server) Exist = true;
223 }
224 if (!Exist) Alarm.List[i].Level = 4;
225 else if (Alarm.List[i].Level = -1) Alarm.List[i].Level = 0;
226 }
227
228 Alarm.UpdateAlarmSummary();
229 sleep(atoi(Alarm.GetConfig("period").c_str()));
230 }
231}
Note: See TracBrowser for help on using the repository browser.