source: Evidence/Alarm.cc@ 9619

Last change on this file since 9619 was 262, checked in by ogrimm, 14 years ago
Removed signaling to invoke ConfigChanged(), now it is run as separate thread. New command '/ResetMessage'
File size: 6.4 KB
Line 
1/********************************************************************\
2
3 Alarm handler of the Evidence Control System
4
5 - Checks periodically if all required servers are up
6 - Listens to the 'Message' service of each server and generates new service for
7 each observed server indicating the maximum Severity in the past.
8 - Maximum severity may be reset by a command 'Alarm/ResetAlarm' for a server.
9 - A text describing the current state of all servers is published as DIM service.
10 The states are described in LevelStr[].
11 - A master alarm (indicating most severe of individual alarms) is published.
12
13 A mutex is used because UpdateAlarmSummary() may be called from DIM handler thread and
14 from main thread.
15
16 Oliver Grimm, June 2010
17
18\********************************************************************/
19
20#define SERVER_NAME "Alarm"
21#include "Evidence.h"
22
23#include <sstream>
24
25using namespace std;
26
// Textual names of the alarm levels, indexed by the numeric level (0..4)
const char* LevelStr[] = {
  "OK",           // 0
  "WARN",         // 1
  "ERROR",        // 2
  "FATAL",        // 3
  "UNAVAILABLE"   // 4
};
28
29//
30// Class declaration
31//
32class AlarmHandler: public DimClient, public EvidenceServer {
33
34 DimCommand *Command;
35 DimService *Summary, *Master;
36 char *AlarmText;
37 int MasterAlarm;
38
39 void infoHandler();
40 void commandHandler();
41
42 public:
43 AlarmHandler();
44 ~AlarmHandler();
45
46 struct Item {
47 string Server;
48 string Email;
49 DimStampedInfo *Subscription;
50 DimService *AlarmLevel;
51 int WarnedLevel;
52 int Level;
53 };
54 vector<struct Item> List;
55
56 void UpdateAlarmSummary();
57};
58
59// Constructor
60AlarmHandler::AlarmHandler(): EvidenceServer(SERVER_NAME) {
61
62 struct Item N;
63 static int InitLevel = -1; // static for DIM service below
64
65 // Initialise
66 MasterAlarm = 0;
67 AlarmText = NULL;
68
69 // Handling of servies will only start after start()
70 autoStartOff();
71
72 // Create DIM services
73 Summary = new DimService(SERVER_NAME"/Summary", (char *) "not yet available");
74 Master = new DimService(SERVER_NAME"/MasterAlarm", MasterAlarm);
75
76 // Get DIM servers to observe
77 vector<string> Token = Tokenize(GetConfig("servers"));
78
79 for (int i=0; i<Token.size(); i++) {
80 // Extract server name and email
81 vector<string> A = Tokenize(Token[i], ":");
82 N.Server = A[0];
83 if (A.size() == 2) N.Email = A[1];
84 else N.Email = string();
85
86 // DIS_DNS has no Message service
87 if (N.Server == "DIS_DNS") N.Subscription = NULL;
88 else N.Subscription = new DimStampedInfo((N.Server+"/Message").c_str(), NO_LINK, this);
89
90 // Alarm service for server (reference to variable will be updated in UpdateAlarmSummary())
91 N.WarnedLevel = 0;
92 N.Level = -1;
93 N.AlarmLevel = new DimService((N.Server+"/AlarmLevel").c_str(), InitLevel);
94
95 List.push_back(N);
96 }
97
98 // Provide command to reset Level
99 Command = new DimCommand("ResetAlarm", (char *) "C", this);
100
101 // List set up, can start handling
102 start(SERVER_NAME);
103}
104
105
106// Destructor
107AlarmHandler::~AlarmHandler() {
108
109 delete Command;
110
111 for (int i=0; i<List.size(); i++) {
112 delete List[i].Subscription;
113 delete List[i].AlarmLevel;
114 }
115 delete Master;
116 delete Summary;
117 delete[] AlarmText;
118}
119
120
121// Print messages of status changes to screen and update status string
122void AlarmHandler::infoHandler() {
123
124 // Identify status service
125 for (int i=0; i<List.size(); i++) if (getInfo() == List[i].Subscription) {
126 // Update level: unavailable or current severity of status (safely extracted)
127 if (!ServiceOK(getInfo())) List[i].Level = 4;
128 else {
129 int Severity = atoi(ToString(getInfo()->getFormat(), getInfo()->getData(), getInfo()->getSize()).c_str());
130 if (Severity > List[i].Level) List[i].Level = Severity;
131 }
132 }
133
134 UpdateAlarmSummary();
135}
136
137
138// Reset alarm level of given server
139void AlarmHandler::commandHandler() {
140
141 // Safety check
142 string Server = ToString((char *) "C", getCommand()->getData(), getCommand()->getSize());
143 if (getCommand() != Command || Server.empty()) return;
144
145 // Reset alarm level, publish/log action and reset server message severity
146 for (int i=0; i<List.size(); i++) if (List[i].Server == Server) {
147 Message(INFO, "Alarm level of server %s reset by %s (ID %d)", Server.c_str(), getClientName(), getClientId());
148 List[i].Level = 0;
149 List[i].WarnedLevel = 0;
150 sendCommandNB((Server+"/ResetMessage").c_str(), (int) 0);
151 }
152
153 UpdateAlarmSummary();
154}
155
156
157// Update alarm status summary (locking since access can be from main thread and DIM handler threads)
158void AlarmHandler::UpdateAlarmSummary() {
159
160 ostringstream Buf;
161 int Alarm = 0, Ret;
162
163 Lock();
164
165 for (int i=0; i<List.size(); i++) {
166 // Alarm level description
167 Buf << List[i].Server << ": " << (List[i].Level>=0 && List[i].Level<=4 ? LevelStr[List[i].Level] : "unknown");
168 Buf << " (" << List[i].Level << ")" << endl;
169
170 // Adjust master alarm and update server alarm level
171 if (List[i].Level > Alarm) Alarm = List[i].Level;
172 List[i].AlarmLevel->updateService(List[i].Level);
173
174 // Check if alarm level raised, then send alarm message once
175 if (List[i].WarnedLevel < List[i].Level && !List[i].Email.empty()) {
176 List[i].WarnedLevel = List[i].Level;
177
178 // Prepare email message
179 char *Text;
180 time_t Time = time(NULL);
181 if (asprintf(&Text, "echo \"Server alarm level '%s' at %s\"|"
182 "mail -s \"Evidence Alarm for '%s'\" %s",
183 List[i].Level>=0 && List[i].Level<=4 ? LevelStr[List[i].Level] : "unknown",
184 ctime(&Time), List[i].Server.c_str(), List[i].Email.c_str()) != -1) {
185 system(Text); // Return value depending on OS
186 free(Text);
187 }
188 else Message(ERROR, "Could not send alarm email, asprintf() failed");
189 }
190 }
191
192 // Update master alarm services
193 MasterAlarm = Alarm;
194 Master->updateService();
195
196 // Update alarm description (DIM requires variables to be valid until update)
197 char *Tmp = new char[Buf.str().size()+1];
198 strcpy(Tmp, Buf.str().c_str());
199 Summary->updateService(Tmp);
200
201 delete[] AlarmText;
202 AlarmText = Tmp;
203
204 Unlock();
205}
206
207//
208// Main program
209//
210int main() {
211
212 DimBrowser Browser;
213 char *Server, *Node;
214 bool Exist;
215
216 // Static declaration ensures calling of destructor by exit()
217 static AlarmHandler Alarm;
218
219 // Check periodically if servers are up
220 while(!Alarm.ExitRequest) {
221
222 for (int i=0; i<Alarm.List.size(); i++) {
223 Exist = false;
224 Browser.getServers();
225 while (Browser.getNextServer(Server, Node) == 1) {
226 if (Alarm.List[i].Server == Server) Exist = true;
227 }
228 if (!Exist) Alarm.List[i].Level = 4;
229 else if (Alarm.List[i].Level == -1) Alarm.List[i].Level = 0;
230 }
231
232 Alarm.UpdateAlarmSummary();
233 sleep(atoi(Alarm.GetConfig("period").c_str()));
234 }
235}
Note: See TracBrowser for help on using the repository browser.