source: Evidence/Alarm.cc@ 245

Last change on this file since 245 was 229, checked in by ogrimm, 14 years ago
Config requests non-blocking if not made from main thread, adapted all servers to GetConfig() returning std::string, workaround for erroneous SERVICE_LIST
File size: 6.6 KB
Line 
1/********************************************************************\
2
3 Alarm handler of the Evidence Control System
4
5 - Checks periodically if all required servers are up
6 - Listens to the 'Message' service of each server and generates new service for
7 each observed server indicating the maximum Severity in the past.
8 - Maximum severity may be reset by a command 'Alarm/ResetAlarm' for a server.
9 - A text describing the current state of all servers is published as DIM service.
10 The states are described in LevelStr[].
11 - A master alarm (indicating most severe of individual alarms) is published.
12
13 A mutex is used because UpdateAlarmSummary() may be called from DIM handler thread and
14 from main thread.
15
16 Oliver Grimm, June 2010
17
18\********************************************************************/
19
20#define SERVER_NAME "Alarm"
21#include "Evidence.h"
22
23#include <sstream>
24
25using namespace std;
26
// Human-readable names of the alarm levels, indexed by level (0..4).
// Code in this file falls back to the text "unknown" for levels outside this range.
const char* LevelStr[] = {
  "OK",           // 0
  "WARN",         // 1
  "ERROR",        // 2
  "FATAL",        // 3
  "UNAVAILABLE"   // 4: server not found or its service is not usable
};
28
29//
30// Data handling class
31//
32class AlarmHandler: public DimClient, public EvidenceServer {
33
34 DimCommand *Command;
35 DimService *Summary, *Master;
36 char *AlarmText;
37 int MasterAlarm;
38 pthread_mutex_t Mutex;
39
40 void infoHandler();
41 void commandHandler();
42
43 public:
44 AlarmHandler();
45 ~AlarmHandler();
46
47 struct Item {
48 string Server;
49 string Email;
50 DimStampedInfo *Subscription;
51 DimService *AlarmLevel;
52 int WarnedLevel;
53 int Level;
54 };
55 vector<struct Item> List;
56
57 void UpdateAlarmSummary();
58};
59
60// Constructor
61AlarmHandler::AlarmHandler(): EvidenceServer(SERVER_NAME) {
62
63 struct Item N;
64 static int InitLevel = -1; // static for DIM service below
65
66 // Initialise
67 MasterAlarm = 0;
68 AlarmText = NULL;
69
70 if (pthread_mutex_init(&Mutex, NULL) != 0) {
71 Message(FATAL, "pthread_mutex_init failed");
72 }
73
74 // Handling of servies will only start after start()
75 autoStartOff();
76
77 // Create DIM services
78 Summary = new DimService(SERVER_NAME"/Summary", (char *) "not yet available");
79 Master = new DimService(SERVER_NAME"/MasterAlarm", MasterAlarm);
80
81 // Get DIM servers to observe
82 vector<string> Token = Tokenize(GetConfig("servers"));
83
84 for (int i=0; i<Token.size(); i++) {
85 // Extract server name and email
86 vector<string> A = Tokenize(Token[i], ":");
87 N.Server = A[0];
88 if (A.size() == 2) N.Email = A[1];
89 else N.Email = string();
90
91 // DIS_DNS has no Message service
92 if (N.Server == "DIS_DNS") N.Subscription = NULL;
93 else N.Subscription = new DimStampedInfo((N.Server+"/Message").c_str(), NO_LINK, this);
94
95 // Alarm service for server (reference to variable will be updated in UpdateAlarmSummary())
96 N.WarnedLevel = 0;
97 N.Level = -1;
98 N.AlarmLevel = new DimService((N.Server+"/AlarmLevel").c_str(), InitLevel);
99
100 List.push_back(N);
101 }
102
103 // Provide command to reset Level
104 Command = new DimCommand("Alarm/ResetAlarm", (char *) "C", this);
105
106 // List set up, can start handling
107 start(SERVER_NAME);
108}
109
110
111// Destructor
112AlarmHandler::~AlarmHandler() {
113
114 delete Command;
115
116 for (int i=0; i<List.size(); i++) {
117 delete List[i].Subscription;
118 delete List[i].AlarmLevel;
119 }
120 delete Master;
121 delete Summary;
122 delete[] AlarmText;
123
124 pthread_mutex_destroy(&Mutex);
125}
126
127
128// Print messages of status changes to screen and update status string
129void AlarmHandler::infoHandler() {
130
131 // Identify status service
132 for (int i=0; i<List.size(); i++) if (getInfo() == List[i].Subscription) {
133 // Update level: unavailable or current severity of status
134 if (!ServiceOK(getInfo())) List[i].Level = 4;
135 else if (getInfo()->getInt() > List[i].Level) List[i].Level = getInfo()->getInt();
136 }
137
138 UpdateAlarmSummary();
139}
140
141
142// Reset alarm level of given server
143void AlarmHandler::commandHandler() {
144
145 DimCommand *C = getCommand();
146
147 // Check for valid command parameter
148 if (C != Command) return;
149 if (C->getSize() == 0) return;
150 if (*((char *) C->getData() + C->getSize() - 1) != '\0') return;
151
152 // Reset alarm level and publish/log action
153 for (int i=0; i<List.size(); i++) if (List[i].Server == C->getString()) {
154 Message(INFO, "Alarm level of server %s reset by %s (ID %d)", C->getString(), getClientName(), getClientId());
155 List[i].Level = 0;
156 List[i].WarnedLevel = 0;
157 }
158
159 UpdateAlarmSummary();
160}
161
162
163// Update alarm status summary
164void AlarmHandler::UpdateAlarmSummary() {
165
166 ostringstream Buf;
167 int Alarm, Ret;
168
169 // Lock because access can be from main thread and DIM handler thread
170 if ((Ret = pthread_mutex_lock(&Mutex)) != 0) {
171 Message(FATAL, "pthread_mutex_lock() failed (%s)", strerror(Ret));
172 }
173
174 for (int i=0; i<List.size(); i++) {
175 // Alarm level description
176 Buf << List[i].Server << ": " << (List[i].Level>=0 && List[i].Level<=4 ? LevelStr[List[i].Level] : "unknown");
177 Buf << " (" << List[i].Level << ")" << endl;
178
179 // Adjust master alarm and update server alarm level
180 if (List[i].Level > Alarm) Alarm = List[i].Level;
181 List[i].AlarmLevel->updateService(List[i].Level);
182
183 // Check if alarm level raised, then send alarm message once
184 if (List[i].WarnedLevel < List[i].Level && !List[i].Email.empty()) {
185 List[i].WarnedLevel = List[i].Level;
186
187 // Prepare email message
188 char *Text;
189 time_t Time = time(NULL);
190 if (asprintf(&Text, "echo \"Server alarm level '%s' at %s\"|"
191 "mail -s \"Evidence Alarm for '%s'\" %s",
192 List[i].Level>=0 && List[i].Level<=4 ? LevelStr[List[i].Level] : "unknown",
193 ctime(&Time), List[i].Server.c_str(), List[i].Email.c_str()) != -1) {
194 system(Text); // Return value depending on OS
195 free(Text);
196 }
197 else Message(ERROR, "Could not send alarm email, asprintf() failed");
198 }
199 }
200
201 // Update master alarm services
202 MasterAlarm = Alarm;
203 Master->updateService();
204
205 // Update alarm description (DIM requires variables to be valid until update)
206 char *Tmp = new char[Buf.str().size()+1];
207 strcpy(Tmp, Buf.str().c_str());
208 Summary->updateService(Tmp);
209
210 delete[] AlarmText;
211 AlarmText = Tmp;
212
213 // Unlock
214 if ((Ret = pthread_mutex_unlock(&Mutex)) != 0) {
215 Message(FATAL, "pthread_mutex_unlock() failed (%s)", strerror(Ret));
216 }
217}
218
219//
220// Main program
221//
222int main() {
223
224 DimBrowser Browser;
225 char *Server, *Node;
226 bool Exist;
227
228 // Static declaration ensures calling of destructor by exit()
229 static AlarmHandler Alarm;
230
231 // Check periodically if servers are up
232 while(!Alarm.ExitRequest) {
233
234 for (int i=0; i<Alarm.List.size(); i++) {
235 Exist = false;
236 Browser.getServers();
237 while (Browser.getNextServer(Server, Node) == 1) {
238 if (Alarm.List[i].Server == Server) Exist = true;
239 }
240 if (!Exist) Alarm.List[i].Level = 4;
241 else if (Alarm.List[i].Level = -1) Alarm.List[i].Level = 0;
242 }
243
244 Alarm.UpdateAlarmSummary();
245 sleep(atoi(Alarm.GetConfig("period").c_str()));
246 }
247}
Note: See TracBrowser for help on using the repository browser.