source: fact/Evidence/Alarm.cc@ 10067

Last change on this file since 10067 was 9852, checked in by ogrimm, 14 years ago
Alarm server can be switched off
File size: 7.0 KB
Line 
1/********************************************************************\
2
3 Alarm handler of the Evidence Control System
4
5 - Checks periodically if all required servers are up
6 - Listens to the 'Message' service of each server and generates new service for
7 each observed server indicating the maximum Severity in the past.
8 - Maximum severity may be reset by a command 'Alarm/ResetAlarm' for a server.
9 - A text describing the current state of all servers is published as DIM service.
10 The states are described in LevelStr[].
11 - A master alarm (indicating most severe of individual alarms) is published.
12 - The server can be switched on/off with the command 'Alarm/Switch'.
13
14 A mutex is used because UpdateAlarmSummary() may be called from DIM handler thread and
15 from main thread.
16
17 Oliver Grimm, June 2010
18
19\********************************************************************/
20
21#define SERVER_NAME "Alarm"
22#include "Evidence.h"
23
24#include <sstream>
25
26using namespace std;
27
// Text for each alarm level; the array index is the numeric level
const char* LevelStr[5] = {
  "OK",           // 0
  "WARN",         // 1
  "ERROR",        // 2
  "FATAL",        // 3
  "UNAVAILABLE"   // 4
};
29
30//
31// Class declaration
32//
33class AlarmHandler: public DimClient, public EvidenceServer {
34
35 DimCommand *ResetCommand;
36 DimCommand *SwitchCommand;
37 DimService *Summary, *Master;
38 char *AlarmText;
39 int MasterAlarm;
40 bool Active;
41
42 void infoHandler();
43 void commandHandler();
44
45 public:
46 AlarmHandler();
47 ~AlarmHandler();
48
49 struct Item {
50 string Server;
51 string Email;
52 DimStampedInfo *Subscription;
53 DimService *AlarmLevel;
54 int WarnedLevel;
55 int Level;
56 };
57 vector<struct Item> List;
58
59 void UpdateAlarmSummary();
60};
61
62// Constructor
63AlarmHandler::AlarmHandler(): EvidenceServer(SERVER_NAME) {
64
65 struct Item N;
66 static int InitLevel = -1; // static for DIM service below
67
68 // Initialise
69 MasterAlarm = 0;
70 AlarmText = NULL;
71 Active = true;
72
73 // Handling of servies will only start after start()
74 autoStartOff();
75
76 // Create DIM services
77 Summary = new DimService(SERVER_NAME"/Summary", (char *) "not yet available");
78 Master = new DimService(SERVER_NAME"/MasterAlarm", MasterAlarm);
79
80 // Get DIM servers to observe
81 vector<string> Token = Tokenize(GetConfig("servers"));
82
83 for (int i=0; i<Token.size(); i++) {
84 // Extract server name and email
85 vector<string> A = Tokenize(Token[i], ":");
86 N.Server = A[0];
87 if (A.size() == 2) N.Email = A[1];
88 else N.Email = string();
89
90 // DIS_DNS has no Message service
91 if (N.Server == "DIS_DNS") N.Subscription = NULL;
92 else N.Subscription = new DimStampedInfo((N.Server+"/Message").c_str(), NO_LINK, this);
93
94 // Alarm service for server (reference to variable will be updated in UpdateAlarmSummary())
95 N.WarnedLevel = 0;
96 N.Level = -1;
97 N.AlarmLevel = new DimService((N.Server+"/AlarmLevel").c_str(), InitLevel);
98
99 List.push_back(N);
100 }
101
102 // Provide command to reset Level
103 ResetCommand = new DimCommand(SERVER_NAME"/ResetAlarm", (char *) "C", this);
104 SwitchCommand = new DimCommand(SERVER_NAME"/Switch", (char *) "C", this);
105
106 // List set up, can start handling
107 start(SERVER_NAME);
108}
109
110
111// Destructor
112AlarmHandler::~AlarmHandler() {
113
114 delete SwitchCommand;
115 delete ResetCommand;
116
117 for (int i=0; i<List.size(); i++) {
118 delete List[i].Subscription;
119 delete List[i].AlarmLevel;
120 }
121 delete Master;
122 delete Summary;
123 delete[] AlarmText;
124}
125
126
127// Print messages of status changes to screen and update status string
128void AlarmHandler::infoHandler() {
129
130 // Identify status service
131 for (int i=0; i<List.size(); i++) if (getInfo() == List[i].Subscription) {
132 // Update level: unavailable or current severity of status (safely extracted)
133 if (!ServiceOK(getInfo())) List[i].Level = 4;
134 else {
135 int Severity = atoi(ToString(getInfo()->getFormat(), getInfo()->getData(), getInfo()->getSize()).c_str());
136 if (Severity > List[i].Level) List[i].Level = Severity;
137 }
138 }
139
140 UpdateAlarmSummary();
141}
142
143
144// Handle commands
145void AlarmHandler::commandHandler() {
146
147 string Text = ToString((char *) "C", getCommand()->getData(), getCommand()->getSize());
148
149 // Reset alarm level, publish/log action and reset server message severity
150 if (getCommand() == ResetCommand) {
151 for (int i=0; i<List.size(); i++) if (List[i].Server == Text) {
152 Message(INFO, "Alarm level of server %s reset by %s (ID %d)", Text.c_str(), getClientName(), getClientId());
153 List[i].Level = 0;
154 List[i].WarnedLevel = 0;
155 sendCommandNB((Text+"/ResetMessage").c_str(), (int) 0);
156 }
157 }
158
159 // Switch Alarm server on/off and publish/log action
160 if (getCommand() == SwitchCommand) {
161 if (Text == "off") Active = false;
162 else Active = true;
163
164 Message(INFO, "Alarm server switched %s by %s (ID %d)", Active ? "ON":"OFF", getClientName(), getClientId());
165 }
166
167 UpdateAlarmSummary();
168}
169
170
171// Update alarm status summary (locking since access can be from main thread and DIM handler threads)
172void AlarmHandler::UpdateAlarmSummary() {
173
174 ostringstream Buf;
175 int Alarm = -1, Ret;
176
177 Lock();
178
179 if (!Active) Buf << "Alarm server inactive";
180 else for (int i=0; i<List.size(); i++) {
181 // Alarm level description
182 Buf << List[i].Server << ": " << (List[i].Level>=0 && List[i].Level<=4 ? LevelStr[List[i].Level] : "unknown");
183 Buf << " (" << List[i].Level << ")" << endl;
184
185 // Adjust master alarm and update server alarm level
186 if (List[i].Level > Alarm) Alarm = List[i].Level;
187 List[i].AlarmLevel->updateService(List[i].Level);
188
189 // Check if alarm level raised, then send alarm message once
190 if (List[i].WarnedLevel < List[i].Level && !List[i].Email.empty()) {
191 List[i].WarnedLevel = List[i].Level;
192
193 // Prepare email message
194 char *Text;
195 time_t Time = time(NULL);
196 if (asprintf(&Text, "echo \"Server alarm level '%s' at %s\"|"
197 "mail -s \"Evidence Alarm for '%s'\" %s",
198 List[i].Level>=0 && List[i].Level<=4 ? LevelStr[List[i].Level] : "unknown",
199 ctime(&Time), List[i].Server.c_str(), List[i].Email.c_str()) != -1) {
200 system(Text); // Return value depending on OS
201 free(Text);
202 }
203 else Message(ERROR, "Could not send alarm email, asprintf() failed");
204 }
205 }
206
207 // Update master alarm services
208 MasterAlarm = Alarm;
209 Master->updateService();
210
211 // Update alarm description (DIM requires variables to be valid until update)
212 char *Tmp = new char[Buf.str().size()+1];
213 strcpy(Tmp, Buf.str().c_str());
214 Summary->updateService(Tmp);
215
216 delete[] AlarmText;
217 AlarmText = Tmp;
218
219 Unlock();
220}
221
222//
223// Main program
224//
225int main() {
226
227 DimBrowser B;
228 char *Server, *Node;
229 bool Exist;
230
231 // Static declaration ensures calling of destructor by exit()
232 static AlarmHandler A;
233
234 while(!A.ExitRequest) {
235 for (int i=0; i<A.List.size(); i++) {
236 // Check if server exists
237 Exist = false;
238 B.getServers();
239 while (B.getNextServer(Server, Node) == 1) {
240 if (A.List[i].Server == Server) Exist = true;
241 }
242 if (!Exist) A.List[i].Level = 4;
243
244 // Check if standard service available in case server not yet chcked (Level is -1)
245 if (B.getServices((A.List[i].Server+"/VERSION_NUMBER").c_str())>0 && A.List[i].Level==-1) A.List[i].Level = 0;
246 }
247
248 A.UpdateAlarmSummary();
249 sleep(atoi(A.GetConfig("period").c_str()));
250 }
251}
Note: See TracBrowser for help on using the repository browser.