source: trunk/FACT++/src/EventBuilder.cc@ 16107

Last change on this file since 16107 was 16103, checked in by tbretz, 13 years ago
some improvements of the output; some improvements to the statistics; added an enum for signals to the file status; also wait for the processingQueue1 at the end of mainloop()
File size: 40.3 KB
Line 
1#include <sys/time.h>
2#include <sys/epoll.h>
3#include <netinet/tcp.h>
4
5#include <cstring>
6#include <cstdarg>
7#include <list>
8#include <forward_list>
9
10#include <boost/algorithm/string/join.hpp>
11
12#include "queue.h"
13
14#include "MessageImp.h"
15#include "EventBuilder.h"
16
17using namespace std;
18
19#define MIN_LEN 32 // min #bytes needed to interpret FADheader
20#define MAX_LEN 81920 // one max evt = 1024*2*36 + 8*36 + 72 + 4 = 74092 (data+boardheader+eventheader+endflag)
21
22//#define COMPLETE_EVENTS
23//#define USE_EPOLL
24//#define USE_SELECT
25//#define COMPLETE_EPOLL
26
27// ==========================================================================
28
29bool runOpen(const shared_ptr<EVT_CTRL2> &evt);
30bool runWrite(const shared_ptr<EVT_CTRL2> &evt);
31void runClose();
32void applyCalib(const shared_ptr<EVT_CTRL2> &evt);
33void factOut(int severity, const char *message);
34void factReportIncomplete (uint64_t rep);
35void gotNewRun(RUN_CTRL2 &run);
36void runFinished();
37void factStat(GUI_STAT gj);
38int eventCheck(const shared_ptr<EVT_CTRL2> &evt);
39void debugHead(void *buf);
40
41// ==========================================================================
42
43int g_reset;
44
45size_t g_maxMem; //maximum memory allowed for buffer
46
47FACT_SOCK g_port[NBOARDS]; // .addr=string of IP-addr in dotted-decimal "ddd.ddd.ddd.ddd"
48
49uint gi_NumConnect[NBOARDS]; //4 crates * 10 boards
50
51GUI_STAT gj;
52
53// ==========================================================================
54
55void factPrintf(int severity, const char *fmt, ...)
56{
57 char str[1000];
58
59 va_list ap;
60 va_start(ap, fmt);
61 vsnprintf(str, 1000, fmt, ap);
62 va_end(ap);
63
64 factOut(severity, str);
65}
66
67// ==========================================================================
68
69#define MAX_HEAD_MEM (NBOARDS * sizeof(PEVNT_HEADER))
70#define MAX_TOT_MEM (sizeof(EVENT) + (NPIX+NTMARK)*1024*2 + MAX_HEAD_MEM)
71
72namespace Memory
73{
74 uint64_t inuse = 0;
75 uint64_t allocated = 0;
76
77 uint64_t max_inuse = 0;
78
79 mutex mtx;
80
81 forward_list<void*> memory;
82
83 void *malloc()
84 {
85 // No free slot available, next alloc would exceed max memory
86 if (memory.empty() && allocated+MAX_TOT_MEM>g_maxMem)
87 return NULL;
88
89 // We will return this amount of memory
90 // This is not 100% thread safe, but it is not a super accurate measure anyway
91 inuse += MAX_TOT_MEM;
92 if (inuse>max_inuse)
93 max_inuse = inuse;
94
95 void *mem = NULL;
96
97 if (memory.empty())
98 {
99 // No free slot available, allocate a new one
100 allocated += MAX_TOT_MEM;
101 mem = new char[MAX_TOT_MEM];
102 }
103 else
104 {
105 // Get the next free slot from the stack and return it
106 const lock_guard<mutex> lock(mtx);
107 mem = memory.front();
108 memory.pop_front();
109 }
110
111 memset(mem, 0, MAX_HEAD_MEM);
112 return mem;
113 };
114
115 void free(void *mem)
116 {
117 if (!mem)
118 return;
119
120 // Decrease the amont of memory in use accordingly
121 inuse -= MAX_TOT_MEM;
122
123 // If the maximum memory has changed, we might be over the limit.
124 // In this case: free a slot
125 if (allocated>g_maxMem)
126 {
127 delete [] (char*)mem;
128 allocated -= MAX_TOT_MEM;
129 return;
130 }
131
132 const lock_guard<mutex> lock(mtx);
133 memory.push_front(mem);
134 }
135};
136
137// ==========================================================================
138
139struct READ_STRUCT
140{
141 enum buftyp_t
142 {
143 kStream,
144 kHeader,
145 kData,
146#ifdef COMPLETE_EVENTS
147 kWait
148#endif
149 };
150
151 // ---------- connection ----------
152
153 static uint activeSockets;
154
155 int sockId; // socket id (board number)
156 int socket; // socket handle
157 bool connected; // is this socket connected?
158
159 struct sockaddr_in SockAddr; // Socket address copied from wrapper during socket creation
160
161 // ------------ epoll -------------
162
163 static int fd_epoll;
164 static epoll_event events[NBOARDS];
165
166 static void init();
167 static void close();
168 static int wait();
169 static READ_STRUCT *get(int i) { return reinterpret_cast<READ_STRUCT*>(events[i].data.ptr); }
170
171 // ------------ buffer ------------
172
173 buftyp_t bufTyp; // what are we reading at the moment: 0=header 1=data -1=skip ...
174
175 uint32_t bufLen; // number of bytes left to read
176 uint8_t *bufPos; // next byte to read to the buffer next
177
178 union
179 {
180 uint8_t B[MAX_LEN];
181 uint16_t S[MAX_LEN / 2];
182 uint32_t I[MAX_LEN / 4];
183 uint64_t L[MAX_LEN / 8];
184 PEVNT_HEADER H;
185 };
186
187 uint64_t rateBytes;
188 uint32_t skip; // number of bytes skipped before start of event
189 bool repmem; // reportet no mmemory free
190
191 uint32_t len() const { return uint32_t(H.package_length)*2; }
192
193 void swapHeader();
194 void swapData();
195
196 // --------------------------------
197
198 READ_STRUCT() : socket(-1), connected(false), rateBytes(0)
199 {
200 if (fd_epoll<0)
201 init();
202 }
203 ~READ_STRUCT()
204 {
205 destroy();
206 }
207
208 void destroy();
209 bool create(sockaddr_in addr);
210 bool check(int, sockaddr_in addr);
211 bool read();
212};
213
214int READ_STRUCT::wait()
215{
216 // wait for something to do...
217 const int rc = epoll_wait(fd_epoll, events, NBOARDS, 10); // max, timeout[ms]
218 if (rc>=0)
219 return rc;
220
221 if (errno==EINTR) // timout or signal interruption
222 return 0;
223
224 factPrintf(MessageImp::kError, "epoll_wait failed: %m (rc=%d)", errno);
225 return -1;
226}
227
228uint READ_STRUCT::activeSockets = 0;
229int READ_STRUCT::fd_epoll = -1;
230epoll_event READ_STRUCT::events[NBOARDS];
231
232void READ_STRUCT::init()
233{
234 if (fd_epoll>=0)
235 return;
236
237#ifdef USE_EPOLL
238 fd_epoll = epoll_create(NBOARDS);
239 if (fd_epoll<0)
240 {
241 factPrintf(MessageImp::kError, "Waiting for data failed: %d (epoll_create,rc=%d)", errno);
242 return;
243 }
244#endif
245}
246
247void READ_STRUCT::close()
248{
249#ifdef USE_EPOLL
250 if (fd_epoll>=0 && ::close(fd_epoll)>0)
251 factPrintf(MessageImp::kFatal, "Closing epoll failed: %m (close,rc=%d)", errno);
252#endif
253
254 fd_epoll = -1;
255}
256
257bool READ_STRUCT::create(sockaddr_in sockAddr)
258{
259 if (socket>=0)
260 return false;
261
262 const int port = ntohs(sockAddr.sin_port) + 1;
263
264 SockAddr.sin_family = sockAddr.sin_family;
265 SockAddr.sin_addr = sockAddr.sin_addr;
266 SockAddr.sin_port = htons(port);
267
268 if ((socket = ::socket(PF_INET, SOCK_STREAM | SOCK_NONBLOCK, 0)) <= 0)
269 {
270 factPrintf(MessageImp::kFatal, "Generating socket %d failed: %m (socket,rc=%d)", sockId, errno);
271 socket = -1;
272 return false;
273 }
274
275 int optval = 1;
276 if (setsockopt (socket, SOL_SOCKET, SO_KEEPALIVE, &optval, sizeof(int)) < 0)
277 factPrintf(MessageImp::kInfo, "Setting SO_KEEPALIVE for socket %d failed: %m (setsockopt,rc=%d)", sockId, errno);
278
279 optval = 10; //start after 10 seconds
280 if (setsockopt (socket, SOL_TCP, TCP_KEEPIDLE, &optval, sizeof(int)) < 0)
281 factPrintf(MessageImp::kInfo, "Setting TCP_KEEPIDLE for socket %d failed: %m (setsockopt,rc=%d)", sockId, errno);
282
283 optval = 10; //do every 10 seconds
284 if (setsockopt (socket, SOL_TCP, TCP_KEEPINTVL, &optval, sizeof(int)) < 0)
285 factPrintf(MessageImp::kInfo, "Setting TCP_KEEPINTVL for socket %d failed: %m (setsockopt,rc=%d)", sockId, errno);
286
287 optval = 2; //close after 2 unsuccessful tries
288 if (setsockopt (socket, SOL_TCP, TCP_KEEPCNT, &optval, sizeof(int)) < 0)
289 factPrintf(MessageImp::kInfo, "Setting TCP_KEEPCNT for socket %d failed: %m (setsockopt,rc=%d)", sockId, errno);
290
291 factPrintf(MessageImp::kInfo, "Generated socket %d (%d)", sockId, socket);
292
293 //connected = false;
294 activeSockets++;
295
296 return true;
297}
298
299void READ_STRUCT::destroy()
300{
301 if (socket<0)
302 return;
303
304#ifdef USE_EPOLL
305 // strictly speaking this should not be necessary
306 if (fd_epoll>=0 && connected && epoll_ctl(fd_epoll, EPOLL_CTL_DEL, socket, NULL)<0)
307 factPrintf(MessageImp::kError, "epoll_ctrl failed: %m (EPOLL_CTL_DEL,rc=%d)", errno);
308#endif
309
310 if (::close(socket) > 0)
311 factPrintf(MessageImp::kFatal, "Closing socket %d failed: %m (close,rc=%d)", sockId, errno);
312 else
313 factPrintf(MessageImp::kInfo, "Closed socket %d (%d)", sockId, socket);
314
315 socket = -1;
316 connected = false;
317 activeSockets--;
318}
319
320bool READ_STRUCT::check(int sockDef, sockaddr_in addr)
321{
322 // Continue in the most most likely case (performance)
323 //if (socket>=0 && sockDef!=0 && connected)
324 // return;
325 const int old = socket;
326
327 // socket open, but should not be open
328 if (socket>=0 && sockDef==0)
329 destroy();
330
331 // Socket closed, but should be open
332 if (socket<0 && sockDef!=0)
333 create(addr); //generate address and socket
334
335 const bool retval = old!=socket;
336
337 // Socket closed
338 if (socket<0)
339 return retval;
340
341 // Socket open and connected: Nothing to do
342 if (connected)
343 return retval;
344
345 //try to connect if not yet done
346 const int rc = connect(socket, (struct sockaddr *) &SockAddr, sizeof(SockAddr));
347 if (rc == -1)
348 return retval;
349
350 connected = true;
351
352 if (sockDef<0)
353 {
354 bufTyp = READ_STRUCT::kStream; // full data to be skipped
355 bufLen = MAX_LEN; // huge for skipping
356 }
357 else
358 {
359 bufTyp = READ_STRUCT::kHeader; // expect a header
360 bufLen = sizeof(PEVNT_HEADER); // max size to read at begining
361 }
362
363 bufPos = B; // no byte read so far
364 skip = 0; // start empty
365 repmem = false;
366
367 factPrintf(MessageImp::kInfo, "Connected socket %d (%d)", sockId, socket);
368
369#ifdef USE_EPOLL
370 epoll_event ev;
371 ev.events = EPOLLIN;
372 ev.data.ptr = this; // user data (union: ev.ptr)
373 if (epoll_ctl(fd_epoll, EPOLL_CTL_ADD, socket, &ev)<0)
374 factPrintf(MessageImp::kError, "epoll_ctl failed: %m (EPOLL_CTL_ADD,rc=%d)", errno);
375#endif
376
377 return retval;
378}
379
380bool READ_STRUCT::read()
381{
382 if (bufLen==0)
383 return true;
384
385 const int32_t jrd = recv(socket, bufPos, bufLen, MSG_DONTWAIT);
386 // recv failed
387 if (jrd<0)
388 {
389 // There was just nothing waiting
390 if (errno==EWOULDBLOCK || errno==EAGAIN)
391 return false;
392
393 factPrintf(MessageImp::kError, "Reading from socket %d failed: %m (recv,rc=%d)", sockId, errno);
394 return false;
395 }
396
397 // connection was closed ...
398 if (jrd==0)
399 {
400 factPrintf(MessageImp::kInfo, "Socket %d closed by FAD", sockId);
401
402 destroy();//DestroySocket(rd[i]); //generate address and socket
403 return false;
404 }
405
406 rateBytes += jrd;
407
408 // are we skipping this board ...
409 if (bufTyp==kStream)
410 return false;
411
412 bufPos += jrd; //==> prepare for continuation
413 bufLen -= jrd;
414
415 // not yet all read
416 return bufLen==0;
417}
418
419void READ_STRUCT::swapHeader()
420{
421 S[1] = ntohs(S[1]); // package_length (bytes not swapped!)
422 S[2] = ntohs(S[2]); // version_no
423 S[3] = ntohs(S[3]); // PLLLCK
424 S[4] = ntohs(S[4]); // trigger_crc
425 S[5] = ntohs(S[5]); // trigger_type
426
427 I[3] = ntohl(I[3]); // trigger_id
428 I[4] = ntohl(I[4]); // fad_evt_counter
429 I[5] = ntohl(I[5]); // REFCLK_frequency
430
431 S[12] = ntohs(S[12]); // board id
432 S[13] = ntohs(S[13]); // adc_clock_phase_shift
433 S[14] = ntohs(S[14]); // number_of_triggers_to_generate
434 S[15] = ntohs(S[15]); // trigger_generator_prescaler
435
436 I[10] = ntohl(I[10]); // runnumber;
437 I[11] = ntohl(I[11]); // time;
438
439 for (int s=24; s<24+NTemp+NDAC; s++)
440 S[s] = ntohs(S[s]); // drs_temperature / dac
441}
442
443void READ_STRUCT::swapData()
444{
445 // swapEventHeaderBytes: End of the header. to channels now
446
447 int i = 36;
448 for (int ePatchesCount = 0; ePatchesCount<4*9; ePatchesCount++)
449 {
450 S[i+0] = ntohs(S[i+0]);//id
451 S[i+1] = ntohs(S[i+1]);//start_cell
452 S[i+2] = ntohs(S[i+2]);//roi
453 S[i+3] = ntohs(S[i+3]);//filling
454
455 i += 4+S[i+2];//skip the pixel data
456 }
457}
458
459// ==========================================================================
460
461bool checkRoiConsistency(const READ_STRUCT &rd, uint16_t roi[])
462{
463 int xjr = -1;
464 int xkr = -1;
465
466 //points to the very first roi
467 int roiPtr = sizeof(PEVNT_HEADER)/2 + 2;
468
469 roi[0] = ntohs(rd.S[roiPtr]);
470
471 for (int jr = 0; jr < 9; jr++)
472 {
473 roi[jr] = ntohs(rd.S[roiPtr]);
474
475 if (roi[jr]>1024)
476 {
477 factPrintf(MessageImp::kError, "Illegal roi in channel %d (allowed: roi<=1024)", jr, roi[jr]);
478 return false;
479 }
480
481 // Check that the roi of pixels jr are compatible with the one of pixel 0
482 if (jr!=8 && roi[jr]!=roi[0])
483 {
484 xjr = jr;
485 break;
486 }
487
488 // Check that the roi of all other DRS chips on boards are compatible
489 for (int kr = 1; kr < 4; kr++)
490 {
491 const int kroi = ntohs(rd.S[roiPtr]);
492 if (kroi != roi[jr])
493 {
494 xjr = jr;
495 xkr = kr;
496 break;
497 }
498 roiPtr += kroi+4;
499 }
500 }
501
502 if (xjr>=0)
503 {
504 if (xkr<0)
505 factPrintf(MessageImp::kFatal, "Inconsistent Roi accross chips [DRS=%d], expected %d, got %d", xjr, roi[0], roi[xjr]);
506 else
507 factPrintf(MessageImp::kFatal, "Inconsistent Roi accross channels [DRS=%d Ch=%d], expected %d, got %d", xjr, xkr, roi[xjr], ntohs(rd.S[roiPtr]));
508
509 return false;
510 }
511
512 if (roi[8] < roi[0])
513 {
514 factPrintf(MessageImp::kError, "Mismatch of roi (%d) in channel 8. Should be larger or equal than the roi (%d) in channel 0.", roi[8], roi[0]);
515 return false;
516 }
517
518 return true;
519}
520
521list<shared_ptr<EVT_CTRL2>> evtCtrl;
522
523shared_ptr<EVT_CTRL2> mBufEvt(const READ_STRUCT &rd, shared_ptr<RUN_CTRL2> &actrun)
524{
525 uint16_t nRoi[9];
526 if (!checkRoiConsistency(rd, nRoi))
527 return shared_ptr<EVT_CTRL2>();
528
529 for (auto it=evtCtrl.rbegin(); it!=evtCtrl.rend(); it++)
530 {
531 // A reference is enough because the evtCtrl holds the shared_ptr anyway
532 const shared_ptr<EVT_CTRL2> &evt = *it;
533
534 // If the run is different, go on searching.
535 // We cannot stop searching if a lower run-id is found as in
536 // the case of the events, because theoretically, there
537 // can be the same run on two different days.
538 if (rd.H.runnumber != evt->runNum)
539 continue;
540
541 // If the ID of the new event if higher than the last one stored
542 // in that run, we have to assign a new slot (leave the loop)
543 if (rd.H.fad_evt_counter > evt->evNum/* && runID == evtCtrl[k].runNum*/)
544 break;
545
546 if (rd.H.fad_evt_counter != evt->evNum/* || runID != evtCtrl[k].runNum*/)
547 continue;
548
549 // We have found an entry with the same runID and evtID
550 // Check if ROI is consistent
551 if (evt->nRoi != nRoi[0] || evt->nRoiTM != nRoi[8])
552 {
553 factPrintf(MessageImp::kError, "Mismatch of roi within event. Expected roi=%d and roi_tm=%d, got %d and %d.",
554 evt->nRoi, evt->nRoiTM, nRoi[0], nRoi[8]);
555 return shared_ptr<EVT_CTRL2>();
556 }
557
558 // count for inconsistencies
559 if (evt->trgNum != rd.H.trigger_id)
560 evt->Errors[0]++;
561 if (evt->trgTyp != rd.H.trigger_type)
562 evt->Errors[2]++;
563
564 //everything seems fine so far ==> use this slot ....
565 return evt;
566 }
567
568 if (actrun->runId==rd.H.runnumber && (actrun->roi0 != nRoi[0] || actrun->roi8 != nRoi[8]))
569 {
570 factPrintf(MessageImp::kError, "Mismatch of roi within run. Expected roi=%d and roi_tm=%d, got %d and %d (runID=%d, evID=%d)",
571 actrun->roi0, actrun->roi8, nRoi[0], nRoi[8], rd.H.runnumber, rd.H.fad_evt_counter);
572 return shared_ptr<EVT_CTRL2>();
573 }
574
575 shared_ptr<EVT_CTRL2> evt(new EVT_CTRL2);
576
577 gettimeofday(&evt->time, NULL);
578
579 evt->runNum = rd.H.runnumber;
580 evt->evNum = rd.H.fad_evt_counter;
581
582 evt->trgNum = rd.H.trigger_id;
583 evt->trgTyp = rd.H.trigger_type;
584
585 evt->nRoi = nRoi[0];
586 evt->nRoiTM = nRoi[8];
587
588 const bool newrun = actrun->runId != rd.H.runnumber;
589 if (newrun)
590 {
591 // Since we have started a new run, we know already when to close the
592 // previous run in terms of number of events
593 actrun->maxEvt = actrun->lastEvt;
594
595 factPrintf(MessageImp::kInfo, "New run %d (evt=%d) registered with roi=%d(%d), prev=%d",
596 rd.H.runnumber, rd.H.fad_evt_counter, nRoi[0], nRoi[8], actrun->runId);
597
598 // The new run is the active run now
599 actrun = shared_ptr<RUN_CTRL2>(new RUN_CTRL2);
600
601 const time_t &tsec = evt->time.tv_sec;
602
603 actrun->openTime = tsec;
604 actrun->closeTime = tsec + 3600 * 24; // max time allowed
605 actrun->runId = rd.H.runnumber;
606 actrun->roi0 = nRoi[0]; // FIXME: Make obsolete!
607 actrun->roi8 = nRoi[8]; // FIXME: Make obsolete!
608
609 // Signal the fadctrl that a new run has been started
610 // Note this is the only place at which we can ensure that
611 // gotnewRun is called only once
612 gotNewRun(*actrun);
613 }
614
615 // Increase the number of events we have started to receive in this run
616 actrun->lastTime = evt->time.tv_sec; // Time when the last event was received
617 actrun->lastEvt++;
618
619 // Keep pointer to run of this event
620 evt->runCtrl = actrun;
621
622 // Secure access to evtCtrl against access in CloseRunFile
623 // This should be the last... otherwise we can run into threading issues
624 // if the event is accessed before it is fully initialized.
625 evtCtrl.push_back(evt);
626
627 // An event can be the first and the last, but not the last and the first.
628 // Therefore gotNewRun is called before runFinished.
629 // runFinished signals that the last event of a run was just received. Processing
630 // might still be ongoing, but we can start a new run.
631 const bool cond1 = actrun->lastEvt < actrun->maxEvt; // max number of events not reached
632 const bool cond2 = actrun->lastTime < actrun->closeTime; // max time not reached
633 if (!cond1 || !cond2)
634 runFinished();
635
636 return evt;
637}
638
639
640void copyData(const READ_STRUCT &rBuf, EVT_CTRL2 *evt)
641{
642 const int i = rBuf.sockId;
643
644 memcpy(evt->FADhead.get()+i, &rBuf.H, sizeof(PEVNT_HEADER));
645
646 int src = sizeof(PEVNT_HEADER) / 2; // Header is 72 byte = 36 shorts
647
648 // consistency of ROIs have been checked already (is it all correct?)
649 const uint16_t &roi = rBuf.S[src+2];
650
651 // different sort in FAD board.....
652 for (int px = 0; px < 9; px++)
653 {
654 for (int drs = 0; drs < 4; drs++)
655 {
656 const int16_t pixC = rBuf.S[src+1]; // start-cell
657 const int16_t pixR = rBuf.S[src+2]; // roi
658 //here we should check if pixH is correct ....
659
660 const int pixS = i*36 + drs*9 + px;
661
662 evt->fEvent->StartPix[pixS] = pixC;
663
664 memcpy(evt->fEvent->Adc_Data + pixS*roi, &rBuf.S[src+4], roi * 2);
665
666 src += 4+pixR;
667
668 // Treatment for ch 9 (TM channel)
669 if (px != 8)
670 continue;
671
672 const int tmS = i*4 + drs;
673
674 //and we have additional TM info
675 if (pixR > roi)
676 {
677 evt->fEvent->StartTM[tmS] = (pixC + pixR - roi) % 1024;
678
679 memcpy(evt->fEvent->Adc_Data + tmS*roi + NPIX*roi, &rBuf.S[src - roi], roi * 2);
680 }
681 else
682 {
683 evt->fEvent->StartTM[tmS] = -1;
684 }
685 }
686 }
687}
688
689// ==========================================================================
690
691uint64_t reportIncomplete(const shared_ptr<EVT_CTRL2> &evt, const char *txt)
692{
693 factPrintf(MessageImp::kWarn, "skip incomplete evt (run=%d, evt=%d, n=%d, %s)",
694 evt->runNum, evt->evNum, evtCtrl.size(), txt);
695
696 uint64_t report = 0;
697
698 char str[1000];
699
700 int ik=0;
701 for (int ib=0; ib<NBOARDS; ib++)
702 {
703 if (ib%10==0)
704 str[ik++] = '|';
705
706 const int jb = evt->board[ib];
707 if (jb>=0) // data received from that board
708 {
709 str[ik++] = '0'+(jb%10);
710 continue;
711 }
712
713 // FIXME: This is not synchronous... it reports
714 // accoridng to the current connection status, not w.r.t. to the
715 // one when the event was taken.
716 if (gi_NumConnect[ib]==0) // board not connected
717 {
718 str[ik++] = 'x';
719 continue;
720 }
721
722 // data from this board lost
723 str[ik++] = '.';
724 report |= ((uint64_t)1)<<ib;
725 }
726
727 str[ik++] = '|';
728 str[ik] = 0;
729
730 factOut(MessageImp::kWarn, str);
731
732 return report;
733}
734
735// ==========================================================================
736// ==========================================================================
737
738Queue<shared_ptr<EVT_CTRL2>> processingQueue1(bind(&applyCalib, placeholders::_1));
739
740// If this is not convenient anymore, it could be replaced by
741// a command queue, to which command+data is posted,
742// (e.g. runOpen+runInfo, runClose+runInfo, evtWrite+evtInfo)
743void writeEvt(const shared_ptr<EVT_CTRL2> &evt)
744{
745 const shared_ptr<RUN_CTRL2> &run = evt->runCtrl;
746
747 // Is this a valid event or just an empty event to trigger run close?
748 // If this is not an empty event open the new run-file
749 // Empty events are there to trigger run-closing conditions
750 if (evt->runNum>=0)
751 {
752 // File not yet open
753 if (run->fileStat==kFileNotYetOpen)
754 {
755 // runOpen will close a previous run, if still open
756 if (!runOpen(evt))
757 {
758 factPrintf(MessageImp::kError, "Could not open new file for run %d (evt=%d, runOpen failed)", evt->runNum, evt->evNum);
759 run->fileStat = kFileClosed;
760 return;
761 }
762
763 factPrintf(MessageImp::kInfo, "Opened new file for run %d (evt=%d)", evt->runNum, evt->evNum);
764 run->fileStat = kFileOpen;
765 }
766
767 // Here we have a valid calibration and can go on with that.
768 processingQueue1.post(evt);
769 }
770
771 // File already closed
772 if (run->fileStat==kFileClosed)
773 return;
774
775 bool rc1 = true;
776 if (evt->runNum>=0)
777 {
778 rc1 = runWrite(evt);
779 if (!rc1)
780 factPrintf(MessageImp::kError, "Writing event %d for run %d failed (runWrite)", evt->evNum, evt->runNum);
781 }
782
783 const bool cond1 = run->lastEvt < run->maxEvt; // max number of events not reached
784 const bool cond2 = run->lastTime < run->closeTime; // max time not reached
785 const bool cond3 = run->closeRequest==kRequestNone; // file signaled to be closed
786 const bool cond4 = rc1; // Write successfull
787
788 // File is not yet to be closed.
789 if (cond1 && cond2 && cond3 && cond4)
790 return;
791
792 runClose();
793 run->fileStat = kFileClosed;
794
795 vector<string> reason;
796 if (!cond1)
797 reason.push_back(to_string(run->maxEvt)+" evts reached");
798 if (!cond2)
799 reason.push_back(to_string(run->closeTime-run->openTime)+"s reached");
800 if (!cond3)
801 {
802 if (run->closeRequest&kRequestManual)
803 reason.push_back("close requested");
804 if (run->closeRequest&kRequestTimeout)
805 reason.push_back("receive timeout");
806 if (run->closeRequest&kRequestConnectionChange)
807 reason.push_back("connection changed");
808 }
809 if (!cond4)
810 reason.push_back("runWrite failed");
811
812 const string str = boost::algorithm::join(reason, ", ");
813 factPrintf(MessageImp::kInfo, "File closed because %s", str.c_str());
814}
815
816Queue<shared_ptr<EVT_CTRL2>> secondaryQueue(bind(&writeEvt, placeholders::_1));
817
818void procEvt(const shared_ptr<EVT_CTRL2> &evt)
819{
820 if (evt->runNum>=0)
821 {
822 evt->fEvent->Errors[0] = evt->Errors[0];
823 evt->fEvent->Errors[1] = evt->Errors[1];
824 evt->fEvent->Errors[2] = evt->Errors[2];
825 evt->fEvent->Errors[3] = evt->Errors[3];
826
827 for (int ib=0; ib<NBOARDS; ib++)
828 evt->fEvent->BoardTime[ib] = evt->FADhead.get()[ib].time;
829
830 const int rc = eventCheck(evt);
831 if (rc < 0)
832 return;
833 }
834
835 // If file is open post the event for being written
836 secondaryQueue.post(evt);
837}
838
839// ==========================================================================
840// ==========================================================================
841
842shared_ptr<RUN_CTRL2> actrun; // needed in CloseRunFile
843
844/*
845 task 1-4:
846
847 lock1()-lock4();
848 while (1)
849 {
850 wait for signal [lockN]; // unlocked
851
852 while (n!=10)
853 wait sockets;
854 read;
855
856 lockM();
857 finished[n] = true;
858 signal(mainloop);
859 unlockM();
860 }
861
862
863 mainloop:
864
865 while (1)
866 {
867 lockM();
868 while (!finished[0] || !finished[1] ...)
869 wait for signal [lockM]; // unlocked... signals can be sent
870 finished[0-1] = false;
871 unlockM()
872
873 copy data to queue // locked
874
875 lockN[0-3];
876 signalN[0-3];
877 unlockN[0-3];
878 }
879
880
881 */
882
883/*
884 while (g_reset)
885 {
886 shared_ptr<EVT_CTRL2> evt = new shared_ptr<>;
887
888 // Check that all sockets are connected
889
890 for (int i=0; i<40; i++)
891 if (rd[i].connected && epoll_ctl(fd_epoll, EPOLL_CTL_ADD, socket, NULL)<0)
892 factPrintf(kError, "epoll_ctrl failed: %m (EPOLL_CTL_ADD,rc=%d)", errno);
893
894 while (g_reset)
895 {
896 if (READ_STRUCT::wait()<0)
897 break;
898
899 if (rc_epoll==0)
900 break;
901
902 for (int jj=0; jj<rc_epoll; jj++)
903 {
904 READ_STRUCT *rs = READ_STRUCT::get(jj);
905 if (!rs->connected)
906 continue;
907
908 const bool rc_read = rs->read();
909 if (!rc_read)
910 continue;
911
912 if (rs->bufTyp==READ_STRUCT::kHeader)
913 {
914 [...]
915 }
916
917 [...]
918
919 if (epoll_ctl(fd_epoll, EPOLL_CTL_DEL, socket, NULL)<0)
920 factPrintf(kError, "epoll_ctrl failed: %m (EPOLL_CTL_DEL,rc=%d)", errno);
921 }
922
923 if (once_a_second)
924 {
925 if (evt==timeout)
926 break;
927 }
928 }
929
930 if (evt.nBoards==actBoards)
931 primaryQueue.post(evt);
932 }
933*/
934
935void CloseRunFile()
936{
937 // Create a copy of the shared_ptr to ensure
938 // is not replaced in the middle of the action
939 const shared_ptr<RUN_CTRL2> run = actrun;
940 if (run)
941 run->closeRequest |= kRequestManual;
942}
943
944bool mainloop(READ_STRUCT *rd)
945{
946 factPrintf(MessageImp::kInfo, "Starting EventBuilder main loop");
947
948 Queue<shared_ptr<EVT_CTRL2>> primaryQueue(bind(&procEvt, placeholders::_1));
949
950 primaryQueue.start();
951 secondaryQueue.start();
952
953 actrun = shared_ptr<RUN_CTRL2>(new RUN_CTRL2);
954
955 //time in seconds
956 time_t gi_SecTime = time(NULL)-1;
957
958 //loop until global variable g_runStat claims stop
959 g_reset = 0;
960 while (g_reset == 0)
961 {
962#ifdef USE_SELECT
963 fd_set readfs;
964 FD_ZERO(&readfs);
965 int nfsd = 0;
966 for (int i=0; i<NBOARDS; i++)
967 if (rd[i].socket>=0 && rd[i].connected && rd[i].bufLen>0)
968 {
969 FD_SET(rd[i].socket, &readfs);
970 if (rd[i].socket>nfsd)
971 nfsd = rd[i].socket;
972 }
973
974 timeval tv;
975 tv.tv_sec = 0;
976 tv.tv_usec = 100;
977 const int rc_select = select(nfsd+1, &readfs, NULL, NULL, &tv);
978 // 0: timeout
979 // -1: error
980 if (rc_select<0)
981 {
982 factPrintf(MessageImp::kError, "Waiting for data failed: %d (select,rc=%d)", errno);
983 continue;
984 }
985#endif
986
987#ifdef USE_EPOLL
988 const int rc_epoll = READ_STRUCT::wait();
989 if (rc_epoll<0)
990 break;
991#endif
992
993#ifdef USE_EPOLL
994 for (int jj=0; jj<rc_epoll; jj++)
995#else
996 for (int jj=0; jj<NBOARDS; jj++)
997#endif
998 {
999#ifdef USE_EPOLL
1000 // FIXME: How to get i?
1001 READ_STRUCT *rs = READ_STRUCT::get(jj);
1002#else
1003
1004 const int i = (jj%4)*10 + (jj/4);
1005 READ_STRUCT *rs = &rd[i];
1006 if (!rs->connected)
1007 continue;
1008#endif
1009
1010#ifdef USE_SELECT
1011 if (!FD_ISSET(rs->socket, &readfs))
1012 continue;
1013#endif
1014
1015
1016#ifdef COMPLETE_EVENTS
1017 if (rs->bufTyp==READ_STRUCT::kWait)
1018 continue;
1019#endif
1020
1021 // ==================================================================
1022
1023 const bool rc_read = rs->read();
1024
1025 // Connect might have gotten closed during read
1026 gi_NumConnect[rs->sockId] = rs->connected;
1027 gj.numConn[rs->sockId] = rs->connected;
1028
1029 // Read either failed or disconnected, or the buffer is not yet full
1030 if (!rc_read)
1031 continue;
1032
1033 // ==================================================================
1034
1035 if (rs->bufTyp==READ_STRUCT::kHeader)
1036 {
1037 //check if startflag correct; else shift block ....
1038 // FIXME: This is not enough... this combination of
1039 // bytes can be anywhere... at least the end bytes
1040 // must be checked somewhere, too.
1041 uint k;
1042 for (k=0; k<sizeof(PEVNT_HEADER)-1; k++)
1043 {
1044 //if (rs->B[k]==0xfb && rs->B[k+1] == 0x01)
1045 if (*reinterpret_cast<uint16_t*>(rs->B+k) == 0x01fb)
1046 break;
1047 }
1048 rs->skip += k;
1049
1050 //no start of header found
1051 if (k==sizeof(PEVNT_HEADER)-1)
1052 {
1053 rs->B[0] = rs->B[sizeof(PEVNT_HEADER)-1];
1054 rs->bufPos = rs->B+1;
1055 rs->bufLen = sizeof(PEVNT_HEADER)-1;
1056 continue;
1057 }
1058
1059 if (k > 0)
1060 {
1061 memmove(rs->B, rs->B+k, sizeof(PEVNT_HEADER)-k);
1062
1063 rs->bufPos -= k;
1064 rs->bufLen += k;
1065
1066 continue; // We need to read more (bufLen>0)
1067 }
1068
1069 if (rs->skip>0)
1070 {
1071 factPrintf(MessageImp::kInfo, "Skipped %d bytes on port %d", rs->skip, rs->sockId);
1072 rs->skip = 0;
1073 }
1074
1075 // Swap the header entries from network to host order
1076 rs->swapHeader();
1077
1078 rs->bufTyp = READ_STRUCT::kData;
1079 rs->bufLen = rs->len() - sizeof(PEVNT_HEADER);
1080
1081 debugHead(rs->B); // i and fadBoard not used
1082
1083 continue;
1084 }
1085
1086 const uint16_t &end = *reinterpret_cast<uint16_t*>(rs->bufPos-2);
1087 if (end != 0xfe04)
1088 {
1089 factPrintf(MessageImp::kError, "End-of-event flag wrong on socket %2d for event %d (len=%d), got %04x",
1090 rs->sockId, rs->H.fad_evt_counter, rs->len(), end);
1091
1092 // ready to read next header
1093 rs->bufTyp = READ_STRUCT::kHeader;
1094 rs->bufLen = sizeof(PEVNT_HEADER);
1095 rs->bufPos = rs->B;
1096 // FIXME: What to do with the validity flag?
1097 continue;
1098 }
1099
1100 // get index into mBuffer for this event (create if needed)
1101 const shared_ptr<EVT_CTRL2> evt = mBufEvt(*rs, actrun);
1102
1103 // We have a valid entry, but no memory has yet been allocated
1104 if (evt && !evt->FADhead)
1105 {
1106 // Try to get memory from the big buffer
1107 PEVNT_HEADER *mem = (PEVNT_HEADER*)Memory::malloc();
1108 if (!mem)
1109 {
1110 // If this works properly, this is a hack which can be removed, or
1111 // replaced by a signal or dim message
1112 if (!rs->repmem)
1113 {
1114 factPrintf(MessageImp::kError, "No free memory left for %d (run=%d)", evt->evNum, evt->runNum);
1115 rs->repmem = true;
1116 }
1117 continue;
1118 }
1119
1120 evt->initEvent(shared_ptr<PEVNT_HEADER>(mem, Memory::free));
1121 }
1122
1123 // ready to read next header
1124 rs->bufTyp = READ_STRUCT::kHeader;
1125 rs->bufLen = sizeof(PEVNT_HEADER);
1126 rs->bufPos = rs->B;
1127
1128 // Fatal error occured. Event cannot be processed. Skip it. Start reading next header.
1129 if (!evt)
1130 continue;
1131
1132 /*
1133 const int fad = (i/10)<<8)|(i%10);
1134 if (fad != rs->H.board_id)
1135 {
1136 factPrintf(MessageImp::kWarn, "Board ID mismatch. Expected %x, got %x", fad, rs->H.board_id);
1137 }*/
1138
1139 // This should never happen
1140 if (evt->board[rs->sockId] != -1)
1141 {
1142 factPrintf(MessageImp::kError, "Got event %5d from board %3d (i=%3d, len=%5d) twice.",
1143 evt->evNum, rs->sockId, rs->sockId, rs->len());
1144 // FIXME: What to do with the validity flag?
1145 continue; // Continue reading next header
1146 }
1147
1148 // Swap the data entries (board headers) from network to host order
1149 rs->swapData();
1150
1151 // Copy data from rd[i] to mBuffer[evID]
1152 copyData(*rs, evt.get());
1153
1154#ifdef COMPLETE_EVENTS
1155 // Do not read anmymore from this board until the whole event has been received
1156 rs->bufTyp = READ_STRUCT::kWait;
1157#endif
1158 // now we have stored a new board contents into Event structure
1159 evt->fEvent->NumBoards++;
1160 evt->board[rs->sockId] = rs->sockId;
1161 evt->nBoard++;
1162
1163#ifdef COMPLETE_EPOLL
1164 if (epoll_ctl(READ_STRUCT::fd_epoll, EPOLL_CTL_DEL, rs->socket, NULL)<0)
1165 factPrintf(MessageImp::kError, "epoll_ctrl failed: %m (EPOLL_CTL_DEL,rc=%d)", errno);
1166#endif
1167 // event not yet complete
1168 if (evt->nBoard < READ_STRUCT::activeSockets)
1169 continue;
1170
1171 // All previous events are now flagged as incomplete ("expired")
1172 // and will be removed. (This is a bit tricky, because pop_front()
1173 // would invalidate the current iterator if not done _after_ the increment)
1174 for (auto it=evtCtrl.begin(); it!=evtCtrl.end(); )
1175 {
1176 const bool found = it->get()==evt.get();
1177 if (!found)
1178 reportIncomplete(*it, "expired");
1179 else
1180 primaryQueue.post(evt);
1181
1182 it++;
1183 evtCtrl.pop_front();
1184
1185 // We reached the current event, so we are done
1186 if (found)
1187 break;
1188 }
1189
1190#ifdef COMPLETE_EPOLL
1191 for (int j=0; j<40; j++)
1192 {
1193 epoll_event ev;
1194 ev.events = EPOLLIN;
1195 ev.data.ptr = &rd[j]; // user data (union: ev.ptr)
1196 if (epoll_ctl(READ_STRUCT::fd_epoll, EPOLL_CTL_ADD, rd[j].socket, &ev)<0)
1197 factPrintf(MessageImp::kError, "epoll_ctl failed: %m (EPOLL_CTL_ADD,rc=%d)", errno);
1198 }
1199#endif
1200
1201#ifdef COMPLETE_EVENTS
1202 for (int j=0; j<40; j++)
1203 {
1204 //if (rs->bufTyp==READ_STRUCT::kWait)
1205 {
1206 rs->bufTyp = READ_STRUCT::kHeader;
1207 rs->bufLen = sizeof(PEVNT_HEADER);
1208 rs->bufPos = rs->B;
1209 }
1210 }
1211#endif
1212 } // end for loop over all sockets
1213
1214 // ==================================================================
1215
1216 gj.bufNew = evtCtrl.size(); //# incomplete events in buffer
1217 gj.bufEvt = primaryQueue.size(); //# complete events in buffer
1218 gj.bufTot = gj.bufNew+gj.bufEvt; //# total events currently in buffer
1219 if (gj.bufNew>gj.maxEvt) //# maximum events in buffer past cycle
1220 gj.maxEvt = gj.bufNew;
1221
1222 // ==================================================================
1223
1224 const time_t actTime = time(NULL);
1225 if (actTime == gi_SecTime)
1226 {
1227#if !defined(USE_SELECT) && !defined(USE_EPOLL)
1228 if (evtCtrl.empty())
1229 usleep(1);
1230#endif
1231 continue;
1232 }
1233 gi_SecTime = actTime;
1234
1235 // ==================================================================
1236 //loop over all active events and flag those older than read-timeout
1237 //delete those that are written to disk ....
1238
1239 // This could be improved having the pointer which separates the queue with
1240 // the incomplete events from the queue with the complete events
1241 for (auto it=evtCtrl.begin(); it!=evtCtrl.end(); )
1242 {
1243 // A reference is enough because the shared_ptr is held by the evtCtrl
1244 const shared_ptr<EVT_CTRL2> &evt = *it;
1245
1246 // The first event is the oldest. If the first event within the
1247 // timeout window was received, we can stop searching further.
1248 if (evt->time.tv_sec>=actTime - 30)
1249 break;
1250
1251 // This will result in the emission of a dim service.
1252 // It doesn't matter if that takes comparably long,
1253 // because we have to stop the run anyway.
1254 const uint64_t rep = reportIncomplete(evt, "timeout");
1255 factReportIncomplete(rep);
1256
1257 it++;
1258 evtCtrl.pop_front();
1259 }
1260
1261 // If nothing was received for more than 5min, close file
1262 if (actTime-actrun->lastTime>300)
1263 actrun->closeRequest |= kRequestTimeout;
1264
1265 // =================================================================
1266
1267 gj.bufTot = Memory::max_inuse/MAX_TOT_MEM;
1268 gj.usdMem = Memory::max_inuse;
1269 gj.totMem = Memory::allocated;
1270
1271 gj.deltaT = 1000; // temporary, must be improved
1272
1273 for (int ib=0; ib<NBOARDS; ib++)
1274 {
1275 gj.rateBytes[ib] = rd[ib].rateBytes;
1276 gj.totBytes[ib] += rd[ib].rateBytes;
1277
1278 if (rd[ib].check(g_port[ib].sockDef, g_port[ib].sockAddr))
1279 actrun->closeRequest |= kRequestConnectionChange;
1280
1281 gi_NumConnect[ib] = rd[ib].connected;
1282 gj.numConn[ib] = rd[ib].connected;
1283 }
1284
1285
1286 factStat(gj);
1287
1288 Memory::max_inuse = 0;
1289 gj.maxEvt = 0;
1290 for (int ib=0; ib<NBOARDS; ib++)
1291 rd[ib].rateBytes = 0;
1292
1293 // =================================================================
1294
1295 // This is a fake event to trigger possible run-closing conditions once a second
1296 // FIXME: This is not yet ideal because a file would never be closed
1297 // if a new file has been started and no events of the new file
1298 // have been received yet
1299 if (actrun->fileStat==kFileOpen)
1300 primaryQueue.post(shared_ptr<EVT_CTRL2>(new EVT_CTRL2(actrun)));
1301 }
1302
1303 // 1: Stop, wait for event to get processed
1304 // 2: Stop, finish immediately
1305 // 101: Restart, wait for events to get processed
1306 // 102: Restart, finish immediately
1307 //
1308 const int gi_reset = g_reset;
1309
1310 const bool abort = gi_reset%100==2;
1311
1312 factPrintf(MessageImp::kInfo, "Stop reading ... RESET=%d (%s threads)", gi_reset, abort?"abort":"join");
1313
1314 primaryQueue.wait(abort);
1315 secondaryQueue.wait(abort);
1316 processingQueue1.wait(abort);
1317
1318 // Here we also destroy all runCtrl structures and hence close all open files
1319 evtCtrl.clear();
1320
1321 factPrintf(MessageImp::kInfo, "Exit read Process...");
1322 factPrintf(MessageImp::kInfo, "%ld Bytes flagged as in-use.", Memory::inuse);
1323
1324 factStat(gj);
1325
1326 return gi_reset>=100;
1327}
1328
1329// ==========================================================================
1330// ==========================================================================
1331
1332void StartEvtBuild()
1333{
1334 factPrintf(MessageImp::kInfo, "Starting EventBuilder++");
1335
1336
1337 for (int k=0; k<NBOARDS; k++)
1338 {
1339 gi_NumConnect[k] = 0;
1340 gj.numConn[k] = 0;
1341 gj.totBytes[k] = 0;
1342 }
1343
1344 gj.bufTot = gj.maxEvt = gj.xxxEvt = 0;
1345 gj.maxMem = gj.xxxMem = 0;
1346
1347 gj.usdMem = Memory::inuse;
1348 gj.totMem = Memory::allocated;
1349
1350 gj.bufNew = gj.bufEvt = 0;
1351 gj.evtSkip = gj.evtWrite = gj.evtErr = 0;
1352 gj.readStat = gj.procStat = gj.writStat = 0;
1353
1354
1355
1356 READ_STRUCT rd[NBOARDS];
1357
1358 // This is only that every socket knows its id (maybe we replace that by arrays instead of an array of sockets)
1359 for (int i=0; i<NBOARDS; i++)
1360 rd[i].sockId = i;
1361
1362 while (mainloop(rd));
1363
1364 //must close all open sockets ...
1365 factPrintf(MessageImp::kInfo, "Close all sockets...");
1366
1367 READ_STRUCT::close();
1368
1369 // Now all sockets get closed. This is not reflected in gi_NumConnect
1370 // The current workaround is to count all sockets as closed when the thread is not running
1371}
Note: See TracBrowser for help on using the repository browser.