1 | #include <stdint.h>
|
---|
2 | #include <vector>
|
---|
3 | #include <iostream>
|
---|
4 | #include <vector>
|
---|
5 | #include <cstdlib>
|
---|
6 | #include <stdio.h> /* printf */
|
---|
7 | #include <time.h> /* time_t, struct tm, difftime, time, mktime */
|
---|
8 |
|
---|
9 |
|
---|
// Number of channels processed per call of the AddRel_* functions.
// Typed constant instead of a preprocessor macro so it obeys scoping and
// takes part in ordinary type checking.
constexpr size_t fNumChannels = 1440;

// Running per-sample sum of the values added so far.
// The AddRel_* functions index it as [channel*1024 + cell], so it must hold
// at least fNumChannels*1024 elements before they are called.
std::vector<int64_t> fSum;

// Running per-sample sum of the squared values; same layout and size
// requirement as fSum.
std::vector<int64_t> fSum2;
|
---|
13 |
|
---|
14 | void AddRel_complex(const int16_t *val, const int16_t *start)
|
---|
15 | {
|
---|
16 | // This version is 2.5 times faster because the compilers optimization
|
---|
17 | // is not biased by the evaluation of %1024
|
---|
18 | for (size_t ch=0; ch<fNumChannels; ch++)
|
---|
19 | {
|
---|
20 | const int16_t &spos = start[ch];
|
---|
21 | if (spos<0)
|
---|
22 | continue;
|
---|
23 |
|
---|
24 | const size_t pos = ch*1024;
|
---|
25 |
|
---|
26 | const int16_t *beg_val = val + pos;
|
---|
27 | int64_t *beg_sum = fSum.data() + pos;
|
---|
28 | int64_t *beg_sum2 = fSum2.data() + pos;
|
---|
29 |
|
---|
30 | const int16_t *pval = beg_val; // val[rel]
|
---|
31 | int64_t *psum = beg_sum + spos; // fSum[abs]
|
---|
32 | int64_t *psum2 = beg_sum2 + spos; // fSum2[abs]
|
---|
33 |
|
---|
34 | while (psum<beg_sum+1024)
|
---|
35 | {
|
---|
36 | const int64_t v = *pval++;
|
---|
37 |
|
---|
38 | *psum++ += v;
|
---|
39 | *psum2++ += v*v;
|
---|
40 | }
|
---|
41 |
|
---|
42 | psum = beg_sum;
|
---|
43 | psum2 = beg_sum2;
|
---|
44 |
|
---|
45 | while (pval<beg_val+1024)
|
---|
46 | {
|
---|
47 | const int64_t v = *pval++;
|
---|
48 |
|
---|
49 | *psum++ += v;
|
---|
50 | *psum2++ += v*v;
|
---|
51 | }
|
---|
52 | }
|
---|
53 | }
|
---|
54 |
|
---|
55 | void AddRel_simple(const int16_t *val, const int16_t *start)
|
---|
56 | {
|
---|
57 | for (size_t ch=0; ch<fNumChannels; ch++)
|
---|
58 | {
|
---|
59 | const int16_t &spos = start[ch];
|
---|
60 | if (spos<0)
|
---|
61 | continue;
|
---|
62 |
|
---|
63 | const size_t pos = ch*1024;
|
---|
64 | for (size_t i=0; i<1024; i++)
|
---|
65 | {
|
---|
66 | // Value is relative to trigger
|
---|
67 | // Abs is corresponding index relative to DRS pipeline
|
---|
68 | const size_t rel = pos + i;
|
---|
69 | const size_t abs = pos + (spos+i)%1024;
|
---|
70 |
|
---|
71 | const int64_t v = val[rel];
|
---|
72 |
|
---|
73 | fSum[abs] += v;
|
---|
74 | fSum2[abs] += v*v;
|
---|
75 | }
|
---|
76 | }
|
---|
77 | }
|
---|
78 |
|
---|
79 | int main(void){
|
---|
80 | const int REPS = 1000;
|
---|
81 | clock_t t;
|
---|
82 | std::vector<int16_t> values(fNumChannels * 1024, 3);
|
---|
83 | std::vector<int16_t> start(fNumChannels, 0);
|
---|
84 | for (size_t i=0; i<start.size(); i++){
|
---|
85 | start[i] = rand()%1024;
|
---|
86 | }
|
---|
87 |
|
---|
88 |
|
---|
89 | fSum.resize(fNumChannels * 1024);
|
---|
90 | fSum2.resize(fNumChannels * 1024);
|
---|
91 | t = clock();
|
---|
92 | for(int i=0; i<REPS; i++){
|
---|
93 | AddRel_simple(
|
---|
94 | (const int16_t *)values.data(),
|
---|
95 | (const int16_t *)start.data()
|
---|
96 | );
|
---|
97 | }
|
---|
98 | t = clock() - t;
|
---|
99 | printf ("AddRel_simple took %f s.\n",((float)t)/CLOCKS_PER_SEC);
|
---|
100 |
|
---|
101 | fSum.resize(fNumChannels * 1024);
|
---|
102 | fSum2.resize(fNumChannels * 1024);
|
---|
103 | t = clock();
|
---|
104 | for(int i=0; i<REPS; i++){
|
---|
105 | AddRel_complex(
|
---|
106 | (const int16_t *)values.data(),
|
---|
107 | (const int16_t *)start.data()
|
---|
108 | );
|
---|
109 | }
|
---|
110 | t = clock() - t;
|
---|
111 | printf ("AddRel_complex took %f s.\n",((float)t)/CLOCKS_PER_SEC);
|
---|
112 |
|
---|
113 | }
|
---|