| 1 | #include <stdint.h>
|
|---|
| 2 | #include <vector>
|
|---|
| 3 | #include <iostream>
|
|---|
| 4 | #include <vector>
|
|---|
| 5 | #include <cstdlib>
|
|---|
| 6 | #include <stdio.h> /* printf */
|
|---|
| 7 | #include <time.h> /* time_t, struct tm, difftime, time, mktime */
|
|---|
| 8 |
|
|---|
| 9 |
|
|---|
// Number of channels processed per call. Each channel owns a block of 1024
// consecutive cells in the accumulators below.
// A typed constant replaces the former preprocessor macro so the name obeys
// normal scoping rules and carries an unsigned size type.
constexpr size_t fNumChannels = 1440;

// Running sum of all sample values added so far, one entry per cell
// (indexed as channel*1024 + cell). Must be resized by the caller before use.
std::vector<int64_t> fSum;

// Running sum of the squared sample values, indexed like fSum.
std::vector<int64_t> fSum2;
|
|---|
| 13 |
|
|---|
| 14 | void AddRel_complex(const int16_t *val, const int16_t *start)
|
|---|
| 15 | {
|
|---|
| 16 | // This version is 2.5 times faster because the compilers optimization
|
|---|
| 17 | // is not biased by the evaluation of %1024
|
|---|
| 18 | for (size_t ch=0; ch<fNumChannels; ch++)
|
|---|
| 19 | {
|
|---|
| 20 | const int16_t &spos = start[ch];
|
|---|
| 21 | if (spos<0)
|
|---|
| 22 | continue;
|
|---|
| 23 |
|
|---|
| 24 | const size_t pos = ch*1024;
|
|---|
| 25 |
|
|---|
| 26 | const int16_t *beg_val = val + pos;
|
|---|
| 27 | int64_t *beg_sum = fSum.data() + pos;
|
|---|
| 28 | int64_t *beg_sum2 = fSum2.data() + pos;
|
|---|
| 29 |
|
|---|
| 30 | const int16_t *pval = beg_val; // val[rel]
|
|---|
| 31 | int64_t *psum = beg_sum + spos; // fSum[abs]
|
|---|
| 32 | int64_t *psum2 = beg_sum2 + spos; // fSum2[abs]
|
|---|
| 33 |
|
|---|
| 34 | while (psum<beg_sum+1024)
|
|---|
| 35 | {
|
|---|
| 36 | const int64_t v = *pval++;
|
|---|
| 37 |
|
|---|
| 38 | *psum++ += v;
|
|---|
| 39 | *psum2++ += v*v;
|
|---|
| 40 | }
|
|---|
| 41 |
|
|---|
| 42 | psum = beg_sum;
|
|---|
| 43 | psum2 = beg_sum2;
|
|---|
| 44 |
|
|---|
| 45 | while (pval<beg_val+1024)
|
|---|
| 46 | {
|
|---|
| 47 | const int64_t v = *pval++;
|
|---|
| 48 |
|
|---|
| 49 | *psum++ += v;
|
|---|
| 50 | *psum2++ += v*v;
|
|---|
| 51 | }
|
|---|
| 52 | }
|
|---|
| 53 | }
|
|---|
| 54 |
|
|---|
| 55 | void AddRel_simple(const int16_t *val, const int16_t *start)
|
|---|
| 56 | {
|
|---|
| 57 | for (size_t ch=0; ch<fNumChannels; ch++)
|
|---|
| 58 | {
|
|---|
| 59 | const int16_t &spos = start[ch];
|
|---|
| 60 | if (spos<0)
|
|---|
| 61 | continue;
|
|---|
| 62 |
|
|---|
| 63 | const size_t pos = ch*1024;
|
|---|
| 64 | for (size_t i=0; i<1024; i++)
|
|---|
| 65 | {
|
|---|
| 66 | // Value is relative to trigger
|
|---|
| 67 | // Abs is corresponding index relative to DRS pipeline
|
|---|
| 68 | const size_t rel = pos + i;
|
|---|
| 69 | const size_t abs = pos + (spos+i)%1024;
|
|---|
| 70 |
|
|---|
| 71 | const int64_t v = val[rel];
|
|---|
| 72 |
|
|---|
| 73 | fSum[abs] += v;
|
|---|
| 74 | fSum2[abs] += v*v;
|
|---|
| 75 | }
|
|---|
| 76 | }
|
|---|
| 77 | }
|
|---|
| 78 |
|
|---|
| 79 | int main(void){
|
|---|
| 80 | const int REPS = 1000;
|
|---|
| 81 | clock_t t;
|
|---|
| 82 | std::vector<int16_t> values(fNumChannels * 1024, 3);
|
|---|
| 83 | std::vector<int16_t> start(fNumChannels, 0);
|
|---|
| 84 | for (size_t i=0; i<start.size(); i++){
|
|---|
| 85 | start[i] = rand()%1024;
|
|---|
| 86 | }
|
|---|
| 87 |
|
|---|
| 88 |
|
|---|
| 89 | fSum.resize(fNumChannels * 1024);
|
|---|
| 90 | fSum2.resize(fNumChannels * 1024);
|
|---|
| 91 | t = clock();
|
|---|
| 92 | for(int i=0; i<REPS; i++){
|
|---|
| 93 | AddRel_simple(
|
|---|
| 94 | (const int16_t *)values.data(),
|
|---|
| 95 | (const int16_t *)start.data()
|
|---|
| 96 | );
|
|---|
| 97 | }
|
|---|
| 98 | t = clock() - t;
|
|---|
| 99 | printf ("AddRel_simple took %f s.\n",((float)t)/CLOCKS_PER_SEC);
|
|---|
| 100 |
|
|---|
| 101 | fSum.resize(fNumChannels * 1024);
|
|---|
| 102 | fSum2.resize(fNumChannels * 1024);
|
|---|
| 103 | t = clock();
|
|---|
| 104 | for(int i=0; i<REPS; i++){
|
|---|
| 105 | AddRel_complex(
|
|---|
| 106 | (const int16_t *)values.data(),
|
|---|
| 107 | (const int16_t *)start.data()
|
|---|
| 108 | );
|
|---|
| 109 | }
|
|---|
| 110 | t = clock() - t;
|
|---|
| 111 | printf ("AddRel_complex took %f s.\n",((float)t)/CLOCKS_PER_SEC);
|
|---|
| 112 |
|
|---|
| 113 | }
|
|---|