Ticket #39: forloop_optim.cpp

File forloop_optim.cpp, 2.8 KB (added by dneise, 8 years ago)
Line 
1#include <stdint.h>
2#include <vector>
3#include <iostream>
4#include <vector>
5#include <cstdlib>
6#include <stdio.h> /* printf */
7#include <time.h> /* time_t, struct tm, difftime, time, mktime */
8
9
#define fNumChannels 1440

// Accumulators shared by AddRel_complex and AddRel_simple. Both functions
// index them as ch*1024 + sample, so the caller must size them to at least
// fNumChannels*1024 entries before calling either function.
std::vector<int64_t> fSum;   // running sum of the samples
std::vector<int64_t> fSum2;  // running sum of the squared samples

/// Adds one event to fSum / fSum2, rotating every channel by its start offset.
///
/// For each channel ch with start[ch] >= 0, sample val[ch*1024 + i] is added
/// to fSum[ch*1024 + (start[ch] + i) % 1024] and its square to the same slot
/// of fSum2. Channels with a negative start value are skipped.
///
/// @param val    fNumChannels*1024 samples, 1024 consecutive values per channel.
/// @param start  fNumChannels start offsets, one per channel.
///
/// The rotation is done as two straight pointer runs (offset..1023, then
/// 0..offset-1) so that no modulo is evaluated per sample; the original
/// author's note reports this as about 2.5 times faster than the per-sample
/// %1024 variant.
void AddRel_complex(const int16_t *val, const int16_t *start)
{
    // Samples per channel (row length of val, fSum and fSum2).
    const size_t kSamples = 1024;

    for (size_t ch = 0; ch < fNumChannels; ch++)
    {
        // A negative start offset means: skip this channel.
        if (start[ch] < 0)
            continue;

        // Reduce the offset once per channel. Without this an offset >= 1024
        // would skip the first run entirely (result differs from
        // AddRel_simple) and form a pointer outside the channel's row.
        const size_t spos = static_cast<size_t>(start[ch]) % kSamples;

        const size_t pos = ch * kSamples;

        const int16_t *pval = val + pos;                 // val[rel]
        const int16_t *const end_val = pval + kSamples;

        int64_t *const beg_sum = fSum.data() + pos;
        int64_t *const beg_sum2 = fSum2.data() + pos;
        int64_t *const end_sum = beg_sum + kSamples;

        int64_t *psum = beg_sum + spos;                  // fSum[abs]
        int64_t *psum2 = beg_sum2 + spos;                // fSum2[abs]

        // First run: fill the accumulator row from spos up to its end.
        while (psum < end_sum)
        {
            const int64_t v = *pval++;

            *psum++ += v;
            *psum2++ += v * v;
        }

        // Wrap around: the remaining samples go to the start of the row.
        psum = beg_sum;
        psum2 = beg_sum2;

        while (pval < end_val)
        {
            const int64_t v = *pval++;

            *psum++ += v;
            *psum2++ += v * v;
        }
    }
}
54
55void AddRel_simple(const int16_t *val, const int16_t *start)
56{
57 for (size_t ch=0; ch<fNumChannels; ch++)
58 {
59 const int16_t &spos = start[ch];
60 if (spos<0)
61 continue;
62
63 const size_t pos = ch*1024;
64 for (size_t i=0; i<1024; i++)
65 {
66 // Value is relative to trigger
67 // Abs is corresponding index relative to DRS pipeline
68 const size_t rel = pos + i;
69 const size_t abs = pos + (spos+i)%1024;
70
71 const int64_t v = val[rel];
72
73 fSum[abs] += v;
74 fSum2[abs] += v*v;
75 }
76 }
77}
78
79int main(void){
80 const int REPS = 1000;
81 clock_t t;
82 std::vector<int16_t> values(fNumChannels * 1024, 3);
83 std::vector<int16_t> start(fNumChannels, 0);
84 for (size_t i=0; i<start.size(); i++){
85 start[i] = rand()%1024;
86 }
87
88
89 fSum.resize(fNumChannels * 1024);
90 fSum2.resize(fNumChannels * 1024);
91 t = clock();
92 for(int i=0; i<REPS; i++){
93 AddRel_simple(
94 (const int16_t *)values.data(),
95 (const int16_t *)start.data()
96 );
97 }
98 t = clock() - t;
99 printf ("AddRel_simple took %f s.\n",((float)t)/CLOCKS_PER_SEC);
100
101 fSum.resize(fNumChannels * 1024);
102 fSum2.resize(fNumChannels * 1024);
103 t = clock();
104 for(int i=0; i<REPS; i++){
105 AddRel_complex(
106 (const int16_t *)values.data(),
107 (const int16_t *)start.data()
108 );
109 }
110 t = clock() - t;
111 printf ("AddRel_complex took %f s.\n",((float)t)/CLOCKS_PER_SEC);
112
113}