Context Navigation

Back to Ticket #39

Ticket #39: forloop_optim.cpp

File forloop_optim.cpp, 2.8 KB (added by dneise, 8 years ago)

Line
1	#include <stdint.h>
2	#include <vector>
3	#include <iostream>
4	#include <vector>
5	#include <cstdlib>
6	#include <stdio.h> /* printf */
7	#include <time.h> /* time_t, struct tm, difftime, time, mktime */
8
9
// Number of channels processed per call. Every channel owns a contiguous
// slice of 1024 entries in the accumulators below (index = ch*1024 + sample).
// A typed constant replaces the former object-like macro so the name obeys
// normal scoping rules and compares cleanly against size_t loop counters.
constexpr std::size_t fNumChannels = 1440;

// Running per-sample sum of all values added so far.
// Must be sized to fNumChannels*1024 before AddRel_* is called.
std::vector<int64_t> fSum;

// Running per-sample sum of squares of all values added so far.
// Must be sized to fNumChannels*1024 before AddRel_* is called.
std::vector<int64_t> fSum2;
13
14	void AddRel_complex(const int16_t val, const int16_t start)
15	{
16	// This version is 2.5 times faster because the compilers optimization
17	// is not biased by the evaluation of %1024
18	for (size_t ch=0; ch<fNumChannels; ch++)
19	{
20	const int16_t &spos = start[ch];
21	if (spos<0)
22	continue;
23
24	const size_t pos = ch*1024;
25
26	const int16_t *beg_val = val + pos;
27	int64_t *beg_sum = fSum.data() + pos;
28	int64_t *beg_sum2 = fSum2.data() + pos;
29
30	const int16_t *pval = beg_val; // val[rel]
31	int64_t *psum = beg_sum + spos; // fSum[abs]
32	int64_t *psum2 = beg_sum2 + spos; // fSum2[abs]
33
34	while (psum<beg_sum+1024)
35	{
36	const int64_t v = *pval++;
37
38	*psum++ += v;
39	psum2++ += vv;
40	}
41
42	psum = beg_sum;
43	psum2 = beg_sum2;
44
45	while (pval<beg_val+1024)
46	{
47	const int64_t v = *pval++;
48
49	*psum++ += v;
50	psum2++ += vv;
51	}
52	}
53	}
54
55	void AddRel_simple(const int16_t val, const int16_t start)
56	{
57	for (size_t ch=0; ch<fNumChannels; ch++)
58	{
59	const int16_t &spos = start[ch];
60	if (spos<0)
61	continue;
62
63	const size_t pos = ch*1024;
64	for (size_t i=0; i<1024; i++)
65	{
66	// Value is relative to trigger
67	// Abs is corresponding index relative to DRS pipeline
68	const size_t rel = pos + i;
69	const size_t abs = pos + (spos+i)%1024;
70
71	const int64_t v = val[rel];
72
73	fSum[abs] += v;
74	fSum2[abs] += v*v;
75	}
76	}
77	}
78
79	int main(void){
80	const int REPS = 1000;
81	clock_t t;
82	std::vector<int16_t> values(fNumChannels * 1024, 3);
83	std::vector<int16_t> start(fNumChannels, 0);
84	for (size_t i=0; i<start.size(); i++){
85	start[i] = rand()%1024;
86	}
87
88
89	fSum.resize(fNumChannels * 1024);
90	fSum2.resize(fNumChannels * 1024);
91	t = clock();
92	for(int i=0; i<REPS; i++){
93	AddRel_simple(
94	(const int16_t *)values.data(),
95	(const int16_t *)start.data()
96	);
97	}
98	t = clock() - t;
99	printf ("AddRel_simple took %f s.\n",((float)t)/CLOCKS_PER_SEC);
100
101	fSum.resize(fNumChannels * 1024);
102	fSum2.resize(fNumChannels * 1024);
103	t = clock();
104	for(int i=0; i<REPS; i++){
105	AddRel_complex(
106	(const int16_t *)values.data(),
107	(const int16_t *)start.data()
108	);
109	}
110	t = clock() - t;
111	printf ("AddRel_complex took %f s.\n",((float)t)/CLOCKS_PER_SEC);
112
113	}

Download in other formats:

Original Format