Changeset 16561 for trunk/Mars/mcore
- Timestamp:
- 06/01/13 19:55:16 (11 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/Mars/mcore/DrsCalib.h
r16428 r16561 51 51 void AddRel(const int16_t *val, const int16_t *start) 52 52 { 53 /* 53 54 for (size_t ch=0; ch<fNumChannels; ch++) 54 55 { 55 const int16_t spos = start[ch];56 const int16_t &spos = start[ch]; 56 57 if (spos<0) 57 58 continue; 58 59 59 60 const size_t pos = ch*1024; 60 61 61 for (size_t i=0; i<1024; i++) 62 62 { … … 71 71 fSum2[abs] += v*v; 72 72 } 73 */ 74 75 // This version is 2.5 times faster because the compilers optimization 76 // is not biased by the evaluation of %1024 77 for (size_t ch=0; ch<fNumChannels; ch++) 78 { 79 const int16_t &spos = start[ch]; 80 if (spos<0) 81 continue; 82 83 const size_t pos = ch*1024; 84 85 const int16_t *pval = val + pos; 86 const int16_t *end_val = val + 1024; 87 88 int64_t *beg_sum = fSum.data() + pos; 89 int64_t *beg_sum2 = fSum2.data() + pos; 90 91 int64_t *psum = beg_sum + spos; 92 int64_t *psum2 = beg_sum2 + spos; 93 94 while (psum<beg_sum+1024) 95 { 96 const int64_t v = *pval++; 97 98 *psum++ = v; 99 *psum2++ = v*v; 100 } 101 102 psum = beg_sum; 103 psum2 = beg_sum2; 104 105 while (pval<end_val) 106 { 107 const int64_t v = *pval++; 108 109 *psum++ = v; 110 *psum2++ = v*v; 111 } 73 112 } 74 113 … … 79 118 const int32_t *offset, const uint32_t scale) 80 119 { 120 /* 81 121 for (size_t ch=0; ch<fNumChannels; ch++) 82 122 { … … 85 125 continue; 86 126 87 const size_t pos = ch* fNumSamples;127 const size_t pos = ch*1024; 88 128 89 129 for (size_t i=0; i<fNumSamples; i++) … … 93 133 // Abs is corresponding index relative to DRS pipeline 94 134 const size_t rel = pos + i; 95 const size_t abs = pos + (spos+i)% fNumSamples;135 const size_t abs = pos + (spos+i)%1024; 96 136 97 137 const int64_t v = int64_t(val[rel])*scale-offset[abs]; … … 99 139 fSum[abs] += v; 100 140 fSum2[abs] += v*v; 141 } 142 }*/ 143 144 // This version is 2.5 times faster because the compilers optimization 145 // is not biased by the evaluation of %1024 146 for (size_t ch=0; ch<fNumChannels; ch++) 147 { 148 const int16_t &spos = start[ch]; 149 if (spos<0) 150 continue; 151 152 const size_t pos = ch*1024; 153 154 const int16_t *pval = val + pos; 155 const int16_t *end_val = val + 1024; 156 157 const int32_t *beg_offset = offset + pos; 158 const int32_t *poffset = beg_offset + spos; 159 160 int64_t *beg_sum = fSum.data() + pos; 161 int64_t *beg_sum2 = fSum2.data() + pos; 162 163 int64_t *psum = beg_sum + spos; 164 int64_t *psum2 = beg_sum2 + spos; 165 166 while (psum<beg_sum+1024) 167 { 168 const int64_t v = int64_t(*pval++)*scale - *poffset++; 169 170 *psum++ = v; 171 *psum2++ = v*v; 172 } 173 174 psum = beg_sum; 175 psum2 = beg_sum2; 176 poffset = beg_offset; 177 178 while (pval<end_val) 179 { 180 const int64_t v = int64_t(*pval++)*scale - *poffset++; 181 182 *psum++ = v; 183 *psum2++ = v*v; 101 184 } 102 185 } … … 122 205 // Abs is corresponding index relative to DRS pipeline 123 206 const size_t rel = pos + i; 124 const size_t abs = pos + (spos+i)% fNumSamples;207 const size_t abs = pos + (spos+i)%1024; 125 208 126 209 const int64_t v = int64_t(val[rel])*scale-offset[abs]; … … 137 220 const int32_t *offset, const uint32_t scale) 138 221 { 222 /* 139 223 // 1440 without tm, 1600 with tm 140 224 for (size_t ch=0; ch<fNumChannels; ch++) … … 160 244 fSum2[rel] += v*v; 161 245 } 246 }*/ 247 248 // This version is 1.5 times faster because the compilers optimization 249 // is not biased by the evaluation of %1024 250 for (size_t ch=0; ch<fNumChannels; ch++) 251 { 252 const int16_t &spos = start[ch]; 253 if (spos<0) 254 continue; 255 256 const size_t pos = ch*fNumSamples; 257 258 const int16_t *pval = val + pos; 259 260 const int32_t *beg_offset = offset + ch*1024; 261 const int32_t *poffset = beg_offset + spos; 262 263 int64_t *beg_sum = fSum.data() + pos; 264 int64_t *beg_sum2 = fSum2.data() + pos; 265 266 int64_t *psum = beg_sum; 267 int64_t *psum2 = beg_sum2; 268 269 if (spos+fNumSamples>1024) 270 { 271 while (poffset<beg_offset+1024) 272 { 273 const int64_t v = int64_t(*pval++)*scale - *poffset++; 274 275 *psum++ = v; 276 *psum2++ = v*v; 277 } 278 279 poffset = beg_offset; 280 } 281 282 while (psum<beg_sum+fNumSamples) 283 { 284 const int64_t v = int64_t(*pval++)*scale - *poffset++; 285 286 *psum++ = v; 287 *psum2++ = v*v; 288 } 162 289 } 163 290 … … 176 303 } 177 304 305 /* 178 306 for (size_t i=0; i<roi; i++) 179 307 { … … 189 317 const int64_t div = gain[abs]; 190 318 vec[i] = div==0 ? 0 : double(v)*scalegain/div; 319 }*/ 320 321 // This version is faster because the compilers optimization 322 // is not biased by the evaluation of %1024 323 // (Here we are dominated by numerics... improvement ~10%) 324 const int32_t *poffset = offset + start; 325 const int64_t *pgain = gain + start; 326 const int16_t *pval = val; 327 328 float *pvec = vec; 329 330 if (start+roi>1024) 331 { 332 while (poffset<offset+1024) 333 { 334 const int64_t v = 335 + int64_t(*pval++)*scaleabs - *poffset++ 336 ; 337 338 *pvec++ = *pgain==0 ? 0 : double(v)*scalegain / *pgain; 339 340 pgain++; 341 } 342 343 poffset = offset; 344 pgain = gain; 345 } 346 347 while (pvec<vec+roi) 348 { 349 const int64_t v = 350 + int64_t(*pval++)*scaleabs - *poffset++ 351 ; 352 353 *pvec++ = *pgain==0 ? 0 : double(v)*scalegain / *pgain; 354 355 pgain++; 191 356 } 192 357 } … … 202 367 return; 203 368 } 204 369 /* 205 370 for (size_t i=0; i<roi; i++) 206 371 { … … 217 382 const int64_t div = gain[abs]*scalerel; 218 383 vec[i] = div==0 ? 0 : double(v)*scalegain/div; 384 }*/ 385 386 // (Here we are dominated by numerics... improvement ~10%) 387 const int32_t *poffset = offset + start; 388 const int64_t *pgain = gain + start; 389 const int16_t *pval = val; 390 const int64_t *ptrgoff = trgoff; 391 392 float *pvec = vec; 393 394 if (start+roi>1024) 395 { 396 while (poffset<offset+1024) 397 { 398 const int64_t v = 399 + (int64_t(*pval++)*scaleabs - *poffset++)*scalerel 400 - *ptrgoff++; 401 ; 402 403 const int64_t div = *pgain * scalerel; 404 *pvec++ = div==0 ? 0 : double(v)*scalegain / div; 405 406 pgain++; 407 } 408 409 poffset = offset; 410 pgain = gain; 411 } 412 413 while (pvec<vec+roi) 414 { 415 const int64_t v = 416 + (int64_t(*pval++)*scaleabs - *poffset++)*scalerel 417 - *ptrgoff++; 418 ; 419 420 const int64_t div = *pgain * scalerel; 421 *pvec++ = div==0 ? 0 : double(v)*scalegain / div; 422 423 pgain++; 219 424 } 220 425 }
Note:
See TracChangeset
for help on using the changeset viewer.