Changeset 19802 for trunk/FACT++/src/csv2root.cc
- Timestamp: 10/27/19 11:11:01 (5 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/FACT++/src/csv2root.cc
r19798 r19802 1 #include <random>2 3 1 #include <boost/regex.hpp> 4 2 #include <boost/filesystem.hpp> … … 7 5 #include "tools.h" 8 6 #include "Time.h" 9 #include " Configuration.h"7 #include "Splitting.h" 10 8 11 9 #include <TROOT.h> … … 18 16 using namespace std; 19 17 namespace fs = boost::filesystem; 18 19 // ------------------------------------------------------------------------ 20 20 21 21 22 // ------------------------------------------------------------------------ … … 32 33 ("compression,c", var<uint16_t>(1), "zlib compression level for the root file") 33 34 ("no-header,n", po_switch(), "Use if the first line contains no header") 35 ("rename.*", var<string>(), "Can be used to rename a column") 34 36 ("dry-run", po_switch(), "Do not create or manipulate any output file") 35 ;36 37 po::options_description split("Splitting options");38 split.add_options()39 ("split-sequence,S", vars<uint16_t>(), "Split data sequentially into several trees/files (e.g. 1, 1, 2)")40 ("split-quantile,Q", vars<double>(), "Split data randomly into several trees/files (e.g. 0.5, 1)")41 ("seed", var<uint64_t>(mt19937_64::default_seed), "Seed value in case of random split")42 ;43 44 po::options_description debug("Debug options");45 debug.add_options()46 37 ("verbose,v", var<uint16_t>(1), "Verbosity (0: quiet, 1: default, 2: more, 3, ...)") 47 38 ; … … 53 44 54 45 conf.AddOptions(control); 55 conf.AddOptions(split); 56 conf.AddOptions(debug); 46 conf.AddOptions(Tools::Splitting::options()); 57 47 conf.SetArgumentPositions(p); 58 48 } … … 90 80 "with --compression.\n" 91 81 "\n" 92 "For several purposes, it might be convenient to split the output to several " 93 "different root-treess. This can be done using the --split-sequence (-S) " 94 "and the --split-quantile (-Q) options. If a split sequence is defined as " 95 "-S 1 -S 2 -S 1 the events are split by 1:2:1 in this sequence order. 
If " 96 "quantiles are given as -Q 0.5 -Q 0.6, the first tree will contain 50% of " 97 "the second one 10% and the third one 40%. The corresponding seed value can " 98 "be set with --seed.\n" 82 "Columns can be renamed with --rename.new=old\n" 83 "\n" 84 << Tools::Splitting::usage() << 99 85 "\n" 100 86 "In case of success, 0 is returned, a value>0 otherwise.\n" … … 178 164 } 179 165 180 // ----------------------------- Setup splitting ---------------------------181 182 vector<uint16_t> split_seq = conf.Vec<uint16_t>("split-sequence");183 vector<double> split_quant = conf.Vec<double>("split-quantile");184 185 if (!split_seq.empty() && !split_quant.empty())186 throw runtime_error("Only splitting by --split-sequence or --split-quantile is allowed.");187 188 const size_t num_split = split_seq.size()+split_quant.size()==0 ? 0 :189 ::max(split_seq.size(), split_quant.size()+1);190 191 map<size_t, size_t> split_lut;192 for (size_t i=0; i<split_seq.size(); i++)193 {194 const size_t sz = split_lut.size();195 for (size_t j=0; j<split_seq[i]; j++)196 split_lut.emplace(j+sz, i);197 }198 199 for (size_t i=0; i<split_quant.size(); i++)200 if (split_quant[i]<0 || split_quant[i]>=1)201 throw runtime_error("Splitting quantiles must be in the range [0;1)");202 203 for (size_t i=1; i<split_quant.size(); i++)204 {205 if (split_quant[i]<=split_quant[i-1])206 throw runtime_error("Splitting quantiles must be in increasing order.");207 }208 209 166 // ------------------------------------------------------------------------- 210 167 211 const uniform_real_distribution<double> distribution(0,1); 212 mt19937_64 generator; 213 generator.seed(conf.Get<uint64_t>("seed")); 214 auto rndm = bind(distribution, generator); 215 216 // ------------------------------------------------------------------------- 168 /*const*/ Tools::Splitting split(conf); 217 169 218 170 if (verbose>0) … … 300 252 cout << "Opened root file '" << path << "'.\n"; 301 253 cout << "Writing to tree: " << tree << ".\n"; 254 
split.print(); 302 255 } 303 256 … … 306 259 307 260 size_t entries = 0; 308 if ( num_split==0)261 if (split.empty()) 309 262 { 310 263 if (AddTree(ttree, tfile, tree, update, verbose)) … … 318 271 { 319 272 bool found = false; 320 for (size_t i=0; i< num_split; i++)273 for (size_t i=0; i<split.size(); i++) 321 274 found |= AddTree(ttree, tfile, tree+"["+to_string(i)+"]", update, verbose); 322 275 … … 328 281 } 329 282 283 const auto rename = conf.GetWildcardOptions("rename.*"); 284 330 285 vector<float> vec(numcol); 331 286 for (int i=0; i<numcol; i++) … … 333 288 string col = noheader ? Tools::Form("col%d", i) : title->At(i)->GetName(); 334 289 290 if (verbose>1) 291 cout << "Column: " << col; 292 335 293 boost::regex rexpr(":"); 336 294 col = boost::regex_replace(col, rexpr, ""); 337 295 338 296 if (verbose>1) 339 cout << "Column: " << col << '\n'; 297 cout << " -> " << col; 298 299 for (auto it=rename.cbegin(); it!=rename.cend(); it++) 300 { 301 if (col!=it->substr(7)) 302 continue; 303 304 col = conf.Get<string>(*it); 305 if (verbose>1) 306 cout << " -> " << col; 307 break; 308 } 309 if (verbose>1) 310 cout << endl; 340 311 341 312 for (auto it=ttree.begin(); it!=ttree.end(); it++) … … 361 332 if (buf.IsNull() || buf[0]=='#') 362 333 continue; 363 364 valid++;365 334 366 335 TObjArray *arr = buf.Tokenize(" "); … … 386 355 delete arr; 387 356 388 389 size_t index = 0; 390 if (!split_lut.empty()) 391 index = split_lut[line % split_lut.size()]; 392 if (!split_quant.empty()) 393 { 394 const float r = rndm(); 395 for (; r>=split_quant[index]; index++) 396 if (index==split_quant.size()) 397 break; 398 } 357 const size_t index = split.index(valid++); 399 358 400 359 // Fill only branches for which an adress was set
Note: See TracChangeset for help on using the changeset viewer.