Changeset 19802 for trunk/FACT++/src/rootifysql.cc
- Timestamp: 10/27/19 11:11:01 (5 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
trunk/FACT++/src/rootifysql.cc
r19482 r19802 1 1 #include "Database.h" 2 3 #include <random>4 2 5 3 #include <boost/regex.hpp> … … 10 8 #include "tools.h" 11 9 #include "Time.h" 12 #include " Configuration.h"10 #include "Splitting.h" 13 11 14 12 #include <TROOT.h> … … 62 60 ; 63 61 64 po::options_description split("Splitting options");65 split.add_options()66 ("split-sequence,S", vars<uint16_t>(), "Split data sequentially into several trees/files (e.g. 1, 1, 2)")67 ("split-quantile,Q", vars<double>(), "Split data randomly into several trees/files (e.g. 0.5, 1)")68 ("seed", var<uint64_t>(mt19937_64::default_seed), "Seed value in case of random split")69 ;70 71 62 po::positional_options_description p; 72 63 p.add("file", 1); // The 1st positional options (n=1) … … 76 67 conf.AddOptions(ascii); 77 68 conf.AddOptions(root); 78 conf.AddOptions( split);69 conf.AddOptions(Tools::Splitting::options()); 79 70 conf.SetArgumentPositions(p); 80 71 } … … 125 116 "/*comment*/ or introduced with # (shell script style) or -- (SQL style).\n" 126 117 "\n" 127 "For several purposes, it might be convenient to split the output to several " 128 "different root-trees or ascii files. This can be done using the --split-sequence (-S) " 129 "and the --split-quantile (-Q) options. If a split sequence is defined as " 130 "-S 1 -S 2 -S 1 the events are split by 1:2:1 in this sequence order. If " 131 "quantiled are given as -Q 0.5 -Q 0.6, the first tree will contain 50% of " 132 "the second one 10% and the third one 40%. 
The corresponding seed value can " 133 "be set with --seed.\n" 118 << Tools::Splitting::usage() << 134 119 "\n" 135 120 "In case of success, 0 is returned, a value>0 otherwise.\n" … … 522 507 // ----------------------- Setup splitting --------------------------------- 523 508 524 vector<uint16_t> split_seq = conf.Vec<uint16_t>("split-sequence");525 vector<double> split_quant = conf.Vec<double>("split-quantile");526 527 if (!split_seq.empty() && !split_quant.empty())528 throw runtime_error("Only splitting by --split-sequence or --split-quantile is allowed.");529 530 const size_t num_split = split_seq.size()+split_quant.size()==0 ? 0 :531 ::max(split_seq.size(), split_quant.size()+1);532 533 map<size_t, size_t> split_lut;534 for (size_t i=0; i<split_seq.size(); i++)535 {536 const size_t sz = split_lut.size();537 for (size_t j=0; j<split_seq[i]; j++)538 split_lut.emplace(j+sz, i);539 }540 541 for (size_t i=0; i<split_quant.size(); i++)542 if (split_quant[i]<0 || split_quant[i]>=1)543 throw runtime_error("Splitting quantiles must be in the range [0;1)");544 545 for (size_t i=1; i<split_quant.size(); i++)546 {547 if (split_quant[i]<=split_quant[i-1])548 throw runtime_error("Splitting quantiles must be in increasing order.");549 }550 551 // -------------------------------------------------------------------------552 553 509 const auto vars = conf.GetWildcardOptions("var.*"); 554 510 … … 558 514 559 515 // ------------------------------------------------------------------------- 516 517 /*const*/ Tools::Splitting split(conf); 560 518 561 519 if (verbose>0) … … 826 784 cout << "Opening file '" << path << "' [compression=" << compression << "]...\n"; 827 785 cout << "Writing data to tree '" << tree << "'" << (nofill?" (--skipped--)":"") << endl; 828 if (num_split) 829 { 830 cout << "Splitting configured " << (split_seq.empty()?"randomly":"in sequence") << " into " << num_split << " branches." 
<< endl; 831 if (!split_quant.empty()) 832 cout << "Seed value configured as " << conf.Get<uint64_t>("seed") << "." << endl; 833 } 786 split.print(); 834 787 } 835 788 … … 875 828 vector<TTree*> ttree; 876 829 877 if ( num_split==0)830 if (split.empty()) 878 831 ttree.emplace_back(new TTree(tree.c_str(), query.c_str())); 879 832 else 880 for (size_t i=0; i< num_split; i++)833 for (size_t i=0; i<split.size(); i++) 881 834 ttree.emplace_back(new TTree((tree+"["+to_string(i)+"]").c_str(), query.c_str())); 882 835 … … 955 908 { 956 909 vector<string> names; 957 if ( num_split==0)910 if (split.empty()) 958 911 names.emplace_back(write); 959 912 else 960 for (size_t i=0; i< num_split; i++)913 for (size_t i=0; i<split.size(); i++) 961 914 names.emplace_back(write+"-"+to_string(i)); 962 915 … … 1017 970 // ---------------------- Fill TTree with DB data -------------------------- 1018 971 1019 const uniform_real_distribution<double> distribution(0,1);1020 mt19937_64 generator;1021 generator.seed(conf.Get<uint64_t>("seed"));1022 auto rndm = bind(distribution, generator);1023 1024 972 size_t count = 0; 1025 973 size_t skip = 0; 1026 974 do 1027 975 { 1028 size_t index = 0; 1029 if (!split_lut.empty()) 1030 index = split_lut[count % split_lut.size()]; 1031 if (!split_quant.empty()) 1032 { 1033 const float r = rndm(); 1034 for (; r>=split_quant[index]; index++) 1035 if (index==split_quant.size()) 1036 break; 1037 } 1038 1039 count++; 976 size_t index = split.index(count++); 1040 977 1041 978 ostringstream rtxt;
Note: See TracChangeset for help on using the changeset viewer.