Changeset 19280
- Timestamp:
- 10/23/18 18:48:24 (6 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/FACT++/src/rootifysql.cc
r19235 r19280 60 60 ; 61 61 62 po::options_description split("Splitting options"); 63 split.add_options() 64 ("split-sequence,S", vars<uint16_t>(), "Split data sequentially into several trees/files (e.g. 1, 1, 2)") 65 ("split-quantile,Q", vars<double>(), "Split data randomly into several trees/files (e.g. 0.5, 1)") 66 ("seed", var<uint64_t>(mt19937_64::default_seed), "Seed value in case of random split") 67 ; 68 62 69 po::positional_options_description p; 63 70 p.add("file", 1); // The 1st positional options (n=1) … … 67 74 conf.AddOptions(ascii); 68 75 conf.AddOptions(root); 76 conf.AddOptions(split); 69 77 conf.SetArgumentPositions(p); 70 78 } … … 114 122 "Comments in the query-file can be placed according to the SQL standard inline " 115 123 "/*comment*/ or introduced with # (shell script style) or -- (SQL style).\n" 124 "\n" 125 "For several purposes, it might be convenient to split the output to several " 126 "different root-trees or ascii files. This can be done using the --split-sequence (-S) " 127 "and the --split-quantile (-Q) options. If a split sequence is defined as " 128 "-S 1 -S 2 -S 1 the events are split by 1:2:1 in this sequence order. If " 129 "quantiled are given as -Q 0.5 -Q 0.6, the first tree will contain 50% of " 130 "the second one 10% and the third one 40%. The corresponding seed value can " 131 "be set with --seed.\n" 116 132 "\n" 117 133 "In case of success, 0 is returned, a value>0 otherwise.\n" … … 502 518 //const vector<Map> mymap = conf.Vec<Map>("map"); 503 519 520 vector<uint16_t> split_seq = conf.Vec<uint16_t>("split-sequence"); 521 vector<double> split_quant = conf.Vec<double>("split-quantile"); 522 523 if (!split_seq.empty() && !split_quant.empty()) 524 throw runtime_error("Only splitting by --split-sequence or --split-quantile is allowed."); 525 526 const size_t num_split = ::max(split_seq.size(), split_quant.size()+1); 527 528 map<size_t, size_t> split_lut; 529 for (size_t i=0; i<split_seq.size(); i++) 530 { 531 const size_t sz = split_lut.size(); 532 for (size_t j=0; j<split_seq[i]; j++) 533 split_lut.emplace(j+sz, i); 534 } 535 536 for (size_t i=0; i<split_quant.size(); i++) 537 if (split_quant[i]<0 || split_quant[i]>=1) 538 throw runtime_error("Splitting quantiles must be in the range [0;1)"); 539 540 for (size_t i=1; i<split_quant.size(); i++) 541 { 542 if (split_quant[i]<=split_quant[i-1]) 543 throw runtime_error("Splitting quantiles must be in increasing order."); 544 } 545 504 546 // ------------------------------------------------------------------------- 505 547 … … 582 624 // waiting long for the query result 583 625 // 626 584 627 // I am using root here instead of boost to be 585 628 // consistent with the access pattern by TFile … … 778 821 cout << "Opening file '" << path << "' [compression=" << compression << "]...\n"; 779 822 cout << "Writing data to tree '" << tree << "'" << (nofill?" (--skipped--)":"") << endl; 823 if (num_split) 824 cout << "Splitting configured with " << num_split << " branches." << endl; 780 825 } 781 826 … … 808 853 UInt_t cols = 0; 809 854 855 // IMPLEMENT FILE SPLITTING! 856 // OpenFile(tree, query) 857 // SetupColumns 858 // WriteRow 859 // CloseFile 860 861 // Ratio[3]: 50%, 20%, 30% 862 // File[x3]: root, cout, fout 863 810 864 811 865 // -------------------- Configure branches of TTree ------------------------ 812 TTree *ttree = new TTree(tree.c_str(), query.c_str()); 866 vector<TTree*> ttree; 867 868 if (num_split==0) 869 ttree.emplace_back(new TTree(tree.c_str(), query.c_str())); 870 else 871 for (int i=0; i<num_split; i++) 872 ttree.emplace_back(new TTree((tree+"["+to_string(i)+"]").c_str(), query.c_str())); 813 873 814 874 size_t skipat = 0; … … 862 922 // name = boost::regex_replace(l[i], boost::regex(m.first), m.second); 863 923 864 ttree->Branch(l[i].c_str(), buf.data()+i); 924 for (auto it=ttree.begin(); it!=ttree.end(); it++) 925 it[0]->Branch(l[i].c_str(), buf.data()+i); 865 926 cols++; 866 927 } … … 879 940 } 880 941 881 // ------------------------------------------------------------------------- 942 // ------------------------- Open the ascii files -------------------------- 943 944 vector<ofstream> fout; 945 if (!write.empty()) 946 { 947 vector<string> names; 948 if (num_split==0) 949 names.emplace_back(write); 950 else 951 for (int i=0; i<num_split; i++) 952 names.emplace_back(write+"-"+to_string(i)); 953 954 for (auto it=names.cbegin(); it!=names.cend(); it++) 955 { 956 fout.emplace_back(*it); 957 if (!*fout.rbegin()) 958 cout << "WARNING: Writing to '" << write << "' failed: " << strerror(errno) << endl; 959 } 960 } 961 962 // ----------------------- Prepare the ascii comment ----------------------- 882 963 883 964 string contents; … … 905 986 (copy_comments && comment && !header)) 906 987 contents += '#' + ibuf + '\n'; 907 908 } 909 910 ofstream fout(write); 911 if (!write.empty() && !fout) 912 cout << "WARNING: Writing to '" << write << "' failed: " << strerror(errno) << endl; 988 } 989 990 // ----------------------- Write the ascii headers ------------------------- 991 992 ostringstream htxt; 993 if (display || !fout.empty()) 994 htxt << row.field_list(delimiter.c_str()); 913 995 914 996 if (display) … … 916 998 cout << endl; 917 999 cout << contents << endl; 918 cout << "# " << row.field_list(delimiter.c_str()) << endl; 919 } 920 921 if (!write.empty()) 922 { 923 fout << contents; 924 fout << "# " << row.field_list(delimiter.c_str()) << endl; 1000 cout << "# " << htxt.str() << endl; 1001 } 1002 for (auto ff=fout.begin(); ff!=fout.end(); ff++) 1003 { 1004 *ff << contents; 1005 *ff << "# " << htxt.str() << endl; 925 1006 } 926 1007 927 1008 // ---------------------- Fill TTree with DB data -------------------------- 1009 1010 const uniform_real_distribution<double> distribution(0,1); 1011 mt19937_64 generator; 1012 if (conf.Has("seed")) 1013 generator.seed(conf.Get<uint64_t>("seed")); 1014 auto rndm = bind(distribution, generator); 1015 928 1016 size_t count = 0; 929 1017 size_t skip = 0; 930 1018 do 931 1019 { 1020 size_t index = 0; 1021 if (!split_lut.empty()) 1022 index = split_lut[count % split_lut.size()]; 1023 if (!split_quant.empty()) 1024 { 1025 const float r = rndm(); 1026 for (; r>=split_quant[index]; index++) 1027 if (index==split_quant.size()) 1028 break; 1029 } 1030 932 1031 count++; 933 1032 1033 ostringstream rtxt; 1034 if (display || !fout.empty()) 1035 rtxt << row.value_list(delimiter.c_str(), mysqlpp::do_nothing); 1036 934 1037 if (display) 935 cout << r ow.value_list(delimiter.c_str(), mysqlpp::do_nothing) << '\n';936 if (! write.empty())937 fout << row.value_list(delimiter.c_str(), mysqlpp::do_nothing) << '\n';1038 cout << rtxt.str() << '\n'; 1039 if (!fout.empty()) 1040 fout[index] << rtxt.str() << '\n'; 938 1041 939 1042 size_t idx=0; … … 972 1075 973 1076 if (idx==row.size() && !nofill) 974 ttree ->Fill();1077 ttree[index]->Fill(); 975 1078 976 1079 row = res.fetch_row(); … … 990 1093 cout << skip << " rows skipped due to NULL field." << endl; 991 1094 992 cout << ttree->GetEntries() << " rows filled into tree." << endl; 993 } 994 995 ttree->Write(); 1095 for (int i=0; i<ttree.size(); i++) 1096 cout << ttree[i]->GetEntries() << " rows filled into tree #" << i << "." << endl; 1097 } 1098 1099 for (auto it=ttree.begin(); it!=ttree.end(); it++) 1100 (*it)->Write(); 996 1101 tfile.Close(); 997 1102
Note:
See TracChangeset
for help on using the changeset viewer.