source: trunk/FACT++/src/rootifysql.cc@ 19438

Last change on this file since 19438 was 19423, checked in by Daniela Dorner, 6 years ago
bugfix in file splitting
File size: 38.4 KB
Line 
1#include "Database.h"
2
3#include <random>
4
5#include <boost/regex.hpp>
6#include <boost/tokenizer.hpp>
7#include <boost/algorithm/string.hpp>
8#include <boost/algorithm/string/join.hpp>
9
10#include "tools.h"
11#include "Time.h"
12#include "Configuration.h"
13
14#include <TROOT.h>
15#include <TSystem.h>
16#include <TFile.h>
17#include <TTree.h>
18
19using namespace std;
20
21// ------------------------------------------------------------------------
22
23void SetupConfiguration(Configuration &conf)
24{
25 po::options_description control("Database options");
26 control.add_options()
27 ("uri,u", var<string>()->required(), "Database link as in\n\tuser:password@server[:port]/database[?compress=0|1].")
28 ("query,q", var<string>(""), "MySQL query (overwrites --file)")
29 ("file", var<string>("rootify.sql"), "An ASCII file with the MySQL query (overwrites --query)")
30 ("ignore-null,i", po_switch(), "Do not skip rows containing any NULL field")
31 ("display,d", po_switch(), "Displays contents on the screen (most usefull in combination with mysql statements as SHOW or EXPLAIN)")
32 ("explain", po_switch(), "Requests an EXPLAIN from the server (shows the server optimized query)\nsee also https://dev.mysql.com/doc/refman/explain-output.html")
33 ("profiling", po_switch(), "Turn on profiling and print profile")
34 ("var.*", var<string>(), "Predefined SQL user variables (@VAR)")
35 ("env.*", vars<string>(), "Predefined environment for substitutions in the query ($ENV)")
36 ("list.*", var<string>(), "Predefined environment for substitutions in the query ($ENV). The list is read from the given file (one list entry per line)")
37 ("print-connection", po_switch(), "Print database connection information")
38 ("verbose,v", var<uint16_t>(1), "Verbosity (0: quiet, 1: default, 2: more, 3, ...)")
39 ;
40
41 po::options_description ascii("ASCII output");
42 ascii.add_options()
43 ("write,w", var<string>(""), "Write output to an ascii file")
44 ("delimiter", var<string>(), "The delimiter used if contents are displayed with --display (default=\\t)")
45 ("copy-shabang", po_switch(), "Copy the sha-bang line if exists to the output file")
46 ("copy-header", po_switch(), "Copy the header (all line starting with '#' up to the first non-comment line to the output file")
47 ("copy-query", po_switch(), "Copy the query to the ascii output file")
48 ("copy-comments", po_switch(), "Copy all lines starting with '#' to the output file which are not part of header")
49 ("copy-all", po_switch(), "An alias for --copy-header --copy-query --copy-comments")
50 ;
51
52 po::options_description root("Root file options");
53 root.add_options()
54 ("out,o", var<string>("rootify.root"), "Output root file name")
55 ("force,f", po_switch(), "Force overwriting an existing root file ('RECREATE')")
56 ("update", po_switch(), "Update an existing root file with the new tree ('UPDATE')")
57 ("compression,c", var<uint16_t>(1), "zlib compression level for the root file")
58 ("tree,t", var<string>("Result"), "Name of the root tree")
59 ("ignore", vars<string>(), "Ignore the given columns")
60 ("null,n", po_switch(), "Redirect the output file to /dev/null (mainly for debugging purposes, e.g. performance studies)")
61 ("no-fill", po_switch(), "Do not fill events into the root file (mainly for debugging purposes, e.g. performance studies)")
62 ;
63
64 po::options_description split("Splitting options");
65 split.add_options()
66 ("split-sequence,S", vars<uint16_t>(), "Split data sequentially into several trees/files (e.g. 1, 1, 2)")
67 ("split-quantile,Q", vars<double>(), "Split data randomly into several trees/files (e.g. 0.5, 1)")
68 ("seed", var<uint64_t>(mt19937_64::default_seed), "Seed value in case of random split")
69 ;
70
71 po::positional_options_description p;
72 p.add("file", 1); // The 1st positional options (n=1)
73 p.add("out", 1); // The 2nd positional options (n=1)
74
75 conf.AddOptions(control);
76 conf.AddOptions(ascii);
77 conf.AddOptions(root);
78 conf.AddOptions(split);
79 conf.SetArgumentPositions(p);
80}
81
82void PrintUsage()
83{
84 cout <<
85 "rootifysql - Converts the result of a mysql query into a root file\n"
86 "\n"
87 "For convenience, this documentation uses the extended version of the options, "
88 "refer to the output below to get the abbreviations.\n"
89 "\n"
90 "Writes the result of a mysql query into a root file. For each column, a branch is "
91 "created of type double with the field name as name. This is usually the column name "
92 "if not specified otherwise by the AS mysql directive.\n"
93 "\n"
94 "Columns with CHAR or VARCHAR as field type are ignored. DATETIME, DATE and TIME "
95 "columns are converted to unix time (time_t). Rows containing any file which is "
96 "NULL are skipped if not suppressed by the --ignore-null option. Ideally, the query "
97 "is compiled in a way that no NULL field is returned. With the --display option the "
98 "result of the request is printed on the screen (NULL skipping still in action). "
99 "This can be useful to create an ascii file or to show results as 'SHOW DATABASES' "
100 "or 'EXPLAIN table'. To redirect the contents into an ascii file, the option -v0 "
101 "is useful. To suppress writing to an output file --null can be used.\n"
102 "\n"
103 "The default is to read the query from a file called rootify.sql. Except if a different "
104 "filename is specified by the --file option or a query is given with --query.\n"
105 "\n"
106 "As a trick, the rootify.sql file can be made excutable (chmod u+x rootify.sql). "
107 "If the first line contains '#!rootifysql', the script can be executed directly.\n"
108 "\n"
109 "Columns whose name start with @ are skipped. If you want them in your output file "
110 "give them a name using AS, e.g. 'SELECT @A:=5 AS A'.\n"
111 "\n"
112 "You can use variables in your sql query like @MyVar and define them on the "
113 "command line. In this example with --var.MyVar=5\n"
114 "\n"
115 "You can use environment definitions for substitutions in your SQL query. "
116 "For example --env.TEST=5 would replace $TEST or ${TEST} in your query by 5."
117 "If you specify one environment variable more than once, a list is created. "
118 "For example --env.TEST=1 --env.TEST=2 --env.TEST=3 would substitute "
119 "$TEST or ${TEST} by '1, 2, 3'. This is useful for the SQL `IN` keyword. "
120 "You can also read the values for an enviroment substitution from a file "
121 "(one element per line), e.g. --env.TEST=file.txt. Empty lines and lines "
122 "starting with a # are skipped.\n"
123 "\n"
124 "Comments in the query-file can be placed according to the SQL standard inline "
125 "/*comment*/ or introduced with # (shell script style) or -- (SQL style).\n"
126 "\n"
127 "For several purposes, it might be convenient to split the output to several "
128 "different root-trees or ascii files. This can be done using the --split-sequence (-S) "
129 "and the --split-quantile (-Q) options. If a split sequence is defined as "
130 "-S 1 -S 2 -S 1 the events are split by 1:2:1 in this sequence order. If "
131 "quantiled are given as -Q 0.5 -Q 0.6, the first tree will contain 50% of "
132 "the second one 10% and the third one 40%. The corresponding seed value can "
133 "be set with --seed.\n"
134 "\n"
135 "In case of success, 0 is returned, a value>0 otherwise.\n"
136 "\n"
137 "Usage: rootifysql [rootify.sql [rootify.root]] [-u URI] [-q query|-f file] [-i] [-o out] [-f] [-cN] [-t tree] [-vN]\n"
138 "\n"
139 ;
140 cout << endl;
141}
142
143struct ExplainParser
144{
145 string sql;
146
147 vector<string> vec;
148
149 string substitute(string _str, const boost::regex &expr)
150 {
151 boost::smatch match;
152 while (boost::regex_search(_str, match, expr, boost::regex_constants::format_first_only))
153 {
154 const auto &len = match.length();
155 const auto &pos = match.position();
156 const auto &str = match.str();
157
158 const auto it = find(vec.cbegin(), vec.cend(), str);
159 const size_t id = it==vec.cend() ? vec.size() : it-vec.cbegin();
160
161 _str.replace(pos, len, "{"+to_string(id)+"}");
162
163 if (it==vec.cend())
164 vec.push_back(str);//.substr(1, str.size()-2));
165 }
166
167 return _str;
168 }
169
170 string substitute(const string &str, const string &expr)
171 {
172 return substitute(str, boost::regex(expr));
173 }
174
175 vector<string> queries;
176
177 string resub(string str)
178 {
179 // search for "KEYWORD expression"
180 boost::regex reg("\\{[0-9]+\\}");
181
182 boost::smatch match;
183 while (boost::regex_search(str, match, reg, boost::regex_constants::format_first_only))
184 {
185 const auto &len = match.length();
186 const auto &pos = match.position();
187 const auto &arg = match.str(); // Argument
188
189 const auto idx = atoi(arg.c_str()+1);
190
191 str.replace(pos, len, resub(vec[idx]));
192 }
193
194 return str;
195 }
196
197 void expression(string expr, size_t indent=0)
198 {
199 if (expr[0]=='{')
200 {
201 const auto idx = atoi(expr.c_str()+1);
202
203 // This is a subquery
204 if (vec[idx].substr(0,3)=="(/*")
205 {
206 cout << setw(indent) << ' ' << "(\n";
207 find_tokens(vec[idx], indent+4);
208 cout << setw(indent) << ' ' << ") ";
209 }
210 else
211 // This is just something to substitute back
212 if (vec[idx].substr(0,2)=="({")
213 {
214 cout << setw(indent) << ' ' << "(" << resub(vec[idx]) << ") ";
215 }
216 else
217 {
218 if (indent>0)
219 cout << setw(indent) << ' ';
220 cout << resub(vec[idx]);
221 }
222 }
223 else
224 {
225 if (indent>0)
226 cout << setw(indent) << ' ';
227 cout << resub(expr);
228 }
229 }
230
231 void find_tokens(string str, size_t indent=0)
232 {
233 // ( COMMENT )?( TOKEN )?(( {NNN} | NNN )( AS|ON ( {NNN}) ))?(,)?)
234 //regex reg("(\\/\\*\\ select\\#[0-9]+\\ \\*\\/\\ *)?([a-zA-Z ]+)?((\\{[0-9]+\\}|[0-9]+)(\\ ?([Aa][Ss]|[Oo][Nn])\\ ?(\\{[0-9]+\\}))?(,)?)");
235
236 const string _com = "\\/\\*\\ select\\#[0-9]+\\ \\*\\/\\ *";
237
238 const string _tok = "[a-zA-Z_ ]+";
239
240 const string _nnn = "\\{[0-9]+\\}|[0-9]+";
241
242 const string _as = "\\ ?([Aa][Ss])\\ ?";
243
244 // ( _nnn ) ( _as ( _nnn ))?(,)? // can also match noting in between two {NNN}
245 const string _exp = "("+_nnn+")" + "("+_as+"("+_nnn+"))?(,)?";
246
247 // Matche: ( _com )? ( ( _tok )? ( _exp ) | ( _tok ) )
248 boost::regex reg("("+_com+")?" + "(" + "("+_tok+")?"+"("+_exp+")" + "|" + "("+_tok+")" + ")");
249
250 boost::smatch match;
251 while (boost::regex_search(str, match, reg, boost::regex_constants::format_first_only))
252 {
253
254 const auto &com = match.str(1); // comment
255 const auto &tok1 = Tools::Trim(match.str(3)); // token with expression
256 const auto &arg1 = match.str(5); // argument 1
257 const auto &as = match.str(7); // as
258 const auto &arg2 = match.str(8); // argument 2
259 const auto &comma = match.str(9); // comma
260 const auto &tok2 = Tools::Trim(match.str(10)); // token without expression
261
262 if (!com.empty())
263 cout << setw(indent) << ' ' << "\033[34m" << com << "\033[0m" << '\n';
264
265 if (!tok1.empty())
266 cout << setw(indent) << ' ' << "\033[32m" << tok1 << "\033[0m" << '\n';
267 if (!tok2.empty())
268 cout << setw(indent) << ' ' << "\033[32m" << tok2 << "\033[0m" << '\n';
269
270 if (!arg1.empty())
271 {
272 expression(arg1, indent+4);
273
274 if (!as.empty())
275 cout << " \033[33m" << as << "\033[0m ";
276
277 if (!arg2.empty())
278 expression(arg2);
279
280 if (!comma.empty())
281 cout << ',';
282
283 cout << '\n';
284 }
285
286 str = str.substr(match.position()+match.length());
287 }
288 }
289
290
291 ExplainParser(const string &_sql) : sql(_sql)
292 {
293 // substitute all strings
294 sql = substitute(sql, "'[^']*'");
295
296 // substitute all escaped sequences (`something`.`something-else`)
297 sql = substitute(sql, "`[^`]*`(\\.`[^`]*`)*");
298
299 // substitute all paranthesis
300 sql = substitute(sql, "[a-zA-Z0-9_]*\\([^\\(\\)]*\\)");
301
302 //cout << sql << "\n\n";
303 find_tokens(sql);
304 cout << endl;
305 }
306};
307
308// Remove queries...
309void format(string sql)
310{
311 ExplainParser p(sql);
312
313 /*
314
315 SELECT
316 [ALL | DISTINCT | DISTINCTROW ]
317 [HIGH_PRIORITY]
318 [STRAIGHT_JOIN]
319 [SQL_SMALL_RESULT] [SQL_BIG_RESULT] [SQL_BUFFER_RESULT]
320 [SQL_CACHE | SQL_NO_CACHE] [SQL_CALC_FOUND_ROWS]
321 select_expr [, select_expr ...]
322 [FROM table_references
323 [PARTITION partition_list]
324 [WHERE where_condition]
325 [GROUP BY {col_name | expr | position}, ... [WITH ROLLUP]]
326 [HAVING where_condition]
327 [WINDOW window_name AS (window_spec)
328 [, window_name AS (window_spec)] ...]
329 [ORDER BY {col_name | expr | position}
330 [ASC | DESC], ... [WITH ROLLUP]]
331 [LIMIT {[offset,] row_count | row_count OFFSET offset}]
332 [INTO OUTFILE 'file_name'
333 [CHARACTER SET charset_name]
334 export_options
335 | INTO DUMPFILE 'file_name'
336 | INTO var_name [, var_name]]
337 [FOR {UPDATE | SHARE} [OF tbl_name [, tbl_name] ...] [NOWAIT | SKIP LOCKED]
338 | LOCK IN SHARE MODE]]
339 */
340
341 /*
342table_references:
343 escaped_table_reference [, escaped_table_reference] ...
344
345escaped_table_reference:
346 table_reference
347 | { OJ table_reference }
348
349table_reference:
350 table_factor
351 | join_table
352
353table_factor:
354 tbl_name [PARTITION (partition_names)]
355 [[AS] alias] [index_hint_list]
356 | table_subquery [AS] alias [(col_list)]
357 | ( table_references )
358
359join_table:
360 table_reference [INNER | CROSS] JOIN table_factor [join_condition]
361 | table_reference STRAIGHT_JOIN table_factor
362 | table_reference STRAIGHT_JOIN table_factor ON conditional_expr
363 | table_reference {LEFT|RIGHT} [OUTER] JOIN table_reference join_condition
364 | table_reference NATURAL [INNER | {LEFT|RIGHT} [OUTER]] JOIN table_factor
365
366join_condition:
367 ON conditional_expr
368 | USING (column_list)
369
370index_hint_list:
371 index_hint [, index_hint] ...
372
373index_hint:
374 USE {INDEX|KEY}
375 [FOR {JOIN|ORDER BY|GROUP BY}] ([index_list])
376 | IGNORE {INDEX|KEY}
377 [FOR {JOIN|ORDER BY|GROUP BY}] (index_list)
378 | FORCE {INDEX|KEY}
379 [FOR {JOIN|ORDER BY|GROUP BY}] (index_list)
380
381index_list:
382 index_name [, index_name] ...
383 */
384
385}
386
387int finish(Database &connection, const uint16_t &verbose, const bool &profiling, const bool &print_connection)
388{
389 if (verbose>0)
390 {
391 try
392 {
393 const auto resw =
394 connection.query("SHOW WARNINGS").store();
395
396 if (resw.num_rows()>0)
397 cout << "\n" << resw.num_rows() << " Warning(s) issued:\n\n";
398
399 for (size_t i=0; i<resw.num_rows(); i++)
400 {
401 const mysqlpp::Row &roww = resw[i];
402
403 cout << roww["Level"] << '[' << roww["Code"] << "]: ";
404 cout << roww["Message"] << '\n';
405 }
406 cout << endl;
407
408 }
409 catch (const exception &e)
410 {
411 cerr << "\nSHOW WARNINGS\n\n";
412 cerr << "SQL query failed:\n" << e.what() << endl;
413 return 1;
414 }
415 }
416
417 if (profiling)
418 {
419 try
420 {
421 const auto N =
422 connection.query("SHOW PROFILES").store().num_rows();
423
424 const auto resp =
425 connection.query("SHOW PROFILE ALL FOR QUERY "+to_string(verbose?N-1:N)).store();
426
427 cout << '\n';
428 cout << left;
429 cout << setw(26) << "Status" << ' ';
430 cout << right;
431 cout << setw(11) << "Duration" << ' ';
432 cout << setw(11) << "CPU User" << ' ';
433 cout << setw(11) << "CPU System" << '\n';
434 cout << "--------------------------------------------------------------\n";
435 for (size_t i=0; i<resp.num_rows(); i++)
436 {
437 const mysqlpp::Row &rowp = resp[i];
438
439 cout << left;
440 cout << setw(26) << rowp["Status"] << ' ';
441 cout << right;
442 cout << setw(11) << rowp["Duration"] << ' ';
443 cout << setw(11) << rowp["CPU_user"] << ' ';
444 cout << setw(11) << rowp["CPU_system"] << '\n';
445 }
446 cout << "--------------------------------------------------------------\n";
447 cout << endl;
448 }
449 catch (const exception &e)
450 {
451 cerr << "\nSHOW PROFILE ALL\n\n";
452 cerr << "SQL query failed:\n" << e.what() << '\n' <<endl;
453 return 2;
454 }
455 }
456
457 if (print_connection)
458 {
459 try
460 {
461 // Exchange _send and _received as it is the view of the server
462 const auto &res1 = connection.query("SHOW STATUS LIKE 'Bytes_%'").store();
463 cout << left << setw(16) << res1[1]["Variable_name"] << ' ' << Tools::Scientific(res1[0]["Value"]) << endl;
464 cout << left << setw(16) << res1[0]["Variable_name"] << ' ' << Tools::Scientific(res1[1]["Value"]) << endl;
465 cout << endl;
466 }
467 catch (const exception &e)
468 {
469 cerr << "\nSHOW STATUS LIKE 'Bytes_%'\n\n";
470 cerr << "SQL query failed:\n" << e.what() << endl;
471 return 3;
472 }
473 }
474
475 if (verbose>0)
476 cout << "Success!\n" << endl;
477 return 0;
478
479}
480
481int main(int argc, const char* argv[])
482{
483 Time start;
484
485 gROOT->SetBatch();
486
487 Configuration conf(argv[0]);
488 conf.SetPrintUsage(PrintUsage);
489 SetupConfiguration(conf);
490
491 if (!conf.DoParse(argc, argv))
492 return 127;
493
494 // ----------------------------- Evaluate options --------------------------
495 const string uri = conf.Get<string>("uri");
496 const string out = conf.Get<string>("out");
497 const string file = conf.Get<string>("file");
498 const string tree = conf.Get<string>("tree");
499 const bool force = conf.Get<bool>("force");
500 const bool ignorenull = conf.Get<bool>("ignore-null");
501 const bool update = conf.Get<bool>("update");
502 const bool display = conf.Get<bool>("display");
503 const string write = conf.Get<string>("write");
504 const bool noout = conf.Get<bool>("null");
505 const bool nofill = conf.Get<bool>("no-fill");
506 const bool explain = conf.Get<bool>("explain");
507 const bool profiling = conf.Get<bool>("profiling");
508 const uint16_t verbose = conf.Get<uint16_t>("verbose");
509 const uint16_t compression = conf.Get<uint16_t>("compression");
510 const string delimiter = conf.Has("delimiter") ? conf.Get<string>("delimiter") : "\t";
511
512 const bool copy_all = conf.Get<bool>("copy-all");
513 const bool copy_shabang = conf.Get<bool>("copy-shabang");
514 const bool copy_header = copy_all || conf.Get<bool>("copy-header");
515 const bool copy_query = copy_all || conf.Get<bool>("copy-query");
516 const bool copy_comments = copy_all || conf.Get<bool>("copy-comments");
517
518 const vector<string> _ignore = conf.Vec<string>("ignore");
519 const bool print_connection = conf.Get<bool>("print-connection");
520 //const vector<Map> mymap = conf.Vec<Map>("map");
521
522 // ----------------------- Setup splitting ---------------------------------
523
524 vector<uint16_t> split_seq = conf.Vec<uint16_t>("split-sequence");
525 vector<double> split_quant = conf.Vec<double>("split-quantile");
526
527 if (!split_seq.empty() && !split_quant.empty())
528 throw runtime_error("Only splitting by --split-sequence or --split-quantile is allowed.");
529
530 const size_t num_split = split_seq.size()+split_quant.size()==0 ? 0 :
531 ::max(split_seq.size(), split_quant.size()+1);
532
533 map<size_t, size_t> split_lut;
534 for (size_t i=0; i<split_seq.size(); i++)
535 {
536 const size_t sz = split_lut.size();
537 for (size_t j=0; j<split_seq[i]; j++)
538 split_lut.emplace(j+sz, i);
539 }
540
541 for (size_t i=0; i<split_quant.size(); i++)
542 if (split_quant[i]<0 || split_quant[i]>=1)
543 throw runtime_error("Splitting quantiles must be in the range [0;1)");
544
545 for (size_t i=1; i<split_quant.size(); i++)
546 {
547 if (split_quant[i]<=split_quant[i-1])
548 throw runtime_error("Splitting quantiles must be in increasing order.");
549 }
550
551 // -------------------------------------------------------------------------
552
553 const auto vars = conf.GetWildcardOptions("var.*");
554
555 vector<string> variables;
556 for (auto var=vars.cbegin(); var!=vars.cend(); var++)
557 variables.emplace_back('@'+var->substr(4)+":="+Tools::Trim(conf.Get<string>(*var)));
558
559 // -------------------------------------------------------------------------
560
561 if (verbose>0)
562 {
563 cout << "\n------------------------ Rootify SQL -------------------------" << endl;
564 cout << "Start Time: " << Time::sql << Time(Time::local) << endl;
565 }
566
567 string query = conf.Get<string>("query");
568 if (query.empty())
569 {
570 if (verbose>0)
571 cout << "Reading query from file '" << file << "'." << endl;
572
573 ifstream fin(file);
574 if (!fin)
575 {
576 cerr << "Could not open query in '" << file << "': " << strerror(errno) << endl;
577 return 4;
578 }
579
580 getline(fin, query, (char)fin.eof());
581 }
582
583 if (query.empty())
584 {
585 cerr << "No query specified." << endl;
586 return 5;
587 }
588
589 // -------------------------------------------------------------------------
590
591 map<string, vector<string>> envs;
592
593 const auto &envs1 = conf.GetWildcardOptions("env.*");
594 for (auto env=envs1.cbegin(); env!=envs1.cend(); env++)
595 envs[env->substr(4)] = conf.Vec<string>(*env);
596
597 const auto &envs2 = conf.GetWildcardOptions("list.*");
598 for (auto env=envs2.cbegin(); env!=envs2.cend(); env++)
599 {
600 const string fname = conf.Get<string>(*env);
601 const string &ident = env->substr(5);
602
603 ifstream fin(fname);
604 if (!fin)
605 {
606 cerr << "Could not open environment in '" << fname << "' for ${" << ident << "}: " << strerror(errno) << endl;
607 return 6;
608 }
609 for (string line; getline(fin, line); )
610 {
611 const auto &l = Tools::Trim(line);
612 if (!l.empty() && l[0]!='#')
613 envs[ident].push_back(line);
614 }
615
616 if (verbose>0)
617 cout << "Found " << envs[ident].size() << " list element(s) for ${" << ident << "}" << endl;
618 }
619
620 for (auto env=envs.cbegin(); env!=envs.cend(); env++)
621 {
622 boost::regex rexpr("\\$(\\{"+env->first+"\\}|"+env->first+"\\b)");
623 query = boost::regex_replace(query, rexpr, boost::join(env->second, ", "));
624 }
625
626 // -------------------------- Check for file permssion ---------------------
627 // Strictly speaking, checking for write permission and existance is not necessary,
628 // but it is convenient that the user does not find out that it failed after
629 // waiting long for the query result
630 //
631
632 // I am using root here instead of boost to be
633 // consistent with the access pattern by TFile
634 TString path(noout?"/dev/null":out.c_str());
635 gSystem->ExpandPathName(path);
636
637 if (!noout)
638 {
639 FileStat_t stat;
640 const Int_t exist = !gSystem->GetPathInfo(path, stat);
641 const Bool_t _write = !gSystem->AccessPathName(path, kWritePermission) && R_ISREG(stat.fMode);
642
643 if ((update && !exist) || (update && exist && !_write) || (force && exist && !_write))
644 {
645 cerr << "File '" << path << "' is not writable." << endl;
646 return 7;
647 }
648
649 if (!update && !force && exist)
650 {
651 cerr << "File '" << path << "' already exists." << endl;
652 return 8;
653 }
654 }
655
656 Time start2;
657
658 // --------------------------- Connect to database -------------------------------------------------
659
660 if (*query.rbegin()!='\n')
661 query += '\n';
662
663 if (verbose>0)
664 {
665 cout << "Connecting to database...\n";
666 cout << "Client Version: " << mysqlpp::Connection().client_version() << endl;
667 }
668
669 Database connection(uri); // Keep alive while fetching rows
670
671 if (verbose>0)
672 cout << "Server Version: " << connection.server_version() << endl;
673
674 if (print_connection)
675 {
676 try
677 {
678 const auto &res1 = connection.query("SHOW STATUS LIKE 'Compression'").store();
679 cout << "Compression of database connection is " << string(res1[0][1]) << endl;
680
681 const auto &res2 = connection.query("SHOW STATUS LIKE 'Ssl_cipher'").store();
682 cout << "Connection to databases is " << (string(res2[0][1]).empty()?"UNENCRYPTED":"ENCRYPTED ("+string(res2[0][1])+")") << endl;
683 }
684 catch (const exception &e)
685 {
686 cerr << "\nSHOW STATUS LIKE 'Compression'\n\n";
687 cerr << "SQL query failed:\n" << e.what() << endl;
688 return 9;
689 }
690 }
691
692 try
693 {
694 if (profiling)
695 connection.query("SET PROFILING=1").execute();
696 }
697 catch (const exception &e)
698 {
699 cerr << "\nSET profiling=1\n\n";
700 cerr << "SQL query failed:\n" << e.what() << endl;
701 return 10;
702 }
703
704 // -------------------------- Set user defined variables -------------------
705 if (variables.size()>0)
706 {
707 if (verbose>0)
708 cout << "Setting user defined variables..." << endl;
709
710 const string varset =
711 "SET\n "+boost::algorithm::join(variables, ",\n ");
712
713 try
714 {
715 connection.query(varset).execute();
716 }
717 catch (const exception &e)
718 {
719 cerr << '\n' << varset << "\n\n";
720 cerr << "SQL query failed:\n" << e.what() << endl;
721 return 11;
722 }
723
724 if (verbose>2)
725 cout << '\n' << varset << '\n' << endl;
726 }
727
728 // ------------------------- Explain query if requested --------------------
729
730 if (explain)
731 {
732 try
733 {
734 const auto res0 =
735 connection.query("EXPLAIN FORMAT=JSON "+query).store();
736
737 cout << res0[0][0] << endl;
738 cout << endl;
739
740 const mysqlpp::StoreQueryResult res1 =
741 connection.query("EXPLAIN "+query).store();
742
743 for (size_t i=0; i<res1.num_rows(); i++)
744 {
745 const mysqlpp::Row &row = res1[i];
746
747 cout << "\nid : " << row["id"];
748 cout << "\nselect type : " << row["select_type"];
749
750 if (!row["table"].is_null())
751 cout << "\ntable : " << row["table"];
752
753 if (!row["partitions"].is_null())
754 cout << "\npartitions : " << row["partitions"];
755
756 if (!row["key"].is_null())
757 cout << "\nselected key : " << row["key"] << " [len=" << row["key_len"] << "] out of (" << row["possible_keys"] << ")";
758
759 if (!row["type"].is_null())
760 cout << "\njoin type : " << row["type"];
761
762 //if (!row["possible_keys"].is_null())
763 // cout << "\npossible_keys: " << row["possible_keys"];
764
765 //if (!row["key_len"].is_null())
766 // cout << "\nkey_len : " << row["key_len"];
767
768 if (!row["ref"].is_null())
769 cout << "\nref : (" << row["ref"] << ") compared to the index";
770
771 if (!row["rows"].is_null())
772 cout << "\nrows : " << row["rows"];
773
774 if (!row["filtered"].is_null())
775 cout << "\nfiltered : " << row["filtered"];
776
777 if (!row["extra"].is_null())
778 cout << "\nExtra : " << row["extra"];
779
780 cout << endl;
781 }
782
783 cout << endl;
784
785 const mysqlpp::StoreQueryResult res2 =
786 connection.query("SHOW WARNINGS").store();
787
788 for (size_t i=0; i<res2.num_rows(); i++)
789 {
790 const mysqlpp::Row &row = res2[i];
791
792 // 1003 //
793 cout << row["Level"] << '[' << row["Code"] << "]:\n";
794 if (uint32_t(row["Code"])==1003)
795 format(row["Message"].c_str());
796 else
797 cout << row["Message"] << '\n' << endl;
798
799 }
800
801 }
802 catch (const exception &e)
803 {
804 cerr << '\n' << query << "\n\n";
805 cerr << "SQL query failed:\n" << e.what() << endl;
806 return 12;
807 }
808
809 return 0;
810 }
811
812 // -------------------------- Request data from database -------------------
813 if (verbose>0)
814 cout << "Requesting data... please be patient!" << endl;
815
816 if (verbose>2)
817 cout << '\n' << query << endl;
818
819 const mysqlpp::UseQueryResult res =
820 connection.query(query).use();
821
822 // -------------------------------------------------------------------------
823
824 if (verbose>0)
825 {
826 cout << "Opening file '" << path << "' [compression=" << compression << "]...\n";
827 cout << "Writing data to tree '" << tree << "'" << (nofill?" (--skipped--)":"") << endl;
828 if (num_split)
829 {
830 cout << "Splitting configured " << (split_seq.empty()?"randomly":"in sequence") << " into " << num_split << " branches." << endl;
831 if (!split_quant.empty())
832 cout << "Seed value configured as " << conf.Get<uint64_t>("seed") << "." << endl;
833 }
834 }
835
836 // ----------------------------- Open output file --------------------------
837 TFile tfile(path, update?"UPDATE":(force?"RECREATE":"CREATE"), "Rootify SQL", compression);
838 if (tfile.IsZombie())
839 return 13;
840
841 // -------------------------------------------------------------------------
842
843 // get the first row to get the field description
844 mysqlpp::Row row = res.fetch_row();
845 if (!row)
846 {
847 cerr << "Empty set returned... nothing to write." << endl;
848 return finish(connection, verbose, profiling, print_connection)+20;
849 }
850
851 if (verbose>0)
852 cout << "Trying to setup " << row.size() << " branches..." << endl;
853
854 if (verbose>1)
855 cout << endl;
856
857 const mysqlpp::FieldNames &l = *row.field_list().list;
858
859 vector<double> buf(l.size());
860 vector<uint8_t> typ(l.size(),'n'); // n=number [double], d is used for DateTime
861
862 UInt_t cols = 0;
863
864 // IMPLEMENT FILE SPLITTING!
865 // OpenFile(tree, query)
866 // SetupColumns
867 // WriteRow
868 // CloseFile
869
870 // Ratio[3]: 50%, 20%, 30%
871 // File[x3]: root, cout, fout
872
873
874 // -------------------- Configure branches of TTree ------------------------
875 vector<TTree*> ttree;
876
877 if (num_split==0)
878 ttree.emplace_back(new TTree(tree.c_str(), query.c_str()));
879 else
880 for (size_t i=0; i<num_split; i++)
881 ttree.emplace_back(new TTree((tree+"["+to_string(i)+"]").c_str(), query.c_str()));
882
883 size_t skipat = 0;
884 size_t skipreg = 0;
885 for (size_t i=0; i<l.size(); i++)
886 {
887 const string t = row[i].type().sql_name();
888
889 if (t.find("DATETIME")!=string::npos)
890 typ[i] = 'd';
891 else
892 if (t.find("DATE")!=string::npos)
893 typ[i] = 'D';
894 else
895 if (t.find("TIME")!=string::npos)
896 typ[i] = 'T';
897 else
898 if (t.find("VARCHAR")!=string::npos)
899 typ[i] = 'V';
900 else
901 if (t.find("CHAR")!=string::npos)
902 typ[i] = 'C';
903
904 bool found = false;
905 for (auto pattern=_ignore.cbegin(); pattern!=_ignore.cend(); pattern++)
906 {
907 if (boost::regex_match(l[i], boost::regex(*pattern)))
908 {
909 found = true;
910 typ[i] = '-';
911 skipreg++;
912 break;
913 }
914 }
915
916 if (l[i][0]=='@')
917 {
918 typ[i] = '@';
919 skipat++;
920 }
921
922 const bool use = l[i][0]!='@' && typ[i]!='V' && typ[i]!='C' && !found;
923
924 if (verbose>1)
925 cout << (use?" + ":" - ") << l[i].c_str() << " [" << t << "] {" << typ[i] << "}\n";
926
927 if (use)
928 {
929 // string name = l[i];
930 // for (const auto &m: mymap)
931 // name = boost::regex_replace(l[i], boost::regex(m.first), m.second);
932
933 for (auto it=ttree.begin(); it!=ttree.end(); it++)
934 it[0]->Branch(l[i].c_str(), buf.data()+i);
935 cols++;
936 }
937 }
938 // -------------------------------------------------------------------------
939
940 if (verbose>1)
941 cout << endl;
942 if (verbose>0)
943 {
944 if (skipreg)
945 cout << skipreg << " branches skipped due to ignore list." << endl;
946 if (skipat)
947 cout << skipat << " branches skipped due to name starting with @." << endl;
948 cout << "Configured " << cols << " branches.\nFilling branches..." << endl;
949 }
950
951 // ------------------------- Open the ascii files --------------------------
952
953 vector<ofstream> fout;
954 if (!write.empty())
955 {
956 vector<string> names;
957 if (num_split==0)
958 names.emplace_back(write);
959 else
960 for (size_t i=0; i<num_split; i++)
961 names.emplace_back(write+"-"+to_string(i));
962
963 for (auto it=names.cbegin(); it!=names.cend(); it++)
964 {
965 fout.emplace_back(*it);
966 if (!*fout.rbegin())
967 cout << "WARNING: Writing to '" << write << "' failed: " << strerror(errno) << endl;
968 }
969 }
970
971 // ----------------------- Prepare the ascii comment -----------------------
972
973 string contents;
974
975 istringstream istr(query);
976 size_t line = 0;
977 bool header = true;
978 while (istr)
979 {
980 string ibuf;
981 getline(istr, ibuf);
982 const string sbuf = Tools::Trim(ibuf);
983
984 const bool shabang = line==0 && ibuf[0]=='#' && ibuf[1]=='!';
985 const bool comment = sbuf[0]=='#' && !shabang;
986 const bool isquery = !shabang && !comment;
987 if (isquery)
988 header = false;
989
990 line++;
991
992 if ((copy_shabang && shabang) ||
993 (copy_header && comment && header) ||
994 (copy_query && isquery) ||
995 (copy_comments && comment && !header))
996 contents += '#' + ibuf + '\n';
997 }
998
999 // ----------------------- Write the ascii headers -------------------------
1000
1001 ostringstream htxt;
1002 if (display || !fout.empty())
1003 htxt << row.field_list(delimiter.c_str());
1004
1005 if (display)
1006 {
1007 cout << endl;
1008 cout << contents << endl;
1009 cout << "# " << htxt.str() << endl;
1010 }
1011 for (auto ff=fout.begin(); ff!=fout.end(); ff++)
1012 {
1013 *ff << contents;
1014 *ff << "# " << htxt.str() << endl;
1015 }
1016
1017 // ---------------------- Fill TTree with DB data --------------------------
1018
1019 const uniform_real_distribution<double> distribution(0,1);
1020 mt19937_64 generator;
1021 generator.seed(conf.Get<uint64_t>("seed"));
1022 auto rndm = bind(distribution, generator);
1023
1024 size_t count = 0;
1025 size_t skip = 0;
1026 do
1027 {
1028 size_t index = 0;
1029 if (!split_lut.empty())
1030 index = split_lut[count % split_lut.size()];
1031 if (!split_quant.empty())
1032 {
1033 const float r = rndm();
1034 for (; r>=split_quant[index]; index++)
1035 if (index==split_quant.size())
1036 break;
1037 }
1038
1039 count++;
1040
1041 ostringstream rtxt;
1042 if (display || !fout.empty())
1043 rtxt << row.value_list(delimiter.c_str(), mysqlpp::do_nothing);
1044
1045 if (display)
1046 cout << rtxt.str() << '\n';
1047 if (!fout.empty())
1048 fout[index] << rtxt.str() << '\n';
1049
1050 size_t idx=0;
1051 for (auto col=row.begin(); col!=row.end(); col++, idx++)
1052 {
1053 if (!ignorenull && col->is_null())
1054 {
1055 skip++;
1056 break;
1057 }
1058
1059 switch (typ[idx])
1060 {
1061 case 'd':
1062 buf[idx] = time_t((mysqlpp::DateTime)(*col));
1063 break;
1064
1065 case 'D':
1066 buf[idx] = time_t((mysqlpp::Date)(*col));
1067 break;
1068
1069 case 'T':
1070 buf[idx] = time_t((mysqlpp::Time)(*col));
1071 break;
1072
1073 case 'V':
1074 case 'C':
1075 case '-':
1076 case '@':
1077 break;
1078
1079 default:
1080 buf[idx] = atof(col->c_str());
1081 }
1082 }
1083
1084 if (idx==row.size() && !nofill)
1085 ttree[index]->Fill();
1086
1087 row = res.fetch_row();
1088
1089
1090 } while (row);
1091
1092 // -------------------------------------------------------------------------
1093
1094 if (display)
1095 cout << '\n' << endl;
1096
1097 if (verbose>0)
1098 {
1099 cout << count << " rows fetched." << endl;
1100 if (skip>0)
1101 cout << skip << " rows skipped due to NULL field." << endl;
1102
1103 for (size_t i=0; i<ttree.size(); i++)
1104 cout << ttree[i]->GetEntries() << " rows filled into tree #" << i << "." << endl;
1105 }
1106
1107 for (auto it=ttree.begin(); it!=ttree.end(); it++)
1108 (*it)->Write();
1109 tfile.Close();
1110
1111 if (verbose>0)
1112 {
1113 const auto sec = Time().UnixTime()-start.UnixTime();
1114
1115 cout << Tools::Scientific(tfile.GetSize()) << "B written to disk.\n";
1116 cout << "File closed.\n";
1117 cout << "Execution time: " << sec << "s ";
1118 cout << "(" << Tools::Fractional(sec/count) << "s/row)\n";
1119 cout << "--------------------------------------------------------------" << endl;
1120 }
1121
1122 return finish(connection, verbose, profiling, print_connection);
1123}
Note: See TracBrowser for help on using the repository browser.