source: trunk/FACT++/src/rootifysql.cc@ 19282

Last change on this file since 19282 was 19281, checked in by tbretz, 7 years ago
Some minor updates to the output.
File size: 38.3 KB
Line 
1#include "Database.h"
2
3#include <boost/algorithm/string/join.hpp>
4#include <boost/regex.hpp>
5#include <boost/tokenizer.hpp>
6#include <boost/algorithm/string.hpp>
7
8#include "tools.h"
9#include "Time.h"
10#include "Configuration.h"
11
12#include <TROOT.h>
13#include <TSystem.h>
14#include <TFile.h>
15#include <TTree.h>
16
17using namespace std;
18
19// ------------------------------------------------------------------------
20
21void SetupConfiguration(Configuration &conf)
22{
23 po::options_description control("Database options");
24 control.add_options()
25 ("uri,u", var<string>()->required(), "Database link as in\n\tuser:password@server[:port]/database[?compress=0|1].")
26 ("query,q", var<string>(""), "MySQL query (overwrites --file)")
27 ("file", var<string>("rootify.sql"), "An ASCII file with the MySQL query (overwrites --query)")
28 ("ignore-null,i", po_switch(), "Do not skip rows containing any NULL field")
29 ("display,d", po_switch(), "Displays contents on the screen (most usefull in combination with mysql statements as SHOW or EXPLAIN)")
30 ("explain", po_switch(), "Requests an EXPLAIN from the server (shows the server optimized query)\nsee also https://dev.mysql.com/doc/refman/explain-output.html")
31 ("profiling", po_switch(), "Turn on profiling and print profile")
32 ("var.*", var<string>(), "Predefined SQL user variables (@VAR)")
33 ("env.*", vars<string>(), "Predefined environment for substitutions in the query ($ENV)")
34 ("list.*", var<string>(), "Predefined environment for substitutions in the query ($ENV). The list is read from the given file (one list entry per line)")
35 ("print-connection", po_switch(), "Print database connection information")
36 ("verbose,v", var<uint16_t>(1), "Verbosity (0: quiet, 1: default, 2: more, 3, ...)")
37 ;
38
39 po::options_description ascii("ASCII output");
40 ascii.add_options()
41 ("write,w", var<string>(""), "Write output to an ascii file")
42 ("delimiter", var<string>(), "The delimiter used if contents are displayed with --display (default=\\t)")
43 ("copy-shabang", po_switch(), "Copy the sha-bang line if exists to the output file")
44 ("copy-header", po_switch(), "Copy the header (all line starting with '#' up to the first non-comment line to the output file")
45 ("copy-query", po_switch(), "Copy the query to the ascii output file")
46 ("copy-comments", po_switch(), "Copy all lines starting with '#' to the output file which are not part of header")
47 ("copy-all", po_switch(), "An alias for --copy-header --copy-query --copy-comments")
48 ;
49
50 po::options_description root("Root file options");
51 root.add_options()
52 ("out,o", var<string>("rootify.root"), "Output root file name")
53 ("force,f", po_switch(), "Force overwriting an existing root file ('RECREATE')")
54 ("update", po_switch(), "Update an existing root file with the new tree ('UPDATE')")
55 ("compression,c", var<uint16_t>(1), "zlib compression level for the root file")
56 ("tree,t", var<string>("Result"), "Name of the root tree")
57 ("ignore", vars<string>(), "Ignore the given columns")
58 ("null,n", po_switch(), "Redirect the output file to /dev/null (mainly for debugging purposes, e.g. performance studies)")
59 ("no-fill", po_switch(), "Do not fill events into the root file (mainly for debugging purposes, e.g. performance studies)")
60 ;
61
62 po::options_description split("Splitting options");
63 split.add_options()
64 ("split-sequence,S", vars<uint16_t>(), "Split data sequentially into several trees/files (e.g. 1, 1, 2)")
65 ("split-quantile,Q", vars<double>(), "Split data randomly into several trees/files (e.g. 0.5, 1)")
66 ("seed", var<uint64_t>(mt19937_64::default_seed), "Seed value in case of random split")
67 ;
68
69 po::positional_options_description p;
70 p.add("file", 1); // The 1st positional options (n=1)
71 p.add("out", 1); // The 2nd positional options (n=1)
72
73 conf.AddOptions(control);
74 conf.AddOptions(ascii);
75 conf.AddOptions(root);
76 conf.AddOptions(split);
77 conf.SetArgumentPositions(p);
78}
79
80void PrintUsage()
81{
82 cout <<
83 "rootifysql - Converts the result of a mysql query into a root file\n"
84 "\n"
85 "For convenience, this documentation uses the extended version of the options, "
86 "refer to the output below to get the abbreviations.\n"
87 "\n"
88 "Writes the result of a mysql query into a root file. For each column, a branch is "
89 "created of type double with the field name as name. This is usually the column name "
90 "if not specified otherwise by the AS mysql directive.\n"
91 "\n"
92 "Columns with CHAR or VARCHAR as field type are ignored. DATETIME, DATE and TIME "
93 "columns are converted to unix time (time_t). Rows containing any file which is "
94 "NULL are skipped if not suppressed by the --ignore-null option. Ideally, the query "
95 "is compiled in a way that no NULL field is returned. With the --display option the "
96 "result of the request is printed on the screen (NULL skipping still in action). "
97 "This can be useful to create an ascii file or to show results as 'SHOW DATABASES' "
98 "or 'EXPLAIN table'. To redirect the contents into an ascii file, the option -v0 "
99 "is useful. To suppress writing to an output file --null can be used.\n"
100 "\n"
101 "The default is to read the query from a file called rootify.sql. Except if a different "
102 "filename is specified by the --file option or a query is given with --query.\n"
103 "\n"
104 "As a trick, the rootify.sql file can be made excutable (chmod u+x rootify.sql). "
105 "If the first line contains '#!rootifysql', the script can be executed directly.\n"
106 "\n"
107 "Columns whose name start with @ are skipped. If you want them in your output file "
108 "give them a name using AS, e.g. 'SELECT @A:=5 AS A'.\n"
109 "\n"
110 "You can use variables in your sql query like @MyVar and define them on the "
111 "command line. In this example with --var.MyVar=5\n"
112 "\n"
113 "You can use environment definitions for substitutions in your SQL query. "
114 "For example --env.TEST=5 would replace $TEST or ${TEST} in your query by 5."
115 "If you specify one environment variable more than once, a list is created. "
116 "For example --env.TEST=1 --env.TEST=2 --env.TEST=3 would substitute "
117 "$TEST or ${TEST} by '1, 2, 3'. This is useful for the SQL `IN` keyword. "
118 "You can also read the values for an enviroment substitution from a file "
119 "(one element per line), e.g. --env.TEST=file.txt. Empty lines and lines "
120 "starting with a # are skipped.\n"
121 "\n"
122 "Comments in the query-file can be placed according to the SQL standard inline "
123 "/*comment*/ or introduced with # (shell script style) or -- (SQL style).\n"
124 "\n"
125 "For several purposes, it might be convenient to split the output to several "
126 "different root-trees or ascii files. This can be done using the --split-sequence (-S) "
127 "and the --split-quantile (-Q) options. If a split sequence is defined as "
128 "-S 1 -S 2 -S 1 the events are split by 1:2:1 in this sequence order. If "
129 "quantiled are given as -Q 0.5 -Q 0.6, the first tree will contain 50% of "
130 "the second one 10% and the third one 40%. The corresponding seed value can "
131 "be set with --seed.\n"
132 "\n"
133 "In case of success, 0 is returned, a value>0 otherwise.\n"
134 "\n"
135 "Usage: rootifysql [rootify.sql [rootify.root]] [-u URI] [-q query|-f file] [-i] [-o out] [-f] [-cN] [-t tree] [-vN]\n"
136 "\n"
137 ;
138 cout << endl;
139}
140
141struct ExplainParser
142{
143 string sql;
144
145 vector<string> vec;
146
147 string substitute(string _str, const boost::regex &expr)
148 {
149 boost::smatch match;
150 while (boost::regex_search(_str, match, expr, boost::regex_constants::format_first_only))
151 {
152 const auto &len = match.length();
153 const auto &pos = match.position();
154 const auto &str = match.str();
155
156 const auto it = find(vec.cbegin(), vec.cend(), str);
157 const size_t id = it==vec.cend() ? vec.size() : it-vec.cbegin();
158
159 _str.replace(pos, len, "{"+to_string(id)+"}");
160
161 if (it==vec.cend())
162 vec.push_back(str);//.substr(1, str.size()-2));
163 }
164
165 return _str;
166 }
167
168 string substitute(const string &str, const string &expr)
169 {
170 return substitute(str, boost::regex(expr));
171 }
172
173 vector<string> queries;
174
175 string resub(string str)
176 {
177 // search for "KEYWORD expression"
178 boost::regex reg("\\{[0-9]+\\}");
179
180 boost::smatch match;
181 while (boost::regex_search(str, match, reg, boost::regex_constants::format_first_only))
182 {
183 const auto &len = match.length();
184 const auto &pos = match.position();
185 const auto &arg = match.str(); // Argument
186
187 const auto idx = atoi(arg.c_str()+1);
188
189 str.replace(pos, len, resub(vec[idx]));
190 }
191
192 return str;
193 }
194
195 void expression(string expr, size_t indent=0)
196 {
197 if (expr[0]=='{')
198 {
199 const auto idx = atoi(expr.c_str()+1);
200
201 // This is a subquery
202 if (vec[idx].substr(0,3)=="(/*")
203 {
204 cout << setw(indent) << ' ' << "(\n";
205 find_tokens(vec[idx], indent+4);
206 cout << setw(indent) << ' ' << ") ";
207 }
208 else
209 // This is just something to substitute back
210 if (vec[idx].substr(0,2)=="({")
211 {
212 cout << setw(indent) << ' ' << "(" << resub(vec[idx]) << ") ";
213 }
214 else
215 {
216 if (indent>0)
217 cout << setw(indent) << ' ';
218 cout << resub(vec[idx]);
219 }
220 }
221 else
222 {
223 if (indent>0)
224 cout << setw(indent) << ' ';
225 cout << resub(expr);
226 }
227 }
228
229 void find_tokens(string str, size_t indent=0)
230 {
231 // ( COMMENT )?( TOKEN )?(( {NNN} | NNN )( AS|ON ( {NNN}) ))?(,)?)
232 //regex reg("(\\/\\*\\ select\\#[0-9]+\\ \\*\\/\\ *)?([a-zA-Z ]+)?((\\{[0-9]+\\}|[0-9]+)(\\ ?([Aa][Ss]|[Oo][Nn])\\ ?(\\{[0-9]+\\}))?(,)?)");
233
234 const string _com = "\\/\\*\\ select\\#[0-9]+\\ \\*\\/\\ *";
235
236 const string _tok = "[a-zA-Z_ ]+";
237
238 const string _nnn = "\\{[0-9]+\\}|[0-9]+";
239
240 const string _as = "\\ ?([Aa][Ss])\\ ?";
241
242 // ( _nnn ) ( _as ( _nnn ))?(,)? // can also match noting in between two {NNN}
243 const string _exp = "("+_nnn+")" + "("+_as+"("+_nnn+"))?(,)?";
244
245 // Matche: ( _com )? ( ( _tok )? ( _exp ) | ( _tok ) )
246 boost::regex reg("("+_com+")?" + "(" + "("+_tok+")?"+"("+_exp+")" + "|" + "("+_tok+")" + ")");
247
248 boost::smatch match;
249 while (boost::regex_search(str, match, reg, boost::regex_constants::format_first_only))
250 {
251
252 const auto &com = match.str(1); // comment
253 const auto &tok1 = Tools::Trim(match.str(3)); // token with expression
254 const auto &arg1 = match.str(5); // argument 1
255 const auto &as = match.str(7); // as
256 const auto &arg2 = match.str(8); // argument 2
257 const auto &comma = match.str(9); // comma
258 const auto &tok2 = Tools::Trim(match.str(10)); // token without expression
259
260 if (!com.empty())
261 cout << setw(indent) << ' ' << "\033[34m" << com << "\033[0m" << '\n';
262
263 if (!tok1.empty())
264 cout << setw(indent) << ' ' << "\033[32m" << tok1 << "\033[0m" << '\n';
265 if (!tok2.empty())
266 cout << setw(indent) << ' ' << "\033[32m" << tok2 << "\033[0m" << '\n';
267
268 if (!arg1.empty())
269 {
270 expression(arg1, indent+4);
271
272 if (!as.empty())
273 cout << " \033[33m" << as << "\033[0m ";
274
275 if (!arg2.empty())
276 expression(arg2);
277
278 if (!comma.empty())
279 cout << ',';
280
281 cout << '\n';
282 }
283
284 str = str.substr(match.position()+match.length());
285 }
286 }
287
288
289 ExplainParser(const string &_sql) : sql(_sql)
290 {
291 // substitute all strings
292 sql = substitute(sql, "'[^']*'");
293
294 // substitute all escaped sequences (`something`.`something-else`)
295 sql = substitute(sql, "`[^`]*`(\\.`[^`]*`)*");
296
297 // substitute all paranthesis
298 sql = substitute(sql, "[a-zA-Z0-9_]*\\([^\\(\\)]*\\)");
299
300 //cout << sql << "\n\n";
301 find_tokens(sql);
302 cout << endl;
303 }
304};
305
306// Remove queries...
307void format(string sql)
308{
309 ExplainParser p(sql);
310
311 /*
312
313 SELECT
314 [ALL | DISTINCT | DISTINCTROW ]
315 [HIGH_PRIORITY]
316 [STRAIGHT_JOIN]
317 [SQL_SMALL_RESULT] [SQL_BIG_RESULT] [SQL_BUFFER_RESULT]
318 [SQL_CACHE | SQL_NO_CACHE] [SQL_CALC_FOUND_ROWS]
319 select_expr [, select_expr ...]
320 [FROM table_references
321 [PARTITION partition_list]
322 [WHERE where_condition]
323 [GROUP BY {col_name | expr | position}, ... [WITH ROLLUP]]
324 [HAVING where_condition]
325 [WINDOW window_name AS (window_spec)
326 [, window_name AS (window_spec)] ...]
327 [ORDER BY {col_name | expr | position}
328 [ASC | DESC], ... [WITH ROLLUP]]
329 [LIMIT {[offset,] row_count | row_count OFFSET offset}]
330 [INTO OUTFILE 'file_name'
331 [CHARACTER SET charset_name]
332 export_options
333 | INTO DUMPFILE 'file_name'
334 | INTO var_name [, var_name]]
335 [FOR {UPDATE | SHARE} [OF tbl_name [, tbl_name] ...] [NOWAIT | SKIP LOCKED]
336 | LOCK IN SHARE MODE]]
337 */
338
339 /*
340table_references:
341 escaped_table_reference [, escaped_table_reference] ...
342
343escaped_table_reference:
344 table_reference
345 | { OJ table_reference }
346
347table_reference:
348 table_factor
349 | join_table
350
351table_factor:
352 tbl_name [PARTITION (partition_names)]
353 [[AS] alias] [index_hint_list]
354 | table_subquery [AS] alias [(col_list)]
355 | ( table_references )
356
357join_table:
358 table_reference [INNER | CROSS] JOIN table_factor [join_condition]
359 | table_reference STRAIGHT_JOIN table_factor
360 | table_reference STRAIGHT_JOIN table_factor ON conditional_expr
361 | table_reference {LEFT|RIGHT} [OUTER] JOIN table_reference join_condition
362 | table_reference NATURAL [INNER | {LEFT|RIGHT} [OUTER]] JOIN table_factor
363
364join_condition:
365 ON conditional_expr
366 | USING (column_list)
367
368index_hint_list:
369 index_hint [, index_hint] ...
370
371index_hint:
372 USE {INDEX|KEY}
373 [FOR {JOIN|ORDER BY|GROUP BY}] ([index_list])
374 | IGNORE {INDEX|KEY}
375 [FOR {JOIN|ORDER BY|GROUP BY}] (index_list)
376 | FORCE {INDEX|KEY}
377 [FOR {JOIN|ORDER BY|GROUP BY}] (index_list)
378
379index_list:
380 index_name [, index_name] ...
381 */
382
383}
384
385int finish(Database &connection, const uint16_t &verbose, const bool &profiling, const bool &print_connection)
386{
387 if (verbose>0)
388 {
389 try
390 {
391 const auto resw =
392 connection.query("SHOW WARNINGS").store();
393
394 if (resw.num_rows()>0)
395 cout << "\n" << resw.num_rows() << " Warning(s) issued:\n\n";
396
397 for (size_t i=0; i<resw.num_rows(); i++)
398 {
399 const mysqlpp::Row &roww = resw[i];
400
401 cout << roww["Level"] << '[' << roww["Code"] << "]: ";
402 cout << roww["Message"] << '\n';
403 }
404 cout << endl;
405
406 }
407 catch (const exception &e)
408 {
409 cerr << "\nSHOW WARNINGS\n\n";
410 cerr << "SQL query failed:\n" << e.what() << endl;
411 return 1;
412 }
413 }
414
415 if (profiling)
416 {
417 try
418 {
419 const auto N =
420 connection.query("SHOW PROFILES").store().num_rows();
421
422 const auto resp =
423 connection.query("SHOW PROFILE ALL FOR QUERY "+to_string(verbose?N-1:N)).store();
424
425 cout << '\n';
426 cout << left;
427 cout << setw(26) << "Status" << ' ';
428 cout << right;
429 cout << setw(11) << "Duration" << ' ';
430 cout << setw(11) << "CPU User" << ' ';
431 cout << setw(11) << "CPU System" << '\n';
432 cout << "--------------------------------------------------------------\n";
433 for (size_t i=0; i<resp.num_rows(); i++)
434 {
435 const mysqlpp::Row &rowp = resp[i];
436
437 cout << left;
438 cout << setw(26) << rowp["Status"] << ' ';
439 cout << right;
440 cout << setw(11) << rowp["Duration"] << ' ';
441 cout << setw(11) << rowp["CPU_user"] << ' ';
442 cout << setw(11) << rowp["CPU_system"] << '\n';
443 }
444 cout << "--------------------------------------------------------------\n";
445 cout << endl;
446 }
447 catch (const exception &e)
448 {
449 cerr << "\nSHOW PROFILE ALL\n\n";
450 cerr << "SQL query failed:\n" << e.what() << '\n' <<endl;
451 return 2;
452 }
453 }
454
455 if (print_connection)
456 {
457 try
458 {
459 // Exchange _send and _received as it is the view of the server
460 const auto &res1 = connection.query("SHOW STATUS LIKE 'Bytes_%'").store();
461 cout << left << setw(16) << res1[1]["Variable_name"] << ' ' << Tools::Scientific(res1[0]["Value"]) << endl;
462 cout << left << setw(16) << res1[0]["Variable_name"] << ' ' << Tools::Scientific(res1[1]["Value"]) << endl;
463 cout << endl;
464 }
465 catch (const exception &e)
466 {
467 cerr << "\nSHOW STATUS LIKE 'Bytes_%'\n\n";
468 cerr << "SQL query failed:\n" << e.what() << endl;
469 return 3;
470 }
471 }
472
473 if (verbose>0)
474 cout << "Success!\n" << endl;
475 return 0;
476
477}
478
479int main(int argc, const char* argv[])
480{
481 Time start;
482
483 gROOT->SetBatch();
484
485 Configuration conf(argv[0]);
486 conf.SetPrintUsage(PrintUsage);
487 SetupConfiguration(conf);
488
489 if (!conf.DoParse(argc, argv))
490 return 127;
491
492 // ----------------------------- Evaluate options --------------------------
493 const string uri = conf.Get<string>("uri");
494 const string out = conf.Get<string>("out");
495 const string file = conf.Get<string>("file");
496 const string tree = conf.Get<string>("tree");
497 const bool force = conf.Get<bool>("force");
498 const bool ignorenull = conf.Get<bool>("ignore-null");
499 const bool update = conf.Get<bool>("update");
500 const bool display = conf.Get<bool>("display");
501 const string write = conf.Get<string>("write");
502 const bool noout = conf.Get<bool>("null");
503 const bool nofill = conf.Get<bool>("no-fill");
504 const bool explain = conf.Get<bool>("explain");
505 const bool profiling = conf.Get<bool>("profiling");
506 const uint16_t verbose = conf.Get<uint16_t>("verbose");
507 const uint16_t compression = conf.Get<uint16_t>("compression");
508 const string delimiter = conf.Has("delimiter") ? conf.Get<string>("delimiter") : "\t";
509
510 const bool copy_all = conf.Get<bool>("copy-all");
511 const bool copy_shabang = conf.Get<bool>("copy-shabang");
512 const bool copy_header = copy_all || conf.Get<bool>("copy-header");
513 const bool copy_query = copy_all || conf.Get<bool>("copy-query");
514 const bool copy_comments = copy_all || conf.Get<bool>("copy-comments");
515
516 const vector<string> _ignore = conf.Vec<string>("ignore");
517 const bool print_connection = conf.Get<bool>("print-connection");
518 //const vector<Map> mymap = conf.Vec<Map>("map");
519
520 vector<uint16_t> split_seq = conf.Vec<uint16_t>("split-sequence");
521 vector<double> split_quant = conf.Vec<double>("split-quantile");
522
523 if (!split_seq.empty() && !split_quant.empty())
524 throw runtime_error("Only splitting by --split-sequence or --split-quantile is allowed.");
525
526 const size_t num_split = ::max(split_seq.size(), split_quant.size()+1);
527
528 map<size_t, size_t> split_lut;
529 for (size_t i=0; i<split_seq.size(); i++)
530 {
531 const size_t sz = split_lut.size();
532 for (size_t j=0; j<split_seq[i]; j++)
533 split_lut.emplace(j+sz, i);
534 }
535
536 for (size_t i=0; i<split_quant.size(); i++)
537 if (split_quant[i]<0 || split_quant[i]>=1)
538 throw runtime_error("Splitting quantiles must be in the range [0;1)");
539
540 for (size_t i=1; i<split_quant.size(); i++)
541 {
542 if (split_quant[i]<=split_quant[i-1])
543 throw runtime_error("Splitting quantiles must be in increasing order.");
544 }
545
546 // -------------------------------------------------------------------------
547
548 const auto vars = conf.GetWildcardOptions("var.*");
549
550 vector<string> variables;
551 for (auto var=vars.cbegin(); var!=vars.cend(); var++)
552 variables.emplace_back('@'+var->substr(4)+":="+Tools::Trim(conf.Get<string>(*var)));
553
554 // -------------------------------------------------------------------------
555
556 if (verbose>0)
557 {
558 cout << "\n------------------------ Rootify SQL -------------------------" << endl;
559 cout << "Start Time: " << Time::sql << Time(Time::local) << endl;
560 }
561
562 string query = conf.Get<string>("query");
563 if (query.empty())
564 {
565 if (verbose>0)
566 cout << "Reading query from file '" << file << "'." << endl;
567
568 ifstream fin(file);
569 if (!fin)
570 {
571 cerr << "Could not open query in '" << file << "': " << strerror(errno) << endl;
572 return 4;
573 }
574
575 getline(fin, query, (char)fin.eof());
576 }
577
578 if (query.empty())
579 {
580 cerr << "No query specified." << endl;
581 return 5;
582 }
583
584 // -------------------------------------------------------------------------
585
586 map<string, vector<string>> envs;
587
588 const auto &envs1 = conf.GetWildcardOptions("env.*");
589 for (auto env=envs1.cbegin(); env!=envs1.cend(); env++)
590 envs[env->substr(4)] = conf.Vec<string>(*env);
591
592 const auto &envs2 = conf.GetWildcardOptions("list.*");
593 for (auto env=envs2.cbegin(); env!=envs2.cend(); env++)
594 {
595 const string fname = conf.Get<string>(*env);
596 const string &ident = env->substr(5);
597
598 ifstream fin(fname);
599 if (!fin)
600 {
601 cerr << "Could not open environment in '" << fname << "' for ${" << ident << "}: " << strerror(errno) << endl;
602 return 6;
603 }
604 for (string line; getline(fin, line); )
605 {
606 const auto &l = Tools::Trim(line);
607 if (!l.empty() && l[0]!='#')
608 envs[ident].push_back(line);
609 }
610
611 if (verbose>0)
612 cout << "Found " << envs[ident].size() << " list element(s) for ${" << ident << "}" << endl;
613 }
614
615 for (auto env=envs.cbegin(); env!=envs.cend(); env++)
616 {
617 boost::regex rexpr("\\$(\\{"+env->first+"\\}|"+env->first+"\\b)");
618 query = boost::regex_replace(query, rexpr, boost::join(env->second, ", "));
619 }
620
621 // -------------------------- Check for file permssion ---------------------
622 // Strictly speaking, checking for write permission and existance is not necessary,
623 // but it is convenient that the user does not find out that it failed after
624 // waiting long for the query result
625 //
626
627 // I am using root here instead of boost to be
628 // consistent with the access pattern by TFile
629 TString path(noout?"/dev/null":out.c_str());
630 gSystem->ExpandPathName(path);
631
632 if (!noout)
633 {
634 FileStat_t stat;
635 const Int_t exist = !gSystem->GetPathInfo(path, stat);
636 const Bool_t _write = !gSystem->AccessPathName(path, kWritePermission) && R_ISREG(stat.fMode);
637
638 if ((update && !exist) || (update && exist && !_write) || (force && exist && !_write))
639 {
640 cerr << "File '" << path << "' is not writable." << endl;
641 return 7;
642 }
643
644 if (!update && !force && exist)
645 {
646 cerr << "File '" << path << "' already exists." << endl;
647 return 8;
648 }
649 }
650
651 Time start2;
652
653 // --------------------------- Connect to database -------------------------------------------------
654
655 if (*query.rbegin()!='\n')
656 query += '\n';
657
658 if (verbose>0)
659 {
660 cout << "Connecting to database...\n";
661 cout << "Client Version: " << mysqlpp::Connection().client_version() << endl;
662 }
663
664 Database connection(uri); // Keep alive while fetching rows
665
666 if (verbose>0)
667 cout << "Server Version: " << connection.server_version() << endl;
668
669 if (print_connection)
670 {
671 try
672 {
673 const auto &res1 = connection.query("SHOW STATUS LIKE 'Compression'").store();
674 cout << "Compression of database connection is " << string(res1[0][1]) << endl;
675
676 const auto &res2 = connection.query("SHOW STATUS LIKE 'Ssl_cipher'").store();
677 cout << "Connection to databases is " << (string(res2[0][1]).empty()?"UNENCRYPTED":"ENCRYPTED ("+string(res2[0][1])+")") << endl;
678 }
679 catch (const exception &e)
680 {
681 cerr << "\nSHOW STATUS LIKE 'Compression'\n\n";
682 cerr << "SQL query failed:\n" << e.what() << endl;
683 return 9;
684 }
685 }
686
687 try
688 {
689 if (profiling)
690 connection.query("SET PROFILING=1").execute();
691 }
692 catch (const exception &e)
693 {
694 cerr << "\nSET profiling=1\n\n";
695 cerr << "SQL query failed:\n" << e.what() << endl;
696 return 10;
697 }
698
699 // -------------------------- Set user defined variables -------------------
700 if (variables.size()>0)
701 {
702 if (verbose>0)
703 cout << "Setting user defined variables..." << endl;
704
705 const string varset =
706 "SET\n "+boost::algorithm::join(variables, ",\n ");
707
708 try
709 {
710 connection.query(varset).execute();
711 }
712 catch (const exception &e)
713 {
714 cerr << '\n' << varset << "\n\n";
715 cerr << "SQL query failed:\n" << e.what() << endl;
716 return 11;
717 }
718
719 if (verbose>2)
720 cout << '\n' << varset << '\n' << endl;
721 }
722
723 // ------------------------- Explain query if requested --------------------
724
725 if (explain)
726 {
727 try
728 {
729 const auto res0 =
730 connection.query("EXPLAIN FORMAT=JSON "+query).store();
731
732 cout << res0[0][0] << endl;
733 cout << endl;
734
735 const mysqlpp::StoreQueryResult res1 =
736 connection.query("EXPLAIN "+query).store();
737
738 for (size_t i=0; i<res1.num_rows(); i++)
739 {
740 const mysqlpp::Row &row = res1[i];
741
742 cout << "\nid : " << row["id"];
743 cout << "\nselect type : " << row["select_type"];
744
745 if (!row["table"].is_null())
746 cout << "\ntable : " << row["table"];
747
748 if (!row["partitions"].is_null())
749 cout << "\npartitions : " << row["partitions"];
750
751 if (!row["key"].is_null())
752 cout << "\nselected key : " << row["key"] << " [len=" << row["key_len"] << "] out of (" << row["possible_keys"] << ")";
753
754 if (!row["type"].is_null())
755 cout << "\njoin type : " << row["type"];
756
757 //if (!row["possible_keys"].is_null())
758 // cout << "\npossible_keys: " << row["possible_keys"];
759
760 //if (!row["key_len"].is_null())
761 // cout << "\nkey_len : " << row["key_len"];
762
763 if (!row["ref"].is_null())
764 cout << "\nref : (" << row["ref"] << ") compared to the index";
765
766 if (!row["rows"].is_null())
767 cout << "\nrows : " << row["rows"];
768
769 if (!row["filtered"].is_null())
770 cout << "\nfiltered : " << row["filtered"];
771
772 if (!row["extra"].is_null())
773 cout << "\nExtra : " << row["extra"];
774
775 cout << endl;
776 }
777
778 cout << endl;
779
780 const mysqlpp::StoreQueryResult res2 =
781 connection.query("SHOW WARNINGS").store();
782
783 for (size_t i=0; i<res2.num_rows(); i++)
784 {
785 const mysqlpp::Row &row = res2[i];
786
787 // 1003 //
788 cout << row["Level"] << '[' << row["Code"] << "]:\n";
789 if (uint32_t(row["Code"])==1003)
790 format(row["Message"].c_str());
791 else
792 cout << row["Message"] << '\n' << endl;
793
794 }
795
796 }
797 catch (const exception &e)
798 {
799 cerr << '\n' << query << "\n\n";
800 cerr << "SQL query failed:\n" << e.what() << endl;
801 return 12;
802 }
803
804 return 0;
805 }
806
807 // -------------------------- Request data from database -------------------
808 if (verbose>0)
809 cout << "Requesting data... please be patient!" << endl;
810
811 if (verbose>2)
812 cout << '\n' << query << endl;
813
814 const mysqlpp::UseQueryResult res =
815 connection.query(query).use();
816
817 // -------------------------------------------------------------------------
818
819 if (verbose>0)
820 {
821 cout << "Opening file '" << path << "' [compression=" << compression << "]...\n";
822 cout << "Writing data to tree '" << tree << "'" << (nofill?" (--skipped--)":"") << endl;
823 if (num_split)
824 {
825 cout << "Splitting configured " << (split_seq.empty()?"randomly":"in sequence") << " into " << num_split << " branches." << endl;
826 if (!split_quant.empty())
827 cout << "Seed value configured as " << conf.Get<uint64_t>("seed") << "." << endl;
828 }
829 }
830
831 // ----------------------------- Open output file --------------------------
832 TFile tfile(path, update?"UPDATE":(force?"RECREATE":"CREATE"), "Rootify SQL", compression);
833 if (tfile.IsZombie())
834 return 13;
835
836 // -------------------------------------------------------------------------
837
838 // get the first row to get the field description
839 mysqlpp::Row row = res.fetch_row();
840 if (!row)
841 {
842 cerr << "Empty set returned... nothing to write." << endl;
843 return finish(connection, verbose, profiling, print_connection)+20;
844 }
845
846 if (verbose>0)
847 cout << "Trying to setup " << row.size() << " branches..." << endl;
848
849 if (verbose>1)
850 cout << endl;
851
852 const mysqlpp::FieldNames &l = *row.field_list().list;
853
854 vector<double> buf(l.size());
855 vector<uint8_t> typ(l.size(),'n'); // n=number [double], d is used for DateTime
856
857 UInt_t cols = 0;
858
859 // IMPLEMENT FILE SPLITTING!
860 // OpenFile(tree, query)
861 // SetupColumns
862 // WriteRow
863 // CloseFile
864
865 // Ratio[3]: 50%, 20%, 30%
866 // File[x3]: root, cout, fout
867
868
869 // -------------------- Configure branches of TTree ------------------------
870 vector<TTree*> ttree;
871
872 if (num_split==0)
873 ttree.emplace_back(new TTree(tree.c_str(), query.c_str()));
874 else
875 for (int i=0; i<num_split; i++)
876 ttree.emplace_back(new TTree((tree+"["+to_string(i)+"]").c_str(), query.c_str()));
877
878 size_t skipat = 0;
879 size_t skipreg = 0;
880 for (size_t i=0; i<l.size(); i++)
881 {
882 const string t = row[i].type().sql_name();
883
884 if (t.find("DATETIME")!=string::npos)
885 typ[i] = 'd';
886 else
887 if (t.find("DATE")!=string::npos)
888 typ[i] = 'D';
889 else
890 if (t.find("TIME")!=string::npos)
891 typ[i] = 'T';
892 else
893 if (t.find("VARCHAR")!=string::npos)
894 typ[i] = 'V';
895 else
896 if (t.find("CHAR")!=string::npos)
897 typ[i] = 'C';
898
899 bool found = false;
900 for (auto pattern=_ignore.cbegin(); pattern!=_ignore.cend(); pattern++)
901 {
902 if (boost::regex_match(l[i], boost::regex(*pattern)))
903 {
904 found = true;
905 typ[i] = '-';
906 skipreg++;
907 break;
908 }
909 }
910
911 if (l[i][0]=='@')
912 {
913 typ[i] = '@';
914 skipat++;
915 }
916
917 const bool use = l[i][0]!='@' && typ[i]!='V' && typ[i]!='C' && !found;
918
919 if (verbose>1)
920 cout << (use?" + ":" - ") << l[i].c_str() << " [" << t << "] {" << typ[i] << "}\n";
921
922 if (use)
923 {
924 // string name = l[i];
925 // for (const auto &m: mymap)
926 // name = boost::regex_replace(l[i], boost::regex(m.first), m.second);
927
928 for (auto it=ttree.begin(); it!=ttree.end(); it++)
929 it[0]->Branch(l[i].c_str(), buf.data()+i);
930 cols++;
931 }
932 }
933 // -------------------------------------------------------------------------
934
935 if (verbose>1)
936 cout << endl;
937 if (verbose>0)
938 {
939 if (skipreg)
940 cout << skipreg << " branches skipped due to ignore list." << endl;
941 if (skipat)
942 cout << skipat << " branches skipped due to name starting with @." << endl;
943 cout << "Configured " << cols << " branches.\nFilling branches..." << endl;
944 }
945
946 // ------------------------- Open the ascii files --------------------------
947
948 vector<ofstream> fout;
949 if (!write.empty())
950 {
951 vector<string> names;
952 if (num_split==0)
953 names.emplace_back(write);
954 else
955 for (int i=0; i<num_split; i++)
956 names.emplace_back(write+"-"+to_string(i));
957
958 for (auto it=names.cbegin(); it!=names.cend(); it++)
959 {
960 fout.emplace_back(*it);
961 if (!*fout.rbegin())
962 cout << "WARNING: Writing to '" << write << "' failed: " << strerror(errno) << endl;
963 }
964 }
965
966 // ----------------------- Prepare the ascii comment -----------------------
967
968 string contents;
969
970 istringstream istr(query);
971 size_t line = 0;
972 bool header = true;
973 while (istr)
974 {
975 string ibuf;
976 getline(istr, ibuf);
977 const string sbuf = Tools::Trim(ibuf);
978
979 const bool shabang = line==0 && ibuf[0]=='#' && ibuf[1]=='!';
980 const bool comment = sbuf[0]=='#' && !shabang;
981 const bool isquery = !shabang && !comment;
982 if (isquery)
983 header = false;
984
985 line++;
986
987 if ((copy_shabang && shabang) ||
988 (copy_header && comment && header) ||
989 (copy_query && isquery) ||
990 (copy_comments && comment && !header))
991 contents += '#' + ibuf + '\n';
992 }
993
994 // ----------------------- Write the ascii headers -------------------------
995
996 ostringstream htxt;
997 if (display || !fout.empty())
998 htxt << row.field_list(delimiter.c_str());
999
1000 if (display)
1001 {
1002 cout << endl;
1003 cout << contents << endl;
1004 cout << "# " << htxt.str() << endl;
1005 }
1006 for (auto ff=fout.begin(); ff!=fout.end(); ff++)
1007 {
1008 *ff << contents;
1009 *ff << "# " << htxt.str() << endl;
1010 }
1011
1012 // ---------------------- Fill TTree with DB data --------------------------
1013
1014 const uniform_real_distribution<double> distribution(0,1);
1015 mt19937_64 generator;
1016 generator.seed(conf.Get<uint64_t>("seed"));
1017 auto rndm = bind(distribution, generator);
1018
1019 size_t count = 0;
1020 size_t skip = 0;
1021 do
1022 {
1023 size_t index = 0;
1024 if (!split_lut.empty())
1025 index = split_lut[count % split_lut.size()];
1026 if (!split_quant.empty())
1027 {
1028 const float r = rndm();
1029 for (; r>=split_quant[index]; index++)
1030 if (index==split_quant.size())
1031 break;
1032 }
1033
1034 count++;
1035
1036 ostringstream rtxt;
1037 if (display || !fout.empty())
1038 rtxt << row.value_list(delimiter.c_str(), mysqlpp::do_nothing);
1039
1040 if (display)
1041 cout << rtxt.str() << '\n';
1042 if (!fout.empty())
1043 fout[index] << rtxt.str() << '\n';
1044
1045 size_t idx=0;
1046 for (auto col=row.begin(); col!=row.end(); col++, idx++)
1047 {
1048 if (!ignorenull && col->is_null())
1049 {
1050 skip++;
1051 break;
1052 }
1053
1054 switch (typ[idx])
1055 {
1056 case 'd':
1057 buf[idx] = time_t((mysqlpp::DateTime)(*col));
1058 break;
1059
1060 case 'D':
1061 buf[idx] = time_t((mysqlpp::Date)(*col));
1062 break;
1063
1064 case 'T':
1065 buf[idx] = time_t((mysqlpp::Time)(*col));
1066 break;
1067
1068 case 'V':
1069 case 'C':
1070 case '-':
1071 case '@':
1072 break;
1073
1074 default:
1075 buf[idx] = atof(col->c_str());
1076 }
1077 }
1078
1079 if (idx==row.size() && !nofill)
1080 ttree[index]->Fill();
1081
1082 row = res.fetch_row();
1083
1084
1085 } while (row);
1086
1087 // -------------------------------------------------------------------------
1088
1089 if (display)
1090 cout << '\n' << endl;
1091
1092 if (verbose>0)
1093 {
1094 cout << count << " rows fetched." << endl;
1095 if (skip>0)
1096 cout << skip << " rows skipped due to NULL field." << endl;
1097
1098 for (int i=0; i<ttree.size(); i++)
1099 cout << ttree[i]->GetEntries() << " rows filled into tree #" << i << "." << endl;
1100 }
1101
1102 for (auto it=ttree.begin(); it!=ttree.end(); it++)
1103 (*it)->Write();
1104 tfile.Close();
1105
1106 if (verbose>0)
1107 {
1108 const auto sec = Time().UnixTime()-start.UnixTime();
1109
1110 cout << Tools::Scientific(tfile.GetSize()) << "B written to disk.\n";
1111 cout << "File closed.\n";
1112 cout << "Execution time: " << sec << "s ";
1113 cout << "(" << Tools::Fractional(sec/count) << "s/row)\n";
1114 cout << "--------------------------------------------------------------" << endl;
1115 }
1116
1117 return finish(connection, verbose, profiling, print_connection);
1118}
Note: See TracBrowser for help on using the repository browser.