source: trunk/FACT++/src/rootifysql.cc@ 19884

Last change on this file since 19884 was 19802, checked in by tbretz, 5 years ago
Moved the common Splitting algorithm to a common class 'Splitting' (added) and added the possibility to rename columns (csv2root)
File size: 35.7 KB
Line 
1#include "Database.h"
2
3#include <boost/regex.hpp>
4#include <boost/tokenizer.hpp>
5#include <boost/algorithm/string.hpp>
6#include <boost/algorithm/string/join.hpp>
7
8#include "tools.h"
9#include "Time.h"
10#include "Splitting.h"
11
12#include <TROOT.h>
13#include <TSystem.h>
14#include <TFile.h>
15#include <TTree.h>
16
17using namespace std;
18
19// ------------------------------------------------------------------------
20
21void SetupConfiguration(Configuration &conf)
22{
23 po::options_description control("Database options");
24 control.add_options()
25 ("uri,u", var<string>()->required(), "Database link as in\n\tuser:password@server[:port]/database[?compress=0|1].")
26 ("query,q", var<string>(""), "MySQL query (overwrites --file)")
27 ("file", var<string>("rootify.sql"), "An ASCII file with the MySQL query (overwrites --query)")
28 ("ignore-null,i", po_switch(), "Do not skip rows containing any NULL field")
29 ("display,d", po_switch(), "Displays contents on the screen (most usefull in combination with mysql statements as SHOW or EXPLAIN)")
30 ("explain", po_switch(), "Requests an EXPLAIN from the server (shows the server optimized query)\nsee also https://dev.mysql.com/doc/refman/explain-output.html")
31 ("profiling", po_switch(), "Turn on profiling and print profile")
32 ("var.*", var<string>(), "Predefined SQL user variables (@VAR)")
33 ("env.*", vars<string>(), "Predefined environment for substitutions in the query ($ENV)")
34 ("list.*", var<string>(), "Predefined environment for substitutions in the query ($ENV). The list is read from the given file (one list entry per line)")
35 ("print-connection", po_switch(), "Print database connection information")
36 ("verbose,v", var<uint16_t>(1), "Verbosity (0: quiet, 1: default, 2: more, 3, ...)")
37 ;
38
39 po::options_description ascii("ASCII output");
40 ascii.add_options()
41 ("write,w", var<string>(""), "Write output to an ascii file")
42 ("delimiter", var<string>(), "The delimiter used if contents are displayed with --display (default=\\t)")
43 ("copy-shabang", po_switch(), "Copy the sha-bang line if exists to the output file")
44 ("copy-header", po_switch(), "Copy the header (all line starting with '#' up to the first non-comment line to the output file")
45 ("copy-query", po_switch(), "Copy the query to the ascii output file")
46 ("copy-comments", po_switch(), "Copy all lines starting with '#' to the output file which are not part of header")
47 ("copy-all", po_switch(), "An alias for --copy-header --copy-query --copy-comments")
48 ;
49
50 po::options_description root("Root file options");
51 root.add_options()
52 ("out,o", var<string>("rootify.root"), "Output root file name")
53 ("force,f", po_switch(), "Force overwriting an existing root file ('RECREATE')")
54 ("update", po_switch(), "Update an existing root file with the new tree ('UPDATE')")
55 ("compression,c", var<uint16_t>(1), "zlib compression level for the root file")
56 ("tree,t", var<string>("Result"), "Name of the root tree")
57 ("ignore", vars<string>(), "Ignore the given columns")
58 ("null,n", po_switch(), "Redirect the root output file to /dev/null (mainly for debugging purposes, e.g. performance studies)")
59 ("no-fill", po_switch(), "Do not fill events into the root file (mainly for debugging purposes, e.g. performance studies)")
60 ;
61
62 po::positional_options_description p;
63 p.add("file", 1); // The 1st positional options (n=1)
64 p.add("out", 1); // The 2nd positional options (n=1)
65
66 conf.AddOptions(control);
67 conf.AddOptions(ascii);
68 conf.AddOptions(root);
69 conf.AddOptions(Tools::Splitting::options());
70 conf.SetArgumentPositions(p);
71}
72
73void PrintUsage()
74{
75 cout <<
76 "rootifysql - Converts the result of a mysql query into a root file\n"
77 "\n"
78 "For convenience, this documentation uses the extended version of the options, "
79 "refer to the output below to get the abbreviations.\n"
80 "\n"
81 "Writes the result of a mysql query into a root file. For each column, a branch is "
82 "created of type double with the field name as name. This is usually the column name "
83 "if not specified otherwise by the AS mysql directive.\n"
84 "\n"
85 "Columns with CHAR or VARCHAR as field type are ignored. DATETIME, DATE and TIME "
86 "columns are converted to unix time (time_t). Rows containing any file which is "
87 "NULL are skipped if not suppressed by the --ignore-null option. Ideally, the query "
88 "is compiled in a way that no NULL field is returned. With the --display option the "
89 "result of the request is printed on the screen (NULL skipping still in action). "
90 "This can be useful to create an ascii file or to show results as 'SHOW DATABASES' "
91 "or 'EXPLAIN table'. To redirect the contents into an ascii file, the option -v0 "
92 "is useful. To suppress writing to an output file --null can be used.\n"
93 "\n"
94 "The default is to read the query from a file called rootify.sql. Except if a different "
95 "filename is specified by the --file option or a query is given with --query.\n"
96 "\n"
97 "As a trick, the rootify.sql file can be made excutable (chmod u+x rootify.sql). "
98 "If the first line contains '#!rootifysql', the script can be executed directly.\n"
99 "\n"
100 "Columns whose name start with @ are skipped. If you want them in your output file "
101 "give them a name using AS, e.g. 'SELECT @A:=5 AS A'.\n"
102 "\n"
103 "You can use variables in your sql query like @MyVar and define them on the "
104 "command line. In this example with --var.MyVar=5\n"
105 "\n"
106 "You can use environment definitions for substitutions in your SQL query. "
107 "For example --env.TEST=5 would replace $TEST or ${TEST} in your query by 5."
108 "If you specify one environment variable more than once, a list is created. "
109 "For example --env.TEST=1 --env.TEST=2 --env.TEST=3 would substitute "
110 "$TEST or ${TEST} by '1, 2, 3'. This is useful for the SQL `IN` keyword. "
111 "You can also read the values for an enviroment substitution from a file "
112 "(one element per line), e.g. --env.TEST=file.txt. Empty lines and lines "
113 "starting with a # are skipped.\n"
114 "\n"
115 "Comments in the query-file can be placed according to the SQL standard inline "
116 "/*comment*/ or introduced with # (shell script style) or -- (SQL style).\n"
117 "\n"
118 << Tools::Splitting::usage() <<
119 "\n"
120 "In case of success, 0 is returned, a value>0 otherwise.\n"
121 "\n"
122 "Usage: rootifysql [rootify.sql [rootify.root]] [-u URI] [-q query|-f file] [-i] [-o out] [-f] [-cN] [-t tree] [-vN]\n"
123 "\n"
124 ;
125 cout << endl;
126}
127
128struct ExplainParser
129{
130 string sql;
131
132 vector<string> vec;
133
134 string substitute(string _str, const boost::regex &expr)
135 {
136 boost::smatch match;
137 while (boost::regex_search(_str, match, expr, boost::regex_constants::format_first_only))
138 {
139 const auto &len = match.length();
140 const auto &pos = match.position();
141 const auto &str = match.str();
142
143 const auto it = find(vec.cbegin(), vec.cend(), str);
144 const size_t id = it==vec.cend() ? vec.size() : it-vec.cbegin();
145
146 _str.replace(pos, len, "{"+to_string(id)+"}");
147
148 if (it==vec.cend())
149 vec.push_back(str);//.substr(1, str.size()-2));
150 }
151
152 return _str;
153 }
154
155 string substitute(const string &str, const string &expr)
156 {
157 return substitute(str, boost::regex(expr));
158 }
159
160 vector<string> queries;
161
162 string resub(string str)
163 {
164 // search for "KEYWORD expression"
165 boost::regex reg("\\{[0-9]+\\}");
166
167 boost::smatch match;
168 while (boost::regex_search(str, match, reg, boost::regex_constants::format_first_only))
169 {
170 const auto &len = match.length();
171 const auto &pos = match.position();
172 const auto &arg = match.str(); // Argument
173
174 const auto idx = atoi(arg.c_str()+1);
175
176 str.replace(pos, len, resub(vec[idx]));
177 }
178
179 return str;
180 }
181
182 void expression(string expr, size_t indent=0)
183 {
184 if (expr[0]=='{')
185 {
186 const auto idx = atoi(expr.c_str()+1);
187
188 // This is a subquery
189 if (vec[idx].substr(0,3)=="(/*")
190 {
191 cout << setw(indent) << ' ' << "(\n";
192 find_tokens(vec[idx], indent+4);
193 cout << setw(indent) << ' ' << ") ";
194 }
195 else
196 // This is just something to substitute back
197 if (vec[idx].substr(0,2)=="({")
198 {
199 cout << setw(indent) << ' ' << "(" << resub(vec[idx]) << ") ";
200 }
201 else
202 {
203 if (indent>0)
204 cout << setw(indent) << ' ';
205 cout << resub(vec[idx]);
206 }
207 }
208 else
209 {
210 if (indent>0)
211 cout << setw(indent) << ' ';
212 cout << resub(expr);
213 }
214 }
215
216 void find_tokens(string str, size_t indent=0)
217 {
218 // ( COMMENT )?( TOKEN )?(( {NNN} | NNN )( AS|ON ( {NNN}) ))?(,)?)
219 //regex reg("(\\/\\*\\ select\\#[0-9]+\\ \\*\\/\\ *)?([a-zA-Z ]+)?((\\{[0-9]+\\}|[0-9]+)(\\ ?([Aa][Ss]|[Oo][Nn])\\ ?(\\{[0-9]+\\}))?(,)?)");
220
221 const string _com = "\\/\\*\\ select\\#[0-9]+\\ \\*\\/\\ *";
222
223 const string _tok = "[a-zA-Z_ ]+";
224
225 const string _nnn = "\\{[0-9]+\\}|[0-9]+";
226
227 const string _as = "\\ ?([Aa][Ss])\\ ?";
228
229 // ( _nnn ) ( _as ( _nnn ))?(,)? // can also match noting in between two {NNN}
230 const string _exp = "("+_nnn+")" + "("+_as+"("+_nnn+"))?(,)?";
231
232 // Matche: ( _com )? ( ( _tok )? ( _exp ) | ( _tok ) )
233 boost::regex reg("("+_com+")?" + "(" + "("+_tok+")?"+"("+_exp+")" + "|" + "("+_tok+")" + ")");
234
235 boost::smatch match;
236 while (boost::regex_search(str, match, reg, boost::regex_constants::format_first_only))
237 {
238
239 const auto &com = match.str(1); // comment
240 const auto &tok1 = Tools::Trim(match.str(3)); // token with expression
241 const auto &arg1 = match.str(5); // argument 1
242 const auto &as = match.str(7); // as
243 const auto &arg2 = match.str(8); // argument 2
244 const auto &comma = match.str(9); // comma
245 const auto &tok2 = Tools::Trim(match.str(10)); // token without expression
246
247 if (!com.empty())
248 cout << setw(indent) << ' ' << "\033[34m" << com << "\033[0m" << '\n';
249
250 if (!tok1.empty())
251 cout << setw(indent) << ' ' << "\033[32m" << tok1 << "\033[0m" << '\n';
252 if (!tok2.empty())
253 cout << setw(indent) << ' ' << "\033[32m" << tok2 << "\033[0m" << '\n';
254
255 if (!arg1.empty())
256 {
257 expression(arg1, indent+4);
258
259 if (!as.empty())
260 cout << " \033[33m" << as << "\033[0m ";
261
262 if (!arg2.empty())
263 expression(arg2);
264
265 if (!comma.empty())
266 cout << ',';
267
268 cout << '\n';
269 }
270
271 str = str.substr(match.position()+match.length());
272 }
273 }
274
275
276 ExplainParser(const string &_sql) : sql(_sql)
277 {
278 // substitute all strings
279 sql = substitute(sql, "'[^']*'");
280
281 // substitute all escaped sequences (`something`.`something-else`)
282 sql = substitute(sql, "`[^`]*`(\\.`[^`]*`)*");
283
284 // substitute all paranthesis
285 sql = substitute(sql, "[a-zA-Z0-9_]*\\([^\\(\\)]*\\)");
286
287 //cout << sql << "\n\n";
288 find_tokens(sql);
289 cout << endl;
290 }
291};
292
293// Remove queries...
294void format(string sql)
295{
296 ExplainParser p(sql);
297
298 /*
299
300 SELECT
301 [ALL | DISTINCT | DISTINCTROW ]
302 [HIGH_PRIORITY]
303 [STRAIGHT_JOIN]
304 [SQL_SMALL_RESULT] [SQL_BIG_RESULT] [SQL_BUFFER_RESULT]
305 [SQL_CACHE | SQL_NO_CACHE] [SQL_CALC_FOUND_ROWS]
306 select_expr [, select_expr ...]
307 [FROM table_references
308 [PARTITION partition_list]
309 [WHERE where_condition]
310 [GROUP BY {col_name | expr | position}, ... [WITH ROLLUP]]
311 [HAVING where_condition]
312 [WINDOW window_name AS (window_spec)
313 [, window_name AS (window_spec)] ...]
314 [ORDER BY {col_name | expr | position}
315 [ASC | DESC], ... [WITH ROLLUP]]
316 [LIMIT {[offset,] row_count | row_count OFFSET offset}]
317 [INTO OUTFILE 'file_name'
318 [CHARACTER SET charset_name]
319 export_options
320 | INTO DUMPFILE 'file_name'
321 | INTO var_name [, var_name]]
322 [FOR {UPDATE | SHARE} [OF tbl_name [, tbl_name] ...] [NOWAIT | SKIP LOCKED]
323 | LOCK IN SHARE MODE]]
324 */
325
326 /*
327table_references:
328 escaped_table_reference [, escaped_table_reference] ...
329
330escaped_table_reference:
331 table_reference
332 | { OJ table_reference }
333
334table_reference:
335 table_factor
336 | join_table
337
338table_factor:
339 tbl_name [PARTITION (partition_names)]
340 [[AS] alias] [index_hint_list]
341 | table_subquery [AS] alias [(col_list)]
342 | ( table_references )
343
344join_table:
345 table_reference [INNER | CROSS] JOIN table_factor [join_condition]
346 | table_reference STRAIGHT_JOIN table_factor
347 | table_reference STRAIGHT_JOIN table_factor ON conditional_expr
348 | table_reference {LEFT|RIGHT} [OUTER] JOIN table_reference join_condition
349 | table_reference NATURAL [INNER | {LEFT|RIGHT} [OUTER]] JOIN table_factor
350
351join_condition:
352 ON conditional_expr
353 | USING (column_list)
354
355index_hint_list:
356 index_hint [, index_hint] ...
357
358index_hint:
359 USE {INDEX|KEY}
360 [FOR {JOIN|ORDER BY|GROUP BY}] ([index_list])
361 | IGNORE {INDEX|KEY}
362 [FOR {JOIN|ORDER BY|GROUP BY}] (index_list)
363 | FORCE {INDEX|KEY}
364 [FOR {JOIN|ORDER BY|GROUP BY}] (index_list)
365
366index_list:
367 index_name [, index_name] ...
368 */
369
370}
371
372int finish(Database &connection, const uint16_t &verbose, const bool &profiling, const bool &print_connection)
373{
374 if (verbose>0)
375 {
376 try
377 {
378 const auto resw =
379 connection.query("SHOW WARNINGS").store();
380
381 if (resw.num_rows()>0)
382 cout << "\n" << resw.num_rows() << " Warning(s) issued:\n\n";
383
384 for (size_t i=0; i<resw.num_rows(); i++)
385 {
386 const mysqlpp::Row &roww = resw[i];
387
388 cout << roww["Level"] << '[' << roww["Code"] << "]: ";
389 cout << roww["Message"] << '\n';
390 }
391 cout << endl;
392
393 }
394 catch (const exception &e)
395 {
396 cerr << "\nSHOW WARNINGS\n\n";
397 cerr << "SQL query failed:\n" << e.what() << endl;
398 return 1;
399 }
400 }
401
402 if (profiling)
403 {
404 try
405 {
406 const auto N =
407 connection.query("SHOW PROFILES").store().num_rows();
408
409 const auto resp =
410 connection.query("SHOW PROFILE ALL FOR QUERY "+to_string(verbose?N-1:N)).store();
411
412 cout << '\n';
413 cout << left;
414 cout << setw(26) << "Status" << ' ';
415 cout << right;
416 cout << setw(11) << "Duration" << ' ';
417 cout << setw(11) << "CPU User" << ' ';
418 cout << setw(11) << "CPU System" << '\n';
419 cout << "--------------------------------------------------------------\n";
420 for (size_t i=0; i<resp.num_rows(); i++)
421 {
422 const mysqlpp::Row &rowp = resp[i];
423
424 cout << left;
425 cout << setw(26) << rowp["Status"] << ' ';
426 cout << right;
427 cout << setw(11) << rowp["Duration"] << ' ';
428 cout << setw(11) << rowp["CPU_user"] << ' ';
429 cout << setw(11) << rowp["CPU_system"] << '\n';
430 }
431 cout << "--------------------------------------------------------------\n";
432 cout << endl;
433 }
434 catch (const exception &e)
435 {
436 cerr << "\nSHOW PROFILE ALL\n\n";
437 cerr << "SQL query failed:\n" << e.what() << '\n' <<endl;
438 return 2;
439 }
440 }
441
442 if (print_connection)
443 {
444 try
445 {
446 // Exchange _send and _received as it is the view of the server
447 const auto &res1 = connection.query("SHOW STATUS LIKE 'Bytes_%'").store();
448 cout << left << setw(16) << res1[1]["Variable_name"] << ' ' << Tools::Scientific(res1[0]["Value"]) << endl;
449 cout << left << setw(16) << res1[0]["Variable_name"] << ' ' << Tools::Scientific(res1[1]["Value"]) << endl;
450 cout << endl;
451 }
452 catch (const exception &e)
453 {
454 cerr << "\nSHOW STATUS LIKE 'Bytes_%'\n\n";
455 cerr << "SQL query failed:\n" << e.what() << endl;
456 return 3;
457 }
458 }
459
460 if (verbose>0)
461 cout << "Success!\n" << endl;
462 return 0;
463
464}
465
466int main(int argc, const char* argv[])
467{
468 Time start;
469
470 gROOT->SetBatch();
471
472 Configuration conf(argv[0]);
473 conf.SetPrintUsage(PrintUsage);
474 SetupConfiguration(conf);
475
476 if (!conf.DoParse(argc, argv))
477 return 127;
478
479 // ----------------------------- Evaluate options --------------------------
480 const string uri = conf.Get<string>("uri");
481 const string out = conf.Get<string>("out");
482 const string file = conf.Get<string>("file");
483 const string tree = conf.Get<string>("tree");
484 const bool force = conf.Get<bool>("force");
485 const bool ignorenull = conf.Get<bool>("ignore-null");
486 const bool update = conf.Get<bool>("update");
487 const bool display = conf.Get<bool>("display");
488 const string write = conf.Get<string>("write");
489 const bool noout = conf.Get<bool>("null");
490 const bool nofill = conf.Get<bool>("no-fill");
491 const bool explain = conf.Get<bool>("explain");
492 const bool profiling = conf.Get<bool>("profiling");
493 const uint16_t verbose = conf.Get<uint16_t>("verbose");
494 const uint16_t compression = conf.Get<uint16_t>("compression");
495 const string delimiter = conf.Has("delimiter") ? conf.Get<string>("delimiter") : "\t";
496
497 const bool copy_all = conf.Get<bool>("copy-all");
498 const bool copy_shabang = conf.Get<bool>("copy-shabang");
499 const bool copy_header = copy_all || conf.Get<bool>("copy-header");
500 const bool copy_query = copy_all || conf.Get<bool>("copy-query");
501 const bool copy_comments = copy_all || conf.Get<bool>("copy-comments");
502
503 const vector<string> _ignore = conf.Vec<string>("ignore");
504 const bool print_connection = conf.Get<bool>("print-connection");
505 //const vector<Map> mymap = conf.Vec<Map>("map");
506
507 // ----------------------- Setup splitting ---------------------------------
508
509 const auto vars = conf.GetWildcardOptions("var.*");
510
511 vector<string> variables;
512 for (auto var=vars.cbegin(); var!=vars.cend(); var++)
513 variables.emplace_back('@'+var->substr(4)+":="+Tools::Trim(conf.Get<string>(*var)));
514
515 // -------------------------------------------------------------------------
516
517 /*const*/ Tools::Splitting split(conf);
518
519 if (verbose>0)
520 {
521 cout << "\n------------------------ Rootify SQL -------------------------" << endl;
522 cout << "Start Time: " << Time::sql << Time(Time::local) << endl;
523 }
524
525 string query = conf.Get<string>("query");
526 if (query.empty())
527 {
528 if (verbose>0)
529 cout << "Reading query from file '" << file << "'." << endl;
530
531 ifstream fin(file);
532 if (!fin)
533 {
534 cerr << "Could not open query in '" << file << "': " << strerror(errno) << endl;
535 return 4;
536 }
537
538 getline(fin, query, (char)fin.eof());
539 }
540
541 if (query.empty())
542 {
543 cerr << "No query specified." << endl;
544 return 5;
545 }
546
547 // -------------------------------------------------------------------------
548
549 map<string, vector<string>> envs;
550
551 const auto &envs1 = conf.GetWildcardOptions("env.*");
552 for (auto env=envs1.cbegin(); env!=envs1.cend(); env++)
553 envs[env->substr(4)] = conf.Vec<string>(*env);
554
555 const auto &envs2 = conf.GetWildcardOptions("list.*");
556 for (auto env=envs2.cbegin(); env!=envs2.cend(); env++)
557 {
558 const string fname = conf.Get<string>(*env);
559 const string &ident = env->substr(5);
560
561 ifstream fin(fname);
562 if (!fin)
563 {
564 cerr << "Could not open environment in '" << fname << "' for ${" << ident << "}: " << strerror(errno) << endl;
565 return 6;
566 }
567 for (string line; getline(fin, line); )
568 {
569 const auto &l = Tools::Trim(line);
570 if (!l.empty() && l[0]!='#')
571 envs[ident].push_back(line);
572 }
573
574 if (verbose>0)
575 cout << "Found " << envs[ident].size() << " list element(s) for ${" << ident << "}" << endl;
576 }
577
578 for (auto env=envs.cbegin(); env!=envs.cend(); env++)
579 {
580 boost::regex rexpr("\\$(\\{"+env->first+"\\}|"+env->first+"\\b)");
581 query = boost::regex_replace(query, rexpr, boost::join(env->second, ", "));
582 }
583
584 // -------------------------- Check for file permssion ---------------------
585 // Strictly speaking, checking for write permission and existance is not necessary,
586 // but it is convenient that the user does not find out that it failed after
587 // waiting long for the query result
588 //
589
590 // I am using root here instead of boost to be
591 // consistent with the access pattern by TFile
592 TString path(noout?"/dev/null":out.c_str());
593 gSystem->ExpandPathName(path);
594
595 if (!noout)
596 {
597 FileStat_t stat;
598 const Int_t exist = !gSystem->GetPathInfo(path, stat);
599 const Bool_t _write = !gSystem->AccessPathName(path, kWritePermission) && R_ISREG(stat.fMode);
600
601 if ((update && !exist) || (update && exist && !_write) || (force && exist && !_write))
602 {
603 cerr << "File '" << path << "' is not writable." << endl;
604 return 7;
605 }
606
607 if (!update && !force && exist)
608 {
609 cerr << "File '" << path << "' already exists." << endl;
610 return 8;
611 }
612 }
613
614 Time start2;
615
616 // --------------------------- Connect to database -------------------------------------------------
617
618 if (*query.rbegin()!='\n')
619 query += '\n';
620
621 if (verbose>0)
622 {
623 cout << "Connecting to database...\n";
624 cout << "Client Version: " << mysqlpp::Connection().client_version() << endl;
625 }
626
627 Database connection(uri); // Keep alive while fetching rows
628
629 if (verbose>0)
630 cout << "Server Version: " << connection.server_version() << endl;
631
632 if (print_connection)
633 {
634 try
635 {
636 const auto &res1 = connection.query("SHOW STATUS LIKE 'Compression'").store();
637 cout << "Compression of database connection is " << string(res1[0][1]) << endl;
638
639 const auto &res2 = connection.query("SHOW STATUS LIKE 'Ssl_cipher'").store();
640 cout << "Connection to databases is " << (string(res2[0][1]).empty()?"UNENCRYPTED":"ENCRYPTED ("+string(res2[0][1])+")") << endl;
641 }
642 catch (const exception &e)
643 {
644 cerr << "\nSHOW STATUS LIKE 'Compression'\n\n";
645 cerr << "SQL query failed:\n" << e.what() << endl;
646 return 9;
647 }
648 }
649
650 try
651 {
652 if (profiling)
653 connection.query("SET PROFILING=1").execute();
654 }
655 catch (const exception &e)
656 {
657 cerr << "\nSET profiling=1\n\n";
658 cerr << "SQL query failed:\n" << e.what() << endl;
659 return 10;
660 }
661
662 // -------------------------- Set user defined variables -------------------
663 if (variables.size()>0)
664 {
665 if (verbose>0)
666 cout << "Setting user defined variables..." << endl;
667
668 const string varset =
669 "SET\n "+boost::algorithm::join(variables, ",\n ");
670
671 try
672 {
673 connection.query(varset).execute();
674 }
675 catch (const exception &e)
676 {
677 cerr << '\n' << varset << "\n\n";
678 cerr << "SQL query failed:\n" << e.what() << endl;
679 return 11;
680 }
681
682 if (verbose>2)
683 cout << '\n' << varset << '\n' << endl;
684 }
685
686 // ------------------------- Explain query if requested --------------------
687
688 if (explain)
689 {
690 try
691 {
692 const auto res0 =
693 connection.query("EXPLAIN FORMAT=JSON "+query).store();
694
695 cout << res0[0][0] << endl;
696 cout << endl;
697
698 const mysqlpp::StoreQueryResult res1 =
699 connection.query("EXPLAIN "+query).store();
700
701 for (size_t i=0; i<res1.num_rows(); i++)
702 {
703 const mysqlpp::Row &row = res1[i];
704
705 cout << "\nid : " << row["id"];
706 cout << "\nselect type : " << row["select_type"];
707
708 if (!row["table"].is_null())
709 cout << "\ntable : " << row["table"];
710
711 if (!row["partitions"].is_null())
712 cout << "\npartitions : " << row["partitions"];
713
714 if (!row["key"].is_null())
715 cout << "\nselected key : " << row["key"] << " [len=" << row["key_len"] << "] out of (" << row["possible_keys"] << ")";
716
717 if (!row["type"].is_null())
718 cout << "\njoin type : " << row["type"];
719
720 //if (!row["possible_keys"].is_null())
721 // cout << "\npossible_keys: " << row["possible_keys"];
722
723 //if (!row["key_len"].is_null())
724 // cout << "\nkey_len : " << row["key_len"];
725
726 if (!row["ref"].is_null())
727 cout << "\nref : (" << row["ref"] << ") compared to the index";
728
729 if (!row["rows"].is_null())
730 cout << "\nrows : " << row["rows"];
731
732 if (!row["filtered"].is_null())
733 cout << "\nfiltered : " << row["filtered"];
734
735 if (!row["extra"].is_null())
736 cout << "\nExtra : " << row["extra"];
737
738 cout << endl;
739 }
740
741 cout << endl;
742
743 const mysqlpp::StoreQueryResult res2 =
744 connection.query("SHOW WARNINGS").store();
745
746 for (size_t i=0; i<res2.num_rows(); i++)
747 {
748 const mysqlpp::Row &row = res2[i];
749
750 // 1003 //
751 cout << row["Level"] << '[' << row["Code"] << "]:\n";
752 if (uint32_t(row["Code"])==1003)
753 format(row["Message"].c_str());
754 else
755 cout << row["Message"] << '\n' << endl;
756
757 }
758
759 }
760 catch (const exception &e)
761 {
762 cerr << '\n' << query << "\n\n";
763 cerr << "SQL query failed:\n" << e.what() << endl;
764 return 12;
765 }
766
767 return 0;
768 }
769
770 // -------------------------- Request data from database -------------------
771 if (verbose>0)
772 cout << "Requesting data... please be patient!" << endl;
773
774 if (verbose>2)
775 cout << '\n' << query << endl;
776
777 const mysqlpp::UseQueryResult res =
778 connection.query(query).use();
779
780 // -------------------------------------------------------------------------
781
782 if (verbose>0)
783 {
784 cout << "Opening file '" << path << "' [compression=" << compression << "]...\n";
785 cout << "Writing data to tree '" << tree << "'" << (nofill?" (--skipped--)":"") << endl;
786 split.print();
787 }
788
789 // ----------------------------- Open output file --------------------------
790 TFile tfile(path, update?"UPDATE":(force?"RECREATE":"CREATE"), "Rootify SQL", compression);
791 if (tfile.IsZombie())
792 return 13;
793
794 // -------------------------------------------------------------------------
795
796 // get the first row to get the field description
797 mysqlpp::Row row = res.fetch_row();
798 if (!row)
799 {
800 cerr << "Empty set returned... nothing to write." << endl;
801 return finish(connection, verbose, profiling, print_connection)+20;
802 }
803
804 if (verbose>0)
805 cout << "Trying to setup " << row.size() << " branches..." << endl;
806
807 if (verbose>1)
808 cout << endl;
809
810 const mysqlpp::FieldNames &l = *row.field_list().list;
811
812 vector<double> buf(l.size());
813 vector<uint8_t> typ(l.size(),'n'); // n=number [double], d is used for DateTime
814
815 UInt_t cols = 0;
816
817 // IMPLEMENT FILE SPLITTING!
818 // OpenFile(tree, query)
819 // SetupColumns
820 // WriteRow
821 // CloseFile
822
823 // Ratio[3]: 50%, 20%, 30%
824 // File[x3]: root, cout, fout
825
826
827 // -------------------- Configure branches of TTree ------------------------
828 vector<TTree*> ttree;
829
830 if (split.empty())
831 ttree.emplace_back(new TTree(tree.c_str(), query.c_str()));
832 else
833 for (size_t i=0; i<split.size(); i++)
834 ttree.emplace_back(new TTree((tree+"["+to_string(i)+"]").c_str(), query.c_str()));
835
836 size_t skipat = 0;
837 size_t skipreg = 0;
838 for (size_t i=0; i<l.size(); i++)
839 {
840 const string t = row[i].type().sql_name();
841
842 if (t.find("DATETIME")!=string::npos)
843 typ[i] = 'd';
844 else
845 if (t.find("DATE")!=string::npos)
846 typ[i] = 'D';
847 else
848 if (t.find("TIME")!=string::npos)
849 typ[i] = 'T';
850 else
851 if (t.find("VARCHAR")!=string::npos)
852 typ[i] = 'V';
853 else
854 if (t.find("CHAR")!=string::npos)
855 typ[i] = 'C';
856
857 bool found = false;
858 for (auto pattern=_ignore.cbegin(); pattern!=_ignore.cend(); pattern++)
859 {
860 if (boost::regex_match(l[i], boost::regex(*pattern)))
861 {
862 found = true;
863 typ[i] = '-';
864 skipreg++;
865 break;
866 }
867 }
868
869 if (l[i][0]=='@')
870 {
871 typ[i] = '@';
872 skipat++;
873 }
874
875 const bool use = l[i][0]!='@' && typ[i]!='V' && typ[i]!='C' && !found;
876
877 if (verbose>1)
878 cout << (use?" + ":" - ") << l[i].c_str() << " [" << t << "] {" << typ[i] << "}\n";
879
880 if (use)
881 {
882 // string name = l[i];
883 // for (const auto &m: mymap)
884 // name = boost::regex_replace(l[i], boost::regex(m.first), m.second);
885
886 for (auto it=ttree.begin(); it!=ttree.end(); it++)
887 it[0]->Branch(l[i].c_str(), buf.data()+i);
888 cols++;
889 }
890 }
891 // -------------------------------------------------------------------------
892
893 if (verbose>1)
894 cout << endl;
895 if (verbose>0)
896 {
897 if (skipreg)
898 cout << skipreg << " branches skipped due to ignore list." << endl;
899 if (skipat)
900 cout << skipat << " branches skipped due to name starting with @." << endl;
901 cout << "Configured " << cols << " branches.\nFilling branches..." << endl;
902 }
903
904 // ------------------------- Open the ascii files --------------------------
905
906 vector<ofstream> fout;
907 if (!write.empty())
908 {
909 vector<string> names;
910 if (split.empty())
911 names.emplace_back(write);
912 else
913 for (size_t i=0; i<split.size(); i++)
914 names.emplace_back(write+"-"+to_string(i));
915
916 for (auto it=names.cbegin(); it!=names.cend(); it++)
917 {
918 fout.emplace_back(*it);
919 if (!*fout.rbegin())
920 cout << "WARNING: Writing to '" << write << "' failed: " << strerror(errno) << endl;
921 }
922 }
923
924 // ----------------------- Prepare the ascii comment -----------------------
925
926 string contents;
927
928 istringstream istr(query);
929 size_t line = 0;
930 bool header = true;
931 while (istr)
932 {
933 string ibuf;
934 getline(istr, ibuf);
935 const string sbuf = Tools::Trim(ibuf);
936
937 const bool shabang = line==0 && ibuf[0]=='#' && ibuf[1]=='!';
938 const bool comment = sbuf[0]=='#' && !shabang;
939 const bool isquery = !shabang && !comment;
940 if (isquery)
941 header = false;
942
943 line++;
944
945 if ((copy_shabang && shabang) ||
946 (copy_header && comment && header) ||
947 (copy_query && isquery) ||
948 (copy_comments && comment && !header))
949 contents += '#' + ibuf + '\n';
950 }
951
952 // ----------------------- Write the ascii headers -------------------------
953
954 ostringstream htxt;
955 if (display || !fout.empty())
956 htxt << row.field_list(delimiter.c_str());
957
958 if (display)
959 {
960 cout << endl;
961 cout << contents << endl;
962 cout << "# " << htxt.str() << endl;
963 }
964 for (auto ff=fout.begin(); ff!=fout.end(); ff++)
965 {
966 *ff << contents;
967 *ff << "# " << htxt.str() << endl;
968 }
969
970 // ---------------------- Fill TTree with DB data --------------------------
971
972 size_t count = 0;
973 size_t skip = 0;
974 do
975 {
976 size_t index = split.index(count++);
977
978 ostringstream rtxt;
979 if (display || !fout.empty())
980 rtxt << row.value_list(delimiter.c_str(), mysqlpp::do_nothing);
981
982 if (display)
983 cout << rtxt.str() << '\n';
984 if (!fout.empty())
985 fout[index] << rtxt.str() << '\n';
986
987 size_t idx=0;
988 for (auto col=row.begin(); col!=row.end(); col++, idx++)
989 {
990 if (!ignorenull && col->is_null())
991 {
992 skip++;
993 break;
994 }
995
996 switch (typ[idx])
997 {
998 case 'd':
999 buf[idx] = time_t((mysqlpp::DateTime)(*col));
1000 break;
1001
1002 case 'D':
1003 buf[idx] = time_t((mysqlpp::Date)(*col));
1004 break;
1005
1006 case 'T':
1007 buf[idx] = time_t((mysqlpp::Time)(*col));
1008 break;
1009
1010 case 'V':
1011 case 'C':
1012 case '-':
1013 case '@':
1014 break;
1015
1016 default:
1017 buf[idx] = atof(col->c_str());
1018 }
1019 }
1020
1021 if (idx==row.size() && !nofill)
1022 ttree[index]->Fill();
1023
1024 row = res.fetch_row();
1025
1026
1027 } while (row);
1028
1029 // -------------------------------------------------------------------------
1030
1031 if (display)
1032 cout << '\n' << endl;
1033
1034 if (verbose>0)
1035 {
1036 cout << count << " rows fetched." << endl;
1037 if (skip>0)
1038 cout << skip << " rows skipped due to NULL field." << endl;
1039
1040 for (size_t i=0; i<ttree.size(); i++)
1041 cout << ttree[i]->GetEntries() << " rows filled into tree #" << i << "." << endl;
1042 }
1043
1044 for (auto it=ttree.begin(); it!=ttree.end(); it++)
1045 (*it)->Write();
1046 tfile.Close();
1047
1048 if (verbose>0)
1049 {
1050 const auto sec = Time().UnixTime()-start.UnixTime();
1051
1052 cout << Tools::Scientific(tfile.GetSize()) << "B written to disk.\n";
1053 cout << "File closed.\n";
1054 cout << "Execution time: " << sec << "s ";
1055 cout << "(" << Tools::Fractional(sec/count) << "s/row)\n";
1056 cout << "--------------------------------------------------------------" << endl;
1057 }
1058
1059 return finish(connection, verbose, profiling, print_connection);
1060}
Note: See TracBrowser for help on using the repository browser.