source: trunk/FACT++/src/rootifysql.cc@ 20086

Last change on this file since 20086 was 20056, checked in by tbretz, 4 years ago
Added printing of connection information and a default query on all in rootifysql
File size: 40.1 KB
Line 
1#include "Database.h"
2
3#include <boost/regex.hpp>
4#include <boost/tokenizer.hpp>
5#include <boost/algorithm/string.hpp>
6#include <boost/algorithm/string/join.hpp>
7
8#include "tools.h"
9#include "Time.h"
10#include "Splitting.h"
11#include "FileEntry.h"
12
13#include <TROOT.h>
14#include <TSystem.h>
15#include <TFile.h>
16#include <TTree.h>
17
18using namespace std;
19
20// ------------------------------------------------------------------------
21
22void SetupConfiguration(Configuration &conf)
23{
24 po::options_description control("Database options");
25 control.add_options()
26 ("uri,u", var<string>()->required(), "Database link as in\n\tuser:password@server[:port]/database[?compress=0|1].")
27 ("query,q", var<string>(""), "MySQL query (overwrites --file, used as table name if without spaces)")
28 ("file", var<string>("rootify.sql"), "An ASCII file with the MySQL query (overwrites --query)")
29 ("ignore-null,i", po_switch(), "Do not skip rows containing any NULL field")
30 ("display,d", po_switch(), "Displays contents on the screen (most usefull in combination with mysql statements as SHOW or EXPLAIN)")
31 ("explain", po_switch(), "Requests an EXPLAIN from the server (shows the server optimized query)\nsee also https://dev.mysql.com/doc/refman/explain-output.html")
32 ("profiling", po_switch(), "Turn on profiling and print profile")
33 ("var.*", var<string>(), "Predefined SQL user variables (@VAR)")
34 ("env.*", vars<string>(), "Predefined environment for substitutions in the query ($ENV)")
35 ("list.*", var<string>(), "Predefined environment for substitutions in the query ($ENV). The list is read from the given file (one list entry per line)")
36 ("print-connection", po_switch(), "Print database connection information")
37 ("verbose,v", var<uint16_t>(1), "Verbosity (0: quiet, 1: default, 2: more, 3, ...)")
38 ;
39
40 po::options_description ascii("ASCII output");
41 ascii.add_options()
42 ("write,w", var<string>(""), "Write output to an ascii file")
43 ("delimiter", var<string>(), "The delimiter used if contents are displayed with --display (default=\\t)")
44 ("copy-shabang", po_switch(), "Copy the sha-bang line if exists to the output file")
45 ("copy-header", po_switch(), "Copy the header (all line starting with '#' up to the first non-comment line to the output file")
46 ("copy-query", po_switch(), "Copy the query to the ascii output file")
47 ("copy-comments", po_switch(), "Copy all lines starting with '#' to the output file which are not part of header")
48 ("copy-all", po_switch(), "An alias for --copy-header --copy-query --copy-comments")
49 ;
50
51 po::options_description root("Root file options");
52 root.add_options()
53 ("out,o", var<string>("rootify.root"), "Output root file name")
54 ("force,f", po_switch(), "Force overwriting an existing root file ('RECREATE')")
55 ("update", po_switch(), "Update an existing root file with the new tree ('UPDATE')")
56 ("compression,c", var<uint16_t>(1), "zlib compression level for the root file")
57 ("tree,t", var<string>("Result"), "Name of the root tree")
58 ("accurate", po_switch(), "Accurate type conversion, otherwise all branches are creates as double which is often more convenient.")
59 ("ignore", vars<string>(), "Ignore the given columns")
60 ("null,n", po_switch(), "Redirect the root output file to /dev/null (mainly for debugging purposes, e.g. performance studies)")
61 ("no-fill", po_switch(), "Do not fill events into the root file (mainly for debugging purposes, e.g. performance studies)")
62 ;
63
64 po::positional_options_description p;
65 p.add("file", 1); // The 1st positional options (n=1)
66 p.add("out", 1); // The 2nd positional options (n=1)
67
68 conf.AddOptions(control);
69 conf.AddOptions(ascii);
70 conf.AddOptions(root);
71 conf.AddOptions(Tools::Splitting::options());
72 conf.SetArgumentPositions(p);
73}
74
75void PrintUsage()
76{
77 cout <<
78 "rootifysql - Converts the result of a mysql query into a root file\n"
79 "\n"
80 "For convenience, this documentation uses the extended version of the options, "
81 "refer to the output below to get the abbreviations.\n"
82 "\n"
83 "Writes the result of a mysql query into a root file. For each column, a branch is "
84 "created of type double with the field name as name. This is usually the column name "
85 "if not specified otherwise by the AS mysql directive.\n"
86 "\n"
87 "Columns with CHAR or VARCHAR as field type are ignored. DATETIME, DATE and TIME "
88 "columns are converted to unix time (time_t). Rows containing any file which is "
89 "NULL are skipped if not suppressed by the --ignore-null option. Ideally, the query "
90 "is compiled in a way that no NULL field is returned. With the --display option the "
91 "result of the request is printed on the screen (NULL skipping still in action). "
92 "This can be useful to create an ascii file or to show results as 'SHOW DATABASES' "
93 "or 'EXPLAIN table'. To redirect the contents into an ascii file, the option -v0 "
94 "is useful. To suppress writing to an output file --null can be used.\n"
95 "\n"
96 "The default is to read the query from a file called rootify.sql. Except if a different "
97 "filename is specified by the --file option or a query is given with --query. "
98 "A special case is if the 'query' does not contain any whitespace. Then the "
99 "query is considered as a table name and the query 'SELECT * FROM table' is "
100 "executed.\n"
101 "\n"
102 "As a trick, the rootify.sql file can be made excutable (chmod u+x rootify.sql). "
103 "If the first line contains '#!rootifysql', the script can be executed directly.\n"
104 "\n"
105 "Columns whose name start with @ are skipped. If you want them in your output file "
106 "give them a name using AS, e.g. 'SELECT @A:=5 AS A'.\n"
107 "\n"
108 "You can use variables in your sql query like @MyVar and define them on the "
109 "command line. In this example with --var.MyVar=5\n"
110 "\n"
111 "You can use environment definitions for substitutions in your SQL query. "
112 "For example --env.TEST=5 would replace $TEST or ${TEST} in your query by 5."
113 "If you specify one environment variable more than once, a list is created. "
114 "For example --env.TEST=1 --env.TEST=2 --env.TEST=3 would substitute "
115 "$TEST or ${TEST} by '1, 2, 3'. This is useful for the SQL `IN` keyword. "
116 "You can also read the values for an enviroment substitution from a file "
117 "(one element per line), e.g. --env.TEST=file.txt. Empty lines and lines "
118 "starting with a # are skipped.\n"
119 "\n"
120 "Comments in the query-file can be placed according to the SQL standard inline "
121 "/*comment*/ or introduced with # (shell script style) or -- (SQL style).\n"
122 "\n"
123 << Tools::Splitting::usage() <<
124 "\n"
125 "In case of success, 0 is returned, a value>0 otherwise.\n"
126 "\n"
127 "Usage: rootifysql [rootify.sql [rootify.root]] [-u URI] [-q query|-f file] [-i] [-o out] [-f] [-cN] [-t tree] [-vN]\n"
128 "\n"
129 ;
130 cout << endl;
131}
132
133struct ExplainParser
134{
135 string sql;
136
137 vector<string> vec;
138
139 string substitute(string _str, const boost::regex &expr)
140 {
141 boost::smatch match;
142 while (boost::regex_search(_str, match, expr, boost::regex_constants::format_first_only))
143 {
144 const auto &len = match.length();
145 const auto &pos = match.position();
146 const auto &str = match.str();
147
148 const auto it = find(vec.cbegin(), vec.cend(), str);
149 const size_t id = it==vec.cend() ? vec.size() : it-vec.cbegin();
150
151 _str.replace(pos, len, "{"+to_string(id)+"}");
152
153 if (it==vec.cend())
154 vec.push_back(str);//.substr(1, str.size()-2));
155 }
156
157 return _str;
158 }
159
160 string substitute(const string &str, const string &expr)
161 {
162 return substitute(str, boost::regex(expr));
163 }
164
165 vector<string> queries;
166
167 string resub(string str)
168 {
169 // search for "KEYWORD expression"
170 boost::regex reg("\\{[0-9]+\\}");
171
172 boost::smatch match;
173 while (boost::regex_search(str, match, reg, boost::regex_constants::format_first_only))
174 {
175 const auto &len = match.length();
176 const auto &pos = match.position();
177 const auto &arg = match.str(); // Argument
178
179 const auto idx = atoi(arg.c_str()+1);
180
181 str.replace(pos, len, resub(vec[idx]));
182 }
183
184 return str;
185 }
186
187 void expression(string expr, size_t indent=0)
188 {
189 if (expr[0]=='{')
190 {
191 const auto idx = atoi(expr.c_str()+1);
192
193 // This is a subquery
194 if (vec[idx].substr(0,3)=="(/*")
195 {
196 cout << setw(indent) << ' ' << "(\n";
197 find_tokens(vec[idx], indent+4);
198 cout << setw(indent) << ' ' << ") ";
199 }
200 else
201 // This is just something to substitute back
202 if (vec[idx].substr(0,2)=="({")
203 {
204 cout << setw(indent) << ' ' << "(" << resub(vec[idx]) << ") ";
205 }
206 else
207 {
208 if (indent>0)
209 cout << setw(indent) << ' ';
210 cout << resub(vec[idx]);
211 }
212 }
213 else
214 {
215 if (indent>0)
216 cout << setw(indent) << ' ';
217 cout << resub(expr);
218 }
219 }
220
221 void find_tokens(string str, size_t indent=0)
222 {
223 // ( COMMENT )?( TOKEN )?(( {NNN} | NNN )( AS|ON ( {NNN}) ))?(,)?)
224 //regex reg("(\\/\\*\\ select\\#[0-9]+\\ \\*\\/\\ *)?([a-zA-Z ]+)?((\\{[0-9]+\\}|[0-9]+)(\\ ?([Aa][Ss]|[Oo][Nn])\\ ?(\\{[0-9]+\\}))?(,)?)");
225
226 const string _com = "\\/\\*\\ select\\#[0-9]+\\ \\*\\/\\ *";
227
228 const string _tok = "[a-zA-Z_ ]+";
229
230 const string _nnn = "\\{[0-9]+\\}|[0-9]+";
231
232 const string _as = "\\ ?([Aa][Ss])\\ ?";
233
234 // ( _nnn ) ( _as ( _nnn ))?(,)? // can also match noting in between two {NNN}
235 const string _exp = "("+_nnn+")" + "("+_as+"("+_nnn+"))?(,)?";
236
237 // Matche: ( _com )? ( ( _tok )? ( _exp ) | ( _tok ) )
238 boost::regex reg("("+_com+")?" + "(" + "("+_tok+")?"+"("+_exp+")" + "|" + "("+_tok+")" + ")");
239
240 boost::smatch match;
241 while (boost::regex_search(str, match, reg, boost::regex_constants::format_first_only))
242 {
243
244 const auto &com = match.str(1); // comment
245 const auto &tok1 = Tools::Trim(match.str(3)); // token with expression
246 const auto &arg1 = match.str(5); // argument 1
247 const auto &as = match.str(7); // as
248 const auto &arg2 = match.str(8); // argument 2
249 const auto &comma = match.str(9); // comma
250 const auto &tok2 = Tools::Trim(match.str(10)); // token without expression
251
252 if (!com.empty())
253 cout << setw(indent) << ' ' << "\033[34m" << com << "\033[0m" << '\n';
254
255 if (!tok1.empty())
256 cout << setw(indent) << ' ' << "\033[32m" << tok1 << "\033[0m" << '\n';
257 if (!tok2.empty())
258 cout << setw(indent) << ' ' << "\033[32m" << tok2 << "\033[0m" << '\n';
259
260 if (!arg1.empty())
261 {
262 expression(arg1, indent+4);
263
264 if (!as.empty())
265 cout << " \033[33m" << as << "\033[0m ";
266
267 if (!arg2.empty())
268 expression(arg2);
269
270 if (!comma.empty())
271 cout << ',';
272
273 cout << '\n';
274 }
275
276 str = str.substr(match.position()+match.length());
277 }
278 }
279
280
281 ExplainParser(const string &_sql) : sql(_sql)
282 {
283 // substitute all strings
284 sql = substitute(sql, "'[^']*'");
285
286 // substitute all escaped sequences (`something`.`something-else`)
287 sql = substitute(sql, "`[^`]*`(\\.`[^`]*`)*");
288
289 // substitute all paranthesis
290 sql = substitute(sql, "[a-zA-Z0-9_]*\\([^\\(\\)]*\\)");
291
292 //cout << sql << "\n\n";
293 find_tokens(sql);
294 cout << endl;
295 }
296};
297
298// Remove queries...
299void format(string sql)
300{
301 ExplainParser p(sql);
302
303 /*
304
305 SELECT
306 [ALL | DISTINCT | DISTINCTROW ]
307 [HIGH_PRIORITY]
308 [STRAIGHT_JOIN]
309 [SQL_SMALL_RESULT] [SQL_BIG_RESULT] [SQL_BUFFER_RESULT]
310 [SQL_CACHE | SQL_NO_CACHE] [SQL_CALC_FOUND_ROWS]
311 select_expr [, select_expr ...]
312 [FROM table_references
313 [PARTITION partition_list]
314 [WHERE where_condition]
315 [GROUP BY {col_name | expr | position}, ... [WITH ROLLUP]]
316 [HAVING where_condition]
317 [WINDOW window_name AS (window_spec)
318 [, window_name AS (window_spec)] ...]
319 [ORDER BY {col_name | expr | position}
320 [ASC | DESC], ... [WITH ROLLUP]]
321 [LIMIT {[offset,] row_count | row_count OFFSET offset}]
322 [INTO OUTFILE 'file_name'
323 [CHARACTER SET charset_name]
324 export_options
325 | INTO DUMPFILE 'file_name'
326 | INTO var_name [, var_name]]
327 [FOR {UPDATE | SHARE} [OF tbl_name [, tbl_name] ...] [NOWAIT | SKIP LOCKED]
328 | LOCK IN SHARE MODE]]
329 */
330
331 /*
332table_references:
333 escaped_table_reference [, escaped_table_reference] ...
334
335escaped_table_reference:
336 table_reference
337 | { OJ table_reference }
338
339table_reference:
340 table_factor
341 | join_table
342
343table_factor:
344 tbl_name [PARTITION (partition_names)]
345 [[AS] alias] [index_hint_list]
346 | table_subquery [AS] alias [(col_list)]
347 | ( table_references )
348
349join_table:
350 table_reference [INNER | CROSS] JOIN table_factor [join_condition]
351 | table_reference STRAIGHT_JOIN table_factor
352 | table_reference STRAIGHT_JOIN table_factor ON conditional_expr
353 | table_reference {LEFT|RIGHT} [OUTER] JOIN table_reference join_condition
354 | table_reference NATURAL [INNER | {LEFT|RIGHT} [OUTER]] JOIN table_factor
355
356join_condition:
357 ON conditional_expr
358 | USING (column_list)
359
360index_hint_list:
361 index_hint [, index_hint] ...
362
363index_hint:
364 USE {INDEX|KEY}
365 [FOR {JOIN|ORDER BY|GROUP BY}] ([index_list])
366 | IGNORE {INDEX|KEY}
367 [FOR {JOIN|ORDER BY|GROUP BY}] (index_list)
368 | FORCE {INDEX|KEY}
369 [FOR {JOIN|ORDER BY|GROUP BY}] (index_list)
370
371index_list:
372 index_name [, index_name] ...
373 */
374
375}
376
377int finish(Database &connection, const uint16_t &verbose, const bool &profiling, const bool &print_connection)
378{
379 if (verbose>0)
380 {
381 try
382 {
383 const auto resw =
384 connection.query("SHOW WARNINGS").store();
385
386 if (resw.num_rows()>0)
387 cout << "\n" << resw.num_rows() << " Warning(s) issued:\n\n";
388
389 for (size_t i=0; i<resw.num_rows(); i++)
390 {
391 const mysqlpp::Row &roww = resw[i];
392
393 cout << roww["Level"] << '[' << roww["Code"] << "]: ";
394 cout << roww["Message"] << '\n';
395 }
396 cout << endl;
397
398 }
399 catch (const exception &e)
400 {
401 cerr << "\nSHOW WARNINGS\n\n";
402 cerr << "SQL query failed:\n" << e.what() << endl;
403 return 1;
404 }
405 }
406
407 if (profiling)
408 {
409 try
410 {
411 const auto N =
412 connection.query("SHOW PROFILES").store().num_rows();
413
414 const auto resp =
415 connection.query("SHOW PROFILE ALL FOR QUERY "+to_string(verbose?N-1:N)).store();
416
417 cout << '\n';
418 cout << left;
419 cout << setw(26) << "Status" << ' ';
420 cout << right;
421 cout << setw(11) << "Duration" << ' ';
422 cout << setw(11) << "CPU User" << ' ';
423 cout << setw(11) << "CPU System" << '\n';
424 cout << "--------------------------------------------------------------\n";
425 for (size_t i=0; i<resp.num_rows(); i++)
426 {
427 const mysqlpp::Row &rowp = resp[i];
428
429 cout << left;
430 cout << setw(26) << rowp["Status"] << ' ';
431 cout << right;
432 cout << setw(11) << rowp["Duration"] << ' ';
433 cout << setw(11) << rowp["CPU_user"] << ' ';
434 cout << setw(11) << rowp["CPU_system"] << '\n';
435 }
436 cout << "--------------------------------------------------------------\n";
437 cout << endl;
438 }
439 catch (const exception &e)
440 {
441 cerr << "\nSHOW PROFILE ALL\n\n";
442 cerr << "SQL query failed:\n" << e.what() << '\n' <<endl;
443 return 2;
444 }
445 }
446
447 if (print_connection)
448 {
449 try
450 {
451 // Exchange _send and _received as it is the view of the server
452 const auto &res1 = connection.query("SHOW STATUS LIKE 'Bytes_%'").store();
453 cout << left << setw(16) << res1[1]["Variable_name"] << ' ' << Tools::Scientific(res1[0]["Value"]) << endl;
454 cout << left << setw(16) << res1[0]["Variable_name"] << ' ' << Tools::Scientific(res1[1]["Value"]) << endl;
455 cout << endl;
456 }
457 catch (const exception &e)
458 {
459 cerr << "\nSHOW STATUS LIKE 'Bytes_%'\n\n";
460 cerr << "SQL query failed:\n" << e.what() << endl;
461 return 3;
462 }
463 }
464
465 if (verbose>0)
466 cout << "Success!\n" << endl;
467 return 0;
468
469}
470
471template<typename T>
472void Convert(FileEntry::Container &container, const mysqlpp::String &col)
473{
474 *reinterpret_cast<T*>(container.ptr) = col.is_null() ? T(0) : static_cast<T>(col);
475}
476
477
478int main(int argc, const char* argv[])
479{
480 Time start;
481
482 gROOT->SetBatch();
483
484 Configuration conf(argv[0]);
485 conf.SetPrintUsage(PrintUsage);
486 SetupConfiguration(conf);
487
488 if (!conf.DoParse(argc, argv))
489 return 127;
490
491 // ----------------------------- Evaluate options --------------------------
492 const string uri = conf.Get<string>("uri");
493 const string out = conf.Get<string>("out");
494 const string file = conf.Get<string>("file");
495 const string tree = conf.Get<string>("tree");
496 const bool force = conf.Get<bool>("force");
497 const bool ignorenull = conf.Get<bool>("ignore-null");
498 const bool update = conf.Get<bool>("update");
499 const bool display = conf.Get<bool>("display");
500 const string write = conf.Get<string>("write");
501 const bool noout = conf.Get<bool>("null");
502 const bool nofill = conf.Get<bool>("no-fill");
503 const bool explain = conf.Get<bool>("explain");
504 const bool profiling = conf.Get<bool>("profiling");
505 const bool accurate = conf.Get<bool>("accurate");
506 const uint16_t verbose = conf.Get<uint16_t>("verbose");
507 const uint16_t compression = conf.Get<uint16_t>("compression");
508 const string delimiter = conf.Has("delimiter") ? conf.Get<string>("delimiter") : "\t";
509
510 const bool copy_all = conf.Get<bool>("copy-all");
511 const bool copy_shabang = conf.Get<bool>("copy-shabang");
512 const bool copy_header = copy_all || conf.Get<bool>("copy-header");
513 const bool copy_query = copy_all || conf.Get<bool>("copy-query");
514 const bool copy_comments = copy_all || conf.Get<bool>("copy-comments");
515
516 const vector<string> _ignore = conf.Vec<string>("ignore");
517 const bool print_connection = conf.Get<bool>("print-connection");
518 //const vector<Map> mymap = conf.Vec<Map>("map");
519
520 // ----------------------- Setup splitting ---------------------------------
521
522 const auto vars = conf.GetWildcardOptions("var.*");
523
524 vector<string> variables;
525 for (auto var=vars.cbegin(); var!=vars.cend(); var++)
526 variables.emplace_back('@'+var->substr(4)+":="+Tools::Trim(conf.Get<string>(*var)));
527
528 // -------------------------------------------------------------------------
529
530 /*const*/ Tools::Splitting split(conf);
531
532 if (verbose>0)
533 {
534 cout << "\n------------------------ Rootify SQL -------------------------" << endl;
535 cout << "Start Time: " << Time::sql << Time(Time::local) << endl;
536 }
537
538 string query = conf.Get<string>("query");
539 if (!query.empty() && query.find_first_of(' ')==string::npos)
540 query.insert(0, "SELECT * FROM ");
541
542 if (query.empty())
543 {
544 if (verbose>0)
545 cout << "Reading query from file '" << file << "'." << endl;
546
547 ifstream fin(file);
548 if (!fin)
549 {
550 cerr << "Could not open query in '" << file << "': " << strerror(errno) << endl;
551 return 4;
552 }
553
554 getline(fin, query, (char)fin.eof());
555 }
556
557 if (query.empty())
558 {
559 cerr << "No query specified." << endl;
560 return 5;
561 }
562
563 // -------------------------------------------------------------------------
564
565 map<string, vector<string>> envs;
566
567 const auto &envs1 = conf.GetWildcardOptions("env.*");
568 for (auto env=envs1.cbegin(); env!=envs1.cend(); env++)
569 envs[env->substr(4)] = conf.Vec<string>(*env);
570
571 const auto &envs2 = conf.GetWildcardOptions("list.*");
572 for (auto env=envs2.cbegin(); env!=envs2.cend(); env++)
573 {
574 const string fname = conf.Get<string>(*env);
575 const string &ident = env->substr(5);
576
577 ifstream fin(fname);
578 if (!fin)
579 {
580 cerr << "Could not open environment in '" << fname << "' for ${" << ident << "}: " << strerror(errno) << endl;
581 return 6;
582 }
583 for (string line; getline(fin, line); )
584 {
585 const auto &l = Tools::Trim(line);
586 if (!l.empty() && l[0]!='#')
587 envs[ident].push_back(line);
588 }
589
590 if (verbose>0)
591 cout << "Found " << envs[ident].size() << " list element(s) for ${" << ident << "}" << endl;
592 }
593
594 for (auto env=envs.cbegin(); env!=envs.cend(); env++)
595 {
596 boost::regex rexpr("\\$(\\{"+env->first+"\\}|"+env->first+"\\b)");
597 query = boost::regex_replace(query, rexpr, boost::join(env->second, ", "));
598 }
599
600 // -------------------------- Check for file permssion ---------------------
601 // Strictly speaking, checking for write permission and existance is not necessary,
602 // but it is convenient that the user does not find out that it failed after
603 // waiting long for the query result
604 //
605
606 // I am using root here instead of boost to be
607 // consistent with the access pattern by TFile
608 TString path(noout?"/dev/null":out.c_str());
609 gSystem->ExpandPathName(path);
610
611 if (!noout)
612 {
613 FileStat_t stat;
614 const Int_t exist = !gSystem->GetPathInfo(path, stat);
615 const Bool_t _write = !gSystem->AccessPathName(path, kWritePermission) && R_ISREG(stat.fMode);
616
617 if ((update && !exist) || (update && exist && !_write) || (force && exist && !_write))
618 {
619 cerr << "File '" << path << "' is not writable." << endl;
620 return 7;
621 }
622
623 if (!update && !force && exist)
624 {
625 cerr << "File '" << path << "' already exists." << endl;
626 return 8;
627 }
628 }
629
630 Time start2;
631
632 // --------------------------- Connect to database -------------------------------------------------
633
634 if (*query.rbegin()!='\n')
635 query += '\n';
636
637 if (verbose>0)
638 {
639 cout << "Connecting to database...\n";
640 cout << "Client Version: " << mysqlpp::Connection().client_version() << endl;
641 }
642
643 Database connection(uri, print_connection); // Keep alive while fetching rows
644
645 if (verbose>0)
646 cout << "Server Version: " << connection.server_version() << endl;
647
648 if (print_connection)
649 {
650 try
651 {
652 const auto &res1 = connection.query("SHOW STATUS LIKE 'Compression'").store();
653 cout << "Compression of database connection is " << string(res1[0][1]) << endl;
654
655 const auto &res2 = connection.query("SHOW STATUS LIKE 'Ssl_cipher'").store();
656 cout << "Connection to databases is " << (string(res2[0][1]).empty()?"UNENCRYPTED":"ENCRYPTED ("+string(res2[0][1])+")") << endl;
657 }
658 catch (const exception &e)
659 {
660 cerr << "\nSHOW STATUS LIKE 'Compression'\n\n";
661 cerr << "SQL query failed:\n" << e.what() << endl;
662 return 9;
663 }
664 }
665
666 try
667 {
668 if (profiling)
669 connection.query("SET PROFILING=1").execute();
670 }
671 catch (const exception &e)
672 {
673 cerr << "\nSET profiling=1\n\n";
674 cerr << "SQL query failed:\n" << e.what() << endl;
675 return 10;
676 }
677
678 // -------------------------- Set user defined variables -------------------
679 if (variables.size()>0)
680 {
681 if (verbose>0)
682 cout << "Setting user defined variables..." << endl;
683
684 const string varset =
685 "SET\n "+boost::algorithm::join(variables, ",\n ");
686
687 try
688 {
689 connection.query(varset).execute();
690 }
691 catch (const exception &e)
692 {
693 cerr << '\n' << varset << "\n\n";
694 cerr << "SQL query failed:\n" << e.what() << endl;
695 return 11;
696 }
697
698 if (verbose>2)
699 cout << '\n' << varset << '\n' << endl;
700 }
701
702 // ------------------------- Explain query if requested --------------------
703
704 if (explain)
705 {
706 try
707 {
708 const auto res0 =
709 connection.query("EXPLAIN FORMAT=JSON "+query).store();
710
711 cout << res0[0][0] << endl;
712 cout << endl;
713
714 const mysqlpp::StoreQueryResult res1 =
715 connection.query("EXPLAIN "+query).store();
716
717 for (size_t i=0; i<res1.num_rows(); i++)
718 {
719 const mysqlpp::Row &row = res1[i];
720
721 cout << "\nid : " << row["id"];
722 cout << "\nselect type : " << row["select_type"];
723
724 if (!row["table"].is_null())
725 cout << "\ntable : " << row["table"];
726
727 if (!row["partitions"].is_null())
728 cout << "\npartitions : " << row["partitions"];
729
730 if (!row["key"].is_null())
731 cout << "\nselected key : " << row["key"] << " [len=" << row["key_len"] << "] out of (" << row["possible_keys"] << ")";
732
733 if (!row["type"].is_null())
734 cout << "\njoin type : " << row["type"];
735
736 //if (!row["possible_keys"].is_null())
737 // cout << "\npossible_keys: " << row["possible_keys"];
738
739 //if (!row["key_len"].is_null())
740 // cout << "\nkey_len : " << row["key_len"];
741
742 if (!row["ref"].is_null())
743 cout << "\nref : (" << row["ref"] << ") compared to the index";
744
745 if (!row["rows"].is_null())
746 cout << "\nrows : " << row["rows"];
747
748 if (!row["filtered"].is_null())
749 cout << "\nfiltered : " << row["filtered"];
750
751 if (!row["extra"].is_null())
752 cout << "\nExtra : " << row["extra"];
753
754 cout << endl;
755 }
756
757 cout << endl;
758
759 const mysqlpp::StoreQueryResult res2 =
760 connection.query("SHOW WARNINGS").store();
761
762 for (size_t i=0; i<res2.num_rows(); i++)
763 {
764 const mysqlpp::Row &row = res2[i];
765
766 // 1003 //
767 cout << row["Level"] << '[' << row["Code"] << "]:\n";
768 if (uint32_t(row["Code"])==1003)
769 format(row["Message"].c_str());
770 else
771 cout << row["Message"] << '\n' << endl;
772
773 }
774
775 }
776 catch (const exception &e)
777 {
778 cerr << '\n' << query << "\n\n";
779 cerr << "SQL query failed:\n" << e.what() << endl;
780 return 12;
781 }
782
783 return 0;
784 }
785
786 // -------------------------- Request data from database -------------------
787 if (verbose>0)
788 cout << "Requesting data... please be patient!" << endl;
789
790 if (verbose>2)
791 cout << '\n' << query << endl;
792
793 const mysqlpp::UseQueryResult res =
794 connection.query(query).use();
795
796 // -------------------------------------------------------------------------
797
798 if (verbose>0)
799 {
800 cout << "Opening file '" << path << "' [compression=" << compression << "]...\n";
801 cout << "Writing data to tree '" << tree << "'" << (nofill?" (--skipped--)":"") << endl;
802 split.print();
803 }
804
805 // ----------------------------- Open output file --------------------------
806 TFile tfile(path, update?"UPDATE":(force?"RECREATE":"CREATE"), "Rootify SQL", compression);
807 if (tfile.IsZombie())
808 return 13;
809
810 // -------------------------------------------------------------------------
811
812 // get the first row to get the field description
813 mysqlpp::Row row = res.fetch_row();
814 if (!row)
815 {
816 cerr << "Empty set returned... nothing to write." << endl;
817 return finish(connection, verbose, profiling, print_connection)+20;
818 }
819
820 if (verbose>0)
821 cout << "Trying to setup " << row.size() << " branches..." << endl;
822
823 if (verbose>1)
824 cout << endl;
825
826 const mysqlpp::FieldNames &l = *row.field_list().list;
827
828 vector<FileEntry::Container> container;
829
830 UInt_t cols = 0;
831
832 // IMPLEMENT FILE SPLITTING!
833 // OpenFile(tree, query)
834 // SetupColumns
835 // WriteRow
836 // CloseFile
837
838 // Ratio[3]: 50%, 20%, 30%
839 // File[x3]: root, cout, fout
840
841
842 // -------------------- Configure branches of TTree ------------------------
843 vector<TTree*> ttree;
844
845 if (split.empty())
846 ttree.emplace_back(new TTree(tree.c_str(), query.c_str()));
847 else
848 for (size_t i=0; i<split.size(); i++)
849 ttree.emplace_back(new TTree((tree+"["+to_string(i)+"]").c_str(), query.c_str()));
850
851 size_t skipno = 0;
852 size_t skipat = 0;
853 size_t skipreg = 0;
854 size_t skipch = 0;
855 for (size_t i=0; i<l.size(); i++)
856 {
857 string t = row[i].type().sql_name();
858
859 bool skip = false;
860
861 // Remove trailing " NULL"
862 if (t.find(" NOT NULL")==t.size()-9)
863 t = t.substr(0, t.size()-9);
864 if (t.find(" NULL")==t.size()-5)
865 t = t.substr(0, t.size()-5);
866
867 // Get FileEntry description corresponding to the sql type
868 const auto it = FileEntry::LUT.sql(t);
869
870 // Skip all columns that do not follow a convertible type
871 if (it==FileEntry::LUT.end())
872 {
873 skip = true;
874 skipno++;
875 }
876
877 // For valid colums, check if they are of a type that can not be written to a root file
878 if (!skip && (it->type==FileEntry::kVarchar || it->type==FileEntry::kChar))
879 {
880 skip = true;
881 skipch++;
882 }
883
884 // Check if there is any user request for skipping a column
885 if (!skip)
886 {
887 for (auto pattern=_ignore.cbegin(); pattern!=_ignore.cend(); pattern++)
888 {
889 if (boost::regex_match(l[i], boost::regex(*pattern)))
890 {
891 skip = true;
892 skipreg++;
893 break;
894 }
895 }
896 }
897
898 // Skip all columns that start with an @ (variable names)
899 if (!skip && l[i][0]=='@')
900 {
901 skip = true;
902 skipat++;
903 }
904
905 // Create the 'leaflist'. If no accurate conversion is requested, create doubles for all leaves
906 const string leaflist = l[i] + "/" + (accurate ? it->branch : 'D');
907
908 if (verbose>1)
909 cout << (skip?" - ":" + ") << leaflist.c_str() << " [" << t << "] {" << (it==FileEntry::LUT.end()?'-':it->branch) << "}\n";
910
911 // Create the container entry (must be emplace_back due to the std::string)
912 if (accurate)
913 container.emplace_back(leaflist, "", it->type);
914 else
915 container.emplace_back(leaflist, it->type);
916
917 if (skip)
918 continue;
919
920 // Create corresponding branches in all trees
921 for (auto itree=ttree.begin(); itree!=ttree.end(); itree++)
922 itree[0]->Branch(l[i].c_str(), container[i].ptr, leaflist.c_str());
923
924 cols++;
925 }
926 // -------------------------------------------------------------------------
927
928 if (verbose>1)
929 cout << endl;
930 if (verbose>0)
931 {
932 if (skipno)
933 cout << skipno << " branches skipped because no suitable type available." << endl;
934 if (skipch)
935 cout << skipch << " branches skipped because type is a character string." << endl;
936 if (skipreg)
937 cout << skipreg << " branches skipped due to ignore list." << endl;
938 if (skipat)
939 cout << skipat << " branches skipped due to name starting with @." << endl;
940 cout << "Configured " << cols << " branches.\nFilling branches..." << endl;
941 }
942
943 // ------------------------- Open the ascii files --------------------------
944
945 vector<unique_ptr<ofstream>> fout;
946 if (!write.empty())
947 {
948 vector<string> names;
949 if (split.empty())
950 names.emplace_back(write);
951 else
952 for (size_t i=0; i<split.size(); i++)
953 names.emplace_back(write+"-"+to_string(i));
954
955 for (auto it=names.cbegin(); it!=names.cend(); it++)
956 {
957 fout.emplace_back(new ofstream(*it));
958 if (!*fout.rbegin())
959 cout << "WARNING: Writing to '" << write << "' failed: " << strerror(errno) << endl;
960 }
961 }
962
963 // ----------------------- Prepare the ascii comment -----------------------
964
965 string contents;
966
967 istringstream istr(query);
968 size_t line = 0;
969 bool header = true;
970 while (istr)
971 {
972 string ibuf;
973 getline(istr, ibuf);
974 const string sbuf = Tools::Trim(ibuf);
975
976 const bool shabang = line==0 && ibuf[0]=='#' && ibuf[1]=='!';
977 const bool comment = sbuf[0]=='#' && !shabang;
978 const bool isquery = !shabang && !comment;
979 if (isquery)
980 header = false;
981
982 line++;
983
984 if ((copy_shabang && shabang) ||
985 (copy_header && comment && header) ||
986 (copy_query && isquery) ||
987 (copy_comments && comment && !header))
988 contents += '#' + ibuf + '\n';
989 }
990
991 // ----------------------- Write the ascii headers -------------------------
992
993 ostringstream htxt;
994 if (display || !fout.empty())
995 htxt << row.field_list(delimiter.c_str());
996
997 if (display)
998 {
999 cout << endl;
1000 cout << contents << endl;
1001 cout << "# " << htxt.str() << endl;
1002 }
1003 for (auto ff=fout.begin(); ff!=fout.end(); ff++)
1004 {
1005 **ff << contents;
1006 **ff << "# " << htxt.str() << endl;
1007 }
1008
1009 // ---------------------- Fill TTree with DB data --------------------------
1010
1011 size_t count = 0;
1012 size_t skip = 0;
1013 do
1014 {
1015 size_t index = split.index(count++);
1016
1017 ostringstream rtxt;
1018 if (display || !fout.empty())
1019 rtxt << row.value_list(delimiter.c_str(), mysqlpp::do_nothing);
1020
1021 if (display)
1022 cout << rtxt.str() << '\n';
1023 if (!fout.empty())
1024 *fout[index] << rtxt.str() << '\n';
1025
1026 size_t idx=0;
1027 for (auto col=row.begin(); col!=row.end(); col++, idx++)
1028 {
1029 if (!ignorenull && col->is_null())
1030 {
1031 skip++;
1032 break;
1033 }
1034
1035 if (accurate)
1036 {
1037 // Do an accurate type conversion and assign to the memory allocated as branch-address
1038 switch (container[idx].type)
1039 {
1040 case FileEntry::kBool: Convert<bool> (container[idx], *col); break;
1041 case FileEntry::kFloat: Convert<float> (container[idx], *col); break;
1042 case FileEntry::kDecimal:
1043 case FileEntry::kNumeric:
1044 case FileEntry::kDouble: Convert<double> (container[idx], *col); break;
1045 case FileEntry::kUInt64: Convert<uint64_t>(container[idx], *col); break;
1046 case FileEntry::kInt64: Convert<int64_t> (container[idx], *col); break;
1047 case FileEntry::kUInt32: Convert<uint32_t>(container[idx], *col); break;
1048 case FileEntry::kInt32: Convert<int32_t> (container[idx], *col); break;
1049 case FileEntry::kUInt16: Convert<uint16_t>(container[idx], *col); break;
1050 case FileEntry::kInt16: Convert<int16_t> (container[idx], *col); break;
1051 case FileEntry::kUInt8: Convert<uint8_t> (container[idx], *col); break;
1052 case FileEntry::kInt8:
1053 case FileEntry::kDate:
1054 *reinterpret_cast<uint64_t*>(container[idx].ptr) = static_cast<time_t>(mysqlpp::Date(*col));
1055 break;
1056 case FileEntry::kDateTime:
1057 *reinterpret_cast<uint64_t*>(container[idx].ptr) = static_cast<time_t>(mysqlpp::DateTime(*col));
1058 break;
1059 case FileEntry::kTime:
1060 *reinterpret_cast<uint32_t*>(container[idx].ptr) = static_cast<time_t>(mysqlpp::Time(*col));
1061 break;
1062 default:
1063 break;
1064 }
1065 }
1066 else
1067 {
1068 // Convert everything to double, no matter what... and assign to the memory allocated as branch-address
1069 switch (container[idx].type)
1070 {
1071 case FileEntry::kBool:
1072 case FileEntry::kFloat:
1073 case FileEntry::kDecimal:
1074 case FileEntry::kNumeric:
1075 case FileEntry::kDouble:
1076 case FileEntry::kUInt64:
1077 case FileEntry::kInt64:
1078 case FileEntry::kUInt32:
1079 case FileEntry::kInt32:
1080 case FileEntry::kUInt16:
1081 case FileEntry::kInt16:
1082 case FileEntry::kUInt8:
1083 case FileEntry::kInt8:
1084 Convert<double>(container[idx], *col);
1085 break;
1086 case FileEntry::kDate:
1087 *reinterpret_cast<double*>(container[idx].ptr) = static_cast<time_t>(mysqlpp::Date(*col));
1088 break;
1089 case FileEntry::kDateTime:
1090 *reinterpret_cast<double*>(container[idx].ptr) = static_cast<time_t>(mysqlpp::DateTime(*col));
1091 break;
1092 case FileEntry::kTime:
1093 *reinterpret_cast<double*>(container[idx].ptr) = static_cast<time_t>(mysqlpp::Time(*col));
1094 break;
1095 default:
1096 break;
1097 }
1098 }
1099 }
1100
1101 if (idx==row.size() && !nofill)
1102 ttree[index]->Fill();
1103
1104 row = res.fetch_row();
1105
1106
1107 } while (row);
1108
1109 // -------------------------------------------------------------------------
1110
1111 if (display)
1112 cout << '\n' << endl;
1113
1114 if (verbose>0)
1115 {
1116 cout << count << " rows fetched." << endl;
1117 if (skip>0)
1118 cout << skip << " rows skipped due to NULL field." << endl;
1119
1120 for (size_t i=0; i<ttree.size(); i++)
1121 cout << ttree[i]->GetEntries() << " rows filled into tree #" << i << "." << endl;
1122 }
1123
1124 for (auto it=ttree.begin(); it!=ttree.end(); it++)
1125 (*it)->Write();
1126 tfile.Close();
1127
1128 if (verbose>0)
1129 {
1130 const auto sec = Time().UnixTime()-start.UnixTime();
1131
1132 cout << Tools::Scientific(tfile.GetSize()) << "B written to disk.\n";
1133 cout << "File closed.\n";
1134 cout << "Execution time: " << sec << "s ";
1135 cout << "(" << Tools::Fractional(sec/count) << "s/row)\n";
1136 cout << "--------------------------------------------------------------" << endl;
1137 }
1138
1139 return finish(connection, verbose, profiling, print_connection);
1140}
Note: See TracBrowser for help on using the repository browser.