source: trunk/FACT++/src/rootifysql.cc@ 20030

Last change on this file since 20030 was 20011, checked in by tbretz, 4 years ago
Had to change to pointers, emplace_back and ofstream do not work in gcc 4.8(\!) ... ARGH\! CentOS 7 technology from the 80s
File size: 39.7 KB
Line 
1#include "Database.h"
2
3#include <boost/regex.hpp>
4#include <boost/tokenizer.hpp>
5#include <boost/algorithm/string.hpp>
6#include <boost/algorithm/string/join.hpp>
7
8#include "tools.h"
9#include "Time.h"
10#include "Splitting.h"
11#include "FileEntry.h"
12
13#include <TROOT.h>
14#include <TSystem.h>
15#include <TFile.h>
16#include <TTree.h>
17
18using namespace std;
19
20// ------------------------------------------------------------------------
21
22void SetupConfiguration(Configuration &conf)
23{
24 po::options_description control("Database options");
25 control.add_options()
26 ("uri,u", var<string>()->required(), "Database link as in\n\tuser:password@server[:port]/database[?compress=0|1].")
27 ("query,q", var<string>(""), "MySQL query (overwrites --file)")
28 ("file", var<string>("rootify.sql"), "An ASCII file with the MySQL query (overwrites --query)")
29 ("ignore-null,i", po_switch(), "Do not skip rows containing any NULL field")
30 ("display,d", po_switch(), "Displays contents on the screen (most usefull in combination with mysql statements as SHOW or EXPLAIN)")
31 ("explain", po_switch(), "Requests an EXPLAIN from the server (shows the server optimized query)\nsee also https://dev.mysql.com/doc/refman/explain-output.html")
32 ("profiling", po_switch(), "Turn on profiling and print profile")
33 ("var.*", var<string>(), "Predefined SQL user variables (@VAR)")
34 ("env.*", vars<string>(), "Predefined environment for substitutions in the query ($ENV)")
35 ("list.*", var<string>(), "Predefined environment for substitutions in the query ($ENV). The list is read from the given file (one list entry per line)")
36 ("print-connection", po_switch(), "Print database connection information")
37 ("verbose,v", var<uint16_t>(1), "Verbosity (0: quiet, 1: default, 2: more, 3, ...)")
38 ;
39
40 po::options_description ascii("ASCII output");
41 ascii.add_options()
42 ("write,w", var<string>(""), "Write output to an ascii file")
43 ("delimiter", var<string>(), "The delimiter used if contents are displayed with --display (default=\\t)")
44 ("copy-shabang", po_switch(), "Copy the sha-bang line if exists to the output file")
45 ("copy-header", po_switch(), "Copy the header (all line starting with '#' up to the first non-comment line to the output file")
46 ("copy-query", po_switch(), "Copy the query to the ascii output file")
47 ("copy-comments", po_switch(), "Copy all lines starting with '#' to the output file which are not part of header")
48 ("copy-all", po_switch(), "An alias for --copy-header --copy-query --copy-comments")
49 ;
50
51 po::options_description root("Root file options");
52 root.add_options()
53 ("out,o", var<string>("rootify.root"), "Output root file name")
54 ("force,f", po_switch(), "Force overwriting an existing root file ('RECREATE')")
55 ("update", po_switch(), "Update an existing root file with the new tree ('UPDATE')")
56 ("compression,c", var<uint16_t>(1), "zlib compression level for the root file")
57 ("tree,t", var<string>("Result"), "Name of the root tree")
58 ("accurate", po_switch(), "Accurate type conversion, otherwise all branches are creates as double which is often more convenient.")
59 ("ignore", vars<string>(), "Ignore the given columns")
60 ("null,n", po_switch(), "Redirect the root output file to /dev/null (mainly for debugging purposes, e.g. performance studies)")
61 ("no-fill", po_switch(), "Do not fill events into the root file (mainly for debugging purposes, e.g. performance studies)")
62 ;
63
64 po::positional_options_description p;
65 p.add("file", 1); // The 1st positional options (n=1)
66 p.add("out", 1); // The 2nd positional options (n=1)
67
68 conf.AddOptions(control);
69 conf.AddOptions(ascii);
70 conf.AddOptions(root);
71 conf.AddOptions(Tools::Splitting::options());
72 conf.SetArgumentPositions(p);
73}
74
75void PrintUsage()
76{
77 cout <<
78 "rootifysql - Converts the result of a mysql query into a root file\n"
79 "\n"
80 "For convenience, this documentation uses the extended version of the options, "
81 "refer to the output below to get the abbreviations.\n"
82 "\n"
83 "Writes the result of a mysql query into a root file. For each column, a branch is "
84 "created of type double with the field name as name. This is usually the column name "
85 "if not specified otherwise by the AS mysql directive.\n"
86 "\n"
87 "Columns with CHAR or VARCHAR as field type are ignored. DATETIME, DATE and TIME "
88 "columns are converted to unix time (time_t). Rows containing any file which is "
89 "NULL are skipped if not suppressed by the --ignore-null option. Ideally, the query "
90 "is compiled in a way that no NULL field is returned. With the --display option the "
91 "result of the request is printed on the screen (NULL skipping still in action). "
92 "This can be useful to create an ascii file or to show results as 'SHOW DATABASES' "
93 "or 'EXPLAIN table'. To redirect the contents into an ascii file, the option -v0 "
94 "is useful. To suppress writing to an output file --null can be used.\n"
95 "\n"
96 "The default is to read the query from a file called rootify.sql. Except if a different "
97 "filename is specified by the --file option or a query is given with --query.\n"
98 "\n"
99 "As a trick, the rootify.sql file can be made excutable (chmod u+x rootify.sql). "
100 "If the first line contains '#!rootifysql', the script can be executed directly.\n"
101 "\n"
102 "Columns whose name start with @ are skipped. If you want them in your output file "
103 "give them a name using AS, e.g. 'SELECT @A:=5 AS A'.\n"
104 "\n"
105 "You can use variables in your sql query like @MyVar and define them on the "
106 "command line. In this example with --var.MyVar=5\n"
107 "\n"
108 "You can use environment definitions for substitutions in your SQL query. "
109 "For example --env.TEST=5 would replace $TEST or ${TEST} in your query by 5."
110 "If you specify one environment variable more than once, a list is created. "
111 "For example --env.TEST=1 --env.TEST=2 --env.TEST=3 would substitute "
112 "$TEST or ${TEST} by '1, 2, 3'. This is useful for the SQL `IN` keyword. "
113 "You can also read the values for an enviroment substitution from a file "
114 "(one element per line), e.g. --env.TEST=file.txt. Empty lines and lines "
115 "starting with a # are skipped.\n"
116 "\n"
117 "Comments in the query-file can be placed according to the SQL standard inline "
118 "/*comment*/ or introduced with # (shell script style) or -- (SQL style).\n"
119 "\n"
120 << Tools::Splitting::usage() <<
121 "\n"
122 "In case of success, 0 is returned, a value>0 otherwise.\n"
123 "\n"
124 "Usage: rootifysql [rootify.sql [rootify.root]] [-u URI] [-q query|-f file] [-i] [-o out] [-f] [-cN] [-t tree] [-vN]\n"
125 "\n"
126 ;
127 cout << endl;
128}
129
130struct ExplainParser
131{
132 string sql;
133
134 vector<string> vec;
135
136 string substitute(string _str, const boost::regex &expr)
137 {
138 boost::smatch match;
139 while (boost::regex_search(_str, match, expr, boost::regex_constants::format_first_only))
140 {
141 const auto &len = match.length();
142 const auto &pos = match.position();
143 const auto &str = match.str();
144
145 const auto it = find(vec.cbegin(), vec.cend(), str);
146 const size_t id = it==vec.cend() ? vec.size() : it-vec.cbegin();
147
148 _str.replace(pos, len, "{"+to_string(id)+"}");
149
150 if (it==vec.cend())
151 vec.push_back(str);//.substr(1, str.size()-2));
152 }
153
154 return _str;
155 }
156
157 string substitute(const string &str, const string &expr)
158 {
159 return substitute(str, boost::regex(expr));
160 }
161
162 vector<string> queries;
163
164 string resub(string str)
165 {
166 // search for "KEYWORD expression"
167 boost::regex reg("\\{[0-9]+\\}");
168
169 boost::smatch match;
170 while (boost::regex_search(str, match, reg, boost::regex_constants::format_first_only))
171 {
172 const auto &len = match.length();
173 const auto &pos = match.position();
174 const auto &arg = match.str(); // Argument
175
176 const auto idx = atoi(arg.c_str()+1);
177
178 str.replace(pos, len, resub(vec[idx]));
179 }
180
181 return str;
182 }
183
184 void expression(string expr, size_t indent=0)
185 {
186 if (expr[0]=='{')
187 {
188 const auto idx = atoi(expr.c_str()+1);
189
190 // This is a subquery
191 if (vec[idx].substr(0,3)=="(/*")
192 {
193 cout << setw(indent) << ' ' << "(\n";
194 find_tokens(vec[idx], indent+4);
195 cout << setw(indent) << ' ' << ") ";
196 }
197 else
198 // This is just something to substitute back
199 if (vec[idx].substr(0,2)=="({")
200 {
201 cout << setw(indent) << ' ' << "(" << resub(vec[idx]) << ") ";
202 }
203 else
204 {
205 if (indent>0)
206 cout << setw(indent) << ' ';
207 cout << resub(vec[idx]);
208 }
209 }
210 else
211 {
212 if (indent>0)
213 cout << setw(indent) << ' ';
214 cout << resub(expr);
215 }
216 }
217
218 void find_tokens(string str, size_t indent=0)
219 {
220 // ( COMMENT )?( TOKEN )?(( {NNN} | NNN )( AS|ON ( {NNN}) ))?(,)?)
221 //regex reg("(\\/\\*\\ select\\#[0-9]+\\ \\*\\/\\ *)?([a-zA-Z ]+)?((\\{[0-9]+\\}|[0-9]+)(\\ ?([Aa][Ss]|[Oo][Nn])\\ ?(\\{[0-9]+\\}))?(,)?)");
222
223 const string _com = "\\/\\*\\ select\\#[0-9]+\\ \\*\\/\\ *";
224
225 const string _tok = "[a-zA-Z_ ]+";
226
227 const string _nnn = "\\{[0-9]+\\}|[0-9]+";
228
229 const string _as = "\\ ?([Aa][Ss])\\ ?";
230
231 // ( _nnn ) ( _as ( _nnn ))?(,)? // can also match noting in between two {NNN}
232 const string _exp = "("+_nnn+")" + "("+_as+"("+_nnn+"))?(,)?";
233
234 // Matche: ( _com )? ( ( _tok )? ( _exp ) | ( _tok ) )
235 boost::regex reg("("+_com+")?" + "(" + "("+_tok+")?"+"("+_exp+")" + "|" + "("+_tok+")" + ")");
236
237 boost::smatch match;
238 while (boost::regex_search(str, match, reg, boost::regex_constants::format_first_only))
239 {
240
241 const auto &com = match.str(1); // comment
242 const auto &tok1 = Tools::Trim(match.str(3)); // token with expression
243 const auto &arg1 = match.str(5); // argument 1
244 const auto &as = match.str(7); // as
245 const auto &arg2 = match.str(8); // argument 2
246 const auto &comma = match.str(9); // comma
247 const auto &tok2 = Tools::Trim(match.str(10)); // token without expression
248
249 if (!com.empty())
250 cout << setw(indent) << ' ' << "\033[34m" << com << "\033[0m" << '\n';
251
252 if (!tok1.empty())
253 cout << setw(indent) << ' ' << "\033[32m" << tok1 << "\033[0m" << '\n';
254 if (!tok2.empty())
255 cout << setw(indent) << ' ' << "\033[32m" << tok2 << "\033[0m" << '\n';
256
257 if (!arg1.empty())
258 {
259 expression(arg1, indent+4);
260
261 if (!as.empty())
262 cout << " \033[33m" << as << "\033[0m ";
263
264 if (!arg2.empty())
265 expression(arg2);
266
267 if (!comma.empty())
268 cout << ',';
269
270 cout << '\n';
271 }
272
273 str = str.substr(match.position()+match.length());
274 }
275 }
276
277
278 ExplainParser(const string &_sql) : sql(_sql)
279 {
280 // substitute all strings
281 sql = substitute(sql, "'[^']*'");
282
283 // substitute all escaped sequences (`something`.`something-else`)
284 sql = substitute(sql, "`[^`]*`(\\.`[^`]*`)*");
285
286 // substitute all paranthesis
287 sql = substitute(sql, "[a-zA-Z0-9_]*\\([^\\(\\)]*\\)");
288
289 //cout << sql << "\n\n";
290 find_tokens(sql);
291 cout << endl;
292 }
293};
294
295// Remove queries...
296void format(string sql)
297{
298 ExplainParser p(sql);
299
300 /*
301
302 SELECT
303 [ALL | DISTINCT | DISTINCTROW ]
304 [HIGH_PRIORITY]
305 [STRAIGHT_JOIN]
306 [SQL_SMALL_RESULT] [SQL_BIG_RESULT] [SQL_BUFFER_RESULT]
307 [SQL_CACHE | SQL_NO_CACHE] [SQL_CALC_FOUND_ROWS]
308 select_expr [, select_expr ...]
309 [FROM table_references
310 [PARTITION partition_list]
311 [WHERE where_condition]
312 [GROUP BY {col_name | expr | position}, ... [WITH ROLLUP]]
313 [HAVING where_condition]
314 [WINDOW window_name AS (window_spec)
315 [, window_name AS (window_spec)] ...]
316 [ORDER BY {col_name | expr | position}
317 [ASC | DESC], ... [WITH ROLLUP]]
318 [LIMIT {[offset,] row_count | row_count OFFSET offset}]
319 [INTO OUTFILE 'file_name'
320 [CHARACTER SET charset_name]
321 export_options
322 | INTO DUMPFILE 'file_name'
323 | INTO var_name [, var_name]]
324 [FOR {UPDATE | SHARE} [OF tbl_name [, tbl_name] ...] [NOWAIT | SKIP LOCKED]
325 | LOCK IN SHARE MODE]]
326 */
327
328 /*
329table_references:
330 escaped_table_reference [, escaped_table_reference] ...
331
332escaped_table_reference:
333 table_reference
334 | { OJ table_reference }
335
336table_reference:
337 table_factor
338 | join_table
339
340table_factor:
341 tbl_name [PARTITION (partition_names)]
342 [[AS] alias] [index_hint_list]
343 | table_subquery [AS] alias [(col_list)]
344 | ( table_references )
345
346join_table:
347 table_reference [INNER | CROSS] JOIN table_factor [join_condition]
348 | table_reference STRAIGHT_JOIN table_factor
349 | table_reference STRAIGHT_JOIN table_factor ON conditional_expr
350 | table_reference {LEFT|RIGHT} [OUTER] JOIN table_reference join_condition
351 | table_reference NATURAL [INNER | {LEFT|RIGHT} [OUTER]] JOIN table_factor
352
353join_condition:
354 ON conditional_expr
355 | USING (column_list)
356
357index_hint_list:
358 index_hint [, index_hint] ...
359
360index_hint:
361 USE {INDEX|KEY}
362 [FOR {JOIN|ORDER BY|GROUP BY}] ([index_list])
363 | IGNORE {INDEX|KEY}
364 [FOR {JOIN|ORDER BY|GROUP BY}] (index_list)
365 | FORCE {INDEX|KEY}
366 [FOR {JOIN|ORDER BY|GROUP BY}] (index_list)
367
368index_list:
369 index_name [, index_name] ...
370 */
371
372}
373
374int finish(Database &connection, const uint16_t &verbose, const bool &profiling, const bool &print_connection)
375{
376 if (verbose>0)
377 {
378 try
379 {
380 const auto resw =
381 connection.query("SHOW WARNINGS").store();
382
383 if (resw.num_rows()>0)
384 cout << "\n" << resw.num_rows() << " Warning(s) issued:\n\n";
385
386 for (size_t i=0; i<resw.num_rows(); i++)
387 {
388 const mysqlpp::Row &roww = resw[i];
389
390 cout << roww["Level"] << '[' << roww["Code"] << "]: ";
391 cout << roww["Message"] << '\n';
392 }
393 cout << endl;
394
395 }
396 catch (const exception &e)
397 {
398 cerr << "\nSHOW WARNINGS\n\n";
399 cerr << "SQL query failed:\n" << e.what() << endl;
400 return 1;
401 }
402 }
403
404 if (profiling)
405 {
406 try
407 {
408 const auto N =
409 connection.query("SHOW PROFILES").store().num_rows();
410
411 const auto resp =
412 connection.query("SHOW PROFILE ALL FOR QUERY "+to_string(verbose?N-1:N)).store();
413
414 cout << '\n';
415 cout << left;
416 cout << setw(26) << "Status" << ' ';
417 cout << right;
418 cout << setw(11) << "Duration" << ' ';
419 cout << setw(11) << "CPU User" << ' ';
420 cout << setw(11) << "CPU System" << '\n';
421 cout << "--------------------------------------------------------------\n";
422 for (size_t i=0; i<resp.num_rows(); i++)
423 {
424 const mysqlpp::Row &rowp = resp[i];
425
426 cout << left;
427 cout << setw(26) << rowp["Status"] << ' ';
428 cout << right;
429 cout << setw(11) << rowp["Duration"] << ' ';
430 cout << setw(11) << rowp["CPU_user"] << ' ';
431 cout << setw(11) << rowp["CPU_system"] << '\n';
432 }
433 cout << "--------------------------------------------------------------\n";
434 cout << endl;
435 }
436 catch (const exception &e)
437 {
438 cerr << "\nSHOW PROFILE ALL\n\n";
439 cerr << "SQL query failed:\n" << e.what() << '\n' <<endl;
440 return 2;
441 }
442 }
443
444 if (print_connection)
445 {
446 try
447 {
448 // Exchange _send and _received as it is the view of the server
449 const auto &res1 = connection.query("SHOW STATUS LIKE 'Bytes_%'").store();
450 cout << left << setw(16) << res1[1]["Variable_name"] << ' ' << Tools::Scientific(res1[0]["Value"]) << endl;
451 cout << left << setw(16) << res1[0]["Variable_name"] << ' ' << Tools::Scientific(res1[1]["Value"]) << endl;
452 cout << endl;
453 }
454 catch (const exception &e)
455 {
456 cerr << "\nSHOW STATUS LIKE 'Bytes_%'\n\n";
457 cerr << "SQL query failed:\n" << e.what() << endl;
458 return 3;
459 }
460 }
461
462 if (verbose>0)
463 cout << "Success!\n" << endl;
464 return 0;
465
466}
467
468template<typename T>
469void Convert(FileEntry::Container &container, const mysqlpp::String &col)
470{
471 *reinterpret_cast<T*>(container.ptr) = static_cast<T>(col);
472}
473
474
475int main(int argc, const char* argv[])
476{
477 Time start;
478
479 gROOT->SetBatch();
480
481 Configuration conf(argv[0]);
482 conf.SetPrintUsage(PrintUsage);
483 SetupConfiguration(conf);
484
485 if (!conf.DoParse(argc, argv))
486 return 127;
487
488 // ----------------------------- Evaluate options --------------------------
489 const string uri = conf.Get<string>("uri");
490 const string out = conf.Get<string>("out");
491 const string file = conf.Get<string>("file");
492 const string tree = conf.Get<string>("tree");
493 const bool force = conf.Get<bool>("force");
494 const bool ignorenull = conf.Get<bool>("ignore-null");
495 const bool update = conf.Get<bool>("update");
496 const bool display = conf.Get<bool>("display");
497 const string write = conf.Get<string>("write");
498 const bool noout = conf.Get<bool>("null");
499 const bool nofill = conf.Get<bool>("no-fill");
500 const bool explain = conf.Get<bool>("explain");
501 const bool profiling = conf.Get<bool>("profiling");
502 const bool accurate = conf.Get<bool>("accurate");
503 const uint16_t verbose = conf.Get<uint16_t>("verbose");
504 const uint16_t compression = conf.Get<uint16_t>("compression");
505 const string delimiter = conf.Has("delimiter") ? conf.Get<string>("delimiter") : "\t";
506
507 const bool copy_all = conf.Get<bool>("copy-all");
508 const bool copy_shabang = conf.Get<bool>("copy-shabang");
509 const bool copy_header = copy_all || conf.Get<bool>("copy-header");
510 const bool copy_query = copy_all || conf.Get<bool>("copy-query");
511 const bool copy_comments = copy_all || conf.Get<bool>("copy-comments");
512
513 const vector<string> _ignore = conf.Vec<string>("ignore");
514 const bool print_connection = conf.Get<bool>("print-connection");
515 //const vector<Map> mymap = conf.Vec<Map>("map");
516
517 // ----------------------- Setup splitting ---------------------------------
518
519 const auto vars = conf.GetWildcardOptions("var.*");
520
521 vector<string> variables;
522 for (auto var=vars.cbegin(); var!=vars.cend(); var++)
523 variables.emplace_back('@'+var->substr(4)+":="+Tools::Trim(conf.Get<string>(*var)));
524
525 // -------------------------------------------------------------------------
526
527 /*const*/ Tools::Splitting split(conf);
528
529 if (verbose>0)
530 {
531 cout << "\n------------------------ Rootify SQL -------------------------" << endl;
532 cout << "Start Time: " << Time::sql << Time(Time::local) << endl;
533 }
534
535 string query = conf.Get<string>("query");
536 if (query.empty())
537 {
538 if (verbose>0)
539 cout << "Reading query from file '" << file << "'." << endl;
540
541 ifstream fin(file);
542 if (!fin)
543 {
544 cerr << "Could not open query in '" << file << "': " << strerror(errno) << endl;
545 return 4;
546 }
547
548 getline(fin, query, (char)fin.eof());
549 }
550
551 if (query.empty())
552 {
553 cerr << "No query specified." << endl;
554 return 5;
555 }
556
557 // -------------------------------------------------------------------------
558
559 map<string, vector<string>> envs;
560
561 const auto &envs1 = conf.GetWildcardOptions("env.*");
562 for (auto env=envs1.cbegin(); env!=envs1.cend(); env++)
563 envs[env->substr(4)] = conf.Vec<string>(*env);
564
565 const auto &envs2 = conf.GetWildcardOptions("list.*");
566 for (auto env=envs2.cbegin(); env!=envs2.cend(); env++)
567 {
568 const string fname = conf.Get<string>(*env);
569 const string &ident = env->substr(5);
570
571 ifstream fin(fname);
572 if (!fin)
573 {
574 cerr << "Could not open environment in '" << fname << "' for ${" << ident << "}: " << strerror(errno) << endl;
575 return 6;
576 }
577 for (string line; getline(fin, line); )
578 {
579 const auto &l = Tools::Trim(line);
580 if (!l.empty() && l[0]!='#')
581 envs[ident].push_back(line);
582 }
583
584 if (verbose>0)
585 cout << "Found " << envs[ident].size() << " list element(s) for ${" << ident << "}" << endl;
586 }
587
588 for (auto env=envs.cbegin(); env!=envs.cend(); env++)
589 {
590 boost::regex rexpr("\\$(\\{"+env->first+"\\}|"+env->first+"\\b)");
591 query = boost::regex_replace(query, rexpr, boost::join(env->second, ", "));
592 }
593
594 // -------------------------- Check for file permssion ---------------------
595 // Strictly speaking, checking for write permission and existance is not necessary,
596 // but it is convenient that the user does not find out that it failed after
597 // waiting long for the query result
598 //
599
600 // I am using root here instead of boost to be
601 // consistent with the access pattern by TFile
602 TString path(noout?"/dev/null":out.c_str());
603 gSystem->ExpandPathName(path);
604
605 if (!noout)
606 {
607 FileStat_t stat;
608 const Int_t exist = !gSystem->GetPathInfo(path, stat);
609 const Bool_t _write = !gSystem->AccessPathName(path, kWritePermission) && R_ISREG(stat.fMode);
610
611 if ((update && !exist) || (update && exist && !_write) || (force && exist && !_write))
612 {
613 cerr << "File '" << path << "' is not writable." << endl;
614 return 7;
615 }
616
617 if (!update && !force && exist)
618 {
619 cerr << "File '" << path << "' already exists." << endl;
620 return 8;
621 }
622 }
623
624 Time start2;
625
626 // --------------------------- Connect to database -------------------------------------------------
627
628 if (*query.rbegin()!='\n')
629 query += '\n';
630
631 if (verbose>0)
632 {
633 cout << "Connecting to database...\n";
634 cout << "Client Version: " << mysqlpp::Connection().client_version() << endl;
635 }
636
637 Database connection(uri); // Keep alive while fetching rows
638
639 if (verbose>0)
640 cout << "Server Version: " << connection.server_version() << endl;
641
642 if (print_connection)
643 {
644 try
645 {
646 const auto &res1 = connection.query("SHOW STATUS LIKE 'Compression'").store();
647 cout << "Compression of database connection is " << string(res1[0][1]) << endl;
648
649 const auto &res2 = connection.query("SHOW STATUS LIKE 'Ssl_cipher'").store();
650 cout << "Connection to databases is " << (string(res2[0][1]).empty()?"UNENCRYPTED":"ENCRYPTED ("+string(res2[0][1])+")") << endl;
651 }
652 catch (const exception &e)
653 {
654 cerr << "\nSHOW STATUS LIKE 'Compression'\n\n";
655 cerr << "SQL query failed:\n" << e.what() << endl;
656 return 9;
657 }
658 }
659
660 try
661 {
662 if (profiling)
663 connection.query("SET PROFILING=1").execute();
664 }
665 catch (const exception &e)
666 {
667 cerr << "\nSET profiling=1\n\n";
668 cerr << "SQL query failed:\n" << e.what() << endl;
669 return 10;
670 }
671
672 // -------------------------- Set user defined variables -------------------
673 if (variables.size()>0)
674 {
675 if (verbose>0)
676 cout << "Setting user defined variables..." << endl;
677
678 const string varset =
679 "SET\n "+boost::algorithm::join(variables, ",\n ");
680
681 try
682 {
683 connection.query(varset).execute();
684 }
685 catch (const exception &e)
686 {
687 cerr << '\n' << varset << "\n\n";
688 cerr << "SQL query failed:\n" << e.what() << endl;
689 return 11;
690 }
691
692 if (verbose>2)
693 cout << '\n' << varset << '\n' << endl;
694 }
695
696 // ------------------------- Explain query if requested --------------------
697
698 if (explain)
699 {
700 try
701 {
702 const auto res0 =
703 connection.query("EXPLAIN FORMAT=JSON "+query).store();
704
705 cout << res0[0][0] << endl;
706 cout << endl;
707
708 const mysqlpp::StoreQueryResult res1 =
709 connection.query("EXPLAIN "+query).store();
710
711 for (size_t i=0; i<res1.num_rows(); i++)
712 {
713 const mysqlpp::Row &row = res1[i];
714
715 cout << "\nid : " << row["id"];
716 cout << "\nselect type : " << row["select_type"];
717
718 if (!row["table"].is_null())
719 cout << "\ntable : " << row["table"];
720
721 if (!row["partitions"].is_null())
722 cout << "\npartitions : " << row["partitions"];
723
724 if (!row["key"].is_null())
725 cout << "\nselected key : " << row["key"] << " [len=" << row["key_len"] << "] out of (" << row["possible_keys"] << ")";
726
727 if (!row["type"].is_null())
728 cout << "\njoin type : " << row["type"];
729
730 //if (!row["possible_keys"].is_null())
731 // cout << "\npossible_keys: " << row["possible_keys"];
732
733 //if (!row["key_len"].is_null())
734 // cout << "\nkey_len : " << row["key_len"];
735
736 if (!row["ref"].is_null())
737 cout << "\nref : (" << row["ref"] << ") compared to the index";
738
739 if (!row["rows"].is_null())
740 cout << "\nrows : " << row["rows"];
741
742 if (!row["filtered"].is_null())
743 cout << "\nfiltered : " << row["filtered"];
744
745 if (!row["extra"].is_null())
746 cout << "\nExtra : " << row["extra"];
747
748 cout << endl;
749 }
750
751 cout << endl;
752
753 const mysqlpp::StoreQueryResult res2 =
754 connection.query("SHOW WARNINGS").store();
755
756 for (size_t i=0; i<res2.num_rows(); i++)
757 {
758 const mysqlpp::Row &row = res2[i];
759
760 // 1003 //
761 cout << row["Level"] << '[' << row["Code"] << "]:\n";
762 if (uint32_t(row["Code"])==1003)
763 format(row["Message"].c_str());
764 else
765 cout << row["Message"] << '\n' << endl;
766
767 }
768
769 }
770 catch (const exception &e)
771 {
772 cerr << '\n' << query << "\n\n";
773 cerr << "SQL query failed:\n" << e.what() << endl;
774 return 12;
775 }
776
777 return 0;
778 }
779
780 // -------------------------- Request data from database -------------------
781 if (verbose>0)
782 cout << "Requesting data... please be patient!" << endl;
783
784 if (verbose>2)
785 cout << '\n' << query << endl;
786
787 const mysqlpp::UseQueryResult res =
788 connection.query(query).use();
789
790 // -------------------------------------------------------------------------
791
792 if (verbose>0)
793 {
794 cout << "Opening file '" << path << "' [compression=" << compression << "]...\n";
795 cout << "Writing data to tree '" << tree << "'" << (nofill?" (--skipped--)":"") << endl;
796 split.print();
797 }
798
799 // ----------------------------- Open output file --------------------------
800 TFile tfile(path, update?"UPDATE":(force?"RECREATE":"CREATE"), "Rootify SQL", compression);
801 if (tfile.IsZombie())
802 return 13;
803
804 // -------------------------------------------------------------------------
805
806 // get the first row to get the field description
807 mysqlpp::Row row = res.fetch_row();
808 if (!row)
809 {
810 cerr << "Empty set returned... nothing to write." << endl;
811 return finish(connection, verbose, profiling, print_connection)+20;
812 }
813
814 if (verbose>0)
815 cout << "Trying to setup " << row.size() << " branches..." << endl;
816
817 if (verbose>1)
818 cout << endl;
819
820 const mysqlpp::FieldNames &l = *row.field_list().list;
821
822 vector<FileEntry::Container> container;
823
824 UInt_t cols = 0;
825
826 // IMPLEMENT FILE SPLITTING!
827 // OpenFile(tree, query)
828 // SetupColumns
829 // WriteRow
830 // CloseFile
831
832 // Ratio[3]: 50%, 20%, 30%
833 // File[x3]: root, cout, fout
834
835
836 // -------------------- Configure branches of TTree ------------------------
837 vector<TTree*> ttree;
838
839 if (split.empty())
840 ttree.emplace_back(new TTree(tree.c_str(), query.c_str()));
841 else
842 for (size_t i=0; i<split.size(); i++)
843 ttree.emplace_back(new TTree((tree+"["+to_string(i)+"]").c_str(), query.c_str()));
844
845 size_t skipno = 0;
846 size_t skipat = 0;
847 size_t skipreg = 0;
848 size_t skipch = 0;
849 for (size_t i=0; i<l.size(); i++)
850 {
851 string t = row[i].type().sql_name();
852
853 bool skip = false;
854
855 // Remove trailing " NULL"
856 if (t.find(" NOT NULL")==t.size()-9)
857 t = t.substr(0, t.size()-9);
858 if (t.find(" NULL")==t.size()-5)
859 t = t.substr(0, t.size()-5);
860
861 // Get FileEntry description corresponding to the sql type
862 const auto it = FileEntry::LUT.sql(t);
863
864 // Skip all columns that do not follow a convertible type
865 if (it==FileEntry::LUT.end())
866 {
867 skip = true;
868 skipno++;
869 }
870
871 // For valid colums, check if they are of a type that can not be written to a root file
872 if (!skip && (it->type==FileEntry::kVarchar || it->type==FileEntry::kChar))
873 {
874 skip = true;
875 skipch++;
876 }
877
878 // Check if there is any user request for skipping a column
879 if (!skip)
880 {
881 for (auto pattern=_ignore.cbegin(); pattern!=_ignore.cend(); pattern++)
882 {
883 if (boost::regex_match(l[i], boost::regex(*pattern)))
884 {
885 skip = true;
886 skipreg++;
887 break;
888 }
889 }
890 }
891
892 // Skip all columns that start with an @ (variable names)
893 if (!skip && l[i][0]=='@')
894 {
895 skip = true;
896 skipat++;
897 }
898
899 // Create the 'leaflist'. If no accurate conversion is requested, create doubles for all leaves
900 const string leaflist = l[i] + "/" + (accurate ? it->branch : 'D');
901
902 if (verbose>1)
903 cout << (skip?" - ":" + ") << leaflist.c_str() << " [" << t << "] {" << (it==FileEntry::LUT.end()?'-':it->branch) << "}\n";
904
905 // Create the container entry (must be emplace_back due to the std::string)
906 if (accurate)
907 container.emplace_back(leaflist, "", it->type);
908 else
909 container.emplace_back(leaflist, it->type);
910
911 if (skip)
912 continue;
913
914 // Create corresponding branches in all trees
915 for (auto itree=ttree.begin(); itree!=ttree.end(); itree++)
916 itree[0]->Branch(l[i].c_str(), container[i].ptr, leaflist.c_str());
917
918 cols++;
919 }
920 // -------------------------------------------------------------------------
921
922 if (verbose>1)
923 cout << endl;
924 if (verbose>0)
925 {
926 if (skipno)
927 cout << skipno << " branches skipped because no suitable type available." << endl;
928 if (skipch)
929 cout << skipch << " branches skipped because type is a character string." << endl;
930 if (skipreg)
931 cout << skipreg << " branches skipped due to ignore list." << endl;
932 if (skipat)
933 cout << skipat << " branches skipped due to name starting with @." << endl;
934 cout << "Configured " << cols << " branches.\nFilling branches..." << endl;
935 }
936
937 // ------------------------- Open the ascii files --------------------------
938
939 vector<unique_ptr<ofstream>> fout;
940 if (!write.empty())
941 {
942 vector<string> names;
943 if (split.empty())
944 names.emplace_back(write);
945 else
946 for (size_t i=0; i<split.size(); i++)
947 names.emplace_back(write+"-"+to_string(i));
948
949 for (auto it=names.cbegin(); it!=names.cend(); it++)
950 {
951 fout.emplace_back(new ofstream(*it));
952 if (!*fout.rbegin())
953 cout << "WARNING: Writing to '" << write << "' failed: " << strerror(errno) << endl;
954 }
955 }
956
957 // ----------------------- Prepare the ascii comment -----------------------
958
959 string contents;
960
961 istringstream istr(query);
962 size_t line = 0;
963 bool header = true;
964 while (istr)
965 {
966 string ibuf;
967 getline(istr, ibuf);
968 const string sbuf = Tools::Trim(ibuf);
969
970 const bool shabang = line==0 && ibuf[0]=='#' && ibuf[1]=='!';
971 const bool comment = sbuf[0]=='#' && !shabang;
972 const bool isquery = !shabang && !comment;
973 if (isquery)
974 header = false;
975
976 line++;
977
978 if ((copy_shabang && shabang) ||
979 (copy_header && comment && header) ||
980 (copy_query && isquery) ||
981 (copy_comments && comment && !header))
982 contents += '#' + ibuf + '\n';
983 }
984
985 // ----------------------- Write the ascii headers -------------------------
986
987 ostringstream htxt;
988 if (display || !fout.empty())
989 htxt << row.field_list(delimiter.c_str());
990
991 if (display)
992 {
993 cout << endl;
994 cout << contents << endl;
995 cout << "# " << htxt.str() << endl;
996 }
997 for (auto ff=fout.begin(); ff!=fout.end(); ff++)
998 {
999 **ff << contents;
1000 **ff << "# " << htxt.str() << endl;
1001 }
1002
1003 // ---------------------- Fill TTree with DB data --------------------------
1004
1005 size_t count = 0;
1006 size_t skip = 0;
1007 do
1008 {
1009 size_t index = split.index(count++);
1010
1011 ostringstream rtxt;
1012 if (display || !fout.empty())
1013 rtxt << row.value_list(delimiter.c_str(), mysqlpp::do_nothing);
1014
1015 if (display)
1016 cout << rtxt.str() << '\n';
1017 if (!fout.empty())
1018 *fout[index] << rtxt.str() << '\n';
1019
1020 size_t idx=0;
1021 for (auto col=row.begin(); col!=row.end(); col++, idx++)
1022 {
1023 if (!ignorenull && col->is_null())
1024 {
1025 skip++;
1026 break;
1027 }
1028
1029 if (accurate)
1030 {
1031 // Do an accurate type conversion and assign to the memory allocated as branch-address
1032 switch (container[idx].type)
1033 {
1034 case FileEntry::kBool: Convert<bool> (container[idx], *col); break;
1035 case FileEntry::kFloat: Convert<float> (container[idx], *col); break;
1036 case FileEntry::kDecimal:
1037 case FileEntry::kNumeric:
1038 case FileEntry::kDouble: Convert<double> (container[idx], *col); break;
1039 case FileEntry::kUInt64: Convert<uint64_t>(container[idx], *col); break;
1040 case FileEntry::kInt64: Convert<int64_t> (container[idx], *col); break;
1041 case FileEntry::kUInt32: Convert<uint32_t>(container[idx], *col); break;
1042 case FileEntry::kInt32: Convert<int32_t> (container[idx], *col); break;
1043 case FileEntry::kUInt16: Convert<uint16_t>(container[idx], *col); break;
1044 case FileEntry::kInt16: Convert<int16_t> (container[idx], *col); break;
1045 case FileEntry::kUInt8: Convert<uint8_t> (container[idx], *col); break;
1046 case FileEntry::kInt8:
1047 case FileEntry::kDate:
1048 *reinterpret_cast<uint64_t*>(container[idx].ptr) = static_cast<time_t>(mysqlpp::Date(*col));
1049 break;
1050 case FileEntry::kDateTime:
1051 *reinterpret_cast<uint64_t*>(container[idx].ptr) = static_cast<time_t>(mysqlpp::DateTime(*col));
1052 break;
1053 case FileEntry::kTime:
1054 *reinterpret_cast<uint32_t*>(container[idx].ptr) = static_cast<time_t>(mysqlpp::Time(*col));
1055 break;
1056 default:
1057 break;
1058 }
1059 }
1060 else
1061 {
1062 // Convert everything to double, no matter what... and assign to the memory allocated as branch-address
1063 switch (container[idx].type)
1064 {
1065 case FileEntry::kBool:
1066 case FileEntry::kFloat:
1067 case FileEntry::kDecimal:
1068 case FileEntry::kNumeric:
1069 case FileEntry::kDouble:
1070 case FileEntry::kUInt64:
1071 case FileEntry::kInt64:
1072 case FileEntry::kUInt32:
1073 case FileEntry::kInt32:
1074 case FileEntry::kUInt16:
1075 case FileEntry::kInt16:
1076 case FileEntry::kUInt8:
1077 case FileEntry::kInt8:
1078 Convert<double>(container[idx], *col);
1079 break;
1080 case FileEntry::kDate:
1081 *reinterpret_cast<double*>(container[idx].ptr) = static_cast<time_t>(mysqlpp::Date(*col));
1082 break;
1083 case FileEntry::kDateTime:
1084 *reinterpret_cast<double*>(container[idx].ptr) = static_cast<time_t>(mysqlpp::DateTime(*col));
1085 break;
1086 case FileEntry::kTime:
1087 *reinterpret_cast<double*>(container[idx].ptr) = static_cast<time_t>(mysqlpp::Time(*col));
1088 break;
1089 default:
1090 break;
1091 }
1092 }
1093 }
1094
1095 if (idx==row.size() && !nofill)
1096 ttree[index]->Fill();
1097
1098 row = res.fetch_row();
1099
1100
1101 } while (row);
1102
1103 // -------------------------------------------------------------------------
1104
1105 if (display)
1106 cout << '\n' << endl;
1107
1108 if (verbose>0)
1109 {
1110 cout << count << " rows fetched." << endl;
1111 if (skip>0)
1112 cout << skip << " rows skipped due to NULL field." << endl;
1113
1114 for (size_t i=0; i<ttree.size(); i++)
1115 cout << ttree[i]->GetEntries() << " rows filled into tree #" << i << "." << endl;
1116 }
1117
1118 for (auto it=ttree.begin(); it!=ttree.end(); it++)
1119 (*it)->Write();
1120 tfile.Close();
1121
1122 if (verbose>0)
1123 {
1124 const auto sec = Time().UnixTime()-start.UnixTime();
1125
1126 cout << Tools::Scientific(tfile.GetSize()) << "B written to disk.\n";
1127 cout << "File closed.\n";
1128 cout << "Execution time: " << sec << "s ";
1129 cout << "(" << Tools::Fractional(sec/count) << "s/row)\n";
1130 cout << "--------------------------------------------------------------" << endl;
1131 }
1132
1133 return finish(connection, verbose, profiling, print_connection);
1134}
Note: See TracBrowser for help on using the repository browser.