source: trunk/FACT++/src/rootifysql.cc@ 19090

Last change on this file since 19090 was 19090, checked in by tbretz, 7 years ago
A very simple implementation which will EXPLAIN the query with what MySQL offers -- some formatting might help to read the output, but I found no easy way to do that.
File size: 18.2 KB
Line 
1#include "Database.h"
2
3#include <regex>
4
5#include <boost/algorithm/string/join.hpp>
6
7#include "tools.h"
8#include "Time.h"
9#include "Configuration.h"
10
11#include <TROOT.h>
12#include <TSystem.h>
13#include <TFile.h>
14#include <TTree.h>
15
16using namespace std;
17
18// ------------------------------------------------------------------------
19
20void SetupConfiguration(Configuration &conf)
21{
22 po::options_description control("Rootify SQL");
23 control.add_options()
24 ("uri,u", var<string>()->required(), "Database link as in\n\tuser:password@server[:port]/database.")
25 ("query,q", var<string>(""), "MySQL query (overwrites --file)")
26 ("file", var<string>("rootify.sql"), "An ASCII file with the MySQL query (overwrites --query)")
27 ("ignore-null,i", po_switch(), "Do not skip rows containing any NULL field")
28 ("out,o", var<string>("rootify.root"), "Output root file name")
29 ("force,f", po_switch(), "Force overwriting an existing root file ('RECREATE')")
30 ("update", po_switch(), "Update an existing root file with the new tree ('UPDATE')")
31 ("compression,c", var<uint16_t>(1), "zlib compression level for the root file")
32 ("tree,t", var<string>("Result"), "Name of the root tree")
33 ("ignore", vars<string>(), "Ignore the given columns")
34 ("display,d", po_switch(), "Displays contents on the screen (most usefull in combination with mysql statements as SHOW or EXPLAIN)")
35 ("null,n", po_switch(), "Redirect the output file to /dev/null (mainly for debugging purposes, e.g. performance studies)")
36 ("no-fill", po_switch(), "Do not fill events into the root file (mainly for debugging purposes, e.g. performance studies)")
37 ("delimiter", var<string>(""), "The delimiter used if contents are displayed with --display (default=\\t)")
38 ("explain", po_switch(), "Requests an EXPLAIN from the server (shows the serveroptimized query)")
39 ("var.*", var<string>(), "Predefined SQL user variables (@VAR)")
40 ("env.*", vars<string>(), "Predefined environment for substitutions in the query ($ENV)")
41 ("verbose,v", var<uint16_t>(1), "Verbosity (0: quiet, 1: default, 2: more, 3, ...)")
42 ;
43
44 po::positional_options_description p;
45 p.add("file", 1); // The 1st positional options (n=1)
46 p.add("out", 1); // The 2nd positional options (n=1)
47
48 conf.AddOptions(control);
49 conf.SetArgumentPositions(p);
50}
51
52void PrintUsage()
53{
54 cout <<
55 "rootifysql - Converts the result of a mysql query into a root file\n"
56 "\n"
57 "For convenience, this documentation uses the extended version of the options, "
58 "refer to the output below to get the abbreviations.\n"
59 "\n"
60 "Writes the result of a mysql query into a root file. For each column, a branch is "
61 "created of type double with the field name as name. This is usually the column name "
62 "if not specified otherwise by the AS mysql directive.\n"
63 "\n"
64 "Columns with CHAR or VARCHAR as field type are ignored. DATETIME, DATE and TIME "
65 "columns are converted to unix time (time_t). Rows containing any file which is "
66 "NULL are skipped if not suppressed by the --ignore-null option. Ideally, the query "
67 "is compiled in a way that no NULL field is returned. With the --display option the "
68 "result of the request is printed on the screen (NULL skipping still in action). "
69 "This can be useful to create an ascii file or to show results as 'SHOW DATABASES' "
70 "or 'EXPLAIN table'. To redirect the contents into an ascii file, the option -v0 "
71 "is useful. To suppredd writing to an output file --null can be used.\n"
72 "\n"
73 "The default is to read the query from a file called rootify.sql. Except if a different "
74 "filename is specified by the --file option or a query is given with --query.\n"
75 "\n"
76 "As a trick, the rootify.sql file can be made excutable (chmod u+x rootify.sql). "
77 "If the first line contains '#!rootifysql', the script can be executed directly.\n"
78 "\n"
79 "Columns whose name start with @ are skipped. If you want them in your output file "
80 "give them a name using AS, e.g. 'SELECT @A:=5 AS A'.\n"
81 "\n"
82 "You can use variables in your sql query like @MyVar and define them on the "
83 "command line. In this example with --var.MyVar=5\n"
84 "\n"
85 "You can use environment definitions for substitutions in your SQL query. "
86 "For example --env.TEST=5 would replace $TEST or ${TEST} in your query by 5."
87 "If you specifiy one environmentvariable more than once, a list is created. "
88 "For example --env.TEST=1 --env.TEST=2 --env.TEST=3 would substitute "
89 "$TEST or ${TEST} by '1, 2, 3'. This is useful for the SQL `IN` keyword.\n"
90 "\n"
91 "Comments in the query-file can be placed according to the SQL standard inline "
92 "/*comment*/ or introduced with # (shell script style) or -- (SQL style).\n"
93 "\n"
94 "In case of succes, 0 is returned, a value>0 otherwise.\n"
95 "\n"
96 "Usage: rootifysql [rootify.sql [rootify.root]] [-u URI] [-q query|-f file] [-i] [-o out] [-f] [-cN] [-t tree] [-vN]\n"
97 "\n"
98 ;
99 cout << endl;
100}
101
102int main(int argc, const char* argv[])
103{
104 Time start;
105
106 gROOT->SetBatch();
107
108 Configuration conf(argv[0]);
109 conf.SetPrintUsage(PrintUsage);
110 SetupConfiguration(conf);
111
112 if (!conf.DoParse(argc, argv))
113 return 127;
114
115 // ----------------------------- Evaluate options --------------------------
116 const string uri = conf.Get<string>("uri");
117 const string out = conf.Get<string>("out");
118 const string file = conf.Get<string>("file");
119 const string tree = conf.Get<string>("tree");
120 const bool force = conf.Get<bool>("force");
121 const bool ignorenull = conf.Get<bool>("ignore-null");
122 const bool update = conf.Get<bool>("update");
123 const bool display = conf.Get<bool>("display");
124 const bool noout = conf.Get<bool>("null");
125 const bool nofill = conf.Get<bool>("no-fill");
126 const bool explain = conf.Get<bool>("explain");
127 const uint16_t verbose = conf.Get<uint16_t>("verbose");
128 const uint16_t compression = conf.Get<uint16_t>("compression");
129 const string delimiter = conf.Get<string>("delimiter");
130 const vector<string> _ignore = conf.Vec<string>("ignore");
131 //const vector<Map> mymap = conf.Vec<Map>("map");
132
133 // -------------------------------------------------------------------------
134
135 const auto vars = conf.GetWildcardOptions("var.*");
136
137 vector<string> variables;
138 for (const auto &var : vars)
139 variables.emplace_back('@'+var.substr(4)+":="+Tools::Trim(conf.Get<string>(var)));
140
141 // -------------------------------------------------------------------------
142
143 if (verbose>0)
144 cout << "\n--------------------- Rootify SQL ----------------------" << endl;
145
146 string query = conf.Get<string>("query");
147 if (query.empty())
148 {
149 if (verbose>0)
150 cout << "Reading query from file '" << file << "'." << endl;
151
152 ifstream fin(file);
153 if (!fin)
154 {
155 cerr << "Could not open '" << file << "': " << strerror(errno) << endl;
156 return 1;
157 }
158 getline(fin, query, (char)fin.eof());
159 }
160
161 if (query.empty())
162 {
163 cerr << "No query specified." << endl;
164 return 2;
165 }
166
167 // -------------------------------------------------------------------------
168
169 const auto envs = conf.GetWildcardOptions("env.*");
170
171 for (const auto &env : envs)
172 {
173 regex r1("\\$\\{"+env.substr(4)+"\\}");
174 regex r2("\\$"+env.substr(4)+"\\b");
175
176 const string repl = boost::join(conf.Vec<string>(env), ", ");
177
178 query = regex_replace(query, r1, repl);
179 query = regex_replace(query, r2, repl);
180 }
181
182 // -------------------------- Check for file permssion ---------------------
183 // Strictly speaking, checking for write permission and existance is not necessary,
184 // but it is convenient that the user does not find out that it failed after
185 // waiting long for the query result
186 //
187 // I am using root here instead of boost to be
188 // consistent with the access pattern by TFile
189 TString path(noout?"/dev/null":out.c_str());
190 gSystem->ExpandPathName(path);
191
192 if (!noout)
193 {
194 FileStat_t stat;
195 const Int_t exist = !gSystem->GetPathInfo(path, stat);
196 const Bool_t write = !gSystem->AccessPathName(path, kWritePermission) && R_ISREG(stat.fMode);
197
198 if ((update && !exist) || (update && exist && !write) || (force && exist && !write))
199 {
200 cerr << "File '" << path << "' is not writable." << endl;
201 return 3;
202 }
203
204 if (!update && !force && exist)
205 {
206 cerr << "File '" << path << "' already exists." << endl;
207 return 4;
208 }
209 }
210
211 Time start2;
212
213 // --------------------------- Connect to database -------------------------------------------------
214
215 if (query.back()!='\n')
216 query += '\n';
217
218 if (verbose>0)
219 cout << "Connecting to database..." << endl;
220
221
222 Database connection(uri); // Keep alive while fetching rows
223
224 // -------------------------- Set user defined variables -------------------
225 if (variables.size()>0)
226 {
227 if (verbose>0)
228 cout << "Setting user defined variables..." << endl;
229
230 const string varset =
231 "SET\n "+boost::algorithm::join(variables, ",\n ");
232
233 try
234 {
235 connection.query(varset).execute();
236 }
237 catch (const exception &e)
238 {
239 cerr << varset << "\n\n";
240 cerr << "SQL query failed:\n" << e.what() << endl;
241 return 5;
242 }
243
244 if (verbose>2)
245 cout << '\n' << varset << '\n' << endl;
246 }
247
248 // ------------------------- Explain query if requested --------------------
249
250 if (explain)
251 {
252 try
253 {
254 const auto res0 =
255 connection.query("EXPLAIN FORMAT=JSON "+query).store();
256
257 cout << res0[0][0] << endl;
258 cout << endl;
259
260 const mysqlpp::StoreQueryResult res1 =
261 connection.query("EXPLAIN "+query).store();
262
263 for (size_t i=0; i<res1.num_rows(); i++)
264 {
265 const mysqlpp::Row &row = res1[i];
266
267 cout << "\nid : " << row["id"];
268 cout << "\nselect type : " << row["select_type"];
269
270 if (!row["table"].is_null())
271 cout << "\ntable : " << row["table"];
272
273 if (!row["partitions"].is_null())
274 cout << "\npartitions : " << row["partitions"];
275
276 if (!row["key"].is_null())
277 cout << "\nselected key : " << row["key"] << " [len=" << row["key_len"] << "] out of (" << row["possible_keys"] << ")";
278
279 if (!row["type"].is_null())
280 cout << "\njoin type : " << row["type"];
281
282 //if (!row["possible_keys"].is_null())
283 // cout << "\npossible_keys: " << row["possible_keys"];
284
285 //if (!row["key_len"].is_null())
286 // cout << "\nkey_len : " << row["key_len"];
287
288 if (!row["ref"].is_null())
289 cout << "\nref : (" << row["ref"] << ") compared to the index";
290
291 if (!row["rows"].is_null())
292 cout << "\nrows : " << row["rows"];
293
294 if (!row["filtered"].is_null())
295 cout << "\nfiltered : " << row["filtered"];
296
297 if (!row["extra"].is_null())
298 cout << "\nExtra : " << row["extra"];
299
300 cout << endl;
301 }
302
303 cout << endl;
304
305 return 0;
306
307 const mysqlpp::StoreQueryResult res2 =
308 connection.query("SHOW WARNINGS").store();
309
310 for (size_t i=0; i<res2.num_rows(); i++)
311 {
312 const mysqlpp::Row &row = res2[i];
313
314 // 1003 //
315 cout << row["Level"] << '[' << row["Code"] << "]:\n";
316 cout << row["Message"] << '\n' << endl;
317
318 }
319
320 }
321 catch (const exception &e)
322 {
323 cerr << query << "\n\n";
324 cerr << "SQL query failed:\n" << e.what() << endl;
325 return 6;
326 }
327
328 return 0;
329 }
330
331 // -------------------------- Request data from database -------------------
332 if (verbose>0)
333 cout << "Requesting data..." << endl;
334
335 if (verbose>2)
336 cout << '\n' << query << endl;
337
338 const mysqlpp::UseQueryResult res =
339 connection.query(query).use();
340
341 // -------------------------------------------------------------------------
342
343 if (verbose>0)
344 cout << "Opening file '" << path << "' [compression=" << compression << "]..." << endl;
345
346 // ----------------------------- Open output file --------------------------
347 TFile tfile(path, update?"UPDATE":(force?"RECREATE":"CREATE"), "Rootify SQL", compression);
348 if (tfile.IsZombie())
349 return 7;
350
351 // -------------------------------------------------------------------------
352
353 // get the first row to get the field description
354 mysqlpp::Row row = res.fetch_row();
355 if (!row)
356 {
357 cerr << "Empty set returned... nothing to write." << endl;
358 return 8;
359 }
360
361 if (verbose>0)
362 cout << "Trying to setup " << row.size() << " branches..." << endl;
363
364 if (verbose>1)
365 cout << endl;
366
367 const mysqlpp::FieldNames &l = *row.field_list().list;
368
369 vector<double> buf(l.size());
370 vector<uint8_t> typ(l.size(),'n'); // n=number [double], d is used for DateTime
371
372 UInt_t cols = 0;
373
374
375 // -------------------- Configure branches of TTree ------------------------
376 TTree *ttree = new TTree(tree.c_str(), query.c_str());
377
378 size_t skipat = 0;
379 size_t skipreg = 0;
380 for (size_t i=0; i<l.size(); i++)
381 {
382 const string t = row[i].type().sql_name();
383
384 if (t.find("DATETIME")!=string::npos)
385 typ[i] = 'd';
386 else
387 if (t.find("DATE")!=string::npos)
388 typ[i] = 'D';
389 else
390 if (t.find("TIME")!=string::npos)
391 typ[i] = 'T';
392 else
393 if (t.find("VARCHAR")!=string::npos)
394 typ[i] = 'V';
395 else
396 if (t.find("CHAR")!=string::npos)
397 typ[i] = 'C';
398
399 bool found = false;
400 for (const auto &pattern: _ignore)
401 {
402 if (regex_match(l[i], regex(pattern)))
403 {
404 found = true;
405 typ[i] = '-';
406 skipreg++;
407 break;
408 }
409 }
410
411 if (l[i][0]=='@')
412 {
413 typ[i] = '@';
414 skipat++;
415 }
416
417 const bool use = l[i][0]!='@' && typ[i]!='V' && typ[i]!='C' && !found;
418
419 if (verbose>1)
420 cout << (use?" + ":" - ") << l[i].c_str() << " [" << t << "] {" << typ[i] << "}\n";
421
422 if (use)
423 {
424 // string name = l[i];
425 // for (const auto &m: mymap)
426 // name = boost::regex_replace(l[i], boost::regex(m.first), m.second);
427
428 ttree->Branch(l[i].c_str(), buf.data()+i);
429 cols++;
430 }
431 }
432 // -------------------------------------------------------------------------
433
434 if (verbose>1)
435 cout << endl;
436 if (verbose>0)
437 {
438 if (skipreg)
439 cout << skipreg << " branches skipped due to ignore list." << endl;
440 if (skipat)
441 cout << skipat << " branches skipped due to name starting with @." << endl;
442 cout << "Configured " << cols << " branches.\nFilling branches..." << endl;
443 }
444
445 if (display)
446 {
447 cout << endl;
448 cout << "#";
449 for (size_t i=0; i<l.size(); i++)
450 cout << ' ' << l[i].c_str();
451 cout << endl;
452 }
453
454 // ---------------------- Fill TTree with DB data --------------------------
455 size_t count = 0;
456 size_t skip = 0;
457 do
458 {
459 count++;
460
461 ostringstream sout;
462
463 size_t idx=0;
464 for (auto col=row.begin(); col!=row.end(); col++, idx++)
465 {
466 if (display)
467 {
468 if (idx>0)
469 sout << (delimiter.empty()?"\t":delimiter);
470 sout << col->c_str();
471 }
472
473 if (!ignorenull && col->is_null())
474 {
475 skip++;
476 break;
477 }
478
479 switch (typ[idx])
480 {
481 case 'd':
482 buf[idx] = time_t((mysqlpp::DateTime)(*col));
483 break;
484
485 case 'D':
486 buf[idx] = time_t((mysqlpp::Date)(*col));
487 break;
488
489 case 'T':
490 buf[idx] = time_t((mysqlpp::Time)(*col));
491 break;
492
493 case 'V':
494 case 'C':
495 case '-':
496 case '@':
497 break;
498
499 default:
500 buf[idx] = atof(col->c_str());
501 }
502 }
503
504 if (idx==row.size())
505 {
506 if (!nofill)
507 ttree->Fill();
508
509 if (display)
510 cout << sout.str() << endl;
511 }
512
513 row = res.fetch_row();
514
515
516 } while (row);
517
518 // -------------------------------------------------------------------------
519
520 if (display)
521 cout << '\n' << endl;
522
523 if (verbose>0)
524 {
525 cout << count << " rows fetched." << endl;
526 if (skip>0)
527 cout << skip << " rows skipped due to NULL field." << endl;
528
529 cout << ttree->GetEntries() << " rows filled into tree." << endl;
530 }
531
532 ttree->Write();
533 tfile.Close();
534
535 if (verbose>0)
536 {
537 cout << "File closed.\n";
538 cout << "Execution time: " << Time().UnixTime()-start.UnixTime() << "s\n";
539 cout << "--------------------------------------------------------" << endl;
540 }
541
542 return 0;
543}
Note: See TracBrowser for help on using the repository browser.