source: trunk/FACT++/src/rootifysql.cc@ 19089

Last change on this file since 19089 was 19079, checked in by tbretz, 6 years ago
Added a possibility for substitutions in the query.
File size: 15.5 KB
Line 
1#include "Database.h"
2
3#include <regex>
4
5#include <boost/algorithm/string/join.hpp>
6
7#include "tools.h"
8#include "Time.h"
9#include "Configuration.h"
10
11#include <TROOT.h>
12#include <TSystem.h>
13#include <TFile.h>
14#include <TTree.h>
15
16using namespace std;
17
18// ------------------------------------------------------------------------
19
20void SetupConfiguration(Configuration &conf)
21{
22 po::options_description control("Rootify SQL");
23 control.add_options()
24 ("uri,u", var<string>()->required(), "Database link as in\n\tuser:password@server[:port]/database.")
25 ("query,q", var<string>(""), "MySQL query (overwrites --file)")
26 ("file", var<string>("rootify.sql"), "An ASCII file with the MySQL query (overwrites --query)")
27 ("ignore-null,i", po_switch(), "Do not skip rows containing any NULL field")
28 ("out,o", var<string>("rootify.root"), "Output root file name")
29 ("force,f", po_switch(), "Force overwriting an existing root file ('RECREATE')")
30 ("update", po_switch(), "Update an existing root file with the new tree ('UPDATE')")
31 ("compression,c", var<uint16_t>(1), "zlib compression level for the root file")
32 ("tree,t", var<string>("Result"), "Name of the root tree")
33 ("ignore", vars<string>(), "Ignore the given columns")
34 ("display,d", po_switch(), "Displays contents on the screen (most usefull in combination with mysql statements as SHOW or EXPLAIN)")
35 ("null,n", po_switch(), "Redirect the output file to /dev/null (mainly for debugging purposes, e.g. performance studies)")
36 ("no-fill", po_switch(), "Do not fill events into the root file (mainly for debugging purposes, e.g. performance studies)")
37 ("delimiter", var<string>(""), "The delimiter used if contents are displayed with --display (default=\\t)")
38 ("var.*", var<string>(), "Predefined SQL user variables (@VAR)")
39 ("env.*", vars<string>(), "Predefined environment for substitutions in the query ($ENV)")
40 ("verbose,v", var<uint16_t>(1), "Verbosity (0: quiet, 1: default, 2: more, 3, ...)")
41 ;
42
43 po::positional_options_description p;
44 p.add("file", 1); // The 1st positional options (n=1)
45 p.add("out", 1); // The 2nd positional options (n=1)
46
47 conf.AddOptions(control);
48 conf.SetArgumentPositions(p);
49}
50
51void PrintUsage()
52{
53 cout <<
54 "rootifysql - Converts the result of a mysql query into a root file\n"
55 "\n"
56 "For convenience, this documentation uses the extended version of the options, "
57 "refer to the output below to get the abbreviations.\n"
58 "\n"
59 "Writes the result of a mysql query into a root file. For each column, a branch is "
60 "created of type double with the field name as name. This is usually the column name "
61 "if not specified otherwise by the AS mysql directive.\n"
62 "\n"
63 "Columns with CHAR or VARCHAR as field type are ignored. DATETIME, DATE and TIME "
64 "columns are converted to unix time (time_t). Rows containing any file which is "
65 "NULL are skipped if not suppressed by the --ignore-null option. Ideally, the query "
66 "is compiled in a way that no NULL field is returned. With the --display option the "
67 "result of the request is printed on the screen (NULL skipping still in action). "
68 "This can be useful to create an ascii file or to show results as 'SHOW DATABASES' "
69 "or 'EXPLAIN table'. To redirect the contents into an ascii file, the option -v0 "
70 "is useful. To suppredd writing to an output file --null can be used.\n"
71 "\n"
72 "The default is to read the query from a file called rootify.sql. Except if a different "
73 "filename is specified by the --file option or a query is given with --query.\n"
74 "\n"
75 "As a trick, the rootify.sql file can be made excutable (chmod u+x rootify.sql). "
76 "If the first line contains '#!rootifysql', the script can be executed directly.\n"
77 "\n"
78 "Columns whose name start with @ are skipped. If you want them in your output file "
79 "give them a name using AS, e.g. 'SELECT @A:=5 AS A'.\n"
80 "\n"
81 "You can use variables in your sql query like @MyVar and define them on the "
82 "command line. In this example with --var.MyVar=5\n"
83 "\n"
84 "You can use environment definitions for substitutions in your SQL query. "
85 "For example --env.TEST=5 would replace $TEST or ${TEST} in your query by 5."
86 "If you specifiy one environmentvariable more than once, a list is created. "
87 "For example --env.TEST=1 --env.TEST=2 --env.TEST=3 would substitute "
88 "$TEST or ${TEST} by '1, 2, 3'. This is useful for the SQL `IN` keyword.\n"
89 "\n"
90 "Comments in the query-file can be placed according to the SQL standard inline "
91 "/*comment*/ or introduced with # (shell script style) or -- (SQL style).\n"
92 "\n"
93 "In case of succes, 0 is returned, a value>0 otherwise.\n"
94 "\n"
95 "Usage: rootifysql [rootify.sql [rootify.root]] [-u URI] [-q query|-f file] [-i] [-o out] [-f] [-cN] [-t tree] [-vN]\n"
96 "\n"
97 ;
98 cout << endl;
99}
100
101int main(int argc, const char* argv[])
102{
103 Time start;
104
105 gROOT->SetBatch();
106
107 Configuration conf(argv[0]);
108 conf.SetPrintUsage(PrintUsage);
109 SetupConfiguration(conf);
110
111 if (!conf.DoParse(argc, argv))
112 return 127;
113
114 // ----------------------------- Evaluate options --------------------------
115 const string uri = conf.Get<string>("uri");
116 const string out = conf.Get<string>("out");
117 const string file = conf.Get<string>("file");
118 const string tree = conf.Get<string>("tree");
119 const bool force = conf.Get<bool>("force");
120 const bool ignorenull = conf.Get<bool>("ignore-null");
121 const bool update = conf.Get<bool>("update");
122 const bool display = conf.Get<bool>("display");
123 const bool noout = conf.Get<bool>("null");
124 const bool nofill = conf.Get<bool>("no-fill");
125 const uint16_t verbose = conf.Get<uint16_t>("verbose");
126 const uint16_t compression = conf.Get<uint16_t>("compression");
127 const string delimiter = conf.Get<string>("delimiter");
128 const vector<string> _ignore = conf.Vec<string>("ignore");
129 //const vector<Map> mymap = conf.Vec<Map>("map");
130
131 // -------------------------------------------------------------------------
132
133 const auto vars = conf.GetWildcardOptions("var.*");
134
135 vector<string> variables;
136 for (const auto &var : vars)
137 variables.emplace_back('@'+var.substr(4)+":="+Tools::Trim(conf.Get<string>(var)));
138
139 // -------------------------------------------------------------------------
140
141 if (verbose>0)
142 cout << "\n--------------------- Rootify SQL ----------------------" << endl;
143
144 string query = conf.Get<string>("query");
145 if (query.empty())
146 {
147 if (verbose>0)
148 cout << "Reading query from file '" << file << "'." << endl;
149
150 ifstream fin(file);
151 if (!fin)
152 {
153 cerr << "Could not open '" << file << "': " << strerror(errno) << endl;
154 return 1;
155 }
156 getline(fin, query, (char)fin.eof());
157 }
158
159 if (query.empty())
160 {
161 cerr << "No query specified." << endl;
162 return 2;
163 }
164
165 // -------------------------------------------------------------------------
166
167 const auto envs = conf.GetWildcardOptions("env.*");
168
169 for (const auto &env : envs)
170 {
171 regex r1("\\$\\{"+env.substr(4)+"\\}");
172 regex r2("\\$"+env.substr(4)+"\\b");
173
174 const string repl = boost::join(conf.Vec<string>(env), ", ");
175
176 query = regex_replace(query, r1, repl);
177 query = regex_replace(query, r2, repl);
178 }
179
180 // -------------------------- Check for file permssion ---------------------
181 // Strictly speaking, checking for write permission and existance is not necessary,
182 // but it is convenient that the user does not find out that it failed after
183 // waiting long for the query result
184 //
185 // I am using root here instead of boost to be
186 // consistent with the access pattern by TFile
187 TString path(noout?"/dev/null":out.c_str());
188 gSystem->ExpandPathName(path);
189
190 if (!noout)
191 {
192 FileStat_t stat;
193 const Int_t exist = !gSystem->GetPathInfo(path, stat);
194 const Bool_t write = !gSystem->AccessPathName(path, kWritePermission) && R_ISREG(stat.fMode);
195
196 if ((update && !exist) || (update && exist && !write) || (force && exist && !write))
197 {
198 cerr << "File '" << path << "' is not writable." << endl;
199 return 3;
200 }
201
202 if (!update && !force && exist)
203 {
204 cerr << "File '" << path << "' already exists." << endl;
205 return 4;
206 }
207 }
208
209 Time start2;
210
211 // --------------------------- Connect to database -------------------------------------------------
212
213 if (query.back()!='\n')
214 query += '\n';
215
216 if (verbose>0)
217 cout << "Connecting to database..." << endl;
218
219
220 Database connection(uri); // Keep alive while fetching rows
221
222 // -------------------------- Set user defined variables -------------------
223 if (variables.size()>0)
224 {
225 if (verbose>0)
226 cout << "Setting user defined variables..." << endl;
227
228 const string varset =
229 "SET\n "+boost::algorithm::join(variables, ",\n ");
230
231 try
232 {
233 connection.query(varset).execute();
234 }
235 catch (const exception &e)
236 {
237 cerr << varset << "\n\n";
238 cerr << "SQL query failed:\n" << e.what() << endl;
239 return 5;
240 }
241
242 if (verbose>2)
243 cout << '\n' << varset << '\n' << endl;
244 }
245
246 // -------------------------- Request data from database -------------------
247 if (verbose>0)
248 cout << "Requesting data..." << endl;
249
250 if (verbose>2)
251 cout << '\n' << query << endl;
252
253 const mysqlpp::UseQueryResult res =
254 connection.query(query).use();
255
256 // -------------------------------------------------------------------------
257
258 if (verbose>0)
259 cout << "Opening file '" << path << "' [compression=" << compression << "]..." << endl;
260
261 // ----------------------------- Open output file --------------------------
262 TFile tfile(path, update?"UPDATE":(force?"RECREATE":"CREATE"), "Rootify SQL", compression);
263 if (tfile.IsZombie())
264 return 6;
265
266 // -------------------------------------------------------------------------
267
268 // get the first row to get the field description
269 mysqlpp::Row row = res.fetch_row();
270 if (!row)
271 {
272 cerr << "Empty set returned... nothing to write." << endl;
273 return 7;
274 }
275
276 if (verbose>0)
277 cout << "Trying to setup " << row.size() << " branches..." << endl;
278
279 if (verbose>1)
280 cout << endl;
281
282 const mysqlpp::FieldNames &l = *row.field_list().list;
283
284 vector<double> buf(l.size());
285 vector<uint8_t> typ(l.size(),'n'); // n=number [double], d is used for DateTime
286
287 UInt_t cols = 0;
288
289
290 // -------------------- Configure branches of TTree ------------------------
291 TTree *ttree = new TTree(tree.c_str(), query.c_str());
292
293 size_t skipat = 0;
294 size_t skipreg = 0;
295 for (size_t i=0; i<l.size(); i++)
296 {
297 const string t = row[i].type().sql_name();
298
299 if (t.find("DATETIME")!=string::npos)
300 typ[i] = 'd';
301 else
302 if (t.find("DATE")!=string::npos)
303 typ[i] = 'D';
304 else
305 if (t.find("TIME")!=string::npos)
306 typ[i] = 'T';
307 else
308 if (t.find("VARCHAR")!=string::npos)
309 typ[i] = 'V';
310 else
311 if (t.find("CHAR")!=string::npos)
312 typ[i] = 'C';
313
314 bool found = false;
315 for (const auto &pattern: _ignore)
316 {
317 if (regex_match(l[i], regex(pattern)))
318 {
319 found = true;
320 typ[i] = '-';
321 skipreg++;
322 break;
323 }
324 }
325
326 if (l[i][0]=='@')
327 {
328 typ[i] = '@';
329 skipat++;
330 }
331
332 const bool use = l[i][0]!='@' && typ[i]!='V' && typ[i]!='C' && !found;
333
334 if (verbose>1)
335 cout << (use?" + ":" - ") << l[i].c_str() << " [" << t << "] {" << typ[i] << "}\n";
336
337 if (use)
338 {
339 // string name = l[i];
340 // for (const auto &m: mymap)
341 // name = boost::regex_replace(l[i], boost::regex(m.first), m.second);
342
343 ttree->Branch(l[i].c_str(), buf.data()+i);
344 cols++;
345 }
346 }
347 // -------------------------------------------------------------------------
348
349 if (verbose>1)
350 cout << endl;
351 if (verbose>0)
352 {
353 if (skipreg)
354 cout << skipreg << " branches skipped due to ignore list." << endl;
355 if (skipat)
356 cout << skipat << " branches skipped due to name starting with @." << endl;
357 cout << "Configured " << cols << " branches.\nFilling branches..." << endl;
358 }
359
360 if (display)
361 {
362 cout << endl;
363 cout << "#";
364 for (size_t i=0; i<l.size(); i++)
365 cout << ' ' << l[i].c_str();
366 cout << endl;
367 }
368
369 // ---------------------- Fill TTree with DB data --------------------------
370 size_t count = 0;
371 size_t skip = 0;
372 do
373 {
374 count++;
375
376 ostringstream sout;
377
378 size_t idx=0;
379 for (auto col=row.begin(); col!=row.end(); col++, idx++)
380 {
381 if (display)
382 {
383 if (idx>0)
384 sout << (delimiter.empty()?"\t":delimiter);
385 sout << col->c_str();
386 }
387
388 if (!ignorenull && col->is_null())
389 {
390 skip++;
391 break;
392 }
393
394 switch (typ[idx])
395 {
396 case 'd':
397 buf[idx] = time_t((mysqlpp::DateTime)(*col));
398 break;
399
400 case 'D':
401 buf[idx] = time_t((mysqlpp::Date)(*col));
402 break;
403
404 case 'T':
405 buf[idx] = time_t((mysqlpp::Time)(*col));
406 break;
407
408 case 'V':
409 case 'C':
410 case '-':
411 case '@':
412 break;
413
414 default:
415 buf[idx] = atof(col->c_str());
416 }
417 }
418
419 if (idx==row.size())
420 {
421 if (!nofill)
422 ttree->Fill();
423
424 if (display)
425 cout << sout.str() << endl;
426 }
427
428 row = res.fetch_row();
429
430
431 } while (row);
432
433 // -------------------------------------------------------------------------
434
435 if (display)
436 cout << '\n' << endl;
437
438 if (verbose>0)
439 {
440 cout << count << " rows fetched." << endl;
441 if (skip>0)
442 cout << skip << " rows skipped due to NULL field." << endl;
443
444 cout << ttree->GetEntries() << " rows filled into tree." << endl;
445 }
446
447 ttree->Write();
448 tfile.Close();
449
450 if (verbose>0)
451 {
452 cout << "File closed.\n";
453 cout << "Execution time: " << Time().UnixTime()-start.UnixTime() << "s\n";
454 cout << "--------------------------------------------------------" << endl;
455 }
456
457 return 0;
458}
Note: See TracBrowser for help on using the repository browser.