source: trunk/FACT++/src/rootifysql.cc@ 18956

Last change on this file since 18956 was 18929, checked in by tbretz, 7 years ago
Allow to skip filling the tree to debug speed of reading the database. Skip columns starting with @. Add more output for the user. Implement a way to skip columns.
File size: 12.8 KB
Line 
1#include "Database.h"
2
3#include <regex>
4
5#include "Time.h"
6#include "Configuration.h"
7
8#include <TROOT.h>
9#include <TSystem.h>
10#include <TFile.h>
11#include <TTree.h>
12
13using namespace std;
14
15// ------------------------------------------------------------------------
16
17void SetupConfiguration(Configuration &conf)
18{
19 po::options_description control("Rootify SQL");
20 control.add_options()
21 ("uri,u", var<string>()->required(), "Database link as in\n\tuser:password@server[:port]/database.")
22 ("query,q", var<string>(""), "MySQL query (overwrites --file)")
23 ("file", var<string>("rootify.sql"), "An ASCII file with the MySQL query (overwrites --query)")
24 ("ignore-null,i", po_switch(), "Do not skip rows containing any NULL field")
25 ("out,o", var<string>("rootify.root"), "Output root file name")
26 ("force,f", po_switch(), "Force overwriting an existing root file ('RECREATE')")
27 ("update", po_switch(), "Update an existing root file with the new tree ('UPDATE')")
28 ("compression,c", var<uint16_t>(1), "zlib compression level for the root file")
29 ("tree,t", var<string>("Result"), "Name of the root tree")
30 ("ignore", vars<string>(), "Ignore the given columns")
31 ("display,d", po_switch(), "Displays contents on the screen (most usefull in combination with mysql statements as SHOW or EXPLAIN)")
32 ("null,n", po_switch(), "Redirect the output file to /dev/null (mainly for debugging purposes, e.g. performance studies)")
33 ("no-fill", po_switch(), "Do not fill events into the root file (mainly for debugging purposes, e.g. performance studies)")
34 ("delimiter", var<string>(""), "The delimiter used if contents are displayed with --display (default=\\t)")
35 ("verbose,v", var<uint16_t>(1), "Verbosity (0: quiet, 1: default, 2: more, 3, ...)")
36 ;
37
38 po::positional_options_description p;
39 p.add("file", 1); // The 1st positional options (n=1)
40 p.add("out", 1); // The 2nd positional options (n=1)
41
42 conf.AddOptions(control);
43 conf.SetArgumentPositions(p);
44}
45
46void PrintUsage()
47{
48 cout <<
49 "rootifysql - Converts the result of a mysql query into a root file\n"
50 "\n"
51 "For convenience, this documentation uses the extended version of the options, "
52 "refer to the output below to get the abbreviations.\n"
53 "\n"
54 "Writes the result of a mysql query into a root file. For each column, a branch is "
55 "created of type double with the field name as name. This is usually the column name "
56 "if not specified otherwise by the AS mysql directive.\n"
57 "\n"
58 "Columns with CHAR or VARCHAR as field type are ignored. DATETIME, DATE and TIME "
59 "columns are converted to unix time (time_t). Rows containing any file which is "
60 "NULL are skipped if not suppressed by the --ignore-null option. Ideally, the query "
61 "is compiled in a way that no NULL field is returned. With the --display option the "
62 "result of the request is printed on the screen (NULL skipping still in action). "
63 "This can be useful to create an ascii file or to show results as 'SHOW DATABASES' "
64 "or 'EXPLAIN table'. To redirect the contents into an ascii file, the option -v0 "
65 "is useful. To suppredd writing to an output file --null can be used.\n"
66 "\n"
67 "The default is to read the query from a file called rootify.sql. Except if a different "
68 "filename is specified by the --file option or a query is given with --query.\n"
69 "\n"
70 "As a trick, the rootify.sql file can be made excutable (chmod u+x rootify.sql). "
71 "If the first line contains '#!rootifysql', the script can be executed directly.\n"
72 "\n"
73 "Comments in the query-file can be placed according to the SQL standard inline "
74 "/*comment*/ or introduced with # (shell script style) or -- (SQL style).\n"
75 "\n"
76 "In case of succes, 0 is returned, a value>0 otherwise.\n"
77 "\n"
78 "Usage: rootifysql [rootify.sql [rootify.root]] [-u URI] [-q query|-f file] [-i] [-o out] [-f] [-cN] [-t tree] [-vN]\n"
79 "\n"
80 ;
81 cout << endl;
82}
83
84int main(int argc, const char* argv[])
85{
86 Time start;
87
88 gROOT->SetBatch();
89
90 Configuration conf(argv[0]);
91 conf.SetPrintUsage(PrintUsage);
92 SetupConfiguration(conf);
93
94 if (!conf.DoParse(argc, argv))
95 return 127;
96
97 // ----------------------------- Evaluate options --------------------------
98 const string uri = conf.Get<string>("uri");
99 const string out = conf.Get<string>("out");
100 const string file = conf.Get<string>("file");
101 const string tree = conf.Get<string>("tree");
102 const bool force = conf.Get<bool>("force");
103 const bool ignorenull = conf.Get<bool>("ignore-null");
104 const bool update = conf.Get<bool>("update");
105 const bool display = conf.Get<bool>("display");
106 const bool noout = conf.Get<bool>("null");
107 const bool nofill = conf.Get<bool>("no-fill");
108 const uint16_t verbose = conf.Get<uint16_t>("verbose");
109 const uint16_t compression = conf.Get<uint16_t>("compression");
110 const string delimiter = conf.Get<string>("delimiter");
111 const vector<string> ignore = conf.Vec<string>("ignore");
112 // -------------------------------------------------------------------------
113
114 if (verbose>0)
115 cout << "\n--------------------- Rootify SQL ----------------------" << endl;
116
117 string query = conf.Get<string>("query");
118 if (query.empty())
119 {
120 if (verbose>0)
121 cout << "Reading query from file '" << file << "'." << endl;
122
123 ifstream fin(file);
124 if (!fin)
125 {
126 cerr << "Could not open '" << file << "': " << strerror(errno) << endl;
127 return 1;
128 }
129 getline(fin, query, (char)fin.eof());
130 }
131
132 if (query.empty())
133 {
134 cerr << "No query specified." << endl;
135 return 2;
136 }
137
138 // -------------------------- Check for file permssion ---------------------
139 // Strictly speaking, checking for write permission and existance is not necessary,
140 // but it is convenient that the user does not find out that it failed after
141 // waiting long for the query result
142 //
143 // I am using root here instead of boost to be
144 // consistent with the access pattern by TFile
145 TString path(noout?"/dev/null":out.c_str());
146 gSystem->ExpandPathName(path);
147
148 if (!noout)
149 {
150 FileStat_t stat;
151 const Int_t exist = !gSystem->GetPathInfo(path, stat);
152 const Bool_t write = !gSystem->AccessPathName(path, kWritePermission) && R_ISREG(stat.fMode);
153
154 if ((update && !exist) || (update && exist && !write) || (force && exist && !write))
155 {
156 cerr << "File '" << path << "' is not writable." << endl;
157 return 3;
158 }
159
160 if (!update && !force && exist)
161 {
162 cerr << "File '" << path << "' already exists." << endl;
163 return 4;
164 }
165 }
166 // -------------------------------------------------------------------------
167
168 if (query.back()!='\n')
169 query += '\n';
170
171 if (verbose>2)
172 cout << '\n' << query << endl;
173
174 if (verbose>0)
175 cout << "Requesting data..." << endl;
176
177 Time start2;
178
179 // -------------------------- Request data from database -------------------
180 Database connection(uri); // Keep alive while fetching rows
181
182 const mysqlpp::UseQueryResult res =
183 connection.query(query).use();
184 // -------------------------------------------------------------------------
185
186 if (verbose>0)
187 cout << "Opening file '" << path << "' [compression=" << compression << "]..." << endl;
188
189 // ----------------------------- Open output file --------------------------
190 TFile tfile(path, update?"UPDATE":(force?"RECREATE":"CREATE"), "Rootify SQL", compression);
191 if (tfile.IsZombie())
192 return 5;
193
194 // -------------------------------------------------------------------------
195
196 // get the first row to get the field description
197 mysqlpp::Row row = res.fetch_row();
198 if (!row)
199 {
200 cerr << "Empty set returned... nothing to write." << endl;
201 return 6;
202 }
203
204 if (verbose>0)
205 cout << "Trying to setup " << row.size() << " branches..." << endl;
206
207 if (verbose>1)
208 cout << endl;
209
210 const mysqlpp::FieldNames &l = *row.field_list().list;
211
212 vector<double> buf(l.size());
213 vector<uint8_t> typ(l.size(),'n'); // n=number [double], d is used for DateTime
214
215 UInt_t cols = 0;
216
217
218 // -------------------- Configure branches of TTree ------------------------
219 TTree *ttree = new TTree(tree.c_str(), query.c_str());
220
221 size_t skipat = 0;
222 size_t skipreg = 0;
223 for (size_t i=0; i<l.size(); i++)
224 {
225 const string t = row[i].type().sql_name();
226
227 if (t.find("DATETIME")!=string::npos)
228 typ[i] = 'd';
229 else
230 if (t.find("DATE")!=string::npos)
231 typ[i] = 'D';
232 else
233 if (t.find("TIME")!=string::npos)
234 typ[i] = 'T';
235 else
236 if (t.find("VARCHAR")!=string::npos)
237 typ[i] = 'V';
238 else
239 if (t.find("CHAR")!=string::npos)
240 typ[i] = 'C';
241
242 bool found = false;
243 for (const auto &pattern: ignore)
244 {
245 if (regex_match(l[i], regex(pattern)))
246 {
247 found = true;
248 typ[i] = '-';
249 skipreg++;
250 break;
251 }
252 }
253
254 if (l[i][0]=='@')
255 {
256 typ[i] = '@';
257 skipat++;
258 }
259
260 const bool use = l[i][0]!='@' && typ[i]!='V' && typ[i]!='C' && !found;
261
262 if (verbose>1)
263 cout << (use?" + ":" - ") << l[i].c_str() << " [" << t << "] {" << typ[i] << "}\n";
264
265 if (use)
266 {
267 ttree->Branch(l[i].c_str(), buf.data()+i);
268 cols++;
269 }
270 }
271 // -------------------------------------------------------------------------
272
273 if (verbose>1)
274 cout << endl;
275 if (verbose>0)
276 {
277 if (skipreg)
278 cout << skipreg << " branches skipped due to ignore list." << endl;
279 if (skipat)
280 cout << skipat << " branches skipped due to name starting with @." << endl;
281 cout << "Configured " << cols << " branches.\nFilling branches..." << endl;
282 }
283
284 if (display)
285 {
286 cout << endl;
287 cout << "#";
288 for (size_t i=0; i<l.size(); i++)
289 cout << ' ' << l[i].c_str();
290 cout << endl;
291 }
292
293 // ---------------------- Fill TTree with DB data --------------------------
294 size_t count = 0;
295 size_t skip = 0;
296 do
297 {
298 count++;
299
300 ostringstream sout;
301
302 size_t idx=0;
303 for (auto col=row.begin(); col!=row.end(); col++, idx++)
304 {
305 if (display)
306 {
307 if (idx>0)
308 sout << (delimiter.empty()?"\t":delimiter);
309 sout << col->c_str();
310 }
311
312 if (!ignorenull && col->is_null())
313 {
314 skip++;
315 break;
316 }
317
318 switch (typ[idx])
319 {
320 case 'd':
321 buf[idx] = time_t((mysqlpp::DateTime)(*col));
322 break;
323
324 case 'D':
325 buf[idx] = time_t((mysqlpp::Date)(*col));
326 break;
327
328 case 'T':
329 buf[idx] = time_t((mysqlpp::Time)(*col));
330 break;
331
332 case 'V':
333 case 'C':
334 case '-':
335 case '@':
336 break;
337
338 default:
339 buf[idx] = atof(col->c_str());
340 }
341 }
342
343 if (idx==row.size())
344 {
345 if (!nofill)
346 ttree->Fill();
347
348 if (display)
349 cout << sout.str() << endl;
350 }
351
352 row = res.fetch_row();
353
354
355 } while (row);
356
357 // -------------------------------------------------------------------------
358
359 if (display)
360 cout << '\n' << endl;
361
362 if (verbose>0)
363 {
364 cout << count << " rows fetched." << endl;
365 if (skip>0)
366 cout << skip << " rows skipped due to NULL field." << endl;
367
368 cout << ttree->GetEntries() << " rows filled into tree." << endl;
369 }
370
371 ttree->Write();
372 tfile.Close();
373
374 if (verbose>0)
375 {
376 cout << "File closed.\n";
377 cout << "Execution time: " << Time().UnixTime()-start.UnixTime() << "s\n";
378 cout << "--------------------------------------------------------" << endl;
379 }
380
381 return 0;
382}
Note: See TracBrowser for help on using the repository browser.