source: trunk/FACT++/src/rootifysql.cc@ 18998

Last change on this file since 18998 was 18961, checked in by tbretz, 7 years ago
ignore already exists in 'tuple'
File size: 13.0 KB
Line 
1#include "Database.h"
2
3#include <regex>
4
5#include "Time.h"
6#include "Configuration.h"
7
8#include <TROOT.h>
9#include <TSystem.h>
10#include <TFile.h>
11#include <TTree.h>
12
13using namespace std;
14
15// ------------------------------------------------------------------------
16
17void SetupConfiguration(Configuration &conf)
18{
19 po::options_description control("Rootify SQL");
20 control.add_options()
21 ("uri,u", var<string>()->required(), "Database link as in\n\tuser:password@server[:port]/database.")
22 ("query,q", var<string>(""), "MySQL query (overwrites --file)")
23 ("file", var<string>("rootify.sql"), "An ASCII file with the MySQL query (overwrites --query)")
24 ("ignore-null,i", po_switch(), "Do not skip rows containing any NULL field")
25 ("out,o", var<string>("rootify.root"), "Output root file name")
26 ("force,f", po_switch(), "Force overwriting an existing root file ('RECREATE')")
27 ("update", po_switch(), "Update an existing root file with the new tree ('UPDATE')")
28 ("compression,c", var<uint16_t>(1), "zlib compression level for the root file")
29 ("tree,t", var<string>("Result"), "Name of the root tree")
30 ("ignore", vars<string>(), "Ignore the given columns")
31 ("display,d", po_switch(), "Displays contents on the screen (most usefull in combination with mysql statements as SHOW or EXPLAIN)")
32 ("null,n", po_switch(), "Redirect the output file to /dev/null (mainly for debugging purposes, e.g. performance studies)")
33 ("no-fill", po_switch(), "Do not fill events into the root file (mainly for debugging purposes, e.g. performance studies)")
34 ("delimiter", var<string>(""), "The delimiter used if contents are displayed with --display (default=\\t)")
35 ("verbose,v", var<uint16_t>(1), "Verbosity (0: quiet, 1: default, 2: more, 3, ...)")
36 ;
37
38 po::positional_options_description p;
39 p.add("file", 1); // The 1st positional options (n=1)
40 p.add("out", 1); // The 2nd positional options (n=1)
41
42 conf.AddOptions(control);
43 conf.SetArgumentPositions(p);
44}
45
46void PrintUsage()
47{
48 cout <<
49 "rootifysql - Converts the result of a mysql query into a root file\n"
50 "\n"
51 "For convenience, this documentation uses the extended version of the options, "
52 "refer to the output below to get the abbreviations.\n"
53 "\n"
54 "Writes the result of a mysql query into a root file. For each column, a branch is "
55 "created of type double with the field name as name. This is usually the column name "
56 "if not specified otherwise by the AS mysql directive.\n"
57 "\n"
58 "Columns with CHAR or VARCHAR as field type are ignored. DATETIME, DATE and TIME "
59 "columns are converted to unix time (time_t). Rows containing any file which is "
60 "NULL are skipped if not suppressed by the --ignore-null option. Ideally, the query "
61 "is compiled in a way that no NULL field is returned. With the --display option the "
62 "result of the request is printed on the screen (NULL skipping still in action). "
63 "This can be useful to create an ascii file or to show results as 'SHOW DATABASES' "
64 "or 'EXPLAIN table'. To redirect the contents into an ascii file, the option -v0 "
65 "is useful. To suppredd writing to an output file --null can be used.\n"
66 "\n"
67 "The default is to read the query from a file called rootify.sql. Except if a different "
68 "filename is specified by the --file option or a query is given with --query.\n"
69 "\n"
70 "As a trick, the rootify.sql file can be made excutable (chmod u+x rootify.sql). "
71 "If the first line contains '#!rootifysql', the script can be executed directly.\n"
72 "\n"
73 "Comments in the query-file can be placed according to the SQL standard inline "
74 "/*comment*/ or introduced with # (shell script style) or -- (SQL style).\n"
75 "\n"
76 "In case of succes, 0 is returned, a value>0 otherwise.\n"
77 "\n"
78 "Usage: rootifysql [rootify.sql [rootify.root]] [-u URI] [-q query|-f file] [-i] [-o out] [-f] [-cN] [-t tree] [-vN]\n"
79 "\n"
80 ;
81 cout << endl;
82}
83
84int main(int argc, const char* argv[])
85{
86 Time start;
87
88 gROOT->SetBatch();
89
90 Configuration conf(argv[0]);
91 conf.SetPrintUsage(PrintUsage);
92 SetupConfiguration(conf);
93
94 if (!conf.DoParse(argc, argv))
95 return 127;
96
97 // ----------------------------- Evaluate options --------------------------
98 const string uri = conf.Get<string>("uri");
99 const string out = conf.Get<string>("out");
100 const string file = conf.Get<string>("file");
101 const string tree = conf.Get<string>("tree");
102 const bool force = conf.Get<bool>("force");
103 const bool ignorenull = conf.Get<bool>("ignore-null");
104 const bool update = conf.Get<bool>("update");
105 const bool display = conf.Get<bool>("display");
106 const bool noout = conf.Get<bool>("null");
107 const bool nofill = conf.Get<bool>("no-fill");
108 const uint16_t verbose = conf.Get<uint16_t>("verbose");
109 const uint16_t compression = conf.Get<uint16_t>("compression");
110 const string delimiter = conf.Get<string>("delimiter");
111 const vector<string> _ignore = conf.Vec<string>("ignore");
112 //const vector<Map> mymap = conf.Vec<Map>("map");
113 // -------------------------------------------------------------------------
114
115 if (verbose>0)
116 cout << "\n--------------------- Rootify SQL ----------------------" << endl;
117
118 string query = conf.Get<string>("query");
119 if (query.empty())
120 {
121 if (verbose>0)
122 cout << "Reading query from file '" << file << "'." << endl;
123
124 ifstream fin(file);
125 if (!fin)
126 {
127 cerr << "Could not open '" << file << "': " << strerror(errno) << endl;
128 return 1;
129 }
130 getline(fin, query, (char)fin.eof());
131 }
132
133 if (query.empty())
134 {
135 cerr << "No query specified." << endl;
136 return 2;
137 }
138
139 // -------------------------- Check for file permssion ---------------------
140 // Strictly speaking, checking for write permission and existance is not necessary,
141 // but it is convenient that the user does not find out that it failed after
142 // waiting long for the query result
143 //
144 // I am using root here instead of boost to be
145 // consistent with the access pattern by TFile
146 TString path(noout?"/dev/null":out.c_str());
147 gSystem->ExpandPathName(path);
148
149 if (!noout)
150 {
151 FileStat_t stat;
152 const Int_t exist = !gSystem->GetPathInfo(path, stat);
153 const Bool_t write = !gSystem->AccessPathName(path, kWritePermission) && R_ISREG(stat.fMode);
154
155 if ((update && !exist) || (update && exist && !write) || (force && exist && !write))
156 {
157 cerr << "File '" << path << "' is not writable." << endl;
158 return 3;
159 }
160
161 if (!update && !force && exist)
162 {
163 cerr << "File '" << path << "' already exists." << endl;
164 return 4;
165 }
166 }
167 // -------------------------------------------------------------------------
168
169 if (query.back()!='\n')
170 query += '\n';
171
172 if (verbose>2)
173 cout << '\n' << query << endl;
174
175 if (verbose>0)
176 cout << "Requesting data..." << endl;
177
178 Time start2;
179
180 // -------------------------- Request data from database -------------------
181 Database connection(uri); // Keep alive while fetching rows
182
183 const mysqlpp::UseQueryResult res =
184 connection.query(query).use();
185 // -------------------------------------------------------------------------
186
187 if (verbose>0)
188 cout << "Opening file '" << path << "' [compression=" << compression << "]..." << endl;
189
190 // ----------------------------- Open output file --------------------------
191 TFile tfile(path, update?"UPDATE":(force?"RECREATE":"CREATE"), "Rootify SQL", compression);
192 if (tfile.IsZombie())
193 return 5;
194
195 // -------------------------------------------------------------------------
196
197 // get the first row to get the field description
198 mysqlpp::Row row = res.fetch_row();
199 if (!row)
200 {
201 cerr << "Empty set returned... nothing to write." << endl;
202 return 6;
203 }
204
205 if (verbose>0)
206 cout << "Trying to setup " << row.size() << " branches..." << endl;
207
208 if (verbose>1)
209 cout << endl;
210
211 const mysqlpp::FieldNames &l = *row.field_list().list;
212
213 vector<double> buf(l.size());
214 vector<uint8_t> typ(l.size(),'n'); // n=number [double], d is used for DateTime
215
216 UInt_t cols = 0;
217
218
219 // -------------------- Configure branches of TTree ------------------------
220 TTree *ttree = new TTree(tree.c_str(), query.c_str());
221
222 size_t skipat = 0;
223 size_t skipreg = 0;
224 for (size_t i=0; i<l.size(); i++)
225 {
226 const string t = row[i].type().sql_name();
227
228 if (t.find("DATETIME")!=string::npos)
229 typ[i] = 'd';
230 else
231 if (t.find("DATE")!=string::npos)
232 typ[i] = 'D';
233 else
234 if (t.find("TIME")!=string::npos)
235 typ[i] = 'T';
236 else
237 if (t.find("VARCHAR")!=string::npos)
238 typ[i] = 'V';
239 else
240 if (t.find("CHAR")!=string::npos)
241 typ[i] = 'C';
242
243 bool found = false;
244 for (const auto &pattern: _ignore)
245 {
246 if (regex_match(l[i], regex(pattern)))
247 {
248 found = true;
249 typ[i] = '-';
250 skipreg++;
251 break;
252 }
253 }
254
255 if (l[i][0]=='@')
256 {
257 typ[i] = '@';
258 skipat++;
259 }
260
261 const bool use = l[i][0]!='@' && typ[i]!='V' && typ[i]!='C' && !found;
262
263 if (verbose>1)
264 cout << (use?" + ":" - ") << l[i].c_str() << " [" << t << "] {" << typ[i] << "}\n";
265
266 if (use)
267 {
268 // string name = l[i];
269 // for (const auto &m: mymap)
270 // name = boost::regex_replace(l[i], boost::regex(m.first), m.second);
271
272 ttree->Branch(l[i].c_str(), buf.data()+i);
273 cols++;
274 }
275 }
276 // -------------------------------------------------------------------------
277
278 if (verbose>1)
279 cout << endl;
280 if (verbose>0)
281 {
282 if (skipreg)
283 cout << skipreg << " branches skipped due to ignore list." << endl;
284 if (skipat)
285 cout << skipat << " branches skipped due to name starting with @." << endl;
286 cout << "Configured " << cols << " branches.\nFilling branches..." << endl;
287 }
288
289 if (display)
290 {
291 cout << endl;
292 cout << "#";
293 for (size_t i=0; i<l.size(); i++)
294 cout << ' ' << l[i].c_str();
295 cout << endl;
296 }
297
298 // ---------------------- Fill TTree with DB data --------------------------
299 size_t count = 0;
300 size_t skip = 0;
301 do
302 {
303 count++;
304
305 ostringstream sout;
306
307 size_t idx=0;
308 for (auto col=row.begin(); col!=row.end(); col++, idx++)
309 {
310 if (display)
311 {
312 if (idx>0)
313 sout << (delimiter.empty()?"\t":delimiter);
314 sout << col->c_str();
315 }
316
317 if (!ignorenull && col->is_null())
318 {
319 skip++;
320 break;
321 }
322
323 switch (typ[idx])
324 {
325 case 'd':
326 buf[idx] = time_t((mysqlpp::DateTime)(*col));
327 break;
328
329 case 'D':
330 buf[idx] = time_t((mysqlpp::Date)(*col));
331 break;
332
333 case 'T':
334 buf[idx] = time_t((mysqlpp::Time)(*col));
335 break;
336
337 case 'V':
338 case 'C':
339 case '-':
340 case '@':
341 break;
342
343 default:
344 buf[idx] = atof(col->c_str());
345 }
346 }
347
348 if (idx==row.size())
349 {
350 if (!nofill)
351 ttree->Fill();
352
353 if (display)
354 cout << sout.str() << endl;
355 }
356
357 row = res.fetch_row();
358
359
360 } while (row);
361
362 // -------------------------------------------------------------------------
363
364 if (display)
365 cout << '\n' << endl;
366
367 if (verbose>0)
368 {
369 cout << count << " rows fetched." << endl;
370 if (skip>0)
371 cout << skip << " rows skipped due to NULL field." << endl;
372
373 cout << ttree->GetEntries() << " rows filled into tree." << endl;
374 }
375
376 ttree->Write();
377 tfile.Close();
378
379 if (verbose>0)
380 {
381 cout << "File closed.\n";
382 cout << "Execution time: " << Time().UnixTime()-start.UnixTime() << "s\n";
383 cout << "--------------------------------------------------------" << endl;
384 }
385
386 return 0;
387}
Note: See TracBrowser for help on using the repository browser.