source: trunk/FACT++/src/root2sql.cc@ 19166

Last change on this file since 19166 was 19166, checked in by tbretz, 6 years ago
Better handling of the replacement for the filename.
File size: 32.0 KB
Line 
1#include <boost/algorithm/string/join.hpp>
2#include <boost/regex.hpp>
3
4#include "Database.h"
5
6#include "tools.h"
7#include "Time.h"
8#include "Configuration.h"
9
10#include <TROOT.h>
11#include <TFile.h>
12#include <TTree.h>
13#include <TLeaf.h>
14#include <TError.h>
15
16using namespace std;
17
18// ------------------------------------------------------------------------
19
20struct Map : pair<string, string>
21{
22 Map() { }
23};
24
25std::istream &operator>>(std::istream &in, Map &m)
26{
27 const istreambuf_iterator<char> eos;
28 string txt(istreambuf_iterator<char>(in), eos);
29
30 const boost::regex expr("((.*)[^\\\\])/(.*)");
31 boost::smatch match;
32 if (!boost::regex_match(txt, match, expr))
33 throw runtime_error("Could not evaluate map argument: "+txt);
34
35 m.first = match[1].str();
36 m.second = match[3].str();
37
38 return in;
39}
40
41
42void SetupConfiguration(Configuration &conf)
43{
44 po::options_description control("Root to SQL");
45 control.add_options()
46 ("uri,u", var<string>()
47#if BOOST_VERSION >= 104200
48 ->required()
49#endif
50 , "Database link as in\n\tuser:password@server[:port]/database[?compress=0|1].")
51 ("file", var<string>("")
52#if BOOST_VERSION >= 104200
53 ->required()
54#endif
55 , "The root file to read from")
56 ("create", po_switch(), "Create the database if not existing")
57 ("drop", po_switch(), "Drop the table (implies create)")
58 ("tree,t", var<string>("Events"), "Name of the root tree to convert")
59 ("table", var<string>(""), "Name of the table to use (default is the tree name)")
60 ("map", vars<Map>(), "A regular expression which is applied to the leaf name befoee it is used as SQL column name)")
61 ("sql-type", vars<Map>(), "Allows to overwrite the calculated SQL type for a given column e.g. 'sql-column-name/UNSIGNED IN'")
62 ("ignore", vars<string>(), "Ignore the given leaf, if the given regular expression matches")
63 ("primary", vars<string>(), "List of columns to be used as primary keys during table creation (in connection with --create)")
64 ("first", var<int64_t>(int64_t(0)), "First event to start with (default: 0), mainly for test purpose")
65 ("max", var<int64_t>(int64_t(0)), "Maximum number of events to process (0: all), mainly for test purpose")
66 ("engine", var<string>(""), "Database engine to be used when a new table is created")
67 ("row-format", var<string>(""), "Defines the ROW_FORMAT keyword for table creation")
68 ("duplicate", vars<string>(), "Specifies an assignment_list for an 'ON DUPLICATE KEY UPDATE' expression")
69 ("ignore-errors", po_switch(), "Adds the IGNORE keyword to the INSERT query (turns errors into warnings, ignores rows with errors)")
70 ("const.*", var<string>(), "Insert a constant number into the given column (--const.mycolumn=5). A special case is `/..../N/`")
71 ("conditional", po_switch(), "Conditional insert. Only insert if no entry exists yet with the constants defined by --const")
72 ("delete", po_switch(), "Delete all entries first which fit all constant columns defined by --const")
73 ("index", po_switch(), "If a table is created, all const columns are used as a single index")
74 ;
75
76 po::options_description debug("Debug options");
77 debug.add_options()
78 ("no-insert", po_switch(), "Does not insert any data into the table")
79 ("dry-run", po_switch(), "Skip any query which changes the databse (might result in consecutive failures)")
80 ("print-connection", po_switch(), "Print database connection information")
81 ("print-branches", po_switch(), "Print the branches found in the tree")
82 ("print-leaves", po_switch(), "Print the leaves found in the tree (this is what is processed)")
83 ("print-insert", po_switch(), "Print the INSERT query (note that it contains all data)")
84 ("print-create", po_switch(), "Print the CREATE query")
85 ("print-select", po_switch(), "Print the SELECT query for the conditional execution")
86 ("print-delete", po_switch(), "Print the DELETE query")
87 ("verbose,v", var<uint16_t>(1), "Verbosity (0: quiet, 1: default, 2: more, 3, ...)")
88 ;
89
90 po::positional_options_description p;
91 p.add("file", 1); // The 1st positional options (n=1)
92
93 conf.AddOptions(control);
94 conf.AddOptions(debug);
95 conf.SetArgumentPositions(p);
96}
97
98void PrintUsage()
99{
100 cout <<
101 "root2sql - Fills the data from a root file into a database\n"
102 "\n"
103 "For convenience, this documentation uses the extended version of the options, "
104 "refer to the output below to get the abbreviations.\n"
105 "\n"
106 "This is a general purpose tool to fill the contents of a root file into a database "
107 "as long as this is technically possible and makes sense. Note that root can even "
108 "write complex data like a TH1F into a database, this is not the purpose of this "
109 "program.\n"
110 "\n"
111 "Each root tree has branches and leaves (the basic data types). These leaves can "
112 "be read independently of the classes which were used to write the root file. "
113 "The default tree to read from is 'Events' but the name can be overwritten "
114 "using --tree. The default table name to fill the data into is identical to "
115 "the tree name. It can be overwritten using --table.\n"
116 "\n"
117 "The name of each column to which data is filled from a leave is obtained from "
118 "the leaves' names. The leave names can be checked using --print-leaves. "
119 "A --print-branches exists for convenience to print only the high-level branches. "
120 "Sometimes these names might be quite unconvenient like MTime.fTime.fMilliSec or "
121 "just MHillas.fWidth. To allow to simplify column names, regular expressions "
122 "(using boost's regex) can be defined to change the names. Note that these regular "
123 "expressions are applied one by one on each leaf's name. A valid expression could "
124 "be:\n"
125 " --map=MHillas\\.f/\n"
126 "which would remove all occurances of 'MHillas.f'. This option can be used more than "
127 "once. They are applied in sequence. A single match does not stop the sequence.\n"
128 "\n"
129 "Sometimes it might also be convenient to skip a leaf. This can be done with "
130 "the --ignore resource. If the given regular expresion yields a match, the "
131 "leaf will be ignored. Note that the regular expression works on the raw-name "
132 "of the leaf not the readily mapped SQL column names. Example:\n"
133 " --ignore=ThetaSq\\..*\n"
134 "will skip all leaved which start with 'ThetaSq.'. This option can be used"
135 "more than once.\n"
136 "\n"
137 "The data type of each column is kept as close as possible to the leaves' data "
138 "types. If for some reason this is not wanted, the data type of the SQL column "
139 "can be overwritten with --sql-type sql-column/sql-ytpe, for example:\n"
140 " --sql-type=FileId/UNSIGNED INT\n"
141 "while the first argument of the name of the SQL column to which the data type "
142 "should be applied. The second column is the basic SQL data type. The option can "
143 "be given more than once.\n"
144 "\n"
145 "Database interaction:\n"
146 "\n"
147 "To drop an existing table, --drop can be used.\n"
148 "\n"
149 "To create a table according to theSQL column names and data types, --create "
150 "can be used. The query used can be printed with --print-create even --create "
151 "has not been specified.\n"
152 "\n"
153 "To choose the columns which should become primary keys, use --primary, "
154 "for example:\n"
155 " --primary=col1\n"
156 "To define more than one column as primary key, the option can be given more than "
157 "once. Note that the combination of these columns must be unique.\n"
158 "\n"
159 "All columns are created as NOT NULL as default. To force a database engine "
160 "and/or a storage format, use --engine and --row-format.\n"
161 "\n"
162 "Usually, the INSERT query would fail if the PRIMARY key exists already. "
163 "This can be avoided using the 'ON DUPLICATE KEY UPDATE' directive. With the "
164 "--duplicate, you can specify what should be updated in case of a duplicate key. "
165 "To keep the row untouched, you can just update the primary key "
166 "with the identical primary key, e.g. --duplicate='MyPrimary=VALUES(MyPrimary)'. "
167 "The --duplicate resource can be specified more than once to add more expressions "
168 "to the assignment_list. For more details, see the MySQL manual.\n"
169 "\n"
170 "For debugging purpose, or to just create or drop a table, the final insert "
171 "query can be skipped using --no-insert. Note that for performance reason, "
172 "all data is collected in memory and a single INSERT query is issued at the "
173 "end.\n"
174 "\n"
175 "Another possibility is to add the IGNORE keyword to the INSERT query by "
176 "--ignore-errors, which essentially ignores all errors and turns them into "
177 "warnings which are printed after the query succeeded.\n"
178 "\n"
179 "Using a higher verbosity level (-v), an overview of the written columns or all "
180 "processed leaves is printed depending on the verbosity level. The output looks "
181 "like the following\n"
182 " Leaf name [root data type] (SQL name)\n"
183 "for example\n"
184 " MTime.fTime.fMilliSec [Long64_t] (MilliSec)\n"
185 "which means that the leaf MTime.fTime.fMilliSec is detected to be a Long64_t "
186 "which is filled into a column called MilliSec. Leaves with non basic data types "
187 "are ignored automatically and are marked as (-n/a-). User ignored columns "
188 "are marked as (-ignored-).\n"
189 "\n"
190 "A constant value for the given file can be inserted by using the --const directive. "
191 "For example --const.mycolumn=42 would insert 42 into a column called mycolumn. "
192 "The column is created as INT UNSIGNED as default which can be altered by "
193 "--sql-type. A special case is a value of the form `/regex/format/`. Here, the given "
194 "regular expression is applied to the filename and it is newly formated with "
195 "the new format string. Uses the standard formatting rules to replace matches "
196 "(those used by ECMAScript's replace method).\n"
197 "\n"
198 "Usually the previously defined constant values are helpful to create an index "
199 "which relates unambiguously the inserted data to the file. It might be useful "
200 "to delete all data which belongs to this particular file before new data is "
201 "entered. This can be achieved with the `--delete` directive. It deletes all "
202 "data from the table before inserting new data which fulfills the condition "
203 "defined by the `--const` directives.\n"
204 "\n"
205 "The constant values can also be used for a conditional execution (--conditional). "
206 "If any row with the given constant values are found, the execution is stopped "
207 "(note that this happend after the table drop/create but before the delete/insert.\n"
208 "\n"
209 "To ensure efficient access for a conditonal execution, it makes sense to have "
210 "an index created for those columns. This can be done during table creation "
211 "with the --index option.\n"
212 "\n"
213 "If a query failed, the query is printed to stderr together with the error message. "
214 "For the main INSERT query, this is only true if the verbosity level is at least 2 "
215 "or the query has less than 80*25 bytes.\n"
216 "\n"
217 "In case of success, 0 is returned, a value>0 otherwise.\n"
218 "\n"
219 "Usage: root2sql [options] -uri URI rootfile.root\n"
220 "\n"
221 ;
222 cout << endl;
223}
224
225enum BasicType_t
226{
227 kNone = 0,
228 kConst,
229 kFloat,
230 kDouble,
231 kInt16,
232 kUInt16,
233 kInt32,
234 kUInt32,
235 kInt64,
236 kUInt64,
237};
238
239static const map<string, pair<BasicType_t, string>> ConvRoot =
240{
241 { "Float_t", { kFloat, "FLOAT" } },
242 { "Double_t", { kDouble, "DOUBLE" } },
243 { "ULong64_t", { kUInt64, "BIGINT UNSIGNED" } },
244 { "Long64_t", { kInt64, "BIGINT" } },
245 { "UInt_t", { kUInt32, "INT UNSIGNED" } },
246 { "Int_t", { kInt32, "INT" } },
247 { "UShort_t", { kUInt16, "SMALLINT UNSIGNED" } },
248 { "Short_t", { kInt16, "SMALLINT" } },
249};
250
251struct Container
252{
253 static map<void*, size_t> counter;
254
255 string branch; // branch name
256 string column; // column name
257 BasicType_t type;
258 size_t num;
259 void *ptr;
260
261 Container(const string &b, const string &c, const BasicType_t &t, const size_t &n) : branch(b), column(c), type(t), num(n), ptr(0)
262 {
263 switch (t)
264 {
265 case kFloat: ptr = new Float_t[n]; break;
266 case kDouble: ptr = new Double_t[n]; break;
267 case kInt16: ptr = new Short_t[n]; break;
268 case kUInt16: ptr = new UShort_t[n]; break;
269 case kInt32: ptr = new Int_t[n]; break;
270 case kUInt32: ptr = new UInt_t[n]; break;
271 case kInt64: ptr = new Long64_t[n]; break;
272 case kUInt64: ptr = new ULong64_t[n]; break;
273 case kConst:
274 case kNone:
275 break;
276 }
277 counter[ptr]++;
278 }
279 Container(const string &c, const string &value) : branch(value), column(c), type(kConst), num(1), ptr(0)
280 {
281 }
282
283 Container(const Container &c) : branch(c.branch), column(c.column), type(c.type), num(c.num), ptr(c.ptr)
284 {
285 counter[ptr]++;
286 }
287
288 ~Container()
289 {
290 counter[ptr]--;
291 if (counter[ptr]==0)
292 ::operator delete[](ptr); // It seems root is deleting it already
293 }
294
295 string fmt(const size_t &index) const
296 {
297 ostringstream str;
298
299 switch (type)
300 {
301 case kFloat: str << setprecision(8) << reinterpret_cast<Float_t*>(ptr)[index]; break;
302 case kDouble: str << setprecision(16) << reinterpret_cast<Double_t*>(ptr)[index]; break;
303 case kInt16: str << reinterpret_cast<Short_t*>(ptr)[index]; break;
304 case kUInt16: str << reinterpret_cast<UShort_t*>(ptr)[index]; break;
305 case kInt32: str << reinterpret_cast<Int_t*>(ptr)[index]; break;
306 case kUInt32: str << reinterpret_cast<UInt_t*>(ptr)[index]; break;
307 case kInt64: str << reinterpret_cast<Long64_t*>(ptr)[index]; break;
308 case kUInt64: str << reinterpret_cast<ULong64_t*>(ptr)[index]; break;
309 case kConst: str << branch; break;
310 case kNone:
311 break;
312 }
313
314 //if (str.str()=="nan" || str.str()=="-nan" || str.str()=="inf" || str.str()=="-inf")
315 // return "NULL";
316
317 return str.str();
318 }
319};
320
321map<void*, size_t> Container::counter;
322
323void ErrorHandlerAll(Int_t level, Bool_t abort, const char *location, const char *msg)
324{
325 if (string(msg).substr(0,24)=="no dictionary for class ")
326 return;
327 if (string(msg).substr(0, 5)=="unknown branch ")
328 return;
329
330 DefaultErrorHandler(level, abort, location, msg);
331}
332
333int main(int argc, const char* argv[])
334{
335 string input = "/this/is/my/dir/2018/12/13/12345_789_I.root";
336 boost::regex rgx("([0-9]+)_([0-9]+)");
337
338 boost::smatch ma;
339 cout << boost::regex_replace(input, rgx, "$1$2", boost::regex_constants::format_default|boost::regex_constants::format_no_copy) << endl;
340
341 return 1;
342
343 Time start;
344
345 gROOT->SetBatch();
346 SetErrorHandler(ErrorHandlerAll);
347
348 Configuration conf(argv[0]);
349 conf.SetPrintUsage(PrintUsage);
350 SetupConfiguration(conf);
351
352 if (!conf.DoParse(argc, argv))
353 return 127;
354
355 // ----------------------------- Evaluate options --------------------------
356 const string uri = conf.Get<string>("uri");
357 const string file = conf.Get<string>("file");
358 const string tree = conf.Get<string>("tree");
359 const string table = conf.Get<string>("table").empty() ? tree : conf.Get<string>("table");
360
361 const uint16_t verbose = conf.Get<uint16_t>("verbose");
362 const int64_t first = conf.Get<int64_t>("first");
363 const int64_t max = conf.Get<int64_t>("max");
364
365 const bool drop = conf.Get<bool>("drop");
366 const bool create = conf.Get<bool>("create") || drop;
367 const bool noinsert = conf.Get<bool>("no-insert");
368 const bool dry_run = conf.Get<bool>("dry-run");
369 const bool conditional = conf.Get<bool>("conditional");
370 const bool run_delete = conf.Get<bool>("delete");
371 const bool index = conf.Get<bool>("index");
372
373 const string engine = conf.Get<string>("engine");
374 const string row_format = conf.Get<string>("row-format");
375
376 const vector<string> duplicate = conf.Vec<string>("duplicate");
377
378 const bool ignore_errors = conf.Get<bool>("ignore-errors");
379
380 const bool print_connection = conf.Get<bool>("print-connection");
381 const bool print_branches = conf.Get<bool>("print-branches");
382 const bool print_leaves = conf.Get<bool>("print-leaves");
383 const bool print_create = conf.Get<bool>("print-create");
384 const bool print_insert = conf.Get<bool>("print-insert");
385 const bool print_select = conf.Get<bool>("print-select");
386 const bool print_delete = conf.Get<bool>("print-delete");
387
388 const vector<Map> mymap = conf.Vec<Map>("map");
389 const vector<Map> sqltypes = conf.Vec<Map>("sql-type");
390 const vector<string> _ignore = conf.Vec<string>("ignore");
391 const vector<string> primary = conf.Vec<string>("primary");
392
393 // -------------------------------------------------------------------------
394
395 if (verbose>0)
396 cout << "\n-------------------------- Evaluating file -------------------------" << endl;
397
398 TFile f(file.c_str());
399 if (f.IsZombie())
400 {
401 cerr << "Could not open file " << file << endl;
402 return 1;
403 }
404
405 if (verbose>0)
406 cout << "File: " << file << endl;
407
408 TTree *T = 0;
409 f.GetObject(tree.c_str(), T);
410 if (!T)
411 {
412 cerr << "Could not open tree " << tree << endl;
413 return 2;
414 }
415
416 if (verbose>0)
417 cout << "Tree: " << tree << endl;
418
419 T->SetMakeClass(1);
420
421 TObjArray *branches = T->GetListOfBranches();
422 TObjArray *leaves = T->GetListOfLeaves();
423
424 if (print_branches)
425 {
426 cout << '\n';
427 branches->Print();
428 }
429
430 if (verbose>0)
431 cout << T->GetEntriesFast() << " events found." << endl;
432
433
434 if (verbose>0)
435 cout << branches->GetEntries() << " branches found." << endl;
436
437 if (print_leaves)
438 {
439 cout << '\n';
440 leaves->Print();
441 }
442 if (verbose>0)
443 cout << leaves->GetEntries() << " leaves found." << endl;
444
445 string query =
446 "CREATE TABLE IF NOT EXISTS `"+table+"`\n"
447 "(\n";
448
449 vector<Container> vec;
450
451 const auto fixed = conf.GetWildcardOptions("const.*");
452
453 string where;
454 vector<string> vindex;
455 for (auto it=fixed.cbegin(); it!=fixed.cend(); it++)
456 {
457 const string name = it->substr(6);
458 string val = conf.Get<string>(*it);
459
460 boost::smatch match;
461 if (boost::regex_match(val, match, boost::regex("\\/(.+)(?<!\\\\)\\/(.*)(?<!\\\\)\\/")))
462 {
463 const string reg = match[1];
464 const string fmt = match[2];
465
466 val = boost::regex_replace(file, boost::regex(reg), fmt.empty()?"$0":fmt,
467 boost::regex_constants::format_default|boost::regex_constants::format_no_copy);
468
469 if (verbose>0)
470 {
471 cout << "Regular expression detected for constant column `" << *it << "`\n";
472 cout << "Filename converted with /" << reg << "/ to /" << fmt << "/\n";
473 cout << "Filename: " << file << '\n';
474 cout << "Result: " << file << endl;
475 }
476 }
477
478 if (verbose>2)
479 cout << "\n" << val << " [-const-]";
480 if (verbose>1)
481 cout << " (" << name << ")";
482
483 string sqltype = "INT UNSIGNED";
484
485 for (auto m=sqltypes.cbegin(); m!=sqltypes.cend(); m++)
486 if (m->first==name)
487 sqltype = m->second;
488
489 if (!vec.empty())
490 query += ",\n";
491 query += " `"+name+"` "+sqltype+" NOT NULL COMMENT '--user--'";
492
493 vec.emplace_back(name, val);
494 where += " AND `"+name+"`="+val;
495 vindex.emplace_back(name);
496 }
497
498 const size_t nvec = vec.size();
499
500 TIter Next(leaves);
501 TObject *o = 0;
502 while ((o=Next()))
503 {
504 TLeaf *L = T->GetLeaf(o->GetName());
505
506 if (verbose>2)
507 cout << '\n' << L->GetTitle() << " {" << L->GetTypeName() << "}";
508
509 if (L->GetLenStatic()!=L->GetLen())
510 {
511 if (verbose>2)
512 cout << " (-skipped-)";
513 continue;
514 }
515
516
517 string name = o->GetName();
518
519 bool found = false;
520 for (auto b=_ignore.cbegin(); b!=_ignore.cend(); b++)
521 {
522 if (boost::regex_match(name, boost::regex(*b)))
523 {
524 found = true;
525 if (verbose>2)
526 cout << " (-ignored-)";
527 break;
528 }
529 }
530 if (found)
531 continue;
532
533 const string tn = L->GetTypeName();
534
535 auto it = ConvRoot.find(tn);
536 if (it==ConvRoot.end())
537 {
538 if (verbose>2)
539 cout << " (-n/a-)";
540 continue;
541 }
542
543 if (verbose==2)
544 cout << '\n' << L->GetTitle() << " {" << L->GetTypeName() << "}";
545
546 for (auto m=mymap.cbegin(); m!=mymap.cend(); m++)
547 name = boost::regex_replace(name, boost::regex(m->first), m->second);
548
549 if (verbose>1)
550 cout << " (" << name << ")";
551
552 string sqltype = it->second.second;
553
554 for (auto m=sqltypes.cbegin(); m!=sqltypes.cend(); m++)
555 if (m->first==name)
556 sqltype = m->second;
557
558 if (!vec.empty())
559 query += ",\n";
560
561 const size_t N = L->GetLenStatic();
562 for (size_t i=0; i<N; i++)
563 {
564 query += " `"+name;
565 if (N>1)
566 query += "["+to_string(i)+"]";
567 query += "` "+sqltype+" NOT NULL COMMENT '"+o->GetTitle()+"'";
568 if (N>1 && i!=N-1)
569 query += ",\n";
570 }
571
572 vec.emplace_back(o->GetTitle(), name, it->second.first, L->GetLenStatic());
573 T->SetBranchAddress(o->GetTitle(), vec.back().ptr);
574 }
575
576 if (verbose>1)
577 cout << "\n\n";
578 if (verbose>0)
579 {
580 if (nvec>0)
581 cout << nvec << " constant value column(s) configured." << endl;
582 cout << vec.size()-nvec << " leaf/leaves setup for reading." << endl;
583 }
584
585 UInt_t datatype = 0;
586 const bool has_datatype = T->SetBranchAddress("DataType.fVal", &datatype) >= 0;
587
588 // Setiing up branch status (must be after all SetBranchAddress)
589 T->SetBranchStatus("*", 0);
590 for (auto c=vec.cbegin(); c!=vec.cend(); c++)
591 if (c->type!=kConst)
592 T->SetBranchStatus(c->branch.c_str(), 1);
593
594 if (has_datatype)
595 {
596 T->SetBranchStatus("DataType.fVal", 1);
597 if (verbose>0)
598 cout << "Rows with DataType.fVal!=1 will be skipped." << endl;
599 }
600
601
602 // -------------------------------------------------------------------------
603 // Checking for database connection
604
605 Database connection(uri);
606
607 if (verbose>0)
608 {
609 cout << "Client Version: " << connection.client_version() << '\n';
610 cout << "Server Version: " << connection.server_version() << endl;
611 }
612
613 if (print_connection)
614 {
615 try
616 {
617 const auto &res1 = connection.query("SHOW STATUS LIKE 'Compression'").store();
618 cout << "Compression of databse connection is " << string(res1[0][1]) << endl;
619
620 const auto &res2 = connection.query("SHOW STATUS LIKE 'Ssl_cipher'").store();
621 cout << "Connection to databases is " << (string(res2[0][1]).empty()?"UNENCRYPTED":"ENCRYPTED ("+string(res2[0][1])+")") << endl;
622 }
623 catch (const exception &e)
624 {
625 cerr << "\nSHOW STATUS LIKE COMPRESSION\n\n";
626 cerr << "SQL query failed:\n" << e.what() << endl;
627 return 3;
628 }
629 }
630
631 // -------------------------------------------------------------------------
632
633 if (verbose>0)
634 cout << "\n--------------------------- Database Table -------------------------" << endl;
635
636 if (!primary.empty())
637 query += ",\n PRIMARY KEY USING BTREE (`"+boost::algorithm::join(primary, "`, `")+"`)";
638
639 if (!vindex.empty() && index)
640 query += ",\n INDEX USING BTREE (`"+boost::algorithm::join(vindex, "`, `")+"`)";
641
642 query +=
643 "\n)\n"
644 "DEFAULT CHARSET=latin1 COLLATE=latin1_general_ci\n";
645 if (!engine.empty())
646 query += "ENGINE="+engine+"\n";
647 if (!row_format.empty())
648 query += "ROW_FORMAT="+row_format+"\n";
649 query += "COMMENT='created by "+conf.GetName()+"'\n";
650
651 // FIXME: Can we omit the catching to be able to print the
652 // query 'autmatically'?
653 try
654 {
655 if (drop)
656 {
657 // => Simple result
658 if (!dry_run)
659 connection.query("DROP TABLE `"+table+"`").execute();
660 if (verbose>0)
661 cout << "Table `" << table << "` dropped." << endl;
662 }
663 }
664 catch (const exception &e)
665 {
666 cerr << "DROP TABLE `" << table << "`\n\n";
667 cerr << "SQL query failed:\n" << e.what() << '\n' << endl;
668 return 4;
669 }
670
671 try
672 {
673 if (create && !dry_run)
674 connection.query(query).execute();
675 }
676 catch (const exception &e)
677 {
678 cerr << query << "\n\n";
679 cerr << "SQL query failed:\n" << e.what() << '\n' << endl;
680 return 5;
681 }
682
683 if (print_create)
684 cout << query << endl;
685
686 if (create && verbose>0)
687 cout << "Table `" << table << "` created." << endl;
688
689
690 try
691 {
692 if (conditional && !fixed.empty() && !drop)
693 {
694 const mysqlpp::StoreQueryResult res =
695 connection.query("SELECT 1 FROM `"+table+"` WHERE 1"+where+" LIMIT 1").store();
696
697 if (res.num_rows()>0)
698 {
699 if (verbose>0)
700 {
701 cout << "Conditional execution... detected existing rows!\n";
702 cout << "Exit.\n" << endl;
703 }
704 return 0;
705 }
706 }
707 }
708 catch (const exception &e)
709 {
710 cerr << "SELECT 1 FROM `" << table << "` WHERE 1" << where << " LIMIT 1\n\n";
711 cerr << "SQL query failed: " << e.what() << endl;
712 return 6;
713 }
714
715 if (print_select)
716 cout << "SELECT 1 FROM `" << table << "` WHERE 1" << where << " LIMIT 1" << endl;
717
718 try
719 {
720 if (run_delete && !fixed.empty() && !drop && !dry_run)
721 {
722 const mysqlpp::SimpleResult res =
723 connection.query("DELETE FROM `"+table+"` WHERE 1"+where).execute();
724
725 if (verbose>0)
726 cout << res.rows() << " row(s) deleted.\n" << endl;
727 }
728 }
729 catch (const exception &e)
730 {
731 cerr << "DELETE FROM `"+table+"` WHERE 1" << where << "\n\n";
732 cerr << "SQL query failed: " << e.what() << endl;
733 return 7;
734 }
735
736 if (print_delete)
737 cout << "DELETE FROM `"+table+"` WHERE 1" << where << endl;
738
739
740 // -------------------------------------------------------------------------
741
742 if (verbose>0)
743 cout << "\n---------------------------- Reading file --------------------------" << endl;
744
745 //query = update ? "UPDATE" : "INSERT";
746 query = "INSERT ";
747 if (ignore_errors)
748 query += "IGNORE ";
749 query += "`"+table+"`\n"
750 "(\n";
751
752 for (auto c=vec.cbegin(); c!=vec.cend(); c++)
753 {
754 if (c!=vec.cbegin())
755 query += ",\n";
756
757 const size_t N = c->num;
758 for (size_t i=0; i<N; i++)
759 {
760 if (N==1)
761 query += " `"+c->column+"`";
762 else
763 query += " `"+c->column+"["+to_string(i)+"]`";
764
765 if (N>1 && i!=N-1)
766 query += ",\n";
767 }
768 }
769
770 query +=
771 "\n)\n"
772 "VALUES\n";
773
774 size_t count = 0;
775
776 const size_t num = max>0 && (max-first)<T->GetEntriesFast() ? (max-first) : T->GetEntriesFast();
777 for (size_t j=first; j<num; j++)
778 {
779 T->GetEntry(j);
780 if (has_datatype && datatype!=1)
781 continue;
782
783 if (count>0)
784 query += ",\n";
785
786 query += "(\n";
787
788 for (auto c=vec.cbegin(); c!=vec.cend(); c++)
789 {
790 if (c!=vec.cbegin())
791 query += ",\n";
792
793 const size_t N = c->num;
794 for (size_t i=0; i<N; i++)
795 {
796 query += " "+c->fmt(i);
797
798 if (print_insert && i==0)
799 query += " /* "+c->column+" -> "+c->branch+" */";
800
801 if (N>1 && i!=N-1)
802 query += ",\n";
803 }
804 }
805 query += "\n)";
806
807 count ++;
808 }
809
810 if (!duplicate.empty())
811 query += "\nON DUPLICATE KEY UPDATE\n " + boost::join(duplicate, ",\n ");
812
813 if (verbose>0)
814 cout << count << " out of " << num << " row(s) read from file [N=" << first << ".." << num-1 << "]." << endl;
815
816 if (count==0)
817 {
818 if (verbose>0)
819 cout << "Total execution time: " << Time().UnixTime()-start.UnixTime() << "s\n" << endl;
820 return 0;
821 }
822
823 // -------------------------------------------------------------------------
824
825 if (verbose>0)
826 {
827 cout << "\n--------------------------- Inserting data -------------------------" << endl;
828 cout << "Sending INSERT query (" << query.length() << " bytes)" << endl;
829 }
830
831 try
832 {
833 if (!noinsert && !dry_run)
834 {
835 auto q = connection.query(query);
836 q.execute();
837 cout << q.info() << '\n' << endl;
838 }
839 else
840 cout << "Insert query skipped!" << endl;
841
842 if (print_insert)
843 cout << query << endl;
844 }
845 catch (const exception &e)
846 {
847 if (verbose>1 || query.length()<80*25)
848 cerr << query << "\n\n";
849 cerr << "SQL query failed (" << query.length() << " bytes):\n" << e.what() << '\n' << endl;
850 return 8;
851 }
852
853 if (verbose>0)
854 {
855 const auto sec = Time().UnixTime()-start.UnixTime();
856 cout << "Total execution time: " << sec << "s ";
857 cout << "(" << Tools::Fractional(sec/count) << "s/row)\n";
858
859 try
860 {
861 const auto resw =
862 connection.query("SHOW WARNINGS").store();
863
864 for (size_t i=0; i<resw.num_rows(); i++)
865 {
866 const mysqlpp::Row &roww = resw[i];
867
868 cout << roww["Level"] << '[' << roww["Code"] << "]: ";
869 cout << roww["Message"] << '\n';
870 }
871 cout << endl;
872
873 }
874 catch (const exception &e)
875 {
876 cerr << "\nSHOW WARNINGS\n\n";
877 cerr << "SQL query failed:\n" << e.what() << '\n' <<endl;
878 return 9;
879 }
880 }
881
882 if (print_connection)
883 {
884 try
885 {
886 // Exchange _send and _received as it is the view of the server
887 const auto &res1 = connection.query("SHOW STATUS LIKE 'Bytes_%'").store();
888 cout << left << setw(16) << res1[1]["Variable_name"] << ' ' << Tools::Scientific(res1[0]["Value"]) << endl;
889 cout << left << setw(16) << res1[0]["Variable_name"] << ' ' << Tools::Scientific(res1[1]["Value"]) << endl;
890 cout << endl;
891 }
892 catch (const exception &e)
893 {
894 cerr << "\nSHOW STATUS LIKE 'Bytes_%'\n\n";
895 cerr << "SQL query failed:\n" << e.what() << endl;
896 return 10;
897 }
898 }
899
900 return 0;
901}
Note: See TracBrowser for help on using the repository browser.