#include #include #include "Database.h" #include "Time.h" #include "Configuration.h" #include #include #include #include #include using namespace std; // ------------------------------------------------------------------------ struct Map : pair { Map() { } }; std::istream &operator>>(std::istream &in, Map &m) { const istreambuf_iterator eos; string txt(istreambuf_iterator(in), eos); const boost::regex expr("((.*)[^\\\\])/(.*)"); boost::smatch match; if (!boost::regex_match(txt, match, expr)) throw runtime_error("Could not evaluate map argument: "+txt); m.first = match[1].str(); m.second = match[3].str(); return in; } void SetupConfiguration(Configuration &conf) { po::options_description control("Root to SQL"); control.add_options() ("uri,u", var() #if BOOST_VERSION >= 104200 ->required() #endif , "Database link as in\n\tuser:password@server[:port]/database.") ("file", var(""), "The root file to read from") ("force", po_switch(), "Force processing even if there is no database connection") ("create", po_switch(), "Create the database if not existing") ("drop", po_switch(), "Drop the table (implies create)") ("tree,t", var("Events"), "Name of the root tree to convert") ("table", var(""), "Name of the table to use (default is the tree name)") ("map", vars(), "A regular expression which is applied to the leaf name befoee it is used as SQL column name)") ("sql-type", vars(), "Allows to overwrite the calculated SQL type for a given column e.g. 'sql-column-name/UNSIGNED IN'") ("ignore", vars(), "Ignore the given leaf, if the given regular expression matches") ("primary", vars(), "List of columns to be used as primary keys during table creation (in connection with --create)") ("first", var(int64_t(0)), "First event to start with (default: 0), mainly for test purpose") ("max", var(int64_t(0)), "Maximum number of events to process (0: all), mainly for test purpose") ("engine", var("InnoDB"), "Database engine to be used when a new table is created") ("duplicate", var(""), "Specifies an assignment_list for an 'ON DUPLICATE KEY UPDATE' expression") ("ignore-errors", po_switch(), "Adds the IGNORE keyword to the INSERT query (turns errors into warnings, ignores rows with errors)") ; po::options_description debug("Debug options"); debug.add_options() ("no-insert", po_switch(), "Does not insert any data into the table") ("dry-run", po_switch(), "Skip any query which changes the databse (might result in consecutive failures)") ("print-branches", po_switch(), "Print the branches found in the tree") ("print-leaves", po_switch(), "Print the leaves found in the tree (this is what is processed)") ("print-insert", po_switch(), "Print the INSERT query (note that it contains all data)") ("print-create", po_switch(), "Print the CREATE query") ("verbose,v", var(1), "Verbosity (0: quiet, 1: default, 2: more, 3, ...)") ; po::positional_options_description p; p.add("file", 1); // The 1st positional options (n=1) conf.AddOptions(control); conf.AddOptions(debug); conf.SetArgumentPositions(p); } void PrintUsage() { cout << "root2sql - Fills the data from a root file into a database\n" "\n" "For convenience, this documentation uses the extended version of the options, " "refer to the output below to get the abbreviations.\n" "\n" "This is a general purpose tool to fill the contents of a root file into a database " "as long as this is technically possible and makes sense. Note that root can even " "write complex data like a TH1F into a database, this is not the purpose of this " "program.\n" "\n" "Each root tree has branches and leaves (the basic data types). These leaves can " "be read independently of the classes which were used to write the root file. " "The default tree to read from is 'Events' but the name can be overwritten " "using --tree. The default table name to fill the data into is identical to " "the tree name. It can be overwritten using --table.\n" "\n" "The name of each column to which data is filled from a leave is obtained from " "the leaves' names. The leave names can be checked using --print-leaves. " "A --print-branches exists for convenience to print only the high-level branches. " "Sometimes these names might be quite unconvenient like MTime.fTime.fMilliSec or " "just MHillas.fWidth. To allow to simplify column names, regular expressions " "(using boost's regex) can be defined to change the names. Note that these regular " "expressions are applied one by one on each leaf's name. A valid expression could " "be:\n" " --map=MHillas\\.f/\n" "which would remove all occurances of 'MHillas.f'. This option can be used more than " "once. They are applied in sequence. A single match does not stop the sequence.\n" "\n" "Sometimes it might also be convenient to skip a leaf. This can be done with " "the --ignore resource. If the given regular expresion yields a match, the " "leaf will be ignored. Note that the regular expression works on the raw-name " "of the leaf not the readily mapped SQL column names. Example:\n" " --ignore=ThetaSq\\..*\n" "will skip all leaved which start with 'ThetaSq.'. This option can be used" "more than once.\n" "\n" "The data type of each column is kept as close as possible to the leaves' data " "types. If for some reason this is not wanted, the data type of the SQL column " "can be overwritten with --sql-type sql-column/sql-ytpe, for example:\n" " --sql-type=FileId/UNSIGNED INT\n" "while the first argument of the name of the SQL column to which the data type " "should be applied. The second column is the basic SQL data type. The option can " "be given more than once.\n" "\n" "Database interaction:\n" "\n" "To drop an existing table, --drop can be used.\n" "\n" "To create a table according to theSQL column names and data types, --create " "can be used. The query used can be printed with --print-create even --create " "has not been specified.\n" "\n" "To choose the columns which should become primary keys, use --primary, " "for exmaple:\n" " --primary=col1\n" "To define more than one column as primary key, the option can be given more than " "once. Note that the combination of these columns must be unique.\n" "\n" "All columns are created as NOT NULL as default and the table is created as " "InnoDB (default).\n" "\n" "Usually, the INSERT query would fail if the PRIMARY key exists already. " "This can be avoided using the 'ON DUPLICATE KEY' directive. With the " "--duplicate,you can specify what should be updated in case of a duplicate key. " "To keep the row untouched, you can just update the primary key " "with the identical primary key, e.g. --duplicate='MyPrimary=VALUES(MyPrimary)'. " "For more details, see the MySQL manual.\n" "\n" "For debugging purpose, or to just create or drop a table, the final insert " "query can be skipped using --no-insert. Note that for performance reason, " "all data is collected in memory and a single INSERT query is issued at the " "end.\n" "\n" "Another possibility is to add the IGNORE keyword to the INSERT query by " "--ignore-errors, which essentially ignores all errors and turns them into " "warnings which are printed after the query succeeded.\n" "\n" "Using a higher verbosity level (-v), an overview of the written columns or all " "processed leaves is printed depending on the verbosity level. The output looks " "like the following\n" " Leaf name [root data type] (SQL name)\n" "for example\n" " MTime.fTime.fMilliSec [Long64_t] (MilliSec)\n" "which means that the leaf MTime.fTime.fMilliSec is detected to be a Long64_t " "which is filled into a column called MilliSec. Leaves with non basic data types " "are ignored automatically and are marked as (-n/a-). User ignored columnd " "are marked as (-ignored-).\n" "\n" "If a query failed, the query is printed to stderr together with the error message. " "For the main INSERT query, this is only true if the verbosity level is at least 2 " "or the query has less than 80*25 bytes.\n" "\n" "In case of succes, 0 is returned, a value>0 otherwise.\n" "\n" "Usage: root2sql [options] rootfile.root\n" "\n" ; cout << endl; } enum BasicType_t { kNone = 0, kFloat, kDouble, kInt16, kUInt16, kInt32, kUInt32, kInt64, kUInt64, }; static const map> ConvRoot = { { "Float_t", { kFloat, "FLOAT" } }, { "Double_t", { kDouble, "DOUBLE" } }, { "ULong64_t", { kUInt64, "BIGINT UNSIGNED" } }, { "Long64_t", { kInt64, "BIGINT" } }, { "UInt_t", { kUInt32, "INT UNSIGNED" } }, { "Int_t", { kInt32, "INT" } }, { "UShort_t", { kUInt16, "SMALLINT UNSIGNED" } }, { "Short_t", { kInt16, "SMALLINT" } }, }; struct Container { string branch; // branch name string column; // column name BasicType_t type; void *ptr; Container(const string &b, const string &c, const BasicType_t &t) : branch(b), column(c), type(t), ptr(0) { switch (t) { case kFloat: ptr = new Float_t; break; case kDouble: ptr = new Double_t; break; case kInt16: ptr = new Short_t; break; case kUInt16: ptr = new UShort_t; break; case kInt32: ptr = new Int_t; break; case kUInt32: ptr = new UInt_t; break; case kInt64: ptr = new Long64_t; break; case kUInt64: ptr = new ULong64_t; break; case kNone: break; } } ~Container() { //::operator delete(ptr); // It seems root is deleting it already } string fmt() const { ostringstream str; switch (type) { case kFloat: str << setprecision(8) << *reinterpret_cast(ptr); break; case kDouble: str << setprecision(16) << *reinterpret_cast(ptr); break; case kInt16: str << *reinterpret_cast(ptr); break; case kUInt16: str << *reinterpret_cast(ptr); break; case kInt32: str << *reinterpret_cast(ptr); break; case kUInt32: str << *reinterpret_cast(ptr); break; case kInt64: str << *reinterpret_cast(ptr); break; case kUInt64: str << *reinterpret_cast(ptr); break; case kNone: break; } //if (str.str()=="nan" || str.str()=="-nan" || str.str()=="inf" || str.str()=="-inf") // return "NULL"; return str.str(); } }; void ErrorHandlerAll(Int_t level, Bool_t abort, const char *location, const char *msg) { if (string(msg).substr(0,24)=="no dictionary for class ") return; DefaultErrorHandler(level, abort, location, msg); } int main(int argc, const char* argv[]) { Time start; gROOT->SetBatch(); SetErrorHandler(ErrorHandlerAll); Configuration conf(argv[0]); conf.SetPrintUsage(PrintUsage); SetupConfiguration(conf); if (!conf.DoParse(argc, argv)) return 127; // ----------------------------- Evaluate options -------------------------- const string uri = conf.Get("uri"); const string file = conf.Get("file"); const string tree = conf.Get("tree"); const string table = conf.Get("table").empty() ? tree : conf.Get("table"); const uint16_t verbose = conf.Get("verbose"); const int64_t first = conf.Get("first"); const int64_t max = conf.Get("max"); const bool force = conf.Get("force"); const bool drop = conf.Get("drop"); const bool create = conf.Get("create") || drop; const bool noinsert = conf.Get("no-insert"); const bool dry_run = conf.Get("dry-run"); const string engine = conf.Get("engine"); const string duplicate = conf.Get("duplicate"); const bool ignore_errors = conf.Get("ignore-errors"); const bool print_branches = conf.Get("print-branches"); const bool print_leaves = conf.Get("print-leaves"); const bool print_create = conf.Get("print-create"); const bool print_insert = conf.Get("print-insert"); const vector mymap = conf.Vec("map"); const vector sqltypes = conf.Vec("sql-type"); const vector _ignore = conf.Vec("ignore"); const vector primary = conf.Vec("primary"); // ------------------------------------------------------------------------- if (verbose>0) cout << "\n-------------------------- Evaluating file -------------------------" << endl; TFile f(file.c_str()); if (f.IsZombie()) { cerr << "Could not open file " << file << endl; return 1; } if (verbose>0) cout << "File: " << file << endl; TTree *T = 0; f.GetObject(tree.c_str(), T); if (!T) { cerr << "Could not open tree " << tree << endl; return 2; } if (verbose>0) cout << "Tree: " << tree << endl; T->SetMakeClass(1); TObjArray *branches = T->GetListOfBranches(); TObjArray *leaves = T->GetListOfLeaves(); if (print_branches) { cout << '\n'; branches->Print(); } if (verbose>0) cout << T->GetEntriesFast() << " events found." << endl; if (verbose>0) cout << branches->GetEntries() << " branches found." << endl; if (print_leaves) { cout << '\n'; leaves->Print(); } if (verbose>0) cout << leaves->GetEntries() << " leaves found." << endl; string query = "CREATE TABLE IF NOT EXISTS `"+table+"`\n" "(\n"; vector vec; TIter Next(leaves); TObject *o = 0; while ((o=Next())) { TLeaf *L = T->GetLeaf(o->GetName()); if (verbose>2) cout << '\n' << o->GetName() << " [" << L->GetTypeName() << "]"; string name = o->GetName(); bool found = false; for (auto b=_ignore.cbegin(); b!=_ignore.cend(); b++) { if (boost::regex_match(name, boost::regex(*b))) { found = true; if (verbose>2) cout << " (-ignored-)"; break; } } if (found) continue; const string tn = L->GetTypeName(); auto it = ConvRoot.find(tn); if (it==ConvRoot.end()) { if (verbose>2) cout << " (-n/a-)"; continue; } if (verbose==2) cout << '\n' << o->GetName() << " [" << L->GetTypeName() << "]"; for (auto m=mymap.cbegin(); m!=mymap.cend(); m++) name = boost::regex_replace(name, boost::regex(m->first), m->second); if (verbose>1) cout << " (" << name << ")"; string sqltype = it->second.second; for (auto m=sqltypes.cbegin(); m!=sqltypes.cend(); m++) if (m->first==name) sqltype = m->second; if (!vec.empty()) query += ",\n"; query += " `"+name+"` "+sqltype+" NOT NULL COMMENT '"+o->GetName()+"'"; vec.emplace_back(o->GetName(), name, it->second.first); T->SetBranchAddress(o->GetName(), vec.back().ptr); } if (verbose>1) cout << "\n\n"; if (verbose>0) cout << vec.size() << " leaves setup for reading." << endl; UInt_t datatype = 0; const bool has_datatype = T->SetBranchAddress("DataType.fVal", &datatype) >= 0; // Setiing up branch status (must be after all SetBranchAddress) T->SetBranchStatus("*", 0); for (auto c=vec.cbegin(); c!=vec.cend(); c++) T->SetBranchStatus(c->branch.c_str(), 1); if (has_datatype) { T->SetBranchStatus("DataType.fVal", 1); if (verbose>0) cout << "Rows with DataType.fVal!=1 will be skipped." << endl; } // ------------------------------------------------------------------------- // Checking for database connection Database connection(uri); try { if (!force) connection.connected(); } catch (const exception &e) { cerr << "SQL connection failed: " << e.what() << endl; return 3; } // ------------------------------------------------------------------------- if (verbose>0) cout << "\n--------------------------- Database Table -------------------------" << endl; if (!primary.empty()) query += ",\n PRIMARY KEY USING BTREE (`"+boost::algorithm::join(primary, "`, `")+"`)\n"; query += ")\n" "DEFAULT CHARSET=latin1 COLLATE=latin1_general_ci\n" "ENGINE="+engine+"\n" "COMMENT='created by "+conf.GetName()+"'\n"; // FIXME: Can we omit the catching to be able to print the // query 'autmatically'? try { if (drop) { // => Simple result if (!dry_run) connection.query("DROP TABLE `"+table+"`").execute(); if (verbose>0) cout << "Table `" << table << "` dropped." << endl; } } catch (const exception &e) { cerr << "DROP TABLE `" << table << "`\n\n"; cerr << "SQL query failed:\n" << e.what() << endl; return 4; } try { if (create && !dry_run) connection.query(query).execute(); } catch (const exception &e) { cerr << query << "\n\n"; cerr << "SQL query failed:\n" << e.what() << endl; return 5; } if (print_create) cout << query << endl; if (create && verbose>0) cout << "Table `" << table << "` created." << endl; // ------------------------------------------------------------------------- if (verbose>0) cout << "\n---------------------------- Reading file --------------------------" << endl; //query = update ? "UPDATE" : "INSERT"; query = "INSERT "; if (ignore_errors) query += "IGNORE "; query += "`"+table+"`\n" "(\n"; for (auto c=vec.cbegin(); c!=vec.cend(); c++) { if (c!=vec.cbegin()) query += ",\n"; query += " `"+c->column+"`"; } query += "\n)\n" "VALUES\n"; size_t count = 0; const size_t num = max>0 && (max-first)GetEntriesFast() ? (max-first) : T->GetEntriesFast(); for (size_t j=first; jGetEntry(j); if (has_datatype && datatype!=1) continue; if (count>0) query += ",\n"; query += "(\n"; for (auto c=vec.cbegin(); c!=vec.cend(); c++) { if (c!=vec.cbegin()) query += ",\n"; query += " "+c->fmt(); if (print_insert) query += " /* "+c->column+" -> "+c->branch+" */"; } query += "\n)"; count ++; } if (!duplicate.empty()) query += "\nON DUPLICATE KEY UPDATE\n " + duplicate; if (verbose>0) cout << count << " out of " << num << " row(s) read from file [N=" << first << ".." << num-1 << "]." << endl; if (count==0) { if (verbose>0) cout << "Total execution time: " << Time().UnixTime()-start.UnixTime() << "s\n" << endl; return 0; } // ------------------------------------------------------------------------- if (verbose>0) { cout << "\n--------------------------- Inserting data -------------------------" << endl; cout << "Sending INSERT query (" << query.length() << " bytes)" << endl; } try { if (!noinsert && !dry_run) { auto q = connection.query(query); q.execute(); cout << q.info() << '\n' << endl; } else cout << "Insert query skipped!" << endl; if (print_insert) cout << query << endl; } catch (const exception &e) { if (verbose>1 || query.length()<80*25) cerr << query << "\n\n"; cerr << "SQL query failed (" << query.length() << " bytes):\n" << e.what() << endl; return 6; } if (verbose>0) { cout << "Total execution time: " << Time().UnixTime()-start.UnixTime() << "s\n" << endl; try { const auto resw = connection.query("SHOW WARNINGS").store(); for (size_t i=0; i