#include "Database.h" #include #include #include #include #include "tools.h" #include "Time.h" #include "Configuration.h" #include #include #include #include using namespace std; // ------------------------------------------------------------------------ void SetupConfiguration(Configuration &conf) { po::options_description control("Rootify SQL"); control.add_options() ("uri,u", var()->required(), "Database link as in\n\tuser:password@server[:port]/database.") ("query,q", var(""), "MySQL query (overwrites --file)") ("file", var("rootify.sql"), "An ASCII file with the MySQL query (overwrites --query)") ("ignore-null,i", po_switch(), "Do not skip rows containing any NULL field") ("out,o", var("rootify.root"), "Output root file name") ("force,f", po_switch(), "Force overwriting an existing root file ('RECREATE')") ("update", po_switch(), "Update an existing root file with the new tree ('UPDATE')") ("compression,c", var(1), "zlib compression level for the root file") ("tree,t", var("Result"), "Name of the root tree") ("ignore", vars(), "Ignore the given columns") ("display,d", po_switch(), "Displays contents on the screen (most usefull in combination with mysql statements as SHOW or EXPLAIN)") ("null,n", po_switch(), "Redirect the output file to /dev/null (mainly for debugging purposes, e.g. performance studies)") ("no-fill", po_switch(), "Do not fill events into the root file (mainly for debugging purposes, e.g. performance studies)") ("delimiter", var(""), "The delimiter used if contents are displayed with --display (default=\\t)") ("explain", po_switch(), "Requests an EXPLAIN from the server (shows the server optimized query)\nsee also https://dev.mysql.com/doc/refman/explain-output.html") ("profiling", po_switch(), "Turn on profiling and print profile") ("var.*", var(), "Predefined SQL user variables (@VAR)") ("env.*", vars(), "Predefined environment for substitutions in the query ($ENV)") ("verbose,v", var(1), "Verbosity (0: quiet, 1: default, 2: more, 3, ...)") ; po::positional_options_description p; p.add("file", 1); // The 1st positional options (n=1) p.add("out", 1); // The 2nd positional options (n=1) conf.AddOptions(control); conf.SetArgumentPositions(p); } void PrintUsage() { cout << "rootifysql - Converts the result of a mysql query into a root file\n" "\n" "For convenience, this documentation uses the extended version of the options, " "refer to the output below to get the abbreviations.\n" "\n" "Writes the result of a mysql query into a root file. For each column, a branch is " "created of type double with the field name as name. This is usually the column name " "if not specified otherwise by the AS mysql directive.\n" "\n" "Columns with CHAR or VARCHAR as field type are ignored. DATETIME, DATE and TIME " "columns are converted to unix time (time_t). Rows containing any file which is " "NULL are skipped if not suppressed by the --ignore-null option. Ideally, the query " "is compiled in a way that no NULL field is returned. With the --display option the " "result of the request is printed on the screen (NULL skipping still in action). " "This can be useful to create an ascii file or to show results as 'SHOW DATABASES' " "or 'EXPLAIN table'. To redirect the contents into an ascii file, the option -v0 " "is useful. To suppredd writing to an output file --null can be used.\n" "\n" "The default is to read the query from a file called rootify.sql. Except if a different " "filename is specified by the --file option or a query is given with --query.\n" "\n" "As a trick, the rootify.sql file can be made excutable (chmod u+x rootify.sql). " "If the first line contains '#!rootifysql', the script can be executed directly.\n" "\n" "Columns whose name start with @ are skipped. If you want them in your output file " "give them a name using AS, e.g. 'SELECT @A:=5 AS A'.\n" "\n" "You can use variables in your sql query like @MyVar and define them on the " "command line. In this example with --var.MyVar=5\n" "\n" "You can use environment definitions for substitutions in your SQL query. " "For example --env.TEST=5 would replace $TEST or ${TEST} in your query by 5." "If you specifiy one environmentvariable more than once, a list is created. " "For example --env.TEST=1 --env.TEST=2 --env.TEST=3 would substitute " "$TEST or ${TEST} by '1, 2, 3'. This is useful for the SQL `IN` keyword.\n" "\n" "Comments in the query-file can be placed according to the SQL standard inline " "/*comment*/ or introduced with # (shell script style) or -- (SQL style).\n" "\n" "In case of succes, 0 is returned, a value>0 otherwise.\n" "\n" "Usage: rootifysql [rootify.sql [rootify.root]] [-u URI] [-q query|-f file] [-i] [-o out] [-f] [-cN] [-t tree] [-vN]\n" "\n" ; cout << endl; } struct ExplainParser { string sql; vector vec; string substitute(string _str, const regex &expr) { smatch match; while (regex_search(_str, match, expr, regex_constants::format_first_only)) { const auto &len = match.length(0); const auto &pos = match.position(0); const auto &str = match.str(0); const auto it = find(vec.cbegin(), vec.cend(), str); const size_t id = it==vec.cend() ? vec.size() : it-vec.cbegin(); _str.replace(pos, len, "{"+to_string(id)+"}"); if (it==vec.cend()) vec.push_back(str);//.substr(1, str.size()-2)); } return _str; } string substitute(const string &str, const string &expr) { return substitute(str, regex(expr)); } vector queries; string resub(string str) { // search for "KEYWORD expression" regex reg("\\{[0-9]+\\}"); //smatch match; smatch match; while (regex_search(str, match, reg, regex_constants::format_first_only)) { const auto &len = match.length(0); const auto &pos = match.position(0); const auto &arg = match.str(0); // Argument const auto idx = atoi(arg.c_str()+1); str.replace(pos, len, resub(vec[idx])); } return str; } void expression(string expr, size_t indent=0) { if (expr[0]=='{') { const auto idx = atoi(expr.c_str()+1); // This is a subquery if (vec[idx].substr(0,3)=="(/*") { cout << setw(indent) << ' ' << "(\n"; find_tokens(vec[idx], indent+4); cout << setw(indent) << ' ' << ") "; } else // This is just something to substitute back if (vec[idx].substr(0,2)=="({") { cout << setw(indent) << ' ' << "(" << resub(vec[idx]) << ") "; } else { if (indent>0) cout << setw(indent) << ' '; cout << resub(vec[idx]); } } else { if (indent>0) cout << setw(indent) << ' '; cout << resub(expr); } } void find_tokens(string str, size_t indent=0) { // ( COMMENT )?( TOKEN )?(( {NNN} | NNN )( AS|ON ( {NNN}) ))?(,)?) //regex reg("(\\/\\*\\ select\\#[0-9]+\\ \\*\\/\\ *)?([a-zA-Z ]+)?((\\{[0-9]+\\}|[0-9]+)(\\ ?([Aa][Ss]|[Oo][Nn])\\ ?(\\{[0-9]+\\}))?(,)?)"); const string _com = "\\/\\*\\ select\\#[0-9]+\\ \\*\\/\\ *"; const string _tok = "[a-zA-Z_ ]+"; const string _nnn = "\\{[0-9]+\\}|[0-9]+"; const string _as = "\\ ?([Aa][Ss])\\ ?"; // ( _nnn ) ( _as ( _nnn ))?(,)? // can also match noting in between two {NNN} const string _exp = "("+_nnn+")" + "("+_as+"("+_nnn+"))?(,)?"; // Matche: ( _com )? ( ( _tok )? ( _exp ) | ( _tok ) ) regex reg("("+_com+")?" + "(" + "("+_tok+")?"+"("+_exp+")" + "|" + "("+_tok+")" + ")"); smatch match; while (regex_search(str, match, reg, regex_constants::format_first_only)) { const auto &com = match.str(1); // comment const auto &tok1 = Tools::Trim(match.str(3)); // token with expression const auto &arg1 = match.str(5); // argument 1 const auto &as = match.str(7); // as const auto &arg2 = match.str(8); // argument 2 const auto &comma = match.str(9); // comma const auto &tok2 = Tools::Trim(match.str(10)); // token without expression if (!com.empty()) cout << setw(indent) << ' ' << "\033[34m" << com << "\033[0m" << '\n'; if (!tok1.empty()) cout << setw(indent) << ' ' << "\033[32m" << tok1 << "\033[0m" << '\n'; if (!tok2.empty()) cout << setw(indent) << ' ' << "\033[32m" << tok2 << "\033[0m" << '\n'; if (!arg1.empty()) { expression(arg1, indent+4); if (!as.empty()) cout << " \033[33m" << as << "\033[0m "; if (!arg2.empty()) expression(arg2); if (!comma.empty()) cout << ','; cout << '\n'; } str = str.substr(match.position(0)+match.length(0)); } } ExplainParser(const string &_sql) : sql(_sql) { // substitute all strings sql = substitute(sql, "'[^']*'"); // substitute all escaped sequences (`something`.`something-else`) sql = substitute(sql, "`[^`]*`(\\.`[^`]*`)*"); // substitute all paranthesis sql = substitute(sql, "[a-zA-Z0-9_]*\\([^\\(\\)]*\\)"); //cout << sql << "\n\n"; find_tokens(sql); cout << endl; } }; // Remove queries... void format(string sql) { ExplainParser p(sql); /* SELECT [ALL | DISTINCT | DISTINCTROW ] [HIGH_PRIORITY] [STRAIGHT_JOIN] [SQL_SMALL_RESULT] [SQL_BIG_RESULT] [SQL_BUFFER_RESULT] [SQL_CACHE | SQL_NO_CACHE] [SQL_CALC_FOUND_ROWS] select_expr [, select_expr ...] [FROM table_references [PARTITION partition_list] [WHERE where_condition] [GROUP BY {col_name | expr | position}, ... [WITH ROLLUP]] [HAVING where_condition] [WINDOW window_name AS (window_spec) [, window_name AS (window_spec)] ...] [ORDER BY {col_name | expr | position} [ASC | DESC], ... [WITH ROLLUP]] [LIMIT {[offset,] row_count | row_count OFFSET offset}] [INTO OUTFILE 'file_name' [CHARACTER SET charset_name] export_options | INTO DUMPFILE 'file_name' | INTO var_name [, var_name]] [FOR {UPDATE | SHARE} [OF tbl_name [, tbl_name] ...] [NOWAIT | SKIP LOCKED] | LOCK IN SHARE MODE]] */ /* table_references: escaped_table_reference [, escaped_table_reference] ... escaped_table_reference: table_reference | { OJ table_reference } table_reference: table_factor | join_table table_factor: tbl_name [PARTITION (partition_names)] [[AS] alias] [index_hint_list] | table_subquery [AS] alias [(col_list)] | ( table_references ) join_table: table_reference [INNER | CROSS] JOIN table_factor [join_condition] | table_reference STRAIGHT_JOIN table_factor | table_reference STRAIGHT_JOIN table_factor ON conditional_expr | table_reference {LEFT|RIGHT} [OUTER] JOIN table_reference join_condition | table_reference NATURAL [INNER | {LEFT|RIGHT} [OUTER]] JOIN table_factor join_condition: ON conditional_expr | USING (column_list) index_hint_list: index_hint [, index_hint] ... index_hint: USE {INDEX|KEY} [FOR {JOIN|ORDER BY|GROUP BY}] ([index_list]) | IGNORE {INDEX|KEY} [FOR {JOIN|ORDER BY|GROUP BY}] (index_list) | FORCE {INDEX|KEY} [FOR {JOIN|ORDER BY|GROUP BY}] (index_list) index_list: index_name [, index_name] ... */ } int main(int argc, const char* argv[]) { Time start; gROOT->SetBatch(); Configuration conf(argv[0]); conf.SetPrintUsage(PrintUsage); SetupConfiguration(conf); if (!conf.DoParse(argc, argv)) return 127; // ----------------------------- Evaluate options -------------------------- const string uri = conf.Get("uri"); const string out = conf.Get("out"); const string file = conf.Get("file"); const string tree = conf.Get("tree"); const bool force = conf.Get("force"); const bool ignorenull = conf.Get("ignore-null"); const bool update = conf.Get("update"); const bool display = conf.Get("display"); const bool noout = conf.Get("null"); const bool nofill = conf.Get("no-fill"); const bool explain = conf.Get("explain"); const bool profiling = conf.Get("profiling"); const uint16_t verbose = conf.Get("verbose"); const uint16_t compression = conf.Get("compression"); const string delimiter = conf.Get("delimiter"); const vector _ignore = conf.Vec("ignore"); //const vector mymap = conf.Vec("map"); // ------------------------------------------------------------------------- const auto vars = conf.GetWildcardOptions("var.*"); vector variables; for (const auto &var : vars) variables.emplace_back('@'+var.substr(4)+":="+Tools::Trim(conf.Get(var))); // ------------------------------------------------------------------------- if (verbose>0) cout << "\n------------------------ Rootify SQL -------------------------" << endl; string query = conf.Get("query"); if (query.empty()) { if (verbose>0) cout << "Reading query from file '" << file << "'." << endl; ifstream fin(file); if (!fin) { cerr << "Could not open '" << file << "': " << strerror(errno) << endl; return 1; } getline(fin, query, (char)fin.eof()); } if (query.empty()) { cerr << "No query specified." << endl; return 2; } // ------------------------------------------------------------------------- const auto envs = conf.GetWildcardOptions("env.*"); for (const auto &env : envs) { regex r1("\\$\\{"+env.substr(4)+"\\}"); regex r2("\\$"+env.substr(4)+"\\b"); const string repl = boost::join(conf.Vec(env), ", "); query = regex_replace(query, r1, repl); query = regex_replace(query, r2, repl); } // -------------------------- Check for file permssion --------------------- // Strictly speaking, checking for write permission and existance is not necessary, // but it is convenient that the user does not find out that it failed after // waiting long for the query result // // I am using root here instead of boost to be // consistent with the access pattern by TFile TString path(noout?"/dev/null":out.c_str()); gSystem->ExpandPathName(path); if (!noout) { FileStat_t stat; const Int_t exist = !gSystem->GetPathInfo(path, stat); const Bool_t write = !gSystem->AccessPathName(path, kWritePermission) && R_ISREG(stat.fMode); if ((update && !exist) || (update && exist && !write) || (force && exist && !write)) { cerr << "File '" << path << "' is not writable." << endl; return 3; } if (!update && !force && exist) { cerr << "File '" << path << "' already exists." << endl; return 4; } } Time start2; // --------------------------- Connect to database ------------------------------------------------- if (query.back()!='\n') query += '\n'; if (verbose>0) cout << "Connecting to database..." << endl; Database connection(uri); // Keep alive while fetching rows try { if (profiling) connection.query("SET PROFILING=1").execute(); } catch (const exception &e) { cerr << "\nSET profiling=1\n\n"; cerr << "SQL query failed:\n" << e.what() << endl; return 6; } // -------------------------- Set user defined variables ------------------- if (variables.size()>0) { if (verbose>0) cout << "Setting user defined variables..." << endl; const string varset = "SET\n "+boost::algorithm::join(variables, ",\n "); try { connection.query(varset).execute(); } catch (const exception &e) { cerr << '\n' << varset << "\n\n"; cerr << "SQL query failed:\n" << e.what() << endl; return 7; } if (verbose>2) cout << '\n' << varset << '\n' << endl; } // ------------------------- Explain query if requested -------------------- if (explain) { try { const auto res0 = connection.query("EXPLAIN FORMAT=JSON "+query).store(); cout << res0[0][0] << endl; cout << endl; const mysqlpp::StoreQueryResult res1 = connection.query("EXPLAIN "+query).store(); for (size_t i=0; i0) cout << "Requesting data..." << endl; if (verbose>2) cout << '\n' << query << endl; const mysqlpp::UseQueryResult res = connection.query(query).use(); // ------------------------------------------------------------------------- if (verbose>0) cout << "Opening file '" << path << "' [compression=" << compression << "]..." << endl; // ----------------------------- Open output file -------------------------- TFile tfile(path, update?"UPDATE":(force?"RECREATE":"CREATE"), "Rootify SQL", compression); if (tfile.IsZombie()) return 9; // ------------------------------------------------------------------------- // get the first row to get the field description mysqlpp::Row row = res.fetch_row(); if (!row) { cerr << "Empty set returned... nothing to write." << endl; return 10; } if (verbose>0) cout << "Trying to setup " << row.size() << " branches..." << endl; if (verbose>1) cout << endl; const mysqlpp::FieldNames &l = *row.field_list().list; vector buf(l.size()); vector typ(l.size(),'n'); // n=number [double], d is used for DateTime UInt_t cols = 0; // -------------------- Configure branches of TTree ------------------------ TTree *ttree = new TTree(tree.c_str(), query.c_str()); size_t skipat = 0; size_t skipreg = 0; for (size_t i=0; i1) cout << (use?" + ":" - ") << l[i].c_str() << " [" << t << "] {" << typ[i] << "}\n"; if (use) { // string name = l[i]; // for (const auto &m: mymap) // name = boost::regex_replace(l[i], boost::regex(m.first), m.second); ttree->Branch(l[i].c_str(), buf.data()+i); cols++; } } // ------------------------------------------------------------------------- if (verbose>1) cout << endl; if (verbose>0) { if (skipreg) cout << skipreg << " branches skipped due to ignore list." << endl; if (skipat) cout << skipat << " branches skipped due to name starting with @." << endl; cout << "Configured " << cols << " branches.\nFilling branches..." << endl; } if (display) { cout << endl; cout << "#"; for (size_t i=0; i0) sout << (delimiter.empty()?"\t":delimiter); sout << col->c_str(); } if (!ignorenull && col->is_null()) { skip++; break; } switch (typ[idx]) { case 'd': buf[idx] = time_t((mysqlpp::DateTime)(*col)); break; case 'D': buf[idx] = time_t((mysqlpp::Date)(*col)); break; case 'T': buf[idx] = time_t((mysqlpp::Time)(*col)); break; case 'V': case 'C': case '-': case '@': break; default: buf[idx] = atof(col->c_str()); } } if (idx==row.size()) { if (!nofill) ttree->Fill(); if (display) cout << sout.str() << endl; } row = res.fetch_row(); } while (row); // ------------------------------------------------------------------------- if (display) cout << '\n' << endl; if (verbose>0) { cout << count << " rows fetched." << endl; if (skip>0) cout << skip << " rows skipped due to NULL field." << endl; cout << ttree->GetEntries() << " rows filled into tree." << endl; } ttree->Write(); tfile.Close(); if (verbose>0) { cout << Tools::Scientific(tfile.GetSize()) << "B written to disk.\n"; cout << "File closed.\n"; cout << "Execution time: " << Time().UnixTime()-start.UnixTime() << "s\n"; cout << "--------------------------------------------------------------" << endl; try { const auto resw = connection.query("SHOW WARNINGS").store(); if (resw.num_rows()>0) cout << "\nWARNINGS:\n\n"; for (size_t i=0; i