Index: trunk/FACT++/CMakeLists.txt
===================================================================
--- trunk/FACT++/CMakeLists.txt	(revision 19098)
+++ trunk/FACT++/CMakeLists.txt	(revision 19099)
@@ -461,4 +461,8 @@
 MANPAGE(root2sql "")
 
+ADD_EXECUTABLE(fits2sql src/fits2sql.cc)
+TARGET_LINK_LIBRARIES(fits2sql ${HELP++LIBS}  ZLIB::ZLIB)
+MANPAGE(fits2sql "")
+
 ADD_EXECUTABLE(calcsource src/calcsource.cc)
 TARGET_LINK_LIBRARIES(calcsource ${HELP++LIBS} ${ROOT_LIBRARIES} ${NOVA_LIBRARY})
Index: trunk/FACT++/src/fits2sql.cc
===================================================================
--- trunk/FACT++/src/fits2sql.cc	(revision 19099)
+++ trunk/FACT++/src/fits2sql.cc	(revision 19099)
@@ -0,0 +1,596 @@
+#include <boost/algorithm/string/join.hpp>
+#include <boost/regex.hpp>
+
+#include "Database.h"
+
+#include "Time.h"
+#include "Configuration.h"
+
+#include "zfits.h"
+
+/*
+#include <TROOT.h>
+#include <TFile.h>
+#include <TTree.h>
+#include <TLeaf.h>
+#include <TError.h>
+*/
+
+using namespace std;
+
+// ------------------------------------------------------------------------
+
+struct Map : pair<string, string>
+{
+    Map() { }
+};
+
+std::istream &operator>>(std::istream &in, Map &m)
+{
+    const istreambuf_iterator<char> eos;
+    string txt(istreambuf_iterator<char>(in), eos);
+
+    const boost::regex expr("((.*)[^\\\\])/(.*)");
+    boost::smatch match;
+    if (!boost::regex_match(txt, match, expr))
+        throw runtime_error("Could not evaluate map argument: "+txt);
+
+    m.first  = match[1].str();
+    m.second = match[3].str();
+
+    return in;
+}
+
+
+void SetupConfiguration(Configuration &conf)
+{
+    po::options_description control("Root to SQL");
+    control.add_options()
+        ("uri,u",          var<string>()
+#if BOOST_VERSION >= 104200
+         ->required()
+#endif
+         , "Database link as in\n\tuser:password@server[:port]/database.")
+        ("file",           var<string>(""),           "The root file to read from")
+        ("force",          po_switch(),               "Force processing even if there is no database connection")
+        ("create",         po_switch(),               "Create the database if not existing")
+        ("drop",           po_switch(),               "Drop the table (implies create)")
+        ("extension,e",    var<string>(""),           "Name of the fits extension (table) to convert")
+        ("table",          var<string>(""),           "Name of the table to use (default is the tree name)")
+        ("map",            vars<Map>(),               "A regular expression which is applied to the leaf name befoee it is used as SQL column name)")
+        ("sql-type",       vars<Map>(),               "Allows to overwrite the calculated SQL type for a given column e.g. 'sql-column-name/UNSIGNED IN'")
+        ("ignore",         vars<string>(),            "Ignore the given leaf, if the given regular expression matches")
+        ("primary",        vars<string>(),            "List of columns to be used as primary keys during table creation (in connection with --create)")
+        ("first",          var<int64_t>(int64_t(0)),  "First event to start with (default: 0), mainly for test purpose")
+        ("max",            var<int64_t>(int64_t(0)),  "Maximum number of events to process (0: all), mainly for test purpose")
+        ("engine",         var<string>("InnoDB"),     "Database engine to be used when a new table is created")
+        ;
+
+    po::options_description debug("Debug options");
+    debug.add_options()
+        ("no-insert",      po_switch(),               "Does not insert any data into the table")
+        ("dry-run",        po_switch(),               "Skip any query which changes the databse (might result in consecutive failures)")
+        ("print-extensions", po_switch(),             "Print extensions (tables) from fits file")
+        ("print-columns", po_switch(),               "Print columns in fits table")
+        ("print-insert",   po_switch(),               "Print the INSERT query (note that it contains all data)")
+        ("print-create",   po_switch(),               "Print the CREATE query")
+        ("verbose,v",      var<uint16_t>(1),          "Verbosity (0: quiet, 1: default, 2: more, 3, ...)")
+        ;
+
+    po::positional_options_description p;
+    p.add("file", 1); // The 1st positional options (n=1)
+
+    conf.AddOptions(control);
+    conf.AddOptions(debug);
+    conf.SetArgumentPositions(p);
+}
+
+void PrintUsage()
+{
+    cout <<
+        "fits2sql - Fills the data from a fits file into a database\n"
+        "\n"
+        "For convenience, this documentation uses the extended version of the options, "
+        "refer to the output below to get the abbreviations.\n"
+        "\n"
+        "This is a general purpose tool to fill the contents of a root file into a database "
+        "as long as this is technically possible and makes sense. Note that root can even "
+        "write complex data like a TH1F into a database, this is not the purpose of this "
+        "program.\n"
+        "\n"
+        "Each root tree has branches and leaves (the basic data types). These leaves can "
+        "be read independently of the classes which were used to write the root file. "
+        "The default tree to read from is 'Events' but the name can be overwritten "
+        "using --tree. The default table name to fill the data into is identical to "
+        "the tree name. It can be overwritten using --table.\n"
+        "\n"
+        "The name of each column to which data is filled from a leave is obtained from "
+        "the leaves' names. The leave names can be checked using --print-leaves. "
+        "A --print-branches exists for convenience to print only the high-level branches. "
+        "Sometimes these names might be quite unconvenient like MTime.fTime.fMilliSec or "
+        "just MHillas.fWidth. To allow to simplify column names, regular expressions "
+        "(using boost's regex) can be defined to change the names. Note that these regular "
+        "expressions are applied one by one on each leaf's name. A valid expression could "
+        "be:\n"
+        "   --map=MHillas\\.f/\n"
+        "which would remove all occurances of 'MHillas.f'. This option can be used more than "
+        "once. They are applied in sequence. A single match does not stop the sequence.\n"
+        "\n"
+        "Sometimes it might also be convenient to skip a leaf. This can be done with "
+        "the --ignore resource. If the given regular expresion yield a match, the "
+        "leaf will be ignored. Note that the regular expression work on the raw-name "
+        "of the leaf not the readily mapped SQL column names. Example:\n"
+        "   --ignore=ThetaSq\\..*\n"
+        "will skip all leaved which start with 'ThetaSq.'. This option can be used"
+        "more than once.\n"
+        "\n"
+        "The data type of each column is kept as close as possible to the leaves' data "
+        "types. If for some reason this is not wanted, the data type of the SQL column "
+        "can be overwritten with --sql-type sql-column/sql-ytpe, for example:\n"
+        "   --sql-type=FileId/UNSIGNED INT\n"
+        "while the first argument of the name of the SQL column to which the data type "
+        "should be applied. The second column is the basic SQL data type. The option can "
+        "be given more than once.\n"
+        "\n"
+        "Database interaction:\n"
+        "\n"
+        "To drop an existing table, --drop can be used.\n"
+        "\n"
+        "To create a table according to theSQL  column names and data types, --create "
+        "can be used. The query used can be printed with --print-create even --create "
+        "has not been specified.\n"
+        "\n"
+        "To choose the columns which should become primary keys, use --primary, "
+        "for exmaple:\n"
+        "   --primary=col1\n"
+        "To define more than one column as primary key, the option can be given more than "
+        "once. Note that the combination of these columns must be unique.\n"
+        "\n"
+        "All columns are created as NOT NULL as default and the table is created as "
+        "MyISAM (default).\n"
+        "\n"
+        "For debugging purpose, or to just create or drop a table, the final insert "
+        "query can be skipped using --no-insert. Note that for performance reason, "
+        "all data is collected in memory and a single INSERT query is issued at the "
+        "end.\n"
+        "\n"
+        "Using a higher verbosity level (-v), an overview of the written columns or all "
+        "processed leaves is printed depending on the verbosity level. The output looks "
+        "like the following\n"
+        "   Leaf name [root data type] (SQL name)\n"
+        "for example\n"
+        "   MTime.fTime.fMilliSec [Long64_t] (MilliSec)\n"
+        "which means that the leaf MTime.fTime.fMilliSec is detected to be a Long64_t "
+        "which is filled into a column called MilliSec. Leaves with non basic data types "
+        "are ignored automatically and are marked as (-n/a-). User ignored columnd "
+        "are marked as (-ignored-).\n"
+        "\n"
+        "If a query failed, the query is printed to stderr together with the error message. "
+        "For the main INSERT query, this is only true if the verbosity level is at least 2 "
+        "or the query has less than 80*25 bytes.\n"
+        "\n"
+        "In case of succes, 0 is returned, a value>0 otherwise.\n"
+        "\n"
+        "Usage: fits2sql [options] rootfile.root\n"
+        "\n"
+        ;
+    cout << endl;
+}
+
+enum BasicType_t
+{
+    kNone = 0,
+    kVarchar,
+    kBool,
+    kFloat,
+    kDouble,
+    kInt8,
+    kInt16,
+    kInt32,
+    kInt64,
+//    kMJD,
+};
+
+static const map<char, pair<BasicType_t, string>> ConvFits =
+{
+    { 'A', { kVarchar, "VARCHAR"  } },
+    { 'L', { kBool,    "BOOLEAN"  } },
+    { 'B', { kInt8,    "TINYINT"  } },
+    { 'I', { kInt16,   "SMALLINT" } },
+    { 'J', { kInt32,   "INT"      } },
+    { 'K', { kInt64,   "BIGINT"   } },
+    { 'E', { kFloat,   "FLOAT"    } },
+    { 'D', { kDouble,  "DOUBLE"   } },
+};
+
+struct Container
+{
+    string branch; // fits column name
+    string column; // sql  column name
+    BasicType_t type;
+    size_t num;
+//    double offset;
+    void *ptr;
+
+    Container(const string &b, const string &c, const BasicType_t &t, const size_t &n/*, const double &offset=0*/) : branch(b), column(c), type(t), num(n), ptr(0)
+    {
+    }
+    ~Container()
+    {
+    }
+
+    string fmt(const size_t &index) const
+    {
+        ostringstream str;
+
+        switch (type)
+        {
+        //case kVarchar: str << string(reinterpret_cast<char*>(ptr), num); break;
+        case kVarchar: str << string(reinterpret_cast<char*>(ptr), num).c_str(); break;
+        case kFloat:   str << setprecision(8) << reinterpret_cast<float*>(ptr)[index];  break;
+//        case kMJD:     str << setprecision(16) << reinterpret_cast<double*>(ptr)[0]+offset; break;
+        case kDouble:  str << setprecision(16) << reinterpret_cast<double*>(ptr)[index]; break;
+        case kBool:
+        case kInt8:    str << int32_t(reinterpret_cast<int8_t*>(ptr)[index]); break;
+        case kInt16:   str << reinterpret_cast<int16_t*>(ptr)[index]; break;
+        case kInt32:   str << reinterpret_cast<int32_t*>(ptr)[index]; break;
+        case kInt64:   str << reinterpret_cast<int64_t*>(ptr)[index]; break;
+        case kNone:
+            break;
+        }
+
+        return str.str();
+    }
+};
+
+int main(int argc, const char* argv[])
+{
+    Time start;
+
+    Configuration conf(argv[0]);
+    conf.SetPrintUsage(PrintUsage);
+    SetupConfiguration(conf);
+
+    if (!conf.DoParse(argc, argv))
+        return 127;
+
+    // ----------------------------- Evaluate options --------------------------
+    const string uri             = conf.Get<string>("uri");
+    const string file            = conf.Get<string>("file");
+    const string extension       = conf.Get<string>("extension");
+          string table           = conf.Get<string>("table");
+
+    const uint16_t verbose       = conf.Get<uint16_t>("verbose");
+    const int64_t  first         = conf.Get<int64_t>("first");
+    const int64_t  max           = conf.Get<int64_t>("max");
+
+    const bool force             = conf.Get<bool>("force");
+    const bool drop              = conf.Get<bool>("drop");
+    const bool create            = conf.Get<bool>("create") || drop;
+    const bool noinsert          = conf.Get<bool>("no-insert");
+    const bool dry_run           = conf.Get<bool>("dry-run");
+
+    const string engine          = conf.Get<string>("engine");
+
+    const bool print_extensions  = conf.Get<bool>("print-extensions");
+    const bool print_columns     = conf.Get<bool>("print-columns");
+    const bool print_create      = conf.Get<bool>("print-create");
+    const bool print_insert      = conf.Get<bool>("print-insert");
+
+    const vector<Map> mymap      = conf.Vec<Map>("map");
+    const vector<Map> sqltypes   = conf.Vec<Map>("sql-type");
+    const vector<string> _ignore = conf.Vec<string>("ignore");
+    const vector<string> primary = conf.Vec<string>("primary");
+
+    // -------------------------------------------------------------------------
+
+    if (verbose>0)
+        cout << "\n-------------------------- Evaluating file -------------------------" << endl;
+
+    zfits f(file.c_str(), extension.c_str());
+    if (!f)
+    {
+        cerr << "Could not open file " << file << ": " << strerror(errno) << endl;
+        return 1;
+    }
+
+    if (verbose>0)
+        cout << "File: " << file << endl;
+
+    if (!extension.empty() && extension!=f.Get<string>("EXTNAME"))
+    {
+        cerr << "Extension " << extension << " not found in file." << endl;
+        return 2;
+    }
+
+    if (print_extensions)
+    {
+        cout << "\nTables:\n - " << boost::join(f.GetTables(), "\n - ") << '\n' << endl;
+        return 2;
+    }
+
+    if (verbose>0)
+        cout << "FITS extension [table]: " << f.Get<string>("EXTNAME") << endl;
+
+    if (table.empty())
+        table = f.Get<string>("EXTNAME");
+
+    if (verbose>0)
+        cout << "SQL table: " << table << endl;
+
+//    const double mjdref = f.Get("MJDREF", double(0));
+
+    if (print_columns)
+    {
+        cout << '\n';
+        f.PrintColumns();
+        cout << '\n';
+    }
+
+    const auto cols = f.GetColumns();
+
+    if (verbose>0)
+    {
+        cout << f.GetNumRows() << " events found." << endl;
+        cout << cols.size() << " columns found." << endl;
+    }
+
+    string query =
+        "CREATE TABLE IF NOT EXISTS `"+table+"`\n"
+        "(\n";
+
+    vector<Container> vec;
+
+    for (const auto &ic : cols)
+    {
+        const auto &col = ic.second;
+
+        if (verbose>2)
+            cout << '\n' << col.type << " " << ic.first << "[" << col.num << "]";
+
+        string name = ic.first;
+
+        bool found = false;
+        for (auto b=_ignore.cbegin(); b!=_ignore.cend(); b++)
+        {
+            if (boost::regex_match(name, boost::regex(*b)))
+            {
+                found = true;
+                if (verbose>2)
+                    cout << " (-ignored-)";
+                break;
+            }
+        }
+        if (found)
+            continue;
+
+        const char tn = col.type;
+
+        auto it = ConvFits.find(tn);
+        if (it==ConvFits.end())
+        {
+            if (verbose>2)
+                cout << " (-n/a-)";
+            continue;
+        }
+
+        if (verbose==2)
+            cout << '\n' << name << " [" << tn << "]";
+
+        for (auto m=mymap.cbegin(); m!=mymap.cend(); m++)
+            name = boost::regex_replace(name, boost::regex(m->first), m->second);
+
+        if (verbose>1)
+            cout << " (" << name << ")";
+
+        string sqltype = it->second.second;
+
+        for (auto m=sqltypes.cbegin(); m!=sqltypes.cend(); m++)
+            if (m->first==name)
+                sqltype = m->second;
+
+        if (!vec.empty())
+            query += ",\n";
+
+        const size_t N = col.type=='A' ? 1 : col.num;
+        for (int i=0; i<N; i++)
+        {
+            query += "   `"+name;
+            if (N>1)
+                query += "["+to_string(i)+"]";
+            query += "` "+sqltype;
+            if (col.type=='A')
+                query += '('+to_string(col.num)+')';
+            query += " NOT NULL COMMENT '"+ic.first;
+            if (!col.unit.empty())
+                query += "["+col.unit+"]";
+            query += ": "+col.comment+"'";
+        }
+
+        const BasicType_t bt =
+            /*ic.first=="Time" && col.num==1 && col.unit=="MJD" && it->second.first==kDouble ?
+            kMJD :*/ it->second.first;
+
+        vec.emplace_back(ic.first, name, bt, col.num/*, mjdref*/);
+        vec.back().ptr = f.SetPtrAddress(ic.first);
+    }
+
+    if (verbose>1)
+        cout << "\n\n";
+    if (verbose>0)
+        cout << vec.size() << " columns setup for reading." << endl;
+
+    // -------------------------------------------------------------------------
+    // Checking for database connection
+
+    try
+    {
+        if (!force)
+            Database(uri).connected();
+    }
+    catch (const exception &e)
+    {
+        cerr << "SQL connection failed: " << e.what() << endl;
+        return 3;
+    }
+
+    // -------------------------------------------------------------------------
+
+    if (verbose>0)
+        cout << "\n--------------------------- Database Table -------------------------" << endl;
+
+    if (!primary.empty())
+        query += ",\n   PRIMARY KEY USING BTREE (`"+boost::algorithm::join(primary, "`, `")+"`)\n";
+
+    query +=
+        ")\n"
+        "DEFAULT CHARSET=latin1 COLLATE=latin1_general_ci\n"
+        "ENGINE="+engine+"\n"
+        "COMMENT='created by "+conf.GetName()+"'\n";
+
+
+    // FIXME: Can we omit the catching to be able to print the
+    //        query 'autmatically'?
+    try
+    {
+        if (drop)
+        {
+            // => Simple result
+            if (!dry_run)
+                Database(uri).query("DROP TABLE `"+table+"`").execute();
+            if (verbose>0)
+                cout << "Table `" << table << "` dropped." << endl;
+        }
+    }
+    catch (const exception &e)
+    {
+        cerr << "DROP TABLE `" << table << "`\n\n";
+        cerr << "SQL query failed:\n" << e.what() << endl;
+        return 4;
+    }
+
+    try
+    {
+        if (create && !dry_run)
+            Database(uri).query(query).execute();
+    }
+    catch (const exception &e)
+    {
+        cerr << query << "\n\n";
+        cerr << "SQL query failed:\n" << e.what() << endl;
+        return 5;
+    }
+
+    if (print_create)
+        cout << query << endl;
+
+    if (create && verbose>0)
+        cout << "Table `" << table << "` created." << endl;
+
+    // -------------------------------------------------------------------------
+
+    if (verbose>0)
+        cout << "\n---------------------------- Reading file --------------------------" << endl;
+
+    //query = update ? "UPDATE" : "INSERT";
+    query = "INSERT `"+table+"`\n"
+        "(\n";
+
+    for (auto c=vec.cbegin(); c!=vec.cend(); c++)
+    {
+        const size_t N = c->type==kVarchar ? 1 : c->num;
+        for (int i=0; i<N; i++)
+        {
+            if (c!=vec.cbegin())
+                query += ",\n";
+
+            if (N==1)
+                query += "   `"+c->column+"`";
+            else
+                query += "   `"+c->column+"["+to_string(i)+"]`";
+        }
+    }
+
+    query +=
+        "\n)\n"
+        "VALUES\n";
+
+    size_t count = 0;
+
+    const size_t num = max>0 && (max-first)<f.GetNumRows() ? (max-first) : f.GetNumRows();
+    for (size_t j=first; j<num; j++)
+    {
+        f.GetRow(j);
+
+        if (count>0)
+            query += ",\n";
+
+        query += "(\n";
+
+        for (auto c=vec.cbegin(); c!=vec.cend(); c++)
+        {
+            const size_t N = c->type==kVarchar ? 1 : c->num;
+            for (int i=0; i<N; i++)
+            {
+                if (c!=vec.cbegin())
+                    query += ",\n";
+
+                if (c->type==kVarchar)
+                    query += "   '"+c->fmt(i)+"'";
+                else
+                    query += "   "+c->fmt(i);
+
+                if (print_insert && i==0)
+                    query += " /* "+c->column+" -> "+c->branch+" */";
+            }
+        }
+        query += "\n)";  // ON DUPLICATE KEY UPDATE\n";
+
+        count ++;
+    }
+
+    if (verbose>0)
+        cout << count << " out of " << num << " row(s) read from file [N=" << first << ".." << num-1 << "]." << endl;
+
+    if (count==0)
+    {
+        if (verbose>0)
+            cout << "Total execution time: " << Time().UnixTime()-start.UnixTime() << "s\n" << endl;
+        return 0;
+    }
+
+    // -------------------------------------------------------------------------
+
+    if (verbose>0)
+    {
+        cout << "\n--------------------------- Inserting data -------------------------" << endl;
+        cout << "Sending INSERT query (" << query.length() << " bytes)"  << endl;
+    }
+
+    try
+    {
+        if (!noinsert && !dry_run)
+            // => Simple result
+            Database(uri).query(query).execute();
+        else
+            cout << "Insert query skipped!" << endl;
+
+        if (print_insert)
+            cout << query << endl;
+    }
+    catch (const exception &e)
+    {
+        if (verbose>1 || query.length()<80*25)
+            cerr << query << "\n\n";
+        cerr << "SQL query failed (" << query.length() << " bytes):\n" << e.what() << endl;
+        return 6;
+    }
+
+    if (verbose>0)
+    {
+        cout << count << " row(s) inserted.\n\n";
+        cout << "Total execution time: " << Time().UnixTime()-start.UnixTime() << "s\n" << endl;
+    }
+
+    return 0;
+}
