Index: /trunk/FACT++/CMakeLists.txt
===================================================================
--- /trunk/FACT++/CMakeLists.txt	(revision 19792)
+++ /trunk/FACT++/CMakeLists.txt	(revision 19793)
@@ -607,4 +607,8 @@
 MANPAGE(root2csv "FACT++ - root2csv - Convert a root-tree to a csv file")
 
+ADD_EXECUTABLE(csv2root src/csv2root.cc)
+TARGET_LINK_LIBRARIES(csv2root ${HELP++LIBS} ${ROOT_LIBRARIES})
+MANPAGE(csv2root "FACT++ - csv2root - Convert a csv file to a root-tree")
+
 ADD_EXECUTABLE(fits2sql src/fits2sql.cc)
 TARGET_LINK_LIBRARIES(fits2sql ${HELP++LIBS}  ZLIB::ZLIB)
@@ -1016,4 +1020,5 @@
 INSTALL(TARGETS  root2sql       DESTINATION "${CMAKE_INSTALL_BINDIR}")
 INSTALL(TARGETS  root2csv       DESTINATION "${CMAKE_INSTALL_BINDIR}")
+INSTALL(TARGETS  csv2root       DESTINATION "${CMAKE_INSTALL_BINDIR}")
 INSTALL(TARGETS  fitsdump       DESTINATION "${CMAKE_INSTALL_BINDIR}")
 INSTALL(TARGETS  zfits          DESTINATION "${CMAKE_INSTALL_BINDIR}")
Index: /trunk/FACT++/src/csv2root.cc
===================================================================
--- /trunk/FACT++/src/csv2root.cc	(revision 19793)
+++ /trunk/FACT++/src/csv2root.cc	(revision 19793)
@@ -0,0 +1,476 @@
+#include <memory>
+#include <random>
+
+#include <boost/regex.hpp>
+#include <boost/filesystem.hpp>
+#include <boost/algorithm/string/join.hpp>
+
+#include "tools.h"
+#include "Time.h"
+#include "Configuration.h"
+
+#include <TROOT.h>
+#include <TSystem.h>
+#include <TFile.h>
+#include <TTree.h>
+#include <TError.h>
+#include <TObjArray.h>
+
+using namespace std;
+namespace fs = boost::filesystem;
+
+// ------------------------------------------------------------------------
+
+/// Register all command line options of csv2root with the given configuration.
+///
+/// Three option groups are added (conversion control, splitting, debugging)
+/// and the first two positional arguments are mapped to --file and --out.
+///
+/// @param conf  Configuration object which receives the option descriptions
+void SetupConfiguration(Configuration &conf)
+{
+    po::options_description control("CSV to root");
+    control.add_options()
+        ("file",           var<string>()->required(), "The csv input file")
+        ("out,o",          var<string>(""),           "Output root file name")
+        ("force,f",        po_switch(),               "Force overwrite if output file already exists.")
+        ("update,u",       po_switch(),               "Update an existing file")
+        ("tree,t",         var<string>("Events"),     "Name of the root tree to write")
+        ("compression,c",  var<uint16_t>(1),          "zlib compression level for the root file")
+        ("no-header",      po_switch(),               "Use if the first line contains no header")
+        ("dry-run",        po_switch(),               "Do not create or manipulate any output file")
+        ;
+
+    // Note: main() rejects quantiles outside [0;1), so the example given in
+    // the help text must not contain 1.
+    po::options_description split("Splitting options");
+    split.add_options()
+        ("split-sequence,S", vars<uint16_t>(),            "Split data sequentially into several trees/files (e.g. 1, 1, 2)")
+        ("split-quantile,Q", vars<double>(),              "Split data randomly into several trees/files (e.g. 0.5, 0.8)")
+        ("seed", var<uint64_t>(mt19937_64::default_seed), "Seed value in case of random split")
+        ;
+
+    po::options_description debug("Debug options");
+    debug.add_options()
+        ("verbose,v",      var<uint16_t>(1),          "Verbosity (0: quiet, 1: default, 2: more, 3, ...)")
+        ;
+
+    po::positional_options_description p;
+    p.add("file", 1); // First positional argument: input file
+    p.add("out",  1); // Second positional argument: output file
+
+    conf.AddOptions(control);
+    conf.AddOptions(split);
+    conf.AddOptions(debug);
+    conf.SetArgumentPositions(p);
+}
+
+/// Print the long help text of csv2root to stdout.
+///
+/// The text describes only what this program implements: reading a
+/// blank-separated text file, the header handling, the output file and tree
+/// selection and the two splitting modes.
+void PrintUsage()
+{
+    cout <<
+        "csv2root - Reads data from a csv file and writes it into a root tree\n"
+        "\n"
+        "For convenience, this documentation uses the extended version of the options, "
+        "refer to the output below to get the abbreviations.\n"
+        "\n"
+        "The input file is read line by line. Each line is split into columns at "
+        "blanks and each column is converted to a single precision floating point "
+        "number. For each column, a branch of type float is created in the tree. "
+        "Every data line must contain the same number of columns as the first line.\n"
+        "\n"
+        "As a default, the first line is expected to contain the column names which "
+        "are used as branch names. If the first entry of the first line starts with "
+        "a '#', this entry is skipped. Colons are removed from the column names. "
+        "If the file does not contain a header line, use --no-header. In this case "
+        "the first line is read as data and the columns are named col0, col1, ...\n"
+        "\n"
+        "The name of the output file can be given with --out or as second positional "
+        "argument. If no name is given, the name of the input file is used with its "
+        "extension replaced by .root. An existing file is not overwritten unless "
+        "--force is specified. To add data to an existing file, use --update. "
+        "In this case, a tree with the requested name which already exists in the "
+        "file is re-used.\n"
+        "\n"
+        "The default name of the tree to which the data is written is 'Events'. "
+        "It can be changed with --tree.\n"
+        "\n"
+        "Splitting:\n"
+        "\n"
+        "The data can be distributed to several trees within the same file. If any "
+        "splitting is requested, the trees are named after the tree name followed "
+        "by [0], [1], [2], ... Only one of the two following splitting options can "
+        "be used at the same time.\n"
+        "\n"
+        "With --split-sequence, the rows are distributed in a fixed repeating "
+        "sequence. Each given number defines how many consecutive rows are written to "
+        "the corresponding tree. The sequence 1, 1, 2, for example, writes the first "
+        "row to the first tree, the second row to the second tree, the following "
+        "two rows to the third tree and then starts over.\n"
+        "\n"
+        "With --split-quantile, the rows are distributed randomly. For each row, "
+        "a uniform random number in the range [0;1) is drawn and the row is written "
+        "to the first tree for which the given quantile is larger than the random "
+        "number or to the last tree if there is none. Consequently, N quantiles "
+        "yield N+1 trees. The quantiles must be in the range [0;1) and in increasing "
+        "order. The quantiles 0.5, 0.8, for example, write about 50% of the rows to "
+        "the first tree, about 30% to the second and about 20% to the third tree. "
+        "The seed of the random number generator can be set with --seed.\n"
+        "\n"
+        "In case of success, 0 is returned, a value>0 otherwise.\n"
+        "\n"
+        "Usage: csv2root [options] input.csv [output.root]\n"
+        "\n"
+        ;
+    cout << endl;
+}
+
+/*
+void ErrorHandlerAll(Int_t level, Bool_t abort, const char *location, const char *msg)
+{
+    if (string(msg).substr(0,24)=="no dictionary for class ")
+        return;
+    if (string(msg).substr(0,15)=="unknown branch ")
+        return;
+
+    DefaultErrorHandler(level, abort, location, msg);
+}*/
+
+
+/// Append the output tree named `tree` to the list `ttree`.
+///
+/// If `update` is set, an object of that name is looked up in `file` and,
+/// when found, re-used (announced on stdout for verbose>0). In all other
+/// cases a new TTree with the title "csv2root" is allocated; it is
+/// presumably attached to the currently open file by ROOT's directory
+/// mechanism -- verify against the caller.
+///
+/// @param ttree    list which receives the (non-owning) tree pointer
+/// @param file     root file searched for an existing tree
+/// @param tree     name of the tree
+/// @param update   whether an existing tree should be re-used
+/// @param verbose  verbosity level
+void AddTree(vector<TTree*> &ttree, TFile &file, const string &tree, bool update, int verbose)
+{
+    TTree *existing = nullptr;
+    if (update)
+        file.GetObject(tree.c_str(), existing);
+
+    if (existing)
+    {
+        ttree.emplace_back(existing);
+        if (verbose>0)
+            cout << "Updating tree: " << tree << endl;
+        return;
+    }
+
+    // Nothing to update (or no update requested): start a fresh tree
+    ttree.emplace_back(new TTree(tree.c_str(), "csv2root"));
+}
+
+/// Program entry point: convert a blank-separated text file into root tree(s).
+///
+/// Steps: parse the options, set up the optional splitting (sequential or
+/// random), read the header line to determine the columns, open the output
+/// file, create one float branch per column in every output tree and fill
+/// the trees row by row.
+///
+/// With --dry-run the input is read and checked completely but no output
+/// file is opened, created or modified.
+///
+/// Invalid splitting options are reported by throwing std::runtime_error
+/// (not caught here).
+///
+/// @return 0 on success, otherwise
+///    127  command line could not be parsed
+///    1/2  input file could not be opened / first line could not be read
+///    3    first line contains no column
+///    4    output file not writable
+///    5    output file exists (and neither --force nor --update given)
+///    6    opening the output file failed or column count mismatch
+///    7    a value could not be converted to a float
+///    8    an existing branch could not be connected in update mode
+int main(int argc, const char* argv[])
+{
+    Time start;
+
+    gROOT->SetBatch();
+
+    Configuration conf(argv[0]);
+    conf.SetPrintUsage(PrintUsage);
+    SetupConfiguration(conf);
+
+    if (!conf.DoParse(argc, argv))
+        return 127;
+
+    // ----------------------------- Evaluate options --------------------------
+    const string file            = conf.Get<string>("file");
+    const string tree            = conf.Get<string>("tree");
+
+    const bool force             = conf.Get<bool>("force");
+    const bool update            = conf.Get<bool>("update");
+    const bool dryrun            = conf.Get<bool>("dry-run");
+    const bool noheader          = conf.Get<bool>("no-header");
+
+    const uint16_t verbose       = conf.Get<uint16_t>("verbose");
+    const uint16_t compression   = conf.Get<uint16_t>("compression");
+
+    string out = conf.Get<string>("out");
+    if (out.empty())
+    {
+        // Derive the output name from the input name. Only a dot within the
+        // file name itself (not within a directory name) marks an extension.
+        // If there is none, ".root" is appended so that the output can never
+        // be identical to the input file.
+        const auto dot   = file.find_last_of('.');
+        const auto slash = file.find_last_of('/');
+
+        const bool has_ext = dot!=string::npos && (slash==string::npos || dot>slash);
+
+        out = (has_ext ? file.substr(0, dot) : file) + ".root";
+    }
+
+    // ----------------------------- Setup splitting ---------------------------
+
+    vector<uint16_t> split_seq   = conf.Vec<uint16_t>("split-sequence");
+    vector<double>   split_quant = conf.Vec<double>("split-quantile");
+
+    if (!split_seq.empty() && !split_quant.empty())
+        throw runtime_error("Only splitting by --split-sequence or --split-quantile is allowed.");
+
+    // N sequence entries yield N trees, N quantiles yield N+1 trees
+    const size_t num_split = split_seq.size()+split_quant.size()==0 ? 0 :
+        ::max(split_seq.size(), split_quant.size()+1);
+
+    // Look-up table: position within one full sequence -> index of the tree
+    map<size_t, size_t> split_lut;
+    for (size_t i=0; i<split_seq.size(); i++)
+    {
+        const size_t sz = split_lut.size();
+        for (size_t j=0; j<split_seq[i]; j++)
+            split_lut.emplace(j+sz, i);
+    }
+
+    for (size_t i=0; i<split_quant.size(); i++)
+        if (split_quant[i]<0 || split_quant[i]>=1)
+            throw runtime_error("Splitting quantiles must be in the range [0;1)");
+
+    for (size_t i=1; i<split_quant.size(); i++)
+    {
+        if (split_quant[i]<=split_quant[i-1])
+            throw runtime_error("Splitting quantiles must be in increasing order.");
+    }
+
+    // -------------------------------------------------------------------------
+
+    const uniform_real_distribution<double> distribution(0,1);
+    mt19937_64 generator;
+    generator.seed(conf.Get<uint64_t>("seed"));
+    auto rndm = bind(distribution, generator);
+
+    // -------------------------------------------------------------------------
+
+    if (verbose>0)
+    {
+        cout << "\n-------------------------- Evaluating input ------------------------\n";
+        cout << "Start Time: " << Time::sql << Time(Time::local) << endl;
+    }
+
+    // ----------------------------- Read header line --------------------------
+
+    cout << "Reading from '" << file << "'.\n";
+
+    ifstream fin(file.c_str());
+    if (!fin.good())
+    {
+        cerr << file << ": " << strerror(errno) << endl;
+        return 1;
+    }
+
+    TString buf;
+    buf.ReadLine(fin);
+    if (!fin)
+    {
+        cerr << file << ": " << strerror(errno) << endl;
+        return 2;
+    }
+
+    // The array returned by Tokenize owns its elements
+    unique_ptr<TObjArray> title(buf.Tokenize(" "));
+    if (title->GetEntries()==0)
+    {
+        cerr << "First line empty." << endl;
+        return 3;
+    }
+
+    if (title->At(0)->GetName()[0]=='#')
+    {
+        // RemoveAt only empties the slot and hands the object back to the
+        // caller. Delete it and close the gap, otherwise At(0) would be
+        // a null pointer when the column names are evaluated below.
+        delete title->RemoveAt(0);
+        title->Compress();
+    }
+
+    const auto numcol = title->GetEntries();
+    if (numcol==0)
+    {
+        cerr << "First line empty." << endl;
+        return 3;
+    }
+
+    if (verbose>0)
+        cout << "Found " << numcol << " columns." << endl;
+
+    // Without header, the first line is data and has to be read again
+    if (noheader)
+        fin.seekg(0);
+
+    // ----------------------------- Open output file --------------------------
+
+    TString path(out.c_str());
+    gSystem->ExpandPathName(path);
+
+    // In a dry run no file is opened at all: opening with RECREATE would
+    // already truncate an existing file.
+    unique_ptr<TFile> tfile;
+    if (!dryrun)
+    {
+        FileStat_t stat;
+        const Int_t  exist  = !gSystem->GetPathInfo(path, stat);
+        const Bool_t _write = !gSystem->AccessPathName(path,  kWritePermission) && R_ISREG(stat.fMode);
+
+        if ((update && !exist) || (update && exist && !_write) || (force && exist && !_write))
+        {
+            cerr << "File '" << path << "' is not writable." << endl;
+            return 4;
+        }
+
+        if (!update && !force && exist)
+        {
+            cerr << "File '" << path << "' already exists." << endl;
+            return 5;
+        }
+
+        tfile.reset(new TFile(path, update?"UPDATE":(force?"RECREATE":"CREATE"), file.c_str(), compression));
+        if (tfile->IsZombie())
+        {
+            cerr << "Opening file '" << path << "' failed." << endl;
+            return 6;
+        }
+
+        if (verbose>0)
+        {
+            cout << "Opened root file '" << path << "'.\n";
+            cout << "Writing to tree: " << tree << ".\n";
+        }
+    }
+    else
+    {
+        if (verbose>0)
+            cout << "Dry run: no output file is created or modified.\n";
+    }
+
+    // -------------------- Configure branches of TTree ------------------------
+
+    // The trees are owned by the file, the vector only refers to them.
+    // It stays empty in a dry run.
+    vector<TTree*> ttree;
+    if (!dryrun)
+    {
+        if (num_split==0)
+            AddTree(ttree, *tfile, tree, update, verbose);
+        else
+        {
+            for (size_t i=0; i<num_split; i++)
+                AddTree(ttree, *tfile, tree+"["+to_string(i)+"]", update, verbose);
+        }
+    }
+
+    // Number of rows assigned to each output during this run
+    vector<size_t> nfilled(num_split==0 ? 1 : num_split, 0);
+
+    // Buffer for one row, each branch refers to one of its elements.
+    // The vector must not be resized after the branches are set up.
+    vector<float> vec(numcol);
+
+    const boost::regex rexpr(":");
+    for (int i=0; i<numcol; i++)
+    {
+        string col = noheader ? Tools::Form("col%d", i) : title->At(i)->GetName();
+
+        col = boost::regex_replace(col, rexpr, "");
+
+        if (verbose>1)
+            cout << "Column: " << col << '\n';
+
+        for (TTree *t : ttree)
+        {
+            // A tree opened for update might already contain the branch.
+            // Connect to it instead of creating a second branch with the
+            // same name.
+            if (!t->GetBranch(col.c_str()))
+            {
+                t->Branch(col.c_str(), vec.data()+i);
+                continue;
+            }
+
+            if (t->SetBranchAddress(col.c_str(), vec.data()+i)<0)
+            {
+                cerr << "Connecting to existing branch '" << col << "' failed." << endl;
+                return 8;
+            }
+        }
+    }
+
+    title.reset();
+
+    // ------------------------------ Convert rows -----------------------------
+
+    size_t line = 0;
+
+    for (;;)
+    {
+        buf.ReadLine(fin);
+        if (!fin)
+            break;
+
+        const unique_ptr<TObjArray> arr(buf.Tokenize(" "));
+        if (arr->GetEntries()!=numcol)
+        {
+            cerr << "Column count mismatch in line " << line+1 << "!" << endl;
+            return 6;
+        }
+
+        for (int i=0; i<numcol; i++)
+        {
+            try
+            {
+                vec[i] = stof(arr->At(i)->GetName());
+            }
+            catch (const exception &e)
+            {
+                cerr << "Conversion of '" << arr->At(i)->GetName() << "' failed!" << endl;
+                return 7;
+            }
+        }
+
+        // Determine the output tree for this row
+        size_t index = 0;
+        if (!split_lut.empty())
+            index = split_lut[line % split_lut.size()];
+        if (!split_quant.empty())
+        {
+            // First tree whose quantile exceeds the random number, the last
+            // tree (index==split_quant.size()) if there is none. The bound
+            // is checked before the quantile is accessed.
+            const float r = rndm();
+            while (index<split_quant.size() && r>=split_quant[index])
+                index++;
+        }
+
+        if (!dryrun)
+            ttree[index]->Fill();
+
+        nfilled[index]++;
+        line++;
+    }
+
+    if (verbose>0)
+    {
+        cout << line << " data rows read from file." << endl;
+        for (size_t i=0; i<nfilled.size(); i++)
+        {
+            if (dryrun)
+                cout << nfilled[i] << " rows assigned to tree #" << i << "." << endl;
+            else
+                cout << ttree[i]->GetEntries() << " rows filled into tree #" << i << "." << endl;
+        }
+    }
+
+    if (!dryrun)
+    {
+        for (TTree *t : ttree)
+            t->Write("", TObject::kOverwrite);
+        tfile->Close();
+    }
+
+    if (verbose>0)
+    {
+        const auto sec = Time().UnixTime()-start.UnixTime();
+
+        if (!dryrun)
+        {
+            cout << Tools::Scientific(tfile->GetSize()) << "B written to disk.\n";
+            cout << "File closed.\n";
+        }
+        cout << "Execution time: " << sec << "s";
+        // Avoid a division by zero if the file contained no data row
+        if (line>0)
+            cout << " (" << Tools::Fractional(sec/line) << "s/row)";
+        cout << "\n";
+        cout << "--------------------------------------------------------------" << endl;
+    }
+
+    return 0;
+}