Changes between Initial Version and Version 1 of DatabaseBasedAnalysis/RandomForest


Ignore:
Timestamp:
08/15/18 17:19:44 (7 years ago)
Author:
tbretz
Comment:

--

Legend:

Unmodified
Added
Removed
Modified
  • DatabaseBasedAnalysis/RandomForest

    v1 v1  
     1
     2== Writing Input Files ==
     3
     4{{{#!Spoiler
     5{{{#!cpp
     6#include <iostream>
     7#include <iomanip>
     8#include <fstream>
     9
     10#include <TMath.h>
     11#include <TChain.h>
     12
     13using namespace std;
     14
     15void writesim()
     16{
     17    // Create chain for the tree Result
     18    // This is just easier than using TFile/TTree
     19    TChain c("Result");
     20
     21    // Add the input file to the
     22    c.AddFile("simulation.root");
     23
     24    // Define variables for all leaves to be accessed
     25    // By definition rootifysql writes only doubles
     26    double X, Y, MeanX, MeanY, Width, Length, CosDelta, SinDelta,
     27        M3Long, SlopeLong, Leakage1, SlopeSpreadWeighted, Size,
     28        ConcCore, ConcCOG, NumIslands, NumUsedPixels, Zd, Energy;
     29
     30    // Connect the variables to the cordesponding leaves
     31    //c.SetBranchAddress("FileId", &FileId);
     32    //c.SetBranchAddress("EvtNumber", &EvtNumber);
     33    c.SetBranchAddress("X", &X);
     34    c.SetBranchAddress("Y", &Y);
     35    c.SetBranchAddress("MeanX", &MeanX);
     36    c.SetBranchAddress("MeanY", &MeanY);
     37    c.SetBranchAddress("Width", &Width);
     38    c.SetBranchAddress("Length", &Length);
     39    c.SetBranchAddress("CosDelta", &CosDelta);
     40    c.SetBranchAddress("SinDelta", &SinDelta);
     41    c.SetBranchAddress("M3Long", &M3Long);
     42    c.SetBranchAddress("SlopeLong", &SlopeLong);
     43    c.SetBranchAddress("Leakage1", &Leakage1);
     44    c.SetBranchAddress("NumIslands", &NumIslands);
     45    c.SetBranchAddress("NumUsedPixels", &NumUsedPixels);
     46    c.SetBranchAddress("Size", &Size);
     47    c.SetBranchAddress("Zd", &Zd);
     48    c.SetBranchAddress("Energy", &Energy);
     49
     50    // Set some constants (they could be included in the database
     51    // in the future)
     52    double mm2deg = +0.0117193246260285378;
     53    //double abberation = 1.02;
     54
     55    // -------------------- Source dependent parameter calculation -------------------
     56
     57    ofstream fout0("sim-train.csv"); // %1
     58    ofstream fout1("sim-test.csv");  // %0
     59    ofstream fout2("sim-test-cuts.csv");
     60
     61    fout0 << "Energy Size Zd Dist Disp Slope M3L Leakage Width Length" << endl;
     62    fout1 << "Energy Size Zd Dist Disp Slope M3L Leakage Width Length" << endl;
     63    fout2 << "Energy Size Zd Dist Disp Slope M3L Leakage Width Length" << endl;
     64
     65    // Loop over all wobble positions in the camera
     66    for (int i=0; i<c.GetEntries(); i++)
     67    {
     68        // read the i-th event from the file
     69        c.GetEntry(i);
     70
     71        // First calculate all cuts to speedup the analysis
     72        double area = TMath::Pi()*Width*Length;
     73
     74        // The abberation correction does increase also Width and Length by 1.02
     75
     76        int angle = 0;
     77
     78        // -------------------- Source dependent parameter calculation -------------------
     79
     80        double cr = cos(angle*TMath::DegToRad());
     81        double sr = sin(angle*TMath::DegToRad());
     82
     83        double px = cr*X-sr*Y;
     84        double py = cr*Y+sr*X;
     85
     86        double dx = MeanX - px*1.022;
     87        double dy = MeanY - py*1.022;
     88
     89        double norm = sqrt(dx*dx + dy*dy);
     90        double dist = norm*mm2deg;
     91
     92        double lx = min(max((CosDelta*dy - SinDelta*dx)/norm, -1.), 1.);
     93        double ly = min(max((CosDelta*dx + SinDelta*dy)/norm, -1.), 1.);
     94
     95        double alpha = asin(lx);
     96        double sgn   = TMath::Sign(1., ly);
     97
     98        // ------------------------------- Application ----------------------------------
     99
     100        double m3l   = M3Long*sgn*mm2deg;
     101        double slope = SlopeLong*sgn/mm2deg;
     102
     103        // --------------------------------- Analysis -----------------------------------
     104
     105        //double xi = 1.34723 + 0.15214 *slope + 0.970704*(1-1/(1+8.89826*Leakage1));
     106        double xi = 1.340 + 0.0755*slope + 1.67972*(1-1/(1+4.86232*Leakage1));
     107
     108        double sign1 = m3l+0.07;
     109        double sign2 = (dist-0.5)*7.2-slope;
     110
     111        double disp  = (sign1<0 || sign2<0 ? -xi : xi)*(1-Width/Length);
     112
     113        double thetasq = disp*disp + dist*dist - 2*disp*dist*sqrt(1-lx*lx);
     114
     115        if (i%2==0)
     116        {
     117            fout0 << log10(Energy) << " ";
     118            fout0 << log10(Size) << " ";
     119            fout0 << Zd << " ";
     120            fout0 << dist << " ";
     121            fout0 << disp << " ";
     122            fout0 << slope << " ";
     123            fout0 << m3l << " ";
     124            fout0 << Leakage1 << " ";
     125            fout0 << Width << " ";
     126            fout0 << Length << endl;
     127        }
     128        else
     129        {
     130            fout1 << log10(Energy) << " ";
     131            fout1 << log10(Size) << " ";
     132            fout1 << Zd << " ";
     133            fout1 << dist << " ";
     134            fout1 << disp << " ";
     135            fout1 << slope << " ";
     136            fout1 << m3l << " ";
     137            fout1 << Leakage1 << " ";
     138            fout1 << Width << " ";
     139            fout1 << Length << endl;
     140
     141            if (thetasq<0.024)
     142                continue;
     143
     144            bool cutq = NumIslands<3.5 && NumUsedPixels>5.5 && Leakage1<0.1;
     145            if (!cutq)
     146                continue;
     147
     148            bool cut0 = area < log10(Size)*898-1535;
     149            if (!cut0)
     150                continue;
     151
     152            fout2 << log10(Energy) << " ";
     153            fout2 << log10(Size) << " ";
     154            fout2 << Zd << " ";
     155            fout2 << dist << " ";
     156            fout2 << disp << " ";
     157            fout2 << slope << " ";
     158            fout2 << m3l << " ";
     159            fout2 << Leakage1 << " ";
     160            fout2 << Width << " ";
     161            fout2 << Length << endl;
     162        }
     163    }
     164}
     165}}}
     166}}}
     167
     168== Training ==
     169
     170{{{
     171fact@ihp-pc45:~/Analysis> nice -n 10 ~/ranger-master/cpp_version/build/ranger --file sim-train.csv --depvarname Energy --memmode 1 --treetype 3 --verbose --impmeasure 1 --outprefix sim-train
     172Starting Ranger.
     173Loading input file: sim-train.csv.
     174Growing trees ..
     175Computing prediction error ..
     176
     177Tree type:                         Regression
     178Dependent variable name:           Energy
     179Dependent variable ID:             0
     180Number of trees:                   500
     181Sample size:                       55417
     182Number of independent variables:   9
     183Mtry:                              3
     184Target node size:                  5
     185Variable importance mode:          1
     186Memory mode:                       1
     187Seed:                              0
     188Number of threads:                 8
     189
     190Overall OOB prediction error:      0.0178514
     191
     192Saved variable importance to file sim-train.importance.
     193Saved prediction error to file sim-train.confusion.
     194Finished Ranger.
     195}}}
     196
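     197To keep the trained forest for the prediction step below, add the `--write` option to the command above; ranger then saves it as '''sim-train.forest''' (the run shown here only reports the importance and confusion files).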
     198
     199== Testing ==
     200
     201{{{
     202nice -n 10 ~/ranger-master/cpp_version/build/ranger --file sim-test.csv      --depvarname Energy --memmode 1 --treetype 3 --verbose --impmeasure 1 --predict sim-train.forest
     203nice -n 10 ~/ranger-master/cpp_version/build/ranger --file sim-test-cuts.csv --depvarname Energy --memmode 1 --treetype 3 --verbose --impmeasure 1 --predict sim-train.forest
     204}}}
     205
     206Here is an example output (for the second command, i.e. '''sim-test-cuts.csv'''):
     207{{{
     208Starting Ranger.
     209Loading input file: sim-test-cuts.csv.
     210Loading forest from file sim-train.forest.
     211Predicting ..
     212
     213Tree type:                         Regression
     214Dependent variable name:           Energy
     215Dependent variable ID:             0
     216Number of trees:                   500
     217Sample size:                       5135
     218Number of independent variables:   9
     219Mtry:                              3
     220Target node size:                  5
     221Variable importance mode:          1
     222Memory mode:                       1
     223Seed:                              0
     224Number of threads:                 8
     225
     226Saved predictions to file ranger_out.prediction.
     227Finished Ranger.
     228}}}
     229
     230
     231