| 1 | #ifndef MARS_MRanForestCalc
|
|---|
| 2 | #define MARS_MRanForestCalc
|
|---|
| 3 |
|
|---|
| 4 | #ifndef MARS_MTask
|
|---|
| 5 | #include "MTask.h"
|
|---|
| 6 | #endif
|
|---|
| 7 |
|
|---|
| 8 | #ifndef ROOT_TObjArray
|
|---|
| 9 | #include <TObjArray.h>
|
|---|
| 10 | #endif
|
|---|
| 11 |
|
|---|
| 12 | #ifndef ROOT_TArrayD
|
|---|
| 13 | #include <TArrayD.h>
|
|---|
| 14 | #endif
|
|---|
| 15 |
|
|---|
| 16 | #ifndef ROOT_MDataPhrase
|
|---|
| 17 | #include "MDataPhrase.h"
|
|---|
| 18 | #endif
|
|---|
| 19 |
|
|---|
| 20 | class MDataArray;
|
|---|
| 21 | class MParameterD;
|
|---|
| 22 | class MHMatrix;
|
|---|
| 23 |
|
|---|
| 24 | class MRanForestCalc : public MTask
|
|---|
| 25 | {
|
|---|
| 26 | public:
|
|---|
| 27 | enum EstimationMode_t
|
|---|
| 28 | {
|
|---|
| 29 | kMean,
|
|---|
| 30 | kMaximum,
|
|---|
| 31 | kFit
|
|---|
| 32 | };
|
|---|
| 33 |
|
|---|
| 34 | private:
|
|---|
| 35 | static const TString gsDefName; //! Default Name
|
|---|
| 36 | static const TString gsDefTitle; //! Default Title
|
|---|
| 37 | static const TString gsNameOutput; //! Default Output name
|
|---|
| 38 | static const TString gsNameEvalFunc; //! Evaluation function name
|
|---|
| 39 |
|
|---|
| 40 | MDataArray *fData; //! Used to store the MDataChains to get the event values
|
|---|
| 41 | MParameterD *fRFOut; //! Used to store result
|
|---|
| 42 | MHMatrix *fTestMatrix; //! Test Matrix used in Process (together with MMatrixLoop)
|
|---|
| 43 | MDataPhrase fFunc; //! Function to apply to the result
|
|---|
| 44 |
|
|---|
| 45 | TObjArray fEForests; //! List of forests read or to be written
|
|---|
| 46 |
|
|---|
| 47 | Int_t fNumTrees; //! Training parameters
|
|---|
| 48 | Int_t fNumTry; //! Training parameters
|
|---|
| 49 | Int_t fNdSize; //! Training parameters
|
|---|
| 50 |
|
|---|
| 51 | Int_t fNumObsoleteVariables; //! Training parameters
|
|---|
| 52 | Bool_t fLastDataColumnHasWeights; //! Training parameters
|
|---|
| 53 |
|
|---|
| 54 | TString fFileName; // File name to forest
|
|---|
| 55 | TString fNameOutput; // Name of output container
|
|---|
| 56 |
|
|---|
| 57 | Bool_t fDebug; // Debugging of eventloop while training on/off
|
|---|
| 58 |
|
|---|
| 59 | EstimationMode_t fEstimationMode; // Mode of estimation in case of multi random forest regression
|
|---|
| 60 |
|
|---|
| 61 | private:
|
|---|
| 62 | // MTask
|
|---|
| 63 | Int_t PreProcess(MParList *plist);
|
|---|
| 64 | Int_t Process();
|
|---|
| 65 |
|
|---|
| 66 | // MRanForestCalc
|
|---|
| 67 | Int_t ReadForests(MParList &plist);
|
|---|
| 68 | Double_t Eval() const;
|
|---|
| 69 |
|
|---|
| 70 | // MParContainer
|
|---|
| 71 | Int_t ReadEnv(const TEnv &env, TString prefix, Bool_t print);
|
|---|
| 72 |
|
|---|
| 73 | // Train Interface
|
|---|
| 74 | Int_t Train(const MHMatrix &n, const TArrayD &grid, Int_t ver);
|
|---|
| 75 |
|
|---|
| 76 | public:
|
|---|
| 77 | MRanForestCalc(const char *name=NULL, const char *title=NULL);
|
|---|
| 78 | ~MRanForestCalc();
|
|---|
| 79 |
|
|---|
| 80 | // TObject
|
|---|
| 81 | void Print(Option_t *o="") const; //*MENU*
|
|---|
| 82 |
|
|---|
| 83 | // Setter for estimation
|
|---|
| 84 | void SetFileName(TString filename) { fFileName = filename; }
|
|---|
| 85 | void SetEstimationMode(EstimationMode_t op) { fEstimationMode = op; }
|
|---|
| 86 | void SetNameOutput(TString name=gsNameOutput) { fNameOutput = name; }
|
|---|
| 87 |
|
|---|
| 88 | // Setter for training
|
|---|
| 89 | void SetNumTrees(UShort_t n=100) { fNumTrees = n; }
|
|---|
| 90 | void SetNdSize(UShort_t n=5) { fNdSize = n; }
|
|---|
| 91 | void SetNumTry(UShort_t n=0) { fNumTry = n; }
|
|---|
| 92 | void SetDebug(Bool_t b=kTRUE) { fDebug = b; }
|
|---|
| 93 |
|
|---|
| 94 | Bool_t SetFunction(const char *name="x");
|
|---|
| 95 |
|
|---|
| 96 | void SetNumObsoleteVariables(Int_t n=1) { fNumObsoleteVariables = n; }
|
|---|
| 97 | void SetLastDataColumnHasWeights(Bool_t b=kTRUE) { fLastDataColumnHasWeights = b; }
|
|---|
| 98 |
|
|---|
| 99 | // Train Interface
|
|---|
| 100 | Int_t TrainMultiRF(const MHMatrix &n, const TArrayD &grid)
|
|---|
| 101 | {
|
|---|
| 102 | // One yes/no-classification forest is trained for each bin
|
|---|
| 103 | return Train(n, grid, 0);
|
|---|
| 104 | }
|
|---|
| 105 | Int_t TrainSingleRF(const MHMatrix &n, const TArrayD &grid=TArrayD())
|
|---|
| 106 | {
|
|---|
| 107 | // w/o Grid: Last Column contains classifier
|
|---|
| 108 | // w/ Grid: Last Column will be converted by grid into classifier
|
|---|
| 109 | return Train(n, grid, grid.GetSize()==0 ? 2 : 1);
|
|---|
| 110 | }
|
|---|
| 111 | Int_t TrainRegression(const MHMatrix &n)
|
|---|
| 112 | {
|
|---|
| 113 | // Use last column for regression
|
|---|
| 114 | return Train(n, TArrayD(), 3);
|
|---|
| 115 | }
|
|---|
| 116 |
|
|---|
| 117 | // Test Interface
|
|---|
| 118 | void SetTestMatrix(MHMatrix *m=0) { fTestMatrix=m; }
|
|---|
| 119 | void InitMapping(MHMatrix *m=0) { fTestMatrix=m; }
|
|---|
| 120 |
|
|---|
| 121 | ClassDef(MRanForestCalc, 1) // Task to calculate RF output and for RF training
|
|---|
| 122 | };
|
|---|
| 123 |
|
|---|
| 124 | #endif
|
|---|