/* ======================================================================== *\
!
! *
! * This file is part of MARS, the MAGIC Analysis and Reconstruction
! * Software. It is distributed to you in the hope that it can be a useful
! * and timesaving tool in analysing Data of imaging Cerenkov telescopes.
! * It is distributed WITHOUT ANY WARRANTY.
! *
! * Permission to use, copy, modify and distribute this software and its
! * documentation for any purpose is hereby granted without fee,
! * provided that the above copyright notice appear in all copies and
! * that both that copyright notice and this permission notice appear
! * in supporting documentation. It is provided "as is" without express
! * or implied warranty.
! *
!
!
!   Author(s): Thomas Hengstebeck 3/2003
!
!   Copyright: MAGIC Software Development, 2000-2003
!
!
\* ======================================================================== */

/////////////////////////////////////////////////////////////////////////////
//
//  MRanForest
//
//  ParameterContainer for Forest structure
//
//  A random forest can be grown by calling GrowForest.
//  In advance SetupGrow must be called in order to initialize arrays and
//  do some preprocessing.
//  GrowForest() provides the training data for a single tree (bootstrap
//  aggregate procedure)
//
//  Essentially two random elements serve to provide a "random" forest,
//  namely bootstrap aggregating (which is done in GrowForest()) and random
//  split selection (which is subject to MRanTree::GrowTree())
//
//  Version 2:
//   + fUserVal
//
/////////////////////////////////////////////////////////////////////////////
#include "MRanForest.h"

// NOTE(review): the original angle-bracket header names were lost when this
// file was mangled (two bare "#include" lines remained). TMath.h and
// TRandom.h are the two ROOT headers this translation unit demonstrably
// needs (TMath::Max/TMath::Sqrt and gRandom) -- confirm against the
// original MARS revision.
#include <TMath.h>
#include <TRandom.h>

#include "MHMatrix.h"
#include "MRanTree.h"

#include "MLog.h"
#include "MLogManip.h"

ClassImp(MRanForest);

using namespace std;

// --------------------------------------------------------------------------
//
// Default constructor.
// MRanForest::MRanForest(const char *name, const char *title) : fNumTrees(100), fRanTree(NULL),fUsePriors(kFALSE), fUserVal(-1) { fName = name ? name : "MRanForest"; fTitle = title ? title : "Storage container for Random Forest"; fForest=new TObjArray(); fForest->SetOwner(kTRUE); } // -------------------------------------------------------------------------- // // Destructor. // MRanForest::~MRanForest() { delete fForest; } void MRanForest::SetNumTrees(Int_t n) { //at least 1 tree fNumTrees=TMath::Max(n,1); fTreeHad.Set(fNumTrees); fTreeHad.Reset(); } void MRanForest::SetPriors(Float_t prior_had, Float_t prior_gam) { const Float_t sum=prior_gam+prior_had; prior_gam/=sum; prior_had/=sum; fPrior[0]=prior_had; fPrior[1]=prior_gam; fUsePriors=kTRUE; } Int_t MRanForest::GetNumDim() const { return fGammas ? fGammas->GetM().GetNcols() : 0; } Double_t MRanForest::CalcHadroness(const TVector &event) { Double_t hadroness=0; Int_t ntree=0; TIter Next(fForest); MRanTree *tree; while ((tree=(MRanTree*)Next())) { fTreeHad[ntree]=tree->TreeHad(event); hadroness+=fTreeHad[ntree]; ntree++; } return hadroness/ntree; } Bool_t MRanForest::AddTree(MRanTree *rantree=NULL) { if (rantree) fRanTree=rantree; if (!fRanTree) return kFALSE; fForest->Add((MRanTree*)fRanTree->Clone()); return kTRUE; } Int_t MRanForest::GetNumData() const { return fHadrons && fGammas ? 
fHadrons->GetM().GetNrows()+fGammas->GetM().GetNrows() : 0; } Bool_t MRanForest::SetupGrow(MHMatrix *mhad,MHMatrix *mgam) { // pointer to training data fHadrons=mhad; fGammas=mgam; // determine data entries and dimension of Hillas-parameter space //fNumHad=fHadrons->GetM().GetNrows(); //fNumGam=fGammas->GetM().GetNrows(); const Int_t dim = GetNumDim(); if (dim!=fGammas->GetM().GetNcols()) return kFALSE; const Int_t numdata = GetNumData(); // allocating and initializing arrays fHadTrue.Set(numdata); fHadTrue.Reset(); fHadEst.Set(numdata); fPrior.Set(2); fClassPop.Set(2); fWeight.Set(numdata); fNTimesOutBag.Set(numdata); fNTimesOutBag.Reset(); fDataSort.Set(dim*numdata); fDataRang.Set(dim*numdata); // calculating class populations (= no. of gammas and hadrons) fClassPop.Reset(); for(Int_t n=0;n=1) ? (Float_t)numdata/fClassPop[j]:0; for(Int_t n=0;nSetRules(fGammas->GetColumns()); fTreeNo=0; return kTRUE; } void MRanForest::InitHadEst(Int_t from, Int_t to, const TMatrix &m, TArrayI &jinbag) { for (Int_t ievt=from;ievt0) continue; fHadEst[ievt] += fRanTree->TreeHad(m, ievt-from); fNTimesOutBag[ievt]++; } } Bool_t MRanForest::GrowForest() { if(!gRandom) { *fLog << err << dbginf << "gRandom not initialized... aborting." << endl; return kFALSE; } fTreeNo++; // initialize running output if (fTreeNo==1) { *fLog << inf << endl; *fLog << underline; // << "1st col 2nd col" << endl; *fLog << "no. of tree error in % (calulated using oob-data -> overestim. 
of error)" << endl; } const Int_t numdata = GetNumData(); // bootstrap aggregating (bagging) -> sampling with replacement: // // The integer k is randomly (uniformly) chosen from the set // {0,1,...,fNumData-1}, which is the set of the index numbers of // all events in the training sample TArrayF classpopw(2); TArrayI jinbag(numdata); // Initialization includes filling with 0 TArrayF winbag(numdata); // Initialization includes filling with 0 for (Int_t n=0; nRndm()*numdata); classpopw[fHadTrue[k]]+=fWeight[k]; winbag[k]+=fWeight[k]; jinbag[k]=1; } // modifying sorted-data array for in-bag data: // // In bagging procedure ca. 2/3 of all elements in the original // training sample are used to build the in-bag data TArrayI datsortinbag=fDataSort; Int_t ninbag=0; ModifyDataSort(datsortinbag, ninbag, jinbag); const TMatrix &hadrons=fHadrons->GetM(); const TMatrix &gammas =fGammas->GetM(); // growing single tree fRanTree->GrowTree(hadrons,gammas,fHadTrue,datsortinbag, fDataRang,classpopw,jinbag,winbag); // error-estimates from out-of-bag data (oob data): // // For a single tree the events not(!) contained in the bootstrap sample of // this tree can be used to obtain estimates for the classification error of // this tree. // If you take a certain event, it is contained in the oob-data of 1/3 of // the trees (see comment to ModifyData). This means that the classification error // determined from oob-data is underestimated, but can still be taken as upper limit. 
const Int_t numhad = hadrons.GetNrows(); InitHadEst(0, numhad, hadrons, jinbag); InitHadEst(numhad, numdata, gammas, jinbag); /* for (Int_t ievt=0;ievt0) continue; fHadEst[ievt] += fRanTree->TreeHad(hadrons, ievt); fNTimesOutBag[ievt]++; } for (Int_t ievt=numhad;ievt0) continue; fHadEst[ievt] += fRanTree->TreeHad(gammas, ievt-numhad); fNTimesOutBag[ievt]++; } */ Int_t n=0; Double_t ferr=0; for (Int_t ievt=0;ievtGetM(); const TMatrix &gammas=fGammas->GetM(); const Int_t numgam = gammas.GetNrows(); const Int_t numhad = hadrons.GetNrows(); for (Int_t j=0;j=ninbag) break; } } } Bool_t MRanForest::AsciiWrite(ostream &out) const { Int_t n=0; MRanTree *tree; TIter forest(fForest); while ((tree=(MRanTree*)forest.Next())) { tree->AsciiWrite(out); n++; } return n==fNumTrees; }