/* ======================================================================== *\ ! ! * ! * This file is part of MARS, the MAGIC Analysis and Reconstruction ! * Software. It is distributed to you in the hope that it can be a useful ! * and timesaving tool in analysing Data of imaging Cerenkov telescopes. ! * It is distributed WITHOUT ANY WARRANTY. ! * ! * Permission to use, copy, modify and distribute this software and its ! * documentation for any purpose is hereby granted without fee, ! * provided that the above copyright notice appear in all copies and ! * that both that copyright notice and this permission notice appear ! * in supporting documentation. It is provided "as is" without express ! * or implied warranty. ! * ! ! ! Author(s): Thomas Hengstebeck 3/2003 ! ! Copyright: MAGIC Software Development, 2000-2003 ! ! \* ======================================================================== */ ///////////////////////////////////////////////////////////////////////////// // // // MRanForest // // // // ParameterContainer for Forest structure // // // // A random forest can be grown by calling GrowForest. // // In advance SetupGrow must be called in order to initialize arrays and // // do some preprocessing. // // GrowForest() provides the training data for a single tree (bootstrap // // aggregate procedure) // // // // Essentially two random elements serve to provide a "random" forest, // // namely bootstrap aggregating (which is done in GrowForest()) and random // // split selection (which is subject to MRanTree::GrowTree()) // // // ///////////////////////////////////////////////////////////////////////////// #include "MRanForest.h" #include #include #include "MHMatrix.h" #include "MRanTree.h" #include "MLog.h" #include "MLogManip.h" ClassImp(MRanForest); // -------------------------------------------------------------------------- // // Default constructor. // MRanForest::MRanForest(const char *name, const char *title) : fNumTrees(100), fRanTree(NULL),fUsePriors(kFALSE) { fName = name ? name : "MRanForest"; fTitle = title ? title : "Storage container for Random Forest"; fForest=new TObjArray(); fForest->SetOwner(kTRUE); } // -------------------------------------------------------------------------- // // Destructor. // MRanForest::~MRanForest() { delete fForest; } void MRanForest::SetNumTrees(Int_t n) { //at least 1 tree fNumTrees=TMath::Max(n,1); fTreeHad.Set(fNumTrees); fTreeHad.Reset(); } void MRanForest::SetPriors(Float_t prior_had, Float_t prior_gam) { Float_t sum; sum=prior_gam+prior_had; prior_gam/=sum; prior_had/=sum; fPrior[0]=prior_had; fPrior[1]=prior_gam; fUsePriors=kTRUE; } Double_t MRanForest::CalcHadroness(TVector &event) { Double_t hadroness=0; Int_t ntree=0; MRanTree *tree; TIter forest(fForest); forest.Reset(); while ((tree=(MRanTree*)forest.Next())) { fTreeHad[ntree]=tree->TreeHad(event); hadroness+=fTreeHad[ntree]; ntree++; } return hadroness/Double_t(ntree); } Bool_t MRanForest::AddTree(MRanTree *rantree=NULL) { if (rantree) fRanTree=rantree; if (!fRanTree) return kFALSE; fForest->Add((MRanTree*)fRanTree->Clone()); return kTRUE; } Bool_t MRanForest::SetupGrow(MHMatrix *mhad,MHMatrix *mgam) { // pointer to training data fHadrons=mhad; fGammas=mgam; // determine data entries and dimension of Hillas-parameter space fNumHad=fHadrons->GetM().GetNrows(); fNumGam=fGammas->GetM().GetNrows(); fNumDim=fHadrons->GetM().GetNcols(); if (fNumDim!=fHadrons->GetM().GetNcols()) return kFALSE; fNumData=fNumHad+fNumGam; // allocating and initializing arrays fHadTrue.Set(fNumData); fHadTrue.Reset(); fHadEst.Set(fNumData); fPrior.Set(2); fClassPop.Set(2); fWeight.Set(fNumData); fNTimesOutBag.Set(fNumData); fNTimesOutBag.Reset(); fDataSort.Set(fNumDim*fNumData); fDataRang.Set(fNumDim*fNumData); // calculating class populations (= no. of gammas and hadrons) fClassPop.Reset(); for(Int_t n=0;n=1) ? Float_t(fNumData)/Float_t(fClassPop[j]):0; for(Int_t n=0;nSetRules(fGammas->GetColumns()); fTreeNo=0; return kTRUE; } Bool_t MRanForest::GrowForest() { Int_t ninbag=0; TArrayI datsortinbag; TArrayF classpopw; TArrayI jinbag; TArrayF winbag; jinbag.Set(fNumData); winbag.Set(fNumData); classpopw.Set(2); TMatrix hadrons=fHadrons->GetM(); TMatrix gammas=fGammas->GetM(); fTreeNo++; // initialize running output if(fTreeNo==1) { cout< overestim. of error)"< sampling with replacement: // // The integer k is randomly (uniformly) chosen from the set // {0,1,...,fNumData-1}, which is the set of the index numbers of // all events in the training sample for (Int_t n=0;nRndm()); classpopw[fHadTrue[k]]+=fWeight[k]; winbag[k]+=fWeight[k]; jinbag[k]=1; } // modifying sorted-data array for in-bag data: // // In bagging procedure ca. 2/3 of all elements in the original // training sample are used to build the in-bag data datsortinbag=fDataSort; ModifyDataSort(datsortinbag,ninbag,jinbag); // growing single tree fRanTree->GrowTree(hadrons,gammas,fNumData,fNumDim,fHadTrue,datsortinbag, fDataRang,classpopw,jinbag,winbag,fWeight); // error-estimates from out-of-bag data (oob data): // // For a single tree the events not(!) contained in the bootstrap sample of // this tree can be used to obtain estimates for the classification error of // this tree. // If you take a certain event, it is contained in the oob-data of 1/3 of // the trees (see comment to ModifyData). This means that the classification error // determined from oob-data is underestimated, but can still be taken as upper limit. TVector event(fNumDim); for(Int_t ievt=0;ievt0)continue; event=TMatrixRow(hadrons,ievt); fHadEst[ievt]+=fRanTree->TreeHad(event); fNTimesOutBag[ievt]++; } for(Int_t ievt=0;ievt0)continue; event=TMatrixRow(gammas,ievt); fHadEst[fNumHad+ievt]+=fRanTree->TreeHad(event); fNTimesOutBag[fNumHad+ievt]++; } Int_t n=0; fErr=0; for(Int_t ievt=0;ievtGetM(); TMatrix gammas=fGammas->GetM(); for (Int_t j=0;j=ninbag) break; } } return; } Bool_t MRanForest::AsciiWrite(ostream &out) const { Int_t n=0; MRanTree *tree; TIter forest(fForest); while ((tree=(MRanTree*)forest.Next())) { tree->AsciiWrite(out); n++; } return n==fNumTrees; }