/* ======================================================================== *\
!
! *
! * This file is part of MARS, the MAGIC Analysis and Reconstruction
! * Software. It is distributed to you in the hope that it can be a useful
! * and timesaving tool in analysing Data of imaging Cerenkov telescopes.
! * It is distributed WITHOUT ANY WARRANTY.
! *
! * Permission to use, copy, modify and distribute this software and its
! * documentation for any purpose is hereby granted without fee,
! * provided that the above copyright notice appear in all copies and
! * that both that copyright notice and this permission notice appear
! * in supporting documentation. It is provided "as is" without express
! * or implied warranty.
! *
!
!
!   Author(s): Thomas Hengstebeck 3/2003
!
!   Copyright: MAGIC Software Development, 2000-2005
!
!
\* ======================================================================== */

/////////////////////////////////////////////////////////////////////////////
//
//  MRanForest
//
//  ParameterContainer for Forest structure
//
//  A random forest can be grown by calling GrowForest.
//  In advance SetupGrow must be called in order to initialize arrays and
//  do some preprocessing.
//  GrowForest() provides the training data for a single tree (bootstrap
//  aggregate procedure)
//
//  Essentially two random elements serve to provide a "random" forest,
//  namely bootstrap aggregating (which is done in GrowForest()) and random
//  split selection (which is subject to MRanTree::GrowTree())
//
/////////////////////////////////////////////////////////////////////////////
#include "MRanForest.h"

// ROOT system headers. NOTE(review): the original angle-bracket include
// names were destroyed in the corrupted source ("#include #include");
// restored here from the symbols used below (TMath::*, gRandom->Rndm(),
// TVector) — verify against the original file.
#include <TMath.h>
#include <TRandom.h>
#include <TVector.h>

#include "MHMatrix.h"

#include "MRanTree.h"
#include "MData.h"
#include "MDataArray.h"
#include "MParList.h"

#include "MLog.h"
#include "MLogManip.h"

ClassImp(MRanForest);

using namespace std;

// --------------------------------------------------------------------------
//
// Default constructor.
// MRanForest::MRanForest(const char *name, const char *title) : fClassify(kTRUE), fNumTrees(100), fNumTry(0), fNdSize(1), fRanTree(NULL), fRules(NULL), fMatrix(NULL), fUserVal(-1) { fName = name ? name : "MRanForest"; fTitle = title ? title : "Storage container for Random Forest"; fForest=new TObjArray(); fForest->SetOwner(kTRUE); } MRanForest::MRanForest(const MRanForest &rf) { // Copy constructor fName = rf.fName; fTitle = rf.fTitle; fClassify = rf.fClassify; fNumTrees = rf.fNumTrees; fNumTry = rf.fNumTry; fNdSize = rf.fNdSize; fTreeNo = rf.fTreeNo; fRanTree = NULL; fRules=new MDataArray(); fRules->Reset(); MDataArray *newrules=rf.fRules; for(Int_t i=0;iGetNumEntries();i++) { MData &data=(*newrules)[i]; fRules->AddEntry(data.GetRule()); } // trees fForest=new TObjArray(); fForest->SetOwner(kTRUE); TObjArray *newforest=rf.fForest; for(Int_t i=0;iGetEntries();i++) { MRanTree *rantree=(MRanTree*)newforest->At(i); MRanTree *newtree=new MRanTree(*rantree); fForest->Add(newtree); } fHadTrue = rf.fHadTrue; fHadEst = rf.fHadEst; fDataSort = rf.fDataSort; fDataRang = rf.fDataRang; fClassPop = rf.fClassPop; fWeight = rf.fWeight; fTreeHad = rf.fTreeHad; fNTimesOutBag = rf.fNTimesOutBag; } // -------------------------------------------------------------------------- // Destructor. MRanForest::~MRanForest() { delete fForest; if (fMatrix) delete fMatrix; if (fRules) delete fRules; } MRanTree *MRanForest::GetTree(Int_t i) { return (MRanTree*)(fForest->At(i)); } void MRanForest::SetNumTrees(Int_t n) { //at least 1 tree fNumTrees=TMath::Max(n,1); fTreeHad.Set(fNumTrees); fTreeHad.Reset(); } void MRanForest::SetNumTry(Int_t n) { fNumTry=TMath::Max(n,0); } void MRanForest::SetNdSize(Int_t n) { fNdSize=TMath::Max(n,1); } void MRanForest::SetWeights(const TArrayF &weights) { fWeight=weights; } void MRanForest::SetGrid(const TArrayD &grid) { const int n=grid.GetSize(); for(int i=0;i=grid[i+1]) { *fLog<GetNcols() : 0; } Int_t MRanForest::GetNumData() const { return fMatrix ? 
fMatrix->GetNrows() : 0; } Int_t MRanForest::GetNclass() const { int maxidx = TMath::LocMax(fClass.GetSize(),fClass.GetArray()); return int(fClass[maxidx])+1; } void MRanForest::PrepareClasses() { const int numdata=fHadTrue.GetSize(); if(fGrid.GetSize()>0) { // classes given by grid const int ngrid=fGrid.GetSize(); for(int j=0;j> event; return CalcHadroness(event); } Double_t MRanForest::CalcHadroness(const TVector &event) { Double_t hadroness=0; Int_t ntree=0; TIter Next(fForest); MRanTree *tree; while ((tree=(MRanTree*)Next())) hadroness += (fTreeHad[ntree++]=tree->TreeHad(event)); return hadroness/ntree; } Bool_t MRanForest::AddTree(MRanTree *rantree=NULL) { fRanTree = rantree ? rantree : fRanTree; if (!fRanTree) return kFALSE; MRanTree *newtree=new MRanTree(*fRanTree); fForest->Add(newtree); return kTRUE; } Bool_t MRanForest::SetupGrow(MHMatrix *mat,MParList *plist) { //------------------------------------------------------------------- // access matrix, copy last column (target) preliminarily // into fHadTrue if (fMatrix) delete fMatrix; fMatrix = new TMatrix(mat->GetM()); int dim = fMatrix->GetNcols()-1; int numdata = fMatrix->GetNrows(); fHadTrue.Set(numdata); fHadTrue.Reset(0); for (Int_t j=0;jResizeTo(numdata,dim); //------------------------------------------------------------------- // setup labels for classification/regression fClass.Set(numdata); fClass.Reset(0); if (fClassify) PrepareClasses(); //------------------------------------------------------------------- // allocating and initializing arrays fHadEst.Set(numdata); fHadEst.Reset(0); fNTimesOutBag.Set(numdata); fNTimesOutBag.Reset(0); fDataSort.Set(dim*numdata); fDataSort.Reset(0); fDataRang.Set(dim*numdata); fDataRang.Reset(0); if(fWeight.GetSize()!=numdata) { fWeight.Set(numdata); fWeight.Reset(1.); *fLog << inf <<"Setting weights to 1 (no weighting)"<< endl; } //------------------------------------------------------------------- // setup rules to be used for classification/regression const 
MDataArray *allrules=(MDataArray*)mat->GetColumns(); if(allrules==NULL) { *fLog << err <<"Rules of matrix == null, exiting"<< endl; return kFALSE; } if (fRules) delete fRules; fRules = new MDataArray(); fRules->Reset(); const TString target_rule = (*allrules)[dim].GetRule(); for (Int_t i=0;iAddEntry((*allrules)[i].GetRule()); *fLog << inf << endl; *fLog << "Setting up RF for training on target:" << endl; *fLog << " " << target_rule.Data() << endl; *fLog << "Following rules are used as input to RF:" << endl; for (Int_t i=0;iFindCreateObj("MRanTree"); if(!fRanTree) { *fLog << err << dbginf << "MRanForest, fRanTree not initialized... aborting." << endl; return kFALSE; } const Int_t tryest = TMath::Nint(TMath::Sqrt(dim)); *fLog << inf << endl; *fLog << "Following input for the tree growing are used:"<0?"Yes":"No")<0.001); if (fTreeNo==1) { *fLog << inf << endl << underline; if(calcResolution) *fLog << "no. of tree no. of nodes resolution in % (from oob-data -> overest. of error)" << endl; else *fLog << "no. of tree no. of nodes rms in % (from oob-data -> overest. of error)" << endl; // 12345678901234567890123456789012345678901234567890 } const Int_t numdata = GetNumData(); const Int_t nclass = GetNclass(); //------------------------------------------------------------------- // bootstrap aggregating (bagging) -> sampling with replacement: TArrayF classpopw(nclass); TArrayI jinbag(numdata); // Initialization includes filling with 0 TArrayF winbag(numdata); // Initialization includes filling with 0 float square=0; float mean=0; for (Int_t n=0; nRndm()*numdata); if(fClassify) classpopw[fClass[k]]+=fWeight[k]; else classpopw[0]+=fWeight[k]; mean +=fHadTrue[k]*fWeight[k]; square+=fHadTrue[k]*fHadTrue[k]*fWeight[k]; winbag[k]+=fWeight[k]; jinbag[k]=1; } //------------------------------------------------------------------- // modifying sorted-data array for in-bag data: // In bagging procedure ca. 
2/3 of all elements in the original // training sample are used to build the in-bag data TArrayI datsortinbag=fDataSort; Int_t ninbag=0; ModifyDataSort(datsortinbag, ninbag, jinbag); fRanTree->GrowTree(fMatrix,fHadTrue,fClass,datsortinbag,fDataRang,classpopw,mean,square, jinbag,winbag,nclass); //------------------------------------------------------------------- // error-estimates from out-of-bag data (oob data): // // For a single tree the events not(!) contained in the bootstrap sample of // this tree can be used to obtain estimates for the classification error of // this tree. // If you take a certain event, it is contained in the oob-data of 1/3 of // the trees (see comment to ModifyData). This means that the classification error // determined from oob-data is underestimated, but can still be taken as upper limit. for (Int_t ievt=0;ievt0) continue; fHadEst[ievt] +=fRanTree->TreeHad((*fMatrix), ievt); fNTimesOutBag[ievt]++; } Int_t n=0; Float_t ferr=0; for (Int_t ievt=0;ievtGetNumEndNodes(); *fLog << Form("%18.2f", ferr*100.); *fLog << endl; fRanTree->SetError(ferr); // adding tree to forest AddTree(); return fTreeNov[isort[n]]) { *fLog << err <<"Event no. "<=ninbag) break; } } } Bool_t MRanForest::AsciiWrite(ostream &out) const { Int_t n=0; MRanTree *tree; TIter forest(fForest); while ((tree=(MRanTree*)forest.Next())) { tree->AsciiWrite(out); n++; } return n==fNumTrees; }