/* ======================================================================== *\ ! ! * ! * This file is part of MARS, the MAGIC Analysis and Reconstruction ! * Software. It is distributed to you in the hope that it can be a useful ! * and timesaving tool in analysing Data of imaging Cerenkov telescopes. ! * It is distributed WITHOUT ANY WARRANTY. ! * ! * Permission to use, copy, modify and distribute this software and its ! * documentation for any purpose is hereby granted without fee, ! * provided that the above copyright notice appear in all copies and ! * that both that copyright notice and this permission notice appear ! * in supporting documentation. It is provided "as is" without express ! * or implied warranty. ! * ! ! ! Author(s): Thomas Hengstebeck 3/2003 ! ! Copyright: MAGIC Software Development, 2000-2003 ! ! \* ======================================================================== */ ///////////////////////////////////////////////////////////////////////////// // // // MRanTree // // // // ParameterContainer for Tree structure // // // // // ///////////////////////////////////////////////////////////////////////////// #include "MRanTree.h" #include #include #include #include #include "MDataArray.h" #include "MLog.h" #include "MLogManip.h" ClassImp(MRanTree); using namespace std; // -------------------------------------------------------------------------- // // Default constructor. // MRanTree::MRanTree(const char *name, const char *title):fNdSize(0), fNumTry(3), fData(NULL) { fName = name ? name : "MRanTree"; fTitle = title ? title : "Storage container for structure of a single tree"; } void MRanTree::SetNdSize(Int_t n) { // threshold nodesize of terminal nodes, i.e. the training data is splitted // until there is only pure date in the subsets(=terminal nodes) or the // subset size is LE n fNdSize=TMath::Max(1,n);//at least 1 event per node } void MRanTree::SetNumTry(Int_t n) { // number of trials in random split selection: // choose at least 1 variable to split in fNumTry=TMath::Max(1,n); } void MRanTree::GrowTree(const TMatrix &mhad, const TMatrix &mgam, const TArrayI &hadtrue, TArrayI &datasort, const TArrayI &datarang, TArrayF &tclasspop, TArrayI &jinbag, const TArrayF &winbag) { // arrays have to be initialized with generous size, so number of total nodes (nrnodes) // is estimated for worst case const Int_t numdim =mhad.GetNcols(); const Int_t numdata=winbag.GetSize(); const Int_t nrnodes=2*numdata+1; // number of events in bootstrap sample Int_t ninbag=0; for (Int_t n=0;nRndm()*mdim); const Int_t mn = mvar*numdata; // Gini index = rrn/rrd+rln/rld Double_t rrn=pno; Double_t rrd=pdo; Double_t rln=0; Double_t rld=0; TArrayF wl(2); // left node wr = tclasspop; Double_t critvar=-1.0e20; for(Int_t nsp=ndstart;nsp<=ndend-1;nsp++) { const Int_t &nc=datasort[mn+nsp]; const Int_t &k=hadtrue[nc]; const Float_t &u=winbag[nc]; rln+=u*(2*wl[k]+u); rrn+=u*(-2*wr[k]+u); rld+=u; rrd-=u; wl[k]+=u; wr[k]-=u; if (datarang[mn+nc]>=datarang[mn+datasort[mn+nsp+1]]) continue; if (TMath::Min(rrd,rld)<=1.0e-5) continue; const Double_t crit=(rln/rld)+(rrn/rrd); if (crit<=critvar) continue; nbestvar=nsp; critvar=crit; } if (critvar<=critmax) continue; msplit=mvar; nbest=nbestvar; critmax=critvar; } decsplit=critmax-crit0; return critmax<-1.0e10 ? 1 : jstat; } void MRanTree::MoveData(TArrayI &datasort,Int_t ndstart, Int_t ndend,TArrayI &idmove,TArrayI &ncase,Int_t msplit, Int_t nbest,Int_t &ndendl) { // This is the heart of the BuildTree construction. Based on the best split // the data in the part of datasort corresponding to the current node is moved to the // left if it belongs to the left child and right if it belongs to the right child-node. const Int_t numdata = ncase.GetSize(); const Int_t mdim = fGiniDec.GetSize(); TArrayI tdatasort(numdata); // compute idmove = indicator of case nos. going left for (Int_t nsp=ndstart;nsp<=ndend;nsp++) { const Int_t &nc=datasort[msplit*numdata+nsp]; idmove[nc]= nsp<=nbest?1:0; } ndendl=nbest; // shift case. nos. right and left for numerical variables. for(Int_t msh=0;mshncur) break; if (nodestatus[kbuild]!=2) continue; // initialize for next call to FindBestSplit const Int_t ndstart=nodestart[kbuild]; const Int_t ndend=ndstart+nodepop[kbuild]-1; for (Int_t j=0;j<2;j++) tclasspop[j]=classpop[j*nrnodes+kbuild]; Int_t msplit, nbest; Float_t decsplit=0; const Int_t jstat=FindBestSplit(datasort,datarang,hadtrue, ndstart,ndend,tclasspop,msplit, decsplit,nbest,winbag); if (jstat==1) { nodestatus[kbuild]=-1; continue; } fBestVar[kbuild]=msplit; fGiniDec[msplit]+=decsplit; bestsplit[kbuild]=datasort[msplit*numdata+nbest]; bestsplitnext[kbuild]=datasort[msplit*numdata+nbest+1]; Int_t ndendl; MoveData(datasort,ndstart,ndend,idmove,ncase, msplit,nbest,ndendl); // leftnode no.= ncur+1, rightnode no. = ncur+2. nodepop[ncur+1]=ndendl-ndstart+1; nodepop[ncur+2]=ndend-ndendl; nodestart[ncur+1]=ndstart; nodestart[ncur+2]=ndendl+1; // find class populations in both nodes for (Int_t n=ndstart;n<=ndendl;n++) { const Int_t &nc=ncase[n]; const Int_t &j=hadtrue[nc]; classpop[j*nrnodes+ncur+1]+=winbag[nc]; } for (Int_t n=ndendl+1;n<=ndend;n++) { const Int_t &nc=ncase[n]; const Int_t &j=hadtrue[nc]; classpop[j*nrnodes+ncur+2]+=winbag[nc]; } // check on nodestatus nodestatus[ncur+1]=2; nodestatus[ncur+2]=2; if (nodepop[ncur+1]<=fNdSize) nodestatus[ncur+1]=-1; if (nodepop[ncur+2]<=fNdSize) nodestatus[ncur+2]=-1; Double_t popt1=0; Double_t popt2=0; for (Int_t j=0;j<2;j++) { popt1+=classpop[j*nrnodes+ncur+1]; popt2+=classpop[j*nrnodes+ncur+2]; } for (Int_t j=0;j<2;j++) { if (classpop[j*nrnodes+ncur+1]==popt1) nodestatus[ncur+1]=-1; if (classpop[j*nrnodes+ncur+2]==popt2) nodestatus[ncur+2]=-1; } fTreeMap1[kbuild]=ncur+1; fTreeMap2[kbuild]=ncur+2; parent[ncur+1]=kbuild; parent[ncur+2]=kbuild; nodestatus[kbuild]=1; ncur+=2; if (ncur>=nrnodes) break; } // determine number of nodes fNumNodes=nrnodes; for (Int_t k=nrnodes-1;k>=0;k--) { if (nodestatus[k]==0) fNumNodes-=1; if (nodestatus[k]==2) nodestatus[k]=-1; } fNumEndNodes=0; for (Int_t kn=0;knpp) { // class + status of node kn coded into fBestVar[kn] fBestVar[kn]=j-2; pp=classpop[j*nrnodes+kn]; } } fBestSplit[kn] =classpop[1*nrnodes+kn]; fBestSplit[kn]/=(classpop[0*nrnodes+kn]+classpop[1*nrnodes+kn]); } } void MRanTree::SetRules(MDataArray *rules) { fData=rules; } Double_t MRanTree::TreeHad(const TVector &event) { Int_t kt=0; // to optimize on storage space node status and node class // are coded into fBestVar: // status of node kt = TMath::Sign(1,fBestVar[kt]) // class of node kt = fBestVar[kt]+2 (class defined by larger // node population, actually not used) // hadronness assigned to node kt = fBestSplit[kt] for (Int_t k=0;k> event; return TreeHad(event); } Bool_t MRanTree::AsciiWrite(ostream &out) const { TString str; Int_t k; out.width(5);out<