/* ======================================================================== *\ ! ! * ! * This file is part of MARS, the MAGIC Analysis and Reconstruction ! * Software. It is distributed to you in the hope that it can be a useful ! * and timesaving tool in analysing Data of imaging Cerenkov telescopes. ! * It is distributed WITHOUT ANY WARRANTY. ! * ! * Permission to use, copy, modify and distribute this software and its ! * documentation for any purpose is hereby granted without fee, ! * provided that the above copyright notice appear in all copies and ! * that both that copyright notice and this permission notice appear ! * in supporting documentation. It is provided "as is" without express ! * or implied warranty. ! * ! ! ! Author(s): Thomas Hengstebeck 3/2003 ! ! Copyright: MAGIC Software Development, 2000-2003 ! ! \* ======================================================================== */ ///////////////////////////////////////////////////////////////////////////// // // // MRanTree // // // // ParameterContainer for Tree structure // // // // // ///////////////////////////////////////////////////////////////////////////// #include "MRanTree.h" #include #include #include #include #include "MDataArray.h" #include "MLog.h" #include "MLogManip.h" ClassImp(MRanTree); // -------------------------------------------------------------------------- // // Default constructor. // MRanTree::MRanTree(const char *name, const char *title):fNdSize(0), fNumTry(3), fData(NULL) { fName = name ? name : "MRanTree"; fTitle = title ? title : "Storage container for structure of a single tree"; } void MRanTree::SetNdSize(Int_t n) { // threshold nodesize of terminal nodes, i.e. the training data is splitted // until there is only pure date in the subsets(=terminal nodes) or the // subset size is LE n fNdSize=TMath::Max(1,n);//at least 1 event per node } void MRanTree::SetNumTry(Int_t n) { // number of trials in random split selection: // choose at least 1 variable to split in fNumTry=TMath::Max(1,n); } void MRanTree::GrowTree(TMatrix &mhad,TMatrix &mgam,Int_t numdata, Int_t numdim,TArrayI &hadtrue, TArrayI &datasort,TArrayI &datarang,TArrayF &tclasspop,TArrayI &jinbag, TArrayF &winbag,TArrayF &weight) { // arrays have to be initialized with generous size, so number of total nodes (nrnodes) // is estimated for worst case Int_t nrnodes=2*numdata+1; // number of events in bootstrap sample Int_t ninbag=0; for (Int_t n=0;nRndm()); // Gini index = rrn/rrd+rln/rld rrn=pno; rrd=pdo; rln=0; rld=0; wl.Reset(); for (Int_t j=0;j<2;j++) { wr[j]=tclasspop[j]; } critvar=-1.0e20; for(Int_t nsp=ndstart;nsp<=ndend-1;nsp++) { nc=datasort[mvar*numdata+nsp]; u=winbag[nc]; k=hadtrue[nc]; rln=rln+u*(2*wl[k]+u); rrn=rrn+u*(-2*wr[k]+u); rld=rld+u; rrd=rrd-u; wl[k]=wl[k]+u; wr[k]=wr[k]-u; if (datarang[mvar*numdata+nc]1.0e-5) { crit=(rln/rld)+(rrn/rrd); if (crit>critvar) { nbestvar=nsp; critvar=crit; } } } } if (critvar>critmax) { msplit=mvar; nbest=nbestvar; critmax=critvar; } } decsplit=critmax-crit0; if (critmax<-1.0e10) jstat=1; return jstat; } void MRanTree::MoveData(TArrayI &datasort,Int_t mdim,Int_t numdata,Int_t ndstart, Int_t ndend,TArrayI &idmove,TArrayI &ncase,Int_t msplit, Int_t nbest,Int_t &ndendl) { // This is the heart of the BuildTree construction. Based on the best split // the data in the part of datasort corresponding to the current node is moved to the // left if it belongs to the left child and right if it belongs to the right child-node. Int_t nc,k,ih; TArrayI tdatasort(numdata); // compute idmove = indicator of case nos. going left for (Int_t nsp=ndstart;nsp<=nbest;nsp++) { nc=datasort[msplit*numdata+nsp]; idmove[nc]=1; } for (Int_t nsp=nbest+1;nsp<=ndend;nsp++) { nc=datasort[msplit*numdata+nsp]; idmove[nc]=0; } ndendl=nbest; // shift case. nos. right and left for numerical variables. for(Int_t msh=0;mshncur) break; if (nodestatus[kbuild]!=2) continue; // initialize for next call to FindBestSplit ndstart=nodestart[kbuild]; ndend=ndstart+nodepop[kbuild]-1; for (Int_t j=0;j<2;j++) tclasspop[j]=classpop[j*nrnodes+kbuild]; jstat=FindBestSplit(datasort,datarang,hadtrue,mdim,numdata, ndstart,ndend,tclasspop,msplit,decsplit, nbest,ncase,jinbag,iv,winbag,wr,wc,wl, kbuild); if(jstat==1) { nodestatus[kbuild]=-1; continue; }else{ fBestVar[kbuild]=msplit; fGiniDec[msplit]+=decsplit; bestsplit[kbuild]=datasort[msplit*numdata+nbest]; bestsplitnext[kbuild]=datasort[msplit*numdata+nbest+1]; } MoveData(datasort,mdim,numdata,ndstart,ndend,idmove,ncase, msplit,nbest,ndendl); // leftnode no.= ncur+1, rightnode no. = ncur+2. nodepop[ncur+1]=ndendl-ndstart+1; nodepop[ncur+2]=ndend-ndendl; nodestart[ncur+1]=ndstart; nodestart[ncur+2]=ndendl+1; // find class populations in both nodes for (Int_t n=ndstart;n<=ndendl;n++) { nc=ncase[n]; Int_t j=hadtrue[nc]; classpop[j*nrnodes+ncur+1]+=winbag[nc]; } for (Int_t n=ndendl+1;n<=ndend;n++) { nc=ncase[n]; Int_t j=hadtrue[nc]; classpop[j*nrnodes+ncur+2]+=winbag[nc]; } // check on nodestatus nodestatus[ncur+1]=2; nodestatus[ncur+2]=2; if (nodepop[ncur+1]<=fNdSize) nodestatus[ncur+1]=-1; if (nodepop[ncur+2]<=fNdSize) nodestatus[ncur+2]=-1; popt1=0; popt2=0; for (Int_t j=0;j<2;j++) { popt1+=classpop[j*nrnodes+ncur+1]; popt2+=classpop[j*nrnodes+ncur+2]; } for (Int_t j=0;j<2;j++) { if (classpop[j*nrnodes+ncur+1]==popt1) nodestatus[ncur+1]=-1; if (classpop[j*nrnodes+ncur+2]==popt2) nodestatus[ncur+2]=-1; } fTreeMap1[kbuild]=ncur+1; fTreeMap2[kbuild]=ncur+2; parent[ncur+1]=kbuild; parent[ncur+2]=kbuild; nodestatus[kbuild]=1; ncur+=2; if (ncur>=nrnodes) break; } // determine number of nodes fNumNodes=nrnodes; for (Int_t k=nrnodes-1;k>=0;k--) { if (nodestatus[k]==0) fNumNodes-=1; if (nodestatus[k]==2) nodestatus[k]=-1; } fNumEndNodes=0; for (Int_t kn=0;knpp) { // class + status of node kn coded into fBestVar[kn] fBestVar[kn]=j-2; pp=classpop[j*nrnodes+kn]; } } fBestSplit[kn] =classpop[1*nrnodes+kn]; fBestSplit[kn]/=(classpop[0*nrnodes+kn]+classpop[1*nrnodes+kn]); } return; } void MRanTree::SetRules(MDataArray *rules) { fData=rules; } Double_t MRanTree::TreeHad(TVector &event) { Int_t kt=0; // to optimize on storage space node status and node class // are coded into fBestVar: // status of node kt = TMath::Sign(1,fBestVar[kt]) // hadronness assigned to node kt = fBestSplit[kt] for (Int_t k=0;kGetNumEntries(); TVector event(ncols); for (int i=0; iGetNumEntries(); i++) event(i) = (*fData)(i); Int_t kt=0; // to optimize on storage space node status and node class // are coded into fBestVar: // status of node kt = TMath::Sign(1,fBestVar[kt]) // class of node kt = fBestVar[kt]+2 (class defined by larger // node population, actually not used) // hadronness assigned to node kt = fBestSplit[kt] for (Int_t k=0;k