/* ======================================================================== *\ ! ! * ! * This file is part of MARS, the MAGIC Analysis and Reconstruction ! * Software. It is distributed to you in the hope that it can be a useful ! * and timesaving tool in analysing Data of imaging Cerenkov telescopes. ! * It is distributed WITHOUT ANY WARRANTY. ! * ! * Permission to use, copy, modify and distribute this software and its ! * documentation for any purpose is hereby granted without fee, ! * provided that the above copyright notice appear in all copies and ! * that both that copyright notice and this permission notice appear ! * in supporting documentation. It is provided "as is" without express ! * or implied warranty. ! * ! ! ! Author(s): Thomas Hengstebeck 3/2003 ! ! Copyright: MAGIC Software Development, 2000-2005 ! ! \* ======================================================================== */ ///////////////////////////////////////////////////////////////////////////// // // MRanTree // // ParameterContainer for Tree structure // ///////////////////////////////////////////////////////////////////////////// #include "MRanTree.h" #include #include #include #include #include "MLog.h" #include "MLogManip.h" ClassImp(MRanTree); using namespace std; // -------------------------------------------------------------------------- // Default constructor. // MRanTree::MRanTree(const char *name, const char *title):fClassify(1),fNdSize(0), fNumTry(3) { fName = name ? name : "MRanTree"; fTitle = title ? title : "Storage container for structure of a single tree"; } // -------------------------------------------------------------------------- // Copy constructor // MRanTree::MRanTree(const MRanTree &tree) { fName = tree.fName; fTitle = tree.fTitle; fClassify = tree.fClassify; fNdSize = tree.fNdSize; fNumTry = tree.fNumTry; fNumNodes = tree.fNumNodes; fNumEndNodes = tree.fNumEndNodes; fBestVar = tree.fBestVar; fTreeMap1 = tree.fTreeMap1; fTreeMap2 = tree.fTreeMap2; fBestSplit = tree.fBestSplit; fGiniDec = tree.fGiniDec; } void MRanTree::SetNdSize(Int_t n) { // threshold nodesize of terminal nodes, i.e. the training data is splitted // until there is only pure date in the subsets(=terminal nodes) or the // subset size is LE n fNdSize=TMath::Max(1,n);//at least 1 event per node } void MRanTree::SetNumTry(Int_t n) { // number of trials in random split selection: // choose at least 1 variable to split in fNumTry=TMath::Max(1,n); } void MRanTree::GrowTree(TMatrix *mat, const TArrayF &hadtrue, const TArrayI &idclass, TArrayI &datasort, const TArrayI &datarang, TArrayF &tclasspop, float &mean, float &square, TArrayI &jinbag, const TArrayF &winbag, const int nclass) { // arrays have to be initialized with generous size, so number of total nodes (nrnodes) // is estimated for worst case const Int_t numdim =mat->GetNcols(); const Int_t numdata=winbag.GetSize(); const Int_t nrnodes=2*numdata+1; // number of events in bootstrap sample Int_t ninbag=0; for (Int_t n=0;nRndm()*mdim); const Int_t mn = mvar*numdata; // Gini index = rrn/rrd+rln/rld Double_t rrn=pno; Double_t rrd=pdo; Double_t rln=0; Double_t rld=0; TArrayF wl(nclass); wl.Reset(0.);// left node //nclass wr = tclasspop; Double_t critvar=-1.0e20; for(Int_t nsp=ndstart;nsp<=ndend-1;nsp++) { const Int_t &nc = datasort[mn+nsp]; const Int_t &k = idclass[nc]; const Float_t &u = winbag[nc]; // do classification, Gini index as split rule rln+=u*(2*wl[k]+u); rrn+=u*(-2*wr[k]+u); rld+=u; rrd-=u; wl[k]+=u; wr[k]-=u; if (datarang[mn+nc]>=datarang[mn+datasort[mn+nsp+1]]) continue; if (TMath::Min(rrd,rld)<=1.0e-5) continue; const Double_t crit=(rln/rld)+(rrn/rrd); if (crit<=critvar) continue; nbestvar=nsp; critvar=crit; } if (critvar<=critmax) continue; msplit=mvar; nbest=nbestvar; critmax=critvar; } decsplit=critmax-crit0; return critmax<-1.0e10 ? 1 : 0; } int MRanTree::FindBestSplitSigma(const TArrayI &datasort,const TArrayI &datarang, const TArrayF &hadtrue, const TArrayI &idclass, Int_t ndstart,Int_t ndend, TArrayF &tclasspop, float &mean, float &square, Int_t &msplit, Float_t &decsplit,Int_t &nbest, const TArrayF &winbag, const int nclass) { const Int_t nrnodes = fBestSplit.GetSize(); const Int_t numdata = (nrnodes-1)/2; const Int_t mdim = fGiniDec.GetSize(); float wr=0;// right node // For the best split, msplit is the index of the variable (e.g Hillas par., zenith angle ,...) // split on. decsplit is the decreae in impurity measured by Gini-index. // nsplit is the case number of value of msplit split on, // and nsplitnext is the case number of the next larger value of msplit. Int_t nbestvar=0; // compute initial values of numerator and denominator of split-index, // resolution //Double_t pno=-(tclasspop[0]*square-mean*mean)*tclasspop[0]; //Double_t pdo= (tclasspop[0]-1.)*mean*mean; // n*resolution //Double_t pno=-(tclasspop[0]*square-mean*mean)*tclasspop[0]; //Double_t pdo= mean*mean; // variance //Double_t pno=-(square-mean*mean/tclasspop[0]); //Double_t pdo= (tclasspop[0]-1.); // n*variance Double_t pno= (square-mean*mean/tclasspop[0]); Double_t pdo= 1.; // 1./(n*variance) //Double_t pno= 1.; //Double_t pdo= (square-mean*mean/tclasspop[0]); const Double_t crit0=pno/pdo; // start main loop through variables to find best split, Double_t critmin=1.0e40; // random split selection, number of trials = fNumTry for (Int_t mt=0; mtRndm()*mdim); const Int_t mn = mvar*numdata; Double_t rrn=0, rrd=0, rln=0, rld=0; Double_t esumr=0, esuml=0, e2sumr=0,e2suml=0; esumr =mean; e2sumr=square; esuml =0; e2suml=0; float wl=0.;// left node wr = tclasspop[0]; Double_t critvar=critmin; for(Int_t nsp=ndstart;nsp<=ndend-1;nsp++) { const Int_t &nc=datasort[mn+nsp]; const Float_t &f=hadtrue[nc];; const Float_t &u=winbag[nc]; e2sumr-=u*f*f; esumr -=u*f; wr -=u; //------------------------------------------- // resolution //rrn=(wr*e2sumr-esumr*esumr)*wr; //rrd=(wr-1.)*esumr*esumr; // resolution times n //rrn=(wr*e2sumr-esumr*esumr)*wr; //rrd=esumr*esumr; // sigma //rrn=(e2sumr-esumr*esumr/wr); //rrd=(wr-1.); // sigma times n rrn=(e2sumr-esumr*esumr/wr); rrd=1.; // 1./(n*variance) //rrn=1.; //rrd=(e2sumr-esumr*esumr/wr); //------------------------------------------- e2suml+=u*f*f; esuml +=u*f; wl +=u; //------------------------------------------- // resolution //rln=(wl*e2suml-esuml*esuml)*wl; //rld=(wl-1.)*esuml*esuml; // resolution times n //rln=(wl*e2suml-esuml*esuml)*wl; //rld=esuml*esuml; // sigma //rln=(e2suml-esuml*esuml/wl); //rld=(wl-1.); // sigma times n rln=(e2suml-esuml*esuml/wl); rld=1.; // 1./(n*variance) //rln=1.; //rld=(e2suml-esuml*esuml/wl); //------------------------------------------- if (datarang[mn+nc]>=datarang[mn+datasort[mn+nsp+1]]) continue; if (TMath::Min(rrd,rld)<=1.0e-5) continue; const Double_t crit=(rln/rld)+(rrn/rrd); if (crit>=critvar) continue; nbestvar=nsp; critvar=crit; } if (critvar>=critmin) continue; msplit=mvar; nbest=nbestvar; critmin=critvar; } decsplit=crit0-critmin; //return critmin>1.0e20 ? 1 : 0; return decsplit<0 ? 1 : 0; } void MRanTree::MoveData(TArrayI &datasort,Int_t ndstart, Int_t ndend, TArrayI &idmove,TArrayI &ncase,Int_t msplit, Int_t nbest,Int_t &ndendl) { // This is the heart of the BuildTree construction. Based on the best split // the data in the part of datasort corresponding to the current node is moved to the // left if it belongs to the left child and right if it belongs to the right child-node. const Int_t numdata = ncase.GetSize(); const Int_t mdim = fGiniDec.GetSize(); TArrayI tdatasort(numdata); tdatasort.Reset(0); // compute idmove = indicator of case nos. going left for (Int_t nsp=ndstart;nsp<=ndend;nsp++) { const Int_t &nc=datasort[msplit*numdata+nsp]; idmove[nc]= nsp<=nbest?1:0; } ndendl=nbest; // shift case. nos. right and left for numerical variables. for(Int_t msh=0;mshncur) break; if (nodestatus[kbuild]!=2) continue; // initialize for next call to FindBestSplit const Int_t ndstart=nodestart[kbuild]; const Int_t ndend=ndstart+nodepop[kbuild]-1; for (Int_t j=0;j=nrnodes) break; } // determine number of nodes fNumNodes=nrnodes; for (Int_t k=nrnodes-1;k>=0;k--) { if (nodestatus[k]==0) fNumNodes-=1; if (nodestatus[k]==2) nodestatus[k]=-1; } fNumEndNodes=0; for (Int_t kn=0;knpp) { // class + status of node kn coded into fBestVar[kn] fBestVar[kn]=j-nclass; pp=classpop[j*nrnodes+kn]; } } float sum=0; for(int i=0;i