| 1 | /* ======================================================================== *\
 | 
|---|
| 2 | !
 | 
|---|
| 3 | ! *
 | 
|---|
| 4 | ! * This file is part of MARS, the MAGIC Analysis and Reconstruction
 | 
|---|
| 5 | ! * Software. It is distributed to you in the hope that it can be a useful
 | 
|---|
| 6 | ! * and timesaving tool in analysing Data of imaging Cerenkov telescopes.
 | 
|---|
| 7 | ! * It is distributed WITHOUT ANY WARRANTY.
 | 
|---|
| 8 | ! *
 | 
|---|
| 9 | ! * Permission to use, copy, modify and distribute this software and its
 | 
|---|
| 10 | ! * documentation for any purpose is hereby granted without fee,
 | 
|---|
| 11 | ! * provided that the above copyright notice appear in all copies and
 | 
|---|
| 12 | ! * that both that copyright notice and this permission notice appear
 | 
|---|
| 13 | ! * in supporting documentation. It is provided "as is" without express
 | 
|---|
| 14 | ! * or implied warranty.
 | 
|---|
| 15 | ! *
 | 
|---|
| 16 | !
 | 
|---|
| 17 | !
 | 
|---|
| 18 | !   Author(s): Thomas Hengstebeck 3/2003 <mailto:hengsteb@physik.hu-berlin.de>
 | 
|---|
| 19 | !
 | 
|---|
| 20 | !   Copyright: MAGIC Software Development, 2000-2005
 | 
|---|
| 21 | !
 | 
|---|
| 22 | !
 | 
|---|
| 23 | \* ======================================================================== */
 | 
|---|
| 24 | 
 | 
|---|
| 25 | /////////////////////////////////////////////////////////////////////////////
 | 
|---|
| 26 | //
 | 
|---|
| 27 | // MRanTree
 | 
|---|
| 28 | //
 | 
|---|
| 29 | // ParameterContainer for Tree structure
 | 
|---|
| 30 | //
 | 
|---|
| 31 | /////////////////////////////////////////////////////////////////////////////
 | 
|---|
| 32 | #include "MRanTree.h"
 | 
|---|
| 33 | 
 | 
|---|
| 34 | #include <iostream>
 | 
|---|
| 35 | 
 | 
|---|
| 36 | #include <TRandom.h>
 | 
|---|
| 37 | 
 | 
|---|
| 38 | #include "MArrayI.h"
 | 
|---|
| 39 | #include "MArrayF.h"
 | 
|---|
| 40 | 
 | 
|---|
| 41 | #include "MMath.h"
 | 
|---|
| 42 | 
 | 
|---|
| 43 | #include "MLog.h"
 | 
|---|
| 44 | #include "MLogManip.h"
 | 
|---|
| 45 | 
 | 
|---|
| 46 | ClassImp(MRanTree);
 | 
|---|
| 47 | 
 | 
|---|
| 48 | using namespace std;
 | 
|---|
| 49 | 
 | 
|---|
| 50 | 
 | 
|---|
| 51 | // --------------------------------------------------------------------------
 | 
|---|
| 52 | // Default constructor.
 | 
|---|
| 53 | //
 | 
|---|
| 54 | MRanTree::MRanTree(const char *name, const char *title):fClassify(kTRUE),fNdSize(0), fNumTry(3)
 | 
|---|
| 55 | {
 | 
|---|
| 56 | 
 | 
|---|
| 57 |     fName  = name  ? name  : "MRanTree";
 | 
|---|
| 58 |     fTitle = title ? title : "Storage container for structure of a single tree";
 | 
|---|
| 59 | }
 | 
|---|
| 60 | 
 | 
|---|
| 61 | // --------------------------------------------------------------------------
 | 
|---|
| 62 | // Copy constructor
 | 
|---|
| 63 | //
 | 
|---|
| 64 | MRanTree::MRanTree(const MRanTree &tree)
 | 
|---|
| 65 | {
 | 
|---|
| 66 |     fName  = tree.fName;
 | 
|---|
| 67 |     fTitle = tree.fTitle;
 | 
|---|
| 68 | 
 | 
|---|
| 69 |     fClassify = tree.fClassify;
 | 
|---|
| 70 |     fNdSize   = tree.fNdSize;
 | 
|---|
| 71 |     fNumTry   = tree.fNumTry;
 | 
|---|
| 72 | 
 | 
|---|
| 73 |     fNumNodes    = tree.fNumNodes;
 | 
|---|
| 74 |     fNumEndNodes = tree.fNumEndNodes;
 | 
|---|
| 75 | 
 | 
|---|
| 76 |     fBestVar   = tree.fBestVar;
 | 
|---|
| 77 |     fTreeMap1  = tree.fTreeMap1;
 | 
|---|
| 78 |     fTreeMap2  = tree.fTreeMap2;
 | 
|---|
| 79 |     fBestSplit = tree.fBestSplit;
 | 
|---|
| 80 |     fGiniDec   = tree.fGiniDec;
 | 
|---|
| 81 | }
 | 
|---|
| 82 | 
 | 
|---|
| 83 | void MRanTree::SetNdSize(Int_t n)
 | 
|---|
| 84 | {
 | 
|---|
| 85 |     // threshold nodesize of terminal nodes, i.e. the training data is
 | 
|---|
| 86 |     // splitted until there is only pure date in the subsets(=terminal
 | 
|---|
| 87 |     // nodes) or the subset size is LE n
 | 
|---|
| 88 | 
 | 
|---|
| 89 |     fNdSize=TMath::Max(1,n);//at least 1 event per node
 | 
|---|
| 90 | }
 | 
|---|
| 91 | 
 | 
|---|
| 92 | void MRanTree::SetNumTry(Int_t n)
 | 
|---|
| 93 | {
 | 
|---|
| 94 |     // number of trials in random split selection:
 | 
|---|
| 95 |     // choose at least 1 variable to split in
 | 
|---|
| 96 | 
 | 
|---|
| 97 |     fNumTry=TMath::Max(1,n);
 | 
|---|
| 98 | }
 | 
|---|
| 99 | 
 | 
|---|
| 100 | void MRanTree::GrowTree(TMatrix *mat, const MArrayF &hadtrue, const MArrayI &idclass,
 | 
|---|
| 101 |                         MArrayI &datasort, const MArrayI &datarang, const MArrayF &tclasspop,
 | 
|---|
| 102 |                         const Float_t &mean, const Float_t &square, const MArrayI &jinbag, const MArrayF &winbag,
 | 
|---|
| 103 |                         const int nclass)
 | 
|---|
| 104 | {
 | 
|---|
| 105 |     // arrays have to be initialized with generous size, so number of total nodes (nrnodes)
 | 
|---|
| 106 |     // is estimated for worst case
 | 
|---|
| 107 |     const Int_t numdim =mat->GetNcols();
 | 
|---|
| 108 |     const Int_t numdata=winbag.GetSize();
 | 
|---|
| 109 |     const Int_t nrnodes=2*numdata+1;
 | 
|---|
| 110 | 
 | 
|---|
| 111 |     // number of events in bootstrap sample
 | 
|---|
| 112 |     Int_t ninbag=0;
 | 
|---|
| 113 |     for (Int_t n=0;n<numdata;n++) if(jinbag[n]==1) ninbag++;
 | 
|---|
| 114 | 
 | 
|---|
| 115 |     MArrayI bestsplit(nrnodes);
 | 
|---|
| 116 |     MArrayI bestsplitnext(nrnodes);
 | 
|---|
| 117 | 
 | 
|---|
| 118 |     fBestVar.Set(nrnodes);    fBestVar.Reset();
 | 
|---|
| 119 |     fTreeMap1.Set(nrnodes);   fTreeMap1.Reset();
 | 
|---|
| 120 |     fTreeMap2.Set(nrnodes);   fTreeMap2.Reset();
 | 
|---|
| 121 |     fBestSplit.Set(nrnodes);  fBestSplit.Reset();
 | 
|---|
| 122 |     fGiniDec.Set(numdim);     fGiniDec.Reset();
 | 
|---|
| 123 | 
 | 
|---|
| 124 | 
 | 
|---|
| 125 |     if(fClassify)
 | 
|---|
| 126 |         FindBestSplit=&MRanTree::FindBestSplitGini;
 | 
|---|
| 127 |     else
 | 
|---|
| 128 |         FindBestSplit=&MRanTree::FindBestSplitSigma;
 | 
|---|
| 129 | 
 | 
|---|
| 130 |     // tree growing
 | 
|---|
| 131 |     BuildTree(datasort,datarang,hadtrue,idclass,bestsplit, bestsplitnext,
 | 
|---|
| 132 |               tclasspop,mean,square,winbag,ninbag,nclass);
 | 
|---|
| 133 | 
 | 
|---|
| 134 |     // post processing, determine cut (or split) values fBestSplit
 | 
|---|
| 135 |     for(Int_t k=0; k<nrnodes; k++)
 | 
|---|
| 136 |     {
 | 
|---|
| 137 |         if (GetNodeStatus(k)==-1)
 | 
|---|
| 138 |             continue;
 | 
|---|
| 139 | 
 | 
|---|
| 140 |         const Int_t &bsp =bestsplit[k];
 | 
|---|
| 141 |         const Int_t &bspn=bestsplitnext[k];
 | 
|---|
| 142 |         const Int_t &msp =fBestVar[k];
 | 
|---|
| 143 | 
 | 
|---|
| 144 |         fBestSplit[k] = ((*mat)(bsp, msp)+(*mat)(bspn,msp))/2;
 | 
|---|
| 145 |     }
 | 
|---|
| 146 | 
 | 
|---|
| 147 |     // resizing arrays to save memory
 | 
|---|
| 148 |     fBestVar.Set(fNumNodes);
 | 
|---|
| 149 |     fTreeMap1.Set(fNumNodes);
 | 
|---|
| 150 |     fTreeMap2.Set(fNumNodes);
 | 
|---|
| 151 |     fBestSplit.Set(fNumNodes);
 | 
|---|
| 152 | }
 | 
|---|
| 153 | 
 | 
|---|
| 154 | int MRanTree::FindBestSplitGini(const MArrayI &datasort,const MArrayI &datarang,
 | 
|---|
| 155 |                                 const MArrayF &hadtrue,const MArrayI &idclass,
 | 
|---|
| 156 |                                 Int_t ndstart,Int_t ndend, const MArrayF &tclasspop,
 | 
|---|
| 157 |                                 const Float_t &mean, const Float_t &square, Int_t &msplit,
 | 
|---|
| 158 |                                 Float_t &decsplit,Int_t &nbest, const MArrayF &winbag,
 | 
|---|
| 159 |                                 const int nclass)
 | 
|---|
| 160 | {
 | 
|---|
| 161 |     const Int_t nrnodes = fBestSplit.GetSize();
 | 
|---|
| 162 |     const Int_t numdata = (nrnodes-1)/2;
 | 
|---|
| 163 |     const Int_t mdim = fGiniDec.GetSize();
 | 
|---|
| 164 | 
 | 
|---|
| 165 |     // For the best split, msplit is the index of the variable (e.g
 | 
|---|
| 166 |     // Hillas par., zenith angle ,...)
 | 
|---|
| 167 |     // split on. decsplit is the decreae in impurity measured by
 | 
|---|
| 168 |     // Gini-index. nsplit is the case number of value of msplit split on,
 | 
|---|
| 169 |     // and nsplitnext is the case number of the next larger value of msplit.
 | 
|---|
| 170 | 
 | 
|---|
| 171 |     Int_t nbestvar=0;
 | 
|---|
| 172 | 
 | 
|---|
| 173 |     // compute initial values of numerator and denominator of Gini-index,
 | 
|---|
| 174 |     // Gini index= pno/dno
 | 
|---|
| 175 |     Double_t pno=0;
 | 
|---|
| 176 |     Double_t pdo=0;
 | 
|---|
| 177 | 
 | 
|---|
| 178 |     // tclasspop: sum of weights for events in class
 | 
|---|
| 179 |     for (Int_t j=0; j<nclass; j++) // loop over number of classes to classifiy
 | 
|---|
| 180 |     {
 | 
|---|
| 181 |         pno+=tclasspop[j]*tclasspop[j];
 | 
|---|
| 182 |         pdo+=tclasspop[j];
 | 
|---|
| 183 |     }
 | 
|---|
| 184 | 
 | 
|---|
| 185 |     const Double_t crit0=pno/pdo;  // weighted mean of weights
 | 
|---|
| 186 | 
 | 
|---|
| 187 |     // start main loop through variables to find best split,
 | 
|---|
| 188 |     // (Gini-index as criterium crit)
 | 
|---|
| 189 | 
 | 
|---|
| 190 |     Double_t critmax=-FLT_MAX;
 | 
|---|
| 191 | 
 | 
|---|
| 192 |     // random split selection, number of trials = fNumTry
 | 
|---|
| 193 |     for (Int_t mt=0; mt<fNumTry; mt++) // we could try ALL variables???
 | 
|---|
| 194 |     {
 | 
|---|
| 195 |         const Int_t mvar= gRandom->Integer(mdim);
 | 
|---|
| 196 |         const Int_t mn  = mvar*numdata;
 | 
|---|
| 197 | 
 | 
|---|
| 198 |         // Gini index = rrn/rrd+rln/rld
 | 
|---|
| 199 |         Double_t rrn=pno;
 | 
|---|
| 200 |         Double_t rrd=pdo;
 | 
|---|
| 201 |         Double_t rln=0;
 | 
|---|
| 202 |         Double_t rld=0;
 | 
|---|
| 203 | 
 | 
|---|
| 204 |         MArrayF wl(nclass);     // left node //nclass
 | 
|---|
| 205 |         MArrayF wr(tclasspop);  // right node//nclass
 | 
|---|
| 206 | 
 | 
|---|
| 207 |         Double_t critvar=-FLT_MAX;
 | 
|---|
| 208 |         for(Int_t nsp=ndstart;nsp<=ndend-1;nsp++)
 | 
|---|
| 209 |         {
 | 
|---|
| 210 |             const Int_t  &nc = datasort[mn+nsp];
 | 
|---|
| 211 |             const Int_t   &k = idclass[nc];
 | 
|---|
| 212 |             const Float_t &u = winbag[nc];
 | 
|---|
| 213 | 
 | 
|---|
| 214 |             // do classification, Gini index as split rule
 | 
|---|
| 215 |             rln   +=u*(2*wl[k]+u);  // += u*(wl[k]{i-1} + wl[k]{i-1}+u{i})
 | 
|---|
| 216 |             rld   +=u;   // sum of weights left  from cut total
 | 
|---|
| 217 |             wl[k] +=u;   // sum of weights left  from cut for class k
 | 
|---|
| 218 | 
 | 
|---|
| 219 |             rrn   -=u*(2*wr[k]-u);  // -= u*(wr[k]{i-1} + wr[k]{i-1}-u{i})
 | 
|---|
| 220 |             //  rr0=0; rr0+=u*2*tclasspop[k]
 | 
|---|
| 221 |             //  rrn = pno - rr0 + rln
 | 
|---|
| 222 |             rrd   -=u;   // sum of weights right from cut total
 | 
|---|
| 223 |             wr[k] -=u;   // sum of weights right from cut for class k
 | 
|---|
| 224 | 
 | 
|---|
| 225 |             // REPLACE BY?
 | 
|---|
| 226 |             // rr0   = 0
 | 
|---|
| 227 |             // rr0  += u*2*tclasspop[k]
 | 
|---|
| 228 |             // rrn   = pno - rr0 + rln
 | 
|---|
| 229 |             // rrd   = pdo - rld
 | 
|---|
| 230 |             // wr[k] = tclasspop[k] - wl[k]
 | 
|---|
| 231 | 
 | 
|---|
| 232 |             // crit = (rln*(pdo - rld + 1) + pno - rr0) / rld*(pdo - rld)
 | 
|---|
| 233 | 
 | 
|---|
| 234 |             /*
 | 
|---|
| 235 |              if (k==background)
 | 
|---|
| 236 |                 continue;
 | 
|---|
| 237 |              crit = TMath::Max(MMath::SignificanceLiMa(rld, rld-wl[k]),
 | 
|---|
| 238 |                                MMath::SignificanceLiMa(rrd, rrd-wr[k]))
 | 
|---|
| 239 |              */
 | 
|---|
| 240 | 
 | 
|---|
| 241 |             // This condition is in fact a == (> cannot happen at all)
 | 
|---|
| 242 |             // This is because we cannot set the cut between two identical values
 | 
|---|
| 243 |             //if (datarang[mn+datasort[mn+nsp]]>=datarang[mn+datasort[mn+nsp+1]])
 | 
|---|
| 244 |             if (datarang[mn+nc]>=datarang[mn+datasort[mn+nsp+1]])
 | 
|---|
| 245 |                 continue;
 | 
|---|
| 246 | 
 | 
|---|
| 247 |             // If crit starts to become pretty large do WHAT???
 | 
|---|
| 248 |             //if (TMath::Min(rrd,rld)<=1.0e-5) // FIXME: CHECKIT FOR WEIGHTS!
 | 
|---|
| 249 |             //    continue;
 | 
|---|
| 250 | 
 | 
|---|
| 251 |             const Double_t crit=(rln/rld)+(rrn/rrd);
 | 
|---|
| 252 |             if (!TMath::Finite(crit))
 | 
|---|
| 253 |                 continue;
 | 
|---|
| 254 | 
 | 
|---|
| 255 |             // Search for the highest value of crit
 | 
|---|
| 256 |             if (crit<=critvar) continue;
 | 
|---|
| 257 | 
 | 
|---|
| 258 |             // store the highest crit value and the corresponding event to cut at
 | 
|---|
| 259 |             nbestvar=nsp;
 | 
|---|
| 260 |             critvar=crit;
 | 
|---|
| 261 |         }
 | 
|---|
| 262 | 
 | 
|---|
| 263 |         if (critvar<=critmax) continue;
 | 
|---|
| 264 | 
 | 
|---|
| 265 |         msplit=mvar;      // Variable in which to split
 | 
|---|
| 266 |         nbest=nbestvar;   // event at which the best split was found
 | 
|---|
| 267 |         critmax=critvar;
 | 
|---|
| 268 |     }
 | 
|---|
| 269 | 
 | 
|---|
| 270 |     // crit0 = MMath::SignificanceLiMa(pdo, pdo-tclasspop[0])
 | 
|---|
| 271 |     // mean increase of sensitivity
 | 
|---|
| 272 |     // decsplit = sqrt(critmax/crit0)
 | 
|---|
| 273 |     decsplit=critmax-crit0;
 | 
|---|
| 274 | 
 | 
|---|
| 275 |     return critmax<-1.0e10 ? 1 : 0;
 | 
|---|
| 276 | }
 | 
|---|
| 277 | 
 | 
|---|
| 278 | int MRanTree::FindBestSplitSigma(const MArrayI &datasort,const MArrayI &datarang,
 | 
|---|
| 279 |                                  const MArrayF &hadtrue, const MArrayI &idclass,
 | 
|---|
| 280 |                                  Int_t ndstart,Int_t ndend, const MArrayF &tclasspop,
 | 
|---|
| 281 |                                  const Float_t &mean, const Float_t &square, Int_t &msplit,
 | 
|---|
| 282 |                                  Float_t &decsplit,Int_t &nbest, const MArrayF &winbag,
 | 
|---|
| 283 |                                  const int nclass)
 | 
|---|
| 284 | {
 | 
|---|
| 285 |     const Int_t nrnodes = fBestSplit.GetSize();
 | 
|---|
| 286 |     const Int_t numdata = (nrnodes-1)/2;
 | 
|---|
| 287 |     const Int_t mdim = fGiniDec.GetSize();
 | 
|---|
| 288 | 
 | 
|---|
| 289 |     // For the best split, msplit is the index of the variable (e.g
 | 
|---|
| 290 |     // Hillas par., zenith angle ,...) split on. decsplit is the decreae
 | 
|---|
| 291 |     // in impurity measured by Gini-index. nsplit is the case number of
 | 
|---|
| 292 |     // value of msplit split on, and nsplitnext is the case number of
 | 
|---|
| 293 |     // the next larger value of msplit.
 | 
|---|
| 294 | 
 | 
|---|
| 295 |     Int_t nbestvar=0;
 | 
|---|
| 296 | 
 | 
|---|
| 297 |     // compute initial values of numerator and denominator of split-index,
 | 
|---|
| 298 | 
 | 
|---|
| 299 |     // resolution
 | 
|---|
| 300 |     //Double_t pno=-(tclasspop[0]*square-mean*mean)*tclasspop[0];
 | 
|---|
| 301 |     //Double_t pdo= (tclasspop[0]-1.)*mean*mean;
 | 
|---|
| 302 | 
 | 
|---|
| 303 |     // n*resolution
 | 
|---|
| 304 |     //Double_t pno=-(tclasspop[0]*square-mean*mean)*tclasspop[0];
 | 
|---|
| 305 |     //Double_t pdo= mean*mean;
 | 
|---|
| 306 | 
 | 
|---|
| 307 |     // variance
 | 
|---|
| 308 |     //Double_t pno=-(square-mean*mean/tclasspop[0]);
 | 
|---|
| 309 |     //Double_t pdo= (tclasspop[0]-1.);
 | 
|---|
| 310 | 
 | 
|---|
| 311 |     // n*variance
 | 
|---|
| 312 |     Double_t pno= (square-mean*mean/tclasspop[0]);
 | 
|---|
| 313 |     Double_t pdo= 1.;
 | 
|---|
| 314 | 
 | 
|---|
| 315 |     // 1./(n*variance)
 | 
|---|
| 316 |     //Double_t pno= 1.;
 | 
|---|
| 317 |     //Double_t pdo= (square-mean*mean/tclasspop[0]);
 | 
|---|
| 318 | 
 | 
|---|
| 319 |     const Double_t crit0=pno/pdo;
 | 
|---|
| 320 | 
 | 
|---|
| 321 |     // start main loop through variables to find best split,
 | 
|---|
| 322 | 
 | 
|---|
| 323 |     Double_t critmin=FLT_MAX;
 | 
|---|
| 324 | 
 | 
|---|
| 325 |     // random split selection, number of trials = fNumTry
 | 
|---|
| 326 |     for (Int_t mt=0; mt<fNumTry; mt++)
 | 
|---|
| 327 |     {
 | 
|---|
| 328 |         const Int_t mvar= gRandom->Integer(mdim);
 | 
|---|
| 329 |         const Int_t mn  = mvar*numdata;
 | 
|---|
| 330 | 
 | 
|---|
| 331 |         Double_t esumr =mean;
 | 
|---|
| 332 |         Double_t e2sumr=square;
 | 
|---|
| 333 |         Double_t esuml =0;
 | 
|---|
| 334 |         Double_t e2suml=0;
 | 
|---|
| 335 | 
 | 
|---|
| 336 |         float wl=0.;// left node
 | 
|---|
| 337 |         float wr=tclasspop[0]; // right node
 | 
|---|
| 338 | 
 | 
|---|
| 339 |         Double_t critvar=critmin;
 | 
|---|
| 340 |         for(Int_t nsp=ndstart;nsp<=ndend-1;nsp++)
 | 
|---|
| 341 |         {
 | 
|---|
| 342 |             const Int_t &nc=datasort[mn+nsp];
 | 
|---|
| 343 |             const Float_t &f=hadtrue[nc];;
 | 
|---|
| 344 |             const Float_t &u=winbag[nc];
 | 
|---|
| 345 | 
 | 
|---|
| 346 |             e2suml+=u*f*f;
 | 
|---|
| 347 |             esuml +=u*f;
 | 
|---|
| 348 |             wl    +=u;
 | 
|---|
| 349 | 
 | 
|---|
| 350 |             //-------------------------------------------
 | 
|---|
| 351 |             // resolution
 | 
|---|
| 352 |             //const Double_t rln=(wl*e2suml-esuml*esuml)*wl;
 | 
|---|
| 353 |             //const Double_t rld=(wl-1.)*esuml*esuml;
 | 
|---|
| 354 | 
 | 
|---|
| 355 |             // resolution times n
 | 
|---|
| 356 |             //const Double_t rln=(wl*e2suml-esuml*esuml)*wl;
 | 
|---|
| 357 |             //const Double_t rld=esuml*esuml;
 | 
|---|
| 358 | 
 | 
|---|
| 359 |             // sigma
 | 
|---|
| 360 |             //const Double_t rln=(e2suml-esuml*esuml/wl);
 | 
|---|
| 361 |             //const Double_t rld=(wl-1.);
 | 
|---|
| 362 | 
 | 
|---|
| 363 |             // sigma times n
 | 
|---|
| 364 |             Double_t rln=(e2suml-esuml*esuml/wl);
 | 
|---|
| 365 |             Double_t rld=1.;
 | 
|---|
| 366 | 
 | 
|---|
| 367 |             // 1./(n*variance)
 | 
|---|
| 368 |             //const Double_t rln=1.;
 | 
|---|
| 369 |             //const Double_t rld=(e2suml-esuml*esuml/wl);
 | 
|---|
| 370 |             //-------------------------------------------
 | 
|---|
| 371 | 
 | 
|---|
| 372 |             // REPLACE BY??? 
 | 
|---|
| 373 |             e2sumr-=u*f*f;   // e2sumr = square       - e2suml
 | 
|---|
| 374 |             esumr -=u*f;     // esumr  = mean         - esuml
 | 
|---|
| 375 |             wr    -=u;       // wr     = tclasspop[0] - wl
 | 
|---|
| 376 | 
 | 
|---|
| 377 |             //-------------------------------------------
 | 
|---|
| 378 |             // resolution
 | 
|---|
| 379 |             //const Double_t rrn=(wr*e2sumr-esumr*esumr)*wr;
 | 
|---|
| 380 |             //const Double_t rrd=(wr-1.)*esumr*esumr;
 | 
|---|
| 381 | 
 | 
|---|
| 382 |             // resolution times n
 | 
|---|
| 383 |             //const Double_t rrn=(wr*e2sumr-esumr*esumr)*wr;
 | 
|---|
| 384 |             //const Double_t rrd=esumr*esumr;
 | 
|---|
| 385 | 
 | 
|---|
| 386 |             // sigma
 | 
|---|
| 387 |             //const Double_t rrn=(e2sumr-esumr*esumr/wr);
 | 
|---|
| 388 |             //const Double_t rrd=(wr-1.);
 | 
|---|
| 389 | 
 | 
|---|
| 390 |             // sigma times n
 | 
|---|
| 391 |             const Double_t rrn=(e2sumr-esumr*esumr/wr);
 | 
|---|
| 392 |             const Double_t rrd=1.;
 | 
|---|
| 393 | 
 | 
|---|
| 394 |             // 1./(n*variance)
 | 
|---|
| 395 |             //const Double_t rrn=1.;
 | 
|---|
| 396 |             //const Double_t rrd=(e2sumr-esumr*esumr/wr);
 | 
|---|
| 397 |             //-------------------------------------------
 | 
|---|
| 398 | 
 | 
|---|
| 399 |             if (datarang[mn+nc]>=datarang[mn+datasort[mn+nsp+1]])
 | 
|---|
| 400 |                 continue;
 | 
|---|
| 401 | 
 | 
|---|
| 402 |             //if (TMath::Min(rrd,rld)<=1.0e-5)
 | 
|---|
| 403 |             //    continue;
 | 
|---|
| 404 | 
 | 
|---|
| 405 |             const Double_t crit=(rln/rld)+(rrn/rrd);
 | 
|---|
| 406 |             if (!TMath::Finite(crit))
 | 
|---|
| 407 |                 continue;
 | 
|---|
| 408 | 
 | 
|---|
| 409 |             if (crit>=critvar) continue;
 | 
|---|
| 410 | 
 | 
|---|
| 411 |             nbestvar=nsp;
 | 
|---|
| 412 |             critvar=crit;
 | 
|---|
| 413 |         }
 | 
|---|
| 414 | 
 | 
|---|
| 415 |         if (critvar>=critmin) continue;
 | 
|---|
| 416 | 
 | 
|---|
| 417 |         msplit=mvar;
 | 
|---|
| 418 |         nbest=nbestvar;
 | 
|---|
| 419 |         critmin=critvar;
 | 
|---|
| 420 |     }
 | 
|---|
| 421 | 
 | 
|---|
| 422 |     decsplit=crit0-critmin;
 | 
|---|
| 423 | 
 | 
|---|
| 424 |     //return critmin>1.0e20 ? 1 : 0;
 | 
|---|
| 425 |     return decsplit<0 ? 1 : 0;
 | 
|---|
| 426 | }
 | 
|---|
| 427 | 
 | 
|---|
| 428 | void MRanTree::MoveData(MArrayI &datasort,Int_t ndstart, Int_t ndend,
 | 
|---|
| 429 |                         MArrayI &idmove,MArrayI &ncase,Int_t msplit,
 | 
|---|
| 430 |                         Int_t nbest,Int_t &ndendl)
 | 
|---|
| 431 | {
 | 
|---|
| 432 |     // This is the heart of the BuildTree construction. Based on the
 | 
|---|
| 433 |     // best split the data in the part of datasort corresponding to
 | 
|---|
| 434 |     // the current node is moved to the left if it belongs to the left
 | 
|---|
| 435 |     // child and right if it belongs to the right child-node.
 | 
|---|
| 436 |     const Int_t numdata = ncase.GetSize();
 | 
|---|
| 437 |     const Int_t mdim    = fGiniDec.GetSize();
 | 
|---|
| 438 | 
 | 
|---|
| 439 |     MArrayI tdatasort(numdata);
 | 
|---|
| 440 | 
 | 
|---|
| 441 |     // compute idmove = indicator of case nos. going left
 | 
|---|
| 442 |     for (Int_t nsp=ndstart;nsp<=ndend;nsp++)
 | 
|---|
| 443 |     {
 | 
|---|
| 444 |         const Int_t &nc=datasort[msplit*numdata+nsp];
 | 
|---|
| 445 |         idmove[nc]= nsp<=nbest?1:0;
 | 
|---|
| 446 |     }
 | 
|---|
| 447 |     ndendl=nbest;
 | 
|---|
| 448 | 
 | 
|---|
| 449 |     // shift case. nos. right and left for numerical variables.
 | 
|---|
| 450 |     for(Int_t msh=0;msh<mdim;msh++)
 | 
|---|
| 451 |     {
 | 
|---|
| 452 |         Int_t k=ndstart-1;
 | 
|---|
| 453 |         for (Int_t n=ndstart;n<=ndend;n++)
 | 
|---|
| 454 |         {
 | 
|---|
| 455 |             const Int_t &ih=datasort[msh*numdata+n];
 | 
|---|
| 456 |             if (idmove[ih]==1)
 | 
|---|
| 457 |                 tdatasort[++k]=datasort[msh*numdata+n];
 | 
|---|
| 458 |         }
 | 
|---|
| 459 | 
 | 
|---|
| 460 |         for (Int_t n=ndstart;n<=ndend;n++)
 | 
|---|
| 461 |         {
 | 
|---|
| 462 |             const Int_t &ih=datasort[msh*numdata+n];
 | 
|---|
| 463 |             if (idmove[ih]==0)
 | 
|---|
| 464 |                 tdatasort[++k]=datasort[msh*numdata+n];
 | 
|---|
| 465 |         }
 | 
|---|
| 466 | 
 | 
|---|
| 467 |         for(Int_t m=ndstart;m<=ndend;m++)
 | 
|---|
| 468 |             datasort[msh*numdata+m]=tdatasort[m];
 | 
|---|
| 469 |     }
 | 
|---|
| 470 | 
 | 
|---|
| 471 |     // compute case nos. for right and left nodes.
 | 
|---|
| 472 | 
 | 
|---|
| 473 |     for(Int_t n=ndstart;n<=ndend;n++)
 | 
|---|
| 474 |         ncase[n]=datasort[msplit*numdata+n];
 | 
|---|
| 475 | }
 | 
|---|
| 476 | 
 | 
|---|
| 477 | void MRanTree::BuildTree(MArrayI &datasort,const MArrayI &datarang, const MArrayF &hadtrue,
 | 
|---|
| 478 |                          const MArrayI &idclass, MArrayI &bestsplit, MArrayI &bestsplitnext,
 | 
|---|
| 479 |                          const MArrayF &tclasspop, const Float_t &tmean, const Float_t &tsquare, const MArrayF &winbag,
 | 
|---|
| 480 |                          Int_t ninbag, const int nclass)
 | 
|---|
| 481 | {
 | 
|---|
| 482 |     // Buildtree consists of repeated calls to two void functions,
 | 
|---|
| 483 |     // FindBestSplit and MoveData. Findbestsplit does just that--it finds
 | 
|---|
| 484 |     // the best split of the current node. MoveData moves the data in
 | 
|---|
| 485 |     // the split node right and left so that the data corresponding to
 | 
|---|
| 486 |     // each child node is contiguous.
 | 
|---|
| 487 |     //
 | 
|---|
| 488 |     // buildtree bookkeeping:
 | 
|---|
| 489 |     // ncur is the total number of nodes to date.  nodestatus(k)=1 if
 | 
|---|
| 490 |     // the kth node has been split. nodestatus(k)=2 if the node exists
 | 
|---|
| 491 |     // but has not yet been split, and =-1 if the node is terminal.
 | 
|---|
| 492 |     // A node is terminal if its size is below a threshold value, or
 | 
|---|
| 493 |     // if it is all one class, or if all the data-values are equal.
 | 
|---|
| 494 |     // If the current node k is split, then its children are numbered
 | 
|---|
| 495 |     // ncur+1 (left), and ncur+2(right), ncur increases to ncur+2 and
 | 
|---|
| 496 |     // the next node to be split is numbered k+1.  When no more nodes
 | 
|---|
| 497 |     // can be split, buildtree returns.
 | 
|---|
| 498 |     const Int_t mdim    = fGiniDec.GetSize();
 | 
|---|
| 499 |     const Int_t nrnodes = fBestSplit.GetSize();
 | 
|---|
| 500 |     const Int_t numdata = (nrnodes-1)/2;
 | 
|---|
| 501 | 
 | 
|---|
| 502 |     MArrayI nodepop(nrnodes);
 | 
|---|
| 503 |     MArrayI nodestart(nrnodes);
 | 
|---|
| 504 |     MArrayI parent(nrnodes);
 | 
|---|
| 505 | 
 | 
|---|
| 506 |     MArrayI ncase(numdata);
 | 
|---|
| 507 |     MArrayI idmove(numdata);
 | 
|---|
| 508 |     MArrayI iv(mdim);
 | 
|---|
| 509 | 
 | 
|---|
| 510 |     MArrayF classpop(nrnodes*nclass);//nclass
 | 
|---|
| 511 |     MArrayI nodestatus(nrnodes);
 | 
|---|
| 512 | 
 | 
|---|
| 513 |     for (Int_t j=0;j<nclass;j++)
 | 
|---|
| 514 |         classpop[j*nrnodes+0]=tclasspop[j];
 | 
|---|
| 515 | 
 | 
|---|
| 516 |     MArrayF mean(nrnodes);
 | 
|---|
| 517 |     MArrayF square(nrnodes);
 | 
|---|
| 518 |     MArrayF lclasspop(tclasspop);
 | 
|---|
| 519 | 
 | 
|---|
| 520 |     mean[0]=tmean;
 | 
|---|
| 521 |     square[0]=tsquare;
 | 
|---|
| 522 | 
 | 
|---|
| 523 | 
 | 
|---|
| 524 |     Int_t ncur=0;
 | 
|---|
| 525 |     nodepop[0]=ninbag;
 | 
|---|
| 526 |     nodestatus[0]=2;
 | 
|---|
| 527 | 
 | 
|---|
| 528 |     // start main loop
 | 
|---|
| 529 |     for (Int_t kbuild=0; kbuild<nrnodes; kbuild++)
 | 
|---|
| 530 |     {
 | 
|---|
| 531 |           if (kbuild>ncur) break;
 | 
|---|
| 532 |           if (nodestatus[kbuild]!=2) continue;
 | 
|---|
| 533 | 
 | 
|---|
| 534 |           // initialize for next call to FindBestSplit
 | 
|---|
| 535 | 
 | 
|---|
| 536 |           const Int_t ndstart=nodestart[kbuild];
 | 
|---|
| 537 |           const Int_t ndend=ndstart+nodepop[kbuild]-1;
 | 
|---|
| 538 | 
 | 
|---|
| 539 |           for (Int_t j=0;j<nclass;j++)
 | 
|---|
| 540 |               lclasspop[j]=classpop[j*nrnodes+kbuild];
 | 
|---|
| 541 | 
 | 
|---|
| 542 |           Int_t msplit, nbest;
 | 
|---|
| 543 |           Float_t decsplit=0;
 | 
|---|
| 544 | 
 | 
|---|
| 545 |           if ((this->*FindBestSplit)(datasort,datarang,hadtrue,idclass,ndstart,
 | 
|---|
| 546 |                                      ndend, lclasspop,mean[kbuild],square[kbuild],msplit,decsplit,
 | 
|---|
| 547 |                                      nbest,winbag,nclass))
 | 
|---|
| 548 |           {
 | 
|---|
| 549 |               nodestatus[kbuild]=-1;
 | 
|---|
| 550 |               continue;
 | 
|---|
| 551 |           }
 | 
|---|
| 552 | 
 | 
|---|
| 553 |           fBestVar[kbuild]=msplit;
 | 
|---|
| 554 |           fGiniDec[msplit]+=decsplit;
 | 
|---|
| 555 | 
 | 
|---|
| 556 |           bestsplit[kbuild]=datasort[msplit*numdata+nbest];
 | 
|---|
| 557 |           bestsplitnext[kbuild]=datasort[msplit*numdata+nbest+1];
 | 
|---|
| 558 | 
 | 
|---|
| 559 |           Int_t ndendl;
 | 
|---|
| 560 |           MoveData(datasort,ndstart,ndend,idmove,ncase,
 | 
|---|
| 561 |                    msplit,nbest,ndendl);
 | 
|---|
| 562 | 
 | 
|---|
| 563 |           // leftnode no.= ncur+1, rightnode no. = ncur+2.
 | 
|---|
| 564 |           nodepop[ncur+1]=ndendl-ndstart+1;
 | 
|---|
| 565 |           nodepop[ncur+2]=ndend-ndendl;
 | 
|---|
| 566 |           nodestart[ncur+1]=ndstart;
 | 
|---|
| 567 |           nodestart[ncur+2]=ndendl+1;
 | 
|---|
| 568 | 
 | 
|---|
| 569 |           // find class populations in both nodes
 | 
|---|
| 570 |           for (Int_t n=ndstart;n<=ndendl;n++)
 | 
|---|
| 571 |           {
 | 
|---|
| 572 |               const Int_t &nc=ncase[n];
 | 
|---|
| 573 |               const int j=idclass[nc];
 | 
|---|
| 574 |                    
 | 
|---|
| 575 |               // statistics left from cut
 | 
|---|
| 576 |               mean[ncur+1]+=hadtrue[nc]*winbag[nc];
 | 
|---|
| 577 |               square[ncur+1]+=hadtrue[nc]*hadtrue[nc]*winbag[nc];
 | 
|---|
| 578 | 
 | 
|---|
| 579 |               // sum of weights left from cut
 | 
|---|
| 580 |               classpop[j*nrnodes+ncur+1]+=winbag[nc];
 | 
|---|
| 581 |           }
 | 
|---|
| 582 | 
 | 
|---|
| 583 |           for (Int_t n=ndendl+1;n<=ndend;n++)
 | 
|---|
| 584 |           {
 | 
|---|
| 585 |               const Int_t &nc=ncase[n];
 | 
|---|
| 586 |               const int j=idclass[nc];
 | 
|---|
| 587 | 
 | 
|---|
| 588 |               // statistics right from cut
 | 
|---|
| 589 |               mean[ncur+2]  +=hadtrue[nc]*winbag[nc];
 | 
|---|
| 590 |               square[ncur+2]+=hadtrue[nc]*hadtrue[nc]*winbag[nc];
 | 
|---|
| 591 | 
 | 
|---|
| 592 |               // sum of weights right from cut
 | 
|---|
| 593 |               classpop[j*nrnodes+ncur+2]+=winbag[nc];
 | 
|---|
| 594 |           }
 | 
|---|
| 595 | 
 | 
|---|
| 596 |           // check on nodestatus
 | 
|---|
| 597 | 
 | 
|---|
| 598 |           nodestatus[ncur+1]=2;
 | 
|---|
| 599 |           nodestatus[ncur+2]=2;
 | 
|---|
| 600 |           if (nodepop[ncur+1]<=fNdSize) nodestatus[ncur+1]=-1;
 | 
|---|
| 601 |           if (nodepop[ncur+2]<=fNdSize) nodestatus[ncur+2]=-1;
 | 
|---|
| 602 | 
 | 
|---|
| 603 | 
 | 
|---|
| 604 |           Double_t popt1=0;
 | 
|---|
| 605 |           Double_t popt2=0;
 | 
|---|
| 606 |           for (Int_t j=0;j<nclass;j++)
 | 
|---|
| 607 |           {
 | 
|---|
| 608 |               popt1+=classpop[j*nrnodes+ncur+1];
 | 
|---|
| 609 |               popt2+=classpop[j*nrnodes+ncur+2];
 | 
|---|
| 610 |           }
 | 
|---|
| 611 | 
 | 
|---|
| 612 |           if(fClassify)
 | 
|---|
| 613 |           {
 | 
|---|
| 614 |               // check if only members of one class in node
 | 
|---|
| 615 |               for (Int_t j=0;j<nclass;j++)
 | 
|---|
| 616 |               {
 | 
|---|
| 617 |                   if (classpop[j*nrnodes+ncur+1]==popt1) nodestatus[ncur+1]=-1;
 | 
|---|
| 618 |                   if (classpop[j*nrnodes+ncur+2]==popt2) nodestatus[ncur+2]=-1;
 | 
|---|
| 619 |               }
 | 
|---|
| 620 |           }
 | 
|---|
| 621 | 
 | 
|---|
| 622 |           fTreeMap1[kbuild]=ncur+1;
 | 
|---|
| 623 |           fTreeMap2[kbuild]=ncur+2;
 | 
|---|
| 624 |           parent[ncur+1]=kbuild;
 | 
|---|
| 625 |           parent[ncur+2]=kbuild;
 | 
|---|
| 626 |           nodestatus[kbuild]=1;
 | 
|---|
| 627 |           ncur+=2;
 | 
|---|
| 628 |           if (ncur>=nrnodes) break;
 | 
|---|
| 629 |     }
 | 
|---|
| 630 | 
 | 
|---|
| 631 |     // determine number of nodes
 | 
|---|
| 632 |     fNumNodes=nrnodes;
 | 
|---|
| 633 |     for (Int_t k=nrnodes-1;k>=0;k--)
 | 
|---|
| 634 |     {
 | 
|---|
| 635 |         if (nodestatus[k]==0) fNumNodes-=1;
 | 
|---|
| 636 |         if (nodestatus[k]==2) nodestatus[k]=-1;
 | 
|---|
| 637 |     }
 | 
|---|
| 638 | 
 | 
|---|
| 639 |     fNumEndNodes=0;
 | 
|---|
| 640 |     for (Int_t kn=0;kn<fNumNodes;kn++)
 | 
|---|
| 641 |         if(nodestatus[kn]==-1)
 | 
|---|
| 642 |         {
 | 
|---|
| 643 |             fNumEndNodes++;
 | 
|---|
| 644 | 
 | 
|---|
| 645 |             Double_t pp=0;
 | 
|---|
| 646 |             for (Int_t j=0;j<nclass;j++)
 | 
|---|
| 647 |             {
 | 
|---|
| 648 |                 if(classpop[j*nrnodes+kn]>pp)
 | 
|---|
| 649 |                 {
 | 
|---|
| 650 |                     // class + status of node kn coded into fBestVar[kn]
 | 
|---|
| 651 |                     fBestVar[kn]=j-nclass;
 | 
|---|
| 652 |                     pp=classpop[j*nrnodes+kn];
 | 
|---|
| 653 |                 }
 | 
|---|
| 654 |             }
 | 
|---|
| 655 | 
 | 
|---|
| 656 |                 float sum=0;
 | 
|---|
| 657 |                 for(int i=0;i<nclass;i++) sum+=classpop[i*nrnodes+kn];
 | 
|---|
| 658 | 
 | 
|---|
| 659 |                 fBestSplit[kn]=mean[kn]/sum;
 | 
|---|
| 660 |         }
 | 
|---|
| 661 | }
 | 
|---|
| 662 | 
 | 
|---|
| 663 | Double_t MRanTree::TreeHad(const Float_t *evt)
 | 
|---|
| 664 | {
 | 
|---|
| 665 |     // to optimize on storage space node status and node class
 | 
|---|
| 666 |     // are coded into fBestVar:
 | 
|---|
| 667 |     // status of node kt = TMath::Sign(1,fBestVar[kt])
 | 
|---|
| 668 |     // class  of node kt = fBestVar[kt]+2 (class defined by larger
 | 
|---|
| 669 |     //  node population, actually not used)
 | 
|---|
| 670 |     // hadronness assigned to node kt = fBestSplit[kt]
 | 
|---|
| 671 | 
 | 
|---|
| 672 |     // To get rid of the range check of the root classes
 | 
|---|
| 673 |     const Float_t *split = fBestSplit.GetArray();
 | 
|---|
| 674 |     const Int_t   *map1  = fTreeMap1.GetArray();
 | 
|---|
| 675 |     const Int_t   *map2  = fTreeMap2.GetArray();
 | 
|---|
| 676 |     const Int_t   *best  = fBestVar.GetArray();
 | 
|---|
| 677 | 
 | 
|---|
| 678 |     Int_t kt=0;
 | 
|---|
| 679 |     for (Int_t k=0; k<fNumNodes; k++)
 | 
|---|
| 680 |     {
 | 
|---|
| 681 |         if (best[kt]<0)
 | 
|---|
| 682 |             break;
 | 
|---|
| 683 | 
 | 
|---|
| 684 |         const Int_t m=best[kt];
 | 
|---|
| 685 |         kt = evt[m]<=split[kt] ? map1[kt] : map2[kt];
 | 
|---|
| 686 |     }
 | 
|---|
| 687 | 
 | 
|---|
| 688 |     return split[kt];
 | 
|---|
| 689 | }
 | 
|---|
| 690 | 
 | 
|---|
| 691 | Double_t MRanTree::TreeHad(const TVector &event)
 | 
|---|
| 692 | {
 | 
|---|
| 693 |     return TreeHad(event.GetMatrixArray());
 | 
|---|
| 694 | }
 | 
|---|
| 695 | 
 | 
|---|
| 696 | Double_t MRanTree::TreeHad(const TMatrixFRow_const &event)
 | 
|---|
| 697 | {
 | 
|---|
| 698 |     return TreeHad(event.GetPtr());
 | 
|---|
| 699 | }
 | 
|---|
| 700 | 
 | 
|---|
| 701 | Double_t MRanTree::TreeHad(const TMatrix &m, Int_t ievt)
 | 
|---|
| 702 | {
 | 
|---|
| 703 | #if ROOT_VERSION_CODE < ROOT_VERSION(4,00,8)
 | 
|---|
| 704 |     return TreeHad(TMatrixRow(m, ievt));
 | 
|---|
| 705 | #else
 | 
|---|
| 706 |     return TreeHad(TMatrixFRow_const(m, ievt));
 | 
|---|
| 707 | #endif
 | 
|---|
| 708 | }
 | 
|---|
| 709 | 
 | 
|---|
| 710 | Bool_t MRanTree::AsciiWrite(ostream &out) const
 | 
|---|
| 711 | {
 | 
|---|
| 712 |     TString str;
 | 
|---|
| 713 |     Int_t k;
 | 
|---|
| 714 | 
 | 
|---|
| 715 |     out.width(5);out<<fNumNodes<<endl;
 | 
|---|
| 716 | 
 | 
|---|
| 717 |     for (k=0;k<fNumNodes;k++)
 | 
|---|
| 718 |     {
 | 
|---|
| 719 |         str=Form("%f",GetBestSplit(k));
 | 
|---|
| 720 | 
 | 
|---|
| 721 |         out.width(5);  out << k;
 | 
|---|
| 722 |         out.width(5);  out << GetNodeStatus(k);
 | 
|---|
| 723 |         out.width(5);  out << GetTreeMap1(k);
 | 
|---|
| 724 |         out.width(5);  out << GetTreeMap2(k);
 | 
|---|
| 725 |         out.width(5);  out << GetBestVar(k);
 | 
|---|
| 726 |         out.width(15); out << str<<endl;
 | 
|---|
| 727 |         out.width(5);  out << GetNodeClass(k);
 | 
|---|
| 728 |     }
 | 
|---|
| 729 |     out<<endl;
 | 
|---|
| 730 | 
 | 
|---|
| 731 |     return k==fNumNodes;
 | 
|---|
| 732 | }
 | 
|---|