source: trunk/MagicSoft/Mars/manalysis/MRanTree.cc@ 1869

Last change on this file since 1869 was 1866, checked in by tbretz, 22 years ago
*** empty log message ***
File size: 15.3 KB
Line 
1/* ======================================================================== *\
2!
3! *
4! * This file is part of MARS, the MAGIC Analysis and Reconstruction
5! * Software. It is distributed to you in the hope that it can be a useful
6! * and timesaving tool in analysing Data of imaging Cerenkov telescopes.
7! * It is distributed WITHOUT ANY WARRANTY.
8! *
9! * Permission to use, copy, modify and distribute this software and its
10! * documentation for any purpose is hereby granted without fee,
11! * provided that the above copyright notice appear in all copies and
12! * that both that copyright notice and this permission notice appear
13! * in supporting documentation. It is provided "as is" without express
14! * or implied warranty.
15! *
16!
17!
18! Author(s): Thomas Hengstebeck 3/2003 <mailto:hengsteb@alwa02.physik.uni-siegen.de>
19!
20! Copyright: MAGIC Software Development, 2000-2003
21!
22!
23\* ======================================================================== */
24
25/////////////////////////////////////////////////////////////////////////////
26// //
27// MRanTree //
28// //
29// ParameterContainer for Tree structure //
30// //
31// //
32/////////////////////////////////////////////////////////////////////////////
33#include "MRanTree.h"
34
35#include <ostream.h>
36
37#include <TVector.h>
38#include <TMatrix.h>
39#include <TRandom.h>
40
41#include "MDataArray.h"
42
43ClassImp(MRanTree);
44
45// --------------------------------------------------------------------------
46//
47// Default constructor.
48//
49MRanTree::MRanTree(const char *name, const char *title):fData(NULL)
50{
51 fName = name ? name : "MRanTree";
52 fTitle = title ? title : "Storage container for structure of a single tree and additional information";
53}
54
55void MRanTree::SetNdSize(Int_t n)
56{
57 // minimum nodesize of terminal nodes
58 fNdSize=TMath::Max(1,n);//at least 1 event per node
59}
60
61void MRanTree::SetNumTry(Int_t n)
62{
63 // number of trials in random split selection:
64 // choose at least 1 variable to split in...
65 fNumTry=TMath::Max(1,n);
66 // and not more candidates than available
67 if(fData)
68 fNumTry=TMath::Min(fData->GetNumEntries(),n);
69}
70
71void MRanTree::GrowTree(TMatrix &mhad,TMatrix &mgam,Int_t numdata, Int_t numdim,TArrayI &hadtrue,TArrayI &datasort,
72 TArrayI &datarang,TArrayF &ginidec,TArrayF &tclasspop,TArrayI &jinbag,
73 TArrayF &winbag,TArrayF &weight,TRandom &rand)
74{
75 //Int_t nrnodes=2*(numdata/fNdSize)+1;
76 Int_t nrnodes=2*numdata+1;
77 Int_t ninbag=0;
78 for (Int_t n=0;n<numdata;n++)
79 if(jinbag[n]==1) ninbag++;
80
81
82 TArrayI bestsplit(nrnodes);
83 TArrayF wl(2);
84 TArrayF wc(2);
85 TArrayF wr(2);
86 TArrayI nc(2);
87
88 TArrayI bestsplitnext(nrnodes);
89 TArrayI nodepop(nrnodes);
90 TArrayI parent(nrnodes);
91 TArrayI nodex(numdata);
92 TArrayI nodestart(nrnodes);
93
94 TArrayI ncase(numdata);
95 TArrayI iv(numdim);
96 TArrayI idmove(numdata);
97
98 idmove.Reset();
99
100 fBestVar.Set(nrnodes);
101 fTreeMap1.Set(nrnodes);
102 fTreeMap2.Set(nrnodes);
103 fBestSplit.Set(nrnodes);
104
105 fTreeMap1.Reset();
106 fTreeMap2.Reset();
107 fBestSplit.Reset();
108
109 // tree growing
110 BuildTree(datasort,datarang,hadtrue,numdim,numdata,bestsplit,
111 bestsplitnext,ginidec,nodepop,nodestart,tclasspop,nrnodes,
112 idmove,ncase,parent,jinbag,iv,winbag,wr,wc,wl,ninbag,rand);
113
114 Int_t nhad=mhad.GetNrows();
115
116 for(Int_t k=0;k<nrnodes;k++)
117 {
118 Int_t bsp=bestsplit[k];
119 Int_t bspn=bestsplitnext[k];
120 Int_t msp=fBestVar[k];
121
122 if (GetNodeStatus(k)!=-1)
123 {
124 fBestSplit[k] = bsp<nhad ? mhad(bsp,msp):mgam(bsp-nhad,msp);
125 fBestSplit[k] += bspn<nhad ? mhad(bspn,msp):mgam(bspn-nhad,msp);
126 fBestSplit[k] /=2.;
127 }
128 }
129
130 fBestVar.Set(fNumNodes);
131 fTreeMap1.Set(fNumNodes);
132 fTreeMap2.Set(fNumNodes);
133 fBestSplit.Set(fNumNodes);
134}
135
136Int_t MRanTree::FindBestSplit(TArrayI &datasort,TArrayI &datarang,TArrayI &hadtrue,Int_t mdim,
137 Int_t numdata,Int_t ndstart,Int_t ndend,TArrayF &tclasspop,
138 Int_t &msplit,Float_t &decsplit,Int_t &nbest,TArrayI &ncase,
139 TArrayI &jinbag,TArrayI &iv,TArrayF &winbag,TArrayF &wr,
140 TArrayF &wc,TArrayF &wl,Int_t kbuild,TRandom &rand)
141{
142 // For the best split, msplit is the variable split on. decsplit is the dec. in impurity.
143 // nsplit is the case number of value of msplit split on,
144 // and nsplitnext is the case number of the next larger value of msplit.
145
146 Int_t mvar,nc,nbestvar=0,jstat,k;
147 Float_t crit,crit0,critmax,critvar=0;
148 Float_t rrn, rrd, rln, rld, u;
149
150 // compute initial values of numerator and denominator of Gini
151 Float_t pno=0;
152 Float_t pdo=0;
153
154 for (Int_t j=0;j<2;j++)
155 {
156 pno+=tclasspop[j]*tclasspop[j];
157 pdo+=tclasspop[j];
158 }
159 crit0=pno/pdo;
160 jstat=0;
161
162 // start main loop through variables to find best split
163
164 critmax=-1.0e20;
165 for(Int_t mt=0;mt<fNumTry;mt++)
166 {
167 mvar=Int_t(mdim*rand.Rndm());
168
169 rrn=pno;
170 rrd=pdo;
171 rln=0;
172 rld=0;
173 wl.Reset();
174
175 for (Int_t j=0;j<2;j++)
176 {
177 wr[j]=tclasspop[j];
178 }
179
180 critvar=-1.0e20;
181
182 for(Int_t nsp=ndstart;nsp<=ndend-1;nsp++)
183 {
184 nc=datasort[mvar*numdata+nsp];
185
186 u=winbag[nc];
187 k=hadtrue[nc];
188
189 rln=rln+u*(2*wl[k]+u);
190 rrn=rrn+u*(-2*wr[k]+u);
191 rld=rld+u;
192 rrd=rrd-u;
193
194 wl[k]=wl[k]+u;
195 wr[k]=wr[k]-u;
196
197 if (datarang[mvar*numdata+nc]<datarang[mvar*numdata+datasort[mvar*numdata+nsp+1]])
198 {
199 if(TMath::Min(rrd,rld)>1.0e-5)
200 {
201 crit=(rln/rld)+(rrn/rrd);
202 if (crit>critvar)
203 {
204 nbestvar=nsp;
205 critvar=crit;
206 }
207 }
208 }
209 }
210
211 if (critvar>critmax) {
212 msplit=mvar;
213 nbest=nbestvar;
214 critmax=critvar;
215 }
216 }
217
218 decsplit=critmax-crit0;
219 if (critmax<-1.0e10) jstat=1;
220
221 return jstat;
222}
223
224void MRanTree::MoveData(TArrayI &datasort,Int_t mdim,Int_t numdata,Int_t ndstart,
225 Int_t ndend,TArrayI &idmove,TArrayI &ncase,Int_t msplit,
226 Int_t nbest,Int_t &ndendl)
227{
228 // This is the heart of the BuildTree construction. Based on the best split
229 // the data in the part of datasort corresponding to the current node is moved to the
230 // left if it belongs to the left child and right if it belongs to the right child-node.
231
232 Int_t nc,k,ih;
233 TArrayI tdatasort(numdata);
234
235 // compute idmove = indicator of case nos. going left
236
237 for (Int_t nsp=ndstart;nsp<=nbest;nsp++)
238 {
239 nc=datasort[msplit*numdata+nsp];
240 idmove[nc]=1;
241 }
242 for (Int_t nsp=nbest+1;nsp<=ndend;nsp++)
243 {
244 nc=datasort[msplit*numdata+nsp];
245 idmove[nc]=0;
246 }
247 ndendl=nbest;
248
249 // shift case. nos. right and left for numerical variables.
250
251 for(Int_t msh=0;msh<mdim;msh++)
252 {
253 k=ndstart-1;
254 for (Int_t n=ndstart;n<=ndend;n++)
255 {
256 ih=datasort[msh*numdata+n];
257 if (idmove[ih]==1) {
258 k++;
259 tdatasort[k]=datasort[msh*numdata+n];
260 }
261 }
262
263 for (Int_t n=ndstart;n<=ndend;n++)
264 {
265
266 ih=datasort[msh*numdata+n];
267 if (idmove[ih]==0)
268 {
269 k++;
270 tdatasort[k]=datasort[msh*numdata+n];
271 }
272 }
273 for(Int_t k=ndstart;k<=ndend;k++)
274 {
275 datasort[msh*numdata+k]=tdatasort[k];
276 }
277 }
278
279 // compute case nos. for right and left nodes.
280
281 for(Int_t n=ndstart;n<=ndend;n++)
282 {
283 ncase[n]=datasort[msplit*numdata+n];
284 }
285
286 return;
287}
288
289void MRanTree::BuildTree(TArrayI &datasort,TArrayI &datarang,TArrayI &hadtrue,Int_t mdim,
290 Int_t numdata,TArrayI &bestsplit,TArrayI &bestsplitnext,
291 TArrayF &ginidec,TArrayI &nodepop,TArrayI &nodestart,TArrayF &tclasspop,
292 Int_t nrnodes,TArrayI &idmove,TArrayI &ncase,TArrayI &parent,
293 TArrayI &jinbag,TArrayI &iv,TArrayF &winbag,TArrayF &wr,TArrayF &wc,
294 TArrayF &wl,Int_t ninbag,TRandom &rand)
295{
296 // Buildtree consists of repeated calls to two void functions, FindBestSplit and MoveData.
297 // Findbestsplit does just that--it finds the best split of the current node.
298 // MoveData moves the data in the split node right and left so that the data
299 // corresponding to each child node is contiguous.
300 //
301 // buildtree bookkeeping:
302 // ncur is the total number of nodes to date. nodestatus(k)=1 if the kth node has been split.
303 // nodestatus(k)=2 if the node exists but has not yet been split, and =-1 if the node is
304 // terminal. A node is terminal if its size is below a threshold value, or if it is all
305 // one class, or if all the data-values are equal. If the current node k is split, then its
306 // children are numbered ncur+1 (left), and ncur+2(right), ncur increases to ncur+2 and
307 // the next node to be split is numbered k+1. When no more nodes can be split, buildtree
308 // returns.
309
310 Int_t msplit,nbest,ndendl,nc,jstat,ndend,ndstart;
311 Float_t decsplit=0;
312 Float_t popt1,popt2,pp;
313 TArrayF classpop;
314 TArrayI nodestatus;
315
316 nodestatus.Set(nrnodes);
317 classpop.Set(2*nrnodes);
318
319 nodestatus.Reset();
320 nodestart.Reset();
321 nodepop.Reset();
322 classpop.Reset();
323
324
325 for (Int_t j=0;j<2;j++)
326 classpop[j*nrnodes+0]=tclasspop[j];
327
328 Int_t ncur=0;
329 nodestart[0]=0;
330 nodepop[0]=ninbag;
331 nodestatus[0]=2;
332
333 // start main loop
334 for (Int_t kbuild=0;kbuild<nrnodes;kbuild++)
335 {
336 if (kbuild>ncur) break;
337 if (nodestatus[kbuild]!=2) continue;
338
339 // initialize for next call to FindBestSplit
340
341 ndstart=nodestart[kbuild];
342 ndend=ndstart+nodepop[kbuild]-1;
343 for (Int_t j=0;j<2;j++)
344 {
345 tclasspop[j]=classpop[j*nrnodes+kbuild];
346 }
347
348 jstat=FindBestSplit(datasort,datarang,hadtrue,mdim,numdata,
349 ndstart,ndend,tclasspop,msplit,decsplit,
350 nbest,ncase,jinbag,iv,winbag,wr,wc,wl,
351 kbuild,rand);
352
353 if(jstat==1) {
354 nodestatus[kbuild]=-1;
355 continue;
356 }else{
357 fBestVar[kbuild]=msplit;
358 ginidec[msplit]+=decsplit;
359
360 bestsplit[kbuild]=datasort[msplit*numdata+nbest];
361 bestsplitnext[kbuild]=datasort[msplit*numdata+nbest+1];
362 }
363
364 MoveData(datasort,mdim,numdata,ndstart,ndend,idmove,ncase,
365 msplit,nbest,ndendl);
366
367 // leftnode no.= ncur+1, rightnode no. = ncur+2.
368
369 nodepop[ncur+1]=ndendl-ndstart+1;
370 nodepop[ncur+2]=ndend-ndendl;
371 nodestart[ncur+1]=ndstart;
372 nodestart[ncur+2]=ndendl+1;
373
374 // find class populations in both nodes
375
376 for (Int_t n=ndstart;n<=ndendl;n++)
377 {
378 nc=ncase[n];
379 Int_t j=hadtrue[nc];
380 classpop[j*nrnodes+ncur+1]+=winbag[nc];
381 }
382
383 for (Int_t n=ndendl+1;n<=ndend;n++)
384 {
385 nc=ncase[n];
386 Int_t j=hadtrue[nc];
387 classpop[j*nrnodes+ncur+2]+=winbag[nc];
388
389 }
390
391 // check on nodestatus
392
393 nodestatus[ncur+1]=2;
394 nodestatus[ncur+2]=2;
395 if (nodepop[ncur+1]<=fNdSize) nodestatus[ncur+1]=-1;
396 if (nodepop[ncur+2]<=fNdSize) nodestatus[ncur+2]=-1;
397 popt1=0;
398 popt2=0;
399 for (Int_t j=0;j<2;j++)
400 {
401 popt1+=classpop[j*nrnodes+ncur+1];
402 popt2+=classpop[j*nrnodes+ncur+2];
403 }
404
405 for (Int_t j=0;j<2;j++)
406 {
407 if (classpop[j*nrnodes+ncur+1]==popt1) nodestatus[ncur+1]=-1;
408 if (classpop[j*nrnodes+ncur+2]==popt2) nodestatus[ncur+2]=-1;
409 }
410
411 fTreeMap1[kbuild]=ncur+1;
412 fTreeMap2[kbuild]=ncur+2;
413 parent[ncur+1]=kbuild;
414 parent[ncur+2]=kbuild;
415 nodestatus[kbuild]=1;
416 ncur+=2;
417 if (ncur>=nrnodes) break;
418 }
419
420 // determine number of nodes
421 fNumNodes=nrnodes;
422 for (Int_t k=nrnodes-1;k>=0;k--)
423 {
424 if (nodestatus[k]==0) fNumNodes-=1;
425 if (nodestatus[k]==2) nodestatus[k]=-1;
426 }
427
428 fNumEndNodes=0;
429 for (Int_t kn=0;kn<fNumNodes;kn++)
430 {
431 if(nodestatus[kn]==-1)
432 {
433 fNumEndNodes++;
434 pp=0;
435 for (Int_t j=0;j<2;j++)
436 {
437 if(classpop[j*nrnodes+kn]>pp)
438 {
439 // class + status of node kn coded into fBestVar[kn]
440 fBestVar[kn]=j-2;
441 pp=classpop[j*nrnodes+kn];
442 }
443 }
444 fBestSplit[kn] =classpop[1*nrnodes+kn];
445 fBestSplit[kn]/=(classpop[0*nrnodes+kn]+classpop[1*nrnodes+kn]);
446 }
447 }
448 return;
449}
450
451void MRanTree::SetRules(MDataArray *rules)
452{
453 fData=rules;
454}
455
456Double_t MRanTree::TreeHad(TVector &event)
457{
458 Int_t kt=0;
459 // to optimize on storage space node status and node class
460 // are coded into fBestVar:
461 // status of node kt = TMath::Sign(1,fBestVar[kt])
462 // hadronness assigned to node kt = fBestSplit[kt]
463
464 for (Int_t k=0;k<fNumNodes;k++)
465 {
466 if (fBestVar[kt]<0)
467 break;
468
469 Int_t m=fBestVar[kt];
470 if (event(m)<=fBestSplit[kt])
471 {
472 kt=fTreeMap1[kt];
473 }else{
474 kt=fTreeMap2[kt];
475 }
476 }
477 return fBestSplit[kt];
478}
479
480Double_t MRanTree::TreeHad()
481{
482 const Double_t ncols = fData->GetNumEntries();
483 TVector event(ncols);
484
485 for (int i=0; i<fData->GetNumEntries(); i++)
486 event(i) = (*fData)(i);
487
488 Int_t kt=0;
489 // to optimize on storage space node status and node class
490 // are coded into fBestVar:
491 // status of node kt = TMath::Sign(1,fBestVar[kt])
492 // class of node kt = fBestVar[kt]+2 (class defined by larger
493 // node population, actually not used)
494 // hadronness assigned to node kt = fBestSplit[kt]
495
496 for (Int_t k=0;k<fNumNodes;k++)
497 {
498 if (fBestVar[kt]<0)
499 break;
500
501 Int_t m=fBestVar[kt];
502 if (event(m)<=fBestSplit[kt])
503 {
504 kt=fTreeMap1[kt];
505 }else{
506 kt=fTreeMap2[kt];
507 }
508 }
509 return fBestSplit[kt];
510}
511
512Bool_t MRanTree::AsciiWrite(ostream &out) const
513{
514 TString str;
515 Int_t k;
516
517 out.width(5);out<<fNumNodes<<endl;
518
519 for (k=0;k<fNumNodes;k++)
520 {
521 str=Form("%f",GetBestSplit(k));
522
523 out.width(5); out << k;
524 out.width(5); out << GetNodeStatus(k);
525 out.width(5); out << GetTreeMap1(k);
526 out.width(5); out << GetTreeMap2(k);
527 out.width(5); out << GetBestVar(k);
528 out.width(15); out << str<<endl;
529 out.width(5); out << GetNodeClass(k);
530 }
531 out<<endl;
532
533 return k==fNumNodes;
534}
Note: See TracBrowser for help on using the repository browser.