source: trunk/MagicSoft/Mars/mranforest/MRanForest.cc@ 2124

Last change on this file since 2124 was 2114, checked in by tbretz, 22 years ago
*** empty log message ***
File size: 11.7 KB
Line 
1/* ======================================================================== *\
2!
3! *
4! * This file is part of MARS, the MAGIC Analysis and Reconstruction
5! * Software. It is distributed to you in the hope that it can be a useful
6! * and timesaving tool in analysing Data of imaging Cerenkov telescopes.
7! * It is distributed WITHOUT ANY WARRANTY.
8! *
9! * Permission to use, copy, modify and distribute this software and its
10! * documentation for any purpose is hereby granted without fee,
11! * provided that the above copyright notice appear in all copies and
12! * that both that copyright notice and this permission notice appear
13! * in supporting documentation. It is provided "as is" without express
14! * or implied warranty.
15! *
16!
17!
18! Author(s): Thomas Hengstebeck 3/2003 <mailto:hengsteb@alwa02.physik.uni-siegen.de>
19!
20! Copyright: MAGIC Software Development, 2000-2003
21!
22!
23\* ======================================================================== */
24
25/////////////////////////////////////////////////////////////////////////////
26// //
27// MRanForest //
28// //
29// ParameterContainer for Forest structure //
30// //
31// A random forest can be grown by calling GrowForest. //
32// In advance SetupGrow must be called in order to initialize arrays and //
33// do some preprocessing. //
34// GrowForest() provides the training data for a single tree (bootstrap //
35// aggregate procedure) //
36// //
37// Essentially two random elements serve to provide a "random" forest, //
38// namely bootstrap aggregating (which is done in GrowForest()) and random //
39// split selection (which is subject to MRanTree::GrowTree()) //
40// //
41/////////////////////////////////////////////////////////////////////////////
42#include "MRanForest.h"
43
44#include <TMatrix.h>
45#include <TRandom3.h>
46
47#include "MHMatrix.h"
48#include "MRanTree.h"
49
50#include "MLog.h"
51#include "MLogManip.h"
52
53ClassImp(MRanForest);
54
55// --------------------------------------------------------------------------
56//
57// Default constructor.
58//
59MRanForest::MRanForest(const char *name, const char *title) : fNumTrees(100), fRanTree(NULL),fUsePriors(kFALSE)
60{
61 fName = name ? name : "MRanForest";
62 fTitle = title ? title : "Storage container for Random Forest";
63
64 fForest=new TObjArray();
65 fForest->SetOwner(kTRUE);
66}
67
68// --------------------------------------------------------------------------
69//
70// Destructor.
71//
72MRanForest::~MRanForest()
73{
74 delete fForest;
75}
76
77void MRanForest::SetNumTrees(Int_t n)
78{
79 //at least 1 tree
80 fNumTrees=TMath::Max(n,1);
81 fTreeHad.Set(fNumTrees);
82 fTreeHad.Reset();
83}
84
85void MRanForest::SetPriors(Float_t prior_had, Float_t prior_gam)
86{
87 const Float_t sum=prior_gam+prior_had;
88
89 prior_gam/=sum;
90 prior_had/=sum;
91
92 fPrior[0]=prior_had;
93 fPrior[1]=prior_gam;
94
95 fUsePriors=kTRUE;
96}
97
98Double_t MRanForest::CalcHadroness(const TVector &event)
99{
100 Double_t hadroness=0;
101 Int_t ntree=0;
102
103 TIter forest(fForest);
104
105 MRanTree *tree;
106 while ((tree=(MRanTree*)forest.Next()))
107 {
108 fTreeHad[ntree]=tree->TreeHad(event);
109 hadroness+=fTreeHad[ntree];
110 ntree++;
111 }
112 return hadroness/Double_t(ntree);
113}
114
115Bool_t MRanForest::AddTree(MRanTree *rantree=NULL)
116{
117 if (rantree)
118 fRanTree=rantree;
119 if (!fRanTree)
120 return kFALSE;
121
122 fForest->Add((MRanTree*)fRanTree->Clone());
123
124 return kTRUE;
125}
126
127Bool_t MRanForest::SetupGrow(MHMatrix *mhad,MHMatrix *mgam)
128{
129 // pointer to training data
130 fHadrons=mhad;
131 fGammas=mgam;
132
133 // determine data entries and dimension of Hillas-parameter space
134 fNumHad=fHadrons->GetM().GetNrows();
135 fNumGam=fGammas->GetM().GetNrows();
136 fNumDim=fHadrons->GetM().GetNcols();
137
138 if (fNumDim!=fHadrons->GetM().GetNcols()) return kFALSE;
139
140 fNumData=fNumHad+fNumGam;
141
142 // allocating and initializing arrays
143 fHadTrue.Set(fNumData);
144 fHadTrue.Reset();
145 fHadEst.Set(fNumData);
146
147 fPrior.Set(2);
148 fClassPop.Set(2);
149 fWeight.Set(fNumData);
150 fNTimesOutBag.Set(fNumData);
151 fNTimesOutBag.Reset();
152
153 fDataSort.Set(fNumDim*fNumData);
154 fDataRang.Set(fNumDim*fNumData);
155
156 // calculating class populations (= no. of gammas and hadrons)
157 fClassPop.Reset();
158 for(Int_t n=0;n<fNumData;n++)
159 fClassPop[fHadTrue[n]]++;
160
161 // setting weights taking into account priors
162 if (!fUsePriors)
163 {
164 fWeight.Reset(1.);
165 }else{
166 for(Int_t j=0;j<2;j++)
167 fPrior[j] *= (fClassPop[j]>=1) ?
168 Float_t(fNumData)/Float_t(fClassPop[j]):0;
169
170 for(Int_t n=0;n<fNumData;n++)
171 fWeight[n]=fPrior[fHadTrue[n]];
172 }
173
174 // create fDataSort
175 CreateDataSort();
176
177 if(!fRanTree)
178 {
179 *fLog << err << dbginf << "MRanForest, fRanTree not initialized... aborting." << endl;
180 return kFALSE;
181 }
182 fRanTree->SetRules(fGammas->GetColumns());
183 fTreeNo=0;
184
185 return kTRUE;
186}
187
188Bool_t MRanForest::GrowForest()
189{
190 Int_t ninbag=0;
191 TArrayI datsortinbag;
192 TArrayF classpopw;
193 TArrayI jinbag;
194 TArrayF winbag;
195
196 jinbag.Set(fNumData);
197 winbag.Set(fNumData);
198 classpopw.Set(2);
199
200 TMatrix hadrons=fHadrons->GetM();
201 TMatrix gammas=fGammas->GetM();
202
203 fTreeNo++;
204
205 // initialize running output
206 if(fTreeNo==1)
207 {
208 cout<<endl<<endl<<"1st col.: no. of tree"<<endl;
209 cout<<"2nd col.: error in % (calulated using oob-data -> overestim. of error)"<<endl;
210 }
211
212 jinbag.Reset();
213 classpopw.Reset();
214 winbag.Reset();
215
216 // bootstrap aggregating (bagging) -> sampling with replacement:
217 //
218 // The integer k is randomly (uniformly) chosen from the set
219 // {0,1,...,fNumData-1}, which is the set of the index numbers of
220 // all events in the training sample
221
222 for (Int_t n=0;n<fNumData;n++)
223 {
224 if(!gRandom)
225 {
226 *fLog << err << dbginf << "gRandom not initialized... aborting." << endl;
227 return kFALSE;
228 }
229
230 Int_t k=Int_t(fNumData*gRandom->Rndm());
231
232 classpopw[fHadTrue[k]]+=fWeight[k];
233 winbag[k]+=fWeight[k];
234 jinbag[k]=1;
235 }
236
237 // modifying sorted-data array for in-bag data:
238 //
239 // In bagging procedure ca. 2/3 of all elements in the original
240 // training sample are used to build the in-bag data
241 datsortinbag=fDataSort;
242
243 ModifyDataSort(datsortinbag,ninbag,jinbag);
244
245 // growing single tree
246 fRanTree->GrowTree(hadrons,gammas,fNumData,fNumDim,fHadTrue,datsortinbag,
247 fDataRang,classpopw,jinbag,winbag,fWeight);
248
249 // error-estimates from out-of-bag data (oob data):
250 //
251 // For a single tree the events not(!) contained in the bootstrap sample of
252 // this tree can be used to obtain estimates for the classification error of
253 // this tree.
254 // If you take a certain event, it is contained in the oob-data of 1/3 of
255 // the trees (see comment to ModifyData). This means that the classification error
256 // determined from oob-data is underestimated, but can still be taken as upper limit.
257
258 TVector event(fNumDim);
259 for(Int_t ievt=0;ievt<fNumHad;ievt++)
260 {
261 if(jinbag[ievt]>0)continue;
262 event=TMatrixRow(hadrons,ievt);
263 fHadEst[ievt]+=fRanTree->TreeHad(event);
264 fNTimesOutBag[ievt]++;
265 }
266 for(Int_t ievt=0;ievt<fNumGam;ievt++)
267 {
268 if(jinbag[fNumHad+ievt]>0)continue;
269 event=TMatrixRow(gammas,ievt);
270 fHadEst[fNumHad+ievt]+=fRanTree->TreeHad(event);
271 fNTimesOutBag[fNumHad+ievt]++;
272 }
273
274 Int_t n=0;
275 fErr=0;
276 for(Int_t ievt=0;ievt<fNumData;ievt++)
277 if(fNTimesOutBag[ievt]!=0)
278 {
279 fErr+=TMath::Power(fHadEst[ievt]/fNTimesOutBag[ievt]-fHadTrue[ievt],2.);
280 n++;
281 }
282
283 fErr/=Float_t(n);
284 fErr=TMath::Sqrt(fErr);
285
286 // give running output
287 cout << setw(5) << fTreeNo << setw(15) << Form("%.2f",100.*fErr) << endl;
288
289 // adding tree to forest
290 AddTree();
291
292 return(fTreeNo<fNumTrees);
293}
294
295void MRanForest::CreateDataSort()
296{
297 // The values of concatenated data arrays fHadrons and fGammas (denoted in the following by fData,
298 // which does actually not exist) are sorted from lowest to highest.
299 //
300 //
301 // fHadrons(0,0) ... fHadrons(0,nhad-1) fGammas(0,0) ... fGammas(0,ngam-1)
302 // . . . .
303 // fData(m,n) = . . . .
304 // . . . .
305 // fHadrons(m-1,0)...fHadrons(m-1,nhad-1) fGammas(m-1,0)...fGammas(m-1,ngam-1)
306 //
307 //
308 // Then fDataSort(m,n) is the event number in which fData(m,n) occurs.
309 // fDataRang(m,n) is the rang of fData(m,n), i.e. if rang = r, fData(m,n) is the r-th highest
310 // value of all fData(m,.). There may be more then 1 event with rang r (due to bagging).
311
312 TArrayF v(fNumData);
313 TArrayI isort(fNumData);
314
315 TMatrix hadrons=fHadrons->GetM();
316 TMatrix gammas=fGammas->GetM();
317
318 for (Int_t j=0;j<fNumHad;j++)
319 fHadTrue[j]=1;
320
321 for (Int_t j=0;j<fNumGam;j++)
322 fHadTrue[j+fNumHad]=0;
323
324 for (Int_t mvar=0;mvar<fNumDim;mvar++)
325 {
326 for(Int_t n=0;n<fNumHad;n++)
327 {
328 v[n]=hadrons(n,mvar);
329 isort[n]=n;
330 }
331
332 for(Int_t n=0;n<fNumGam;n++)
333 {
334 v[n+fNumHad]=gammas(n,mvar);
335 isort[n+fNumHad]=n;
336 }
337
338 TMath::Sort(fNumData,v.GetArray(),isort.GetArray(),kFALSE);
339
340 // this sorts the v[n] in ascending order. isort[n] is the event number
341 // of that v[n], which is the n-th from the lowest (assume the original
342 // event numbers are 0,1,...).
343
344 for(Int_t n=0;n<fNumData-1;n++)
345 {
346 Int_t n1=isort[n];
347 Int_t n2=isort[n+1];
348 fDataSort[mvar*fNumData+n]=n1;
349 if(n==0) fDataRang[mvar*fNumData+n1]=0;
350 if(v[n]<v[n+1])
351 {
352 fDataRang[mvar*fNumData+n2]=fDataRang[mvar*fNumData+n1]+1;
353 }else{
354 fDataRang[mvar*fNumData+n2]=fDataRang[mvar*fNumData+n1];
355 }
356 }
357 fDataSort[(mvar+1)*fNumData-1]=isort[fNumData-1];
358 }
359
360 return;
361}
362
363void MRanForest::ModifyDataSort(TArrayI &datsortinbag,Int_t ninbag,TArrayI &jinbag)
364{
365 ninbag=0;
366 for (Int_t n=0;n<fNumData;n++)
367 if(jinbag[n]==1) ninbag++;
368
369 for(Int_t m=0;m<fNumDim;m++)
370 {
371 Int_t k=0;
372 Int_t nt=0;
373 for(Int_t n=0;n<fNumData;n++)
374 {
375 if(jinbag[datsortinbag[m*fNumData+k]]==1)
376 {
377 datsortinbag[m*fNumData+nt]=datsortinbag[m*fNumData+k];
378 k++;
379 }else{
380 for(Int_t j=1;j<fNumData-k;j++)
381 {
382 if(jinbag[datsortinbag[m*fNumData+k+j]]==1)
383 {
384 datsortinbag[m*fNumData+nt]=datsortinbag[m*fNumData+k+j];
385 k+=j+1;
386 break;
387 }
388 }
389 }
390 nt++;
391 if(nt>=ninbag) break;
392 }
393 }
394 return;
395}
396
397Bool_t MRanForest::AsciiWrite(ostream &out) const
398{
399 Int_t n=0;
400 MRanTree *tree;
401 TIter forest(fForest);
402
403 while ((tree=(MRanTree*)forest.Next()))
404 {
405 tree->AsciiWrite(out);
406 n++;
407 }
408
409 return n==fNumTrees;
410}
Note: See TracBrowser for help on using the repository browser.