source: trunk/MagicSoft/Mars/mranforest/MRanForest.cc@ 2273

Last change on this file since 2273 was 2173, checked in by tbretz, 22 years ago
*** empty log message ***
File size: 11.7 KB
Line 
1/* ======================================================================== *\
2!
3! *
4! * This file is part of MARS, the MAGIC Analysis and Reconstruction
5! * Software. It is distributed to you in the hope that it can be a useful
6! * and timesaving tool in analysing Data of imaging Cerenkov telescopes.
7! * It is distributed WITHOUT ANY WARRANTY.
8! *
9! * Permission to use, copy, modify and distribute this software and its
10! * documentation for any purpose is hereby granted without fee,
11! * provided that the above copyright notice appear in all copies and
12! * that both that copyright notice and this permission notice appear
13! * in supporting documentation. It is provided "as is" without express
14! * or implied warranty.
15! *
16!
17!
18! Author(s): Thomas Hengstebeck 3/2003 <mailto:hengsteb@alwa02.physik.uni-siegen.de>
19!
20! Copyright: MAGIC Software Development, 2000-2003
21!
22!
23\* ======================================================================== */
24
25/////////////////////////////////////////////////////////////////////////////
26// //
27// MRanForest //
28// //
29// ParameterContainer for Forest structure //
30// //
31// A random forest can be grown by calling GrowForest. //
32// In advance SetupGrow must be called in order to initialize arrays and //
33// do some preprocessing. //
34// GrowForest() provides the training data for a single tree (bootstrap //
35// aggregate procedure) //
36// //
37// Essentially two random elements serve to provide a "random" forest, //
38// namely bootstrap aggregating (which is done in GrowForest()) and random //
39// split selection (which is subject to MRanTree::GrowTree()) //
40// //
41/////////////////////////////////////////////////////////////////////////////
42#include "MRanForest.h"
43
44#include <TMatrix.h>
45#include <TRandom3.h>
46
47#include "MHMatrix.h"
48#include "MRanTree.h"
49
50#include "MLog.h"
51#include "MLogManip.h"
52
53ClassImp(MRanForest);
54
55using namespace std;
56
57// --------------------------------------------------------------------------
58//
59// Default constructor.
60//
61MRanForest::MRanForest(const char *name, const char *title) : fNumTrees(100), fRanTree(NULL),fUsePriors(kFALSE)
62{
63 fName = name ? name : "MRanForest";
64 fTitle = title ? title : "Storage container for Random Forest";
65
66 fForest=new TObjArray();
67 fForest->SetOwner(kTRUE);
68}
69
70// --------------------------------------------------------------------------
71//
72// Destructor.
73//
74MRanForest::~MRanForest()
75{
76 delete fForest;
77}
78
79void MRanForest::SetNumTrees(Int_t n)
80{
81 //at least 1 tree
82 fNumTrees=TMath::Max(n,1);
83 fTreeHad.Set(fNumTrees);
84 fTreeHad.Reset();
85}
86
87void MRanForest::SetPriors(Float_t prior_had, Float_t prior_gam)
88{
89 const Float_t sum=prior_gam+prior_had;
90
91 prior_gam/=sum;
92 prior_had/=sum;
93
94 fPrior[0]=prior_had;
95 fPrior[1]=prior_gam;
96
97 fUsePriors=kTRUE;
98}
99
100Double_t MRanForest::CalcHadroness(const TVector &event)
101{
102 Double_t hadroness=0;
103 Int_t ntree=0;
104
105 TIter forest(fForest);
106
107 MRanTree *tree;
108 while ((tree=(MRanTree*)forest.Next()))
109 {
110 fTreeHad[ntree]=tree->TreeHad(event);
111 hadroness+=fTreeHad[ntree];
112 ntree++;
113 }
114 return hadroness/Double_t(ntree);
115}
116
117Bool_t MRanForest::AddTree(MRanTree *rantree=NULL)
118{
119 if (rantree)
120 fRanTree=rantree;
121 if (!fRanTree)
122 return kFALSE;
123
124 fForest->Add((MRanTree*)fRanTree->Clone());
125
126 return kTRUE;
127}
128
129Bool_t MRanForest::SetupGrow(MHMatrix *mhad,MHMatrix *mgam)
130{
131 // pointer to training data
132 fHadrons=mhad;
133 fGammas=mgam;
134
135 // determine data entries and dimension of Hillas-parameter space
136 fNumHad=fHadrons->GetM().GetNrows();
137 fNumGam=fGammas->GetM().GetNrows();
138 fNumDim=fHadrons->GetM().GetNcols();
139
140 if (fNumDim!=fHadrons->GetM().GetNcols()) return kFALSE;
141
142 fNumData=fNumHad+fNumGam;
143
144 // allocating and initializing arrays
145 fHadTrue.Set(fNumData);
146 fHadTrue.Reset();
147 fHadEst.Set(fNumData);
148
149 fPrior.Set(2);
150 fClassPop.Set(2);
151 fWeight.Set(fNumData);
152 fNTimesOutBag.Set(fNumData);
153 fNTimesOutBag.Reset();
154
155 fDataSort.Set(fNumDim*fNumData);
156 fDataRang.Set(fNumDim*fNumData);
157
158 // calculating class populations (= no. of gammas and hadrons)
159 fClassPop.Reset();
160 for(Int_t n=0;n<fNumData;n++)
161 fClassPop[fHadTrue[n]]++;
162
163 // setting weights taking into account priors
164 if (!fUsePriors)
165 {
166 fWeight.Reset(1.);
167 }else{
168 for(Int_t j=0;j<2;j++)
169 fPrior[j] *= (fClassPop[j]>=1) ?
170 Float_t(fNumData)/Float_t(fClassPop[j]):0;
171
172 for(Int_t n=0;n<fNumData;n++)
173 fWeight[n]=fPrior[fHadTrue[n]];
174 }
175
176 // create fDataSort
177 CreateDataSort();
178
179 if(!fRanTree)
180 {
181 *fLog << err << dbginf << "MRanForest, fRanTree not initialized... aborting." << endl;
182 return kFALSE;
183 }
184 fRanTree->SetRules(fGammas->GetColumns());
185 fTreeNo=0;
186
187 return kTRUE;
188}
189
190Bool_t MRanForest::GrowForest()
191{
192 Int_t ninbag=0;
193 TArrayI datsortinbag;
194 TArrayF classpopw;
195 TArrayI jinbag;
196 TArrayF winbag;
197
198 jinbag.Set(fNumData);
199 winbag.Set(fNumData);
200 classpopw.Set(2);
201
202 TMatrix hadrons=fHadrons->GetM();
203 TMatrix gammas=fGammas->GetM();
204
205 fTreeNo++;
206
207 // initialize running output
208 if(fTreeNo==1)
209 {
210 cout<<endl<<endl<<"1st col.: no. of tree"<<endl;
211 cout<<"2nd col.: error in % (calulated using oob-data -> overestim. of error)"<<endl;
212 }
213
214 jinbag.Reset();
215 classpopw.Reset();
216 winbag.Reset();
217
218 // bootstrap aggregating (bagging) -> sampling with replacement:
219 //
220 // The integer k is randomly (uniformly) chosen from the set
221 // {0,1,...,fNumData-1}, which is the set of the index numbers of
222 // all events in the training sample
223
224 for (Int_t n=0;n<fNumData;n++)
225 {
226 if(!gRandom)
227 {
228 *fLog << err << dbginf << "gRandom not initialized... aborting." << endl;
229 return kFALSE;
230 }
231
232 Int_t k=Int_t(fNumData*gRandom->Rndm());
233
234 classpopw[fHadTrue[k]]+=fWeight[k];
235 winbag[k]+=fWeight[k];
236 jinbag[k]=1;
237 }
238
239 // modifying sorted-data array for in-bag data:
240 //
241 // In bagging procedure ca. 2/3 of all elements in the original
242 // training sample are used to build the in-bag data
243 datsortinbag=fDataSort;
244
245 ModifyDataSort(datsortinbag,ninbag,jinbag);
246
247 // growing single tree
248 fRanTree->GrowTree(hadrons,gammas,fNumData,fNumDim,fHadTrue,datsortinbag,
249 fDataRang,classpopw,jinbag,winbag,fWeight);
250
251 // error-estimates from out-of-bag data (oob data):
252 //
253 // For a single tree the events not(!) contained in the bootstrap sample of
254 // this tree can be used to obtain estimates for the classification error of
255 // this tree.
256 // If you take a certain event, it is contained in the oob-data of 1/3 of
257 // the trees (see comment to ModifyData). This means that the classification error
258 // determined from oob-data is underestimated, but can still be taken as upper limit.
259
260 TVector event(fNumDim);
261 for(Int_t ievt=0;ievt<fNumHad;ievt++)
262 {
263 if(jinbag[ievt]>0)continue;
264 event=TMatrixRow(hadrons,ievt);
265 fHadEst[ievt]+=fRanTree->TreeHad(event);
266 fNTimesOutBag[ievt]++;
267 }
268 for(Int_t ievt=0;ievt<fNumGam;ievt++)
269 {
270 if(jinbag[fNumHad+ievt]>0)continue;
271 event=TMatrixRow(gammas,ievt);
272 fHadEst[fNumHad+ievt]+=fRanTree->TreeHad(event);
273 fNTimesOutBag[fNumHad+ievt]++;
274 }
275
276 Int_t n=0;
277 fErr=0;
278 for(Int_t ievt=0;ievt<fNumData;ievt++)
279 if(fNTimesOutBag[ievt]!=0)
280 {
281 fErr+=TMath::Power(fHadEst[ievt]/fNTimesOutBag[ievt]-fHadTrue[ievt],2.);
282 n++;
283 }
284
285 fErr/=Float_t(n);
286 fErr=TMath::Sqrt(fErr);
287
288 // give running output
289 cout << setw(5) << fTreeNo << setw(15) << Form("%.2f",100.*fErr) << endl;
290
291 // adding tree to forest
292 AddTree();
293
294 return(fTreeNo<fNumTrees);
295}
296
297void MRanForest::CreateDataSort()
298{
299 // The values of concatenated data arrays fHadrons and fGammas (denoted in the following by fData,
300 // which does actually not exist) are sorted from lowest to highest.
301 //
302 //
303 // fHadrons(0,0) ... fHadrons(0,nhad-1) fGammas(0,0) ... fGammas(0,ngam-1)
304 // . . . .
305 // fData(m,n) = . . . .
306 // . . . .
307 // fHadrons(m-1,0)...fHadrons(m-1,nhad-1) fGammas(m-1,0)...fGammas(m-1,ngam-1)
308 //
309 //
310 // Then fDataSort(m,n) is the event number in which fData(m,n) occurs.
311 // fDataRang(m,n) is the rang of fData(m,n), i.e. if rang = r, fData(m,n) is the r-th highest
312 // value of all fData(m,.). There may be more then 1 event with rang r (due to bagging).
313
314 TArrayF v(fNumData);
315 TArrayI isort(fNumData);
316
317 TMatrix hadrons=fHadrons->GetM();
318 TMatrix gammas=fGammas->GetM();
319
320 for (Int_t j=0;j<fNumHad;j++)
321 fHadTrue[j]=1;
322
323 for (Int_t j=0;j<fNumGam;j++)
324 fHadTrue[j+fNumHad]=0;
325
326 for (Int_t mvar=0;mvar<fNumDim;mvar++)
327 {
328 for(Int_t n=0;n<fNumHad;n++)
329 {
330 v[n]=hadrons(n,mvar);
331 isort[n]=n;
332 }
333
334 for(Int_t n=0;n<fNumGam;n++)
335 {
336 v[n+fNumHad]=gammas(n,mvar);
337 isort[n+fNumHad]=n;
338 }
339
340 TMath::Sort(fNumData,v.GetArray(),isort.GetArray(),kFALSE);
341
342 // this sorts the v[n] in ascending order. isort[n] is the event number
343 // of that v[n], which is the n-th from the lowest (assume the original
344 // event numbers are 0,1,...).
345
346 for(Int_t n=0;n<fNumData-1;n++)
347 {
348 Int_t n1=isort[n];
349 Int_t n2=isort[n+1];
350 fDataSort[mvar*fNumData+n]=n1;
351 if(n==0) fDataRang[mvar*fNumData+n1]=0;
352 if(v[n]<v[n+1])
353 {
354 fDataRang[mvar*fNumData+n2]=fDataRang[mvar*fNumData+n1]+1;
355 }else{
356 fDataRang[mvar*fNumData+n2]=fDataRang[mvar*fNumData+n1];
357 }
358 }
359 fDataSort[(mvar+1)*fNumData-1]=isort[fNumData-1];
360 }
361
362 return;
363}
364
365void MRanForest::ModifyDataSort(TArrayI &datsortinbag,Int_t ninbag,TArrayI &jinbag)
366{
367 ninbag=0;
368 for (Int_t n=0;n<fNumData;n++)
369 if(jinbag[n]==1) ninbag++;
370
371 for(Int_t m=0;m<fNumDim;m++)
372 {
373 Int_t k=0;
374 Int_t nt=0;
375 for(Int_t n=0;n<fNumData;n++)
376 {
377 if(jinbag[datsortinbag[m*fNumData+k]]==1)
378 {
379 datsortinbag[m*fNumData+nt]=datsortinbag[m*fNumData+k];
380 k++;
381 }else{
382 for(Int_t j=1;j<fNumData-k;j++)
383 {
384 if(jinbag[datsortinbag[m*fNumData+k+j]]==1)
385 {
386 datsortinbag[m*fNumData+nt]=datsortinbag[m*fNumData+k+j];
387 k+=j+1;
388 break;
389 }
390 }
391 }
392 nt++;
393 if(nt>=ninbag) break;
394 }
395 }
396 return;
397}
398
399Bool_t MRanForest::AsciiWrite(ostream &out) const
400{
401 Int_t n=0;
402 MRanTree *tree;
403 TIter forest(fForest);
404
405 while ((tree=(MRanTree*)forest.Next()))
406 {
407 tree->AsciiWrite(out);
408 n++;
409 }
410
411 return n==fNumTrees;
412}
Note: See TracBrowser for help on using the repository browser.