source: trunk/Mars/mranforest/MRanForestCalc.cc@ 9930

Last change on this file since 9930 was 9583, checked in by tbretz, 15 years ago
*** empty log message ***
File size: 13.9 KB
Line 
1/* ======================================================================== *\
2! $Name: not supported by cvs2svn $:$Id: MRanForestCalc.cc,v 1.32 2010-06-03 11:20:47 tbretz Exp $
3! --------------------------------------------------------------------------
4!
5! *
6! * This file is part of MARS, the MAGIC Analysis and Reconstruction
7! * Software. It is distributed to you in the hope that it can be a useful
8! * and timesaving tool in analysing Data of imaging Cerenkov telescopes.
9! * It is distributed WITHOUT ANY WARRANTY.
10! *
11! * Permission to use, copy, modify and distribute this software and its
12! * documentation for any purpose is hereby granted without fee,
13! * provided that the above copyright notice appear in all copies and
14! * that both that copyright notice and this permission notice appear
15! * in supporting documentation. It is provided "as is" without express
16! * or implied warranty.
17! *
18!
19!
20! Author(s): Thomas Hengstebeck 2/2005 <mailto:hengsteb@physik.hu-berlin.de>
21! Author(s): Thomas Bretz 8/2005 <mailto:tbretz@astro.uni-wuerzburg.de>
22!
23! Copyright: MAGIC Software Development, 2000-2008
24!
25!
26\* ======================================================================== */
27
28/////////////////////////////////////////////////////////////////////////////
29//
30// MRanForestCalc
31//
32//
33////////////////////////////////////////////////////////////////////////////
34#include "MRanForestCalc.h"
35
36#include <stdlib.h> // Needed for atof in some cases
37
38#include <TMath.h>
39
40#include <TF1.h>
41#include <TFile.h>
42#include <TGraph.h>
43#include <TVector.h>
44
45#include "MHMatrix.h"
46
47#include "MLog.h"
48#include "MLogManip.h"
49
50#include "MData.h"
51#include "MDataArray.h"
52
53#include "MRanForest.h"
54#include "MParameters.h"
55
56#include "MParList.h"
57#include "MTaskList.h"
58#include "MEvtLoop.h"
59#include "MRanForestGrow.h"
60#include "MFillH.h"
61
62ClassImp(MRanForestCalc);
63
64using namespace std;
65
66const TString MRanForestCalc::gsDefName = "MRanForestCalc";
67const TString MRanForestCalc::gsDefTitle = "RF for energy estimation";
68
69const TString MRanForestCalc::gsNameOutput = "RanForestOut";
70const TString MRanForestCalc::gsNameEvalFunc = "EvalFunction";
71
72MRanForestCalc::MRanForestCalc(const char *name, const char *title)
73 : fData(0), fRFOut(0), fTestMatrix(0), fFunc("x"),
74 fNumTrees(-1), fNumTry(-1), fNdSize(-1), fNumObsoleteVariables(1),
75 fLastDataColumnHasWeights(kFALSE),
76 fNameOutput(gsNameOutput), fDebug(kFALSE), fEstimationMode(kMean)
77{
78 fName = name ? name : gsDefName.Data();
79 fTitle = title ? title : gsDefTitle.Data();
80
81 // FIXME:
82 fNumTrees = 100; //100
83 fNumTry = 0; //3 0 means: in MRanForest estimated best value will be calculated
84 fNdSize = 1; //1
85}
86
87MRanForestCalc::~MRanForestCalc()
88{
89 fEForests.Delete();
90}
91
92// --------------------------------------------------------------------------
93//
94// Set a function which is applied to the output of the random forest
95//
96Bool_t MRanForestCalc::SetFunction(const char *func)
97{
98 return !fFunc.SetRule(func);
99}
100
101// --------------------------------------------------------------------------
102//
103// ver=0: One yes/no-classification forest is trained for each bin.
104// the yes/no classification is done using the grid
105// ver=1: One classification forest is trained. The last column contains a
106// value which is turned into a classifier by rf itself using the grid
107// ver=2: One classification forest is trained. The last column already contains
108// the classifier
109// ver=3: A regression forest is trained. The last column contains the
110// classifier
111//
112Int_t MRanForestCalc::Train(const MHMatrix &matrixtrain, const TArrayD &grid, Int_t ver)
113{
114 gLog.Separator("MRanForestCalc - Train");
115
116 if (!matrixtrain.GetColumns())
117 {
118 *fLog << err << "ERROR - MHMatrix does not contain rules... abort." << endl;
119 return kFALSE;
120 }
121
122 const Int_t ncols = matrixtrain.GetM().GetNcols();
123 const Int_t nrows = matrixtrain.GetM().GetNrows();
124 if (ncols<=0 || nrows <=0)
125 {
126 *fLog << err << "ERROR - No. of columns or no. of rows of matrixtrain equal 0 ... abort." << endl;
127 return kFALSE;
128 }
129
130 // rules (= combination of image par) to be used for energy estimation
131 TFile fileRF(fFileName, "recreate");
132 if (!fileRF.IsOpen())
133 {
134 *fLog << err << "ERROR - File to store RFs could not be opened... abort." << endl;
135 return kFALSE;
136 }
137
138 // The number of columns which have to be removed for the training
139 // The last data column may contain weight which also have to be removed
140 const Int_t nobs = fNumObsoleteVariables + (fLastDataColumnHasWeights?1:0); // Number of obsolete columns
141
142 const MDataArray &dcol = *matrixtrain.GetColumns();
143
144 // Make a copy of the rules for accessing the train-data
145 MDataArray usedrules;
146 for (Int_t i=0; i<ncols; i++)
147 if (i<ncols-nobs) // -3 is important!!!
148 usedrules.AddEntry(dcol[i].GetRule());
149 else
150 *fLog << inf << "Skipping " << dcol[i].GetRule() << " for training" << endl;
151
152 // In the case of regression store the rule to be regessed in the
153 // last entry of your rules
154 MDataArray rules(usedrules);
155 rules.AddEntry(ver<3?"Classification.fVal":dcol[ncols-1].GetRule().Data());
156
157 // prepare train-matrix finally used
158 TMatrix mat(matrixtrain.GetM());
159
160 // Resize it such that the obsolete columns are removed
161 mat.ResizeTo(nrows, ncols-nobs+1);
162
163 if (fDebug)
164 gLog.SetNullOutput(kTRUE);
165
166 // In the case one independant RF is trained for each bin (e.g.
167 // energy-bin) train all of them
168 const Int_t nbins = ver>0 ? 1 : grid.GetSize()-1;
169 for (Int_t ie=0; ie<nbins; ie++)
170 {
171 // In the case weights should be used initialize the
172 // corresponding array
173 Double_t sum = 0;
174
175 TArrayF weights(nrows);
176 if (fLastDataColumnHasWeights)
177 {
178 for (Int_t j=0; j<nrows; j++)
179 {
180 weights[j] = matrixtrain.GetM()(j, ncols-nobs);
181 sum += weights[j];
182 }
183 }
184
185 *fLog << inf << "MRanForestCalc::Train: Sum of weights " << sum << endl;
186
187 // Setup the matrix such that the last comlumn contains
188 // the classifier or the regeression target value
189 switch (ver)
190 {
191 case 0: // Replace last column by a classification which is 1 in
192 // the case the event belongs to this bin, 0 otherwise
193 {
194 Int_t irows=0;
195 for (Int_t j=0; j<nrows; j++)
196 {
197 const Double_t value = matrixtrain.GetM()(j,ncols-1);
198 const Bool_t inside = value>grid[ie] && value<=grid[ie+1];
199
200 mat(j, ncols-nobs) = inside ? 1 : 0;
201
202 if (inside)
203 irows++;
204 }
205 if (irows==0)
206 *fLog << warn << "WARNING - Skipping";
207 else
208 *fLog << inf << "Training RF for";
209
210 *fLog << " bin " << ie << " (" << grid[ie] << ", " << grid[ie+1] << ") " << irows << "/" << nrows << endl;
211
212 if (irows==0)
213 continue;
214 }
215 break;
216
217 case 1: // Use last column as classifier or for regression
218 case 2:
219 case 3:
220 for (Int_t j=0; j<nrows; j++)
221 mat(j, ncols-nobs) = matrixtrain.GetM()(j,ncols-1);
222 break;
223 }
224
225 MHMatrix matrix(mat, &rules, "MatrixTrain");
226
227 MParList plist;
228 MTaskList tlist;
229 plist.AddToList(&tlist);
230 plist.AddToList(&matrix);
231
232 MRanForest rf;
233 rf.SetNumTrees(fNumTrees);
234 rf.SetNumTry(fNumTry);
235 rf.SetNdSize(fNdSize);
236 rf.SetClassify(ver<3 ? kTRUE : kFALSE);
237 if (ver==1)
238 rf.SetGrid(grid);
239 if (fLastDataColumnHasWeights)
240 rf.SetWeights(weights);
241
242 plist.AddToList(&rf);
243
244 MRanForestGrow rfgrow;
245 tlist.AddToList(&rfgrow);
246
247 MFillH fillh("MHRanForestGini");
248 tlist.AddToList(&fillh);
249
250 MEvtLoop evtloop(fTitle);
251 evtloop.SetParList(&plist);
252 evtloop.SetDisplay(fDisplay);
253 evtloop.SetLogStream(fLog);
254
255 if (!evtloop.Eventloop())
256 return kFALSE;
257
258 if (fDebug)
259 gLog.SetNullOutput(kFALSE);
260
261 if (ver==0)
262 {
263 // Calculate bin center
264 const Double_t E = (TMath::Log10(grid[ie])+TMath::Log10(grid[ie+1]))/2;
265
266 // save whole forest
267 rf.SetUserVal(E);
268 rf.SetName(Form("%.10f", E));
269 }
270
271 rf.Write();
272 }
273
274 // save rules
275 usedrules.Write("rules");
276
277 fFunc.Write(gsNameEvalFunc);
278
279 return kTRUE;
280}
281
282Int_t MRanForestCalc::ReadForests(MParList &plist)
283{
284 TFile fileRF(fFileName, "read");
285 if (!fileRF.IsOpen())
286 {
287 *fLog << err << dbginf << "File containing RFs could not be opened... aborting." << endl;
288 return kFALSE;
289 }
290
291 fEForests.Delete();
292
293 TIter Next(fileRF.GetListOfKeys());
294 TObject *o=0;
295 while ((o=Next()))
296 {
297 MRanForest *forest=0;
298 fileRF.GetObject(o->GetName(), forest);
299 if (!forest)
300 continue;
301
302 forest->SetUserVal(atof(o->GetName()));
303
304 fEForests.Add(forest);
305 }
306
307 // Maybe fEForests[0].fRules could be used instead?
308 if (fData->Read("rules")<=0)
309 {
310 *fLog << err << "ERROR - Reading 'rules' from file " << fFileName << endl;
311 return kFALSE;
312 }
313
314 if (fileRF.GetListOfKeys()->FindObject(gsNameEvalFunc))
315 {
316 if (fFunc.Read(gsNameEvalFunc)<=0)
317 {
318 *fLog << err << "ERROR - Reading '" << gsNameEvalFunc << "' from file " << fFileName << endl;
319 return kFALSE;
320 }
321
322 *fLog << inf << "Evaluation function found in file: " << fFunc.GetRule() << endl;
323 }
324
325 return kTRUE;
326}
327
328Int_t MRanForestCalc::PreProcess(MParList *plist)
329{
330 fRFOut = (MParameterD*)plist->FindCreateObj("MParameterD", fNameOutput);
331 if (!fRFOut)
332 return kFALSE;
333
334 fData = (MDataArray*)plist->FindCreateObj("MDataArray");
335 if (!fData)
336 return kFALSE;
337
338 if (!ReadForests(*plist))
339 {
340 *fLog << err << "Reading RFs failed... aborting." << endl;
341 return kFALSE;
342 }
343
344 *fLog << inf << "RF read from " << fFileName << endl;
345
346 if (!fFunc.PreProcess(plist))
347 {
348 *fLog << err << "PreProcessing of evaluation function failed... aborting." << endl;
349 return kFALSE;
350 }
351
352 if (fTestMatrix)
353 return kTRUE;
354
355 fData->Print();
356
357 if (!fData->PreProcess(plist))
358 {
359 *fLog << err << "PreProcessing of the MDataArray failed... aborting." << endl;
360 return kFALSE;
361 }
362
363 return kTRUE;
364}
365
366Double_t MRanForestCalc::Eval() const
367{
368 TVector event;
369 if (fTestMatrix)
370 *fTestMatrix >> event;
371 else
372 *fData >> event;
373
374 // --------------- Single Tree RF -------------------
375 if (fEForests.GetEntriesFast()==1)
376 {
377 MRanForest *rf = static_cast<MRanForest*>(fEForests.UncheckedAt(0));
378 return rf->CalcHadroness(event);
379 }
380
381 // --------------- Multi Tree RF -------------------
382 static TF1 f1("f1", "gaus");
383
384 Double_t sume = 0;
385 Double_t sumh = 0;
386 Double_t maxh = 0;
387 Double_t maxe = 0;
388
389 Double_t max = -1e10;
390 Double_t min = 1e10;
391
392 TIter Next(&fEForests);
393 MRanForest *rf = 0;
394
395 TGraph g;
396 while ((rf=(MRanForest*)Next()))
397 {
398 const Double_t h = rf->CalcHadroness(event);
399 const Double_t e = rf->GetUserVal();
400
401 g.SetPoint(g.GetN(), e, h);
402
403 sume += e*h;
404 sumh += h;
405
406 if (h>maxh)
407 {
408 maxh = h;
409 maxe = e;
410 }
411 if (e>max)
412 max = e;
413 if (e<min)
414 min = e;
415 }
416
417 switch (fEstimationMode)
418 {
419 case kMean:
420 return sume/sumh;
421 case kMaximum:
422 return maxe;
423 case kFit:
424 f1.SetParameter(0, maxh);
425 f1.SetParameter(1, maxe);
426 f1.SetParameter(2, 0.125);
427 g.Fit(&f1, "Q0N");
428 return f1.GetParameter(1);
429 }
430
431 return 0;
432}
433
434Int_t MRanForestCalc::Process()
435{
436 const Double_t val = Eval();
437
438 fRFOut->SetVal(fFunc.Eval(val));
439 fRFOut->SetReadyToSave();
440
441 return kTRUE;
442}
443
444void MRanForestCalc::Print(Option_t *o) const
445{
446 *fLog << all;
447 *fLog << GetDescriptor() << ":" << endl;
448 *fLog << " - Forest ";
449 switch (fEForests.GetEntries())
450 {
451 case 0: *fLog << "not yet initialized." << endl; break;
452 case 1: *fLog << "is a single tree forest." << endl; break;
453 default: *fLog << "is a multi tree forest." << endl; break;
454 }
455 /*
456 *fLog << " - Trees: " << fNumTrees << endl;
457 *fLog << " - Trys: " << fNumTry << endl;
458 *fLog << " - Node Size: " << fNdSize << endl;
459 *fLog << " - Node Size: " << fNdSize << endl;
460 */
461 *fLog << " - FileName: " << fFileName << endl;
462 *fLog << " - NameOutput: " << fNameOutput << endl;
463}
464
465// --------------------------------------------------------------------------
466//
467//
468Int_t MRanForestCalc::ReadEnv(const TEnv &env, TString prefix, Bool_t print)
469{
470 Bool_t rc = kFALSE;
471 if (IsEnvDefined(env, prefix, "FileName", print))
472 {
473 rc = kTRUE;
474 SetFileName(GetEnvValue(env, prefix, "FileName", fFileName));
475 }
476 if (IsEnvDefined(env, prefix, "Debug", print))
477 {
478 rc = kTRUE;
479 SetDebug(GetEnvValue(env, prefix, "Debug", fDebug));
480 }
481 if (IsEnvDefined(env, prefix, "NameOutput", print))
482 {
483 rc = kTRUE;
484 SetNameOutput(GetEnvValue(env, prefix, "NameOutput", fNameOutput));
485 }
486 if (IsEnvDefined(env, prefix, "EstimationMode", print))
487 {
488 TString txt = GetEnvValue(env, prefix, "EstimationMode", "");
489 txt = txt.Strip(TString::kBoth);
490 txt.ToLower();
491 if (txt==(TString)"mean")
492 fEstimationMode = kMean;
493 if (txt==(TString)"maximum")
494 fEstimationMode = kMaximum;
495 if (txt==(TString)"fit")
496 fEstimationMode = kFit;
497 rc = kTRUE;
498 }
499 return rc;
500}
Note: See TracBrowser for help on using the repository browser.