source: trunk/Mars/mjtrain/MJTrainCuts.cc@ 9873

Last change on this file since 9873 was 9873, checked in by tbretz, 14 years ago
Added more comments.
File size: 31.4 KB
Line 
1/* ======================================================================== *\
2!
3! *
4! * This file is part of MARS, the MAGIC Analysis and Reconstruction
5! * Software. It is distributed to you in the hope that it can be a useful
6! * and timesaving tool in analysing Data of imaging Cerenkov telescopes.
7! * It is distributed WITHOUT ANY WARRANTY.
8! *
9! * Permission to use, copy, modify and distribute this software and its
10! * documentation for any purpose is hereby granted without fee,
11! * provided that the above copyright notice appear in all copies and
12! * that both that copyright notice and this permission notice appear
13! * in supporting documentation. It is provided "as is" without express
14! * or implied warranty.
15! *
16!
17!
18! Author(s): Thomas Bretz 08/2010 <mailto:thomas.bretz@epfl.ch>
19!
20! Copyright: MAGIC Software Development, 2010
21!
22!
23\* ======================================================================== */
24
25/////////////////////////////////////////////////////////////////////////////
26//
27// MJTrainCuts
28// =========
29//
30// This class is meant as a tool to understand better what a trained
31// random forest is doing in the multi-dimensional phase space.
32// Consequently, it can also be used to deduce good one or two dimensional
33// cuts from the results by mimicing the behaviour of the random forest.
34//
35//
36// Usage
37// -----
38//
39// The instance is created by its default constructor
40//
41// MJTrainCuts opt;
42//
43// In a first step a random forest must be trained and in a second step its
44// performance can be evaluated with an independent test sample. The used
45// samples are defined by two MDataSet objects, one for the on-data (e.g.
46// gammas) and the other one for the off-data (e.g. protons). SequencesOn
47// and SequencesOff are used for testing and training respectively.
48//
49// MDataSet seton ("myondata.txt");
50// MDataSet setoff("myoffdata.txt");
51//
52// // If you want to use all available events
53// opt.SetDataSetOn(seton);
54// opt.SetDataSetOff(setoff);
55//
56// // Try to select 10000 and 30000 events for training and testing resp.
57// // opt.SetDataSetOn(seton, 10000, 30000);
58// // opt.SetDataSetOff(setoff, 10000, 30000);
59//
60// Note that by using several data set in one file (see MDataSet) you can
61// have everything in a single file.
62//
63// The variables which are used for training are now setup as usual
64//
65// Int_t p1 = opt.AddParameter("MHillas.fSize");
66// Int_t p2 = opt.AddParameter("MHillas.GetArea*MGeomCam.fConvMm2Deg^2");
67// Int_t p3 = opt.AddParameter("MHillasSrc.fDist*MGeomCam.fConvMm2Deg");
68//
69// In addition you can now setup a binning for the display of each train
70// parameter as follows (for details see MBinning)
71//
72// opt.AddBinning(p1, MBinning(40, 10, 10000, "", "log"));
73// opt.AddBinning(p2, MBinning(50, 0, 0.25));
74// opt.AddBinning(p3, MBinning(50, 0, 2.5));
75//
76// Since with increasing number of variables the possibly combinations
77// increase to fast you have to define which plots you are interested in,
78// for example:
79//
80// opt.AddHist(p3); // A 1D plot dist
81// opt.AddHist(p1, p2); // A 2D plot area vs. size
82// opt.AddHist(p3, p2); // A 2D plot dist vs. size
83//
84// Also 3D plots are avaiable but they are most probably difficult to
85// interprete.
86//
87// In addition to this you have the usual user interface, i.e. that
88// - PreCuts
89// - TrainCuts
90// - TestCuts
91// - PreTasks
92// - PostTasks
93// - TestTasks
94// are available. For details see MJOptimizeBase
95//
96// void EnableRegression() / void EnableClassification()
97// Defines whether to use the random forest's regression of
98// classification method. Classification is the default.
99//
100//
101// The produced plots
102// ------------------
103//
104// The tab with the plots filled will always look like this:
105//
106// +--------+--------+
107// |1 |2 |
108// +--------+--------|
109// |3 |4 |
110// +--------+ |
111// |5 | |
112// +--------+--------+
113//
114// Pad1 and Pad2 contain the weighted event distribution of the test-sample.
115//
116// Pad2 and Pad5 conatin a profile of the hadronness distribution of the
117// test-sample.
118//
119// Pad4 contains a profile of the hadronness distribution of on-data and
120// off-data together of the test-data.
121//
122// If the profiles for on-data and off-data are identical the displayed
123// hadronness is obviously independant of the other (non shown) trainings
124// variables. Therefore the difference between the two plots show how much
125// the variables are correlated. The same is true if the prfiles in
126// pad3 and pad5 don't differe from the profile in pad4.
127//
128// In the most simple case - the random forest is only trained with the
129// variables displayed - all three plots should be identical (apart from the
130// difference in the distrubution of the three sets).
131//
132// The plot in pad4 can now be used to deduce a good classical cut in the
133// displayed variables.
134//
135//
136// Example:
137// --------
138//
139// MJTrainCuts opt;
140// MDataSet seton ("dataset_on.txt");
141// MDataSet setoff("dataset_off.txt");
142// opt.SetDataSetOn(seton);
143// opt.SetDataSetOff(setmix);
144// Int_t p00 = opt.AddParameter("MHillas.fSize");
145// Int_t p01 = opt.AddParameter("MHillas.GetArea*MGeomCam.fConvMm2Deg^2");
146// opt.AddHist(p00, p01); // Area vs Size
147// MStatusDisplay *d = new MStatusDisplay;
148// opt.SetDisplay(d);
149// opt.Process("rf-cuts.root");
150//
151//
152// Random Numbers:
153// ---------------
154// Use:
155// if(gRandom)
156// delete gRandom;
157// gRandom = new TRandom3();
158// in advance to change the random number generator.
159//
160////////////////////////////////////////////////////////////////////////////
161#include "MJTrainCuts.h"
162
163#include <TGraph.h>
164#include <TMarker.h>
165#include <TCanvas.h>
166#include <TPRegexp.h>
167#include <TStopwatch.h>
168
169#include "MHMatrix.h"
170
171#include "MLog.h"
172#include "MLogManip.h"
173
174// tools
175#include "MMath.h"
176#include "MBinning.h"
177#include "MTFillMatrix.h"
178#include "MStatusDisplay.h"
179
180// eventloop
181#include "MParList.h"
182#include "MTaskList.h"
183#include "MEvtLoop.h"
184
185// tasks
186#include "MReadMarsFile.h"
187#include "MContinue.h"
188#include "MFillH.h"
189#include "MRanForestCalc.h"
190
191// container
192#include "MParameters.h"
193
194// histograms
195#include "MHn.h"
196#include "MHHadronness.h"
197
198// filter
199#include "MFEventSelector.h"
200#include "MFilterList.h"
201
202using namespace std;
203
204class HistSet1D : public TObject
205{
206protected:
207 UInt_t fNx;
208
209 virtual void AddHist(MHn &h, const char *rx, const char *, const char *) const
210 {
211 h.AddHist(rx);
212 }
213 virtual void AddProf(MHn &h, const char *rx, const char *, const char *) const
214 {
215 h.AddHist(rx, "MHadronness.fVal", MH3::kProfileSpread);
216 }
217 virtual void SetupName(MHn &h, const char *name) const
218 {
219 h.InitName(Form("%s%d;%d", name, fNx, fNx));
220 }
221 void SetupHist(MHn &h, const char *name, const char *title) const
222 {
223 SetupName(h, name);
224
225 h.SetAutoRange(kFALSE, kFALSE, kFALSE);
226 h.InitTitle(title);
227 h.SetDrawOption("colz");
228 }
229
230 void CreateHist(MHn &h, const char *rx, const char *ry=0, const char *rz=0) const
231 {
232 h.SetLayout(MHn::kComplex);
233 h.SetBit(MHn::kDoNotReset);
234
235 AddHist(h, rx, ry, rz);
236 SetupHist(h, "DistOn", "Distribution of on-data");
237 h.SetWeight("Type.fVal");
238
239 AddHist(h, rx, ry, rz);
240 SetupHist(h, "DistOff", "Distribution of off-data");
241 h.SetWeight("1-Type.fVal");
242
243 AddProf(h, rx, ry, rz);
244 SetupHist(h, "HadOn", "Hadronness profile for on-data");
245 h.SetWeight("Type.fVal");
246
247 AddProf(h, rx, ry, rz);
248 SetupHist(h, "Had", "Hadronness profile for all events");
249
250 AddProf(h, rx, ry, rz);
251 SetupHist(h, "HadOff", "Hadronness profile for off-data");
252 h.SetWeight("1-Type.fVal");
253 }
254
255
256public:
257 HistSet1D(UInt_t nx) : fNx(nx) { }
258
259 virtual MHn *GetHistN(const TList &rules) const
260 {
261 if (!rules.At(fNx))
262 return 0;
263
264 MHn *h = new MHn(Form("%d", fNx));
265
266 CreateHist(*h, rules.At(fNx)->GetName());
267
268 return h;
269 }
270
271 virtual Bool_t CheckBinning(const TObjArray &binnings) const
272 {
273 return binnings.FindObject(Form("Binning%d", fNx));
274 }
275};
276
277class HistSet2D : public HistSet1D
278{
279protected:
280 UInt_t fNy;
281
282 void AddHist(MHn &h, const char *rx, const char *ry, const char *) const
283 {
284 h.AddHist(rx, ry);
285 }
286 void AddProf(MHn &h, const char *rx, const char *ry, const char *) const
287 {
288 h.AddHist(rx, ry, "MHadronness.fVal", MH3::kProfileSpread);
289 }
290 void SetupName(MHn &h, const char *name) const
291 {
292 h.InitName(Form("%s%d:%d;%d;%d", name, fNx, fNy, fNx, fNy));
293 }
294
295public:
296 HistSet2D(UInt_t nx, UInt_t ny) : HistSet1D(nx), fNy(ny) { }
297
298 MHn *GetHistN(const TList &rules) const
299 {
300 if (!rules.At(fNx) || !rules.At(fNy))
301 return 0;
302
303 MHn *h = new MHn(Form("%d:%d", fNx, fNy));
304
305 CreateHist(*h, rules.At(fNx)->GetName(), rules.At(fNy)->GetName());
306
307 return h;
308 }
309 Bool_t CheckBinning(const TObjArray &binnings) const
310 {
311 return HistSet1D::CheckBinning(binnings) && binnings.FindObject(Form("Binning%d", fNy));
312 }
313};
314
315
316class HistSet3D : public HistSet2D
317{
318private:
319 UInt_t fNz;
320
321 void AddHist(MHn &h, const char *rx, const char *ry, const char *rz) const
322 {
323 h.AddHist(rx, ry, rz);
324 }
325 void AddProf(MHn &h, const char *rx, const char *ry, const char *rz) const
326 {
327 h.AddHist(rx, ry, rz, "MHadronness.fVal");
328 }
329 void SetupName(MHn &h, const char *name) const
330 {
331 h.InitName(Form("%s%d:%d:%d;%d;%d;%d", name, fNx, fNy, fNz, fNx, fNy, fNz));
332 }
333
334public:
335 HistSet3D(UInt_t nx, UInt_t ny, UInt_t nz) : HistSet2D(nx, ny), fNz(nz) { }
336
337 MHn *GetHistN(const TList &rules) const
338 {
339 if (!rules.At(fNx) || !rules.At(fNy) || !rules.At(fNz))
340 return 0;
341
342 MHn *h = new MHn(Form("%d:%d:%d", fNx, fNy, fNz));
343
344 CreateHist(*h,
345 rules.At(fNx)->GetName(),
346 rules.At(fNy)->GetName(),
347 rules.At(fNy)->GetName());
348
349 return h;
350 }
351 Bool_t CheckBinning(const TObjArray &binnings) const
352 {
353 return HistSet2D::CheckBinning(binnings) && binnings.FindObject(Form("Binning%d", fNz));
354 }
355};
356
357// ---------------------------------------------------------------------------------------
358
359void MJTrainCuts::AddHist(UInt_t nx)
360{
361 fHists.Add(new HistSet1D(nx));
362}
363
364void MJTrainCuts::AddHist(UInt_t nx, UInt_t ny)
365{
366 fHists.Add(new HistSet2D(nx, ny));
367}
368
369void MJTrainCuts::AddHist(UInt_t nx, UInt_t ny, UInt_t nz)
370{
371 fHists.Add(new HistSet3D(nx, ny, nz));
372}
373
374void MJTrainCuts::AddBinning(UInt_t n, const MBinning &bins)
375{
376 const char *name = Form("Binning%d", n);
377
378 TObject *o = fBinnings.FindObject(name);
379 if (o)
380 {
381 delete fBinnings.Remove(o);
382 *fLog << warn << "WARNING - Binning for parameter " << n << " (" << name << ") already exists... replaced." << endl;
383 }
384
385 // FIXME: Check for existence
386 fBinnings.Add(new MBinning(bins, name, bins.GetTitle()));
387}
388
389/*
390void MJTrainCuts::AddBinning(const MBinning &bins)
391{
392 // FIXME: Check for existence
393 fBinnings.Add(new MBinning(bins, bins.GetName(), bins.GetTitle()));
394}
395*/
396
397// ---------------------------------------------------------------------------------------
398
399// --------------------------------------------------------------------------
400//
401void MJTrainCuts::DisplayResult(MH3 &h31, MH3 &h32, Float_t ontime)
402{
403 TH2D &g = (TH2D&)h32.GetHist();
404 TH2D &h = (TH2D&)h31.GetHist();
405
406 h.SetMarkerColor(kRed);
407 g.SetMarkerColor(kBlue);
408
409 TH2D res1(g);
410 TH2D res2(g);
411
412 h.SetTitle("Hadronness-Distribution vs. Size");
413 res1.SetTitle("Significance Li/Ma");
414 res1.SetXTitle("Size [phe]");
415 res1.SetYTitle("Hadronness");
416 res2.SetTitle("Significance-Distribution");
417 res2.SetXTitle("Size-Cut [phe]");
418 res2.SetYTitle("Hadronness-Cut");
419 res1.SetContour(50);
420 res2.SetContour(50);
421
422 const Int_t nx = h.GetNbinsX();
423 const Int_t ny = h.GetNbinsY();
424
425 gROOT->SetSelectedPad(NULL);
426
427
428 Double_t Stot = 0;
429 Double_t Btot = 0;
430
431 Double_t max2 = -1;
432
433 TGraph gr1;
434 TGraph gr2;
435 for (int x=nx-1; x>=0; x--)
436 {
437 TH1 *hx = h.ProjectionY("H_py", x+1, x+1);
438 TH1 *gx = g.ProjectionY("G_py", x+1, x+1);
439
440 Double_t S = 0;
441 Double_t B = 0;
442
443 Double_t max1 = -1;
444 Int_t maxy1 = 0;
445 Int_t maxy2 = 0;
446 for (int y=ny-1; y>=0; y--)
447 {
448 const Float_t s = gx->Integral(1, y+1);
449 const Float_t b = hx->Integral(1, y+1);
450 const Float_t sig1 = MMath::SignificanceLiMa(s+b, b);
451 const Float_t sig2 = MMath::SignificanceLiMa(s+Stot+b+Btot, b+Btot)*TMath::Log10(s+Stot+1);
452 if (sig1>max1)
453 {
454 maxy1 = y;
455 max1 = sig1;
456 }
457 if (sig2>max2)
458 {
459 maxy2 = y;
460 max2 = sig2;
461
462 S=s;
463 B=b;
464 }
465
466 res1.SetBinContent(x+1, y+1, sig1);
467 }
468
469 Stot += S;
470 Btot += B;
471
472 gr1.SetPoint(x, h.GetXaxis()->GetBinCenter(x+1), h.GetYaxis()->GetBinCenter(maxy1+1));
473 gr2.SetPoint(x, h.GetXaxis()->GetBinCenter(x+1), h.GetYaxis()->GetBinCenter(maxy2+1));
474
475 delete hx;
476 delete gx;
477 }
478
479 //cout << "--> " << MMath::SignificanceLiMa(Stot+Btot, Btot) << " ";
480 //cout << Stot << " " << Btot << endl;
481
482
483 Int_t mx1=0;
484 Int_t my1=0;
485 Int_t mx2=0;
486 Int_t my2=0;
487 Int_t s1=0;
488 Int_t b1=0;
489 Int_t s2=0;
490 Int_t b2=0;
491 Double_t sig1=-1;
492 Double_t sig2=-1;
493 for (int x=0; x<nx; x++)
494 {
495 TH1 *hx = h.ProjectionY("H_py", x+1);
496 TH1 *gx = g.ProjectionY("G_py", x+1);
497 for (int y=0; y<ny; y++)
498 {
499 const Float_t s = gx->Integral(1, y+1);
500 const Float_t b = hx->Integral(1, y+1);
501 const Float_t sig = MMath::SignificanceLiMa(s+b, b);
502 res2.SetBinContent(x+1, y+1, sig);
503
504 // Search for top-rightmost maximum
505 if (sig>=sig1)
506 {
507 mx1=x+1;
508 my1=y+1;
509 s1 = TMath::Nint(s);
510 b1 = TMath::Nint(b);
511 sig1=sig;
512 }
513 if (TMath::Log10(s)*sig>=sig2)
514 {
515 mx2=x+1;
516 my2=y+1;
517 s2 = TMath::Nint(s);
518 b2 = TMath::Nint(b);
519 sig2=TMath::Log10(s)*sig;
520 }
521 }
522 delete hx;
523 delete gx;
524 }
525
526 TGraph gr3;
527 TGraph gr4;
528 gr4.SetTitle("Significance Li/Ma vs. Hadronness-cut");
529
530 TH1 *hx = h.ProjectionY("H_py");
531 TH1 *gx = g.ProjectionY("G_py");
532 for (int y=0; y<ny; y++)
533 {
534 const Float_t s = gx->Integral(1, y+1);
535 const Float_t b = hx->Integral(1, y+1);
536 const Float_t sg1 = MMath::SignificanceLiMa(s+b, b);
537 const Float_t sg2 = s<1 ? 0 : MMath::SignificanceLiMa(s+b, b)*TMath::Log10(s);
538
539 gr3.SetPoint(y, h.GetYaxis()->GetBinLowEdge(y+2), sg1);
540 gr4.SetPoint(y, h.GetYaxis()->GetBinLowEdge(y+2), sg2);
541 }
542 delete hx;
543 delete gx;
544
545 if (fDisplay)
546 {
547 TCanvas &c = fDisplay->AddTab("OptCut");
548 c.SetBorderMode(0);
549 c.Divide(2,2);
550
551 gROOT->SetSelectedPad(0);
552 c.cd(1);
553 gPad->SetBorderMode(0);
554 gPad->SetFrameBorderMode(0);
555 gPad->SetLogx();
556 gPad->SetGridx();
557 gPad->SetGridy();
558 h.DrawCopy();
559 g.DrawCopy("same");
560 gr1.SetMarkerStyle(kFullDotMedium);
561 gr1.DrawClone("LP")->SetBit(kCanDelete);
562 gr2.SetLineColor(kBlue);
563 gr2.SetMarkerStyle(kFullDotMedium);
564 gr2.DrawClone("LP")->SetBit(kCanDelete);
565
566 gROOT->SetSelectedPad(0);
567 c.cd(3);
568 gPad->SetBorderMode(0);
569 gPad->SetFrameBorderMode(0);
570 gPad->SetGridx();
571 gPad->SetGridy();
572 gr4.SetMinimum(0);
573 gr4.SetMarkerStyle(kFullDotMedium);
574 gr4.DrawClone("ALP")->SetBit(kCanDelete);
575 gr3.SetLineColor(kBlue);
576 gr3.SetMarkerStyle(kFullDotMedium);
577 gr3.DrawClone("LP")->SetBit(kCanDelete);
578
579 c.cd(2);
580 gPad->SetBorderMode(0);
581 gPad->SetFrameBorderMode(0);
582 gPad->SetLogx();
583 gPad->SetGridx();
584 gPad->SetGridy();
585 gPad->AddExec("color", "gStyle->SetPalette(1, 0);");
586 res1.SetMaximum(7);
587 res1.DrawCopy("colz");
588
589 c.cd(4);
590 gPad->SetBorderMode(0);
591 gPad->SetFrameBorderMode(0);
592 gPad->SetLogx();
593 gPad->SetGridx();
594 gPad->SetGridy();
595 gPad->AddExec("color", "gStyle->SetPalette(1, 0);");
596 res2.SetMaximum(res2.GetMaximum()*1.05);
597 res2.DrawCopy("colz");
598
599 // Int_t mx, my, mz;
600 // res2.GetMaximumBin(mx, my, mz);
601
602 TMarker m;
603 m.SetMarkerStyle(kStar);
604 m.DrawMarker(res2.GetXaxis()->GetBinCenter(mx1), res2.GetYaxis()->GetBinCenter(my1));
605 m.SetMarkerStyle(kPlus);
606 m.DrawMarker(res2.GetXaxis()->GetBinCenter(mx2), res2.GetYaxis()->GetBinCenter(my2));
607 }
608
609 if (ontime>0)
610 *fLog << all << "Observation Time: " << TMath::Nint(ontime/60) << "min" << endl;
611 *fLog << "Maximum Significance: " << Form("%.1f", sig1);
612 if (ontime>0)
613 *fLog << Form(" [%.1f/sqrt(h)]", sig1/TMath::Sqrt(ontime/3600));
614 *fLog << endl;
615
616 *fLog << "Significance: S=" << Form("%.1f", sig1) << " E=" << s1 << " B=" << b1 << " h<";
617 *fLog << Form("%.2f", res2.GetYaxis()->GetBinCenter(my1)) << " s>";
618 *fLog << Form("%3d", TMath::Nint(res2.GetXaxis()->GetBinCenter(mx1))) << endl;
619 *fLog << "Significance*LogE: S=" << Form("%.1f", sig2/TMath::Log10(s2)) << " E=" << s2 << " B=" << b2 << " h<";
620 *fLog << Form("%.2f", res2.GetYaxis()->GetBinCenter(my2)) << " s>";
621 *fLog << Form("%3d", TMath::Nint(res2.GetXaxis()->GetBinCenter(mx2))) << endl;
622 *fLog << endl;
623}
624
625// --------------------------------------------------------------------------
626//
627Bool_t MJTrainCuts::Process(const char *out)
628{
629 // =========================== Consistency checks ==================================
630 if (!fDataSetOn.IsValid())
631 {
632 *fLog << err << "ERROR - DataSet for on-data invalid!" << endl;
633 return kFALSE;
634 }
635 if (!fDataSetOff.IsValid())
636 {
637 *fLog << err << "ERROR - DataSet for off-data invalid!" << endl;
638 return kFALSE;
639 }
640
641 if (fDataSetOn.IsWobbleMode()!=fDataSetOff.IsWobbleMode())
642 {
643 *fLog << err << "ERROR - On- and Off-DataSet have different observation modes!" << endl;
644 return kFALSE;
645 }
646
647 if (fDataSetOn.IsMonteCarlo()!=fDataSetOff.IsMonteCarlo())
648 {
649 *fLog << err << "ERROR - On- and Off-DataSet have different monte carlo modes!" << endl;
650 return kFALSE;
651 }
652
653 if (!HasWritePermission(out))
654 return kFALSE;
655
656 // Check if needed binning exists
657 TIter NextH(&fHists);
658 TObject *o = 0;
659 while ((o=NextH()))
660 {
661 const HistSet1D *hs = static_cast<HistSet1D*>(o);
662 if (hs->CheckBinning(fBinnings))
663 continue;
664
665 *fLog << err << "ERROR - Not all needed binnning exist." << endl;
666 return kFALSE;
667 }
668
669 // =========================== Preparation ==================================
670
671 if (fDisplay)
672 fDisplay->SetTitle(out);
673
674 TStopwatch clock;
675 clock.Start();
676
677 // ------------------ Setup reading --------------------
678 MReadMarsFile read1("Events");
679 MReadMarsFile read2("Events");
680 MReadMarsFile read3("Events");
681 MReadMarsFile read4("Events");
682 read1.DisableAutoScheme();
683 read2.DisableAutoScheme();
684 read3.DisableAutoScheme();
685 read4.DisableAutoScheme();
686
687 // Setup four reading tasks with the on- and off-data of the two datasets
688 // Training -- On
689 if (!fDataSetOn.AddFilesOn(read1))
690 return kFALSE;
691 // Testing -- On
692 if (!fDataSetOn.AddFilesOff(read4))
693 return kFALSE;
694 // Training -- Off
695 if (!fDataSetOff.AddFilesOn(read3))
696 return kFALSE;
697 // Testing -- Off
698 if (!fDataSetOff.AddFilesOff(read2))
699 return kFALSE;
700
701 // ===============================================================================
702 // ====================== Training =========================
703 // ===============================================================================
704
705 // ---------------- Setup RF Matrix ----------------
706 MHMatrix train("Train");
707 train.AddColumns(fRules);
708// if (fEnableWeights[kTrainOn] || fEnableWeights[kTrainOff])
709// train.AddColumn("MWeight.fVal");
710 train.AddColumn("MHadronness.fVal");
711
712 // ----------------- Prepare filling Matrix RF ------------------
713
714 // Setup the hadronness container identifying gammas and off-data
715 // and setup a container for the weights
716 MParameterD had("MHadronness");
717 MParameterD wgt("MWeight");
718 MParameterD typ("Type");
719
720 // Add them to the parameter list
721 MParList plistx;
722 plistx.AddToList(this); // take care of fDisplay!
723 plistx.AddToList(&had);
724 plistx.AddToList(&wgt);
725 plistx.AddToList(&typ);
726
727 // Setup the tool class to fill the matrix
728 MTFillMatrix fill;
729 fill.SetLogStream(fLog);
730 fill.SetDisplay(fDisplay);
731 fill.AddPreCuts(fPreCuts);
732 fill.AddPreCuts(fTrainCuts);
733
734 // ----------------- Fill on data into matrix ------------------
735
736 // Setup the tool class to read the gammas and read them
737 fill.SetName("FillOn");
738 fill.SetDestMatrix1(&train, fNum[kTrainOn]);
739 fill.SetReader(&read1);
740// fill.AddPreTasks(fPreTasksSet[kTrainOn]);
741 fill.AddPreTasks(fPreTasks);
742// fill.AddPostTasks(fPostTasksSet[kTrainOn]);
743 fill.AddPostTasks(fPostTasks);
744
745 // Set classifier for gammas
746 had.SetVal(0);
747 wgt.SetVal(1);
748 typ.SetVal(0);
749
750 // Fill matrix
751 if (!fill.Process(plistx))
752 return kFALSE;
753
754 // Check the number or read events
755 const Int_t numontrn = train.GetNumRows();
756 if (numontrn==0)
757 {
758 *fLog << err << "ERROR - No on-data events available for training... aborting." << endl;
759 return kFALSE;
760 }
761
762 // Remove possible post tasks
763 fill.ClearPreTasks();
764 fill.ClearPostTasks();
765
766 // ----------------- Fill off data into matrix ------------------
767
768 // In case of wobble mode we have to do something special
769 // Setup the tool class to read the background and read them
770 fill.SetName("FillOff");
771 fill.SetDestMatrix1(&train, fNum[kTrainOff]);
772 fill.SetReader(&read3);
773// fill.AddPreTasks(fPreTasksSet[kTrainOff]);
774 fill.AddPreTasks(fPreTasks);
775// fill.AddPostTasks(fPostTasksSet[kTrainOff]);
776 fill.AddPostTasks(fPostTasks);
777
778 // Set classifier for background
779 had.SetVal(1);
780 wgt.SetVal(1);
781 typ.SetVal(1);
782
783 // Fiull matrix
784 if (!fill.Process(plistx))
785 return kFALSE;
786
787 // Check the number or read events
788 const Int_t numofftrn = train.GetNumRows()-numontrn;
789 if (numofftrn==0)
790 {
791 *fLog << err << "ERROR - No off-data available for training... aborting." << endl;
792 return kFALSE;
793 }
794
795 // ------------------------ Train RF --------------------------
796
797 MRanForestCalc rf("TrainSeparation", fTitle);
798 rf.SetNumTrees(fNumTrees);
799 rf.SetNdSize(fNdSize);
800 rf.SetNumTry(fNumTry);
801 rf.SetNumObsoleteVariables(1);
802// rf.SetLastDataColumnHasWeights(fEnableWeights[kTrainOn] || fEnableWeights[kTrainOff]);
803 rf.SetDebug(fDebug>1);
804 rf.SetDisplay(fDisplay);
805 rf.SetLogStream(fLog);
806 rf.SetFileName(out);
807 rf.SetNameOutput("MHadronness");
808
809 // Train the random forest either by classification or regression
810 if (!rf.Train(train, fUseRegression))
811 return kFALSE;
812
813 // ----------------- Print result of training ------------------
814
815 // Output information about what was going on so far.
816 *fLog << all;
817 fLog->Separator("The forest was trained with...");
818
819 *fLog << "Training method:" << endl;
820 *fLog << " * " << (fUseRegression?"regression":"classification") << endl;
821 /*
822 if (fEnableWeights[kTrainOn])
823 *fLog << " * weights for on-data" << endl;
824 if (fEnableWeights[kTrainOff])
825 *fLog << " * weights for off-data" << endl;
826 */
827 *fLog << endl;
828 *fLog << "Events used for training:" << endl;
829 *fLog << " * Gammas: " << numontrn << endl;
830 *fLog << " * Background: " << numofftrn << endl;
831 *fLog << endl;
832 *fLog << "Gamma/Background ratio:" << endl;
833 *fLog << " * Requested: " << (float)fNum[kTrainOn]/fNum[kTrainOff] << endl;
834 *fLog << " * Result: " << (float)numontrn/numofftrn << endl;
835 *fLog << endl;
836 *fLog << "Run-Time: " << Form("%.1f", clock.RealTime()/60) << "min (CPU: ";
837 *fLog << Form("%.1f", clock.CpuTime()/60) << "min)" << endl;
838 *fLog << endl;
839 *fLog << "Output file name: " << out << endl;
840
841 // ===============================================================================
842 // ====================== Testing =========================
843 // ===============================================================================
844 fLog->Separator("Test");
845
846 clock.Continue();
847
848 // ---------------------- Prepare eventloop off-data ---------------------
849
850 // Setup parlist and tasklist for testing
851 MParList plist;
852 MTaskList tlist;
853 plist.AddToList(this); // Take care of display
854 plist.AddToList(&tlist);
855
856// MMcEvt mcevt;
857// plist.AddToList(&mcevt);
858
859 plist.AddToList(&wgt);
860 plist.AddToList(&typ);
861
862 // ----- Setup histograms -----
863 MBinning binsy(50, 0 , 1, "BinningMH3Y", "lin");
864 MBinning binsx(40, 10, 100000, "BinningMH3X", "log");
865
866 plist.AddToList(&binsx);
867 plist.AddToList(&binsy);
868
869 MH3 h31("MHillas.fSize", "MHadronness.fVal");
870 MH3 h32("MHillas.fSize", "MHadronness.fVal");
871 MH3 h40("MMcEvt.fEnergy", "MHadronness.fVal");
872 h31.SetTitle("Background probability vs. Size:Size [phe]:Hadronness h");
873 h32.SetTitle("Background probability vs. Size:Size [phe]:Hadronness h");
874 h40.SetTitle("Background probability vs. Energy:Energy [GeV]:Hadronness h");
875
876 plist.AddToList(&fBinnings);
877
878 MHHadronness hist;
879
880 // ----- Setup tasks -----
881 MFillH fillh0(&hist, "", "FillHadronness");
882 MFillH fillh1(&h31, "", "FillHadVsSize");
883 MFillH fillh2(&h32, "", "FillHadVsSize");
884 MFillH fillh4(&h40, "", "FillHadVsEnergy");
885 fillh0.SetWeight("MWeight");
886 fillh1.SetWeight("MWeight");
887 fillh2.SetWeight("MWeight");
888 fillh4.SetWeight("MWeight");
889 fillh1.SetDrawOption("colz profy");
890 fillh2.SetDrawOption("colz profy");
891 fillh4.SetDrawOption("colz profy");
892 fillh1.SetNameTab("HadSzOff");
893 fillh2.SetNameTab("HadSzOn");
894 fillh4.SetNameTab("HadEnOn");
895 fillh0.SetBit(MFillH::kDoNotDisplay);
896
897 // ----- Setup filter -----
898 MFilterList precuts;
899 precuts.AddToList(fPreCuts);
900 precuts.AddToList(fTestCuts);
901
902 MContinue cont0(&precuts);
903 cont0.SetName("PreCuts");
904 cont0.SetInverted();
905
906 MFEventSelector sel; // FIXME: USING IT (WITH PROB?) in READ will by much faster!!!
907 sel.SetNumSelectEvts(fNum[kTestOff]);
908
909 MContinue contsel(&sel);
910 contsel.SetInverted();
911
912 // ----- Setup tasklist -----
913 tlist.AddToList(&read2); // Reading task
914 tlist.AddToList(&contsel); // event selector
915// tlist.AddToList(fPreTasksSet[kTestOff]);
916 tlist.AddToList(fPreTasks); // list of pre tasks
917 tlist.AddToList(&cont0); // list of pre cuts and test cuts
918 tlist.AddToList(&rf); // evaluate random forest
919// tlist.AddToList(fPostTasksSet[kTestOff]);
920 tlist.AddToList(fPostTasks); // list of post tasks
921 tlist.AddToList(&fillh1); // Fill HadSzOff
922
923 TList autodel;
924 autodel.SetOwner();
925
926 TPRegexp regexp("([0-9]:*)+");
927
928 NextH.Reset();
929 while ((o=NextH()))
930 {
931 HistSet1D *hs = static_cast<HistSet1D*>(o);
932
933 // FIXME: Move to beginning of function
934 // Check if needed binning exists
935 if (!hs->CheckBinning(fBinnings))
936 return kFALSE;
937
938 MHn *hist = hs->GetHistN(fRules);
939 MFillH *fill = new MFillH(hist, "", Form("Fill%s", hist->GetName()));
940
941 fill->SetWeight("MWeight");
942 fill->SetDrawOption("colz");
943 fill->SetNameTab(hist->GetName());
944 fill->SetBit(MFillH::kDoNotDisplay);
945
946 tlist.AddToList(fill);
947
948 autodel.Add(fill);
949 autodel.Add(hist);
950 }
951 tlist.AddToList(&fillh0); // Fill MHHadronness (not displayed in first loop)
952 tlist.AddToList(&fTestTasks); // list of test tasks
953
954 // Enable Acceleration
955 tlist.SetAccelerator(MTask::kAccDontReset|MTask::kAccDontTime);
956
957 // ---------------------- Run eventloop on background ---------------------
958 MEvtLoop loop;
959 loop.SetDisplay(fDisplay);
960 loop.SetLogStream(fLog);
961 loop.SetParList(&plist);
962 //if (!SetupEnv(loop))
963 // return kFALSE;
964
965 wgt.SetVal(1);
966 typ.SetVal(0);
967 if (!loop.Eventloop())
968 return kFALSE;
969
970 // ---------------------- Prepare eventloop on-data ---------------------
971
972 sel.SetNumSelectEvts(fNum[kTestOn]); // set number of target events
973
974 fillh0.ResetBit(MFillH::kDoNotDisplay); // Switch on display MHHadronness
975
976 TIter NextF(&autodel);
977 while ((o=NextF()))
978 {
979 MFillH *fill = dynamic_cast<MFillH*>(o);
980 if (fill)
981 fill->ResetBit(MFillH::kDoNotDisplay);
982 }
983
984 // Remove PreTasksOff and PostTasksOff from the list
985// tlist.RemoveFromList(fPreTasksSet[kTestOff]);
986// tlist.RemoveFromList(fPostTasksSet[kTestOff]);
987
988 tlist.Replace(&read4); // replace reading off-data by on-data
989
990 // Add the PreTasksOn directly after the reading task
991// tlist.AddToListAfter(fPreTasksSet[kTestOn], &c1);
992
993 // Add the PostTasksOn after rf
994// tlist.AddToListAfter(fPostTasksSet[kTestOn], &rf);
995
996 tlist.Replace(&fillh2); // Fill HadSzOn instead of HadSzOff
997 tlist.AddToListAfter(&fillh4, &fillh0); // Filling of HadEnOn
998
999 // Enable Acceleration
1000 tlist.SetAccelerator(MTask::kAccDontReset|MTask::kAccDontTime);
1001
1002 // ---------------------- Run eventloop on-data ---------------------
1003
1004 wgt.SetVal(1);
1005 typ.SetVal(1);
1006 if (!loop.Eventloop())
1007 return kFALSE;
1008
1009 // ---------------------- Print/Display result ---------------------
1010
1011 // Show what was going on in the testing
1012 const Double_t numontst = h32.GetHist().GetEntries();
1013 const Double_t numofftst = h31.GetHist().GetEntries();
1014
1015 *fLog << all;
1016 fLog->Separator("The forest was tested with...");
1017 *fLog << "Test method:" << endl;
1018 *fLog << " * Random Forest: " << out << endl;
1019 /*
1020 if (fEnableWeights[kTestOn])
1021 *fLog << " * weights for on-data" << endl;
1022 if (fEnableWeights[kTestOff])
1023 *fLog << " * weights for off-data" << endl;
1024 */
1025 *fLog << endl;
1026 *fLog << "Events used for test:" << endl;
1027 *fLog << " * Gammas: " << numontst << endl;
1028 *fLog << " * Background: " << numofftst << endl;
1029 *fLog << endl;
1030 *fLog << "Gamma/Background ratio:" << endl;
1031 *fLog << " * Requested: " << (float)fNum[kTestOn]/fNum[kTestOff] << endl;
1032 *fLog << " * Result: " << (float)numontst/numofftst << endl;
1033 *fLog << endl;
1034
1035 // Display the result plots
1036 DisplayResult(h31, h32, -1);
1037 //DisplayResult(h31, h32, ontime);
1038
1039 *fLog << "Total Run-Time: " << Form("%.1f", clock.RealTime()/60) << "min (CPU: ";
1040 *fLog << Form("%.1f", clock.CpuTime()/60) << "min)" << endl;
1041 fLog->Separator();
1042
1043 // ----------------- Write result ------------------
1044
1045 fDataSetOn.SetName("DataSetOn");
1046 fDataSetOff.SetName("DataSetOff");
1047
1048 // Write the display
1049 TObjArray arr;
1050 arr.Add(const_cast<MDataSet*>(&fDataSetOn));
1051 arr.Add(const_cast<MDataSet*>(&fDataSetOff));
1052 if (fDisplay)
1053 arr.Add(fDisplay);
1054
1055 SetPathOut(out);
1056 return WriteContainer(arr, 0, "UPDATE");
1057}
1058
Note: See TracBrowser for help on using the repository browser.