source: trunk/Mars/mjtrain/MJTrainCuts.cc@ 9870

Last change on this file since 9870 was 9870, checked in by tbretz, 15 years ago
Added new class MJTrainCuts to help analysing the performance of the random forest and finding classical cuts.
File size: 30.7 KB
Line 
1/* ======================================================================== *\
2!
3! *
4! * This file is part of MARS, the MAGIC Analysis and Reconstruction
5! * Software. It is distributed to you in the hope that it can be a useful
6! * and timesaving tool in analysing Data of imaging Cerenkov telescopes.
7! * It is distributed WITHOUT ANY WARRANTY.
8! *
9! * Permission to use, copy, modify and distribute this software and its
10! * documentation for any purpose is hereby granted without fee,
11! * provided that the above copyright notice appear in all copies and
12! * that both that copyright notice and this permission notice appear
13! * in supporting documentation. It is provided "as is" without express
14! * or implied warranty.
15! *
16!
17!
18! Author(s): Thomas Bretz 08/2010 <mailto:thomas.bretz@epfl.ch>
19!
20! Copyright: MAGIC Software Development, 2010
21!
22!
23\* ======================================================================== */
24
25/////////////////////////////////////////////////////////////////////////////
26//
27// MJTrainCuts
28// =========
29//
30// This class is meant as a tool to understand better what a trained
31// random forest is doing in the multi-dimensional phase space.
32// Consequently, it can also be used to deduce good one or two dimensional
33// cuts from the results by mimicing the behaviour of the random forest.
34//
35//
36// Usage
37// -----
38//
39// The instance is created by its default constructor
40//
41// MJTrainCuts opt;
42//
43// In a first step a random forest must be trained and in a second step its
44// performance can be evaluated with an independent test sample. The used
45// samples are defined by two MDataSet objects, one for the on-data (e.g.
46// gammas) and the other one for the off-data (e.g. protons). SequencesOn
47// and SequencesOff are used for testing and training respectively.
48//
49// MDataSet seton ("myondata.txt");
50// MDataSet setoff("myoffdata.txt");
51//
52// // If you want to use all available events
53// opt.SetDataSetOn(seton);
54// opt.SetDataSetOff(setoff);
55//
56// // Try to select 10000 and 30000 events for training and testing resp.
57// // opt.SetDataSetOn(seton, 10000, 30000);
58// // opt.SetDataSetOff(setoff, 10000, 30000);
59//
60// Note that by using several data set in one file (see MDataSet) you can
61// have everything in a single file.
62//
63// The variables which are used for training are now setup as usual
64//
65// Int_t p1 = opt.AddParameter("MHillas.fSize");
66// Int_t p2 = opt.AddParameter("MHillas.GetArea*MGeomCam.fConvMm2Deg^2");
67// Int_t p3 = opt.AddParameter("MHillasSrc.fDist*MGeomCam.fConvMm2Deg");
68//
69// In addition you can now setup a binning for the display of each train
70// parameter as follows (for details see MBinning)
71//
72// opt.AddBinning(p1, MBinning(40, 10, 10000, "", "log"));
73// opt.AddBinning(p2, MBinning(50, 0, 0.25));
74// opt.AddBinning(p3, MBinning(50, 0, 2.5));
75//
76// Since with increasing number of variables the possibly combinations
77// increase to fast you have to define which plots you are interested in,
78// for example:
79//
80// opt.AddHist(p3); // A 1D plot dist
81// opt.AddHist(p1, p2); // A 2D plot area vs. size
82// opt.AddHist(p3, p2); // A 2D plot dist vs. size
83//
84// Also 3D plots are avaiable but they are most probably difficult to
85// interprete.
86//
87// In addition to this you have the usual user interface, i.e. that
88// - PreCuts
89// - TrainCuts
90// - TestCuts
91// - PreTasks
92// - PostTasks
93// - TestTasks
94// are available. For details see MJOptimizeBase
95//
96// void EnableRegression() / void EnableClassification()
97// Defines whether to use the random forest's regression of
98// classification method. Classification is the default.
99//
100//
101// The produced plots
102// ------------------
103//
104// The tab with the plots filled will always look like this:
105//
106// +--------+--------+
107// |1 |2 |
108// +--------+--------|
109// |3 |4 |
110// +--------+ |
111// |5 | |
112// +--------+--------+
113//
114// Pad1 and Pad2 contain the weighted event distribution of the test-sample.
115//
116// Pad2 and Pad5 conatin a profile of the hadronness distribution of the
117// test-sample.
118//
119// Pad4 contains a profile of the hadronness distribution of on-data and
120// off-data together of the test-data.
121//
122// If the profiles for on-data and off-data are identical the displayed
123// hadronness is obviously independant of the other (non shown) trainings
124// variables. Therefore the difference between the two plots show how much
125// the variables are correlated. The same is true if the prfiles in
126// pad3 and pad5 don't differe from the profile in pad4.
127//
128// In the most simple case - the random forest is only trained with the
129// variables displayed - all three plots should be identical (apart from the
130// difference in the distrubution of the three sets).
131//
132// The plot in pad4 can now be used to deduce a good classical cut in the
133// displayed variables.
134//
135////////////////////////////////////////////////////////////////////////////
136#include "MJTrainCuts.h"
137
138#include <TGraph.h>
139#include <TMarker.h>
140#include <TCanvas.h>
141#include <TPRegexp.h>
142#include <TStopwatch.h>
143
144#include "MHMatrix.h"
145
146#include "MLog.h"
147#include "MLogManip.h"
148
149// tools
150#include "MMath.h"
151#include "MBinning.h"
152#include "MTFillMatrix.h"
153#include "MStatusDisplay.h"
154
155// eventloop
156#include "MParList.h"
157#include "MTaskList.h"
158#include "MEvtLoop.h"
159
160// tasks
161#include "MReadMarsFile.h"
162#include "MContinue.h"
163#include "MFillH.h"
164#include "MRanForestCalc.h"
165
166// container
167#include "MParameters.h"
168
169// histograms
170#include "MHn.h"
171#include "MHHadronness.h"
172
173// filter
174#include "MFEventSelector.h"
175#include "MFilterList.h"
176
177using namespace std;
178
179class HistSet1D : public TObject
180{
181protected:
182 UInt_t fNx;
183
184 virtual void AddHist(MHn &h, const char *rx, const char *, const char *) const
185 {
186 h.AddHist(rx);
187 }
188 virtual void AddProf(MHn &h, const char *rx, const char *, const char *) const
189 {
190 h.AddHist(rx, "MHadronness.fVal", MH3::kProfileSpread);
191 }
192 virtual void SetupName(MHn &h, const char *name) const
193 {
194 h.InitName(Form("%s%d;%d", name, fNx, fNx));
195 }
196 void SetupHist(MHn &h, const char *name, const char *title) const
197 {
198 SetupName(h, name);
199
200 h.SetAutoRange(kFALSE, kFALSE, kFALSE);
201 h.InitTitle(title);
202 h.SetDrawOption("colz");
203 }
204
205 void CreateHist(MHn &h, const char *rx, const char *ry=0, const char *rz=0) const
206 {
207 h.SetLayout(MHn::kComplex);
208 h.SetBit(MHn::kDoNotReset);
209
210 AddHist(h, rx, ry, rz);
211 SetupHist(h, "DistOn", "Distribution of on-data");
212 h.SetWeight("Type.fVal");
213
214 AddHist(h, rx, ry, rz);
215 SetupHist(h, "DistOff", "Distribution of off-data");
216 h.SetWeight("1-Type.fVal");
217
218 AddProf(h, rx, ry, rz);
219 SetupHist(h, "HadOn", "Hadronness profile for on-data");
220 h.SetWeight("Type.fVal");
221
222 AddProf(h, rx, ry, rz);
223 SetupHist(h, "Had", "Hadronness profile for all events");
224
225 AddProf(h, rx, ry, rz);
226 SetupHist(h, "HadOff", "Hadronness profile for off-data");
227 h.SetWeight("1-Type.fVal");
228 }
229
230
231public:
232 HistSet1D(UInt_t nx) : fNx(nx) { }
233
234 virtual MHn *GetHistN(const TList &rules) const
235 {
236 if (!rules.At(fNx))
237 return 0;
238
239 MHn *h = new MHn(Form("%d", fNx));
240
241 CreateHist(*h, rules.At(fNx)->GetName());
242
243 return h;
244 }
245
246 virtual Bool_t CheckBinning(const TObjArray &binnings) const
247 {
248 return binnings.FindObject(Form("Binning%d", fNx));
249 }
250};
251
252class HistSet2D : public HistSet1D
253{
254protected:
255 UInt_t fNy;
256
257 void AddHist(MHn &h, const char *rx, const char *ry, const char *) const
258 {
259 h.AddHist(rx, ry);
260 }
261 void AddProf(MHn &h, const char *rx, const char *ry, const char *) const
262 {
263 h.AddHist(rx, ry, "MHadronness.fVal", MH3::kProfileSpread);
264 }
265 void SetupName(MHn &h, const char *name) const
266 {
267 h.InitName(Form("%s%d:%d;%d;%d", name, fNx, fNy, fNx, fNy));
268 }
269
270public:
271 HistSet2D(UInt_t nx, UInt_t ny) : HistSet1D(nx), fNy(ny) { }
272
273 MHn *GetHistN(const TList &rules) const
274 {
275 if (!rules.At(fNx) || !rules.At(fNy))
276 return 0;
277
278 MHn *h = new MHn(Form("%d:%d", fNx, fNy));
279
280 CreateHist(*h, rules.At(fNx)->GetName(), rules.At(fNy)->GetName());
281
282 return h;
283 }
284 Bool_t CheckBinning(const TObjArray &binnings) const
285 {
286 return HistSet1D::CheckBinning(binnings) && binnings.FindObject(Form("Binning%d", fNy));
287 }
288};
289
290
291class HistSet3D : public HistSet2D
292{
293private:
294 UInt_t fNz;
295
296 void AddHist(MHn &h, const char *rx, const char *ry, const char *rz) const
297 {
298 h.AddHist(rx, ry, rz);
299 }
300 void AddProf(MHn &h, const char *rx, const char *ry, const char *rz) const
301 {
302 h.AddHist(rx, ry, rz, "MHadronness.fVal");
303 }
304 void SetupName(MHn &h, const char *name) const
305 {
306 h.InitName(Form("%s%d:%d:%d;%d;%d;%d", name, fNx, fNy, fNz, fNx, fNy, fNz));
307 }
308
309public:
310 HistSet3D(UInt_t nx, UInt_t ny, UInt_t nz) : HistSet2D(nx, ny), fNz(nz) { }
311
312 MHn *GetHistN(const TList &rules) const
313 {
314 if (!rules.At(fNx) || !rules.At(fNy) || !rules.At(fNz))
315 return 0;
316
317 MHn *h = new MHn(Form("%d:%d:%d", fNx, fNy, fNz));
318
319 CreateHist(*h,
320 rules.At(fNx)->GetName(),
321 rules.At(fNy)->GetName(),
322 rules.At(fNy)->GetName());
323
324 return h;
325 }
326 Bool_t CheckBinning(const TObjArray &binnings) const
327 {
328 return HistSet2D::CheckBinning(binnings) && binnings.FindObject(Form("Binning%d", fNz));
329 }
330};
331
332// ---------------------------------------------------------------------------------------
333
334void MJTrainCuts::AddHist(UInt_t nx)
335{
336 fHists.Add(new HistSet1D(nx));
337}
338
339void MJTrainCuts::AddHist(UInt_t nx, UInt_t ny)
340{
341 fHists.Add(new HistSet2D(nx, ny));
342}
343
344void MJTrainCuts::AddHist(UInt_t nx, UInt_t ny, UInt_t nz)
345{
346 fHists.Add(new HistSet3D(nx, ny, nz));
347}
348
349void MJTrainCuts::AddBinning(UInt_t n, const MBinning &bins)
350{
351 const char *name = Form("Binning%d", n);
352
353 TObject *o = fBinnings.FindObject(name);
354 if (o)
355 {
356 delete fBinnings.Remove(o);
357 *fLog << warn << "WARNING - Binning for parameter " << n << " (" << name << ") already exists... replaced." << endl;
358 }
359
360 // FIXME: Check for existence
361 fBinnings.Add(new MBinning(bins, name, bins.GetTitle()));
362}
363
364/*
365void MJTrainCuts::AddBinning(const MBinning &bins)
366{
367 // FIXME: Check for existence
368 fBinnings.Add(new MBinning(bins, bins.GetName(), bins.GetTitle()));
369}
370*/
371
372// ---------------------------------------------------------------------------------------
373
374// --------------------------------------------------------------------------
375//
376void MJTrainCuts::DisplayResult(MH3 &h31, MH3 &h32, Float_t ontime)
377{
378 TH2D &g = (TH2D&)h32.GetHist();
379 TH2D &h = (TH2D&)h31.GetHist();
380
381 h.SetMarkerColor(kRed);
382 g.SetMarkerColor(kBlue);
383
384 TH2D res1(g);
385 TH2D res2(g);
386
387 h.SetTitle("Hadronness-Distribution vs. Size");
388 res1.SetTitle("Significance Li/Ma");
389 res1.SetXTitle("Size [phe]");
390 res1.SetYTitle("Hadronness");
391 res2.SetTitle("Significance-Distribution");
392 res2.SetXTitle("Size-Cut [phe]");
393 res2.SetYTitle("Hadronness-Cut");
394 res1.SetContour(50);
395 res2.SetContour(50);
396
397 const Int_t nx = h.GetNbinsX();
398 const Int_t ny = h.GetNbinsY();
399
400 gROOT->SetSelectedPad(NULL);
401
402
403 Double_t Stot = 0;
404 Double_t Btot = 0;
405
406 Double_t max2 = -1;
407
408 TGraph gr1;
409 TGraph gr2;
410 for (int x=nx-1; x>=0; x--)
411 {
412 TH1 *hx = h.ProjectionY("H_py", x+1, x+1);
413 TH1 *gx = g.ProjectionY("G_py", x+1, x+1);
414
415 Double_t S = 0;
416 Double_t B = 0;
417
418 Double_t max1 = -1;
419 Int_t maxy1 = 0;
420 Int_t maxy2 = 0;
421 for (int y=ny-1; y>=0; y--)
422 {
423 const Float_t s = gx->Integral(1, y+1);
424 const Float_t b = hx->Integral(1, y+1);
425 const Float_t sig1 = MMath::SignificanceLiMa(s+b, b);
426 const Float_t sig2 = MMath::SignificanceLiMa(s+Stot+b+Btot, b+Btot)*TMath::Log10(s+Stot+1);
427 if (sig1>max1)
428 {
429 maxy1 = y;
430 max1 = sig1;
431 }
432 if (sig2>max2)
433 {
434 maxy2 = y;
435 max2 = sig2;
436
437 S=s;
438 B=b;
439 }
440
441 res1.SetBinContent(x+1, y+1, sig1);
442 }
443
444 Stot += S;
445 Btot += B;
446
447 gr1.SetPoint(x, h.GetXaxis()->GetBinCenter(x+1), h.GetYaxis()->GetBinCenter(maxy1+1));
448 gr2.SetPoint(x, h.GetXaxis()->GetBinCenter(x+1), h.GetYaxis()->GetBinCenter(maxy2+1));
449
450 delete hx;
451 delete gx;
452 }
453
454 //cout << "--> " << MMath::SignificanceLiMa(Stot+Btot, Btot) << " ";
455 //cout << Stot << " " << Btot << endl;
456
457
458 Int_t mx1=0;
459 Int_t my1=0;
460 Int_t mx2=0;
461 Int_t my2=0;
462 Int_t s1=0;
463 Int_t b1=0;
464 Int_t s2=0;
465 Int_t b2=0;
466 Double_t sig1=-1;
467 Double_t sig2=-1;
468 for (int x=0; x<nx; x++)
469 {
470 TH1 *hx = h.ProjectionY("H_py", x+1);
471 TH1 *gx = g.ProjectionY("G_py", x+1);
472 for (int y=0; y<ny; y++)
473 {
474 const Float_t s = gx->Integral(1, y+1);
475 const Float_t b = hx->Integral(1, y+1);
476 const Float_t sig = MMath::SignificanceLiMa(s+b, b);
477 res2.SetBinContent(x+1, y+1, sig);
478
479 // Search for top-rightmost maximum
480 if (sig>=sig1)
481 {
482 mx1=x+1;
483 my1=y+1;
484 s1 = TMath::Nint(s);
485 b1 = TMath::Nint(b);
486 sig1=sig;
487 }
488 if (TMath::Log10(s)*sig>=sig2)
489 {
490 mx2=x+1;
491 my2=y+1;
492 s2 = TMath::Nint(s);
493 b2 = TMath::Nint(b);
494 sig2=TMath::Log10(s)*sig;
495 }
496 }
497 delete hx;
498 delete gx;
499 }
500
501 TGraph gr3;
502 TGraph gr4;
503 gr4.SetTitle("Significance Li/Ma vs. Hadronness-cut");
504
505 TH1 *hx = h.ProjectionY("H_py");
506 TH1 *gx = g.ProjectionY("G_py");
507 for (int y=0; y<ny; y++)
508 {
509 const Float_t s = gx->Integral(1, y+1);
510 const Float_t b = hx->Integral(1, y+1);
511 const Float_t sg1 = MMath::SignificanceLiMa(s+b, b);
512 const Float_t sg2 = s<1 ? 0 : MMath::SignificanceLiMa(s+b, b)*TMath::Log10(s);
513
514 gr3.SetPoint(y, h.GetYaxis()->GetBinLowEdge(y+2), sg1);
515 gr4.SetPoint(y, h.GetYaxis()->GetBinLowEdge(y+2), sg2);
516 }
517 delete hx;
518 delete gx;
519
520 if (fDisplay)
521 {
522 TCanvas &c = fDisplay->AddTab("OptCut");
523 c.SetBorderMode(0);
524 c.Divide(2,2);
525
526 gROOT->SetSelectedPad(0);
527 c.cd(1);
528 gPad->SetBorderMode(0);
529 gPad->SetFrameBorderMode(0);
530 gPad->SetLogx();
531 gPad->SetGridx();
532 gPad->SetGridy();
533 h.DrawCopy();
534 g.DrawCopy("same");
535 gr1.SetMarkerStyle(kFullDotMedium);
536 gr1.DrawClone("LP")->SetBit(kCanDelete);
537 gr2.SetLineColor(kBlue);
538 gr2.SetMarkerStyle(kFullDotMedium);
539 gr2.DrawClone("LP")->SetBit(kCanDelete);
540
541 gROOT->SetSelectedPad(0);
542 c.cd(3);
543 gPad->SetBorderMode(0);
544 gPad->SetFrameBorderMode(0);
545 gPad->SetGridx();
546 gPad->SetGridy();
547 gr4.SetMinimum(0);
548 gr4.SetMarkerStyle(kFullDotMedium);
549 gr4.DrawClone("ALP")->SetBit(kCanDelete);
550 gr3.SetLineColor(kBlue);
551 gr3.SetMarkerStyle(kFullDotMedium);
552 gr3.DrawClone("LP")->SetBit(kCanDelete);
553
554 c.cd(2);
555 gPad->SetBorderMode(0);
556 gPad->SetFrameBorderMode(0);
557 gPad->SetLogx();
558 gPad->SetGridx();
559 gPad->SetGridy();
560 gPad->AddExec("color", "gStyle->SetPalette(1, 0);");
561 res1.SetMaximum(7);
562 res1.DrawCopy("colz");
563
564 c.cd(4);
565 gPad->SetBorderMode(0);
566 gPad->SetFrameBorderMode(0);
567 gPad->SetLogx();
568 gPad->SetGridx();
569 gPad->SetGridy();
570 gPad->AddExec("color", "gStyle->SetPalette(1, 0);");
571 res2.SetMaximum(res2.GetMaximum()*1.05);
572 res2.DrawCopy("colz");
573
574 // Int_t mx, my, mz;
575 // res2.GetMaximumBin(mx, my, mz);
576
577 TMarker m;
578 m.SetMarkerStyle(kStar);
579 m.DrawMarker(res2.GetXaxis()->GetBinCenter(mx1), res2.GetYaxis()->GetBinCenter(my1));
580 m.SetMarkerStyle(kPlus);
581 m.DrawMarker(res2.GetXaxis()->GetBinCenter(mx2), res2.GetYaxis()->GetBinCenter(my2));
582 }
583
584 if (ontime>0)
585 *fLog << all << "Observation Time: " << TMath::Nint(ontime/60) << "min" << endl;
586 *fLog << "Maximum Significance: " << Form("%.1f", sig1);
587 if (ontime>0)
588 *fLog << Form(" [%.1f/sqrt(h)]", sig1/TMath::Sqrt(ontime/3600));
589 *fLog << endl;
590
591 *fLog << "Significance: S=" << Form("%.1f", sig1) << " E=" << s1 << " B=" << b1 << " h<";
592 *fLog << Form("%.2f", res2.GetYaxis()->GetBinCenter(my1)) << " s>";
593 *fLog << Form("%3d", TMath::Nint(res2.GetXaxis()->GetBinCenter(mx1))) << endl;
594 *fLog << "Significance*LogE: S=" << Form("%.1f", sig2/TMath::Log10(s2)) << " E=" << s2 << " B=" << b2 << " h<";
595 *fLog << Form("%.2f", res2.GetYaxis()->GetBinCenter(my2)) << " s>";
596 *fLog << Form("%3d", TMath::Nint(res2.GetXaxis()->GetBinCenter(mx2))) << endl;
597 *fLog << endl;
598}
599
600// --------------------------------------------------------------------------
601//
602Bool_t MJTrainCuts::Process(const char *out)
603{
604 // =========================== Consistency checks ==================================
605 if (!fDataSetOn.IsValid())
606 {
607 *fLog << err << "ERROR - DataSet for on-data invalid!" << endl;
608 return kFALSE;
609 }
610 if (!fDataSetOff.IsValid())
611 {
612 *fLog << err << "ERROR - DataSet for off-data invalid!" << endl;
613 return kFALSE;
614 }
615
616 if (fDataSetOn.IsWobbleMode()!=fDataSetOff.IsWobbleMode())
617 {
618 *fLog << err << "ERROR - On- and Off-DataSet have different observation modes!" << endl;
619 return kFALSE;
620 }
621
622 if (fDataSetOn.IsMonteCarlo()!=fDataSetOff.IsMonteCarlo())
623 {
624 *fLog << err << "ERROR - On- and Off-DataSet have different monte carlo modes!" << endl;
625 return kFALSE;
626 }
627
628 if (!HasWritePermission(out))
629 return kFALSE;
630
631 // Check if needed binning exists
632 TIter NextH(&fHists);
633 TObject *o = 0;
634 while ((o=NextH()))
635 {
636 const HistSet1D *hs = static_cast<HistSet1D*>(o);
637 if (hs->CheckBinning(fBinnings))
638 continue;
639
640 *fLog << err << "ERROR - Not all needed binnning exist." << endl;
641 return kFALSE;
642 }
643
644 // =========================== Preparation ==================================
645
646 if (fDisplay)
647 fDisplay->SetTitle(out);
648
649 TStopwatch clock;
650 clock.Start();
651
652 // ------------------ Setup reading --------------------
653 MReadMarsFile read1("Events");
654 MReadMarsFile read2("Events");
655 MReadMarsFile read3("Events");
656 MReadMarsFile read4("Events");
657 read1.DisableAutoScheme();
658 read2.DisableAutoScheme();
659 read3.DisableAutoScheme();
660 read4.DisableAutoScheme();
661
662 // Setup four reading tasks with the on- and off-data of the two datasets
663 // Training -- On
664 if (!fDataSetOn.AddFilesOn(read1))
665 return kFALSE;
666 // Testing -- On
667 if (!fDataSetOn.AddFilesOff(read4))
668 return kFALSE;
669 // Training -- Off
670 if (!fDataSetOff.AddFilesOn(read3))
671 return kFALSE;
672 // Testing -- Off
673 if (!fDataSetOff.AddFilesOff(read2))
674 return kFALSE;
675
676 // ===============================================================================
677 // ====================== Training =========================
678 // ===============================================================================
679
680 // ---------------- Setup RF Matrix ----------------
681 MHMatrix train("Train");
682 train.AddColumns(fRules);
683// if (fEnableWeights[kTrainOn] || fEnableWeights[kTrainOff])
684// train.AddColumn("MWeight.fVal");
685 train.AddColumn("MHadronness.fVal");
686
687 // ----------------- Prepare filling Matrix RF ------------------
688
689 // Setup the hadronness container identifying gammas and off-data
690 // and setup a container for the weights
691 MParameterD had("MHadronness");
692 MParameterD wgt("MWeight");
693 MParameterD typ("Type");
694
695 // Add them to the parameter list
696 MParList plistx;
697 plistx.AddToList(this); // take care of fDisplay!
698 plistx.AddToList(&had);
699 plistx.AddToList(&wgt);
700 plistx.AddToList(&typ);
701
702 // Setup the tool class to fill the matrix
703 MTFillMatrix fill;
704 fill.SetLogStream(fLog);
705 fill.SetDisplay(fDisplay);
706 fill.AddPreCuts(fPreCuts);
707 fill.AddPreCuts(fTrainCuts);
708
709 // ----------------- Fill on data into matrix ------------------
710
711 // Setup the tool class to read the gammas and read them
712 fill.SetName("FillOn");
713 fill.SetDestMatrix1(&train, fNum[kTrainOn]);
714 fill.SetReader(&read1);
715// fill.AddPreTasks(fPreTasksSet[kTrainOn]);
716 fill.AddPreTasks(fPreTasks);
717// fill.AddPostTasks(fPostTasksSet[kTrainOn]);
718 fill.AddPostTasks(fPostTasks);
719
720 // Set classifier for gammas
721 had.SetVal(0);
722 wgt.SetVal(1);
723 typ.SetVal(0);
724
725 // Fill matrix
726 if (!fill.Process(plistx))
727 return kFALSE;
728
729 // Check the number or read events
730 const Int_t numontrn = train.GetNumRows();
731 if (numontrn==0)
732 {
733 *fLog << err << "ERROR - No on-data events available for training... aborting." << endl;
734 return kFALSE;
735 }
736
737 // Remove possible post tasks
738 fill.ClearPreTasks();
739 fill.ClearPostTasks();
740
741 // ----------------- Fill off data into matrix ------------------
742
743 // In case of wobble mode we have to do something special
744 // Setup the tool class to read the background and read them
745 fill.SetName("FillOff");
746 fill.SetDestMatrix1(&train, fNum[kTrainOff]);
747 fill.SetReader(&read3);
748// fill.AddPreTasks(fPreTasksSet[kTrainOff]);
749 fill.AddPreTasks(fPreTasks);
750// fill.AddPostTasks(fPostTasksSet[kTrainOff]);
751 fill.AddPostTasks(fPostTasks);
752
753 // Set classifier for background
754 had.SetVal(1);
755 wgt.SetVal(1);
756 typ.SetVal(1);
757
758 // Fiull matrix
759 if (!fill.Process(plistx))
760 return kFALSE;
761
762 // Check the number or read events
763 const Int_t numofftrn = train.GetNumRows()-numontrn;
764 if (numofftrn==0)
765 {
766 *fLog << err << "ERROR - No off-data available for training... aborting." << endl;
767 return kFALSE;
768 }
769
770 // ------------------------ Train RF --------------------------
771
772 MRanForestCalc rf("TrainSeparation", fTitle);
773 rf.SetNumTrees(fNumTrees);
774 rf.SetNdSize(fNdSize);
775 rf.SetNumTry(fNumTry);
776 rf.SetNumObsoleteVariables(1);
777// rf.SetLastDataColumnHasWeights(fEnableWeights[kTrainOn] || fEnableWeights[kTrainOff]);
778 rf.SetDebug(fDebug>1);
779 rf.SetDisplay(fDisplay);
780 rf.SetLogStream(fLog);
781 rf.SetFileName(out);
782 rf.SetNameOutput("MHadronness");
783
784 // Train the random forest either by classification or regression
785 if (!rf.Train(train, fUseRegression))
786 return kFALSE;
787
788 // ----------------- Print result of training ------------------
789
790 // Output information about what was going on so far.
791 *fLog << all;
792 fLog->Separator("The forest was trained with...");
793
794 *fLog << "Training method:" << endl;
795 *fLog << " * " << (fUseRegression?"regression":"classification") << endl;
796 /*
797 if (fEnableWeights[kTrainOn])
798 *fLog << " * weights for on-data" << endl;
799 if (fEnableWeights[kTrainOff])
800 *fLog << " * weights for off-data" << endl;
801 */
802 *fLog << endl;
803 *fLog << "Events used for training:" << endl;
804 *fLog << " * Gammas: " << numontrn << endl;
805 *fLog << " * Background: " << numofftrn << endl;
806 *fLog << endl;
807 *fLog << "Gamma/Background ratio:" << endl;
808 *fLog << " * Requested: " << (float)fNum[kTrainOn]/fNum[kTrainOff] << endl;
809 *fLog << " * Result: " << (float)numontrn/numofftrn << endl;
810 *fLog << endl;
811 *fLog << "Run-Time: " << Form("%.1f", clock.RealTime()/60) << "min (CPU: ";
812 *fLog << Form("%.1f", clock.CpuTime()/60) << "min)" << endl;
813 *fLog << endl;
814 *fLog << "Output file name: " << out << endl;
815
816 // ===============================================================================
817 // ====================== Testing =========================
818 // ===============================================================================
819 fLog->Separator("Test");
820
821 clock.Continue();
822
823 // ---------------------- Prepare eventloop off-data ---------------------
824
825 // Setup parlist and tasklist for testing
826 MParList plist;
827 MTaskList tlist;
828 plist.AddToList(this); // Take care of display
829 plist.AddToList(&tlist);
830
831// MMcEvt mcevt;
832// plist.AddToList(&mcevt);
833
834 plist.AddToList(&wgt);
835 plist.AddToList(&typ);
836
837 // ----- Setup histograms -----
838 MBinning binsy(50, 0 , 1, "BinningMH3Y", "lin");
839 MBinning binsx(40, 10, 100000, "BinningMH3X", "log");
840
841 plist.AddToList(&binsx);
842 plist.AddToList(&binsy);
843
844 MH3 h31("MHillas.fSize", "MHadronness.fVal");
845 MH3 h32("MHillas.fSize", "MHadronness.fVal");
846 MH3 h40("MMcEvt.fEnergy", "MHadronness.fVal");
847 h31.SetTitle("Background probability vs. Size:Size [phe]:Hadronness h");
848 h32.SetTitle("Background probability vs. Size:Size [phe]:Hadronness h");
849 h40.SetTitle("Background probability vs. Energy:Energy [GeV]:Hadronness h");
850
851 plist.AddToList(&fBinnings);
852
853 MHHadronness hist;
854
855 // ----- Setup tasks -----
856 MFillH fillh0(&hist, "", "FillHadronness");
857 MFillH fillh1(&h31, "", "FillHadVsSize");
858 MFillH fillh2(&h32, "", "FillHadVsSize");
859 MFillH fillh4(&h40, "", "FillHadVsEnergy");
860 fillh0.SetWeight("MWeight");
861 fillh1.SetWeight("MWeight");
862 fillh2.SetWeight("MWeight");
863 fillh4.SetWeight("MWeight");
864 fillh1.SetDrawOption("colz profy");
865 fillh2.SetDrawOption("colz profy");
866 fillh4.SetDrawOption("colz profy");
867 fillh1.SetNameTab("HadSzOff");
868 fillh2.SetNameTab("HadSzOn");
869 fillh4.SetNameTab("HadEnOn");
870 fillh0.SetBit(MFillH::kDoNotDisplay);
871
872 // ----- Setup filter -----
873 MFilterList precuts;
874 precuts.AddToList(fPreCuts);
875 precuts.AddToList(fTestCuts);
876
877 MContinue cont0(&precuts);
878 cont0.SetName("PreCuts");
879 cont0.SetInverted();
880
881 MFEventSelector sel; // FIXME: USING IT (WITH PROB?) in READ will by much faster!!!
882 sel.SetNumSelectEvts(fNum[kTestOff]);
883
884 MContinue contsel(&sel);
885 contsel.SetInverted();
886
887 // ----- Setup tasklist -----
888 tlist.AddToList(&read2); // Reading task
889 tlist.AddToList(&contsel); // event selector
890// tlist.AddToList(fPreTasksSet[kTestOff]);
891 tlist.AddToList(fPreTasks); // list of pre tasks
892 tlist.AddToList(&cont0); // list of pre cuts and test cuts
893 tlist.AddToList(&rf); // evaluate random forest
894// tlist.AddToList(fPostTasksSet[kTestOff]);
895 tlist.AddToList(fPostTasks); // list of post tasks
896 tlist.AddToList(&fillh1); // Fill HadSzOff
897
898 TList autodel;
899 autodel.SetOwner();
900
901 TPRegexp regexp("([0-9]:*)+");
902
903 NextH.Reset();
904 while ((o=NextH()))
905 {
906 HistSet1D *hs = static_cast<HistSet1D*>(o);
907
908 // FIXME: Move to beginning of function
909 // Check if needed binning exists
910 if (!hs->CheckBinning(fBinnings))
911 return kFALSE;
912
913 MHn *hist = hs->GetHistN(fRules);
914 MFillH *fill = new MFillH(hist, "", Form("Fill%s", hist->GetName()));
915
916 fill->SetWeight("MWeight");
917 fill->SetDrawOption("colz");
918 fill->SetNameTab(hist->GetName());
919 fill->SetBit(MFillH::kDoNotDisplay);
920
921 tlist.AddToList(fill);
922
923 autodel.Add(fill);
924 autodel.Add(hist);
925 }
926 tlist.AddToList(&fillh0); // Fill MHHadronness (not displayed in first loop)
927 tlist.AddToList(&fTestTasks); // list of test tasks
928
929 // Enable Acceleration
930 tlist.SetAccelerator(MTask::kAccDontReset|MTask::kAccDontTime);
931
932 // ---------------------- Run eventloop on background ---------------------
933 MEvtLoop loop;
934 loop.SetDisplay(fDisplay);
935 loop.SetLogStream(fLog);
936 loop.SetParList(&plist);
937 //if (!SetupEnv(loop))
938 // return kFALSE;
939
940 wgt.SetVal(1);
941 typ.SetVal(0);
942 if (!loop.Eventloop())
943 return kFALSE;
944
945 // ---------------------- Prepare eventloop on-data ---------------------
946
947 sel.SetNumSelectEvts(fNum[kTestOn]); // set number of target events
948
949 fillh0.ResetBit(MFillH::kDoNotDisplay); // Switch on display MHHadronness
950
951 TIter NextF(&autodel);
952 while ((o=NextF()))
953 {
954 MFillH *fill = dynamic_cast<MFillH*>(o);
955 if (fill)
956 fill->ResetBit(MFillH::kDoNotDisplay);
957 }
958
959 // Remove PreTasksOff and PostTasksOff from the list
960// tlist.RemoveFromList(fPreTasksSet[kTestOff]);
961// tlist.RemoveFromList(fPostTasksSet[kTestOff]);
962
963 tlist.Replace(&read4); // replace reading off-data by on-data
964
965 // Add the PreTasksOn directly after the reading task
966// tlist.AddToListAfter(fPreTasksSet[kTestOn], &c1);
967
968 // Add the PostTasksOn after rf
969// tlist.AddToListAfter(fPostTasksSet[kTestOn], &rf);
970
971 tlist.Replace(&fillh2); // Fill HadSzOn instead of HadSzOff
972 tlist.AddToListAfter(&fillh4, &fillh0); // Filling of HadEnOn
973
974 // Enable Acceleration
975 tlist.SetAccelerator(MTask::kAccDontReset|MTask::kAccDontTime);
976
977 // ---------------------- Run eventloop on-data ---------------------
978
979 wgt.SetVal(1);
980 typ.SetVal(1);
981 if (!loop.Eventloop())
982 return kFALSE;
983
984 // ---------------------- Print/Display result ---------------------
985
986 // Show what was going on in the testing
987 const Double_t numontst = h32.GetHist().GetEntries();
988 const Double_t numofftst = h31.GetHist().GetEntries();
989
990 *fLog << all;
991 fLog->Separator("The forest was tested with...");
992 *fLog << "Test method:" << endl;
993 *fLog << " * Random Forest: " << out << endl;
994 /*
995 if (fEnableWeights[kTestOn])
996 *fLog << " * weights for on-data" << endl;
997 if (fEnableWeights[kTestOff])
998 *fLog << " * weights for off-data" << endl;
999 */
1000 *fLog << endl;
1001 *fLog << "Events used for test:" << endl;
1002 *fLog << " * Gammas: " << numontst << endl;
1003 *fLog << " * Background: " << numofftst << endl;
1004 *fLog << endl;
1005 *fLog << "Gamma/Background ratio:" << endl;
1006 *fLog << " * Requested: " << (float)fNum[kTestOn]/fNum[kTestOff] << endl;
1007 *fLog << " * Result: " << (float)numontst/numofftst << endl;
1008 *fLog << endl;
1009
1010 // Display the result plots
1011 DisplayResult(h31, h32, -1);
1012 //DisplayResult(h31, h32, ontime);
1013
1014 *fLog << "Total Run-Time: " << Form("%.1f", clock.RealTime()/60) << "min (CPU: ";
1015 *fLog << Form("%.1f", clock.CpuTime()/60) << "min)" << endl;
1016 fLog->Separator();
1017
1018 // ----------------- Write result ------------------
1019
1020 fDataSetOn.SetName("DataSetOn");
1021 fDataSetOff.SetName("DataSetOff");
1022
1023 // Write the display
1024 TObjArray arr;
1025 arr.Add(const_cast<MDataSet*>(&fDataSetOn));
1026 arr.Add(const_cast<MDataSet*>(&fDataSetOff));
1027 if (fDisplay)
1028 arr.Add(fDisplay);
1029
1030 SetPathOut(out);
1031 return WriteContainer(arr, 0, "UPDATE");
1032}
1033
Note: See TracBrowser for help on using the repository browser.