source: trunk/Mars/mjtrain/MJTrainCuts.cc@ 10104

Last change on this file since 10104 was 9892, checked in by tbretz, 14 years ago
Fixed the 2D histograms in MJTrainCuts.
File size: 31.5 KB
Line 
1/* ======================================================================== *\
2!
3! *
4! * This file is part of MARS, the MAGIC Analysis and Reconstruction
5! * Software. It is distributed to you in the hope that it can be a useful
6! * and timesaving tool in analysing Data of imaging Cerenkov telescopes.
7! * It is distributed WITHOUT ANY WARRANTY.
8! *
9! * Permission to use, copy, modify and distribute this software and its
10! * documentation for any purpose is hereby granted without fee,
11! * provided that the above copyright notice appear in all copies and
12! * that both that copyright notice and this permission notice appear
13! * in supporting documentation. It is provided "as is" without express
14! * or implied warranty.
15! *
16!
17!
18! Author(s): Thomas Bretz 08/2010 <mailto:thomas.bretz@epfl.ch>
19!
20! Copyright: MAGIC Software Development, 2010
21!
22!
23\* ======================================================================== */
24
25/////////////////////////////////////////////////////////////////////////////
26//
27// MJTrainCuts
28// =========
29//
30// This class is meant as a tool to understand better what a trained
31// random forest is doing in the multi-dimensional phase space.
32// Consequently, it can also be used to deduce good one or two dimensional
33// cuts from the results by mimicing the behaviour of the random forest.
34//
35//
36// Usage
37// -----
38//
39// The instance is created by its default constructor
40//
41// MJTrainCuts opt;
42//
43// In a first step a random forest must be trained and in a second step its
44// performance can be evaluated with an independent test sample. The used
45// samples are defined by two MDataSet objects, one for the on-data (e.g.
46// gammas) and the other one for the off-data (e.g. protons). SequencesOn
47// and SequencesOff are used for testing and training respectively.
48//
49// MDataSet seton ("myondata.txt");
50// MDataSet setoff("myoffdata.txt");
51//
52// // If you want to use all available events
53// opt.SetDataSetOn(seton);
54// opt.SetDataSetOff(setoff);
55//
56// // Try to select 10000 and 30000 events for training and testing resp.
57// // opt.SetDataSetOn(seton, 10000, 30000);
58// // opt.SetDataSetOff(setoff, 10000, 30000);
59//
60// Note that by using several data set in one file (see MDataSet) you can
61// have everything in a single file.
62//
63// The variables which are used for training are now setup as usual
64//
65// Int_t p1 = opt.AddParameter("MHillas.fSize");
66// Int_t p2 = opt.AddParameter("MHillas.GetArea*MGeomCam.fConvMm2Deg^2");
67// Int_t p3 = opt.AddParameter("MHillasSrc.fDist*MGeomCam.fConvMm2Deg");
68//
69// In addition you can now setup a binning for the display of each train
70// parameter as follows (for details see MBinning)
71//
72// opt.AddBinning(p1, MBinning(40, 10, 10000, "", "log"));
73// opt.AddBinning(p2, MBinning(50, 0, 0.25));
74// opt.AddBinning(p3, MBinning(50, 0, 2.5));
75//
76// Since with increasing number of variables the possibly combinations
77// increase to fast you have to define which plots you are interested in,
78// for example:
79//
80// opt.AddHist(p3); // A 1D plot dist
81// opt.AddHist(p1, p2); // A 2D plot area vs. size
82// opt.AddHist(p3, p2); // A 2D plot dist vs. size
83//
84// Also 3D plots are avaiable but they are most probably difficult to
85// interprete.
86//
87// In addition to this you have the usual user interface, i.e. that
88// - PreCuts
89// - TrainCuts
90// - TestCuts
91// - PreTasks
92// - PostTasks
93// - TestTasks
94// are available. For details see MJOptimizeBase
95//
96// void EnableRegression() / void EnableClassification()
97// Defines whether to use the random forest's regression of
98// classification method. Classification is the default.
99//
100//
101// The produced plots
102// ------------------
103//
104// The tab with the plots filled will always look like this:
105//
106// +--------+--------+
107// |1 |2 |
108// +--------+--------|
109// |3 |4 |
110// +--------+ |
111// |5 | |
112// +--------+--------+
113//
114// Pad1 and Pad2 contain the weighted event distribution of the test-sample.
115//
116// Pad2 and Pad5 conatin a profile of the hadronness distribution of the
117// test-sample.
118//
119// Pad4 contains a profile of the hadronness distribution of on-data and
120// off-data together of the test-data.
121//
122// If the profiles for on-data and off-data are identical the displayed
123// hadronness is obviously independant of the other (non shown) trainings
124// variables. Therefore the difference between the two plots show how much
125// the variables are correlated. The same is true if the prfiles in
126// pad3 and pad5 don't differe from the profile in pad4.
127//
128// In the most simple case - the random forest is only trained with the
129// variables displayed - all three plots should be identical (apart from the
130// difference in the distrubution of the three sets).
131//
132// The plot in pad4 can now be used to deduce a good classical cut in the
133// displayed variables.
134//
135//
136// Example:
137// --------
138//
139// MJTrainCuts opt;
140// MDataSet seton ("dataset_on.txt");
141// MDataSet setoff("dataset_off.txt");
142// opt.SetDataSetOn(seton);
143// opt.SetDataSetOff(setmix);
144// Int_t p00 = opt.AddParameter("MHillas.fSize");
145// Int_t p01 = opt.AddParameter("MHillas.GetArea*MGeomCam.fConvMm2Deg^2");
146// opt.AddHist(p00, p01); // Area vs Size
147// MStatusDisplay *d = new MStatusDisplay;
148// opt.SetDisplay(d);
149// opt.Process("rf-cuts.root");
150//
151//
152// Random Numbers:
153// ---------------
154// Use:
155// if(gRandom)
156// delete gRandom;
157// gRandom = new TRandom3();
158// in advance to change the random number generator.
159//
160////////////////////////////////////////////////////////////////////////////
161#include "MJTrainCuts.h"
162
163#include <TGraph.h>
164#include <TMarker.h>
165#include <TCanvas.h>
166#include <TPRegexp.h>
167#include <TStopwatch.h>
168
169#include "MHMatrix.h"
170
171#include "MLog.h"
172#include "MLogManip.h"
173
174// tools
175#include "MMath.h"
176#include "MBinning.h"
177#include "MTFillMatrix.h"
178#include "MStatusDisplay.h"
179
180// eventloop
181#include "MParList.h"
182#include "MTaskList.h"
183#include "MEvtLoop.h"
184
185// tasks
186#include "MReadMarsFile.h"
187#include "MContinue.h"
188#include "MFillH.h"
189#include "MRanForestCalc.h"
190
191// container
192#include "MParameters.h"
193
194// histograms
195#include "MHn.h"
196#include "MHHadronness.h"
197
198// filter
199#include "MFEventSelector.h"
200#include "MFilterList.h"
201
202using namespace std;
203
204class HistSet1D : public TObject
205{
206protected:
207 UInt_t fNx;
208
209 virtual void AddHist(MHn &h, const char *rx, const char *, const char *) const
210 {
211 h.AddHist(rx);
212 }
213 virtual void AddProf(MHn &h, const char *rx, const char *, const char *) const
214 {
215 h.AddHist(rx, "MHadronness.fVal", MH3::kProfileSpread);
216 }
217 virtual void SetupName(MHn &h, const char *name) const
218 {
219 h.InitName(Form("%s%d;%d", name, fNx, fNx));
220 }
221 virtual void SetupHist(MHn &h, const char *name, const char *title) const
222 {
223 SetupName(h, name);
224
225 h.SetAutoRange(kFALSE, kFALSE, kFALSE);
226 h.InitTitle(title);
227 }
228
229 void CreateHist(MHn &h, const char *rx, const char *ry=0, const char *rz=0) const
230 {
231 h.SetLayout(MHn::kComplex);
232 h.SetBit(MHn::kDoNotReset);
233
234 AddHist(h, rx, ry, rz);
235 SetupHist(h, "DistOn", "Distribution of on-data");
236 h.SetWeight("Type.fVal");
237
238 AddHist(h, rx, ry, rz);
239 SetupHist(h, "DistOff", "Distribution of off-data");
240 h.SetWeight("1-Type.fVal");
241
242 AddProf(h, rx, ry, rz);
243 SetupHist(h, "HadOn", "Hadronness profile for on-data");
244 h.SetWeight("Type.fVal");
245
246 AddProf(h, rx, ry, rz);
247 SetupHist(h, "Had", "Hadronness profile for all events");
248
249 AddProf(h, rx, ry, rz);
250 SetupHist(h, "HadOff", "Hadronness profile for off-data");
251 h.SetWeight("1-Type.fVal");
252 }
253
254
255public:
256 HistSet1D(UInt_t nx) : fNx(nx) { }
257
258 virtual MHn *GetHistN(const TList &rules) const
259 {
260 if (!rules.At(fNx))
261 return 0;
262
263 MHn *h = new MHn(Form("%d", fNx));
264
265 CreateHist(*h, rules.At(fNx)->GetName());
266
267 return h;
268 }
269
270 virtual Bool_t CheckBinning(const TObjArray &binnings) const
271 {
272 return binnings.FindObject(Form("Binning%d", fNx));
273 }
274};
275
276class HistSet2D : public HistSet1D
277{
278protected:
279 UInt_t fNy;
280
281 void AddHist(MHn &h, const char *rx, const char *ry, const char *) const
282 {
283 h.AddHist(rx, ry);
284 }
285 void AddProf(MHn &h, const char *rx, const char *ry, const char *) const
286 {
287 h.AddHist(rx, ry, "MHadronness.fVal", MH3::kProfileSpread);
288 }
289 void SetupName(MHn &h, const char *name) const
290 {
291 h.InitName(Form("%s%d:%d;%d;%d", name, fNx, fNy, fNx, fNy));
292 }
293
294 void SetupHist(MHn &h, const char *name, const char *title) const
295 {
296 HistSet1D::SetupHist(h, name, title);
297 h.SetDrawOption("colz");
298 }
299
300public:
301 HistSet2D(UInt_t nx, UInt_t ny) : HistSet1D(nx), fNy(ny) { }
302
303 MHn *GetHistN(const TList &rules) const
304 {
305 if (!rules.At(fNx) || !rules.At(fNy))
306 return 0;
307
308 MHn *h = new MHn(Form("%d:%d", fNx, fNy));
309
310 CreateHist(*h, rules.At(fNx)->GetName(), rules.At(fNy)->GetName());
311
312 return h;
313 }
314 Bool_t CheckBinning(const TObjArray &binnings) const
315 {
316 return HistSet1D::CheckBinning(binnings) && binnings.FindObject(Form("Binning%d", fNy));
317 }
318};
319
320
321class HistSet3D : public HistSet2D
322{
323private:
324 UInt_t fNz;
325
326 void AddHist(MHn &h, const char *rx, const char *ry, const char *rz) const
327 {
328 h.AddHist(rx, ry, rz);
329 }
330 void AddProf(MHn &h, const char *rx, const char *ry, const char *rz) const
331 {
332 h.AddHist(rx, ry, rz, "MHadronness.fVal");
333 }
334 void SetupName(MHn &h, const char *name) const
335 {
336 h.InitName(Form("%s%d:%d:%d;%d;%d;%d", name, fNx, fNy, fNz, fNx, fNy, fNz));
337 }
338
339public:
340 HistSet3D(UInt_t nx, UInt_t ny, UInt_t nz) : HistSet2D(nx, ny), fNz(nz) { }
341
342 MHn *GetHistN(const TList &rules) const
343 {
344 if (!rules.At(fNx) || !rules.At(fNy) || !rules.At(fNz))
345 return 0;
346
347 MHn *h = new MHn(Form("%d:%d:%d", fNx, fNy, fNz));
348
349 CreateHist(*h,
350 rules.At(fNx)->GetName(),
351 rules.At(fNy)->GetName(),
352 rules.At(fNy)->GetName());
353
354 return h;
355 }
356 Bool_t CheckBinning(const TObjArray &binnings) const
357 {
358 return HistSet2D::CheckBinning(binnings) && binnings.FindObject(Form("Binning%d", fNz));
359 }
360};
361
362// ---------------------------------------------------------------------------------------
363
364void MJTrainCuts::AddHist(UInt_t nx)
365{
366 fHists.Add(new HistSet1D(nx));
367}
368
369void MJTrainCuts::AddHist(UInt_t nx, UInt_t ny)
370{
371 fHists.Add(new HistSet2D(nx, ny));
372}
373
374void MJTrainCuts::AddHist(UInt_t nx, UInt_t ny, UInt_t nz)
375{
376 fHists.Add(new HistSet3D(nx, ny, nz));
377}
378
379void MJTrainCuts::AddBinning(UInt_t n, const MBinning &bins)
380{
381 const char *name = Form("Binning%d", n);
382
383 TObject *o = fBinnings.FindObject(name);
384 if (o)
385 {
386 delete fBinnings.Remove(o);
387 *fLog << warn << "WARNING - Binning for parameter " << n << " (" << name << ") already exists... replaced." << endl;
388 }
389
390 // FIXME: Check for existence
391 fBinnings.Add(new MBinning(bins, name, bins.GetTitle()));
392}
393
394/*
395void MJTrainCuts::AddBinning(const MBinning &bins)
396{
397 // FIXME: Check for existence
398 fBinnings.Add(new MBinning(bins, bins.GetName(), bins.GetTitle()));
399}
400*/
401
402// ---------------------------------------------------------------------------------------
403
404// --------------------------------------------------------------------------
405//
406void MJTrainCuts::DisplayResult(MH3 &h31, MH3 &h32, Float_t ontime)
407{
408 TH2D &g = (TH2D&)h32.GetHist();
409 TH2D &h = (TH2D&)h31.GetHist();
410
411 h.SetMarkerColor(kRed);
412 g.SetMarkerColor(kBlue);
413
414 TH2D res1(g);
415 TH2D res2(g);
416
417 h.SetTitle("Hadronness-Distribution vs. Size");
418 res1.SetTitle("Significance Li/Ma");
419 res1.SetXTitle("Size [phe]");
420 res1.SetYTitle("Hadronness");
421 res2.SetTitle("Significance-Distribution");
422 res2.SetXTitle("Size-Cut [phe]");
423 res2.SetYTitle("Hadronness-Cut");
424 res1.SetContour(50);
425 res2.SetContour(50);
426
427 const Int_t nx = h.GetNbinsX();
428 const Int_t ny = h.GetNbinsY();
429
430 gROOT->SetSelectedPad(NULL);
431
432
433 Double_t Stot = 0;
434 Double_t Btot = 0;
435
436 Double_t max2 = -1;
437
438 TGraph gr1;
439 TGraph gr2;
440 for (int x=nx-1; x>=0; x--)
441 {
442 TH1 *hx = h.ProjectionY("H_py", x+1, x+1);
443 TH1 *gx = g.ProjectionY("G_py", x+1, x+1);
444
445 Double_t S = 0;
446 Double_t B = 0;
447
448 Double_t max1 = -1;
449 Int_t maxy1 = 0;
450 Int_t maxy2 = 0;
451 for (int y=ny-1; y>=0; y--)
452 {
453 const Float_t s = gx->Integral(1, y+1);
454 const Float_t b = hx->Integral(1, y+1);
455 const Float_t sig1 = MMath::SignificanceLiMa(s+b, b);
456 const Float_t sig2 = MMath::SignificanceLiMa(s+Stot+b+Btot, b+Btot)*TMath::Log10(s+Stot+1);
457 if (sig1>max1)
458 {
459 maxy1 = y;
460 max1 = sig1;
461 }
462 if (sig2>max2)
463 {
464 maxy2 = y;
465 max2 = sig2;
466
467 S=s;
468 B=b;
469 }
470
471 res1.SetBinContent(x+1, y+1, sig1);
472 }
473
474 Stot += S;
475 Btot += B;
476
477 gr1.SetPoint(x, h.GetXaxis()->GetBinCenter(x+1), h.GetYaxis()->GetBinCenter(maxy1+1));
478 gr2.SetPoint(x, h.GetXaxis()->GetBinCenter(x+1), h.GetYaxis()->GetBinCenter(maxy2+1));
479
480 delete hx;
481 delete gx;
482 }
483
484 //cout << "--> " << MMath::SignificanceLiMa(Stot+Btot, Btot) << " ";
485 //cout << Stot << " " << Btot << endl;
486
487
488 Int_t mx1=0;
489 Int_t my1=0;
490 Int_t mx2=0;
491 Int_t my2=0;
492 Int_t s1=0;
493 Int_t b1=0;
494 Int_t s2=0;
495 Int_t b2=0;
496 Double_t sig1=-1;
497 Double_t sig2=-1;
498 for (int x=0; x<nx; x++)
499 {
500 TH1 *hx = h.ProjectionY("H_py", x+1);
501 TH1 *gx = g.ProjectionY("G_py", x+1);
502 for (int y=0; y<ny; y++)
503 {
504 const Float_t s = gx->Integral(1, y+1);
505 const Float_t b = hx->Integral(1, y+1);
506 const Float_t sig = MMath::SignificanceLiMa(s+b, b);
507 res2.SetBinContent(x+1, y+1, sig);
508
509 // Search for top-rightmost maximum
510 if (sig>=sig1)
511 {
512 mx1=x+1;
513 my1=y+1;
514 s1 = TMath::Nint(s);
515 b1 = TMath::Nint(b);
516 sig1=sig;
517 }
518 if (TMath::Log10(s)*sig>=sig2)
519 {
520 mx2=x+1;
521 my2=y+1;
522 s2 = TMath::Nint(s);
523 b2 = TMath::Nint(b);
524 sig2=TMath::Log10(s)*sig;
525 }
526 }
527 delete hx;
528 delete gx;
529 }
530
531 TGraph gr3;
532 TGraph gr4;
533 gr4.SetTitle("Significance Li/Ma vs. Hadronness-cut");
534
535 TH1 *hx = h.ProjectionY("H_py");
536 TH1 *gx = g.ProjectionY("G_py");
537 for (int y=0; y<ny; y++)
538 {
539 const Float_t s = gx->Integral(1, y+1);
540 const Float_t b = hx->Integral(1, y+1);
541 const Float_t sg1 = MMath::SignificanceLiMa(s+b, b);
542 const Float_t sg2 = s<1 ? 0 : MMath::SignificanceLiMa(s+b, b)*TMath::Log10(s);
543
544 gr3.SetPoint(y, h.GetYaxis()->GetBinLowEdge(y+2), sg1);
545 gr4.SetPoint(y, h.GetYaxis()->GetBinLowEdge(y+2), sg2);
546 }
547 delete hx;
548 delete gx;
549
550 if (fDisplay)
551 {
552 TCanvas &c = fDisplay->AddTab("OptCut");
553 c.SetBorderMode(0);
554 c.Divide(2,2);
555
556 gROOT->SetSelectedPad(0);
557 c.cd(1);
558 gPad->SetBorderMode(0);
559 gPad->SetFrameBorderMode(0);
560 gPad->SetLogx();
561 gPad->SetGridx();
562 gPad->SetGridy();
563 h.DrawCopy();
564 g.DrawCopy("same");
565 gr1.SetMarkerStyle(kFullDotMedium);
566 gr1.DrawClone("LP")->SetBit(kCanDelete);
567 gr2.SetLineColor(kBlue);
568 gr2.SetMarkerStyle(kFullDotMedium);
569 gr2.DrawClone("LP")->SetBit(kCanDelete);
570
571 gROOT->SetSelectedPad(0);
572 c.cd(3);
573 gPad->SetBorderMode(0);
574 gPad->SetFrameBorderMode(0);
575 gPad->SetGridx();
576 gPad->SetGridy();
577 gr4.SetMinimum(0);
578 gr4.SetMarkerStyle(kFullDotMedium);
579 gr4.DrawClone("ALP")->SetBit(kCanDelete);
580 gr3.SetLineColor(kBlue);
581 gr3.SetMarkerStyle(kFullDotMedium);
582 gr3.DrawClone("LP")->SetBit(kCanDelete);
583
584 c.cd(2);
585 gPad->SetBorderMode(0);
586 gPad->SetFrameBorderMode(0);
587 gPad->SetLogx();
588 gPad->SetGridx();
589 gPad->SetGridy();
590 gPad->AddExec("color", "gStyle->SetPalette(1, 0);");
591 res1.SetMaximum(7);
592 res1.DrawCopy("colz");
593
594 c.cd(4);
595 gPad->SetBorderMode(0);
596 gPad->SetFrameBorderMode(0);
597 gPad->SetLogx();
598 gPad->SetGridx();
599 gPad->SetGridy();
600 gPad->AddExec("color", "gStyle->SetPalette(1, 0);");
601 res2.SetMaximum(res2.GetMaximum()*1.05);
602 res2.DrawCopy("colz");
603
604 // Int_t mx, my, mz;
605 // res2.GetMaximumBin(mx, my, mz);
606
607 TMarker m;
608 m.SetMarkerStyle(kStar);
609 m.DrawMarker(res2.GetXaxis()->GetBinCenter(mx1), res2.GetYaxis()->GetBinCenter(my1));
610 m.SetMarkerStyle(kPlus);
611 m.DrawMarker(res2.GetXaxis()->GetBinCenter(mx2), res2.GetYaxis()->GetBinCenter(my2));
612 }
613
614 if (ontime>0)
615 *fLog << all << "Observation Time: " << TMath::Nint(ontime/60) << "min" << endl;
616 *fLog << "Maximum Significance: " << Form("%.1f", sig1);
617 if (ontime>0)
618 *fLog << Form(" [%.1f/sqrt(h)]", sig1/TMath::Sqrt(ontime/3600));
619 *fLog << endl;
620
621 *fLog << "Significance: S=" << Form("%.1f", sig1) << " E=" << s1 << " B=" << b1 << " h<";
622 *fLog << Form("%.2f", res2.GetYaxis()->GetBinCenter(my1)) << " s>";
623 *fLog << Form("%3d", TMath::Nint(res2.GetXaxis()->GetBinCenter(mx1))) << endl;
624 *fLog << "Significance*LogE: S=" << Form("%.1f", sig2/TMath::Log10(s2)) << " E=" << s2 << " B=" << b2 << " h<";
625 *fLog << Form("%.2f", res2.GetYaxis()->GetBinCenter(my2)) << " s>";
626 *fLog << Form("%3d", TMath::Nint(res2.GetXaxis()->GetBinCenter(mx2))) << endl;
627 *fLog << endl;
628}
629
630// --------------------------------------------------------------------------
631//
632Bool_t MJTrainCuts::Process(const char *out)
633{
634 // =========================== Consistency checks ==================================
635 if (!fDataSetOn.IsValid())
636 {
637 *fLog << err << "ERROR - DataSet for on-data invalid!" << endl;
638 return kFALSE;
639 }
640 if (!fDataSetOff.IsValid())
641 {
642 *fLog << err << "ERROR - DataSet for off-data invalid!" << endl;
643 return kFALSE;
644 }
645
646 if (fDataSetOn.IsWobbleMode()!=fDataSetOff.IsWobbleMode())
647 {
648 *fLog << err << "ERROR - On- and Off-DataSet have different observation modes!" << endl;
649 return kFALSE;
650 }
651
652 if (fDataSetOn.IsMonteCarlo()!=fDataSetOff.IsMonteCarlo())
653 {
654 *fLog << err << "ERROR - On- and Off-DataSet have different monte carlo modes!" << endl;
655 return kFALSE;
656 }
657
658 if (!HasWritePermission(out))
659 return kFALSE;
660
661 // Check if needed binning exists
662 TIter NextH(&fHists);
663 TObject *o = 0;
664 while ((o=NextH()))
665 {
666 const HistSet1D *hs = static_cast<HistSet1D*>(o);
667 if (hs->CheckBinning(fBinnings))
668 continue;
669
670 *fLog << err << "ERROR - Not all needed binnning exist." << endl;
671 return kFALSE;
672 }
673
674 // =========================== Preparation ==================================
675
676 if (fDisplay)
677 fDisplay->SetTitle(out);
678
679 TStopwatch clock;
680 clock.Start();
681
682 // ------------------ Setup reading --------------------
683 MReadMarsFile read1("Events");
684 MReadMarsFile read2("Events");
685 MReadMarsFile read3("Events");
686 MReadMarsFile read4("Events");
687 read1.DisableAutoScheme();
688 read2.DisableAutoScheme();
689 read3.DisableAutoScheme();
690 read4.DisableAutoScheme();
691
692 // Setup four reading tasks with the on- and off-data of the two datasets
693 // Training -- On
694 if (!fDataSetOn.AddFilesOn(read1))
695 return kFALSE;
696 // Testing -- On
697 if (!fDataSetOn.AddFilesOff(read4))
698 return kFALSE;
699 // Training -- Off
700 if (!fDataSetOff.AddFilesOn(read3))
701 return kFALSE;
702 // Testing -- Off
703 if (!fDataSetOff.AddFilesOff(read2))
704 return kFALSE;
705
706 // ===============================================================================
707 // ====================== Training =========================
708 // ===============================================================================
709
710 // ---------------- Setup RF Matrix ----------------
711 MHMatrix train("Train");
712 train.AddColumns(fRules);
713// if (fEnableWeights[kTrainOn] || fEnableWeights[kTrainOff])
714// train.AddColumn("MWeight.fVal");
715 train.AddColumn("MHadronness.fVal");
716
717 // ----------------- Prepare filling Matrix RF ------------------
718
719 // Setup the hadronness container identifying gammas and off-data
720 // and setup a container for the weights
721 MParameterD had("MHadronness");
722 MParameterD wgt("MWeight");
723 MParameterD typ("Type");
724
725 // Add them to the parameter list
726 MParList plistx;
727 plistx.AddToList(this); // take care of fDisplay!
728 plistx.AddToList(&had);
729 plistx.AddToList(&wgt);
730 plistx.AddToList(&typ);
731
732 // Setup the tool class to fill the matrix
733 MTFillMatrix fill;
734 fill.SetLogStream(fLog);
735 fill.SetDisplay(fDisplay);
736 fill.AddPreCuts(fPreCuts);
737 fill.AddPreCuts(fTrainCuts);
738
739 // ----------------- Fill on data into matrix ------------------
740
741 // Setup the tool class to read the gammas and read them
742 fill.SetName("FillOn");
743 fill.SetDestMatrix1(&train, fNum[kTrainOn]);
744 fill.SetReader(&read1);
745// fill.AddPreTasks(fPreTasksSet[kTrainOn]);
746 fill.AddPreTasks(fPreTasks);
747// fill.AddPostTasks(fPostTasksSet[kTrainOn]);
748 fill.AddPostTasks(fPostTasks);
749
750 // Set classifier for gammas
751 had.SetVal(0);
752 wgt.SetVal(1);
753 typ.SetVal(0);
754
755 // Fill matrix
756 if (!fill.Process(plistx))
757 return kFALSE;
758
759 // Check the number or read events
760 const Int_t numontrn = train.GetNumRows();
761 if (numontrn==0)
762 {
763 *fLog << err << "ERROR - No on-data events available for training... aborting." << endl;
764 return kFALSE;
765 }
766
767 // Remove possible post tasks
768 fill.ClearPreTasks();
769 fill.ClearPostTasks();
770
771 // ----------------- Fill off data into matrix ------------------
772
773 // In case of wobble mode we have to do something special
774 // Setup the tool class to read the background and read them
775 fill.SetName("FillOff");
776 fill.SetDestMatrix1(&train, fNum[kTrainOff]);
777 fill.SetReader(&read3);
778// fill.AddPreTasks(fPreTasksSet[kTrainOff]);
779 fill.AddPreTasks(fPreTasks);
780// fill.AddPostTasks(fPostTasksSet[kTrainOff]);
781 fill.AddPostTasks(fPostTasks);
782
783 // Set classifier for background
784 had.SetVal(1);
785 wgt.SetVal(1);
786 typ.SetVal(1);
787
788 // Fiull matrix
789 if (!fill.Process(plistx))
790 return kFALSE;
791
792 // Check the number or read events
793 const Int_t numofftrn = train.GetNumRows()-numontrn;
794 if (numofftrn==0)
795 {
796 *fLog << err << "ERROR - No off-data available for training... aborting." << endl;
797 return kFALSE;
798 }
799
800 // ------------------------ Train RF --------------------------
801
802 MRanForestCalc rf("TrainSeparation", fTitle);
803 rf.SetNumTrees(fNumTrees);
804 rf.SetNdSize(fNdSize);
805 rf.SetNumTry(fNumTry);
806 rf.SetNumObsoleteVariables(1);
807// rf.SetLastDataColumnHasWeights(fEnableWeights[kTrainOn] || fEnableWeights[kTrainOff]);
808 rf.SetDebug(fDebug>1);
809 rf.SetDisplay(fDisplay);
810 rf.SetLogStream(fLog);
811 rf.SetFileName(out);
812 rf.SetNameOutput("MHadronness");
813
814 // Train the random forest either by classification or regression
815 if (!rf.Train(train, fUseRegression))
816 return kFALSE;
817
818 // ----------------- Print result of training ------------------
819
820 // Output information about what was going on so far.
821 *fLog << all;
822 fLog->Separator("The forest was trained with...");
823
824 *fLog << "Training method:" << endl;
825 *fLog << " * " << (fUseRegression?"regression":"classification") << endl;
826 /*
827 if (fEnableWeights[kTrainOn])
828 *fLog << " * weights for on-data" << endl;
829 if (fEnableWeights[kTrainOff])
830 *fLog << " * weights for off-data" << endl;
831 */
832 *fLog << endl;
833 *fLog << "Events used for training:" << endl;
834 *fLog << " * Gammas: " << numontrn << endl;
835 *fLog << " * Background: " << numofftrn << endl;
836 *fLog << endl;
837 *fLog << "Gamma/Background ratio:" << endl;
838 *fLog << " * Requested: " << (float)fNum[kTrainOn]/fNum[kTrainOff] << endl;
839 *fLog << " * Result: " << (float)numontrn/numofftrn << endl;
840 *fLog << endl;
841 *fLog << "Run-Time: " << Form("%.1f", clock.RealTime()/60) << "min (CPU: ";
842 *fLog << Form("%.1f", clock.CpuTime()/60) << "min)" << endl;
843 *fLog << endl;
844 *fLog << "Output file name: " << out << endl;
845
846 // ===============================================================================
847 // ====================== Testing =========================
848 // ===============================================================================
849 fLog->Separator("Test");
850
851 clock.Continue();
852
853 // ---------------------- Prepare eventloop off-data ---------------------
854
855 // Setup parlist and tasklist for testing
856 MParList plist;
857 MTaskList tlist;
858 plist.AddToList(this); // Take care of display
859 plist.AddToList(&tlist);
860
861// MMcEvt mcevt;
862// plist.AddToList(&mcevt);
863
864 plist.AddToList(&wgt);
865 plist.AddToList(&typ);
866
867 // ----- Setup histograms -----
868 MBinning binsy(50, 0 , 1, "BinningMH3Y", "lin");
869 MBinning binsx(40, 10, 100000, "BinningMH3X", "log");
870
871 plist.AddToList(&binsx);
872 plist.AddToList(&binsy);
873
874 MH3 h31("MHillas.fSize", "MHadronness.fVal");
875 MH3 h32("MHillas.fSize", "MHadronness.fVal");
876 MH3 h40("MMcEvt.fEnergy", "MHadronness.fVal");
877 h31.SetTitle("Background probability vs. Size:Size [phe]:Hadronness h");
878 h32.SetTitle("Background probability vs. Size:Size [phe]:Hadronness h");
879 h40.SetTitle("Background probability vs. Energy:Energy [GeV]:Hadronness h");
880
881 plist.AddToList(&fBinnings);
882
883 MHHadronness hist;
884
885 // ----- Setup tasks -----
886 MFillH fillh0(&hist, "", "FillHadronness");
887 MFillH fillh1(&h31, "", "FillHadVsSize");
888 MFillH fillh2(&h32, "", "FillHadVsSize");
889 MFillH fillh4(&h40, "", "FillHadVsEnergy");
890 fillh0.SetWeight("MWeight");
891 fillh1.SetWeight("MWeight");
892 fillh2.SetWeight("MWeight");
893 fillh4.SetWeight("MWeight");
894 fillh1.SetDrawOption("colz profy");
895 fillh2.SetDrawOption("colz profy");
896 fillh4.SetDrawOption("colz profy");
897 fillh1.SetNameTab("HadSzOff");
898 fillh2.SetNameTab("HadSzOn");
899 fillh4.SetNameTab("HadEnOn");
900 fillh0.SetBit(MFillH::kDoNotDisplay);
901
902 // ----- Setup filter -----
903 MFilterList precuts;
904 precuts.AddToList(fPreCuts);
905 precuts.AddToList(fTestCuts);
906
907 MContinue cont0(&precuts);
908 cont0.SetName("PreCuts");
909 cont0.SetInverted();
910
911 MFEventSelector sel; // FIXME: USING IT (WITH PROB?) in READ will by much faster!!!
912 sel.SetNumSelectEvts(fNum[kTestOff]);
913
914 MContinue contsel(&sel);
915 contsel.SetInverted();
916
917 // ----- Setup tasklist -----
918 tlist.AddToList(&read2); // Reading task
919 tlist.AddToList(&contsel); // event selector
920// tlist.AddToList(fPreTasksSet[kTestOff]);
921 tlist.AddToList(fPreTasks); // list of pre tasks
922 tlist.AddToList(&cont0); // list of pre cuts and test cuts
923 tlist.AddToList(&rf); // evaluate random forest
924// tlist.AddToList(fPostTasksSet[kTestOff]);
925 tlist.AddToList(fPostTasks); // list of post tasks
926 tlist.AddToList(&fillh1); // Fill HadSzOff
927
928 TList autodel;
929 autodel.SetOwner();
930
931 TPRegexp regexp("([0-9]:*)+");
932
933 NextH.Reset();
934 while ((o=NextH()))
935 {
936 HistSet1D *hs = static_cast<HistSet1D*>(o);
937
938 // FIXME: Move to beginning of function
939 // Check if needed binning exists
940 if (!hs->CheckBinning(fBinnings))
941 return kFALSE;
942
943 MHn *hist = hs->GetHistN(fRules);
944 MFillH *fill = new MFillH(hist, "", Form("Fill%s", hist->GetName()));
945
946 fill->SetWeight("MWeight");
947 fill->SetDrawOption("colz");
948 fill->SetNameTab(hist->GetName());
949 fill->SetBit(MFillH::kDoNotDisplay);
950
951 tlist.AddToList(fill);
952
953 autodel.Add(fill);
954 autodel.Add(hist);
955 }
956 tlist.AddToList(&fillh0); // Fill MHHadronness (not displayed in first loop)
957 tlist.AddToList(&fTestTasks); // list of test tasks
958
959 // Enable Acceleration
960 tlist.SetAccelerator(MTask::kAccDontReset|MTask::kAccDontTime);
961
962 // ---------------------- Run eventloop on background ---------------------
963 MEvtLoop loop;
964 loop.SetDisplay(fDisplay);
965 loop.SetLogStream(fLog);
966 loop.SetParList(&plist);
967 //if (!SetupEnv(loop))
968 // return kFALSE;
969
970 wgt.SetVal(1);
971 typ.SetVal(0);
972 if (!loop.Eventloop())
973 return kFALSE;
974
975 // ---------------------- Prepare eventloop on-data ---------------------
976
977 sel.SetNumSelectEvts(fNum[kTestOn]); // set number of target events
978
979 fillh0.ResetBit(MFillH::kDoNotDisplay); // Switch on display MHHadronness
980
981 TIter NextF(&autodel);
982 while ((o=NextF()))
983 {
984 MFillH *fill = dynamic_cast<MFillH*>(o);
985 if (fill)
986 fill->ResetBit(MFillH::kDoNotDisplay);
987 }
988
989 // Remove PreTasksOff and PostTasksOff from the list
990// tlist.RemoveFromList(fPreTasksSet[kTestOff]);
991// tlist.RemoveFromList(fPostTasksSet[kTestOff]);
992
993 tlist.Replace(&read4); // replace reading off-data by on-data
994
995 // Add the PreTasksOn directly after the reading task
996// tlist.AddToListAfter(fPreTasksSet[kTestOn], &c1);
997
998 // Add the PostTasksOn after rf
999// tlist.AddToListAfter(fPostTasksSet[kTestOn], &rf);
1000
1001 tlist.Replace(&fillh2); // Fill HadSzOn instead of HadSzOff
1002 tlist.AddToListAfter(&fillh4, &fillh0); // Filling of HadEnOn
1003
1004 // Enable Acceleration
1005 tlist.SetAccelerator(MTask::kAccDontReset|MTask::kAccDontTime);
1006
1007 // ---------------------- Run eventloop on-data ---------------------
1008
1009 wgt.SetVal(1);
1010 typ.SetVal(1);
1011 if (!loop.Eventloop())
1012 return kFALSE;
1013
1014 // ---------------------- Print/Display result ---------------------
1015
1016 // Show what was going on in the testing
1017 const Double_t numontst = h32.GetHist().GetEntries();
1018 const Double_t numofftst = h31.GetHist().GetEntries();
1019
1020 *fLog << all;
1021 fLog->Separator("The forest was tested with...");
1022 *fLog << "Test method:" << endl;
1023 *fLog << " * Random Forest: " << out << endl;
1024 /*
1025 if (fEnableWeights[kTestOn])
1026 *fLog << " * weights for on-data" << endl;
1027 if (fEnableWeights[kTestOff])
1028 *fLog << " * weights for off-data" << endl;
1029 */
1030 *fLog << endl;
1031 *fLog << "Events used for test:" << endl;
1032 *fLog << " * Gammas: " << numontst << endl;
1033 *fLog << " * Background: " << numofftst << endl;
1034 *fLog << endl;
1035 *fLog << "Gamma/Background ratio:" << endl;
1036 *fLog << " * Requested: " << (float)fNum[kTestOn]/fNum[kTestOff] << endl;
1037 *fLog << " * Result: " << (float)numontst/numofftst << endl;
1038 *fLog << endl;
1039
1040 // Display the result plots
1041 DisplayResult(h31, h32, -1);
1042 //DisplayResult(h31, h32, ontime);
1043
1044 *fLog << "Total Run-Time: " << Form("%.1f", clock.RealTime()/60) << "min (CPU: ";
1045 *fLog << Form("%.1f", clock.CpuTime()/60) << "min)" << endl;
1046 fLog->Separator();
1047
1048 // ----------------- Write result ------------------
1049
1050 fDataSetOn.SetName("DataSetOn");
1051 fDataSetOff.SetName("DataSetOff");
1052
1053 // Write the display
1054 TObjArray arr;
1055 arr.Add(const_cast<MDataSet*>(&fDataSetOn));
1056 arr.Add(const_cast<MDataSet*>(&fDataSetOff));
1057 if (fDisplay)
1058 arr.Add(fDisplay);
1059
1060 SetPathOut(out);
1061 return WriteContainer(arr, 0, "UPDATE");
1062}
1063
Note: See TracBrowser for help on using the repository browser.