source: trunk/MagicSoft/Mars/mjtrain/MJTrainSeparation.cc@ 7667

Last change on this file since 7667 was 7667, checked in by tbretz, 18 years ago
*** empty log message ***
File size: 18.0 KB
Line 
1/* ======================================================================== *\
2!
3! *
4! * This file is part of MARS, the MAGIC Analysis and Reconstruction
5! * Software. It is distributed to you in the hope that it can be a useful
6! * and timesaving tool in analysing Data of imaging Cerenkov telescopes.
7! * It is distributed WITHOUT ANY WARRANTY.
8! *
9! * Permission to use, copy, modify and distribute this software and its
10! * documentation for any purpose is hereby granted without fee,
11! * provided that the above copyright notice appear in all copies and
12! * that both that copyright notice and this permission notice appear
13! * in supporting documentation. It is provided "as is" without express
14! * or implied warranty.
15! *
16!
17!
18! Author(s): Thomas Bretz 11/2005 <mailto:tbretz@astro.uni-wuerzburg.de>
19!
20! Copyright: MAGIC Software Development, 2006
21!
22!
23\* ======================================================================== */
24
25/////////////////////////////////////////////////////////////////////////////
26//
27// MJTrainSeparation
28//
29////////////////////////////////////////////////////////////////////////////
30#include "MJTrainSeparation.h"
31
32#include <TF1.h>
33#include <TH2.h>
34#include <TChain.h>
35#include <TGraph.h>
36#include <TCanvas.h>
37#include <TVirtualPad.h>
38
39#include "MHMatrix.h"
40
41#include "MLog.h"
42#include "MLogManip.h"
43
44// tools
45#include "MMath.h"
46#include "MDataSet.h"
47#include "MTFillMatrix.h"
48#include "MChisqEval.h"
49#include "MStatusDisplay.h"
50
51// eventloop
52#include "MParList.h"
53#include "MTaskList.h"
54#include "MEvtLoop.h"
55
56// tasks
57#include "MReadMarsFile.h"
58#include "MContinue.h"
59#include "MFillH.h"
60#include "MRanForestCalc.h"
61#include "MParameterCalc.h"
62
63// container
64#include "MMcEvt.hxx"
65#include "MParameters.h"
66
67// histograms
68#include "MBinning.h"
69#include "MH3.h"
70#include "MHHadronness.h"
71
72// filter
73#include "MF.h"
74#include "MFEventSelector.h"
75#include "MFilterList.h"
76
77ClassImp(MJTrainSeparation);
78
79using namespace std;
80
81void MJTrainSeparation::DisplayResult(MH3 &h31, MH3 &h32)
82{
83 TH2D &g = (TH2D&)h32.GetHist();
84 TH2D &h = (TH2D&)h31.GetHist();
85
86 h.SetMarkerColor(kRed);
87 g.SetMarkerColor(kGreen);
88
89 TH2D res1(g);
90 TH2D res2(g);
91
92 res1.SetTitle("Significance Li/Ma");
93 res2.SetTitle("Significance Li/Ma * log_{10}(excess)");
94
95 const Int_t nx = h.GetNbinsX();
96 const Int_t ny = h.GetNbinsY();
97
98 gROOT->SetSelectedPad(NULL);
99
100 TGraph gr1;
101 TGraph gr2;
102 for (int x=0; x<nx; x++)
103 {
104 TH1 *hx = h.ProjectionY("H_py", x+1, x+1);
105 TH1 *gx = g.ProjectionY("G_py", x+1, x+1);
106
107 Double_t max1 = -1;
108 Double_t max2 = -1;
109 Int_t maxy1 = 0;
110 Int_t maxy2 = 0;
111 for (int y=0; y<ny; y++)
112 {
113 const Float_t s = gx->Integral(1, y+1);
114 const Float_t b = hx->Integral(1, y+1);
115 const Float_t sig1 = MMath::SignificanceLiMa(s+b, b);
116 const Float_t sig2 = s<1 ? 0 : MMath::SignificanceLiMa(s+b, b)*TMath::Log10(s);
117 if (sig1>max1)
118 {
119 maxy1 = y;
120 max1 = sig1;
121 }
122 if (sig2>max2)
123 {
124 maxy2 = y;
125 max2 = sig2;
126 }
127
128 res1.SetBinContent(x+1, y+1, sig1);
129 res2.SetBinContent(x+1, y+1, sig2);
130 }
131
132 gr1.SetPoint(x, h.GetXaxis()->GetBinCenter(x+1), h.GetYaxis()->GetBinCenter(maxy1+1));
133 gr2.SetPoint(x, h.GetXaxis()->GetBinCenter(x+1), h.GetYaxis()->GetBinCenter(maxy2+1));
134
135 delete hx;
136 delete gx;
137 }
138
139 TGraph gr3;
140 TGraph gr4;
141
142 TH1 *hx = h.ProjectionY("H_py");
143 TH1 *gx = g.ProjectionY("G_py");
144 for (int y=0; y<ny; y++)
145 {
146 const Float_t s = gx->Integral(1, y+1);
147 const Float_t b = hx->Integral(1, y+1);
148 const Float_t sig1 = MMath::SignificanceLiMa(s+b, b);
149 const Float_t sig2 = s<1 ? 0 : MMath::SignificanceLiMa(s+b, b)*TMath::Log10(s);
150
151 gr3.SetPoint(y, h.GetYaxis()->GetBinLowEdge(y+2), sig1);
152 gr4.SetPoint(y, h.GetYaxis()->GetBinLowEdge(y+2), sig2);
153 }
154 delete hx;
155 delete gx;
156
157 TCanvas &c = fDisplay->AddTab("OptCut");
158 c.SetBorderMode(0);
159 c.Divide(2,2);
160
161 c.cd(1);
162 gPad->SetBorderMode(0);
163 gPad->SetFrameBorderMode(0);
164 gPad->SetLogx();
165 h.DrawCopy();
166 g.DrawCopy("same");
167 gr1.SetMarkerStyle(kFullDotMedium);
168 gr1.DrawClone("LP")->SetBit(kCanDelete);
169 gr2.SetLineColor(kBlue);
170 gr2.SetMarkerStyle(kFullDotMedium);
171 gr2.DrawClone("LP")->SetBit(kCanDelete);
172
173 c.cd(3);
174 gPad->SetBorderMode(0);
175 gPad->SetFrameBorderMode(0);
176 gr4.SetMarkerStyle(kFullDotMedium);
177 gr4.DrawClone("ALP")->SetBit(kCanDelete);
178 gr3.SetLineColor(kBlue);
179 gr3.SetMarkerStyle(kFullDotMedium);
180 gr3.DrawClone("LP")->SetBit(kCanDelete);
181
182 c.cd(2);
183 gPad->SetBorderMode(0);
184 gPad->SetFrameBorderMode(0);
185 gPad->SetLogx();
186 MH::SetPalette("pretty");
187 res1.DrawCopy("colz");
188
189 c.cd(4);
190 gPad->SetBorderMode(0);
191 gPad->SetFrameBorderMode(0);
192 gPad->SetLogx();
193 MH::SetPalette("pretty");
194 res2.DrawCopy("colz");
195}
196
197/*
198Bool_t MJSpectrum::InitWeighting(const MDataSet &set, MMcSpectrumWeight &w) const
199{
200 fLog->Separator("Initialize energy weighting");
201
202 if (!CheckEnv(w))
203 {
204 *fLog << err << "ERROR - Reading resources for MMcSpectrumWeight failed." << endl;
205 return kFALSE;
206 }
207
208 TChain chain("RunHeaders");
209 set.AddFilesOn(chain);
210
211 MMcCorsikaRunHeader *h=0;
212 chain.SetBranchAddress("MMcCorsikaRunHeader.", &h);
213 chain.GetEntry(1);
214
215 if (!h)
216 {
217 *fLog << err << "ERROR - Couldn't read MMcCorsikaRunHeader from DataSet." << endl;
218 return kFALSE;
219 }
220
221 if (!w.Set(*h))
222 {
223 *fLog << err << "ERROR - Initializing MMcSpectrumWeight failed." << endl;
224 return kFALSE;
225 }
226
227 w.Print();
228 return kTRUE;
229}
230
231Bool_t MJSpectrum::ReadOrigMCDistribution(const MDataSet &set, TH1 &h, MMcSpectrumWeight &weight) const
232{
233 // Some debug output
234 fLog->Separator("Compiling original MC distribution");
235
236 weight.SetNameMcEvt("MMcEvtBasic");
237 const TString w(weight.GetFormulaWeights());
238 weight.SetNameMcEvt();
239
240 *fLog << inf << "Using weights: " << w << endl;
241 *fLog << "Please stand by, this may take a while..." << flush;
242
243 if (fDisplay)
244 fDisplay->SetStatusLine1("Compiling MC distribution...");
245
246 // Create chain
247 TChain chain("OriginalMC");
248 set.AddFilesOn(chain);
249
250 // Prepare histogram
251 h.Reset();
252
253 // Fill histogram from chain
254 h.SetDirectory(gROOT);
255 if (h.InheritsFrom(TH2::Class()))
256 {
257 h.SetNameTitle("ThetaEMC", "Event-Distribution vs Theta and Energy for MC (produced)");
258 h.SetXTitle("\\Theta [\\circ]");
259 h.SetYTitle("E [GeV]");
260 h.SetZTitle("Counts");
261 chain.Draw("MMcEvtBasic.fEnergy:MMcEvtBasic.fTelescopeTheta*TMath::RadToDeg()>>ThetaEMC", w, "goff");
262 }
263 else
264 {
265 h.SetNameTitle("ThetaMC", "Event-Distribution vs Theta for MC (produced)");
266 h.SetXTitle("\\Theta [\\circ]");
267 h.SetYTitle("Counts");
268 chain.Draw("MMcEvtBasic.fTelescopeTheta*TMath::RadToDeg()>>ThetaMC", w, "goff");
269 }
270 h.SetDirectory(0);
271
272 *fLog << "done." << endl;
273 if (fDisplay)
274 fDisplay->SetStatusLine2("done.");
275
276 if (h.GetEntries()>0)
277 return kTRUE;
278
279 *fLog << err << "ERROR - Histogram with original MC distribution empty..." << endl;
280
281 return h.GetEntries()>0;
282}
283*/
284
285Bool_t MJTrainSeparation::GetEventsProduced(MDataSet &set, Double_t &num, Double_t &min, Double_t &max) const
286{
287 TChain chain("OriginalMC");
288 set.AddFilesOn(chain);
289
290 min = chain.GetMinimum("MMcEvtBasic.fEnergy");
291 max = chain.GetMaximum("MMcEvtBasic.fEnergy");
292
293 num = chain.GetEntries();
294
295 if (num<100)
296 *fLog << err << "ERROR - Less than 100 entries in OriginalMC-Tree of MC-Train-Data found." << endl;
297
298 return num>=100;
299}
300
301Double_t MJTrainSeparation::GetDataRate(MDataSet &set, Double_t &num) const
302{
303 TChain chain1("Events");
304 set.AddFilesOff(chain1);
305
306 num = chain1.GetEntries();
307 if (num<100)
308 {
309 *fLog << err << "ERROR - Less than 100 entries in Events-Tree of Train-Data found." << endl;
310 return -1;
311 }
312
313 TChain chain("EffectiveOnTime");
314 set.AddFilesOff(chain);
315
316 chain.Draw("MEffectiveOnTime.fVal", "MEffectiveOnTime.fVal", "goff");
317
318 TH1 *h = dynamic_cast<TH1*>(gROOT->FindObject("htemp"));
319 if (!h)
320 {
321 *fLog << err << "ERROR - Weird things are happening (htemp not found)!" << endl;
322 return -1;
323 }
324
325 const Double_t ontime = h->Integral();
326 delete h;
327
328 if (ontime<1)
329 {
330 *fLog << err << "ERROR - Less than 1s of effective observation time found in Train-Data." << endl;
331 return -1;
332 }
333
334 return num/ontime;
335}
336
337Double_t MJTrainSeparation::GetNumMC(MDataSet &set) const
338{
339 TChain chain1("Events");
340 set.AddFilesOn(chain1);
341
342 const Double_t num = chain1.GetEntries();
343 if (num<100)
344 {
345 *fLog << err << "ERROR - Less than 100 entries in Events-Tree of Train-Data found." << endl;
346 return -1;
347 }
348
349 return num;
350}
351
352Bool_t MJTrainSeparation::AutoTrain()
353{
354 Double_t num, min, max;
355 if (!GetEventsProduced(fDataSetTrain, num, min, max))
356 return kFALSE;
357
358 *fLog << inf << "Using build-in radius of 300m to calculate collection area!" << endl;
359
360 // Target spectrum
361 TF1 flx("Flux", "[0]/1000*(x/1000)^(-2.6)", min, max);
362 flx.SetParameter(0, 1e-6);
363
364 // Number n0 of events this spectrum would produce per s and m^2
365 const Double_t n0 = flx.Integral(min, max); //[#]
366
367 // Area produced in MC
368 const Double_t A = TMath::Pi()*300*300; //[m²]
369
370 // Rate R of events this spectrum would produce per s
371 const Double_t R = n0*A; //[Hz]
372
373 *fLog << "Gamma rate from the source inside the MC production area: " << R << "Hz" << endl;
374
375 // Number N of events produced (in trainings sample)
376 const Double_t N = num; //[#]
377
378 *fLog << "Events produced by MC inside the production area: " << num << endl;
379
380 // This correponds to an observation time T [s]
381 const Double_t T = N/R; //[s]
382
383 *fLog << "Total time produced by the Monte Carlo: " << T << "s" << endl;
384
385 // With an average data rate after star of
386 Double_t data=0;
387 const Double_t r = GetDataRate(fDataSetTrain, data); //[Hz]
388
389 *fLog << "Events measured per second effective on time: " << r << "Hz" << endl;
390
391 // this yields a number of n events to be read for training
392 const Double_t n = r*T; //[#]
393
394 *fLog << "Events to be read from the data sample: " << n << endl;
395 *fLog << "Events available in data sample: " << data << endl;
396
397 if (r<0)
398 return kFALSE;
399
400 Double_t nummc = GetNumMC(fDataSetTrain);
401
402 *fLog << "Events available in MC sample: " << nummc << endl;
403
404 *fLog << "MC read probability: " << data/n << endl;
405
406 // more data requested than available => Scale down num MC events
407 Double_t on, off;
408 if (data<n)
409 {
410 on = TMath::Nint(nummc*data/n);
411 off = TMath::Nint(data);
412 *fLog << "Not enough data events available... scaling by " << data/n << endl;
413 }
414 else
415 {
416 on = TMath::Nint(nummc);
417 off = TMath::Nint(n);
418 }
419
420 if (fNumTrainOn>0 && fNumTrainOn<on)
421 {
422 fNumTrainOff = TMath::Nint(off*fNumTrainOn/on);
423 *fLog << "Less MC events requested... scaling by " << fNumTrainOn/on << endl;
424 }
425 else
426 {
427 fNumTrainOn = TMath::Nint(on);
428 fNumTrainOff = TMath::Nint(off);
429 }
430
431 *fLog << "Target number of MC events: " << fNumTrainOn << endl;
432 *fLog << "Target number of data events: " << fNumTrainOff << endl;
433
434 fNumTestOn = fNumTrainOn;
435 fNumTestOff = fNumTrainOff;
436
437 /*
438 An event rate dependent selection?
439 ----------------------------------
440 Total average data rate: R
441 Goal number of events: N
442 Number of data events: N0
443 Rate assigned to single evt: r
444
445 Selection probability: N/N0 * r/R
446
447 f := N/N0 * r
448
449 MF f("f * MEventRate.fRate < rand");
450 */
451
452 return kTRUE;
453}
454
455Bool_t MJTrainSeparation::Train(const char *out)
456{
457 if (!fDataSetTrain.IsValid())
458 {
459 *fLog << err << "ERROR - DataSet for training invalid!" << endl;
460 return kFALSE;
461 }
462 if (!fDataSetTest.IsValid())
463 {
464 *fLog << err << "ERROR - DataSet for testing invalid!" << endl;
465 return kFALSE;
466 }
467
468 // ----------------------- Auto Train? ----------------------
469
470 if (fAutoTrain)
471 if (!AutoTrain())
472 return kFALSE;
473
474 // --------------------- Setup files --------------------
475 MReadMarsFile read1("Events");
476 MReadMarsFile read2("Events");
477 MReadMarsFile read3("Events");
478 MReadMarsFile read4("Events");
479 read1.DisableAutoScheme();
480 read2.DisableAutoScheme();
481 read3.DisableAutoScheme();
482 read4.DisableAutoScheme();
483
484 fDataSetTrain.AddFilesOn(read1);
485 fDataSetTrain.AddFilesOff(read3);
486
487 fDataSetTest.AddFilesOff(read2);
488 fDataSetTest.AddFilesOn(read4);
489
490 // ----------------------- Setup RF ----------------------
491 MHMatrix train("Train");
492 train.AddColumns(fRules);
493 train.AddColumn("MHadronness.fVal");
494
495 // ----------------------- Fill Matrix RF ----------------------
496
497 MParameterD had("MHadronness");
498
499 MParList plistx;
500 plistx.AddToList(&had);
501 plistx.AddToList(this);
502
503 MTFillMatrix fill;
504 fill.SetLogStream(fLog);
505 fill.SetDisplay(fDisplay);
506 fill.AddPreCuts(fPreCuts);
507 fill.AddPreCuts(fTrainCuts);
508
509 // Set classifier for gammas
510 had.SetVal(0);
511 fill.SetName("FillGammas");
512 fill.SetDestMatrix1(&train, fNumTrainOn);
513 fill.SetReader(&read1);
514 if (!fill.Process(plistx))
515 return kFALSE;
516
517 const Int_t numgammas = train.GetNumRows();
518 if (numgammas==0)
519 {
520 *fLog << err << "ERROR - No gammas available for training... aborting." << endl;
521 return kFALSE;
522 }
523
524 // Set classifier for hadrons
525 had.SetVal(1);
526 fill.SetName("FillBackground");
527 fill.SetDestMatrix1(&train, fNumTrainOff);
528 fill.SetReader(&read3);
529 if (!fill.Process(plistx))
530 return kFALSE;
531
532 const Int_t numbackgrnd = train.GetNumRows()-numgammas;
533 if (numbackgrnd==0)
534 {
535 *fLog << err << "ERROR - No background available for training... aborting." << endl;
536 return kFALSE;
537 }
538
539 // ------------------------ Train RF --------------------------
540
541 MRanForestCalc rf;
542 rf.SetNumTrees(fNumTrees);
543 rf.SetNdSize(fNdSize);
544 rf.SetNumTry(fNumTry);
545 rf.SetNumObsoleteVariables(1);
546 rf.SetDebug(fDebug);
547 rf.SetDisplay(fDisplay);
548 rf.SetLogStream(fLog);
549 rf.SetFileName(out);
550 rf.SetNameOutput("MHadronness");
551
552 if (fUseRegression)
553 {
554 if (!rf.TrainSingleRF(train)) // regression
555 return kFALSE;
556 }
557 else
558 {
559 MBinning b(2, -0.5, 1.5, "BinningHadronness", "lin");
560 if (!rf.TrainSingleRF(train, b.GetEdgesD())) // classification
561 return kFALSE;
562 }
563
564 //if (!rf.TrainMultiRF(train, b.GetEdgesD())) // classification
565 // return;
566
567 //fDisplay = rf.GetDisplay();
568
569
570 *fLog << all;
571 fLog->Separator();
572
573 *fLog << "Training method:" << endl;
574 *fLog << " * " << (fUseRegression?"regression":"classification") << endl;
575 *fLog << endl;
576 *fLog << "Events used for training:" << endl;
577 *fLog << " * Gammas: " << numgammas << endl;
578 *fLog << " * Background: " << numbackgrnd << endl;
579 *fLog << endl;
580 *fLog << "Gamma/Background ratio:" << endl;
581 *fLog << " * Requested: " << (float)fNumTrainOn/fNumTrainOff << endl;
582 *fLog << " * Result: " << (float)numgammas/numbackgrnd << endl;
583
584 if (!fDataSetTest.IsValid())
585 return kTRUE;
586
587 // --------------------- Display result ----------------------
588 fLog->Separator("Test");
589
590 MParList plist;
591 MTaskList tlist;
592 plist.AddToList(this);
593 plist.AddToList(&tlist);
594
595 MMcEvt mcevt;
596 plist.AddToList(&mcevt);
597
598 // ----- Setup histograms -----
599 MBinning binsy(100, 0 , 1, "BinningMH3Y", "lin");
600 MBinning binsx( 40, 10, 100000, "BinningMH3X", "log");
601
602 plist.AddToList(&binsx);
603 plist.AddToList(&binsy);
604
605 MH3 h31("MHillas.fSize", "MHadronness.fVal");
606 MH3 h32("MHillas.fSize", "MHadronness.fVal");
607 MH3 h40("MMcEvt.fEnergy", "MHadronness.fVal");
608 h31.SetTitle("Background probability vs. Size:Size [phe]:Hadronness");
609 h32.SetTitle("Background probability vs. Size:Size [phe]:Hadronness");
610 h40.SetTitle("Background probability vs. Energy:Energy [GeV]:Hadronness");
611
612 MHHadronness hist;
613
614 // ----- Setup tasks -----
615 MFillH fillh0(&hist, "", "FillHadronness");
616 MFillH fillh1(&h31);
617 MFillH fillh2(&h32);
618 MFillH fillh4(&h40);
619 fillh1.SetNameTab("Background");
620 fillh2.SetNameTab("GammasH");
621 fillh4.SetNameTab("GammasE");
622 fillh0.SetBit(MFillH::kDoNotDisplay);
623
624 // ----- Setup filter -----
625 MFilterList precuts;
626 precuts.AddToList(fPreCuts);
627 precuts.AddToList(fTestCuts);
628
629 MContinue c0(&precuts);
630 c0.SetName("PreCuts");
631 c0.SetInverted();
632
633 MFEventSelector sel;
634 sel.SetNumSelectEvts(fNumTestOff);
635
636 MContinue c1(&sel);
637 c1.SetInverted();
638
639 // ----- Setup tasklist -----
640 tlist.AddToList(&read2);
641 tlist.AddToList(&c0);
642 tlist.AddToList(&c1);
643 tlist.AddToList(&rf);
644 tlist.AddToList(&fillh0);
645 tlist.AddToList(&fillh1);
646
647 // ----- Run eventloop on gammas -----
648 MEvtLoop loop;
649 loop.SetDisplay(fDisplay);
650 loop.SetLogStream(fLog);
651 loop.SetParList(&plist);
652
653 if (!loop.Eventloop())
654 return kFALSE;
655
656 // ----- Setup and run eventloop on background -----
657 sel.SetNumSelectEvts(fNumTestOn);
658 fillh0.ResetBit(MFillH::kDoNotDisplay);
659
660 tlist.Replace(&read4);
661 tlist.Replace(&fillh2);
662 tlist.AddToListAfter(&fillh4, &fillh2);
663
664 if (!loop.Eventloop())
665 return kFALSE;
666
667 DisplayResult(h31, h32);
668
669 if (!WriteDisplay(out))
670 return kFALSE;
671
672 return kTRUE;
673}
674
Note: See TracBrowser for help on using the repository browser.