source: trunk/MagicSoft/Mars/mjtrain/MJTrainSeparation.cc@ 7669

Last change on this file since 7669 was 7669, checked in by tbretz, 19 years ago
*** empty log message ***
File size: 18.1 KB
Line 
1/* ======================================================================== *\
2!
3! *
4! * This file is part of MARS, the MAGIC Analysis and Reconstruction
5! * Software. It is distributed to you in the hope that it can be a useful
6! * and timesaving tool in analysing Data of imaging Cerenkov telescopes.
7! * It is distributed WITHOUT ANY WARRANTY.
8! *
9! * Permission to use, copy, modify and distribute this software and its
10! * documentation for any purpose is hereby granted without fee,
11! * provided that the above copyright notice appear in all copies and
12! * that both that copyright notice and this permission notice appear
13! * in supporting documentation. It is provided "as is" without express
14! * or implied warranty.
15! *
16!
17!
18! Author(s): Thomas Bretz 11/2005 <mailto:tbretz@astro.uni-wuerzburg.de>
19!
20! Copyright: MAGIC Software Development, 2006
21!
22!
23\* ======================================================================== */
24
25/////////////////////////////////////////////////////////////////////////////
26//
27// MJTrainSeparation
28//
29////////////////////////////////////////////////////////////////////////////
30#include "MJTrainSeparation.h"
31
32#include <TF1.h>
33#include <TH2.h>
34#include <TChain.h>
35#include <TGraph.h>
36#include <TCanvas.h>
37#include <TVirtualPad.h>
38
39#include "MHMatrix.h"
40
41#include "MLog.h"
42#include "MLogManip.h"
43
44// tools
45#include "MMath.h"
46#include "MDataSet.h"
47#include "MTFillMatrix.h"
48#include "MChisqEval.h"
49#include "MStatusDisplay.h"
50
51// eventloop
52#include "MParList.h"
53#include "MTaskList.h"
54#include "MEvtLoop.h"
55
56// tasks
57#include "MReadMarsFile.h"
58#include "MContinue.h"
59#include "MFillH.h"
60#include "MRanForestCalc.h"
61#include "MParameterCalc.h"
62
63// container
64#include "MMcEvt.hxx"
65#include "MParameters.h"
66
67// histograms
68#include "MBinning.h"
69#include "MH3.h"
70#include "MHHadronness.h"
71
72// filter
73#include "MF.h"
74#include "MFEventSelector.h"
75#include "MFilterList.h"
76
77ClassImp(MJTrainSeparation);
78
79using namespace std;
80
81void MJTrainSeparation::DisplayResult(MH3 &h31, MH3 &h32)
82{
83 TH2D &g = (TH2D&)h32.GetHist();
84 TH2D &h = (TH2D&)h31.GetHist();
85
86 h.SetMarkerColor(kRed);
87 g.SetMarkerColor(kGreen);
88
89 TH2D res1(g);
90 TH2D res2(g);
91
92 res1.SetTitle("Significance Li/Ma");
93 res2.SetTitle("Significance Li/Ma * log_{10}(excess)");
94
95 const Int_t nx = h.GetNbinsX();
96 const Int_t ny = h.GetNbinsY();
97
98 gROOT->SetSelectedPad(NULL);
99
100 TGraph gr1;
101 TGraph gr2;
102 for (int x=0; x<nx; x++)
103 {
104 TH1 *hx = h.ProjectionY("H_py", x+1, x+1);
105 TH1 *gx = g.ProjectionY("G_py", x+1, x+1);
106
107 Double_t max1 = -1;
108 Double_t max2 = -1;
109 Int_t maxy1 = 0;
110 Int_t maxy2 = 0;
111 for (int y=0; y<ny; y++)
112 {
113 const Float_t s = gx->Integral(1, y+1);
114 const Float_t b = hx->Integral(1, y+1);
115 const Float_t sig1 = MMath::SignificanceLiMa(s+b, b);
116 const Float_t sig2 = s<1 ? 0 : MMath::SignificanceLiMa(s+b, b)*TMath::Log10(s);
117 if (sig1>max1)
118 {
119 maxy1 = y;
120 max1 = sig1;
121 }
122 if (sig2>max2)
123 {
124 maxy2 = y;
125 max2 = sig2;
126 }
127
128 res1.SetBinContent(x+1, y+1, sig1);
129 res2.SetBinContent(x+1, y+1, sig2);
130 }
131
132 gr1.SetPoint(x, h.GetXaxis()->GetBinCenter(x+1), h.GetYaxis()->GetBinCenter(maxy1+1));
133 gr2.SetPoint(x, h.GetXaxis()->GetBinCenter(x+1), h.GetYaxis()->GetBinCenter(maxy2+1));
134
135 delete hx;
136 delete gx;
137 }
138
139 TGraph gr3;
140 TGraph gr4;
141 gr4.SetTitle("Significance Li/Ma vs. Hadronness-cut");
142
143 TH1 *hx = h.ProjectionY("H_py");
144 TH1 *gx = g.ProjectionY("G_py");
145 for (int y=0; y<ny; y++)
146 {
147 const Float_t s = gx->Integral(1, y+1);
148 const Float_t b = hx->Integral(1, y+1);
149 const Float_t sig1 = MMath::SignificanceLiMa(s+b, b);
150 const Float_t sig2 = s<1 ? 0 : MMath::SignificanceLiMa(s+b, b)*TMath::Log10(s);
151
152 gr3.SetPoint(y, h.GetYaxis()->GetBinLowEdge(y+2), sig1);
153 gr4.SetPoint(y, h.GetYaxis()->GetBinLowEdge(y+2), sig2);
154 }
155 delete hx;
156 delete gx;
157
158 TCanvas &c = fDisplay->AddTab("OptCut");
159 c.SetBorderMode(0);
160 c.Divide(2,2);
161
162 c.cd(1);
163 gPad->SetBorderMode(0);
164 gPad->SetFrameBorderMode(0);
165 gPad->SetLogx();
166 h.DrawCopy();
167 g.DrawCopy("same");
168 gr1.SetMarkerStyle(kFullDotMedium);
169 gr1.DrawClone("LP")->SetBit(kCanDelete);
170 gr2.SetLineColor(kBlue);
171 gr2.SetMarkerStyle(kFullDotMedium);
172 gr2.DrawClone("LP")->SetBit(kCanDelete);
173
174 c.cd(3);
175 gPad->SetBorderMode(0);
176 gPad->SetFrameBorderMode(0);
177 gr4.SetMarkerStyle(kFullDotMedium);
178 gr4.DrawClone("ALP")->SetBit(kCanDelete);
179 gr3.SetLineColor(kBlue);
180 gr3.SetMarkerStyle(kFullDotMedium);
181 gr3.DrawClone("LP")->SetBit(kCanDelete);
182
183 c.cd(2);
184 gPad->SetBorderMode(0);
185 gPad->SetFrameBorderMode(0);
186 gPad->SetLogx();
187 gPad->AddExec("color", "gStyle->SetPalette(1, 0);");
188 res1.DrawCopy("colz");
189
190 c.cd(4);
191 gPad->SetBorderMode(0);
192 gPad->SetFrameBorderMode(0);
193 gPad->SetLogx();
194 gPad->AddExec("color", "gStyle->SetPalette(1, 0);");
195 res2.DrawCopy("colz");
196}
197
198/*
199Bool_t MJSpectrum::InitWeighting(const MDataSet &set, MMcSpectrumWeight &w) const
200{
201 fLog->Separator("Initialize energy weighting");
202
203 if (!CheckEnv(w))
204 {
205 *fLog << err << "ERROR - Reading resources for MMcSpectrumWeight failed." << endl;
206 return kFALSE;
207 }
208
209 TChain chain("RunHeaders");
210 set.AddFilesOn(chain);
211
212 MMcCorsikaRunHeader *h=0;
213 chain.SetBranchAddress("MMcCorsikaRunHeader.", &h);
214 chain.GetEntry(1);
215
216 if (!h)
217 {
218 *fLog << err << "ERROR - Couldn't read MMcCorsikaRunHeader from DataSet." << endl;
219 return kFALSE;
220 }
221
222 if (!w.Set(*h))
223 {
224 *fLog << err << "ERROR - Initializing MMcSpectrumWeight failed." << endl;
225 return kFALSE;
226 }
227
228 w.Print();
229 return kTRUE;
230}
231
232Bool_t MJSpectrum::ReadOrigMCDistribution(const MDataSet &set, TH1 &h, MMcSpectrumWeight &weight) const
233{
234 // Some debug output
235 fLog->Separator("Compiling original MC distribution");
236
237 weight.SetNameMcEvt("MMcEvtBasic");
238 const TString w(weight.GetFormulaWeights());
239 weight.SetNameMcEvt();
240
241 *fLog << inf << "Using weights: " << w << endl;
242 *fLog << "Please stand by, this may take a while..." << flush;
243
244 if (fDisplay)
245 fDisplay->SetStatusLine1("Compiling MC distribution...");
246
247 // Create chain
248 TChain chain("OriginalMC");
249 set.AddFilesOn(chain);
250
251 // Prepare histogram
252 h.Reset();
253
254 // Fill histogram from chain
255 h.SetDirectory(gROOT);
256 if (h.InheritsFrom(TH2::Class()))
257 {
258 h.SetNameTitle("ThetaEMC", "Event-Distribution vs Theta and Energy for MC (produced)");
259 h.SetXTitle("\\Theta [\\circ]");
260 h.SetYTitle("E [GeV]");
261 h.SetZTitle("Counts");
262 chain.Draw("MMcEvtBasic.fEnergy:MMcEvtBasic.fTelescopeTheta*TMath::RadToDeg()>>ThetaEMC", w, "goff");
263 }
264 else
265 {
266 h.SetNameTitle("ThetaMC", "Event-Distribution vs Theta for MC (produced)");
267 h.SetXTitle("\\Theta [\\circ]");
268 h.SetYTitle("Counts");
269 chain.Draw("MMcEvtBasic.fTelescopeTheta*TMath::RadToDeg()>>ThetaMC", w, "goff");
270 }
271 h.SetDirectory(0);
272
273 *fLog << "done." << endl;
274 if (fDisplay)
275 fDisplay->SetStatusLine2("done.");
276
277 if (h.GetEntries()>0)
278 return kTRUE;
279
280 *fLog << err << "ERROR - Histogram with original MC distribution empty..." << endl;
281
282 return h.GetEntries()>0;
283}
284*/
285
286Bool_t MJTrainSeparation::GetEventsProduced(MDataSet &set, Double_t &num, Double_t &min, Double_t &max) const
287{
288 TChain chain("OriginalMC");
289 set.AddFilesOn(chain);
290
291 min = chain.GetMinimum("MMcEvtBasic.fEnergy");
292 max = chain.GetMaximum("MMcEvtBasic.fEnergy");
293
294 num = chain.GetEntries();
295
296 if (num<100)
297 *fLog << err << "ERROR - Less than 100 entries in OriginalMC-Tree of MC-Train-Data found." << endl;
298
299 return num>=100;
300}
301
302Double_t MJTrainSeparation::GetDataRate(MDataSet &set, Double_t &num) const
303{
304 TChain chain1("Events");
305 set.AddFilesOff(chain1);
306
307 num = chain1.GetEntries();
308 if (num<100)
309 {
310 *fLog << err << "ERROR - Less than 100 entries in Events-Tree of Train-Data found." << endl;
311 return -1;
312 }
313
314 TChain chain("EffectiveOnTime");
315 set.AddFilesOff(chain);
316
317 chain.Draw("MEffectiveOnTime.fVal", "MEffectiveOnTime.fVal", "goff");
318
319 TH1 *h = dynamic_cast<TH1*>(gROOT->FindObject("htemp"));
320 if (!h)
321 {
322 *fLog << err << "ERROR - Weird things are happening (htemp not found)!" << endl;
323 return -1;
324 }
325
326 const Double_t ontime = h->Integral();
327 delete h;
328
329 if (ontime<1)
330 {
331 *fLog << err << "ERROR - Less than 1s of effective observation time found in Train-Data." << endl;
332 return -1;
333 }
334
335 return num/ontime;
336}
337
338Double_t MJTrainSeparation::GetNumMC(MDataSet &set) const
339{
340 TChain chain1("Events");
341 set.AddFilesOn(chain1);
342
343 const Double_t num = chain1.GetEntries();
344 if (num<100)
345 {
346 *fLog << err << "ERROR - Less than 100 entries in Events-Tree of Train-Data found." << endl;
347 return -1;
348 }
349
350 return num;
351}
352
353Bool_t MJTrainSeparation::AutoTrain()
354{
355 Double_t num, min, max;
356 if (!GetEventsProduced(fDataSetTrain, num, min, max))
357 return kFALSE;
358
359 *fLog << inf << "Using build-in radius of 300m to calculate collection area!" << endl;
360
361 // Target spectrum
362 TF1 flx("Flux", "[0]/1000*(x/1000)^(-2.6)", min, max);
363 flx.SetParameter(0, 1e-6);
364
365 // Number n0 of events this spectrum would produce per s and m^2
366 const Double_t n0 = flx.Integral(min, max); //[#]
367
368 // Area produced in MC
369 const Double_t A = TMath::Pi()*300*300; //[m²]
370
371 // Rate R of events this spectrum would produce per s
372 const Double_t R = n0*A; //[Hz]
373
374 *fLog << "Gamma rate from the source inside the MC production area: " << R << "Hz" << endl;
375
376 // Number N of events produced (in trainings sample)
377 const Double_t N = num; //[#]
378
379 *fLog << "Events produced by MC inside the production area: " << num << endl;
380
381 // This correponds to an observation time T [s]
382 const Double_t T = N/R; //[s]
383
384 *fLog << "Total time produced by the Monte Carlo: " << T << "s" << endl;
385
386 // With an average data rate after star of
387 Double_t data=0;
388 const Double_t r = GetDataRate(fDataSetTrain, data); //[Hz]
389
390 *fLog << "Events measured per second effective on time: " << r << "Hz" << endl;
391
392 // this yields a number of n events to be read for training
393 const Double_t n = r*T; //[#]
394
395 *fLog << "Events to be read from the data sample: " << n << endl;
396 *fLog << "Events available in data sample: " << data << endl;
397
398 if (r<0)
399 return kFALSE;
400
401 Double_t nummc = GetNumMC(fDataSetTrain);
402
403 *fLog << "Events available in MC sample: " << nummc << endl;
404
405 *fLog << "MC read probability: " << data/n << endl;
406
407 // more data requested than available => Scale down num MC events
408 Double_t on, off;
409 if (data<n)
410 {
411 on = TMath::Nint(nummc*data/n);
412 off = TMath::Nint(data);
413 *fLog << "Not enough data events available... scaling by " << data/n << endl;
414 }
415 else
416 {
417 on = TMath::Nint(nummc);
418 off = TMath::Nint(n);
419 }
420
421 if (fNumTrainOn>0 && fNumTrainOn<on)
422 {
423 fNumTrainOff = TMath::Nint(off*fNumTrainOn/on);
424 *fLog << "Less MC events requested... scaling by " << fNumTrainOn/on << endl;
425 }
426 else
427 {
428 fNumTrainOn = TMath::Nint(on);
429 fNumTrainOff = TMath::Nint(off);
430 }
431
432 *fLog << "Target number of MC events: " << fNumTrainOn << endl;
433 *fLog << "Target number of data events: " << fNumTrainOff << endl;
434
435 fNumTestOn = fNumTrainOn;
436 fNumTestOff = fNumTrainOff;
437
438 /*
439 An event rate dependent selection?
440 ----------------------------------
441 Total average data rate: R
442 Goal number of events: N
443 Number of data events: N0
444 Rate assigned to single evt: r
445
446 Selection probability: N/N0 * r/R
447
448 f := N/N0 * r
449
450 MF f("f * MEventRate.fRate < rand");
451 */
452
453 return kTRUE;
454}
455
456Bool_t MJTrainSeparation::Train(const char *out)
457{
458 if (!fDataSetTrain.IsValid())
459 {
460 *fLog << err << "ERROR - DataSet for training invalid!" << endl;
461 return kFALSE;
462 }
463 if (!fDataSetTest.IsValid())
464 {
465 *fLog << err << "ERROR - DataSet for testing invalid!" << endl;
466 return kFALSE;
467 }
468
469 // ----------------------- Auto Train? ----------------------
470
471 if (fAutoTrain)
472 if (!AutoTrain())
473 return kFALSE;
474
475 // --------------------- Setup files --------------------
476 MReadMarsFile read1("Events");
477 MReadMarsFile read2("Events");
478 MReadMarsFile read3("Events");
479 MReadMarsFile read4("Events");
480 read1.DisableAutoScheme();
481 read2.DisableAutoScheme();
482 read3.DisableAutoScheme();
483 read4.DisableAutoScheme();
484
485 fDataSetTrain.AddFilesOn(read1);
486 fDataSetTrain.AddFilesOff(read3);
487
488 fDataSetTest.AddFilesOff(read2);
489 fDataSetTest.AddFilesOn(read4);
490
491 // ----------------------- Setup RF ----------------------
492 MHMatrix train("Train");
493 train.AddColumns(fRules);
494 train.AddColumn("MHadronness.fVal");
495
496 // ----------------------- Fill Matrix RF ----------------------
497
498 MParameterD had("MHadronness");
499
500 MParList plistx;
501 plistx.AddToList(&had);
502 plistx.AddToList(this);
503
504 MTFillMatrix fill;
505 fill.SetLogStream(fLog);
506 fill.SetDisplay(fDisplay);
507 fill.AddPreCuts(fPreCuts);
508 fill.AddPreCuts(fTrainCuts);
509
510 // Set classifier for gammas
511 had.SetVal(0);
512 fill.SetName("FillGammas");
513 fill.SetDestMatrix1(&train, fNumTrainOn);
514 fill.SetReader(&read1);
515 if (!fill.Process(plistx))
516 return kFALSE;
517
518 const Int_t numgammas = train.GetNumRows();
519 if (numgammas==0)
520 {
521 *fLog << err << "ERROR - No gammas available for training... aborting." << endl;
522 return kFALSE;
523 }
524
525 // Set classifier for hadrons
526 had.SetVal(1);
527 fill.SetName("FillBackground");
528 fill.SetDestMatrix1(&train, fNumTrainOff);
529 fill.SetReader(&read3);
530 if (!fill.Process(plistx))
531 return kFALSE;
532
533 const Int_t numbackgrnd = train.GetNumRows()-numgammas;
534 if (numbackgrnd==0)
535 {
536 *fLog << err << "ERROR - No background available for training... aborting." << endl;
537 return kFALSE;
538 }
539
540 // ------------------------ Train RF --------------------------
541
542 MRanForestCalc rf;
543 rf.SetNumTrees(fNumTrees);
544 rf.SetNdSize(fNdSize);
545 rf.SetNumTry(fNumTry);
546 rf.SetNumObsoleteVariables(1);
547 rf.SetDebug(fDebug);
548 rf.SetDisplay(fDisplay);
549 rf.SetLogStream(fLog);
550 rf.SetFileName(out);
551 rf.SetNameOutput("MHadronness");
552
553 if (fUseRegression)
554 {
555 if (!rf.TrainSingleRF(train)) // regression
556 return kFALSE;
557 }
558 else
559 {
560 MBinning b(2, -0.5, 1.5, "BinningHadronness", "lin");
561 if (!rf.TrainSingleRF(train, b.GetEdgesD())) // classification
562 return kFALSE;
563 }
564
565 //if (!rf.TrainMultiRF(train, b.GetEdgesD())) // classification
566 // return;
567
568 //fDisplay = rf.GetDisplay();
569
570
571 *fLog << all;
572 fLog->Separator();
573
574 *fLog << "Training method:" << endl;
575 *fLog << " * " << (fUseRegression?"regression":"classification") << endl;
576 *fLog << endl;
577 *fLog << "Events used for training:" << endl;
578 *fLog << " * Gammas: " << numgammas << endl;
579 *fLog << " * Background: " << numbackgrnd << endl;
580 *fLog << endl;
581 *fLog << "Gamma/Background ratio:" << endl;
582 *fLog << " * Requested: " << (float)fNumTrainOn/fNumTrainOff << endl;
583 *fLog << " * Result: " << (float)numgammas/numbackgrnd << endl;
584
585 if (!fDataSetTest.IsValid())
586 return kTRUE;
587
588 // --------------------- Display result ----------------------
589 fLog->Separator("Test");
590
591 MParList plist;
592 MTaskList tlist;
593 plist.AddToList(this);
594 plist.AddToList(&tlist);
595
596 MMcEvt mcevt;
597 plist.AddToList(&mcevt);
598
599 // ----- Setup histograms -----
600 MBinning binsy(50, 0 , 1, "BinningMH3Y", "lin");
601 MBinning binsx(40, 10, 100000, "BinningMH3X", "log");
602
603 plist.AddToList(&binsx);
604 plist.AddToList(&binsy);
605
606 MH3 h31("MHillas.fSize", "MHadronness.fVal");
607 MH3 h32("MHillas.fSize", "MHadronness.fVal");
608 MH3 h40("MMcEvt.fEnergy", "MHadronness.fVal");
609 h31.SetTitle("Background probability vs. Size:Size [phe]:Hadronness");
610 h32.SetTitle("Background probability vs. Size:Size [phe]:Hadronness");
611 h40.SetTitle("Background probability vs. Energy:Energy [GeV]:Hadronness");
612
613 MHHadronness hist;
614
615 // ----- Setup tasks -----
616 MFillH fillh0(&hist, "", "FillHadronness");
617 MFillH fillh1(&h31);
618 MFillH fillh2(&h32);
619 MFillH fillh4(&h40);
620 fillh1.SetNameTab("Background");
621 fillh2.SetNameTab("GammasH");
622 fillh4.SetNameTab("GammasE");
623 fillh0.SetBit(MFillH::kDoNotDisplay);
624
625 // ----- Setup filter -----
626 MFilterList precuts;
627 precuts.AddToList(fPreCuts);
628 precuts.AddToList(fTestCuts);
629
630 MContinue c0(&precuts);
631 c0.SetName("PreCuts");
632 c0.SetInverted();
633
634 MFEventSelector sel;
635 sel.SetNumSelectEvts(fNumTestOff);
636
637 MContinue c1(&sel);
638 c1.SetInverted();
639
640 // ----- Setup tasklist -----
641 tlist.AddToList(&read2);
642 tlist.AddToList(&c0);
643 tlist.AddToList(&c1);
644 tlist.AddToList(&rf);
645 tlist.AddToList(&fillh0);
646 tlist.AddToList(&fillh1);
647
648 // ----- Run eventloop on gammas -----
649 MEvtLoop loop;
650 loop.SetDisplay(fDisplay);
651 loop.SetLogStream(fLog);
652 loop.SetParList(&plist);
653
654 if (!loop.Eventloop())
655 return kFALSE;
656
657 // ----- Setup and run eventloop on background -----
658 sel.SetNumSelectEvts(fNumTestOn);
659 fillh0.ResetBit(MFillH::kDoNotDisplay);
660
661 tlist.Replace(&read4);
662 tlist.Replace(&fillh2);
663 tlist.AddToListAfter(&fillh4, &fillh2);
664
665 if (!loop.Eventloop())
666 return kFALSE;
667
668 DisplayResult(h31, h32);
669
670 if (!WriteDisplay(out))
671 return kFALSE;
672
673 return kTRUE;
674}
675
Note: See TracBrowser for help on using the repository browser.