source: trunk/MagicSoft/Mars/mjtrain/MJTrainSeparation.cc@ 7666

Last change on this file since 7666 was 7666, checked in by tbretz, 19 years ago
*** empty log message ***
File size: 17.0 KB
Line 
1/* ======================================================================== *\
2!
3! *
4! * This file is part of MARS, the MAGIC Analysis and Reconstruction
5! * Software. It is distributed to you in the hope that it can be a useful
6! * and timesaving tool in analysing Data of imaging Cerenkov telescopes.
7! * It is distributed WITHOUT ANY WARRANTY.
8! *
9! * Permission to use, copy, modify and distribute this software and its
10! * documentation for any purpose is hereby granted without fee,
11! * provided that the above copyright notice appear in all copies and
12! * that both that copyright notice and this permission notice appear
13! * in supporting documentation. It is provided "as is" without express
14! * or implied warranty.
15! *
16!
17!
18! Author(s): Thomas Bretz 11/2005 <mailto:tbretz@astro.uni-wuerzburg.de>
19!
20! Copyright: MAGIC Software Development, 2006
21!
22!
23\* ======================================================================== */
24
25/////////////////////////////////////////////////////////////////////////////
26//
27// MJTrainSeparation
28//
29////////////////////////////////////////////////////////////////////////////
30#include "MJTrainSeparation.h"
31
32#include <TF1.h>
33#include <TH2.h>
34#include <TChain.h>
35#include <TGraph.h>
36#include <TCanvas.h>
37#include <TVirtualPad.h>
38
39#include "MHMatrix.h"
40
41#include "MLog.h"
42#include "MLogManip.h"
43
44// tools
45#include "MMath.h"
46#include "MDataSet.h"
47#include "MTFillMatrix.h"
48#include "MChisqEval.h"
49#include "MStatusDisplay.h"
50
51// eventloop
52#include "MParList.h"
53#include "MTaskList.h"
54#include "MEvtLoop.h"
55
56// tasks
57#include "MReadMarsFile.h"
58#include "MContinue.h"
59#include "MFillH.h"
60#include "MRanForestCalc.h"
61#include "MParameterCalc.h"
62
63// container
64#include "MMcEvt.hxx"
65#include "MParameters.h"
66
67// histograms
68#include "MBinning.h"
69#include "MH3.h"
70#include "MHHadronness.h"
71
72// filter
73#include "MF.h"
74#include "MFEventSelector.h"
75#include "MFilterList.h"
76
77ClassImp(MJTrainSeparation);
78
79using namespace std;
80
81void MJTrainSeparation::DisplayResult(MH3 &h31, MH3 &h32)
82{
83 TH2D &g = (TH2D&)h32.GetHist();
84 TH2D &h = (TH2D&)h31.GetHist();
85
86 h.SetMarkerColor(kRed);
87 g.SetMarkerColor(kGreen);
88
89 TH2D res1(g);
90 TH2D res2(g);
91
92 res1.SetTitle("Significance Li/Ma");
93 res2.SetTitle("Significance Li/Ma * log_{10}(excess)");
94
95 const Int_t nx = h.GetNbinsX();
96 const Int_t ny = h.GetNbinsY();
97
98 gROOT->SetSelectedPad(NULL);
99
100 TGraph gr1;
101 TGraph gr2;
102 for (int x=0; x<nx; x++)
103 {
104 TH1 *hx = h.ProjectionY("H_py", x+1, x+1);
105 TH1 *gx = g.ProjectionY("G_py", x+1, x+1);
106
107 Double_t max1 = -1;
108 Double_t max2 = -1;
109 Int_t maxy1 = 0;
110 Int_t maxy2 = 0;
111 for (int y=0; y<ny; y++)
112 {
113 const Float_t s = gx->Integral(1, y+1);
114 const Float_t b = hx->Integral(1, y+1);
115 const Float_t sig1 = MMath::SignificanceLiMaSigned(s+b, b);
116 const Float_t sig2 = s<1 ? 0 : MMath::SignificanceLiMaSigned(s+b, b)*TMath::Log10(s);
117 if (sig1>max1)
118 {
119 maxy1 = y;
120 max1 = sig1;
121 }
122 if (sig2>max2)
123 {
124 maxy2 = y;
125 max2 = sig2;
126 }
127
128 res1.SetBinContent(x+1, y+1, sig1);
129 res2.SetBinContent(x+1, y+1, sig2);
130 }
131
132 gr1.SetPoint(x, h.GetXaxis()->GetBinCenter(x+1), h.GetYaxis()->GetBinCenter(maxy1+1));
133 gr2.SetPoint(x, h.GetXaxis()->GetBinCenter(x+1), h.GetYaxis()->GetBinCenter(maxy2+1));
134
135 delete hx;
136 delete gx;
137 }
138
139 TCanvas &c = fDisplay->AddTab("OptCut");
140 c.Divide(2,2);
141
142 c.cd(1);
143 gPad->SetBorderMode(0);
144 gPad->SetFrameBorderMode(0);
145 gPad->SetLogx();
146 h.DrawCopy();
147 g.DrawCopy("same");
148 gr1.SetMarkerStyle(kFullDotMedium);
149 gr1.DrawClone("LP")->SetBit(kCanDelete);
150 gr2.SetLineColor(kBlue);
151 gr2.SetMarkerStyle(kFullDotMedium);
152 gr2.DrawClone("LP")->SetBit(kCanDelete);
153
154 c.cd(2);
155 gPad->SetBorderMode(0);
156 gPad->SetFrameBorderMode(0);
157 gPad->SetLogx();
158 MH::SetPalette("pretty");
159 res1.DrawCopy("colz");
160
161 c.cd(4);
162 gPad->SetBorderMode(0);
163 gPad->SetFrameBorderMode(0);
164 gPad->SetLogx();
165 MH::SetPalette("pretty");
166 res2.DrawCopy("colz");
167}
168
169/*
170Bool_t MJSpectrum::InitWeighting(const MDataSet &set, MMcSpectrumWeight &w) const
171{
172 fLog->Separator("Initialize energy weighting");
173
174 if (!CheckEnv(w))
175 {
176 *fLog << err << "ERROR - Reading resources for MMcSpectrumWeight failed." << endl;
177 return kFALSE;
178 }
179
180 TChain chain("RunHeaders");
181 set.AddFilesOn(chain);
182
183 MMcCorsikaRunHeader *h=0;
184 chain.SetBranchAddress("MMcCorsikaRunHeader.", &h);
185 chain.GetEntry(1);
186
187 if (!h)
188 {
189 *fLog << err << "ERROR - Couldn't read MMcCorsikaRunHeader from DataSet." << endl;
190 return kFALSE;
191 }
192
193 if (!w.Set(*h))
194 {
195 *fLog << err << "ERROR - Initializing MMcSpectrumWeight failed." << endl;
196 return kFALSE;
197 }
198
199 w.Print();
200 return kTRUE;
201}
202
203Bool_t MJSpectrum::ReadOrigMCDistribution(const MDataSet &set, TH1 &h, MMcSpectrumWeight &weight) const
204{
205 // Some debug output
206 fLog->Separator("Compiling original MC distribution");
207
208 weight.SetNameMcEvt("MMcEvtBasic");
209 const TString w(weight.GetFormulaWeights());
210 weight.SetNameMcEvt();
211
212 *fLog << inf << "Using weights: " << w << endl;
213 *fLog << "Please stand by, this may take a while..." << flush;
214
215 if (fDisplay)
216 fDisplay->SetStatusLine1("Compiling MC distribution...");
217
218 // Create chain
219 TChain chain("OriginalMC");
220 set.AddFilesOn(chain);
221
222 // Prepare histogram
223 h.Reset();
224
225 // Fill histogram from chain
226 h.SetDirectory(gROOT);
227 if (h.InheritsFrom(TH2::Class()))
228 {
229 h.SetNameTitle("ThetaEMC", "Event-Distribution vs Theta and Energy for MC (produced)");
230 h.SetXTitle("\\Theta [\\circ]");
231 h.SetYTitle("E [GeV]");
232 h.SetZTitle("Counts");
233 chain.Draw("MMcEvtBasic.fEnergy:MMcEvtBasic.fTelescopeTheta*TMath::RadToDeg()>>ThetaEMC", w, "goff");
234 }
235 else
236 {
237 h.SetNameTitle("ThetaMC", "Event-Distribution vs Theta for MC (produced)");
238 h.SetXTitle("\\Theta [\\circ]");
239 h.SetYTitle("Counts");
240 chain.Draw("MMcEvtBasic.fTelescopeTheta*TMath::RadToDeg()>>ThetaMC", w, "goff");
241 }
242 h.SetDirectory(0);
243
244 *fLog << "done." << endl;
245 if (fDisplay)
246 fDisplay->SetStatusLine2("done.");
247
248 if (h.GetEntries()>0)
249 return kTRUE;
250
251 *fLog << err << "ERROR - Histogram with original MC distribution empty..." << endl;
252
253 return h.GetEntries()>0;
254}
255*/
256
257Bool_t MJTrainSeparation::GetEventsProduced(MDataSet &set, Double_t &num, Double_t &min, Double_t &max) const
258{
259 TChain chain("OriginalMC");
260 set.AddFilesOn(chain);
261
262 min = chain.GetMinimum("MMcEvtBasic.fEnergy");
263 max = chain.GetMaximum("MMcEvtBasic.fEnergy");
264
265 num = chain.GetEntries();
266
267 if (num<100)
268 *fLog << err << "ERROR - Less than 100 entries in OriginalMC-Tree of MC-Train-Data found." << endl;
269
270 return num>=100;
271}
272
273Double_t MJTrainSeparation::GetDataRate(MDataSet &set, Double_t &num) const
274{
275 TChain chain1("Events");
276 set.AddFilesOff(chain1);
277
278 num = chain1.GetEntries();
279 if (num<100)
280 {
281 *fLog << err << "ERROR - Less than 100 entries in Events-Tree of Train-Data found." << endl;
282 return -1;
283 }
284
285 TChain chain("EffectiveOnTime");
286 set.AddFilesOff(chain);
287
288 chain.Draw("MEffectiveOnTime.fVal", "MEffectiveOnTime.fVal", "goff");
289
290 TH1 *h = dynamic_cast<TH1*>(gROOT->FindObject("htemp"));
291 if (!h)
292 {
293 *fLog << err << "ERROR - Weird things are happening (htemp not found)!" << endl;
294 return -1;
295 }
296
297 const Double_t ontime = h->Integral();
298 delete h;
299
300 if (ontime<1)
301 {
302 *fLog << err << "ERROR - Less than 1s of effective observation time found in Train-Data." << endl;
303 return -1;
304 }
305
306 *fLog << inf << "Found " << num << " background events in " << ontime << "s" << endl;
307
308 return num/ontime;
309}
310
311Double_t MJTrainSeparation::GetNumMC(MDataSet &set) const
312{
313 TChain chain1("Events");
314 set.AddFilesOn(chain1);
315
316 const Double_t num = chain1.GetEntries();
317 if (num<100)
318 {
319 *fLog << err << "ERROR - Less than 100 entries in Events-Tree of Train-Data found." << endl;
320 return -1;
321 }
322
323 return num;
324}
325
326Bool_t MJTrainSeparation::AutoTrain()
327{
328 Double_t num, min, max;
329 if (!GetEventsProduced(fDataSetTrain, num, min, max))
330 return kFALSE;
331
332 *fLog << inf << "Using build-in radius of 300m to calculate collection area!" << endl;
333
334 // Target spectrum
335 TF1 flx("Flux", "[0]/1000*(x/1000)^(-2.6)", min, max);
336 flx.SetParameter(0, 1e-6);
337
338 // Number n0 of events this spectrum would produce per s and m^2
339 const Double_t n0 = flx.Integral(min, max); //[#]
340
341 // Area produced in MC
342 const Double_t A = TMath::Pi()*300*300; //[m²]
343
344 // Rate R of events this spectrum would produce per s
345 const Double_t R = n0*A; //[Hz]
346
347 *fLog << "Gamma rate from the source inside the MC production area: " << R << "Hz" << endl;
348
349 // Number N of events produced (in trainings sample)
350 const Double_t N = num; //[#]
351
352 *fLog << "Events produced by MC inside the production area: " << num << endl;
353
354 // This correponds to an observation time T [s]
355 const Double_t T = N/R; //[s]
356
357 *fLog << "Total time produced by the Monte Carlo: " << T << "s" << endl;
358
359 // With an average data rate after star of
360 Double_t data=0;
361 const Double_t r = GetDataRate(fDataSetTrain, data); //[Hz]
362
363 *fLog << "Events measured per second effective on time: " << r << "Hz" << endl;
364
365 // this yields a number of n events to be read for training
366 const Double_t n = r*T; //[#]
367
368 *fLog << "Events to be read from the data sample: " << n << endl;
369 *fLog << "Events available in data sample: " << data << endl;
370
371 if (r<0)
372 return kFALSE;
373
374 Double_t nummc = GetNumMC(fDataSetTrain);
375
376 *fLog << "Events available in MC sample: " << nummc << endl;
377
378 *fLog << "MC read probability: " << data/n << endl;
379
380 // more data requested than available => Scale down num MC events
381 Double_t on, off;
382 if (data<n)
383 {
384 on = TMath::Nint(nummc*data/n); //(UInt_t)-1;
385 off = TMath::Nint(data);
386 *fLog << "Not enough data events available... scaling by " << data/n << endl;
387 }
388 else
389 {
390 on = TMath::Nint(nummc);
391 off = TMath::Nint(n);
392 }
393
394 if (fNumTrainOn>0 && fNumTrainOn<on)
395 {
396 fNumTrainOff = TMath::Nint(off*fNumTrainOn/on);
397 *fLog << "Less MC events requested... scaling by " << fNumTrainOn/on << endl;
398 }
399 else
400 {
401 fNumTrainOn = TMath::Nint(on);
402 fNumTrainOff = TMath::Nint(off);
403 }
404
405 *fLog << "Target number of MC events: " << fNumTrainOn << endl;
406 *fLog << "Target number of data events: " << fNumTrainOff << endl;
407
408 /*
409 An event rate dependent selection?
410 ----------------------------------
411 Total average data rate: R
412 Goal number of events: N
413 Number of data events: N0
414 Rate assigned to single evt: r
415
416 Selection probability: N/N0 * r/R
417
418 f := N/N0 * r
419
420 MF f("f * MEventRate.fRate < rand");
421 */
422
423 return kTRUE;
424}
425
426Bool_t MJTrainSeparation::Train(const char *out)
427{
428 if (!fDataSetTrain.IsValid())
429 {
430 *fLog << err << "ERROR - DataSet for training invalid!" << endl;
431 return kFALSE;
432 }
433 if (!fDataSetTest.IsValid())
434 {
435 *fLog << err << "ERROR - DataSet for testing invalid!" << endl;
436 return kFALSE;
437 }
438
439 // ----------------------- Auto Train? ----------------------
440
441 if (fAutoTrain)
442 if (!AutoTrain())
443 return kFALSE;
444
445 // --------------------- Setup files --------------------
446 MReadMarsFile read1("Events");
447 MReadMarsFile read2("Events");
448 MReadMarsFile read3("Events");
449 MReadMarsFile read4("Events");
450 read1.DisableAutoScheme();
451 read2.DisableAutoScheme();
452 read3.DisableAutoScheme();
453 read4.DisableAutoScheme();
454
455 fDataSetTrain.AddFilesOn(read1);
456 fDataSetTrain.AddFilesOff(read3);
457
458 fDataSetTest.AddFilesOff(read2);
459 fDataSetTest.AddFilesOn(read4);
460
461 // ----------------------- Setup RF ----------------------
462 MHMatrix train("Train");
463 train.AddColumns(fRules);
464 train.AddColumn("MHadronness.fVal");
465
466 // ----------------------- Fill Matrix RF ----------------------
467
468 MParameterD had("MHadronness");
469
470 MParList plistx;
471 plistx.AddToList(&had);
472 plistx.AddToList(this);
473
474 MTFillMatrix fill;
475 fill.SetLogStream(fLog);
476 fill.SetDisplay(fDisplay);
477 fill.AddPreCuts(fPreCuts);
478 fill.AddPreCuts(fTrainCuts);
479
480 // Set classifier for gammas
481 had.SetVal(0);
482 fill.SetName("FillGammas");
483 fill.SetDestMatrix1(&train, fNumTrainOn);
484 fill.SetReader(&read1);
485 if (!fill.Process(plistx))
486 return kFALSE;
487
488 const Int_t numgammas = train.GetNumRows();
489 if (numgammas==0)
490 {
491 *fLog << err << "ERROR - No gammas available for training... aborting." << endl;
492 return kFALSE;
493 }
494
495 // Set classifier for hadrons
496 had.SetVal(1);
497 fill.SetName("FillBackground");
498 fill.SetDestMatrix1(&train, fNumTrainOff);
499 fill.SetReader(&read3);
500 if (!fill.Process(plistx))
501 return kFALSE;
502
503 const Int_t numbackgrnd = train.GetNumRows()-numgammas;
504 if (numbackgrnd==0)
505 {
506 *fLog << err << "ERROR - No background available for training... aborting." << endl;
507 return kFALSE;
508 }
509
510 // ------------------------ Train RF --------------------------
511
512 MRanForestCalc rf;
513 rf.SetNumTrees(fNumTrees);
514 rf.SetNdSize(fNdSize);
515 rf.SetNumTry(fNumTry);
516 rf.SetNumObsoleteVariables(1);
517 rf.SetDebug(fDebug);
518 rf.SetDisplay(fDisplay);
519 rf.SetLogStream(fLog);
520 rf.SetFileName(out);
521 rf.SetNameOutput("MHadronness");
522
523 if (fUseRegression)
524 {
525 if (!rf.TrainSingleRF(train)) // regression
526 return kFALSE;
527 }
528 else
529 {
530 MBinning b(2, -0.5, 1.5, "BinningHadronness", "lin");
531 if (!rf.TrainSingleRF(train, b.GetEdgesD())) // classification
532 return kFALSE;
533 }
534
535 //if (!rf.TrainMultiRF(train, b.GetEdgesD())) // classification
536 // return;
537
538 //fDisplay = rf.GetDisplay();
539
540
541 *fLog << all;
542 fLog->Separator();
543
544 *fLog << "Training method:" << endl;
545 *fLog << " * " << (fUseRegression?"regression":"classification") << endl;
546 *fLog << endl;
547 *fLog << "Events used for training:" << endl;
548 *fLog << " * Gammas: " << numgammas << endl;
549 *fLog << " * Background: " << numbackgrnd << endl;
550
551 if (!fDataSetTest.IsValid())
552 return kTRUE;
553
554 // --------------------- Display result ----------------------
555 fLog->Separator("Test");
556
557 MParList plist;
558 MTaskList tlist;
559 plist.AddToList(this);
560 plist.AddToList(&tlist);
561
562 MMcEvt mcevt;
563 plist.AddToList(&mcevt);
564
565 // ----- Setup histograms -----
566 MBinning binsy(100, 0 , 1, "BinningMH3Y", "lin");
567 MBinning binsx( 50, 10, 100000, "BinningMH3X", "log");
568
569 plist.AddToList(&binsx);
570 plist.AddToList(&binsy);
571
572 MH3 h31("MHillas.fSize", "MHadronness.fVal");
573 MH3 h32("MHillas.fSize", "MHadronness.fVal");
574 MH3 h40("MMcEvt.fEnergy", "MHadronness.fVal");
575 h31.SetTitle("Background probability vs. Size:Size [phe]:Hadronness");
576 h32.SetTitle("Background probability vs. Size:Size [phe]:Hadronness");
577 h40.SetTitle("Background probability vs. Energy:Energy [GeV]:Hadronness");
578
579 MHHadronness hist;
580
581 // ----- Setup tasks -----
582 MFillH fillh0(&hist, "", "FillHadronness");
583 MFillH fillh1(&h31);
584 MFillH fillh2(&h32);
585 MFillH fillh4(&h40);
586 fillh1.SetNameTab("Background");
587 fillh2.SetNameTab("GammasH");
588 fillh4.SetNameTab("GammasE");
589 fillh0.SetBit(MFillH::kDoNotDisplay);
590
591 // ----- Setup filter -----
592 MFilterList precuts;
593 precuts.AddToList(fPreCuts);
594 precuts.AddToList(fTestCuts);
595
596 MContinue c0(&precuts);
597 c0.SetName("PreCuts");
598 c0.SetInverted();
599
600 MFEventSelector sel;
601 sel.SetNumSelectEvts(fNumTestOn);
602
603 MContinue c1(&sel);
604 c1.SetInverted();
605
606 // ----- Setup tasklist -----
607 tlist.AddToList(&read2);
608 tlist.AddToList(&c0);
609 tlist.AddToList(&c1);
610 tlist.AddToList(&rf);
611 tlist.AddToList(&fillh0);
612 tlist.AddToList(&fillh1);
613
614 // ----- Run eventloop on gammas -----
615 MEvtLoop loop;
616 loop.SetDisplay(fDisplay);
617 loop.SetLogStream(fLog);
618 loop.SetParList(&plist);
619
620 if (!loop.Eventloop())
621 return kFALSE;
622
623 // ----- Setup and run eventloop on background -----
624 sel.SetNumSelectEvts(fNumTestOff);
625 fillh0.ResetBit(MFillH::kDoNotDisplay);
626
627 tlist.Replace(&read4);
628 tlist.Replace(&fillh2);
629 tlist.AddToListAfter(&fillh4, &fillh2);
630
631 if (!loop.Eventloop())
632 return kFALSE;
633
634 DisplayResult(h31, h32);
635
636 if (!WriteDisplay(out))
637 return kFALSE;
638
639 return kTRUE;
640}
641
Note: See TracBrowser for help on using the repository browser.