source: trunk/MagicSoft/Mars/mjobs/MDataSet.cc@ 8585

Last change on this file since 8585 was 8441, checked in by tbretz, 18 years ago
*** empty log message ***
File size: 15.7 KB
Line 
1/* ======================================================================== *\
2!
3! *
4! * This file is part of MARS, the MAGIC Analysis and Reconstruction
5! * Software. It is distributed to you in the hope that it can be a useful
6! * and timesaving tool in analysing Data of imaging Cerenkov telescopes.
7! * It is distributed WITHOUT ANY WARRANTY.
8! *
9! * Permission to use, copy, modify and distribute this software and its
10! * documentation for any purpose is hereby granted without fee,
11! * provided that the above copyright notice appear in all copies and
12! * that both that copyright notice and this permission notice appear
13! * in supporting documentation. It is provided "as is" without express
14! * or implied warranty.
15! *
16!
17!
18! Author(s): Thomas Bretz, 1/2005 <mailto:tbretz@astro.uni-wuerzburg.de>
19!
20! Copyright: MAGIC Software Development, 2004-2007
21!
22!
23\* ======================================================================== */
24
25/////////////////////////////////////////////////////////////////////////////
26//
27// MDataSet
28//
29// This class describes a collection of sequences.
30//
31// Such an input file looks like:
32//
33// crab.seq:
34// ---------
35// AnalysisNumber: 1
36//
37// SequencesOn: 35222
38// SequencesOff: 36817
39//
40// Sequence00035222.File: sequences/sequence035222.txt
41// Sequence00036817.File: sequences/sequence036817.txt
42//
43// Sequence00035222.Dir: /data2/wuerzburg/Crab-Analyse/images/035222
44// Sequence00036817.Dir: /data2/wuerzburg/Crab-Analyse/images/036817
45//
46// The analysis number is an artificial number used to name the output
47// files automatically if the names are not overwritten in the corresponding
48// programs.
49//
50// The sequence numbers are used to concatenate the filenames of the
51// sequences using the file structure used in the datacenter. Each sequence
52// can be added to the on and off data at the same time but only once.
53//
54// If you have different file names you can overwrite the default file names
55// using Sequence%08d.File (make sure you have 8 digits!)
56//
57// In standard conditions (datacenter file system) paths are concatenated
58// by using the information in the sequence files (date, etc). You can
59// overwrite the directories in which the sequence-files (eg I-files) are
60// stored using Sequence%08d.Dir (make sure you have 8 digits!)
61//
62// Resource file entries are case sensitive!
63//
64// IMPORTANT:
65// * Run filenames must begin with a string which allows correct
66// ordering in time, otherwise synchronization might fail.
67// * Sequence filenames should also have names allowing to order them
68// in time, but it is not necessary.
69//
70// Class Version 2:
71// + fMonteCarlo
72// + fWobbleMode
73// - fIsWobbleMode
74//
75/////////////////////////////////////////////////////////////////////////////
76#include "MDataSet.h"
77
78#include <string.h> // necessary for Fedora core 2 with kernel 2.6.9-1.667 #1 and gcc 3.4.2
79#include <errno.h> // necessary for Fedora core 2 with kernel 2.6.9-1.667 #1 and gcc 3.4.2
80
81#include <stdlib.h>
82#include <fstream>
83
84#include <TEnv.h>
85#include <TChain.h>
86#include <TRegexp.h>
87#include <TSystem.h> // TSystem::ExpandPath
88
89#include "MLog.h"
90#include "MLogManip.h"
91
92#include "MRead.h"
93#include "MJob.h"
94#include "MEnv.h"
95#include "MAstro.h"
96#include "MDirIter.h"
97#include "MSequence.h"
98#include "MPointingPos.h"
99
100ClassImp(MDataSet);
101
102using namespace std;
103
104const TString MDataSet::fgCatalog = "/magic/datacenter/setup/magic_favorites.edb";
105
106// --------------------------------------------------------------------------
107//
108// Copy the sequence numbers from the TString runs into the TArrayI data
109// Sequences which are twice in the list are only added once. In this case
110// a warning is emitted.
111//
112void MDataSet::Split(TString &runs, TArrayI &data) const
113{
114 const TRegexp regexp("[0-9]+");
115
116 data.Set(0);
117
118 runs.ReplaceAll("\t", " ");
119 runs = runs.Strip(TString::kBoth);
120
121 while (!runs.IsNull())
122 {
123 const TString num = runs(regexp);
124
125 if (num.IsNull())
126 {
127 *fLog << warn << "WARNING - Sequence is NaN (not a number): '" << runs << "'" << endl;
128 break;
129 }
130
131 const Int_t seq = atoi(num.Data());
132 const Int_t n = data.GetSize();
133
134 // skip already existing entries
135 int i;
136 for (i=0; i<n; i++)
137 if (data[i] == seq)
138 break;
139
140 if (i<n)
141 *fLog << warn << "WARNING - Sequence #" << seq << " already in list... skipped." << endl;
142 else
143 {
144 // set new entry
145 data.Set(n+1);
146 data[n] = seq;
147 }
148
149 // remove entry from string
150 runs.Remove(0, runs.First(num)+num.Length());
151 }
152
153 MJob::SortArray(data);
154}
155
156// --------------------------------------------------------------------------
157//
158// Resolve the sequence filename and directory for each sequence number,
159// either from the default (/magic/data/sequences/0004/sequence00004000.txt)
160// or from the corresponding entries in the dataset file, and add the
161// resulting MSequence to the list. Sorting of the entries is currently disabled.
162//
163void MDataSet::ResolveSequences(TEnv &env, const TArrayI &num, TList &list) const
164{
165 TString sequences = fPathSequences;
166 TString data = fPathDataFiles;
167
168 for (int i=0; i<num.GetSize(); i++)
169 {
170 TString name = env.GetValue(Form("Sequence%08d.File", num[i]), "");
171 TString dir = env.GetValue(Form("Sequence%08d.Dir", num[i]), "");
172
173 // Set default sequence file and dir name
174 if (name.IsNull())
175 name = Form("%s%04d/sequence%08d.txt", sequences.Data(), num[i]/10000, num[i]);
176 if (dir.IsNull())
177 dir = Form("%s%04d/%08d", data.Data(), num[i]/10000, num[i]);
178
179 // FIXME: The sequence number from the sequence file is assigned!!!
180 MSequence *seq = new MSequence(name, dir);
181
182 if (seq->IsValid() && seq->GetSequence()!=(UInt_t)num[i])
183 *fLog << warn << "WARNING - Sequence number " << num[i] << " in dataset file doesn't match sequence number " << seq->GetSequence() << " in sequence file!" << endl;
184
185 list.Add(seq);
186 }
187
188 // For the synchronization we must make sure, that all sequences are
189 // in the correct order...
190 // list.Sort();
191}
192
193// --------------------------------------------------------------------------
194//
195// Read the file fname as setup file for the dataset.
196//
197MDataSet::MDataSet(const char *fname, TString sequences, TString data)
198{
199 fName = fname;
200
201 fSequencesOn.SetOwner();
202 fSequencesOff.SetOwner();
203
204 TString expname(fname);
205 gSystem->ExpandPathName(expname);
206
207 const Bool_t access = !gSystem->AccessPathName(expname, kFileExists);
208 if (!access)
209 gLog << err << "ERROR - Dataset file " << expname << " not accessible!" << endl;
210
211 MEnv env(expname);
212
213 fNumAnalysis = env.GetValue("AnalysisNumber", -1);
214 fTitle = env.GetValue("Name", expname);
215
216 TString str;
217 str = env.GetValue("SequencesOn", "");
218 Split(str, fNumSequencesOn);
219 str = env.GetValue("SequencesOff", "");
220 Split(str, fNumSequencesOff);
221
222 fNameSource = env.GetValue("SourceName", "");
223 fCatalog = env.GetValue("Catalog", fgCatalog);
224 fWobbleMode = env.GetValue("WobbleMode", kFALSE);
225 fMonteCarlo = env.GetValue("MonteCarlo", kFALSE);
226 fComment = env.GetValue("Comment", "");
227
228 SetupDefaultPath(sequences, GetDefPathSequences());
229 SetupDefaultPath(data, GetDefPathDataFiles());
230
231 fPathSequences = sequences;
232 fPathDataFiles = data;
233
234 ResolveSequences(env, fNumSequencesOn, fSequencesOn);
235 ResolveSequences(env, fNumSequencesOff, fSequencesOff);
236
237 fNameSource = fNameSource.Strip(TString::kBoth);
238 fCatalog = fCatalog.Strip(TString::kBoth);
239
240 // --- Now "touch" resources which are not yet stored in MDataSet ---
241 env.Touch("RunTime");
242
243 // --- Print "untouch" resources ---
244 if (env.GetNumUntouched()>0)
245 {
246 gLog << warn << "WARNING - At least one resource in the dataset-file has not been touched!" << endl;
247 env.PrintUntouched();
248 }
249}
250
251// --------------------------------------------------------------------------
252//
253// Print the file name and data path of the sequence, prefixed by '+' if both can be accessed and by '-' otherwise.
254//
255void MDataSet::PrintFile(const MSequence &seq)
256{
257 const Char_t access =
258 !gSystem->AccessPathName(seq.GetFileName(), kFileExists) &&
259 !gSystem->AccessPathName(seq.GetDataPath(), kFileExists) ? '+' : '-';
260
261 gLog << "# " << access << " " << seq.GetFileName() << " <" << seq.GetDataPath() << ">" << endl;
262}
263
264// --------------------------------------------------------------------------
265//
266// Print the contents of the dataset
267//
268void MDataSet::Print(Option_t *o) const
269{
270 gLog << all;
271 if (!IsValid())
272 {
273 gLog << "Dataset: " << fName << " <invalid - no analysis number available>" << endl;
274 return;
275 }
276 gLog << "AnalysisNumber: " << fNumAnalysis << endl << endl;
277
278 if (!fTitle.IsNull())
279 gLog << "Name: " << fTitle << endl << endl;
280
281 gLog << "SequencesOn: ";
282 for (int i=0; i<fNumSequencesOn.GetSize(); i++)
283 gLog << " " << fNumSequencesOn[i];
284 gLog << endl;
285 gLog << "SequencesOff: ";
286 for (int i=0; i<fNumSequencesOff.GetSize(); i++)
287 gLog << " " << fNumSequencesOff[i];
288 gLog << endl << endl;
289
290 gLog << "SourceName: " << fNameSource << endl;
291 gLog << "Catalog: " << fCatalog << endl;
292
293 gLog << "WobbleMode: " << (fWobbleMode?"On":"Off") << endl << endl;
294 gLog << "MonteCarlo: " << (fMonteCarlo?"Yes":"No") << endl << endl;
295
296 gLog << "Comment: " << fComment << endl;
297
298 if (fSequencesOn.GetEntries()>0)
299 gLog << endl;
300
301 TIter NextOn(&fSequencesOn);
302 TIter NextOff(&fSequencesOff);
303 MSequence *seq=0;
304 while ((seq=(MSequence*)NextOn()))
305 {
306 gLog << "Sequence" << Form("%08d", seq->GetSequence()) << ".File: " << seq->GetFileName() << endl;
307 gLog << "Sequence" << Form("%08d", seq->GetSequence()) << ".Dir: " << seq->GetDataPath() << endl;
308 }
309 if (fSequencesOff.GetEntries()>0)
310 gLog << endl;
311 while ((seq=(MSequence*)NextOff()))
312 {
313 gLog << "Sequence" << Form("%08d", seq->GetSequence()) << ".File: " << seq->GetFileName() << endl;
314 gLog << "Sequence" << Form("%08d", seq->GetSequence()) << ".Dir: " << seq->GetDataPath() << endl;
315 }
316
317 if (TString(o).Contains("files", TString::kIgnoreCase))
318 {
319 gLog << endl;
320 gLog << "# On-Data Files:" << endl;
321 NextOn.Reset();
322 while ((seq=(MSequence*)NextOn()))
323 PrintFile(*seq);
324
325 gLog << endl;
326 gLog << "# Off-Data Files:" << endl;
327 NextOff.Reset();
328 while ((seq=(MSequence*)NextOff()))
329 PrintFile(*seq);
330
331 return;
332 }
333}
334
335// --------------------------------------------------------------------------
336//
337// Adds all sequences contained in list to the MDirIter. The final call to
338// MDirIter::Sort (sorting all entries by name) is currently commented out.
339//
340Bool_t MDataSet::AddSequencesFromList(const TList &list, MDirIter &files)
341{
342 TIter Next(const_cast<TList*>(&list));
343
344 MSequence *seq=0;
345 while ((seq=(MSequence*)Next()))
346 {
347 if (!seq->IsValid())
348 {
349 gLog << err;
350 gLog << "ERROR - MDataSet::AddSequencesFromList: Sequence invalid!" << endl;
351 gLog << " + File: " << seq->GetFileName() << endl;
352 gLog << " + Dir: " << seq->GetDataPath() << endl;
353 return kFALSE;
354 }
355
356 if (seq->SetupDatRuns(files, MSequence::kImages)<=0)
357 return kFALSE;
358 }
359
360 // This is important in case of synchronisation, because the
361 // files in the sequences can be interleaved (eg W1, W2)
362 // Filenames MUST begin with an appropriate string which allow
363 // to order them correctly in time!
364 // files.Sort();
365
366 if (gLog.GetDebugLevel()>4)
367 {
368 gLog << dbg << "Files which are searched:" << endl;
369 files.Print();
370 }
371 return kTRUE;
372}
373
374Bool_t MDataSet::AddFilesOn(MRead &read) const
375{
376 MDirIter files;
377 if (!AddSequencesFromList(fSequencesOn, files))
378 return kFALSE;
379 return read.AddFiles(files)>0;
380}
381
382Bool_t MDataSet::AddFilesOff(MRead &read) const
383{
384 MDirIter files;
385 if (!AddSequencesFromList(fSequencesOff, files))
386 return kFALSE;
387 return read.AddFiles(files)>0;
388}
389
390Bool_t MDataSet::AddFiles(MRead &read) const
391{
392 const Bool_t rc1 = AddFilesOff(read);
393 const Bool_t rc2 = AddFilesOn(read);
394 return rc1 && rc2;
395}
396
397Int_t MDataSet::AddFilesToChain(MDirIter &files, TChain &chain)
398{
399 Int_t num=0;
400 while (1)
401 {
402 const TString fname = files.Next();
403 if (fname.IsNull())
404 break;
405
406 const Int_t n = chain.Add(fname);
407 if (n<=0)
408 return kFALSE;
409 num += n;
410 }
411 return num;
412}
413
414Bool_t MDataSet::AddFilesOn(TChain &chain) const
415{
416 MDirIter files;
417 if (!AddSequencesFromList(fSequencesOn, files))
418 return kFALSE;
419 return AddFilesToChain(files, chain)>0;
420}
421
422Bool_t MDataSet::AddFilesOff(TChain &chain) const
423{
424 MDirIter files;
425 if (!AddSequencesFromList(fSequencesOff, files))
426 return kFALSE;
427 return AddFilesToChain(files, chain)>0;
428}
429
430Bool_t MDataSet::AddFiles(TChain &read) const
431{
432 const Bool_t rc1 = AddFilesOff(read);
433 const Bool_t rc2 = AddFilesOn(read);
434 return rc1 && rc2;
435}
436
437Bool_t MDataSet::GetSourcePos(MPointingPos &pos) const
438{
439 if (!HasSource())
440 {
441 gLog << err << "ERROR - MDataSet::GetSourcePos called, but no source available." << endl;
442 return kFALSE;
443 }
444
445 TString catalog(fCatalog);
446 gSystem->ExpandPathName(catalog);
447
448 ifstream fin(catalog);
449 if (!fin)
450 {
451 gLog << err << "Cannot open file " << catalog << ": ";
452 gLog << strerror(errno) << endl;
453 return kFALSE;
454 }
455
456 TString ra, dec, epoch;
457
458 Int_t n = 0;
459 while (1)
460 {
461 TString line;
462 line.ReadLine(fin);
463 if (!fin)
464 {
465 gLog << err << "ERROR - Source '" << fNameSource << "' not found in " << catalog << "." << endl;
466 return kFALSE;
467 }
468
469 n++;
470
471 TObjArray *arr = line.Tokenize(",");
472
473 if (arr->GetEntries()<6)
474 {
475 gLog << err << "ERROR - Not enough arguments in line #" << n << " of " << catalog << endl;
476 delete arr;
477 return kFALSE;;
478 }
479
480 const TString name = (*arr)[0]->GetName();
481
482 ra = (*arr)[2]->GetName();
483 dec = (*arr)[3]->GetName();
484 epoch = (*arr)[5]->GetName();
485
486 delete arr;
487
488 if (name.Strip(TString::kBoth)==fNameSource)
489 break;
490 }
491
492 if (epoch.Strip(TString::kBoth)!=(TString)"2000")
493 {
494 gLog << err << "ERROR - Epoch not 2000... not supported." << endl;
495 return kFALSE;
496 }
497
498 Double_t r,d;
499 if (!MAstro::Coordinate2Angle(ra, r))
500 {
501 gLog << err << "ERROR - Interpreting right ascension: " << ra << endl;
502 return kFALSE;
503 }
504 if (!MAstro::Coordinate2Angle(dec, d))
505 {
506 gLog << err << "ERROR - Interpreting declination: " << dec << endl;
507 return kFALSE;
508 }
509
510 pos.SetSkyPosition(r, d);
511 pos.SetTitle(fNameSource);
512
513 return kTRUE;
514}
515
516// --------------------------------------------------------------------------
517//
518// Calls ReplaceAll(old, news) for all Dir-entries
519//
520void MDataSet::ReplaceDir(TList &list, const TString &old, const TString &news) const
521{
522 TIter Next(&list);
523 TNamed *name = 0;
524 while ((name=(TNamed*)Next()))
525 {
526 TString dir = name->GetTitle();
527 dir.ReplaceAll(old, news);
528 name->SetTitle(dir);
529 }
530}
531
532// --------------------------------------------------------------------------
533//
534// Calls ReplaceAll(old, news) for all File-entries
535//
536void MDataSet::ReplaceFile(TList &list, const TString &old, const TString &news) const
537{
538 TIter Next(&list);
539 TNamed *name = 0;
540 while ((name=(TNamed*)Next()))
541 {
542 TString file = name->GetName();
543 file.ReplaceAll(old, news);
544 name->SetName(file);
545 }
546}
Note: See TracBrowser for help on using the repository browser.