source: trunk/MagicSoft/Mars/mjobs/MDataSet.cc@ 8666

Last change on this file since 8666 was 8666, checked in by tbretz, 17 years ago
*** empty log message ***
File size: 17.1 KB
Line 
1/* ======================================================================== *\
2!
3! *
4! * This file is part of MARS, the MAGIC Analysis and Reconstruction
5! * Software. It is distributed to you in the hope that it can be a useful
6! * and timesaving tool in analysing Data of imaging Cerenkov telescopes.
7! * It is distributed WITHOUT ANY WARRANTY.
8! *
9! * Permission to use, copy, modify and distribute this software and its
10! * documentation for any purpose is hereby granted without fee,
11! * provided that the above copyright notice appear in all copies and
12! * that both that copyright notice and this permission notice appear
13! * in supporting documentation. It is provided "as is" without express
14! * or implied warranty.
15! *
16!
17!
18! Author(s): Thomas Bretz, 1/2005 <mailto:tbretz@astro.uni-wuerzburg.de>
19!
20! Copyright: MAGIC Software Development, 2004-2007
21!
22!
23\* ======================================================================== */
24
25/////////////////////////////////////////////////////////////////////////////
26//
27// MDataSet
28//
29// This class describes a collection of sequences.
30//
31// Such an input file looks like:
32//
33// crab.seq:
34// ---------
35// AnalysisNumber: 1
36//
37// SequencesOn: 35222
38// SequencesOff: 36817
39//
40// SequencePath: /magic/sequences
41// DataPath: /magic/data/star
42//
43// Sequence00035222.File: sequences/sequence035222.txt
44// Sequence00036817.File: sequences/sequence036817.txt
45//
46// Sequence00035222.Dir: /data2/wuerzburg/Crab-Analyse/images/035222
47// Sequence00036817.Dir: /data2/wuerzburg/Crab-Analyse/images/036817
48//
49// MonteCarlo: No
50//
51// AnalysisNumber: The analysis number is an artificial number used to name
52// the output files automatically if the names are not overwritten in the
53// corresponding programs.
54//
55// SequencePath: In case it is not specified the datacenter default path is
56// used. If it is given it is the place at which the sequence files
57// are searched, if not overwritten by either a program command line
58// option (aka. an argument to the constructor) or a resource for
59// an individual sequence file. Note, that the four digits high-level
60// directories to sort the sequences are added to the given path.
61//
62// DataPath: In case it is not specified the datacenter default path is
63// used. If it is given it is the place at which the data files
64// are searched, if not overwritten by either a program command line
65// option (aka. an argument to the constructor) or a resource for
66// an individual data path. Note, that the four digits high-level
67// directories to sort the sequences are added to the given path.
68//
69// SequencesOn/Off: The sequence numbers are used to concatenate the filenames
70// of the sequences using the file structure used in the datacenter. Each
71// sequence can be added to the on and off data at the same time but only
72// once.
73//
74// If you have different file names you can overwrite the default file names
75// using Sequence%08d.File (make sure you have 8 digits!)
76//
77// In standard conditions (datacenter file system) paths are concatenated
78// by using the information in the sequence files (date, etc). You can
79// overwrite the directories in which the sequence-files (eg I-files) are
80// stored using Sequence%08d.Dir (make sure you have 8 digits!)
81//
82//
83//
84// Resource file entries are case sensitive!
85//
86// IMPORTANT:
87// * Run filenames must begin with a string which allows correct
88// ordering in time, otherwise synchronization might fail.
89// * Sequence filenames should also have names allowing to order them
90// in time, but it is not necessary.
91//
92// Class Version 2:
93// + fMonteCarlo
94// + fWobbleMode
95// - fIsWobbleMode
96//
97/////////////////////////////////////////////////////////////////////////////
98#include "MDataSet.h"
99
100#include <string.h> // necessary for Fedora core 2 with kernel 2.6.9-1.667 #1 and gcc 3.4.2
101#include <errno.h> // necessary for Fedora core 2 with kernel 2.6.9-1.667 #1 and gcc 3.4.2
102
103#include <stdlib.h>
104#include <fstream>
105
106#include <TEnv.h>
107#include <TChain.h>
108#include <TRegexp.h>
109#include <TSystem.h> // TSystem::ExpandPath
110
111#include "MLog.h"
112#include "MLogManip.h"
113
114#include "MRead.h"
115#include "MJob.h"
116#include "MEnv.h"
117#include "MAstro.h"
118#include "MDirIter.h"
119#include "MSequence.h"
120#include "MPointingPos.h"
121
122ClassImp(MDataSet);
123
124using namespace std;
125
// Catalog file used as fallback if the dataset file contains no
// "Catalog" resource (see the constructor).
126const TString MDataSet::fgCatalog = "/magic/datacenter/setup/magic_favorites.edb";
127
128// --------------------------------------------------------------------------
129//
130// Copy the sequence numbers from the TString runs into the TArrayI data
131// Sequences which are twice in the list are only added once. In this case
132// a warning is emitted.
133//
134void MDataSet::Split(TString &runs, TArrayI &data) const
135{
136 const TRegexp regexp("[0-9]+");
137
138 data.Set(0);
139
140 runs.ReplaceAll("\t", " ");
141 runs = runs.Strip(TString::kBoth);
142
143 while (!runs.IsNull())
144 {
145 const TString num = runs(regexp);
146
147 if (num.IsNull())
148 {
149 *fLog << warn << "WARNING - Sequence is NaN (not a number): '" << runs << "'" << endl;
150 break;
151 }
152
153 const Int_t seq = atoi(num.Data());
154 const Int_t n = data.GetSize();
155
156 // skip already existing entries
157 int i;
158 for (i=0; i<n; i++)
159 if (data[i] == seq)
160 break;
161
162 if (i<n)
163 *fLog << warn << "WARNING - Sequence #" << seq << " already in list... skipped." << endl;
164 else
165 {
166 // set new entry
167 data.Set(n+1);
168 data[n] = seq;
169 }
170
171 // remove entry from string
172 runs.Remove(0, runs.First(num)+num.Length());
173 }
174
175 MJob::SortArray(data);
176}
177
178// --------------------------------------------------------------------------
179//
180// After resolving the sequence filename and directory either from the
181// default (/magic/data/sequences/0004/sequence00004000.txt) or from
182// the corresponding entries in the dataset file.
183// The entries are sorted by filename.
184//
185void MDataSet::ResolveSequences(TEnv &env, const TArrayI &num, TList &list) const
186{
187 TString sequences = fPathSequences;
188 TString data = fPathDataFiles;
189
190 for (int i=0; i<num.GetSize(); i++)
191 {
192 TString name = env.GetValue(Form("Sequence%08d.File", num[i]), "");
193 TString dir = env.GetValue(Form("Sequence%08d.Dir", num[i]), "");
194
195 // Set default sequence file and dir name
196 if (name.IsNull())
197 name = Form("%s%04d/sequence%08d.txt", sequences.Data(), num[i]/10000, num[i]);
198 if (dir.IsNull())
199 dir = Form("%s%04d/%08d", data.Data(), num[i]/10000, num[i]);
200
201 // FIXME: The sequence number from the sequence file is assigned!!!
202 MSequence *seq = new MSequence(name, dir);
203
204 if (seq->IsValid() && seq->GetSequence()!=(UInt_t)num[i])
205 *fLog << warn << "WARNING - Sequence number " << num[i] << " in dataset file doesn't match sequence number " << seq->GetSequence() << " in sequence file!" << endl;
206
207 list.Add(seq);
208 }
209
210 // For the synchronization we must make sure, that all sequences are
211 // in the correct order...
212 // list.Sort();
213}
214
215// --------------------------------------------------------------------------
216//
217// Read the file fname as setup file for the sequence.
218//
219MDataSet::MDataSet(const char *fname, TString sequences, TString data)
220{
221 fName = fname;
222
223 fSequencesOn.SetOwner();
224 fSequencesOff.SetOwner();
225
226 TString expname(fname);
227 gSystem->ExpandPathName(expname);
228
229 const Bool_t access = !gSystem->AccessPathName(expname, kFileExists);
230 if (!access)
231 gLog << err << "ERROR - Dataset file " << expname << " not accessible!" << endl;
232
233 MEnv env(expname);
234
235 fNumAnalysis = env.GetValue("AnalysisNumber", -1);
236 fTitle = env.GetValue("Name", expname);
237
238 TString str;
239 str = env.GetValue("SequencesOn", "");
240 Split(str, fNumSequencesOn);
241 str = env.GetValue("SequencesOff", "");
242 Split(str, fNumSequencesOff);
243
244 fNameSource = env.GetValue("SourceName", "");
245 fCatalog = env.GetValue("Catalog", fgCatalog);
246 fWobbleMode = env.GetValue("WobbleMode", kFALSE);
247 fMonteCarlo = env.GetValue("MonteCarlo", kFALSE);
248 fComment = env.GetValue("Comment", "");
249
250 const TString defpathseq = env.GetValue("SequencePath", GetDefPathSequences());
251 const TString defpathdata = env.GetValue("DataPath", GetDefPathDataFiles());
252
253 SetupDefaultPath(sequences, defpathseq);
254 SetupDefaultPath(data, defpathdata);
255
256 fPathSequences = sequences;
257 fPathDataFiles = data;
258
259 ResolveSequences(env, fNumSequencesOn, fSequencesOn);
260 ResolveSequences(env, fNumSequencesOff, fSequencesOff);
261
262 fNameSource = fNameSource.Strip(TString::kBoth);
263 fCatalog = fCatalog.Strip(TString::kBoth);
264
265 // --- Now "touch" resources which are not yet stored in MDataSet ---
266 env.Touch("RunTime");
267
268 // --- Print "untouch" resources ---
269 if (env.GetNumUntouched()>0)
270 {
271 gLog << warn << "WARNING - At least one resource in the dataset-file has not been touched!" << endl;
272 env.PrintUntouched();
273 }
274}
275
276// --------------------------------------------------------------------------
277//
278// Return '+' if both can be accessed, '-' otherwise.
279//
280void MDataSet::PrintFile(const MSequence &seq)
281{
282 const Char_t access =
283 !gSystem->AccessPathName(seq.GetFileName(), kFileExists) &&
284 !gSystem->AccessPathName(seq.GetDataPath(), kFileExists) ? '+' : '-';
285
286 gLog << "# " << access << " " << seq.GetFileName() << " <" << seq.GetDataPath() << ">" << endl;
287}
288
289// --------------------------------------------------------------------------
290//
291// Print the contents of the sequence
292//
293void MDataSet::Print(Option_t *o) const
294{
295 gLog << all;
296 if (!IsValid())
297 {
298 gLog << "Dataset: " << fName << " <invalid - no analysis number available>" << endl;
299 return;
300 }
301 gLog << "AnalysisNumber: " << fNumAnalysis << endl << endl;
302
303 if (!fTitle.IsNull())
304 gLog << "Name: " << fTitle << endl << endl;
305
306 gLog << "SequencesOn: ";
307 for (int i=0; i<fNumSequencesOn.GetSize(); i++)
308 gLog << " " << fNumSequencesOn[i];
309 gLog << endl;
310 gLog << "SequencesOff: ";
311 for (int i=0; i<fNumSequencesOff.GetSize(); i++)
312 gLog << " " << fNumSequencesOff[i];
313 gLog << endl << endl;
314
315 gLog << "SourceName: " << fNameSource << endl;
316 gLog << "Catalog: " << fCatalog << endl;
317
318 gLog << "WobbleMode: " << (fWobbleMode?"On":"Off") << endl << endl;
319 gLog << "MonteCarlo: " << (fMonteCarlo?"Yes":"No") << endl << endl;
320
321 gLog << "Comment: " << fComment << endl;
322
323 if (fSequencesOn.GetEntries()>0)
324 gLog << endl;
325
326 TIter NextOn(&fSequencesOn);
327 TIter NextOff(&fSequencesOff);
328 MSequence *seq=0;
329 while ((seq=(MSequence*)NextOn()))
330 {
331 gLog << "Sequence" << Form("%08d", seq->GetSequence()) << ".File: " << seq->GetFileName() << endl;
332 gLog << "Sequence" << Form("%08d", seq->GetSequence()) << ".Dir: " << seq->GetDataPath() << endl;
333 }
334 if (fSequencesOff.GetEntries()>0)
335 gLog << endl;
336 while ((seq=(MSequence*)NextOff()))
337 {
338 gLog << "Sequence" << Form("%08d", seq->GetSequence()) << ".File: " << seq->GetFileName() << endl;
339 gLog << "Sequence" << Form("%08d", seq->GetSequence()) << ".Dir: " << seq->GetDataPath() << endl;
340 }
341
342 if (TString(o).Contains("files", TString::kIgnoreCase))
343 {
344 gLog << endl;
345 gLog << "# On-Data Files:" << endl;
346 NextOn.Reset();
347 while ((seq=(MSequence*)NextOn()))
348 PrintFile(*seq);
349
350 gLog << endl;
351 gLog << "# Off-Data Files:" << endl;
352 NextOff.Reset();
353 while ((seq=(MSequence*)NextOff()))
354 PrintFile(*seq);
355
356 return;
357 }
358}
359
360// --------------------------------------------------------------------------
361//
362// Adds all sequences contained in list to the MDirIter. After adding
363// everything MDirIter::Sort is called to sort all entries by name.
364//
365Bool_t MDataSet::AddSequencesFromList(const TList &list, MDirIter &files)
366{
367 TIter Next(const_cast<TList*>(&list));
368
369 MSequence *seq=0;
370 while ((seq=(MSequence*)Next()))
371 {
372 if (!seq->IsValid())
373 {
374 gLog << err;
375 gLog << "ERROR - MDataSet::AddSequencesFromList: Sequence invalid!" << endl;
376 gLog << " + File: " << seq->GetFileName() << endl;
377 gLog << " + Dir: " << seq->GetDataPath() << endl;
378 return kFALSE;
379 }
380
381 if (seq->SetupDatRuns(files, MSequence::kImages)<=0)
382 return kFALSE;
383 }
384
385 // This is important in case of synchronisation, because the
386 // files in the sequences can be interleaved (eg W1, W2)
387 // Filenames MUST begin with an appropriate string which allow
388 // to order them correctly in time!
389 // files.Sort();
390
391 if (gLog.GetDebugLevel()>4)
392 {
393 gLog << dbg << "Files which are searched:" << endl;
394 files.Print();
395 }
396 return kTRUE;
397}
398
399Bool_t MDataSet::AddFilesOn(MDirIter &iter) const
400{
401 return AddSequencesFromList(fSequencesOn, iter);
402}
403
404Bool_t MDataSet::AddFilesOff(MDirIter &iter) const
405{
406 return AddSequencesFromList(fSequencesOff, iter);
407}
408
409Bool_t MDataSet::AddFiles(MDirIter &iter) const
410{
411 const Bool_t rc1 = AddFilesOff(iter);
412 const Bool_t rc2 = AddFilesOn(iter);
413 return rc1 && rc2;
414}
415
416Bool_t MDataSet::AddFilesOn(MRead &read) const
417{
418 MDirIter files;
419 if (!AddFilesOn(files))
420 return kFALSE;
421 return read.AddFiles(files)>0;
422}
423
424Bool_t MDataSet::AddFilesOff(MRead &read) const
425{
426 MDirIter files;
427 if (!AddFilesOff(files))
428 return kFALSE;
429 return read.AddFiles(files)>0;
430}
431
432Bool_t MDataSet::AddFiles(MRead &read) const
433{
434 const Bool_t rc1 = AddFilesOff(read);
435 const Bool_t rc2 = AddFilesOn(read);
436 return rc1 && rc2;
437}
438
439Int_t MDataSet::AddFilesToChain(MDirIter &files, TChain &chain)
440{
441 Int_t num=0;
442 while (1)
443 {
444 const TString fname = files.Next();
445 if (fname.IsNull())
446 break;
447
448 const Int_t n = chain.Add(fname);
449 if (n<=0)
450 return kFALSE;
451 num += n;
452 }
453 return num;
454}
455
456Bool_t MDataSet::AddFilesOn(TChain &chain) const
457{
458 MDirIter files;
459 if (!AddSequencesFromList(fSequencesOn, files))
460 return kFALSE;
461 return AddFilesToChain(files, chain)>0;
462}
463
464Bool_t MDataSet::AddFilesOff(TChain &chain) const
465{
466 MDirIter files;
467 if (!AddSequencesFromList(fSequencesOff, files))
468 return kFALSE;
469 return AddFilesToChain(files, chain)>0;
470}
471
472Bool_t MDataSet::AddFiles(TChain &read) const
473{
474 const Bool_t rc1 = AddFilesOff(read);
475 const Bool_t rc2 = AddFilesOn(read);
476 return rc1 && rc2;
477}
478
479Bool_t MDataSet::GetSourcePos(MPointingPos &pos) const
480{
481 if (!HasSource())
482 {
483 gLog << err << "ERROR - MDataSet::GetSourcePos called, but no source available." << endl;
484 return kFALSE;
485 }
486
487 TString catalog(fCatalog);
488 gSystem->ExpandPathName(catalog);
489
490 ifstream fin(catalog);
491 if (!fin)
492 {
493 gLog << err << "Cannot open file " << catalog << ": ";
494 gLog << strerror(errno) << endl;
495 return kFALSE;
496 }
497
498 TString ra, dec, epoch;
499
500 Int_t n = 0;
501 while (1)
502 {
503 TString line;
504 line.ReadLine(fin);
505 if (!fin)
506 {
507 gLog << err << "ERROR - Source '" << fNameSource << "' not found in " << catalog << "." << endl;
508 return kFALSE;
509 }
510
511 n++;
512
513 TObjArray *arr = line.Tokenize(",");
514
515 if (arr->GetEntries()<6)
516 {
517 gLog << err << "ERROR - Not enough arguments in line #" << n << " of " << catalog << endl;
518 delete arr;
519 return kFALSE;;
520 }
521
522 const TString name = (*arr)[0]->GetName();
523
524 ra = (*arr)[2]->GetName();
525 dec = (*arr)[3]->GetName();
526 epoch = (*arr)[5]->GetName();
527
528 delete arr;
529
530 if (name.Strip(TString::kBoth)==fNameSource)
531 break;
532 }
533
534 if (epoch.Strip(TString::kBoth)!=(TString)"2000")
535 {
536 gLog << err << "ERROR - Epoch not 2000... not supported." << endl;
537 return kFALSE;
538 }
539
540 Double_t r,d;
541 if (!MAstro::Coordinate2Angle(ra, r))
542 {
543 gLog << err << "ERROR - Interpreting right ascension: " << ra << endl;
544 return kFALSE;
545 }
546 if (!MAstro::Coordinate2Angle(dec, d))
547 {
548 gLog << err << "ERROR - Interpreting declination: " << dec << endl;
549 return kFALSE;
550 }
551
552 pos.SetSkyPosition(r, d);
553 pos.SetTitle(fNameSource);
554
555 return kTRUE;
556}
557
558// --------------------------------------------------------------------------
559//
560// Calls ReplaceAll(old, news) for all Dir-entries
561//
562void MDataSet::ReplaceDir(TList &list, const TString &old, const TString &news) const
563{
564 TIter Next(&list);
565 TNamed *name = 0;
566 while ((name=(TNamed*)Next()))
567 {
568 TString dir = name->GetTitle();
569 dir.ReplaceAll(old, news);
570 name->SetTitle(dir);
571 }
572}
573
574// --------------------------------------------------------------------------
575//
576// Calls ReplaceAll(old, news) for all File-entries
577//
578void MDataSet::ReplaceFile(TList &list, const TString &old, const TString &news) const
579{
580 TIter Next(&list);
581 TNamed *name = 0;
582 while ((name=(TNamed*)Next()))
583 {
584 TString file = name->GetName();
585 file.ReplaceAll(old, news);
586 name->SetName(file);
587 }
588}
Note: See TracBrowser for help on using the repository browser.