source: trunk/MagicSoft/Mars/mjobs/MDataSet.cc@ 8701

Last change on this file since 8701 was 8674, checked in by tbretz, 17 years ago
*** empty log message ***
File size: 17.5 KB
Line 
1/* ======================================================================== *\
2!
3! *
4! * This file is part of MARS, the MAGIC Analysis and Reconstruction
5! * Software. It is distributed to you in the hope that it can be a useful
6! * and timesaving tool in analysing Data of imaging Cerenkov telescopes.
7! * It is distributed WITHOUT ANY WARRANTY.
8! *
9! * Permission to use, copy, modify and distribute this software and its
10! * documentation for any purpose is hereby granted without fee,
11! * provided that the above copyright notice appear in all copies and
12! * that both that copyright notice and this permission notice appear
13! * in supporting documentation. It is provided "as is" without express
14! * or implied warranty.
15! *
16!
17!
18! Author(s): Thomas Bretz, 1/2005 <mailto:tbretz@astro.uni-wuerzburg.de>
19!
20! Copyright: MAGIC Software Development, 2004-2007
21!
22!
23\* ======================================================================== */
24
25/////////////////////////////////////////////////////////////////////////////
26//
27// MDataSet
28//
29// This class describes a collection of sequences.
30//
31// Such an input file looks like:
32//
33// crab.seq:
34// ---------
35// AnalysisNumber: 1
36//
37// SequencesOn: 35222
38// SequencesOff: 36817
39//
40// SequencePath: /magic/sequences
41// DataPath: /magic/data/star
42//
43// Sequence00035222.File: sequences/sequence035222.txt
44// Sequence00036817.File: sequences/sequence036817.txt
45//
46// Sequence00035222.Dir: /data2/wuerzburg/Crab-Analyse/images/035222
47// Sequence00036817.Dir: /data2/wuerzburg/Crab-Analyse/images/036817
48//
49// MonteCarlo: No
50//
51// AnalysisNumber: The analysis number is an artificial number used to name
52// the output files automatically if the names are not overwritten in the
53// corresponding programs.
54//
55// SequencePath: In case it is not specified the datacenter default path is
56// used. If it is given it is the place at which the sequence files
57// are searched, if not overwritten by either a program command line
58// option (aka. an argument to the constructor) or a resource for
59// an individual sequence file. Note, that the four digits high-level
60// directories to sort the sequences are added to the given path.
61//
62// DataPath: In case it is not specified the datacenter default path is
63// used. If it is given it is the place at which the data files
64// are searched, if not overwritten by either a program command line
65// option (aka. an argument to the constructor) or a resource for
66// an individual data path. Note, that the four digits high-level
67// directories to sort the sequences are added to the given path.
68//
69// SequencesOn/Off: The sequence numbers are used to concatenate the filenames
70// of the sequences using the file structure used in the datacenter. Each
71// sequence can be added to the on and off data at the same time but only
72// once.
73//
74// If you have different file names you can overwrite the default file names
75// using Sequence%08d.File (make sure you have 8 digits!)
76//
77// In standard conditions (datacenter file system) paths are concatenated
78// by using the information in the sequence files (date, etc). You can
79// overwrite the directories in which the sequence-files (eg I-files) are
80// stored using Sequence%08d.Dir (make sure you have 8 digits!)
81//
82//
83//
84// Resource file entries are case sensitive!
85//
86// IMPORTANT:
87// * Run filenames must begin with a string which allows correct
88// ordering in time, otherwise synchronization might fail.
89// * Sequence filenames should also have names allowing to order them
90// in time, but it is not necessary.
91//
92// Class Version 2:
93// + fMonteCarlo
94// + fWobbleMode
95// - fIsWobbleMode
96//
97/////////////////////////////////////////////////////////////////////////////
98#include "MDataSet.h"
99
100#include <string.h> // necessary for Fedora core 2 with kernel 2.6.9-1.667 #1 and gcc 3.4.2
101#include <errno.h> // necessary for Fedora core 2 with kernel 2.6.9-1.667 #1 and gcc 3.4.2
102
103#include <stdlib.h>
104#include <fstream>
105
106#include <TEnv.h>
107#include <TChain.h>
108#include <TRegexp.h>
109#include <TSystem.h> // TSystem::ExpandPath
110
111#include "MLog.h"
112#include "MLogManip.h"
113
114#include "MRead.h"
115#include "MJob.h"
116#include "MEnv.h"
117#include "MAstro.h"
118#include "MDirIter.h"
119#include "MSequence.h"
120#include "MPointingPos.h"
121
122ClassImp(MDataSet);
123
124using namespace std;
125
126const TString MDataSet::fgCatalog = "/magic/datacenter/setup/magic_favorites.edb";
127
128// --------------------------------------------------------------------------
129//
130// Copy the sequence numbers from the TString runs into the TArrayI data
131// Sequences which are twice in the list are only added once. In this case
132// a warning is emitted.
133//
134void MDataSet::Split(TString &runs, TArrayI &data) const
135{
136 const TRegexp regexp("[0-9]+");
137
138 data.Set(0);
139
140 runs.ReplaceAll("\t", " ");
141 runs = runs.Strip(TString::kBoth);
142
143 while (!runs.IsNull())
144 {
145 const TString num = runs(regexp);
146
147 if (num.IsNull())
148 {
149 *fLog << warn << "WARNING - Sequence is NaN (not a number): '" << runs << "'" << endl;
150 break;
151 }
152
153 const Int_t seq = atoi(num.Data());
154 const Int_t n = data.GetSize();
155
156 // skip already existing entries
157 int i;
158 for (i=0; i<n; i++)
159 if (data[i] == seq)
160 break;
161
162 if (i<n)
163 *fLog << warn << "WARNING - Sequence #" << seq << " already in list... skipped." << endl;
164 else
165 {
166 // set new entry
167 data.Set(n+1);
168 data[n] = seq;
169 }
170
171 // remove entry from string
172 runs.Remove(0, runs.First(num)+num.Length());
173 }
174
175 MJob::SortArray(data);
176}
177
178// --------------------------------------------------------------------------
179//
180// After resolving the sequence filename and directory either from the
181// default (/magic/data/sequences/0004/sequence00004000.txt) or from
182// the corresponding entries in the dataset file.
183// The entries are sorted by filename.
184//
185void MDataSet::ResolveSequences(TEnv &env, const TArrayI &num, TList &list) const
186{
187 TString sequences = fPathSequences;
188 TString data = fPathDataFiles;
189
190 for (int i=0; i<num.GetSize(); i++)
191 {
192 TString name = env.GetValue(Form("Sequence%08d.File", num[i]), "");
193 TString dir = env.GetValue(Form("Sequence%08d.Dir", num[i]), "");
194
195 // Set default sequence file and dir name
196 if (name.IsNull())
197 name = Form("%s%04d/sequence%08d.txt", sequences.Data(), num[i]/10000, num[i]);
198 if (dir.IsNull())
199 dir = Form("%s%04d/%08d", data.Data(), num[i]/10000, num[i]);
200
201 // FIXME: The sequence number from the sequence file is assigned!!!
202 MSequence *seq = new MSequence(name, dir);
203
204 if (seq->IsValid() && seq->GetSequence()!=(UInt_t)num[i])
205 *fLog << warn << "WARNING - Sequence number " << num[i] << " in dataset file doesn't match sequence number " << seq->GetSequence() << " in sequence file!" << endl;
206
207 list.Add(seq);
208 }
209
210 // For the synchronization we must make sure, that all sequences are
211 // in the correct order...
212 // list.Sort();
213}
214
215// --------------------------------------------------------------------------
216//
217// Read the file fname as setup file for the sequence.
218//
219MDataSet::MDataSet(const char *fname, TString sequences, TString data)
220{
221 fName = fname;
222
223 fSequencesOn.SetOwner();
224 fSequencesOff.SetOwner();
225
226 TString expname(fname);
227 gSystem->ExpandPathName(expname);
228
229 const Bool_t access = !gSystem->AccessPathName(expname, kFileExists);
230 if (!access)
231 gLog << err << "ERROR - Dataset file " << expname << " not accessible!" << endl;
232
233 MEnv env(expname);
234
235 fNumAnalysis = env.GetValue("AnalysisNumber", -1);
236 fTitle = env.GetValue("Name", expname);
237
238 TString str;
239 str = env.GetValue("SequencesOn", "");
240 Split(str, fNumSequencesOn);
241 str = env.GetValue("SequencesOff", "");
242 Split(str, fNumSequencesOff);
243
244 fNameSource = env.GetValue("SourceName", "");
245 fCatalog = env.GetValue("Catalog", fgCatalog);
246 fWobbleMode = env.GetValue("WobbleMode", kFALSE);
247 fMonteCarlo = env.GetValue("MonteCarlo", kFALSE);
248 fComment = env.GetValue("Comment", "");
249
250 const TString defpathseq = env.GetValue("SequencePath", GetDefPathSequences());
251 const TString defpathdata = env.GetValue("DataPath", GetDefPathDataFiles());
252
253 SetupDefaultPath(sequences, defpathseq);
254 SetupDefaultPath(data, defpathdata);
255
256 fPathSequences = sequences;
257 fPathDataFiles = data;
258
259 ResolveSequences(env, fNumSequencesOn, fSequencesOn);
260 ResolveSequences(env, fNumSequencesOff, fSequencesOff);
261
262 fNameSource = fNameSource.Strip(TString::kBoth);
263 fCatalog = fCatalog.Strip(TString::kBoth);
264
265 // --- Now "touch" resources which are not yet stored in MDataSet ---
266 env.Touch("RunTime");
267
268 // --- Print "untouch" resources ---
269 if (env.GetNumUntouched()>0)
270 {
271 gLog << warn << "WARNING - At least one resource in the dataset-file has not been touched!" << endl;
272 env.PrintUntouched();
273 }
274}
275
276//---------------------------------------------------------------------------
277//
278// Make sure that the name used for writing doesn't contain a full path
279//
280const char *MDataSet::GetName() const
281{
282 const char *pos = strrchr(GetRcName(), '/');
283 return pos>0 ? pos+1 : GetRcName();
284}
285
286
287// --------------------------------------------------------------------------
288//
289// Return '+' if both can be accessed, '-' otherwise.
290//
291void MDataSet::PrintFile(const MSequence &seq)
292{
293 const Char_t access =
294 !gSystem->AccessPathName(seq.GetFileName(), kFileExists) &&
295 !gSystem->AccessPathName(seq.GetDataPath(), kFileExists) ? '+' : '-';
296
297 gLog << "# " << access << " " << seq.GetFileName() << " <" << seq.GetDataPath() << ">" << endl;
298}
299
300// --------------------------------------------------------------------------
301//
302// Print the contents of the sequence
303//
304void MDataSet::Print(Option_t *o) const
305{
306 gLog << all;
307 if (!IsValid())
308 {
309 gLog << "Dataset: " << fName << " <invalid - no analysis number available>" << endl;
310 return;
311 }
312 gLog << "# Path: " << GetRcName() << endl;
313 gLog << "# Name: " << GetName() << endl;
314 gLog << endl;
315 gLog << "AnalysisNumber: " << fNumAnalysis << endl << endl;
316
317 if (!fTitle.IsNull())
318 gLog << "Name: " << fTitle << endl << endl;
319
320 gLog << "SequencesOn: ";
321 for (int i=0; i<fNumSequencesOn.GetSize(); i++)
322 gLog << " " << fNumSequencesOn[i];
323 gLog << endl;
324 gLog << "SequencesOff: ";
325 for (int i=0; i<fNumSequencesOff.GetSize(); i++)
326 gLog << " " << fNumSequencesOff[i];
327 gLog << endl << endl;
328
329 gLog << "SourceName: " << fNameSource << endl;
330 gLog << "Catalog: " << fCatalog << endl;
331
332 gLog << "WobbleMode: " << (fWobbleMode?"On":"Off") << endl << endl;
333 gLog << "MonteCarlo: " << (fMonteCarlo?"Yes":"No") << endl << endl;
334
335 gLog << "Comment: " << fComment << endl;
336
337 if (fSequencesOn.GetEntries()>0)
338 gLog << endl;
339
340 TIter NextOn(&fSequencesOn);
341 TIter NextOff(&fSequencesOff);
342 MSequence *seq=0;
343 while ((seq=(MSequence*)NextOn()))
344 {
345 gLog << "Sequence" << Form("%08d", seq->GetSequence()) << ".File: " << seq->GetFileName() << endl;
346 gLog << "Sequence" << Form("%08d", seq->GetSequence()) << ".Dir: " << seq->GetDataPath() << endl;
347 }
348 if (fSequencesOff.GetEntries()>0)
349 gLog << endl;
350 while ((seq=(MSequence*)NextOff()))
351 {
352 gLog << "Sequence" << Form("%08d", seq->GetSequence()) << ".File: " << seq->GetFileName() << endl;
353 gLog << "Sequence" << Form("%08d", seq->GetSequence()) << ".Dir: " << seq->GetDataPath() << endl;
354 }
355
356 if (TString(o).Contains("files", TString::kIgnoreCase))
357 {
358 gLog << endl;
359 gLog << "# On-Data Files:" << endl;
360 NextOn.Reset();
361 while ((seq=(MSequence*)NextOn()))
362 PrintFile(*seq);
363
364 gLog << endl;
365 gLog << "# Off-Data Files:" << endl;
366 NextOff.Reset();
367 while ((seq=(MSequence*)NextOff()))
368 PrintFile(*seq);
369
370 return;
371 }
372}
373
374// --------------------------------------------------------------------------
375//
376// Adds all sequences contained in list to the MDirIter. After adding
377// everything MDirIter::Sort is called to sort all entries by name.
378//
379Bool_t MDataSet::AddSequencesFromList(const TList &list, MDirIter &files)
380{
381 TIter Next(const_cast<TList*>(&list));
382
383 MSequence *seq=0;
384 while ((seq=(MSequence*)Next()))
385 {
386 if (!seq->IsValid())
387 {
388 gLog << err;
389 gLog << "ERROR - MDataSet::AddSequencesFromList: Sequence invalid!" << endl;
390 gLog << " + File: " << seq->GetFileName() << endl;
391 gLog << " + Dir: " << seq->GetDataPath() << endl;
392 return kFALSE;
393 }
394
395 if (seq->SetupDatRuns(files, MSequence::kImages)<=0)
396 return kFALSE;
397 }
398
399 // This is important in case of synchronisation, because the
400 // files in the sequences can be interleaved (eg W1, W2)
401 // Filenames MUST begin with an appropriate string which allow
402 // to order them correctly in time!
403 // files.Sort();
404
405 if (gLog.GetDebugLevel()>4)
406 {
407 gLog << dbg << "Files which are searched:" << endl;
408 files.Print();
409 }
410 return kTRUE;
411}
412
413Bool_t MDataSet::AddFilesOn(MDirIter &iter) const
414{
415 return AddSequencesFromList(fSequencesOn, iter);
416}
417
418Bool_t MDataSet::AddFilesOff(MDirIter &iter) const
419{
420 return AddSequencesFromList(fSequencesOff, iter);
421}
422
423Bool_t MDataSet::AddFiles(MDirIter &iter) const
424{
425 const Bool_t rc1 = AddFilesOff(iter);
426 const Bool_t rc2 = AddFilesOn(iter);
427 return rc1 && rc2;
428}
429
430Bool_t MDataSet::AddFilesOn(MRead &read) const
431{
432 MDirIter files;
433 if (!AddFilesOn(files))
434 return kFALSE;
435 return read.AddFiles(files)>0;
436}
437
438Bool_t MDataSet::AddFilesOff(MRead &read) const
439{
440 MDirIter files;
441 if (!AddFilesOff(files))
442 return kFALSE;
443 return read.AddFiles(files)>0;
444}
445
446Bool_t MDataSet::AddFiles(MRead &read) const
447{
448 const Bool_t rc1 = AddFilesOff(read);
449 const Bool_t rc2 = AddFilesOn(read);
450 return rc1 && rc2;
451}
452
453Int_t MDataSet::AddFilesToChain(MDirIter &files, TChain &chain)
454{
455 Int_t num=0;
456 while (1)
457 {
458 const TString fname = files.Next();
459 if (fname.IsNull())
460 break;
461
462 const Int_t n = chain.Add(fname);
463 if (n<=0)
464 return kFALSE;
465 num += n;
466 }
467 return num;
468}
469
470Bool_t MDataSet::AddFilesOn(TChain &chain) const
471{
472 MDirIter files;
473 if (!AddSequencesFromList(fSequencesOn, files))
474 return kFALSE;
475 return AddFilesToChain(files, chain)>0;
476}
477
478Bool_t MDataSet::AddFilesOff(TChain &chain) const
479{
480 MDirIter files;
481 if (!AddSequencesFromList(fSequencesOff, files))
482 return kFALSE;
483 return AddFilesToChain(files, chain)>0;
484}
485
486Bool_t MDataSet::AddFiles(TChain &read) const
487{
488 const Bool_t rc1 = AddFilesOff(read);
489 const Bool_t rc2 = AddFilesOn(read);
490 return rc1 && rc2;
491}
492
493Bool_t MDataSet::GetSourcePos(MPointingPos &pos) const
494{
495 if (!HasSource())
496 {
497 gLog << err << "ERROR - MDataSet::GetSourcePos called, but no source available." << endl;
498 return kFALSE;
499 }
500
501 TString catalog(fCatalog);
502 gSystem->ExpandPathName(catalog);
503
504 ifstream fin(catalog);
505 if (!fin)
506 {
507 gLog << err << "Cannot open file " << catalog << ": ";
508 gLog << strerror(errno) << endl;
509 return kFALSE;
510 }
511
512 TString ra, dec, epoch;
513
514 Int_t n = 0;
515 while (1)
516 {
517 TString line;
518 line.ReadLine(fin);
519 if (!fin)
520 {
521 gLog << err << "ERROR - Source '" << fNameSource << "' not found in " << catalog << "." << endl;
522 return kFALSE;
523 }
524
525 n++;
526
527 TObjArray *arr = line.Tokenize(",");
528
529 if (arr->GetEntries()<6)
530 {
531 gLog << err << "ERROR - Not enough arguments in line #" << n << " of " << catalog << endl;
532 delete arr;
533 return kFALSE;;
534 }
535
536 const TString name = (*arr)[0]->GetName();
537
538 ra = (*arr)[2]->GetName();
539 dec = (*arr)[3]->GetName();
540 epoch = (*arr)[5]->GetName();
541
542 delete arr;
543
544 if (name.Strip(TString::kBoth)==fNameSource)
545 break;
546 }
547
548 if (epoch.Strip(TString::kBoth)!=(TString)"2000")
549 {
550 gLog << err << "ERROR - Epoch not 2000... not supported." << endl;
551 return kFALSE;
552 }
553
554 Double_t r,d;
555 if (!MAstro::Coordinate2Angle(ra, r))
556 {
557 gLog << err << "ERROR - Interpreting right ascension: " << ra << endl;
558 return kFALSE;
559 }
560 if (!MAstro::Coordinate2Angle(dec, d))
561 {
562 gLog << err << "ERROR - Interpreting declination: " << dec << endl;
563 return kFALSE;
564 }
565
566 pos.SetSkyPosition(r, d);
567 pos.SetTitle(fNameSource);
568
569 return kTRUE;
570}
571
572// --------------------------------------------------------------------------
573//
574// Calls ReplaceAll(old, news) for all Dir-entries
575//
576void MDataSet::ReplaceDir(TList &list, const TString &old, const TString &news) const
577{
578 TIter Next(&list);
579 TNamed *name = 0;
580 while ((name=(TNamed*)Next()))
581 {
582 TString dir = name->GetTitle();
583 dir.ReplaceAll(old, news);
584 name->SetTitle(dir);
585 }
586}
587
588// --------------------------------------------------------------------------
589//
590// Calls ReplaceAll(old, news) for all File-entries
591//
592void MDataSet::ReplaceFile(TList &list, const TString &old, const TString &news) const
593{
594 TIter Next(&list);
595 TNamed *name = 0;
596 while ((name=(TNamed*)Next()))
597 {
598 TString file = name->GetName();
599 file.ReplaceAll(old, news);
600 name->SetName(file);
601 }
602}
Note: See TracBrowser for help on using the repository browser.