source: trunk/MagicSoft/Mars/mjobs/MDataSet.cc@ 8440

Last change on this file since 8440 was 8389, checked in by tbretz, 18 years ago
*** empty log message ***
File size: 15.6 KB
Line 
1/* ======================================================================== *\
2!
3! *
4! * This file is part of MARS, the MAGIC Analysis and Reconstruction
5! * Software. It is distributed to you in the hope that it can be a useful
6! * and timesaving tool in analysing Data of imaging Cerenkov telescopes.
7! * It is distributed WITHOUT ANY WARRANTY.
8! *
9! * Permission to use, copy, modify and distribute this software and its
10! * documentation for any purpose is hereby granted without fee,
11! * provided that the above copyright notice appear in all copies and
12! * that both that copyright notice and this permission notice appear
13! * in supporting documentation. It is provided "as is" without express
14! * or implied warranty.
15! *
16!
17!
18! Author(s): Thomas Bretz, 1/2005 <mailto:tbretz@astro.uni-wuerzburg.de>
19!
20! Copyright: MAGIC Software Development, 2004-2005
21!
22!
23\* ======================================================================== */
24
25/////////////////////////////////////////////////////////////////////////////
26//
27// MDataSet
28//
29// This class describes a collection of sequences.
30//
31// Such an input file looks like:
32//
33// crab.seq:
34// ---------
35// AnalysisNumber: 1
36//
37// SequencesOn: 35222
38// SequencesOff: 36817
39//
40// Sequence00035222.File: sequences/sequence035222.txt
41// Sequence00036817.File: sequences/sequence036817.txt
42//
43// Sequence00035222.Dir: /data2/wuerzburg/Crab-Analyse/images/035222
44// Sequence00036817.Dir: /data2/wuerzburg/Crab-Analyse/images/036817
45//
46// The analysis number is an artificial number used to name the output
47// files automatically if the names are not overwritten in the corresponding
48// programs.
49//
50// The sequence numbers are used to concatenate the filenames of the
51// sequences using the file structure used in the datacenter. Each sequence
52// can be added to the on and off data at the same time but only once.
53//
54// If you have different file names you can overwrite the default file names
55// using Sequence%08d.File (make sure you have 8 digits!)
56//
57// In standard conditions (datacenter file system) paths are concatenated
58// by using the information in the sequence files (date, etc). You can
59// overwrite the directories in which the sequence-files (eg I-files) are
60// stored using Sequence%08d.Dir (make sure you have 8 digits!)
61//
62// Resource file entries are case sensitive!
63//
64// IMPORTANT:
65// * Run filenames must begin with a string which allows correct
66// ordering in time, otherwise synchronization might fail.
67// * Sequence filenames should also have names allowing to order them
68// in time, but it is not necessary.
69//
70/////////////////////////////////////////////////////////////////////////////
71#include "MDataSet.h"
72
73#include <string.h> // necessary for Fedora core 2 with kernel 2.6.9-1.667 #1 and gcc 3.4.2
74#include <errno.h> // necessary for Fedora core 2 with kernel 2.6.9-1.667 #1 and gcc 3.4.2
75
76#include <stdlib.h>
77#include <fstream>
78
79#include <TEnv.h>
80#include <TChain.h>
81#include <TRegexp.h>
82#include <TSystem.h> // TSystem::ExpandPath
83
84#include "MLog.h"
85#include "MLogManip.h"
86
87#include "MRead.h"
88#include "MJob.h"
89#include "MEnv.h"
90#include "MAstro.h"
91#include "MDirIter.h"
92#include "MSequence.h"
93#include "MPointingPos.h"
94
// ClassImp: ROOT class-implementation macro for MDataSet
ClassImp(MDataSet);

using namespace std;

// Default catalog file, used by the constructor if the dataset file
// contains no "Catalog" resource
const TString MDataSet::fgCatalog = "/magic/datacenter/setup/magic_favorites.edb";
// Default handed to SetupDefaultPath for the path of the data files
const TString MDataSet::fgPathDataFiles = "/magic/data/star";
// Default handed to SetupDefaultPath for the path of the sequence files
const TString MDataSet::fgPathSequences = "/magic/sequences";
102
103// --------------------------------------------------------------------------
104//
105// Copy the sequence numbers from the TString runs into the TArrayI data
106// Sequences which are twice in the list are only added once. In this case
107// a warning is emitted.
108//
109void MDataSet::Split(TString &runs, TArrayI &data) const
110{
111 const TRegexp regexp("[0-9]+");
112
113 data.Set(0);
114
115 runs.ReplaceAll("\t", " ");
116 runs = runs.Strip(TString::kBoth);
117
118 while (!runs.IsNull())
119 {
120 const TString num = runs(regexp);
121
122 if (num.IsNull())
123 {
124 *fLog << warn << "WARNING - Sequence is NaN (not a number): '" << runs << "'" << endl;
125 break;
126 }
127
128 const Int_t seq = atoi(num.Data());
129 const Int_t n = data.GetSize();
130
131 // skip already existing entries
132 int i;
133 for (i=0; i<n; i++)
134 if (data[i] == seq)
135 break;
136
137 if (i<n)
138 *fLog << warn << "WARNING - Sequence #" << seq << " already in list... skipped." << endl;
139 else
140 {
141 // set new entry
142 data.Set(n+1);
143 data[n] = seq;
144 }
145
146 // remove entry from string
147 runs.Remove(0, runs.First(num)+num.Length());
148 }
149
150 MJob::SortArray(data);
151}
152
153// --------------------------------------------------------------------------
154//
155// After resolving the sequence filename and directory either from the
156// default (/magic/data/sequences/0004/sequence00004000.txt) or from
157// the corresponding entries in the dataset file.
158// The entries are sorted by filename.
159//
160void MDataSet::ResolveSequences(TEnv &env, const TArrayI &num, TList &list) const
161{
162 TString sequences = fPathSequences;
163 TString data = fPathDataFiles;
164
165 for (int i=0; i<num.GetSize(); i++)
166 {
167 TString name = env.GetValue(Form("Sequence%08d.File", num[i]), "");
168 TString dir = env.GetValue(Form("Sequence%08d.Dir", num[i]), "");
169
170 // Set default sequence file and dir name
171 if (name.IsNull())
172 name = Form("%s%04d/sequence%08d.txt", sequences.Data(), num[i]/10000, num[i]);
173 if (dir.IsNull())
174 dir = Form("%s%04d/%08d", data.Data(), num[i]/10000, num[i]);
175
176 // FIXME: The sequence number from the sequence file is assigned!!!
177 MSequence *seq = new MSequence(name, dir);
178
179 if (seq->IsValid() && seq->GetSequence()!=(UInt_t)num[i])
180 *fLog << warn << "WARNING - Sequence number " << num[i] << " in dataset file doesn't match sequence number " << seq->GetSequence() << " in sequence file!" << endl;
181
182 list.Add(seq);
183 }
184
185 // For the synchronization we must make sure, that all sequences are
186 // in the correct order...
187 // list.Sort();
188}
189
190// --------------------------------------------------------------------------
191//
192// Read the file fname as setup file for the sequence.
193//
194MDataSet::MDataSet(const char *fname, TString sequences, TString data)
195{
196 fName = fname;
197
198 fSequencesOn.SetOwner();
199 fSequencesOff.SetOwner();
200
201 TString expname(fname);
202 gSystem->ExpandPathName(expname);
203
204 const Bool_t access = !gSystem->AccessPathName(expname, kFileExists);
205 if (!access)
206 gLog << err << "ERROR - Dataset file " << expname << " not accessible!" << endl;
207
208 MEnv env(expname);
209
210 fNumAnalysis = env.GetValue("AnalysisNumber", -1);
211 fTitle = env.GetValue("Name", expname);
212
213 TString str;
214 str = env.GetValue("SequencesOn", "");
215 Split(str, fNumSequencesOn);
216 str = env.GetValue("SequencesOff", "");
217 Split(str, fNumSequencesOff);
218
219 SetupDefaultPath(sequences, fgPathSequences);
220 SetupDefaultPath(data, fgPathDataFiles);
221
222 fPathSequences = sequences;
223 fPathDataFiles = data;
224
225 ResolveSequences(env, fNumSequencesOn, fSequencesOn);
226 ResolveSequences(env, fNumSequencesOff, fSequencesOff);
227
228 fNameSource = env.GetValue("SourceName", "");
229 fCatalog = env.GetValue("Catalog", fgCatalog);
230 fIsWobbleMode = env.GetValue("WobbleMode", kFALSE);
231 fComment = env.GetValue("Comment", "");
232
233 fNameSource = fNameSource.Strip(TString::kBoth);
234 fCatalog = fCatalog.Strip(TString::kBoth);
235
236 // --- Now "touch" resources which are not yet stored in MDataSet ---
237 env.Touch("RunTime");
238
239 // --- Print "untouch" resources ---
240 if (env.GetNumUntouched()>0)
241 {
242 gLog << warn << "WARNING - At least one resource in the dataset-file has not been touched!" << endl;
243 env.PrintUntouched();
244 }
245}
246
247// --------------------------------------------------------------------------
248//
249// Return '+' if both can be accessed, '-' otherwise.
250//
251void MDataSet::PrintFile(const MSequence &seq)
252{
253 const Char_t access =
254 !gSystem->AccessPathName(seq.GetFileName(), kFileExists) &&
255 !gSystem->AccessPathName(seq.GetDataPath(), kFileExists) ? '+' : '-';
256
257 gLog << "# " << access << " " << seq.GetFileName() << " <" << seq.GetDataPath() << ">" << endl;
258}
259
260// --------------------------------------------------------------------------
261//
262// Print the contents of the sequence
263//
264void MDataSet::Print(Option_t *o) const
265{
266 gLog << all;
267 if (!IsValid())
268 {
269 gLog << "Dataset: " << fName << " <invalid - no analysis number available>" << endl;
270 return;
271 }
272 gLog << "AnalysisNumber: " << fNumAnalysis << endl << endl;
273
274 if (!fTitle.IsNull())
275 gLog << "Name: " << fTitle << endl << endl;
276
277 gLog << "SequencesOn: ";
278 for (int i=0; i<fNumSequencesOn.GetSize(); i++)
279 gLog << " " << fNumSequencesOn[i];
280 gLog << endl;
281 gLog << "SequencesOff: ";
282 for (int i=0; i<fNumSequencesOff.GetSize(); i++)
283 gLog << " " << fNumSequencesOff[i];
284 gLog << endl << endl;
285
286 gLog << "SourceName: " << fNameSource << endl;
287 gLog << "Catalog: " << fCatalog << endl;
288
289 gLog << "WobbleMode: " << (fIsWobbleMode?"On":"Off") << endl << endl;
290
291
292
293 gLog << "Comment: " << fComment << endl;
294
295 if (fSequencesOn.GetEntries()>0)
296 gLog << endl;
297
298 TIter NextOn(&fSequencesOn);
299 TIter NextOff(&fSequencesOff);
300 MSequence *seq=0;
301 while ((seq=(MSequence*)NextOn()))
302 {
303 gLog << "Sequence" << Form("%08d", seq->GetSequence()) << ".File: " << seq->GetFileName() << endl;
304 gLog << "Sequence" << Form("%08d", seq->GetSequence()) << ".Dir: " << seq->GetDataPath() << endl;
305 }
306 if (fSequencesOff.GetEntries()>0)
307 gLog << endl;
308 while ((seq=(MSequence*)NextOff()))
309 {
310 gLog << "Sequence" << Form("%08d", seq->GetSequence()) << ".File: " << seq->GetFileName() << endl;
311 gLog << "Sequence" << Form("%08d", seq->GetSequence()) << ".Dir: " << seq->GetDataPath() << endl;
312 }
313
314 if (TString(o).Contains("files", TString::kIgnoreCase))
315 {
316 gLog << endl;
317 gLog << "# On-Data Files:" << endl;
318 NextOn.Reset();
319 while ((seq=(MSequence*)NextOn()))
320 PrintFile(*seq);
321
322 gLog << endl;
323 gLog << "# Off-Data Files:" << endl;
324 NextOff.Reset();
325 while ((seq=(MSequence*)NextOff()))
326 PrintFile(*seq);
327
328 return;
329 }
330}
331
332// --------------------------------------------------------------------------
333//
334// Adds all sequences contained in list to the MDirIter. After adding
335// everything MDirIter::Sort is called to sort all entries by name.
336//
337Bool_t MDataSet::AddSequencesFromList(const TList &list, MDirIter &files)
338{
339 TIter Next(const_cast<TList*>(&list));
340
341 MSequence *seq=0;
342 while ((seq=(MSequence*)Next()))
343 {
344 if (!seq->IsValid())
345 {
346 gLog << err;
347 gLog << "ERROR - MDataSet::AddSequencesFromList: Sequence invalid!" << endl;
348 gLog << " + File: " << seq->GetFileName() << endl;
349 gLog << " + Dir: " << seq->GetDataPath() << endl;
350 return kFALSE;
351 }
352
353 if (seq->SetupDatRuns(files, MSequence::kImages)<=0)
354 return kFALSE;
355 }
356
357 // This is important in case of synchronisation, because the
358 // files in the sequences can be interleaved (eg W1, W2)
359 // Filenames MUST begin with an appropriate string which allow
360 // to order them correctly in time!
361 // files.Sort();
362
363 if (gLog.GetDebugLevel()>4)
364 {
365 gLog << dbg << "Files which are searched:" << endl;
366 files.Print();
367 }
368 return kTRUE;
369}
370
371Bool_t MDataSet::AddFilesOn(MRead &read) const
372{
373 MDirIter files;
374 if (!AddSequencesFromList(fSequencesOn, files))
375 return kFALSE;
376 return read.AddFiles(files)>0;
377}
378
379Bool_t MDataSet::AddFilesOff(MRead &read) const
380{
381 MDirIter files;
382 if (!AddSequencesFromList(fSequencesOff, files))
383 return kFALSE;
384 return read.AddFiles(files)>0;
385}
386
387Bool_t MDataSet::AddFiles(MRead &read) const
388{
389 const Bool_t rc1 = AddFilesOff(read);
390 const Bool_t rc2 = AddFilesOn(read);
391 return rc1 && rc2;
392}
393
394Int_t MDataSet::AddFilesToChain(MDirIter &files, TChain &chain)
395{
396 Int_t num=0;
397 while (1)
398 {
399 const TString fname = files.Next();
400 if (fname.IsNull())
401 break;
402
403 const Int_t n = chain.Add(fname);
404 if (n<=0)
405 return kFALSE;
406 num += n;
407 }
408 return num;
409}
410
411Bool_t MDataSet::AddFilesOn(TChain &chain) const
412{
413 MDirIter files;
414 if (!AddSequencesFromList(fSequencesOn, files))
415 return kFALSE;
416 return AddFilesToChain(files, chain)>0;
417}
418
419Bool_t MDataSet::AddFilesOff(TChain &chain) const
420{
421 MDirIter files;
422 if (!AddSequencesFromList(fSequencesOff, files))
423 return kFALSE;
424 return AddFilesToChain(files, chain)>0;
425}
426
427Bool_t MDataSet::AddFiles(TChain &read) const
428{
429 const Bool_t rc1 = AddFilesOff(read);
430 const Bool_t rc2 = AddFilesOn(read);
431 return rc1 && rc2;
432}
433
434Bool_t MDataSet::GetSourcePos(MPointingPos &pos) const
435{
436 if (!HasSource())
437 {
438 gLog << err << "ERROR - MDataSet::GetSourcePos called, but no source available." << endl;
439 return kFALSE;
440 }
441
442 TString catalog(fCatalog);
443 gSystem->ExpandPathName(catalog);
444
445 ifstream fin(catalog);
446 if (!fin)
447 {
448 gLog << err << "Cannot open file " << catalog << ": ";
449 gLog << strerror(errno) << endl;
450 return kFALSE;
451 }
452
453 TString ra, dec, epoch;
454
455 Int_t n = 0;
456 while (1)
457 {
458 TString line;
459 line.ReadLine(fin);
460 if (!fin)
461 {
462 gLog << err << "ERROR - Source '" << fNameSource << "' not found in " << catalog << "." << endl;
463 return kFALSE;
464 }
465
466 n++;
467
468 TObjArray *arr = line.Tokenize(",");
469
470 if (arr->GetEntries()<6)
471 {
472 gLog << err << "ERROR - Not enough arguments in line #" << n << " of " << catalog << endl;
473 delete arr;
474 return kFALSE;;
475 }
476
477 const TString name = (*arr)[0]->GetName();
478
479 ra = (*arr)[2]->GetName();
480 dec = (*arr)[3]->GetName();
481 epoch = (*arr)[5]->GetName();
482
483 delete arr;
484
485 if (name.Strip(TString::kBoth)==fNameSource)
486 break;
487 }
488
489 if (epoch.Strip(TString::kBoth)!=(TString)"2000")
490 {
491 gLog << err << "ERROR - Epoch not 2000... not supported." << endl;
492 return kFALSE;
493 }
494
495 Double_t r,d;
496 if (!MAstro::Coordinate2Angle(ra, r))
497 {
498 gLog << err << "ERROR - Interpreting right ascension: " << ra << endl;
499 return kFALSE;
500 }
501 if (!MAstro::Coordinate2Angle(dec, d))
502 {
503 gLog << err << "ERROR - Interpreting declination: " << dec << endl;
504 return kFALSE;
505 }
506
507 pos.SetSkyPosition(r, d);
508 pos.SetTitle(fNameSource);
509
510 return kTRUE;
511}
512
513// --------------------------------------------------------------------------
514//
515// Calls ReplaceAll(old, news) for all Dir-entries
516//
517void MDataSet::ReplaceDir(TList &list, const TString &old, const TString &news) const
518{
519 TIter Next(&list);
520 TNamed *name = 0;
521 while ((name=(TNamed*)Next()))
522 {
523 TString dir = name->GetTitle();
524 dir.ReplaceAll(old, news);
525 name->SetTitle(dir);
526 }
527}
528
529// --------------------------------------------------------------------------
530//
531// Calls ReplaceAll(old, news) for all File-entries
532//
533void MDataSet::ReplaceFile(TList &list, const TString &old, const TString &news) const
534{
535 TIter Next(&list);
536 TNamed *name = 0;
537 while ((name=(TNamed*)Next()))
538 {
539 TString file = name->GetName();
540 file.ReplaceAll(old, news);
541 name->SetName(file);
542 }
543}
Note: See TracBrowser for help on using the repository browser.