source: trunk/Mars/mcore/fits.h@ 13369

Last change on this file since 13369 was 13225, checked in by tbretz, 13 years ago
Do not read more than the expected number of rows per default.
File size: 24.0 KB
Line 
1#ifndef MARS_fits
2#define MARS_fits
3
4#include <stdint.h>
5
6#include <map>
7#include <string>
8#include <sstream>
9#include <algorithm>
10
11#ifdef __EXCEPTIONS
12#include <stdexcept>
13#endif
14
15#ifdef __CINT__
16#define off_t size_t
17#else
18#include <unordered_map>
19#endif
20
21#ifndef __MARS__
22#include <vector>
23#include <iomanip>
24#include <iostream>
25#define gLog cerr
26#define ___err___ ""
27#define ___all___ ""
28#else
29#include "MLog.h"
30#include "MLogManip.h"
31#define ___err___ err
32#define ___all___ all
33#endif
34
35#ifdef HAVE_ZLIB
36#include "izstream.h"
37#else
38#include <fstream>
39#define izstream ifstream
40#warning Support for zipped FITS files disabled.
41#endif
42
43#include "checksum.h"
44
45namespace std
46{
47
48class fits : public izstream
49{
50public:
51 struct Entry
52 {
53 char type;
54 string value;
55 string comment;
56
57 template<typename T>
58 T Get() const
59 {
60 T t;
61
62 istringstream str(value);
63 str >> t;
64
65 return t;
66 }
67 };
68
69 struct Table
70 {
71 off_t offset;
72
73 string name;
74 size_t bytes_per_row;
75 size_t num_rows;
76 size_t num_cols;
77
78 struct Column
79 {
80 size_t offset;
81 size_t num;
82 size_t size;
83 char type;
84 string unit;
85 };
86
87 typedef map<string, Entry> Keys;
88 typedef map<string, Column> Columns;
89
90 Columns cols;
91 Keys keys;
92
93 int64_t datasum;
94
95 string Trim(const string &str, char c=' ') const
96 {
97 // Trim Both leading and trailing spaces
98 const size_t pstart = str.find_first_not_of(c); // Find the first character position after excluding leading blank spaces
99 const size_t pend = str.find_last_not_of(c); // Find the first character position from reverse af
100
101 // if all spaces or empty return an empty string
102 if (string::npos==pstart || string::npos==pend)
103 return string();
104
105 return str.substr(pstart, pend-pstart+1);
106 }
107
108 bool Check(const string &key, char type, const string &value="") const
109 {
110 const Keys::const_iterator it = keys.find(key);
111 if (it==keys.end())
112 {
113 ostringstream str;
114 str << "Key '" << key << "' not found.";
115#ifdef __EXCEPTIONS
116 throw runtime_error(str.str());
117#else
118 gLog << ___err___ << "ERROR - " << str.str() << endl;
119 return false;
120#endif
121 }
122
123 if (it->second.type!=type)
124 {
125 ostringstream str;
126 str << "Wrong type for key '" << key << "': expected " << type << ", found " << it->second.type << ".";
127#ifdef __EXCEPTIONS
128 throw runtime_error(str.str());
129#else
130 gLog << ___err___ << "ERROR - " << str.str() << endl;
131 return false;
132#endif
133 }
134
135 if (!value.empty() && it->second.value!=value)
136 {
137 ostringstream str;
138 str << "Wrong value for key '" << key << "': expected " << value << ", found " << it->second.value << ".";
139#ifdef __EXCEPTIONS
140 throw runtime_error(str.str());
141#else
142 gLog << ___err___ << "ERROR - " << str.str() << endl;
143 return false;
144#endif
145 }
146
147 return true;
148 }
149
150 Keys ParseBlock(const vector<string> &vec) const
151 {
152 map<string,Entry> rc;
153
154 for (unsigned int i=0; i<vec.size(); i++)
155 {
156 const string key = Trim(vec[i].substr(0,8));
157 // Keywords without a value, like COMMENT / HISTORY
158 if (vec[i].substr(8,2)!="= ")
159 continue;
160
161 char type = 0;
162
163 string com;
164 string val = Trim(vec[i].substr(10));
165 if (val[0]=='\'')
166 {
167 // First skip all '' in the string
168 size_t p = 1;
169 while (1)
170 {
171 const size_t pp = val.find_first_of('\'', p);
172 if (pp==string::npos)
173 break;
174
175 p = val[pp+1]=='\'' ? pp+2 : pp+1;
176 }
177
178 // Now find the comment
179 const size_t ppp = val.find_first_of('/', p);
180
181 // Set value, comment and type
182 com = ppp==string::npos ? "" : Trim(val.substr(ppp+1));
183 val = Trim(val.substr(1, p-2));
184 type = 'T';
185 }
186 else
187 {
188 const size_t p = val.find_first_of('/');
189
190 com = Trim(val.substr(p+2));
191 val = Trim(val.substr(0, p));
192
193 if (val.empty() || val.find_first_of('T')!=string::npos || val.find_first_of('F')!=string::npos)
194 type = 'B';
195 else
196 type = val.find_last_of('.')==string::npos ? 'I' : 'F';
197 }
198
199 const Entry e = { type, val, com };
200 rc[key] = e;
201 }
202
203 return rc;
204 }
205
206 Table() : offset(0) { }
207 Table(const vector<string> &vec, off_t off) :
208 offset(off), keys(ParseBlock(vec))
209 {
210 if (!Check("XTENSION", 'T', "BINTABLE") ||
211 !Check("NAXIS", 'I', "2") ||
212 !Check("BITPIX", 'I', "8") ||
213 !Check("PCOUNT", 'I', "0") ||
214 !Check("GCOUNT", 'I', "1") ||
215 !Check("EXTNAME", 'T') ||
216 !Check("NAXIS1", 'I') ||
217 !Check("NAXIS2", 'I') ||
218 !Check("TFIELDS", 'I'))
219 return;
220
221 bytes_per_row = Get<size_t>("NAXIS1");
222 num_rows = Get<size_t>("NAXIS2");
223 num_cols = Get<size_t>("TFIELDS");
224 datasum = Get<int64_t>("DATASUM", -1);
225
226 size_t bytes = 0;
227 for (size_t i=1; i<=num_cols; i++)
228 {
229 ostringstream num;
230 num << i;
231
232 if (!Check("TTYPE"+num.str(), 'T') ||
233 !Check("TFORM"+num.str(), 'T'))
234 return;
235
236 const string id = Get<string>("TTYPE"+num.str());
237 const string fmt = Get<string>("TFORM"+num.str());
238 const string unit = Get<string>("TUNIT"+num.str(), "");
239
240 istringstream sin(fmt);
241 int n = 0;
242 sin >> n;
243 if (!sin)
244 n = 1;
245
246 const char type = fmt[fmt.length()-1];
247
248 size_t size = 0;
249 switch (type)
250 {
251 // We could use negative values to mark floats
252 // otheriwse we could just cast them to int64_t?
253 case 'L': // logical
254 case 'A': // char
255 case 'B': size = 1; break; // byte
256 case 'I': size = 2; break; // short
257 case 'J': size = 4; break; // int
258 case 'K': size = 8; break; // long long
259 case 'E': size = 4; break; // float
260 case 'D': size = 8; break; // double
261 // case 'X': size = n; break; // bits (n=number of bytes needed to contain all bits)
262 // case 'C': size = 8; break; // complex float
263 // case 'M': size = 16; break; // complex double
264 // case 'P': size = 8; break; // array descriptor (32bit)
265 // case 'Q': size = 16; break; // array descriptor (64bit)
266 default:
267 {
268 ostringstream str;
269 str << "FITS format TFORM='" << fmt << "' not yet supported.";
270#ifdef __EXCEPTIONS
271 throw runtime_error(str.str());
272#else
273 gLog << ___err___ << "ERROR - " << str.str() << endl;
274 return;
275#endif
276 }
277 }
278
279 const Table::Column col = { bytes, n, size, type, unit };
280
281 cols[id] = col;
282 bytes += n*size;
283 }
284
285 if (bytes!=bytes_per_row)
286 {
287#ifdef __EXCEPTIONS
288 throw runtime_error("Column size mismatch");
289#else
290 gLog << ___err___ << "ERROR - Column size mismatch" << endl;
291 return;
292#endif
293 }
294
295 name = Get<string>("EXTNAME");
296 }
297
298 void PrintKeys(bool display_all=false) const
299 {
300 for (Keys::const_iterator it=keys.begin(); it!=keys.end(); it++)
301 {
302 if (!display_all &&
303 (it->first.substr(0, 6)=="TTYPE" ||
304 it->first.substr(0, 6)=="TFORM" ||
305 it->first.substr(0, 6)=="TUNIT" ||
306 it->first=="TFIELDS" ||
307 it->first=="XTENSION" ||
308 it->first=="NAXIS" ||
309 it->first=="BITPIX" ||
310 it->first=="PCOUNT" ||
311 it->first=="GCOUNT")
312 )
313 continue;
314
315 gLog << ___all___ << setw(2) << it->second.type << '|' << it->first << '=' << it->second.value << '/' << it->second.comment << '|' << endl;
316 }}
317
318 void PrintColumns() const
319 {
320 typedef map<pair<size_t, string>, Column> Sorted;
321
322 Sorted sorted;
323
324 for (Columns::const_iterator it=cols.begin(); it!=cols.end(); it++)
325 sorted[make_pair(it->second.offset, it->first)] = it->second;
326
327 for (Sorted::const_iterator it=sorted.begin(); it!=sorted.end(); it++)
328 {
329 gLog << ___all___ << setw(6) << it->second.offset << "| ";
330 gLog << it->second.num << 'x';
331 switch (it->second.type)
332 {
333 case 'A': gLog << "char(8)"; break;
334 case 'L': gLog << "bool(8)"; break;
335 case 'B': gLog << "byte(8)"; break;
336 case 'I': gLog << "short(16)"; break;
337 case 'J': gLog << "int(32)"; break;
338 case 'K': gLog << "int(64)"; break;
339 case 'E': gLog << "float(32)"; break;
340 case 'D': gLog << "double(64)"; break;
341 }
342 gLog << ": " << it->first.second << " [" << it->second.unit << "]" << endl;
343 }
344 }
345
346 operator bool() const { return !name.empty(); }
347
348 bool HasKey(const string &key) const
349 {
350 return keys.find(key)!=keys.end();
351 }
352
353 bool HasColumn(const string& col) const
354 {
355 return cols.find(col)!=cols.end();
356 }
357
358 const Columns &GetColumns() const
359 {
360 return cols;
361 }
362
363 const Keys &GetKeys() const
364 {
365 return keys;
366 }
367
368 // Values of keys are always signed
369 template<typename T>
370 T Get(const string &key) const
371 {
372 const map<string,Entry>::const_iterator it = keys.find(key);
373 if (it==keys.end())
374 {
375 ostringstream str;
376 str << "Key '" << key << "' not found." << endl;
377#ifdef __EXCEPTIONS
378 throw runtime_error(str.str());
379#else
380 gLog << ___err___ << "ERROR - " << str.str() << endl;
381 return T();
382#endif
383 }
384 return it->second.Get<T>();
385 }
386
387 // Values of keys are always signed
388 template<typename T>
389 T Get(const string &key, const T &deflt) const
390 {
391 const map<string,Entry>::const_iterator it = keys.find(key);
392 return it==keys.end() ? deflt :it->second.Get<T>();
393 }
394
395 size_t GetN(const string &key) const
396 {
397 const Columns::const_iterator it = cols.find(key);
398 return it==cols.end() ? 0 : it->second.num;
399 }
400 };
401
402private:
403 Table fTable;
404
405 typedef pair<void*, Table::Column> Address;
406 typedef vector<Address> Addresses;
407 //map<void*, Table::Column> fAddresses;
408 Addresses fAddresses;
409
410#ifdef __CINT__
411 typedef map<string, void*> Pointers;
412#else
413 typedef unordered_map<string, void*> Pointers;
414#endif
415 Pointers fPointers;
416
417 vector<vector<char>> fGarbage;
418
419 vector<char> fBufferRow;
420 vector<char> fBufferDat;
421
422 size_t fRow;
423
424 Checksum fChkHeader;
425 Checksum fChkData;
426
427 bool ReadBlock(vector<string> &vec)
428 {
429 int endtag = 0;
430 for (int i=0; i<36; i++)
431 {
432 char c[81];
433 c[80] = 0;
434 read(c, 80);
435 if (!good())
436 break;
437
438 fChkHeader.add(c, 80);
439
440// if (c[0]==0)
441// return vector<string>();
442
443 string str(c);
444
445// if (!str.empty())
446// cout << setw(2) << i << "|" << str << "|" << (endtag?'-':'+') << endl;
447
448 if (endtag==2 || str=="END ")
449 {
450 endtag = 2; // valid END tag found
451 continue;
452 }
453
454 if (endtag==1 || str==" ")
455 {
456 endtag = 1; // end tag not found, but expected to be there
457 continue;
458 }
459
460 vec.push_back(str);
461 }
462
463 // Make sure that no empty vector is returned
464 if (endtag && vec.size()%36==0)
465 vec.push_back(string("END = '' / "));
466
467 return endtag==2;
468 }
469
470 string Compile(const string &key, int16_t i=-1)
471 {
472 if (i<0)
473 return key;
474
475 ostringstream str;
476 str << key << i;
477 return str.str();
478 }
479
480public:
481 fits(const string &fname, bool force=false) : izstream(fname.c_str())
482 {
483 char simple[10];
484 read(simple, 10);
485 if (!good())
486 return;
487
488 if (memcmp(simple, "SIMPLE = ", 10))
489 {
490 clear(rdstate()|ios::badbit);
491#ifdef __EXCEPTIONS
492 throw runtime_error("File is not a FITS file.");
493#else
494 gLog << ___err___ << "ERROR - File is not a FITS file." << endl;
495 return;
496#endif
497 }
498
499 seekg(0);
500
501 while (good())
502 {
503 vector<string> block;
504 while (1)
505 {
506 // FIXME: Set limit on memory consumption
507 const int rc = ReadBlock(block);
508 if (!good())
509 {
510 clear(rdstate()|ios::badbit);
511#ifdef __EXCEPTIONS
512 throw runtime_error("FITS file corrupted.");
513#else
514 gLog << ___err___ << "ERROR - FITS file corrupted." << endl;
515 return;
516#endif
517 }
518
519 if (block.size()%36)
520 {
521 if (!rc && !force)
522 {
523 clear(rdstate()|ios::badbit);
524#ifdef __EXCEPTIONS
525 throw runtime_error("END keyword missing in FITS header.");
526#else
527 gLog << ___err___ << "ERROR - END keyword missing in FITS file... file might be corrupted." << endl;
528 return;
529#endif
530 }
531 break;
532 }
533 }
534
535 if (block.size()==0)
536 break;
537
538 if (block[0].substr(0, 9)=="SIMPLE =")
539 {
540 fChkHeader.reset();
541 continue;
542 }
543
544 if (block[0].substr(0, 9)=="XTENSION=")
545 {
546 // FIXME: Check for table name
547
548 fTable = Table(block, tellg());
549 fRow = (size_t)-1;
550
551 //fTable.PrintKeys();
552
553 if (!fTable)
554 {
555 clear(rdstate()|ios::badbit);
556 return;
557 }
558
559 fBufferRow.resize(fTable.bytes_per_row + 4-fTable.bytes_per_row%4);
560 fBufferDat.resize(fTable.bytes_per_row);
561
562 /*
563 // Next table should start at:
564 const size_t size = fTable.bytes_per_row*fTable.num_rows;
565 const size_t blks = size/(36*80);
566 const size_t rest = size%(36*80);
567
568 seekg((blks+(rest>0?1:0))*(36*80), ios::cur);
569 if (!good())
570 gLog << ___err___ << "File seems to be incomplete (less data than expected from header)." << endl;
571
572 fRow = fTable.num_rows;
573 */
574
575 break;
576 }
577 }
578 }
579
580 uint8_t ReadRow(size_t row)
581 {
582 // if (row!=fRow+1) // Fast seeking is ensured by izstream
583 seekg(fTable.offset+row*fTable.bytes_per_row);
584
585 // For the checksum we need everything to be correctly aligned
586 const uint8_t offset = (row*fTable.bytes_per_row)%4;
587
588 auto ib = fBufferRow.begin();
589 auto ie = fBufferRow.end();
590 *ib++ = 0;
591 *ib++ = 0;
592 *ib++ = 0;
593 *ib = 0;
594
595 *--ie = 0;
596 *--ie = 0;
597 *--ie = 0;
598 *--ie = 0;
599
600 fBufferRow.assign(fBufferRow.size(), 0);
601
602 read(fBufferRow.data()+offset, fTable.bytes_per_row);
603 //fin.clear(fin.rdstate()&~ios::eofbit);
604
605 if (row==fRow+1)
606 fChkData.add(fBufferRow);
607
608 fRow = row;
609
610 return offset;
611 }
612
613 template<size_t N>
614 void revcpy(char *dest, const char *src, int num)
615 {
616 const char *pend = src + num*N;
617 for (const char *ptr = src; ptr<pend; ptr+=N, dest+=N)
618 reverse_copy(ptr, ptr+N, dest);
619 }
620
621 bool GetRow(size_t row, bool check=true)
622 {
623 if (check && row>=fTable.num_rows)
624 return false;
625
626 const uint8_t offset = ReadRow(row);
627 if (!good())
628 return good();
629
630 const char *ptr = fBufferRow.data() + offset;
631
632 for (Addresses::const_iterator it=fAddresses.begin(); it!=fAddresses.end(); it++)
633 {
634 const Table::Column &c = it->second;
635
636 const char *src = ptr + c.offset;
637 char *dest = reinterpret_cast<char*>(it->first);
638
639 // Let the compiler do some optimization by
640 // knowing the we only have 1, 2, 4 and 8
641 switch (c.size)
642 {
643 case 1: memcpy (dest, src, c.num*c.size); break;
644 case 2: revcpy<2>(dest, src, c.num); break;
645 case 4: revcpy<4>(dest, src, c.num); break;
646 case 8: revcpy<8>(dest, src, c.num); break;
647 }
648 }
649
650 return good();
651 }
652
653 bool GetNextRow(bool check=true)
654 {
655 return GetRow(fRow+1, check);
656 }
657
658 bool SkipNextRow()
659 {
660 seekg(fTable.offset+(++fRow)*fTable.bytes_per_row);
661 return good();
662 }
663
664 static bool Compare(const Address &p1, const Address &p2)
665 {
666 return p1.first>p2.first;
667 }
668
669 template<class T, class S>
670 const T &GetAs(const string &name)
671 {
672 return *reinterpret_cast<S*>(fPointers[name]);
673 }
674
675 void *SetPtrAddress(const string &name)
676 {
677 if (fTable.cols.count(name)==0)
678 {
679 ostringstream str;
680 str <<"SetPtrAddress('" << name << "') - Column not found." << endl;
681#ifdef __EXCEPTIONS
682 throw runtime_error(str.str());
683#else
684 gLog << ___err___ << "ERROR - " << str.str() << endl;
685 return NULL;
686#endif
687 }
688
689 Pointers::const_iterator it = fPointers.find(name);
690 if (it!=fPointers.end())
691 return it->second;
692
693 fGarbage.push_back(vector<char>(fTable.cols[name].size*fTable.cols[name].num));
694
695 void *ptr = fGarbage.back().data();
696
697 fPointers[name] = ptr;
698 fAddresses.push_back(make_pair(ptr, fTable.cols[name]));
699 sort(fAddresses.begin(), fAddresses.end(), Compare);
700 return ptr;
701 }
702
703 template<typename T>
704 bool SetPtrAddress(const string &name, T *ptr, size_t cnt)
705 {
706 if (fTable.cols.count(name)==0)
707 {
708 ostringstream str;
709 str << "SetPtrAddress('" << name << "') - Column not found." << endl;
710#ifdef __EXCEPTIONS
711 throw runtime_error(str.str());
712#else
713 gLog << ___err___ << "ERROR - " << str.str() << endl;
714 return false;
715#endif
716 }
717
718 if (sizeof(T)!=fTable.cols[name].size)
719 {
720 ostringstream str;
721 str << "SetPtrAddress('" << name << "') - Element size mismatch: expected "
722 << fTable.cols[name].size << " from header, got " << sizeof(T) << endl;
723#ifdef __EXCEPTIONS
724 throw runtime_error(str.str());
725#else
726 gLog << ___err___ << "ERROR - " << str.str() << endl;
727 return false;
728#endif
729 }
730
731 if (cnt!=fTable.cols[name].num)
732 {
733 ostringstream str;
734 str << "SetPtrAddress('" << name << "') - Element count mismatch: expected "
735 << fTable.cols[name].num << " from header, got " << cnt << endl;
736#ifdef __EXCEPTIONS
737 throw runtime_error(str.str());
738#else
739 gLog << ___err___ << "ERROR - " << str.str() << endl;
740 return false;
741#endif
742 }
743
744 // if (fAddresses.count(ptr)>0)
745 // gLog << warn << "SetPtrAddress('" << name << "') - Pointer " << ptr << " already assigned." << endl;
746
747 //fAddresses[ptr] = fTable.cols[name];
748 fPointers[name] = ptr;
749 fAddresses.push_back(make_pair(ptr, fTable.cols[name]));
750 sort(fAddresses.begin(), fAddresses.end(), Compare);
751 return true;
752 }
753
754 template<class T>
755 bool SetRefAddress(const string &name, T &ptr)
756 {
757 return SetPtrAddress(name, &ptr, sizeof(ptr)/sizeof(T));
758 }
759
760 template<typename T>
761 bool SetVecAddress(const string &name, vector<T> &vec)
762 {
763 return SetPtrAddress(name, vec.data(), vec.size());
764 }
765
766 template<typename T>
767 T Get(const string &key) const
768 {
769 return fTable.Get<T>(key);
770 }
771
772 template<typename T>
773 T Get(const string &key, const string &deflt) const
774 {
775 return fTable.Get<T>(key, deflt);
776 }
777
778 bool SetPtrAddress(const string &name, void *ptr)
779 {
780 if (fTable.cols.count(name)==0)
781 {
782 ostringstream str;
783 str <<"SetPtrAddress('" << name << "') - Column not found." << endl;
784#ifdef __EXCEPTIONS
785 throw runtime_error(str.str());
786#else
787 gLog << ___err___ << "ERROR - " << str.str() << endl;
788 return false;
789#endif
790 }
791
792 // if (fAddresses.count(ptr)>0)
793 // gLog << warn << "SetPtrAddress('" << name << "') - Pointer " << ptr << " already assigned." << endl;
794
795 //fAddresses[ptr] = fTable.cols[name];
796 fPointers[name] = ptr;
797 fAddresses.push_back(make_pair(ptr, fTable.cols[name]));
798 sort(fAddresses.begin(), fAddresses.end(), Compare);
799 return true;
800 }
801
802 bool HasKey(const string &key) const { return fTable.HasKey(key); }
803 bool HasColumn(const string& col) const { return fTable.HasColumn(col);}
804 const Table::Columns &GetColumns() const { return fTable.GetColumns();}
805 const Table::Keys &GetKeys() const { return fTable.GetKeys();}
806
807 int64_t GetInt(const string &key) const { return fTable.Get<int64_t>(key); }
808 uint64_t GetUInt(const string &key) const { return fTable.Get<uint64_t>(key); }
809 double GetFloat(const string &key) const { return fTable.Get<double>(key); }
810 string GetStr(const string &key) const { return fTable.Get<string>(key); }
811
812 size_t GetN(const string &key) const
813 {
814 return fTable.GetN(key);
815 }
816
817 size_t GetNumRows() const { return fTable.num_rows; }
818 size_t GetRow() const { return fRow; }
819
820 operator bool() const { return fTable && fTable.offset!=0; }
821
822 void PrintKeys() const { fTable.PrintKeys(); }
823 void PrintColumns() const { fTable.PrintColumns(); }
824
825 bool IsHeaderOk() const { return fTable.datasum<0?false:(fChkHeader+Checksum(fTable.datasum)).valid(); }
826 bool IsFileOk() const { return (fChkHeader+fChkData).valid(); }
827
828};
829
830};
831#endif
Note: See TracBrowser for help on using the repository browser.