source: trunk/FACT++/src/fixfits.cc@ 20084

Last change on this file since 20084 was 20067, checked in by tbretz, 4 years ago
That is a better solution.
File size: 17.5 KB
Line 
1//Compile with g++ -o FixFits fixFits.cpp -O2 -std=c++11
2//This is a "FITS file fixer" program for FACT.
3//It goes through the file, relying only on basic header data and compressed blocks headers
4//It will reconstruct the catalog data too in the output file a strip all corrupt data
5//The actual header keywords must be edited with the FTOOLS afterwards
6
7#include <string>
8#include <iostream>
9#include <iomanip>
10#include <fstream>
11#include <cstring>
12#include <vector>
13#include <algorithm>
14#include <sstream>
15
16#include <boost/filesystem.hpp>
17
18#include "factfits.h"
19#include "Time.h"
20#include "Configuration.h"
21
22using namespace std;
23using namespace FITS;
24namespace fs = boost::filesystem;
25
26
27template<size_t N>
28 void revcpy(char *dest, const char *src, int num)
29{
30 const char *pend = src + num*N;
31 for (const char *ptr = src; ptr<pend; ptr+=N, dest+=N)
32 std::reverse_copy(ptr, ptr+N, dest);
33}
34
35bool update_header_key(char buffer[], const string& keyword, uint64_t value, const string& comment)
36{
37 ostringstream str;
38 str << value;
39
40 const auto num_length = str.str().size();
41 str.str("");
42
43 str << setfill(' ');
44 str << left << setw(8) << keyword;
45
46 str << "= ";
47
48 str << right << setw(20) << value << " / " << comment;
49
50 str << setw(80-str.str().size()) << ' ';
51
52 cout << keyword.size() << ":" << num_length << ":" << str.str().size() << ":" << str.str() << '\n';
53
54 if (str.str().size() != 80)
55 {
56 cerr << "Wrong " << keyword << " string size." << endl;
57 return false;
58 }
59
60 memcpy(buffer, str.str().c_str(), 80);
61
62 return true;
63}
64
65void SetupConfiguration(Configuration &conf)
66{
67 po::options_description control("Root to SQL");
68 control.add_options()
69 ("file", var<string>()->required(), "Input file name. Fits file to be repaired.")
70 ("out,o", var<string>(), "Output file name (+'.repaired' if empty), dry-run if not provided")
71 ("no-progress", po_switch(), "Suppress progress output")
72 ("dry-run", po_switch(), "Do not write any output")
73 ;
74
75 po::positional_options_description p;
76 p.add("file", 1); // All positional options
77 p.add("out", 1); // All positional options
78
79 conf.AddOptions(control);
80 conf.SetArgumentPositions(p);
81}
82
83void PrintUsage()
84{
85 cout <<
86 "fixfits - Fix the header of a none closed raw-data file\n"
87 "\n"
88 "The goal of this tool is to touch the data as less as possible. "
89 "This, however, might result in a few more bytes being lost than "
90 "strictly necessary.\n"
91 "\n"
92 "Usage: fixfits input-file-name [output-file-name]\n"
93 "\n"
94 ;
95 cout << endl;
96}
97
98
99int main(int argc, const char* argv[])
100{
101 Configuration conf(argv[0]);
102 conf.SetPrintUsage(PrintUsage);
103 SetupConfiguration(conf);
104
105 if (!conf.DoParse(argc, argv))
106 return 127;
107
108 // ----------------------------- Evaluate options --------------------------
109 const string input_name = conf.Get<string>("file");
110 const string output_name = conf.Has("out") ? conf.Get<string>("out") :
111 (input_name+".repaired");
112
113 const bool no_progress = conf.Get<bool>("no-progress");
114 const bool dry_run = conf.Get<bool>("dry-run");
115
116 cout << "\nReading: " << input_name << endl;
117
118 Time start_read;
119
120 ifstream input(input_name.c_str(), ios_base::binary);
121 if (!input)
122 {
123 cerr << "Error opening file: " << strerror(errno) << endl;
124 return 6;
125 }
126
127 cout << "Seeking beginning of data table." << endl;
128
129 // ------------------------------------------------------------
130
131 //first off, find the beginning of the data table
132 char buffer[2048];
133 buffer[80] = '\0';
134
135 size_t counter = 0;
136 size_t num_bytes = 80;
137 input.read(buffer, 80);
138
139 while (counter != 2)
140 {
141 if (!strcmp(buffer, "XTENSION= 'BINTABLE' / binary table extension "))
142 counter++;
143
144 if (input.eof())
145 {
146 cerr << "Reached end-of-file but could not find start of data table." << endl;
147 return 2;
148 }
149
150 input.read(buffer, 80);
151 num_bytes+=80;
152
153 if (!no_progress)
154 cout << "\rByte " << num_bytes << " ";
155 }
156 if (no_progress)
157 cout << "Reached " << num_bytes << '\n';
158 cout << "\nFound beginning of data table." << endl;
159
160 // ------------------------------------------------------------
161
162 //we just read the start of the data table. continue until the end of the table header
163 while (strcmp(buffer, "END "))
164 {
165 if (input.eof())
166 {
167 cerr << "Data table header was not closed properly. Aborting." << endl;
168 return 3;
169 }
170
171 input.read(buffer, 80);
172 num_bytes+=80;
173
174 if (!no_progress)
175 cout << "\rByte " << num_bytes << " ";
176 }
177 if (no_progress)
178 cout << "Reached " << num_bytes << '\n';
179
180 cout << "\nReached end of data table header" << endl;
181
182 // ------------------------------------------------------------
183
184 //now skip the fits padding
185 while (num_bytes % 2880 != 0)
186 {
187 input.read(buffer, 80);
188 num_bytes+=80;
189
190 if (!no_progress)
191 cout << "\rByte " << num_bytes << " ";
192 }
193 if (no_progress)
194 cout << "Reached " << num_bytes << '\n';
195
196
197 const size_t output_catalog_start = input.tellg();
198
199 //now we are in the catalog. skip it until we find the string 'TILE'
200 buffer[4] = '\0';
201 while (strcmp(buffer, "TILE"))
202 {
203 input.read(buffer, 4);
204 num_bytes += 4;
205
206 if (!no_progress)
207 cout << "\rByte " << num_bytes << " ";
208 }
209 if (no_progress)
210 cout << "Reached " << num_bytes << '\n';
211
212 input.seekg(num_bytes-4);
213 cout << "\nFound start of first compressed tile." << endl;
214
215 // ------------------------------------------------------------
216
217 const size_t output_heap_start = input.tellg();
218
219 size_t byte_where_tile_start = input.tellg();
220
221 //now figure out how many blocks are in each tile
222 TileHeader tile_head;
223 BlockHeader block_head;
224
225 input.read(reinterpret_cast<char*>(&tile_head), sizeof(TileHeader));
226
227 cout << "First tile is " << tile_head.size << " bytes long." << endl;
228
229 // ------------------------------------------------------------
230
231 size_t block_bytes = 0;
232 size_t num_blocks = 0;
233
234 while (size_t(input.tellg()) < byte_where_tile_start+tile_head.size)
235 {
236 input.read(reinterpret_cast<char*>(&block_head), sizeof(BlockHeader));
237 block_bytes += block_head.size;
238
239 input.seekg(byte_where_tile_start + sizeof(TileHeader) + block_bytes);
240 num_blocks ++;
241
242 if (input.eof())
243 {
244 cerr << "First tile seems broken unable to recover anything." << endl;
245 return 4;
246 }
247 }
248
249 if (size_t(input.tellg()) != byte_where_tile_start+tile_head.size)
250 {
251 cerr << "Discrepancy between tile size and sum of blocks size." << endl;
252 return 1;
253 }
254
255 cout << "There are " << num_blocks << " blocks in each tile.\nLooping over all tiles" << endl;
256
257 // ------------------------------------------------------------
258
259 size_t num_tiles = 0;
260 input.seekg(output_heap_start);
261
262 size_t output_heap_end = input.tellg();
263 size_t previous_heap_end = input.tellg();
264
265 vector<vector<pair<int64_t, int64_t> > > catalog;
266
267 uint64_t offset_in_heap = 0;
268 while (!input.eof())
269 {
270 previous_heap_end = output_heap_end;
271 output_heap_end = input.tellg();
272
273 byte_where_tile_start = input.tellg();
274 block_bytes = 0;
275
276 if (input.peek() == EOF)
277 {
278 cout << "Reached end-of-file." << endl;
279 break;
280 }
281
282 input.read(reinterpret_cast<char*>(&tile_head), sizeof(TileHeader));
283 if (memcmp(tile_head.id, "TILE", 4) || input.eof())
284 {
285 cout << "Reached end-of-table padding or end-of-file." << endl;
286 break;
287 }
288
289 offset_in_heap += sizeof(TileHeader);
290 catalog.emplace_back();
291
292 for (size_t i=0; i<num_blocks; i++)
293 {
294 input.read((char*)(&block_head), sizeof(BlockHeader));
295 if (input.eof())
296 {
297 cout << "Reached end-of-file." << endl;
298 break;
299 }
300 block_bytes += block_head.size;
301 input.seekg(byte_where_tile_start + sizeof(TileHeader) + block_bytes);
302 if (input.eof())
303 {
304 cout << "Reached end-of-file." << endl;
305 break;
306 }
307 catalog.back().emplace_back((int64_t)(block_head.size),offset_in_heap);
308 offset_in_heap += block_head.size;
309 }
310
311 //one last catalog entry for the column that has no data
312 catalog.back().emplace_back(0,0);
313
314 if (!no_progress)
315 cout << "\rByte " << input.tellg() << " ";
316 num_tiles++;
317 }
318
319 input.clear();
320
321 // ------------------------------------------------------------
322
323 //ignore last read tile as we have no idea if it looked ok or not
324 num_tiles--;
325 output_heap_end = previous_heap_end;
326
327 cout << "Recovery finished!\n";
328
329 const auto time_read = Time().UnixTime()-start_read.UnixTime();
330
331 // ============================================================
332 //Now we have everything that we need. Go again through the input file and write the output, fixed file
333
334 cout << "\nWriting: " << output_name << endl;
335
336 Time start_write;
337
338 const uint64_t num_cols = num_blocks+1;//+1 because of not-used drs-time-marker column
339 const uint64_t output_catalog_end = output_catalog_start + num_tiles*num_cols*16;
340
341 const uint64_t zheapptr = output_heap_start - output_catalog_start;
342 const uint64_t theap = output_catalog_end - output_catalog_start;
343
344 map<string, size_t> header;
345
346 if (!dry_run)
347 {
348 ofstream output(output_name.c_str(), ios_base::binary);
349 input.seekg(0);
350
351 //copy everything until the catalog start and update header values on our way
352 counter = 0;
353 for (uint64_t i=0; i<output_catalog_start; i+=80)
354 {
355 input.read(buffer, 80);
356
357 if (!strcmp(buffer, "XTENSION= 'BINTABLE' / binary table extension "))
358 counter++;
359
360 if (counter == 2)
361 {
362 bool rc = true;
363
364 if (!memcmp(buffer, "NAXIS2", 6))
365 rc &= update_header_key(buffer, "NAXIS2", num_tiles, "number of rows in table");
366 if (!memcmp(buffer, "ZNAXIS2", 7))
367 rc &= update_header_key(buffer, "ZNAXIS2", num_tiles, "Number of uncompressed rows");
368 if (!memcmp(buffer, "PCOUNT", 6))
369 rc &= update_header_key(buffer, "PCOUNT", output_heap_end - output_catalog_end, "size of special data area");
370 if (!memcmp(buffer, "THEAP", 5))
371 rc &= update_header_key(buffer, "THEAP", theap, "Pointer to heap");
372 if (!memcmp(buffer, "ZHEAPPTR", 8))
373 rc &= update_header_key(buffer, "ZHEAPPTR", zheapptr, "Pointer to data");
374
375 if (!rc)
376 return 5;
377
378 header[string(buffer, 8)] = size_t(output.tellp())+10;
379 }
380
381 output.write(buffer, 80);
382
383 if (!no_progress)
384 cout << "\rByte " << input.tellg() << " ";
385 }
386
387 if (no_progress)
388 cout << "Reached " << input.tellg() << '\n';
389
390 // ------------------------------------------------------------
391
392 // now write the updated catalog
393 const uint64_t reserved_num_tiles = zheapptr / (num_cols*16);
394 const uint32_t one_catalog_row_size = num_cols*16;
395 const uint32_t total_catalog_size = reserved_num_tiles*one_catalog_row_size;
396
397 vector<char> swapped_catalog(total_catalog_size);
398
399 uint32_t shift = 0;
400 for (auto it=catalog.cbegin(); it!=catalog.cend(); it++)
401 {
402 revcpy<sizeof(int64_t)>(swapped_catalog.data() + shift, (char*)(it->data()), num_cols*2);
403 shift += one_catalog_row_size;
404 }
405
406 if (num_tiles < reserved_num_tiles)
407 memset(swapped_catalog.data()+shift, 0, total_catalog_size - shift);
408
409 output.write(swapped_catalog.data(), total_catalog_size);
410
411 // ------------------------------------------------------------
412
413 // we are now at the beginning of the heap in the output file. Move there in the input file
414 input.seekg(output_heap_start);
415
416 uint64_t num_heap_bytes_written = 0;
417 uint64_t total_heap_bytes_to_write = output_heap_end - output_heap_start;
418
419 cout << "\nCatalog updated.\nCopying " << total_heap_bytes_to_write << " of data now." << endl;
420
421 while (num_heap_bytes_written != total_heap_bytes_to_write)
422 {
423 uint64_t bytes_to_write = 2048;
424 if (total_heap_bytes_to_write - num_heap_bytes_written < 2048)
425 bytes_to_write = total_heap_bytes_to_write - num_heap_bytes_written;
426
427 input.read(buffer, bytes_to_write);
428 output.write(buffer, bytes_to_write);
429 num_heap_bytes_written += bytes_to_write;
430
431 if (!no_progress)
432 cout << "\rByte " << input.tellg() << " ";
433 }
434 if (no_progress)
435 cout << "Reached " << input.tellg() << '\n';
436
437 cout << "\nData written. Adding FITS padding. " << endl;
438
439 // ------------------------------------------------------------
440
441 //now add the FITS padding at the end
442 while (output.tellp() % 2880 != 0)
443 output.write("\0", 1);
444 }
445 else
446 cout << " ... skipped ...\n";
447
448 const auto time_write = Time().UnixTime()-start_write.UnixTime();
449
450 cout << "File recovered!\n";
451
452 // ------------------------------------------------------------
453
454 cout << "\nValid num tiles : " << setw(12) << num_tiles;
455 cout << "\nNAXIS2 : " << setw(12) << num_tiles;
456 cout << "\nZNAXIS2 : " << setw(12) << num_tiles;
457 cout << "\nCatalog start : " << setw(12) << output_catalog_start;
458 cout << "\nCatalog end : " << setw(12) << output_catalog_end;
459 cout << "\nZHEAPPTR : " << setw(12) << zheapptr;
460 cout << "\nTHEAP : " << setw(12) << theap;
461 cout << "\nHeap start : " << setw(12) << output_heap_start;
462 cout << "\nHeap end : " << setw(12) << output_heap_end;
463 cout << "\nPCOUNT : " << setw(12) << output_heap_end - output_catalog_end;
464 cout << '\n';
465 cout << "\nInput file size : " << setw(12) << fs::file_size(input_name);
466 if (!dry_run)
467 {
468 cout << "\nOutput file size : " << setw(12) << fs::file_size(output_name);
469 cout << "\nLost bytes : " << setw(12) << fs::file_size(input_name)-fs::file_size(output_name);
470 cout << "\nLost fraction : " << setw(12) << 1-double(fs::file_size(output_name))/fs::file_size(input_name);
471 }
472 cout << "\n";
473
474 cout << "\nExecution [read] : " << time_read << "s, " << fs::file_size(input_name)/1024./1024/time_read << "Mb/s";
475 cout << "\nExecution [write] : " << time_write << "s, " << fs::file_size(output_name)/1024./1024/time_write << "Mb/s";
476
477 if (0)
478 return 0;
479
480 factfits fits(output_name);
481
482 cout << "\nNew file contains " << fits.GetNumRows() << " events.";
483 cout << "\nChecking for raw data contents." << endl;
484
485 if (fits.GetStr("TELESCOP")!="FACT" || fits.GetStr("ORIGIN")!="FACT" || fits.GetStr("PACKAGE")!="FACT++" || fits.GetStr("EXTNAME")!="Events" || !fits.HasKey("DRSCALIB"))
486 {
487 cerr << "Header incomplete." << endl;
488 return 0;
489 }
490
491 if (!fits.HasColumn("UnixTimeUTC"))
492 {
493 cerr << "Column UnixTimeUTC missing." << endl;
494 return 0;
495 }
496
497 cout << "\nRaw data file detected.";
498 cout << "\nUpdating run-end with last event time." << endl;
499
500 vector<uint32_t> utime(2);
501 fits.SetVecAddress("UnixTimeUTC", utime);
502 fits.GetRow(fits.GetNumRows()-1);
503
504 const Time stop(utime[0], utime[1]);
505
506 const uint32_t tstopi = floor(stop.UnixDate());
507 const double tstopf = fmod(stop.UnixDate(), 1);
508
509 fits.close();
510
511 cout << setfill(' ');
512 cout << "\nTSTOPI = " << right << setw(20) << tstopi << " / Time when last event received (integral part)";
513 cout << "\nTSTOPF = " << right << setw(20) << tstopf << " / Time when last event received (fractional part)";
514 cout << "\nDATE-END= '" << stop.Iso() << "' / Time when last event received";
515
516 fstream fout(output_name);//, ios::binary);
517 fout << setfill(' ');
518
519 fout.seekp(header["DATE-END"], ios::beg);
520 //fout.seekg(header["DATE-END"], ios::beg);
521 fout << left << setw(20) << string("'" + stop.Iso() + "'") << " / Time when last event received";
522
523 fout.seekp(header["TSTOPI "], ios::beg);
524 //fout.seekg(header["TSTOPI "], ios::beg);
525 fout << right << setw(20) << tstopi;
526
527 fout.seekp(header["TSTOPF "], ios::beg);
528 //fout.seekg(header["TSTOPF "], ios::beg);
529 fout << right << setw(20) << tstopf;
530 fout.flush();
531
532 cout << "\nUpdating checksum." << endl;
533
534 factfits fits2(output_name);
535 fout.seekp(header["CHECKSUM"], ios::beg);
536 //fout.seekg(header["CHECKSUM"], ios::beg);
537 fout << left << setw(20) << string("'" + fits2.GetChecksum().str() + "'");
538
539 fout.close();
540 fits2.close();
541
542 factfits fits3(output_name);
543 if (!fits3.IsHeaderOk() || !fits3.IsFileOk())
544 {
545 cerr << "\nFAILED: The checksum is still invalid! :(" << endl;
546 return 7;
547 }
548
549 cout << "\nFile header successfully updated!\n" << endl;
550
551 return 0;
552
553}
Note: See TracBrowser for help on using the repository browser.