source: trunk/FACT++/src/fixfits.cc@ 20036

Last change on this file since 20036 was 20036, checked in by tbretz, 4 years ago
Mostly updated to the console output, implemented supression the progress for logging purposes and a dry-run.
File size: 14.8 KB
Line 
1//Compile with g++ -o FixFits fixFits.cpp -O2 -std=c++11
2//This is a "FITS file fixer" program for FACT.
3//It goes through the file, relying only on basic header data and compressed blocks headers
4//It will reconstruct the catalog data too in the output file a strip all corrupt data
5//The actual header keywords must be edited with the FTOOLS afterwards
6
7#include <string>
8#include <iostream>
9#include <iomanip>
10#include <fstream>
11#include <cstring>
12#include <vector>
13#include <algorithm>
14#include <sstream>
15
16#include <boost/filesystem.hpp>
17
18#include "FITS.h"
19#include "Time.h"
20#include "Configuration.h"
21
22using namespace std;
23using namespace FITS;
24namespace fs = boost::filesystem;
25
26
27template<size_t N>
28 void revcpy(char *dest, const char *src, int num)
29{
30 const char *pend = src + num*N;
31 for (const char *ptr = src; ptr<pend; ptr+=N, dest+=N)
32 std::reverse_copy(ptr, ptr+N, dest);
33}
34
35bool update_header_key(char buffer[], const string& keyword, uint64_t value, const string& comment)
36{
37 ostringstream str;
38 str << value;
39
40 const auto num_length = str.str().size();
41 str.str("");
42
43 str << setfill(' ') << left;
44 str << setw(8) << keyword;
45
46 str << "= ";
47
48 str << setw(20) << value << " / " << comment;
49
50 str << setw(80-str.str().size()) << ' ';
51
52 cout << keyword.size() << ":" << num_length << ":" << str.str().size() << ":" << str.str() << '\n';
53
54 if (str.str().size() != 80)
55 {
56 cerr << "Wrong " << keyword << " string size." << endl;
57 return false;
58 }
59
60 memcpy(buffer, str.str().c_str(), 80);
61
62 return true;
63}
64
65void SetupConfiguration(Configuration &conf)
66{
67 po::options_description control("Root to SQL");
68 control.add_options()
69 ("file", var<string>()->required(), "Input file name. Fits file to be repaired.")
70 ("out,o", var<string>(), "Output file name (+'.repaired' if empty), dry-run if not provided")
71 ("no-progress", po_switch(), "Suppress progress output")
72 ("dry-run", po_switch(), "Do not write any output")
73 ;
74
75 po::positional_options_description p;
76 p.add("file", 1); // All positional options
77 p.add("out", 1); // All positional options
78
79 conf.AddOptions(control);
80 conf.SetArgumentPositions(p);
81}
82
83void PrintUsage()
84{
85 cout <<
86 "fixfits - Fix the header of a none closed raw-data file\n"
87 "\n"
88 "Usage: fixfits input-file-name [output-file-name]\n"
89 "\n"
90 ;
91 cout << endl;
92}
93
94
95int main(int argc, const char* argv[])
96{
97 Time start;
98
99 Configuration conf(argv[0]);
100 conf.SetPrintUsage(PrintUsage);
101 SetupConfiguration(conf);
102
103 if (!conf.DoParse(argc, argv))
104 return 127;
105
106 // ----------------------------- Evaluate options --------------------------
107 const string input_name = conf.Get<string>("file");
108 const string output_name = conf.Has("out") ? conf.Get<string>("out") :
109 (input_name+".repaired");
110
111 const bool no_progress = conf.Get<bool>("no-progress");
112 const bool dry_run = conf.Get<bool>("dry-run");
113
114 cout << "\nReading: " << input_name << endl;
115
116 ifstream input(input_name.c_str(), ios_base::binary);
117 if (!input)
118 {
119 cerr << "Error opening file: " << strerror(errno) << endl;
120 return 6;
121 }
122
123 cout << "Seeking beginning of data table." << endl;
124
125 // ------------------------------------------------------------
126
127 //first off, find the beginning of the data table
128 char buffer[2048];
129 buffer[80] = '\0';
130
131 size_t counter = 0;
132 size_t num_bytes = 80;
133 input.read(buffer, 80);
134
135 while (counter != 2)
136 {
137 if (!strcmp(buffer, "XTENSION= 'BINTABLE' / binary table extension "))
138 counter++;
139
140 if (input.eof())
141 {
142 cout << "Reached end-of-file but could not find start of data table." << endl;
143 return 2;
144 }
145
146 input.read(buffer, 80);
147 num_bytes+=80;
148
149 if (!no_progress)
150 cout << "\rByte " << num_bytes << " ";
151 }
152 if (no_progress)
153 cout << "Reached " << num_bytes << '\n';
154 cout << "\nFound beginning of data table." << endl;
155
156 // ------------------------------------------------------------
157
158 //we just read the start of the data table. continue until the end of the table header
159 while (strcmp(buffer, "END "))
160 {
161 if (input.eof())
162 {
163 cout << "\nData table header was not closed properly. Aborting." << endl;
164 return 3;
165 }
166
167 input.read(buffer, 80);
168 num_bytes+=80;
169
170 if (!no_progress)
171 cout << "\rByte " << num_bytes << " ";
172 }
173 if (no_progress)
174 cout << "Reached " << num_bytes << '\n';
175
176 cout << "\nReached end of data table header" << endl;
177
178 // ------------------------------------------------------------
179
180 //now skip the fits padding
181 while (num_bytes % 2880 != 0)
182 {
183 input.read(buffer, 80);
184 num_bytes+=80;
185
186 if (!no_progress)
187 cout << "\rByte " << num_bytes << " ";
188 }
189 if (no_progress)
190 cout << "Reached " << num_bytes << '\n';
191
192
193 const size_t output_catalog_start = input.tellg();
194
195 //now we are in the catalog. skip it until we find the string 'TILE'
196 buffer[4] = '\0';
197 while (strcmp(buffer, "TILE"))
198 {
199 input.read(buffer, 4);
200 num_bytes += 4;
201
202 if (!no_progress)
203 cout << "\rByte " << num_bytes << " ";
204 }
205 if (no_progress)
206 cout << "Reached " << num_bytes << '\n';
207
208 input.seekg(num_bytes-4);
209 cout << "\nFound start of first compressed tile," << endl;
210
211 // ------------------------------------------------------------
212
213 const size_t output_heap_start = input.tellg();
214
215 size_t byte_where_tile_start = input.tellg();
216
217 //now figure out how many blocks are in each tile
218 TileHeader tile_head;
219 BlockHeader block_head;
220
221 input.read(reinterpret_cast<char*>(&tile_head), sizeof(TileHeader));
222
223 cout << "First tile is " << tile_head.size << " bytes long." << endl;
224
225 // ------------------------------------------------------------
226
227 size_t block_bytes = 0;
228 size_t num_blocks = 0;
229
230 while (size_t(input.tellg()) < byte_where_tile_start+tile_head.size)
231 {
232 input.read(reinterpret_cast<char*>(&block_head), sizeof(BlockHeader));
233 block_bytes += block_head.size;
234
235 input.seekg(byte_where_tile_start + sizeof(TileHeader) + block_bytes);
236 num_blocks ++;
237
238 if (input.eof())
239 {
240 cout << "First tile seems broken unable to recover anything." << endl;
241 return 4;
242 }
243 }
244
245 if (size_t(input.tellg()) != byte_where_tile_start+tile_head.size)
246 {
247 cout << "Discrepancy between tile size and sum of blocks size." << endl;
248 return 1;
249 }
250
251 cout << "There are " << num_blocks << " blocks in each tile.\nLooping over all tiles" << endl;
252
253 // ------------------------------------------------------------
254
255 size_t num_tiles = 0;
256 input.seekg(output_heap_start);
257
258 size_t output_heap_end = input.tellg();
259 size_t previous_heap_end = input.tellg();
260
261 vector<vector<pair<int64_t, int64_t> > > catalog;
262
263 uint64_t offset_in_heap = 0;
264 while (!input.eof())
265 {
266 previous_heap_end = output_heap_end;
267 output_heap_end = input.tellg();
268
269 byte_where_tile_start = input.tellg();
270 block_bytes = 0;
271
272 if (input.peek() == EOF)
273 {
274 cout << "Reached end-of-file." << endl;
275 break;
276 }
277
278 input.read(reinterpret_cast<char*>(&tile_head), sizeof(TileHeader));
279 if (memcmp(tile_head.id, "TILE", 4) || input.eof())
280 {
281 cout << "Reached end-of-table padding or end-of-file." << endl;
282 break;
283 }
284
285 offset_in_heap += sizeof(TileHeader);
286 catalog.emplace_back();
287
288 for (size_t i=0; i<num_blocks; i++)
289 {
290 input.read((char*)(&block_head), sizeof(BlockHeader));
291 if (input.eof())
292 {
293 cout << "Reached end-of-file." << endl;
294 break;
295 }
296 block_bytes += block_head.size;
297 input.seekg(byte_where_tile_start + sizeof(TileHeader) + block_bytes);
298 if (input.eof())
299 {
300 cout << "Reached end-of-file." << endl;
301 break;
302 }
303 catalog.back().emplace_back((int64_t)(block_head.size),offset_in_heap);
304 offset_in_heap += block_head.size;
305 }
306
307 //one last catalog entry for the column that has no data
308 catalog.back().emplace_back(0,0);
309
310 if (!no_progress)
311 cout << "\rByte " << input.tellg() << " ";
312 num_tiles++;
313 }
314
315 input.clear();
316
317 // ------------------------------------------------------------
318
319 //ignore last read tile as we have no idea if it looked ok or not
320 num_tiles--;
321 output_heap_end = previous_heap_end;
322
323 const uint64_t num_cols = num_blocks+1;//+1 because of not-used drs-time-marker column
324 const uint64_t output_catalog_end = output_catalog_start + num_tiles*num_cols*16;
325
326 const uint64_t zheapptr = output_heap_start - output_catalog_start;
327 const uint64_t theap = output_catalog_end - output_catalog_start;
328
329 //Now we have everything that we need. Go again through the input file and write the output, fixed file
330 cout << "Recovery finished!\n\nWriting: " << output_name << endl;
331
332 // ============================================================
333
334 if (!dry_run)
335 {
336 ofstream output(output_name.c_str(), ios_base::binary);
337 input.seekg(0);
338
339 //copy everything until the catalog start and update header values on our way
340 counter = 0;
341 for (uint64_t i=0; i<output_catalog_start; i+=80)
342 {
343 input.read(buffer, 80);
344
345 if (!strcmp(buffer, "XTENSION= 'BINTABLE' / binary table extension "))
346 counter++;
347
348 if (counter == 2)
349 {
350 bool rc = true;
351
352 if (!memcmp(buffer, "NAXIS2", 6))
353 rc &= update_header_key(buffer, "NAXIS2", num_tiles, "number of rows in table");
354 if (!memcmp(buffer, "ZNAXIS2", 7))
355 rc &= update_header_key(buffer, "ZNAXIS2", num_tiles, "Number of uncompressed rows");
356 if (!memcmp(buffer, "PCOUNT", 6))
357 rc &= update_header_key(buffer, "PCOUNT", output_heap_end - output_catalog_end, "size of special data area");
358 if (!memcmp(buffer, "THEAP", 5))
359 rc &= update_header_key(buffer, "THEAP", theap, "Pointer to heap");
360 if (!memcmp(buffer, "ZHEAPPTR", 8))
361 rc &= update_header_key(buffer, "ZHEAPPTR", zheapptr, "Pointer to data");
362
363 if (!rc)
364 return 5;
365 }
366
367 output.write(buffer, 80);
368
369 if (!no_progress)
370 cout << "\rByte " << input.tellg() << " ";
371 }
372
373 if (no_progress)
374 cout << "Reached " << input.tellg() << '\n';
375
376 // ------------------------------------------------------------
377
378 // now write the updated catalog
379 const uint64_t reserved_num_tiles = zheapptr / (num_cols*16);
380 const uint32_t one_catalog_row_size = num_cols*16;
381 const uint32_t total_catalog_size = reserved_num_tiles*one_catalog_row_size;
382
383 vector<char> swapped_catalog(total_catalog_size);
384
385 uint32_t shift = 0;
386 for (auto it=catalog.cbegin(); it!=catalog.cend(); it++)
387 {
388 revcpy<sizeof(int64_t)>(swapped_catalog.data() + shift, (char*)(it->data()), num_cols*2);
389 shift += one_catalog_row_size;
390 }
391
392 if (num_tiles < reserved_num_tiles)
393 memset(swapped_catalog.data()+shift, 0, total_catalog_size - shift);
394
395 output.write(swapped_catalog.data(), total_catalog_size);
396
397 // ------------------------------------------------------------
398
399 // we are now at the beginning of the heap in the output file. Move there in the input file
400 input.seekg(output_heap_start);
401
402 uint64_t num_heap_bytes_written = 0;
403 uint64_t total_heap_bytes_to_write = output_heap_end - output_heap_start;
404
405 cout << "\nCatalog updated.\nCopying " << total_heap_bytes_to_write << " of data now." << endl;
406
407 while (num_heap_bytes_written != total_heap_bytes_to_write)
408 {
409 uint64_t bytes_to_write = 2048;
410 if (total_heap_bytes_to_write - num_heap_bytes_written < 2048)
411 bytes_to_write = total_heap_bytes_to_write - num_heap_bytes_written;
412
413 input.read(buffer, bytes_to_write);
414 output.write(buffer, bytes_to_write);
415 num_heap_bytes_written += bytes_to_write;
416
417 if (!no_progress)
418 cout << "\rByte " << input.tellg() << " ";
419 }
420 if (no_progress)
421 cout << "Reached " << input.tellg() << '\n';
422
423 cout << "\nData written. Adding FITS padding. " << endl;
424
425 // ------------------------------------------------------------
426
427 //now add the FITS padding at the end
428 while (output.tellp() % 2880 != 0)
429 output.write("\0", 1);
430 }
431 else
432 cout << " ... skipped ...\n";
433
434 cout << "File recovered!\n";
435
436 // ------------------------------------------------------------
437
438 cout << "\nValid num tiles : " << setw(12) << num_tiles;
439 cout << "\nNAXIS2 : " << setw(12) << num_tiles;
440 cout << "\nZNAXIS2 : " << setw(12) << num_tiles;
441 cout << "\nCatalog start : " << setw(12) << output_catalog_start;
442 cout << "\nCatalog end : " << setw(12) << output_catalog_end;
443 cout << "\nZHEAPPTR : " << setw(12) << zheapptr;
444 cout << "\nTHEAP : " << setw(12) << theap;
445 cout << "\nHeap start : " << setw(12) << output_heap_start;
446 cout << "\nHeap end : " << setw(12) << output_heap_end;
447 cout << "\nPCOUNT : " << setw(12) << output_heap_end - output_catalog_end;
448 cout << '\n';
449 cout << "\nInput file size : " << setw(12) << fs::file_size(input_name);
450 if (!dry_run)
451 {
452 cout << "\nOutput file size : " << setw(12) << fs::file_size(output_name);
453 cout << "\nLost bytes : " << setw(12) << fs::file_size(input_name)-fs::file_size(output_name);
454 cout << "\nLost fraction : " << setw(12) << 1.-fs::file_size(output_name)/fs::file_size(input_name);
455 }
456 cout << "\n\nYou should now:\n";
457 cout << "- update TSTOPI and TSTOPF using \"fitsdump -c UnixTimeUTC --minmax --nozero\" and then fv\n";
458 cout << "- update DATE-END using TSTOP given to the MjDtoISO program in ~fact_arc/ingestScripts and then fv again\n";
459 cout << "- update the checksum of the file with \"fchecksum update+ <filename>\"\n" << endl;
460
461 return 0;
462
463}
Note: See TracBrowser for help on using the repository browser.