source: trunk/FACT++/src/fixfits.cc@ 20035

Last change on this file since 20035 was 20035, checked in by tbretz, 4 years ago
Etienne's tool for repairing fits/raw files.
File size: 12.9 KB
Line 
1//Compile with g++ -o FixFits fixFits.cpp -O2 -std=c++11
2//This is a "FITS file fixer" program for FACT.
3//It goes through the file, relying only on basic header data and compressed blocks headers
4//It will reconstruct the catalog data too in the output file a strip all corrupt data
5//The actual header keywords must be edited with the FTOOLS afterwards
6
7#include <string>
8#include <iostream>
9#include <iomanip>
10#include <fstream>
11#include <cstring>
12#include <vector>
13#include <algorithm>
14#include <sstream>
15
16#include <boost/filesystem.hpp>
17
18#include "FITS.h"
19#include "Time.h"
20#include "Configuration.h"
21
22using namespace std;
23using namespace FITS;
24namespace fs = boost::filesystem;
25
26
27template<size_t N>
28 void revcpy(char *dest, const char *src, int num)
29{
30 const char *pend = src + num*N;
31 for (const char *ptr = src; ptr<pend; ptr+=N, dest+=N)
32 std::reverse_copy(ptr, ptr+N, dest);
33}
34
35bool update_header_key(char buffer[], const string& keyword, uint64_t value, const string& comment)
36{
37 ostringstream str;
38 str << setfill(' ');
39 str << value;
40
41 const auto num_length = str.str().size();
42 str.str("");
43
44 str << setw(8-keyword.size()) << keyword;
45
46 str << "= ";
47
48 str << setw(20-num_length) << value << " / " << comment;
49
50 str << setw(80-str.str().size()) << ' ';
51
52 cout << '\n' << str.str() << '\n';
53
54 if (str.str().size() != 80)
55 {
56 cerr << "Wrong " << keyword << " string size." << endl;
57 return false;
58 }
59
60 memcpy(buffer, str.str().c_str(), 80);
61
62 return true;
63}
64
65void SetupConfiguration(Configuration &conf)
66{
67 po::options_description control("Root to SQL");
68 control.add_options()
69 ("file", var<string>()->required(), "Input file name. Fits file to be repaired.")
70 ("out,o", var<string>(), "Output file name (+'.repaired' if empty)")
71 ;
72
73 po::positional_options_description p;
74 p.add("file", 1); // All positional options
75 p.add("out", 1); // All positional options
76
77 conf.AddOptions(control);
78 conf.SetArgumentPositions(p);
79}
80
81void PrintUsage()
82{
83 cout <<
84 "fixfits - Fix the header of a none closed raw-data file\n"
85 "\n"
86 "Usage: fixfits input-file-name [output-file-name]\n"
87 "\n"
88 ;
89 cout << endl;
90}
91
92
93int main(int argc, const char* argv[])
94{
95 Time start;
96
97 Configuration conf(argv[0]);
98 conf.SetPrintUsage(PrintUsage);
99 SetupConfiguration(conf);
100
101 if (!conf.DoParse(argc, argv))
102 return 127;
103
104 // ----------------------------- Evaluate options --------------------------
105 const string input_name = conf.Get<string>("file");
106 const string output_name = conf.Has("out") ? conf.Get<string>("out") :
107 (fs::path(input_name)/".repaired").string();
108
109 cout << "Reading: " << input_name << endl;
110
111 ifstream input(input_name.c_str(), ios_base::binary);
112
113 cout << "Looking for beginning of data table." << endl;
114
115 // ------------------------------------------------------------
116
117 //first off, find the beginning of the data table
118 char buffer[2048];
119 buffer[80] = '\0';
120
121 size_t counter = 0;
122 size_t num_bytes = 80;
123 input.read(buffer, 80);
124
125 while (counter != 2)
126 {
127 if (!strcmp(buffer, "XTENSION= 'BINTABLE' / binary table extension "))
128 counter++;
129
130 if (input.eof())
131 {
132 cout << "Reached end-of-file but could not find start of data table." << endl;
133 return 2;
134 }
135
136 input.read(buffer, 80);
137 num_bytes+=80;
138
139 cout << "\rByte " << num_bytes << " ";
140 }
141 cout << "\nFound beginning of data table." << endl;
142
143 // ------------------------------------------------------------
144
145 //we just read the start of the data table. continue until the end of the table header
146 while (strcmp(buffer, "END "))
147 {
148 if (input.eof())
149 {
150 cout << "\nData table header was not closed properly. Aborting." << endl;
151 return 3;
152 }
153
154 input.read(buffer, 80);
155 num_bytes+=80;
156
157 cout << "\rByte " << num_bytes << " ";
158 }
159
160 cout << "\nReached end of data table header" << endl;
161
162 // ------------------------------------------------------------
163
164 //now skip the fits padding
165 while (num_bytes % 2880 != 0)
166 {
167 input.read(buffer, 80);
168 num_bytes+=80;
169
170 cout << "\rByte " << num_bytes << " ";
171 }
172
173
174 const size_t output_catalog_start = input.tellg();
175
176 //now we are in the catalog. skip it until we find the string 'TILE'
177 buffer[4] = '\0';
178 while (strcmp(buffer, "TILE"))
179 {
180 input.read(buffer, 4);
181 num_bytes += 4;
182 cout << "\rByte " << num_bytes << " ";
183 }
184
185 input.seekg(num_bytes-4);
186 cout << "\nFound start of very first compressed tile" << endl;
187
188 // ------------------------------------------------------------
189
190 const size_t output_heap_start = input.tellg();
191
192 size_t byte_where_tile_start = input.tellg();
193
194 //now figure out how many blocks are in each tile
195 TileHeader tile_head;
196 BlockHeader block_head;
197
198 input.read((char*)(&tile_head), sizeof(TileHeader));
199
200 cout << "First tile is " << tile_head.size << " bytes long" << endl;
201
202 // ------------------------------------------------------------
203
204 size_t block_bytes = 0;
205 size_t num_blocks = 0;
206
207 while (input.tellg() < byte_where_tile_start+tile_head.size)
208 {
209 input.read((char*)(&block_head), sizeof(BlockHeader));
210 block_bytes += block_head.size;
211
212 input.seekg(byte_where_tile_start + sizeof(TileHeader) + block_bytes);
213 num_blocks ++;
214
215 if (input.eof())
216 {
217 cout << "First tile seems broken unable to recover anything." << endl;
218 return 4;
219 }
220 }
221
222 if (input.tellg() != byte_where_tile_start+tile_head.size)
223 {
224 cout << "Discrepancy between tile size and sum of blocks size." << endl;
225 return 1;
226 }
227
228 cout << "There are " << num_blocks << " blocks in each tile. Now looping over all tiles" << endl;
229
230 // ------------------------------------------------------------
231
232 size_t num_tiles = 0;
233 input.seekg(output_heap_start);
234
235 size_t output_heap_end = input.tellg();
236 size_t previous_heap_end = input.tellg();
237
238 vector<vector<pair<int64_t, int64_t> > > catalog;
239
240 uint64_t offset_in_heap = 0;
241 while (!input.eof())
242 {
243 previous_heap_end = output_heap_end;
244 output_heap_end = input.tellg();
245
246 byte_where_tile_start = input.tellg();
247 block_bytes = 0;
248
249 if (input.peek() == EOF)
250 {
251 cout << "Reached end-of-file." << endl;
252 break;
253 }
254
255 input.read((char*)(&tile_head), sizeof(TileHeader));
256 if (memcmp(tile_head.id, "TILE", 4) || input.eof())
257 {
258 cout << "Reached end-of-table padding or end-of-file." << endl;
259 break;
260 }
261
262 offset_in_heap += sizeof(TileHeader);
263 catalog.emplace_back();
264
265 for (int i=0;i<num_blocks;i++)
266 {
267 input.read((char*)(&block_head), sizeof(BlockHeader));
268 if (input.eof())
269 {
270 cout << "Reached end-of-file." << endl;
271 break;
272 }
273 block_bytes += block_head.size;
274 input.seekg(byte_where_tile_start + sizeof(TileHeader) + block_bytes);
275 if (input.eof())
276 {
277 cout << "Reached end-of-file." << endl;
278 break;
279 }
280 catalog.back().emplace_back((int64_t)(block_head.size),offset_in_heap);
281 offset_in_heap += block_head.size;
282 }
283
284 //one last catalog entry for the column that has no data
285 catalog.back().emplace_back(0,0);
286
287 cout << "\rByte " << input.tellg() << " ";
288 num_tiles++;
289 }
290
291 input.clear();
292
293 // ------------------------------------------------------------
294
295 //ignore last read tile as we have no idea if it looked ok or not
296 num_tiles--;
297 output_heap_end = previous_heap_end;
298
299 const uint64_t num_cols = num_blocks+1;//+1 because of not-used drs-time-marker column
300 const uint64_t output_catalog_end = output_catalog_start + num_tiles*num_cols*16;
301
302 const uint64_t zheapptr = output_heap_start - output_catalog_start;
303 const uint64_t theap = output_catalog_end - output_catalog_start;
304
305 //Now we have everything that we need. Go again through the input file and write the output, fixed file
306 cout << "Data recovered.\n Writing output file: " << output_name << endl;
307
308 // ============================================================
309
310 ofstream output(output_name.c_str(), ios_base::binary);
311 input.seekg(0);
312
313 //copy everything until the catalog start and update header values on our way
314 counter = 0;
315 for (uint64_t i=0; i<output_catalog_start; i+=80)
316 {
317 input.read(buffer, 80);
318
319 if (!strcmp(buffer, "XTENSION= 'BINTABLE' / binary table extension "))
320 counter++;
321
322 if (counter == 2)
323 {
324 bool rc = true;
325 if (!memcmp(buffer, "NAXIS2", 6))
326 rc &= update_header_key(buffer, "NAXIS2", num_tiles, "number of rows in table");
327 if (!memcmp(buffer, "ZNAXIS2", 7))
328 rc &= update_header_key(buffer, "ZNAXIS2", num_tiles, "Number of uncompressed rows");
329 if (!memcmp(buffer, "PCOUNT", 6))
330 rc &= update_header_key(buffer, "PCOUNT", output_heap_end - output_catalog_end, "size of special data area");
331 if (!memcmp(buffer, "THEAP", 5))
332 rc &= update_header_key(buffer, "THEAP", theap, "Pointer to heap");
333 if (!memcmp(buffer, "ZHEAPPTR", 8))
334 rc &= update_header_key(buffer, "ZHEAPPTR", zheapptr, "Pointer to data");
335
336 if (!rc)
337 return 5;
338 }
339
340 output.write(buffer, 80);
341
342 cout << "\rByte " << input.tellg() << " ";
343 }
344
345 // ------------------------------------------------------------
346
347 // now write the updated catalog
348 const uint64_t reserved_num_tiles = zheapptr / (num_cols*16);
349 const uint32_t one_catalog_row_size = num_cols*16;
350 const uint32_t total_catalog_size = reserved_num_tiles*one_catalog_row_size;
351
352 vector<char> swapped_catalog(total_catalog_size);
353
354 uint32_t shift = 0;
355 for (auto it=catalog.cbegin(); it!=catalog.cend(); it++)
356 {
357 revcpy<sizeof(int64_t)>(swapped_catalog.data() + shift, (char*)(it->data()), num_cols*2);
358 shift += one_catalog_row_size;
359 }
360
361 if (num_tiles < reserved_num_tiles)
362 memset(swapped_catalog.data()+shift, 0, total_catalog_size - shift);
363
364 output.write(swapped_catalog.data(), total_catalog_size);
365
366 // ------------------------------------------------------------
367
368 // we are now at the beginning of the heap in the output file. Move there in the input file
369 input.seekg(output_heap_start);
370
371 uint64_t num_heap_bytes_written = 0;
372 uint64_t total_heap_bytes_to_write = output_heap_end - output_heap_start;
373
374 cout << endl << "Catalog updated. Copying " << total_heap_bytes_to_write << " of data now." << endl;
375
376 while (num_heap_bytes_written != total_heap_bytes_to_write)
377 {
378 uint64_t bytes_to_write = 2048;
379 if (total_heap_bytes_to_write - num_heap_bytes_written < 2048)
380 bytes_to_write = total_heap_bytes_to_write - num_heap_bytes_written;
381
382 input.read(buffer, bytes_to_write);
383 output.write(buffer, bytes_to_write);
384 num_heap_bytes_written += bytes_to_write;
385 cout << "\rByte " << input.tellg() << " ";
386 }
387
388 cout << "\nData written. Adding FITS padding. " << endl;
389
390 // ------------------------------------------------------------
391
392 //now add the FITS padding at the end
393 while (output.tellp() % 2880 != 0)
394 {
395 output.write("\0", 1);
396 }
397 cout << "File recovered!\n";
398
399 input.close();
400 output.close();
401
402 // ------------------------------------------------------------
403
404 cout << "\nValid num tiles: " << num_tiles;
405 cout << "\nCatalog start : " << output_catalog_start;
406 cout << "\nCatalog end : " << output_catalog_end;
407 cout << "\nHeap start : " << output_heap_start;
408 cout << "\nHeap end : " << output_heap_end;
409 cout << "\nZHEAPPTR : " << zheapptr;
410 cout << "\nTHEAP : " << theap;
411 cout << "\nPCOUNT : " << output_heap_end - output_catalog_end;
412 cout << "\nNAXIS2 : " << num_tiles;
413 cout << "\nZNAXIS2 : " << num_tiles;
414
415 cout << "\n\nYou should now:\n";
416 cout << "- update TSTOPI and TSTOPF using \"fitsdump -c UnixTimeUTC --minmax --nozero\" and then fv\n";
417 cout << "- update DATE-END using TSTOP given to the MjDtoISO program in ~fact_arc/ingestScripts and then fv again\n";
418 cout << "- update the checksum of the file with \"fchecksum update+ <filename>\"\n" << endl;
419
420 return 0;
421
422}
Note: See TracBrowser for help on using the repository browser.