1 | //Compile with g++ -o FixFits fixFits.cpp -O2 -std=c++11
|
---|
2 | //This is a "FITS file fixer" program for FACT.
|
---|
3 | //It goes through the file, relying only on basic header data and compressed blocks headers
|
---|
4 | //It will reconstruct the catalog data too in the output file a strip all corrupt data
|
---|
5 | //The actual header keywords must be edited with the FTOOLS afterwards
|
---|
6 |
|
---|
7 | #include <string>
|
---|
8 | #include <iostream>
|
---|
9 | #include <iomanip>
|
---|
10 | #include <fstream>
|
---|
11 | #include <cstring>
|
---|
12 | #include <vector>
|
---|
13 | #include <algorithm>
|
---|
14 | #include <sstream>
|
---|
15 |
|
---|
16 | #include <boost/filesystem.hpp>
|
---|
17 |
|
---|
18 | #include "FITS.h"
|
---|
19 | #include "Time.h"
|
---|
20 | #include "Configuration.h"
|
---|
21 |
|
---|
22 | using namespace std;
|
---|
23 | using namespace FITS;
|
---|
24 | namespace fs = boost::filesystem;
|
---|
25 |
|
---|
26 |
|
---|
27 | template<size_t N>
|
---|
28 | void revcpy(char *dest, const char *src, int num)
|
---|
29 | {
|
---|
30 | const char *pend = src + num*N;
|
---|
31 | for (const char *ptr = src; ptr<pend; ptr+=N, dest+=N)
|
---|
32 | std::reverse_copy(ptr, ptr+N, dest);
|
---|
33 | }
|
---|
34 |
|
---|
35 | bool update_header_key(char buffer[], const string& keyword, uint64_t value, const string& comment)
|
---|
36 | {
|
---|
37 | ostringstream str;
|
---|
38 | str << setfill(' ');
|
---|
39 | str << value;
|
---|
40 |
|
---|
41 | const auto num_length = str.str().size();
|
---|
42 | str.str("");
|
---|
43 |
|
---|
44 | str << setw(8-keyword.size()) << keyword;
|
---|
45 |
|
---|
46 | str << "= ";
|
---|
47 |
|
---|
48 | str << setw(20-num_length) << value << " / " << comment;
|
---|
49 |
|
---|
50 | str << setw(80-str.str().size()) << ' ';
|
---|
51 |
|
---|
52 | cout << '\n' << str.str() << '\n';
|
---|
53 |
|
---|
54 | if (str.str().size() != 80)
|
---|
55 | {
|
---|
56 | cerr << "Wrong " << keyword << " string size." << endl;
|
---|
57 | return false;
|
---|
58 | }
|
---|
59 |
|
---|
60 | memcpy(buffer, str.str().c_str(), 80);
|
---|
61 |
|
---|
62 | return true;
|
---|
63 | }
|
---|
64 |
|
---|
65 | void SetupConfiguration(Configuration &conf)
|
---|
66 | {
|
---|
67 | po::options_description control("Root to SQL");
|
---|
68 | control.add_options()
|
---|
69 | ("file", var<string>()->required(), "Input file name. Fits file to be repaired.")
|
---|
70 | ("out,o", var<string>(), "Output file name (+'.repaired' if empty)")
|
---|
71 | ;
|
---|
72 |
|
---|
73 | po::positional_options_description p;
|
---|
74 | p.add("file", 1); // All positional options
|
---|
75 | p.add("out", 1); // All positional options
|
---|
76 |
|
---|
77 | conf.AddOptions(control);
|
---|
78 | conf.SetArgumentPositions(p);
|
---|
79 | }
|
---|
80 |
|
---|
81 | void PrintUsage()
|
---|
82 | {
|
---|
83 | cout <<
|
---|
84 | "fixfits - Fix the header of a none closed raw-data file\n"
|
---|
85 | "\n"
|
---|
86 | "Usage: fixfits input-file-name [output-file-name]\n"
|
---|
87 | "\n"
|
---|
88 | ;
|
---|
89 | cout << endl;
|
---|
90 | }
|
---|
91 |
|
---|
92 |
|
---|
93 | int main(int argc, const char* argv[])
|
---|
94 | {
|
---|
95 | Time start;
|
---|
96 |
|
---|
97 | Configuration conf(argv[0]);
|
---|
98 | conf.SetPrintUsage(PrintUsage);
|
---|
99 | SetupConfiguration(conf);
|
---|
100 |
|
---|
101 | if (!conf.DoParse(argc, argv))
|
---|
102 | return 127;
|
---|
103 |
|
---|
104 | // ----------------------------- Evaluate options --------------------------
|
---|
105 | const string input_name = conf.Get<string>("file");
|
---|
106 | const string output_name = conf.Has("out") ? conf.Get<string>("out") :
|
---|
107 | (fs::path(input_name)/".repaired").string();
|
---|
108 |
|
---|
109 | cout << "Reading: " << input_name << endl;
|
---|
110 |
|
---|
111 | ifstream input(input_name.c_str(), ios_base::binary);
|
---|
112 |
|
---|
113 | cout << "Looking for beginning of data table." << endl;
|
---|
114 |
|
---|
115 | // ------------------------------------------------------------
|
---|
116 |
|
---|
117 | //first off, find the beginning of the data table
|
---|
118 | char buffer[2048];
|
---|
119 | buffer[80] = '\0';
|
---|
120 |
|
---|
121 | size_t counter = 0;
|
---|
122 | size_t num_bytes = 80;
|
---|
123 | input.read(buffer, 80);
|
---|
124 |
|
---|
125 | while (counter != 2)
|
---|
126 | {
|
---|
127 | if (!strcmp(buffer, "XTENSION= 'BINTABLE' / binary table extension "))
|
---|
128 | counter++;
|
---|
129 |
|
---|
130 | if (input.eof())
|
---|
131 | {
|
---|
132 | cout << "Reached end-of-file but could not find start of data table." << endl;
|
---|
133 | return 2;
|
---|
134 | }
|
---|
135 |
|
---|
136 | input.read(buffer, 80);
|
---|
137 | num_bytes+=80;
|
---|
138 |
|
---|
139 | cout << "\rByte " << num_bytes << " ";
|
---|
140 | }
|
---|
141 | cout << "\nFound beginning of data table." << endl;
|
---|
142 |
|
---|
143 | // ------------------------------------------------------------
|
---|
144 |
|
---|
145 | //we just read the start of the data table. continue until the end of the table header
|
---|
146 | while (strcmp(buffer, "END "))
|
---|
147 | {
|
---|
148 | if (input.eof())
|
---|
149 | {
|
---|
150 | cout << "\nData table header was not closed properly. Aborting." << endl;
|
---|
151 | return 3;
|
---|
152 | }
|
---|
153 |
|
---|
154 | input.read(buffer, 80);
|
---|
155 | num_bytes+=80;
|
---|
156 |
|
---|
157 | cout << "\rByte " << num_bytes << " ";
|
---|
158 | }
|
---|
159 |
|
---|
160 | cout << "\nReached end of data table header" << endl;
|
---|
161 |
|
---|
162 | // ------------------------------------------------------------
|
---|
163 |
|
---|
164 | //now skip the fits padding
|
---|
165 | while (num_bytes % 2880 != 0)
|
---|
166 | {
|
---|
167 | input.read(buffer, 80);
|
---|
168 | num_bytes+=80;
|
---|
169 |
|
---|
170 | cout << "\rByte " << num_bytes << " ";
|
---|
171 | }
|
---|
172 |
|
---|
173 |
|
---|
174 | const size_t output_catalog_start = input.tellg();
|
---|
175 |
|
---|
176 | //now we are in the catalog. skip it until we find the string 'TILE'
|
---|
177 | buffer[4] = '\0';
|
---|
178 | while (strcmp(buffer, "TILE"))
|
---|
179 | {
|
---|
180 | input.read(buffer, 4);
|
---|
181 | num_bytes += 4;
|
---|
182 | cout << "\rByte " << num_bytes << " ";
|
---|
183 | }
|
---|
184 |
|
---|
185 | input.seekg(num_bytes-4);
|
---|
186 | cout << "\nFound start of very first compressed tile" << endl;
|
---|
187 |
|
---|
188 | // ------------------------------------------------------------
|
---|
189 |
|
---|
190 | const size_t output_heap_start = input.tellg();
|
---|
191 |
|
---|
192 | size_t byte_where_tile_start = input.tellg();
|
---|
193 |
|
---|
194 | //now figure out how many blocks are in each tile
|
---|
195 | TileHeader tile_head;
|
---|
196 | BlockHeader block_head;
|
---|
197 |
|
---|
198 | input.read((char*)(&tile_head), sizeof(TileHeader));
|
---|
199 |
|
---|
200 | cout << "First tile is " << tile_head.size << " bytes long" << endl;
|
---|
201 |
|
---|
202 | // ------------------------------------------------------------
|
---|
203 |
|
---|
204 | size_t block_bytes = 0;
|
---|
205 | size_t num_blocks = 0;
|
---|
206 |
|
---|
207 | while (input.tellg() < byte_where_tile_start+tile_head.size)
|
---|
208 | {
|
---|
209 | input.read((char*)(&block_head), sizeof(BlockHeader));
|
---|
210 | block_bytes += block_head.size;
|
---|
211 |
|
---|
212 | input.seekg(byte_where_tile_start + sizeof(TileHeader) + block_bytes);
|
---|
213 | num_blocks ++;
|
---|
214 |
|
---|
215 | if (input.eof())
|
---|
216 | {
|
---|
217 | cout << "First tile seems broken unable to recover anything." << endl;
|
---|
218 | return 4;
|
---|
219 | }
|
---|
220 | }
|
---|
221 |
|
---|
222 | if (input.tellg() != byte_where_tile_start+tile_head.size)
|
---|
223 | {
|
---|
224 | cout << "Discrepancy between tile size and sum of blocks size." << endl;
|
---|
225 | return 1;
|
---|
226 | }
|
---|
227 |
|
---|
228 | cout << "There are " << num_blocks << " blocks in each tile. Now looping over all tiles" << endl;
|
---|
229 |
|
---|
230 | // ------------------------------------------------------------
|
---|
231 |
|
---|
232 | size_t num_tiles = 0;
|
---|
233 | input.seekg(output_heap_start);
|
---|
234 |
|
---|
235 | size_t output_heap_end = input.tellg();
|
---|
236 | size_t previous_heap_end = input.tellg();
|
---|
237 |
|
---|
238 | vector<vector<pair<int64_t, int64_t> > > catalog;
|
---|
239 |
|
---|
240 | uint64_t offset_in_heap = 0;
|
---|
241 | while (!input.eof())
|
---|
242 | {
|
---|
243 | previous_heap_end = output_heap_end;
|
---|
244 | output_heap_end = input.tellg();
|
---|
245 |
|
---|
246 | byte_where_tile_start = input.tellg();
|
---|
247 | block_bytes = 0;
|
---|
248 |
|
---|
249 | if (input.peek() == EOF)
|
---|
250 | {
|
---|
251 | cout << "Reached end-of-file." << endl;
|
---|
252 | break;
|
---|
253 | }
|
---|
254 |
|
---|
255 | input.read((char*)(&tile_head), sizeof(TileHeader));
|
---|
256 | if (memcmp(tile_head.id, "TILE", 4) || input.eof())
|
---|
257 | {
|
---|
258 | cout << "Reached end-of-table padding or end-of-file." << endl;
|
---|
259 | break;
|
---|
260 | }
|
---|
261 |
|
---|
262 | offset_in_heap += sizeof(TileHeader);
|
---|
263 | catalog.emplace_back();
|
---|
264 |
|
---|
265 | for (int i=0;i<num_blocks;i++)
|
---|
266 | {
|
---|
267 | input.read((char*)(&block_head), sizeof(BlockHeader));
|
---|
268 | if (input.eof())
|
---|
269 | {
|
---|
270 | cout << "Reached end-of-file." << endl;
|
---|
271 | break;
|
---|
272 | }
|
---|
273 | block_bytes += block_head.size;
|
---|
274 | input.seekg(byte_where_tile_start + sizeof(TileHeader) + block_bytes);
|
---|
275 | if (input.eof())
|
---|
276 | {
|
---|
277 | cout << "Reached end-of-file." << endl;
|
---|
278 | break;
|
---|
279 | }
|
---|
280 | catalog.back().emplace_back((int64_t)(block_head.size),offset_in_heap);
|
---|
281 | offset_in_heap += block_head.size;
|
---|
282 | }
|
---|
283 |
|
---|
284 | //one last catalog entry for the column that has no data
|
---|
285 | catalog.back().emplace_back(0,0);
|
---|
286 |
|
---|
287 | cout << "\rByte " << input.tellg() << " ";
|
---|
288 | num_tiles++;
|
---|
289 | }
|
---|
290 |
|
---|
291 | input.clear();
|
---|
292 |
|
---|
293 | // ------------------------------------------------------------
|
---|
294 |
|
---|
295 | //ignore last read tile as we have no idea if it looked ok or not
|
---|
296 | num_tiles--;
|
---|
297 | output_heap_end = previous_heap_end;
|
---|
298 |
|
---|
299 | const uint64_t num_cols = num_blocks+1;//+1 because of not-used drs-time-marker column
|
---|
300 | const uint64_t output_catalog_end = output_catalog_start + num_tiles*num_cols*16;
|
---|
301 |
|
---|
302 | const uint64_t zheapptr = output_heap_start - output_catalog_start;
|
---|
303 | const uint64_t theap = output_catalog_end - output_catalog_start;
|
---|
304 |
|
---|
305 | //Now we have everything that we need. Go again through the input file and write the output, fixed file
|
---|
306 | cout << "Data recovered.\n Writing output file: " << output_name << endl;
|
---|
307 |
|
---|
308 | // ============================================================
|
---|
309 |
|
---|
310 | ofstream output(output_name.c_str(), ios_base::binary);
|
---|
311 | input.seekg(0);
|
---|
312 |
|
---|
313 | //copy everything until the catalog start and update header values on our way
|
---|
314 | counter = 0;
|
---|
315 | for (uint64_t i=0; i<output_catalog_start; i+=80)
|
---|
316 | {
|
---|
317 | input.read(buffer, 80);
|
---|
318 |
|
---|
319 | if (!strcmp(buffer, "XTENSION= 'BINTABLE' / binary table extension "))
|
---|
320 | counter++;
|
---|
321 |
|
---|
322 | if (counter == 2)
|
---|
323 | {
|
---|
324 | bool rc = true;
|
---|
325 | if (!memcmp(buffer, "NAXIS2", 6))
|
---|
326 | rc &= update_header_key(buffer, "NAXIS2", num_tiles, "number of rows in table");
|
---|
327 | if (!memcmp(buffer, "ZNAXIS2", 7))
|
---|
328 | rc &= update_header_key(buffer, "ZNAXIS2", num_tiles, "Number of uncompressed rows");
|
---|
329 | if (!memcmp(buffer, "PCOUNT", 6))
|
---|
330 | rc &= update_header_key(buffer, "PCOUNT", output_heap_end - output_catalog_end, "size of special data area");
|
---|
331 | if (!memcmp(buffer, "THEAP", 5))
|
---|
332 | rc &= update_header_key(buffer, "THEAP", theap, "Pointer to heap");
|
---|
333 | if (!memcmp(buffer, "ZHEAPPTR", 8))
|
---|
334 | rc &= update_header_key(buffer, "ZHEAPPTR", zheapptr, "Pointer to data");
|
---|
335 |
|
---|
336 | if (!rc)
|
---|
337 | return 5;
|
---|
338 | }
|
---|
339 |
|
---|
340 | output.write(buffer, 80);
|
---|
341 |
|
---|
342 | cout << "\rByte " << input.tellg() << " ";
|
---|
343 | }
|
---|
344 |
|
---|
345 | // ------------------------------------------------------------
|
---|
346 |
|
---|
347 | // now write the updated catalog
|
---|
348 | const uint64_t reserved_num_tiles = zheapptr / (num_cols*16);
|
---|
349 | const uint32_t one_catalog_row_size = num_cols*16;
|
---|
350 | const uint32_t total_catalog_size = reserved_num_tiles*one_catalog_row_size;
|
---|
351 |
|
---|
352 | vector<char> swapped_catalog(total_catalog_size);
|
---|
353 |
|
---|
354 | uint32_t shift = 0;
|
---|
355 | for (auto it=catalog.cbegin(); it!=catalog.cend(); it++)
|
---|
356 | {
|
---|
357 | revcpy<sizeof(int64_t)>(swapped_catalog.data() + shift, (char*)(it->data()), num_cols*2);
|
---|
358 | shift += one_catalog_row_size;
|
---|
359 | }
|
---|
360 |
|
---|
361 | if (num_tiles < reserved_num_tiles)
|
---|
362 | memset(swapped_catalog.data()+shift, 0, total_catalog_size - shift);
|
---|
363 |
|
---|
364 | output.write(swapped_catalog.data(), total_catalog_size);
|
---|
365 |
|
---|
366 | // ------------------------------------------------------------
|
---|
367 |
|
---|
368 | // we are now at the beginning of the heap in the output file. Move there in the input file
|
---|
369 | input.seekg(output_heap_start);
|
---|
370 |
|
---|
371 | uint64_t num_heap_bytes_written = 0;
|
---|
372 | uint64_t total_heap_bytes_to_write = output_heap_end - output_heap_start;
|
---|
373 |
|
---|
374 | cout << endl << "Catalog updated. Copying " << total_heap_bytes_to_write << " of data now." << endl;
|
---|
375 |
|
---|
376 | while (num_heap_bytes_written != total_heap_bytes_to_write)
|
---|
377 | {
|
---|
378 | uint64_t bytes_to_write = 2048;
|
---|
379 | if (total_heap_bytes_to_write - num_heap_bytes_written < 2048)
|
---|
380 | bytes_to_write = total_heap_bytes_to_write - num_heap_bytes_written;
|
---|
381 |
|
---|
382 | input.read(buffer, bytes_to_write);
|
---|
383 | output.write(buffer, bytes_to_write);
|
---|
384 | num_heap_bytes_written += bytes_to_write;
|
---|
385 | cout << "\rByte " << input.tellg() << " ";
|
---|
386 | }
|
---|
387 |
|
---|
388 | cout << "\nData written. Adding FITS padding. " << endl;
|
---|
389 |
|
---|
390 | // ------------------------------------------------------------
|
---|
391 |
|
---|
392 | //now add the FITS padding at the end
|
---|
393 | while (output.tellp() % 2880 != 0)
|
---|
394 | {
|
---|
395 | output.write("\0", 1);
|
---|
396 | }
|
---|
397 | cout << "File recovered!\n";
|
---|
398 |
|
---|
399 | input.close();
|
---|
400 | output.close();
|
---|
401 |
|
---|
402 | // ------------------------------------------------------------
|
---|
403 |
|
---|
404 | cout << "\nValid num tiles: " << num_tiles;
|
---|
405 | cout << "\nCatalog start : " << output_catalog_start;
|
---|
406 | cout << "\nCatalog end : " << output_catalog_end;
|
---|
407 | cout << "\nHeap start : " << output_heap_start;
|
---|
408 | cout << "\nHeap end : " << output_heap_end;
|
---|
409 | cout << "\nZHEAPPTR : " << zheapptr;
|
---|
410 | cout << "\nTHEAP : " << theap;
|
---|
411 | cout << "\nPCOUNT : " << output_heap_end - output_catalog_end;
|
---|
412 | cout << "\nNAXIS2 : " << num_tiles;
|
---|
413 | cout << "\nZNAXIS2 : " << num_tiles;
|
---|
414 |
|
---|
415 | cout << "\n\nYou should now:\n";
|
---|
416 | cout << "- update TSTOPI and TSTOPF using \"fitsdump -c UnixTimeUTC --minmax --nozero\" and then fv\n";
|
---|
417 | cout << "- update DATE-END using TSTOP given to the MjDtoISO program in ~fact_arc/ingestScripts and then fv again\n";
|
---|
418 | cout << "- update the checksum of the file with \"fchecksum update+ <filename>\"\n" << endl;
|
---|
419 |
|
---|
420 | return 0;
|
---|
421 |
|
---|
422 | }
|
---|