//Compile with g++ -o FixFits fixFits.cpp -O2 -std=c++11 //This is a "FITS file fixer" program for FACT. //It goes through the file, relying only on basic header data and compressed blocks headers //It will reconstruct the catalog data too in the output file a strip all corrupt data //The actual header keywords must be edited with the FTOOLS afterwards #include #include #include #include #include #include #include #include #include #include "factfits.h" #include "Time.h" #include "Configuration.h" using namespace std; using namespace FITS; namespace fs = boost::filesystem; template void revcpy(char *dest, const char *src, int num) { const char *pend = src + num*N; for (const char *ptr = src; ptr()->required(), "Input file name. Fits file to be repaired.") ("out,o", var(), "Output file name (+'.repaired' if empty), dry-run if not provided") ("no-progress", po_switch(), "Suppress progress output") ("dry-run", po_switch(), "Do not write any output") ; po::positional_options_description p; p.add("file", 1); // All positional options p.add("out", 1); // All positional options conf.AddOptions(control); conf.SetArgumentPositions(p); } void PrintUsage() { cout << "fixfits - Fix the header of a none closed raw-data file\n" "\n" "The goal of this tool is to touch the data as less as possible. " "This, however, might result in a few more bytes being lost than " "strictly necessary.\n" "\n" "Usage: fixfits input-file-name [output-file-name]\n" "\n" ; cout << endl; } int main(int argc, const char* argv[]) { Configuration conf(argv[0]); conf.SetPrintUsage(PrintUsage); SetupConfiguration(conf); if (!conf.DoParse(argc, argv)) return 127; // ----------------------------- Evaluate options -------------------------- const string input_name = conf.Get("file"); const string output_name = conf.Has("out") ? conf.Get("out") : (input_name+".repaired"); const bool no_progress = conf.Get("no-progress"); const bool dry_run = conf.Get("dry-run"); cout << "\nReading: " << input_name << endl; Time start_read; ifstream input(input_name.c_str(), ios_base::binary); if (!input) { cerr << "Error opening file: " << strerror(errno) << endl; return 6; } cout << "Seeking beginning of data table." << endl; // ------------------------------------------------------------ //first off, find the beginning of the data table char buffer[2048]; buffer[80] = '\0'; size_t counter = 0; size_t num_bytes = 80; input.read(buffer, 80); while (counter != 2) { if (!strcmp(buffer, "XTENSION= 'BINTABLE' / binary table extension ")) counter++; if (input.eof()) { cerr << "Reached end-of-file but could not find start of data table." << endl; return 2; } input.read(buffer, 80); num_bytes+=80; if (!no_progress) cout << "\rByte " << num_bytes << " "; } if (no_progress) cout << "Reached " << num_bytes << '\n'; cout << "\nFound beginning of data table." << endl; // ------------------------------------------------------------ //we just read the start of the data table. continue until the end of the table header while (strcmp(buffer, "END ")) { if (input.eof()) { cerr << "Data table header was not closed properly. Aborting." << endl; return 3; } input.read(buffer, 80); num_bytes+=80; if (!no_progress) cout << "\rByte " << num_bytes << " "; } if (no_progress) cout << "Reached " << num_bytes << '\n'; cout << "\nReached end of data table header" << endl; // ------------------------------------------------------------ //now skip the fits padding while (num_bytes % 2880 != 0) { input.read(buffer, 80); num_bytes+=80; if (!no_progress) cout << "\rByte " << num_bytes << " "; } if (no_progress) cout << "Reached " << num_bytes << '\n'; const size_t output_catalog_start = input.tellg(); //now we are in the catalog. skip it until we find the string 'TILE' buffer[4] = '\0'; while (strcmp(buffer, "TILE")) { input.read(buffer, 4); num_bytes += 4; if (!no_progress) cout << "\rByte " << num_bytes << " "; } if (no_progress) cout << "Reached " << num_bytes << '\n'; input.seekg(num_bytes-4); cout << "\nFound start of first compressed tile." << endl; // ------------------------------------------------------------ const size_t output_heap_start = input.tellg(); size_t byte_where_tile_start = input.tellg(); //now figure out how many blocks are in each tile TileHeader tile_head; BlockHeader block_head; input.read(reinterpret_cast(&tile_head), sizeof(TileHeader)); cout << "First tile is " << tile_head.size << " bytes long." << endl; // ------------------------------------------------------------ size_t block_bytes = 0; size_t num_blocks = 0; while (size_t(input.tellg()) < byte_where_tile_start+tile_head.size) { input.read(reinterpret_cast(&block_head), sizeof(BlockHeader)); block_bytes += block_head.size; input.seekg(byte_where_tile_start + sizeof(TileHeader) + block_bytes); num_blocks ++; if (input.eof()) { cerr << "First tile seems broken unable to recover anything." << endl; return 4; } } if (size_t(input.tellg()) != byte_where_tile_start+tile_head.size) { cerr << "Discrepancy between tile size and sum of blocks size." << endl; return 1; } cout << "There are " << num_blocks << " blocks in each tile.\nLooping over all tiles" << endl; // ------------------------------------------------------------ size_t num_tiles = 0; input.seekg(output_heap_start); size_t output_heap_end = input.tellg(); size_t previous_heap_end = input.tellg(); vector > > catalog; uint64_t offset_in_heap = 0; while (!input.eof()) { previous_heap_end = output_heap_end; output_heap_end = input.tellg(); byte_where_tile_start = input.tellg(); block_bytes = 0; if (input.peek() == EOF) { cout << "Reached end-of-file." << endl; break; } input.read(reinterpret_cast(&tile_head), sizeof(TileHeader)); if (memcmp(tile_head.id, "TILE", 4) || input.eof()) { cout << "Reached end-of-table padding or end-of-file." << endl; break; } offset_in_heap += sizeof(TileHeader); catalog.emplace_back(); for (size_t i=0; i header; if (!dry_run) { ofstream output(output_name.c_str(), ios_base::binary); input.seekg(0); //copy everything until the catalog start and update header values on our way counter = 0; for (uint64_t i=0; i swapped_catalog(total_catalog_size); uint32_t shift = 0; for (auto it=catalog.cbegin(); it!=catalog.cend(); it++) { revcpy(swapped_catalog.data() + shift, (char*)(it->data()), num_cols*2); shift += one_catalog_row_size; } if (num_tiles < reserved_num_tiles) memset(swapped_catalog.data()+shift, 0, total_catalog_size - shift); output.write(swapped_catalog.data(), total_catalog_size); // ------------------------------------------------------------ // we are now at the beginning of the heap in the output file. Move there in the input file input.seekg(output_heap_start); uint64_t num_heap_bytes_written = 0; uint64_t total_heap_bytes_to_write = output_heap_end - output_heap_start; cout << "\nCatalog updated.\nCopying " << total_heap_bytes_to_write << " of data now." << endl; while (num_heap_bytes_written != total_heap_bytes_to_write) { uint64_t bytes_to_write = 2048; if (total_heap_bytes_to_write - num_heap_bytes_written < 2048) bytes_to_write = total_heap_bytes_to_write - num_heap_bytes_written; input.read(buffer, bytes_to_write); output.write(buffer, bytes_to_write); num_heap_bytes_written += bytes_to_write; if (!no_progress) cout << "\rByte " << input.tellg() << " "; } if (no_progress) cout << "Reached " << input.tellg() << '\n'; cout << "\nData written. Adding FITS padding. " << endl; // ------------------------------------------------------------ //now add the FITS padding at the end while (output.tellp() % 2880 != 0) output.write("\0", 1); } else cout << " ... skipped ...\n"; const auto time_write = Time().UnixTime()-start_write.UnixTime(); cout << "File recovered!\n"; // ------------------------------------------------------------ cout << "\nValid num tiles : " << setw(12) << num_tiles; cout << "\nNAXIS2 : " << setw(12) << num_tiles; cout << "\nZNAXIS2 : " << setw(12) << num_tiles; cout << "\nCatalog start : " << setw(12) << output_catalog_start; cout << "\nCatalog end : " << setw(12) << output_catalog_end; cout << "\nZHEAPPTR : " << setw(12) << zheapptr; cout << "\nTHEAP : " << setw(12) << theap; cout << "\nHeap start : " << setw(12) << output_heap_start; cout << "\nHeap end : " << setw(12) << output_heap_end; cout << "\nPCOUNT : " << setw(12) << output_heap_end - output_catalog_end; cout << '\n'; cout << "\nInput file size : " << setw(12) << fs::file_size(input_name); if (!dry_run) { cout << "\nOutput file size : " << setw(12) << fs::file_size(output_name); cout << "\nLost bytes : " << setw(12) << fs::file_size(input_name)-fs::file_size(output_name); cout << "\nLost fraction : " << setw(12) << 1-double(fs::file_size(output_name))/fs::file_size(input_name); } cout << "\n"; cout << "\nExecution [read] : " << time_read << "s, " << fs::file_size(input_name)/1024./1024/time_read << "Mb/s"; cout << "\nExecution [write] : " << time_write << "s, " << fs::file_size(output_name)/1024./1024/time_write << "Mb/s"; if (0) return 0; factfits fits(output_name); cout << "\nNew file contains " << fits.GetNumRows() << " events."; cout << "\nChecking for raw data contents." << endl; if (fits.GetStr("TELESCOP")!="FACT" || fits.GetStr("ORIGIN")!="FACT" || fits.GetStr("PACKAGE")!="FACT++" || fits.GetStr("EXTNAME")!="Events" || !fits.HasKey("DRSCALIB")) { cerr << "Header incomplete." << endl; return 0; } if (!fits.HasColumn("UnixTimeUTC")) { cerr << "Column UnixTimeUTC missing." << endl; return 0; } cout << "\nRaw data file detected."; cout << "\nUpdating run-end with last event time." << endl; vector utime(2); fits.SetVecAddress("UnixTimeUTC", utime); fits.GetRow(fits.GetNumRows()-1); const Time stop(utime[0], utime[1]); const uint32_t tstopi = floor(stop.UnixDate()); const double tstopf = fmod(stop.UnixDate(), 1); fits.close(); cout << setfill(' '); cout << "\nTSTOPI = " << right << setw(20) << tstopi << " / Time when last event received (integral part)"; cout << "\nTSTOPF = " << right << setw(20) << tstopf << " / Time when last event received (fractional part)"; cout << "\nDATE-END= '" << stop.Iso() << "' / Time when last event received"; fstream fout(output_name);//, ios::binary); fout << setfill(' '); fout.seekp(header["DATE-END"], ios::beg); //fout.seekg(header["DATE-END"], ios::beg); fout << left << setw(20) << string("'" + stop.Iso() + "'") << " / Time when last event received"; fout.seekp(header["TSTOPI "], ios::beg); //fout.seekg(header["TSTOPI "], ios::beg); fout << right << setw(20) << tstopi; fout.seekp(header["TSTOPF "], ios::beg); //fout.seekg(header["TSTOPF "], ios::beg); fout << right << setw(20) << tstopf; fout.flush(); cout << "\nUpdating checksum." << endl; factfits fits2(output_name); fout.seekp(header["CHECKSUM"], ios::beg); //fout.seekg(header["CHECKSUM"], ios::beg); fout << left << setw(20) << string("'" + fits2.GetChecksum().str() + "'"); fout.close(); fits2.close(); factfits fits3(output_name); if (!fits3.IsHeaderOk() || !fits3.IsFileOk()) { cerr << "\nFAILED: The checksum is still invalid! :(" << endl; return 7; } cout << "\nFile header successfully updated!\n" << endl; return 0; }