Index: /trunk/FACT++/src/fixfits.cc
===================================================================
--- /trunk/FACT++/src/fixfits.cc	(revision 20035)
+++ /trunk/FACT++/src/fixfits.cc	(revision 20035)
@@ -0,0 +1,422 @@
+//Compile with g++ -o FixFits fixFits.cpp -O2 -std=c++11
+//This is a "FITS file fixer" program for FACT.
+//It goes through the file, relying only on basic header data and compressed blocks headers
+//It will reconstruct the catalog data too in the output file a strip all corrupt data
+//The actual header keywords must be edited with the FTOOLS afterwards
+
+#include <string>
+#include <iostream>
+#include <iomanip>
+#include <fstream>
+#include <cstring>
+#include <vector>
+#include <algorithm>
+#include <sstream>
+
+#include <boost/filesystem.hpp>
+
+#include "FITS.h"
+#include "Time.h"
+#include "Configuration.h"
+
+using namespace std;
+using namespace FITS;
+namespace fs = boost::filesystem;
+
+
+template<size_t N>
+    void revcpy(char *dest, const char *src, int num)
+{
+    const char *pend = src + num*N;
+    for (const char *ptr = src; ptr<pend; ptr+=N, dest+=N)
+        std::reverse_copy(ptr, ptr+N, dest);
+}
+
+bool update_header_key(char buffer[], const string& keyword, uint64_t value, const string& comment)
+{
+    ostringstream str;
+    str << setfill(' ');
+    str << value;
+
+    const auto num_length = str.str().size();
+    str.str("");
+
+    str << setw(8-keyword.size()) << keyword;
+
+    str << "= ";
+
+    str << setw(20-num_length) << value << " / " << comment;
+
+    str << setw(80-str.str().size()) << ' ';
+
+    cout << '\n' << str.str() << '\n';
+
+    if (str.str().size() != 80)
+    {
+        cerr << "Wrong " << keyword << " string size." << endl;
+        return false;
+    }
+
+    memcpy(buffer, str.str().c_str(), 80);
+
+    return true;
+}
+
+void SetupConfiguration(Configuration &conf)
+{
+    po::options_description control("Root to SQL");
+    control.add_options()
+        ("file",  var<string>()->required(),  "Input file name. Fits file to be repaired.")
+        ("out,o", var<string>(),              "Output file name (+'.repaired' if empty)")
+        ;
+
+    po::positional_options_description p;
+    p.add("file", 1); // All positional options
+    p.add("out",  1); // All positional options
+
+    conf.AddOptions(control);
+    conf.SetArgumentPositions(p);
+}
+
+void PrintUsage()
+{
+    cout <<
+        "fixfits - Fix the header of a none closed raw-data file\n"
+        "\n"
+        "Usage: fixfits input-file-name [output-file-name]\n"
+        "\n"
+        ;
+    cout << endl;
+}
+
+
+int main(int argc, const char* argv[])
+{
+    Time start;
+
+    Configuration conf(argv[0]);
+    conf.SetPrintUsage(PrintUsage);
+    SetupConfiguration(conf);
+
+    if (!conf.DoParse(argc, argv))
+        return 127;
+
+    // ----------------------------- Evaluate options --------------------------
+    const string input_name  = conf.Get<string>("file");
+    const string output_name = conf.Has("out") ? conf.Get<string>("out") :
+        (fs::path(input_name)/".repaired").string();
+
+    cout << "Reading: " << input_name << endl;
+
+    ifstream input(input_name.c_str(), ios_base::binary);
+
+    cout << "Looking for beginning of data table." << endl;
+
+    // ------------------------------------------------------------
+
+    //first off, find the beginning of the data table
+    char buffer[2048];
+    buffer[80] = '\0';
+
+    size_t counter = 0;
+    size_t num_bytes = 80;
+    input.read(buffer, 80);
+
+    while (counter != 2)
+    {
+        if (!strcmp(buffer, "XTENSION= 'BINTABLE'           / binary table extension                         "))
+            counter++;
+
+        if (input.eof())
+        {
+            cout << "Reached end-of-file but could not find start of data table." << endl;
+            return 2;
+        }
+
+        input.read(buffer, 80);
+        num_bytes+=80;
+
+        cout << "\rByte " << num_bytes << "      ";
+    }
+    cout << "\nFound beginning of data table." << endl;
+
+    // ------------------------------------------------------------
+
+    //we just read the start of the data table. continue until the end of the table header
+    while (strcmp(buffer, "END                                                                             "))
+    {
+        if (input.eof())
+        {
+            cout << "\nData table header was not closed properly. Aborting." << endl;
+            return 3;
+        }
+
+        input.read(buffer, 80);
+        num_bytes+=80;
+
+        cout << "\rByte " << num_bytes << "       ";
+    }
+
+    cout << "\nReached end of data table header" << endl;
+
+    // ------------------------------------------------------------
+
+    //now skip the fits padding
+    while (num_bytes % 2880 != 0)
+    {
+        input.read(buffer, 80);
+        num_bytes+=80;
+
+        cout << "\rByte " << num_bytes << "       ";
+    }
+
+
+    const size_t output_catalog_start = input.tellg();
+
+    //now we are in the catalog. skip it until we find the string 'TILE'
+    buffer[4] = '\0';
+    while (strcmp(buffer, "TILE"))
+    {
+        input.read(buffer, 4);
+        num_bytes += 4;
+        cout << "\rByte " << num_bytes << "      ";
+    }
+
+    input.seekg(num_bytes-4);
+    cout << "\nFound start of very first compressed tile" << endl;
+
+    // ------------------------------------------------------------
+
+    const size_t output_heap_start = input.tellg();
+
+    size_t byte_where_tile_start = input.tellg();
+
+    //now figure out how many blocks are in each tile
+    TileHeader tile_head;
+    BlockHeader block_head;
+
+    input.read((char*)(&tile_head), sizeof(TileHeader));
+
+    cout << "First tile is " << tile_head.size << " bytes long" << endl;
+
+    // ------------------------------------------------------------
+
+    size_t block_bytes = 0;
+    size_t num_blocks = 0;
+
+    while (input.tellg() < byte_where_tile_start+tile_head.size)
+    {
+        input.read((char*)(&block_head), sizeof(BlockHeader));
+        block_bytes += block_head.size;
+
+        input.seekg(byte_where_tile_start + sizeof(TileHeader) + block_bytes);
+        num_blocks ++;
+
+        if (input.eof())
+        {
+            cout << "First tile seems broken unable to recover anything." << endl;
+            return 4;
+        }
+    }
+
+    if (input.tellg() != byte_where_tile_start+tile_head.size)
+    {
+        cout << "Discrepancy between tile size and sum of blocks size." << endl;
+        return 1;
+    }
+
+    cout << "There are " << num_blocks << " blocks in each tile. Now looping over all tiles" << endl;
+
+    // ------------------------------------------------------------
+
+    size_t num_tiles = 0;
+    input.seekg(output_heap_start);
+
+    size_t output_heap_end = input.tellg();
+    size_t previous_heap_end = input.tellg();
+
+    vector<vector<pair<int64_t, int64_t> > > catalog;
+
+    uint64_t offset_in_heap = 0;
+    while (!input.eof())
+    {
+        previous_heap_end = output_heap_end;
+        output_heap_end = input.tellg();
+
+        byte_where_tile_start = input.tellg();
+        block_bytes = 0;
+
+        if (input.peek() == EOF)
+        {
+            cout << "Reached end-of-file." << endl;
+            break;
+        }
+
+        input.read((char*)(&tile_head), sizeof(TileHeader));
+        if (memcmp(tile_head.id, "TILE", 4) || input.eof())
+        {
+            cout << "Reached end-of-table padding or end-of-file." << endl;
+            break;
+        }
+
+        offset_in_heap += sizeof(TileHeader);
+        catalog.emplace_back();
+
+        for (int i=0;i<num_blocks;i++)
+        {
+            input.read((char*)(&block_head), sizeof(BlockHeader));
+            if (input.eof())
+            {
+                cout << "Reached end-of-file." << endl;
+                break;
+            }
+            block_bytes += block_head.size;
+            input.seekg(byte_where_tile_start + sizeof(TileHeader) + block_bytes);
+            if (input.eof())
+            {
+                cout << "Reached end-of-file." << endl;
+                break;
+            }
+            catalog.back().emplace_back((int64_t)(block_head.size),offset_in_heap);
+            offset_in_heap += block_head.size;
+        }
+
+        //one last catalog entry for the column that has no data
+        catalog.back().emplace_back(0,0);
+
+        cout << "\rByte " << input.tellg() << "      ";
+        num_tiles++;
+    }
+
+    input.clear();
+
+    // ------------------------------------------------------------
+
+    //ignore last read tile as we have no idea if it looked ok or not
+    num_tiles--;
+    output_heap_end = previous_heap_end;
+
+    const uint64_t num_cols = num_blocks+1;//+1 because of not-used drs-time-marker column
+    const uint64_t output_catalog_end = output_catalog_start + num_tiles*num_cols*16;
+
+    const uint64_t zheapptr = output_heap_start - output_catalog_start;
+    const uint64_t theap    = output_catalog_end - output_catalog_start;
+
+    //Now we have everything that we need. Go again through the input file and write the output, fixed file
+    cout << "Data recovered.\n Writing output file: " << output_name << endl;
+
+    // ============================================================
+
+    ofstream output(output_name.c_str(), ios_base::binary);
+    input.seekg(0);
+
+    //copy everything until the catalog start and update header values on our way
+    counter = 0;
+    for (uint64_t i=0; i<output_catalog_start; i+=80)
+    {
+        input.read(buffer, 80);
+
+        if (!strcmp(buffer, "XTENSION= 'BINTABLE'           / binary table extension                         "))
+            counter++;
+
+        if (counter == 2)
+        {
+            bool rc = true;
+            if (!memcmp(buffer, "NAXIS2", 6))
+                rc &= update_header_key(buffer, "NAXIS2", num_tiles, "number of rows in table");
+            if (!memcmp(buffer, "ZNAXIS2", 7))
+                rc &= update_header_key(buffer, "ZNAXIS2", num_tiles, "Number of uncompressed rows");
+            if (!memcmp(buffer, "PCOUNT", 6))
+                rc &= update_header_key(buffer, "PCOUNT", output_heap_end - output_catalog_end, "size of special data area");
+            if (!memcmp(buffer, "THEAP", 5))
+                rc &= update_header_key(buffer, "THEAP", theap, "Pointer to heap");
+            if (!memcmp(buffer, "ZHEAPPTR", 8))
+                rc &= update_header_key(buffer, "ZHEAPPTR", zheapptr, "Pointer to data");
+
+            if (!rc)
+                return 5;
+        }
+
+        output.write(buffer, 80);
+
+        cout << "\rByte " << input.tellg() << "       ";
+    }
+
+    // ------------------------------------------------------------
+
+    // now write the updated catalog
+    const uint64_t reserved_num_tiles   = zheapptr / (num_cols*16);
+    const uint32_t one_catalog_row_size = num_cols*16;
+    const uint32_t total_catalog_size   = reserved_num_tiles*one_catalog_row_size;
+
+    vector<char> swapped_catalog(total_catalog_size);
+
+    uint32_t shift = 0;
+    for (auto it=catalog.cbegin(); it!=catalog.cend(); it++)
+    {
+        revcpy<sizeof(int64_t)>(swapped_catalog.data() + shift, (char*)(it->data()), num_cols*2);
+        shift += one_catalog_row_size;
+    }
+
+    if (num_tiles < reserved_num_tiles)
+        memset(swapped_catalog.data()+shift, 0, total_catalog_size - shift);
+
+    output.write(swapped_catalog.data(), total_catalog_size);
+
+    // ------------------------------------------------------------
+
+    // we are now at the beginning of the heap in the output file. Move there in the input file
+    input.seekg(output_heap_start);
+
+    uint64_t num_heap_bytes_written = 0;
+    uint64_t total_heap_bytes_to_write = output_heap_end - output_heap_start;
+
+    cout << endl << "Catalog updated. Copying " << total_heap_bytes_to_write << " of data now." << endl;
+
+    while (num_heap_bytes_written != total_heap_bytes_to_write)
+    {
+        uint64_t bytes_to_write = 2048;
+        if (total_heap_bytes_to_write - num_heap_bytes_written < 2048)
+            bytes_to_write = total_heap_bytes_to_write - num_heap_bytes_written;
+
+        input.read(buffer, bytes_to_write);
+        output.write(buffer, bytes_to_write);
+        num_heap_bytes_written += bytes_to_write;
+        cout << "\rByte " << input.tellg() << "           ";
+    }
+
+    cout << "\nData written. Adding FITS padding. " << endl;
+
+    // ------------------------------------------------------------
+
+    //now add the FITS padding at the end
+    while (output.tellp() % 2880 != 0)
+    {
+        output.write("\0", 1);
+    }
+    cout << "File recovered!\n";
+
+    input.close();
+    output.close();
+
+    // ------------------------------------------------------------
+
+    cout << "\nValid num tiles: " << num_tiles;
+    cout << "\nCatalog start  : " << output_catalog_start;
+    cout << "\nCatalog end    : " << output_catalog_end;
+    cout << "\nHeap start     : " << output_heap_start;
+    cout << "\nHeap end       : " << output_heap_end;
+    cout << "\nZHEAPPTR       : " << zheapptr;
+    cout << "\nTHEAP          : " << theap;
+    cout << "\nPCOUNT         : " << output_heap_end - output_catalog_end;
+    cout << "\nNAXIS2         : " << num_tiles;
+    cout << "\nZNAXIS2        : " << num_tiles;
+
+    cout << "\n\nYou should now:\n";
+    cout << "- update TSTOPI and TSTOPF using \"fitsdump -c UnixTimeUTC --minmax --nozero\" and then fv\n";
+    cout << "- update DATE-END using TSTOP given to the MjDtoISO program in ~fact_arc/ingestScripts and then fv again\n";
+    cout << "- update the checksum of the file with \"fchecksum update+ <filename>\"\n" << endl;
+
+    return 0;
+
+}
