/*
 * zfits.h
 *
 *  Created on: May 16, 2013
 *      Author: lyard
 */

#ifndef MARS_ZFITS
#define MARS_ZFITS

#include <stdexcept>

#include "fits.h"
#include "huffman.h"

#ifndef __MARS__
namespace std
{
#endif

class zfits : public fits
{
public:

    // Codes of the processing steps listed in a block header
    // (BlockHeader::processings). UncompressBuffer() dispatches on these values.
    enum CompressionProcess_t
    {
        kFactRaw       = 0x0, // data copied verbatim, see UncompressUNCOMPRESSED()
        kFactSmoothing = 0x1, // integer smoothing, undone by UnApplySMOOTHING()
        kFactHuffman16 = 0x2  // Huffman coding of 16-bit words, see UncompressHUFFMAN16()
    };

    // Layout of one column block inside a tile, as found in
    // BlockHeader::ordering. ReadBinaryRow() selects its copy scheme from it.
    enum RowOrdering_t
    {
        kOrderByCol = 'C', // element after element: each element is stored for all rows of the tile
        kOrderByRow = 'R'  // row after row: the whole column cell of a row is stored contiguously
    };

    // Basic constructor.
    // All arguments are forwarded to the fits base class; afterwards the
    // members needed for reading compressed tables are set up by
    // InitCompressionReading().
    // Every scalar member gets a defined start value, including
    // fHeapFromDataStart, which is otherwise only assigned by ReadCatalog().
    zfits(const string& fname, const string& tableName="",
          bool force=false) : fits(fname, tableName, force),
        fNumTiles(0),
        fNumRowsPerTile(0),
        fCurrentRow(-1),
        fHeapOff(0),
        fHeapFromDataStart(0),
        fTileSize(0)
    {
        InitCompressionReading();
    }

    // Alternative constructor.
    // Same as the basic constructor, but with an additional file name (fout)
    // that is forwarded to the fits base class together with the other
    // arguments.
    // Every scalar member gets a defined start value, including
    // fHeapFromDataStart, which is otherwise only assigned by ReadCatalog().
    zfits(const string& fname, const string& fout, const string& tableName,
          bool force=false) : fits(fname, fout, tableName, force),
              fNumTiles(0),
              fNumRowsPerTile(0),
              fCurrentRow(-1),
              fHeapOff(0),
              fHeapFromDataStart(0),
              fTileSize(0)
    {
        InitCompressionReading();
    }

    // Skip the next row.
    // For a compressed table only the row counter is advanced, nothing is
    // read. Plain tables are handled by the base class.
    bool SkipNextRow()
    {
        if (fTable.is_compressed)
        {
            fRow++;
            return true;
        }

        return fits::SkipNextRow();
    }

    virtual bool IsFileOk() const
    {
        bool rawsum = true;

        if (HasKey("RAWSUM"))
        {
                ostringstream str;
                str << fRawsum.val();
                rawsum = (GetStr("RAWSUM") == str.str());
        }

        return fits::IsFileOk() && rawsum;
    };

protected:

    //  Stage the requested row to internal buffer
    //  Does NOT return data to users
    //  Compressed tables go through ReadBinaryRow(), everything else is
    //  delegated to the base class.
    virtual void StageRow(size_t row, char* dest)
    {
        if (fTable.is_compressed)
            ReadBinaryRow(row, dest);
        else
            fits::StageRow(row, dest);
    }

private:
#ifndef __CINT__
    // Structure helper for reading tile headers.
    // Packed, so that it can be read straight from the file: 4 bytes of
    // identifier, 4 bytes for the number of rows and 8 bytes for the size.
    struct TileHeader
    {
      char     id[4];   // identifier, "TILE" for a valid header
      uint32_t numRows; // number of rows in the tile
      uint64_t size;    // size of the tile

      // Leaves all members uninitialized: meant to be filled by a read
      TileHeader() {}

      // The array member is list-initialized directly. A parenthesized
      // brace list is not a valid initializer for an array.
      TileHeader(uint32_t nRows,
                 uint64_t s) : id{'T', 'I', 'L', 'E'},
                                 numRows(nRows),
                                 size(s)
      { }
    } __attribute__((__packed__));

    // Structure helper for reading block headers and compression schemes.
    // One such header precedes the data of every column inside a tile.
    struct BlockHeader
    {
        uint64_t      size;          // size of the block
        char          ordering;      // one of RowOrdering_t
        unsigned char numProcs;      // number of entries in processings
        uint16_t      processings[]; // CompressionProcess_t codes, stored right behind the header

        BlockHeader(uint64_t s=0, char o=kOrderByRow, unsigned char n=1)
            : size(s), ordering(o), numProcs(n)
        {
        }
    } __attribute__((__packed__));
#endif
    // Do what it takes to initialize the structures used for reading
    // compressed tables: check the compression scheme of all columns, read
    // the tiling keywords from the header, allocate the buffers and read
    // the catalog. Does nothing for plain tables.
    void InitCompressionReading()
    {
        // Nothing to do for plain tables. The base class constructor may
        // also have failed, in which case there is nothing to read.
        if (!fTable.is_compressed || !good())
            return;

        // Every column must use the FACT scheme
        for (const auto &col : fTable.sorted_cols)
        {
            if (col.comp == kCompFACT)
                continue;

            clear(rdstate()|ios::badbit);
#ifdef __EXCEPTIONS
            throw runtime_error("Only the FACT compression scheme is handled by this reader.");
#else
            gLog << ___err___ << "ERROR - Only the FACT compression scheme is handled by this reader." << endl;
            return;
#endif
        }

        // Start with row ordering for all columns. The real ordering is
        // taken from the block headers when a tile is uncompressed.
        fColumnOrdering.resize(fTable.sorted_cols.size());
        for (auto &ordering : fColumnOrdering)
            ordering = kOrderByRow;

        // Get compression specific keywords
        fNumTiles       = GetInt("NAXIS2");
        fNumRowsPerTile = GetInt("ZTILELEN");

        // Give it some space for uncompressing
        AllocateBuffers();

        // Read the file's catalog
        ReadCatalog();

        //check that heap agrees with head
        //CheckIfFileIsConsistent();
    }

    // Copy decompressed data to location requested by user.
    // Data of a compressed table is copied byte by byte (c.num elements of
    // c.size bytes each). Plain tables are left to the base class.
    void MoveColumnDataToUserSpace(char* dest, const char* src, const Table::Column& c)
    {
        if (fTable.is_compressed)
        {
            memcpy(dest, src, c.num*c.size);
            return;
        }

        fits::MoveColumnDataToUserSpace(dest, src, c);
    }

    vector<char> fBuffer;           ///< uncompressed rows of the tile currently in memory
    vector<char> fTransposedBuffer; ///< intermediate buffer: output of UncompressBuffer, before the rows are assembled
    vector<char> fCompressedBuffer; ///< compressed tile as read from the file
    vector<char> fColumnOrdering;   ///< ordering (RowOrdering_t) of each column's block. Can change from tile to tile.

    size_t fNumTiles;       ///< Total number of tiles (header key NAXIS2)
    size_t fNumRowsPerTile; ///< Number of rows per compressed tile (header key ZTILELEN)
    int64_t fCurrentRow;    ///< last row requested from ReadBinaryRow. Signed because -1 means "nothing read yet"

    streamoff fHeapOff;     ///< offset from the beginning of the file of the binary data
    streamoff fHeapFromDataStart; ///< offset of the heap from the start of the catalog: size of the catalog plus the heap shift

    vector<vector<pair<int64_t, int64_t>>> fCatalog;///< Catalog, i.e. the main table that points to the compressed data. One (size, offset) pair per tile and column.
    vector<size_t>                         fTileSize; ///< size in bytes of each compressed tile (sum of the sizes of its columns)
    vector<vector<size_t>>                 fTileOffsets; ///< offset of a given compressed column relative to the first column of its tile

    Checksum fRawsum;   ///< Checksum of the uncompressed, raw data

    // Get buffer space.
    // fBuffer and fTransposedBuffer hold one uncompressed tile each. The
    // compressed buffer additionally gets room for the headers and eight
    // more bytes for the checksum calculation.
    void AllocateBuffers()
    {
        const size_t tileBytes = fTable.bytes_per_row*fNumRowsPerTile;

        fBuffer.resize(tileBytes);
        fTransposedBuffer.resize(tileBytes);

        // The eight extra bytes must be ADDED to the size. Passed as a second
        // argument they would be taken as the fill value by resize() and the
        // buffer would be eight bytes too short.
        fCompressedBuffer.resize(tileBytes +
                                 fTable.num_cols*(sizeof(BlockHeader)+256) + //use a bit more memory for block headers. 256 char coding the compression sequence max.
                                 sizeof(TileHeader) + //a bit more for the tile headers
                                 8); //and a bit more for checksuming
    }

    // Read catalog data. I.e. the address of the compressed data inside the heap.
    // The catalog holds, for every tile and every column, two big-endian
    // 64-bit integers which are stored in fCatalog as (first, second). The
    // first ones are summed up to the size of a tile, the second ones are
    // used as offsets. If a copy file is open, the catalog and the gap in
    // front of the heap are written to it.
    void ReadCatalog()
    {
        vector<char> readBuf(16);
        fCatalog.resize(fNumTiles);

        // Remember where the catalog starts, needed for the copy below
        const streampos catalogStart = tellg();

        fChkData.reset();

        // Do the actual reading, one entry (16 bytes) at a time
        for (uint32_t tile=0; tile<fNumTiles; tile++)
            for (uint32_t col=0; col<fTable.num_cols; col++)
            {
                read(readBuf.data(), 2*sizeof(int64_t));
                fChkData.add(readBuf);

                // Swap the bytes of both values
                int64_t entry[2] = {0,0};
                revcpy<8>(reinterpret_cast<char*>(entry), readBuf.data(), 2);

                if (entry[0] < 0 || entry[1] < 0)
                {
                    clear(rdstate()|ios::badbit);
#ifdef __EXCEPTIONS
                    throw runtime_error("Negative value in the catalog");
#else
                    gLog << ___err___ << "ERROR - negative value in the catalog" << endl;
                    return;
#endif
                }

                // Add catalog entry
                fCatalog[tile].emplace_back(entry[0], entry[1]);
            }

        // Compute the total size of each compressed tile and the position
        // of each column relative to the first column of its tile
        fTileSize.resize(fNumTiles);
        fTileOffsets.resize(fNumTiles);
        for (uint32_t tile=0; tile<fNumTiles; tile++)
        {
            const auto &entries = fCatalog[tile];

            fTileSize[tile] = 0;
            for (uint32_t col=0; col<fTable.num_cols; col++)
            {
                fTileSize[tile] += entries[col].first;
                fTileOffsets[tile].emplace_back(entries[col].second - entries[0].second);
            }
        }

        // See if there is a gap before heap data
        fHeapOff           = tellg()+fTable.GetHeapShift();
        fHeapFromDataStart = fNumTiles*fTable.num_cols*2*sizeof(int64_t) + fTable.GetHeapShift();

        // The remainder is only needed when a copy is written
        if (!fCopy.is_open())
            return;

        // Write catalog and heap gap to target file
        seekg(catalogStart);

        const size_t catSize = fTable.GetHeapShift() + fTable.total_bytes;

        vector<char> buf(catSize);
        read(buf.data(), catSize);

        fCopy.write(buf.data(), catSize);
        if (!fCopy)
            clear(rdstate()|ios::badbit);
    }

    // Overrides the fits.h method. No row is written from here: the copy of
    // the compressed data is done in ReadBinaryRow because it requires
    // volatile data from ReadBinaryRow. Only the checksum of the raw data
    // is updated, and only if row is the one following fRow.
    virtual void WriteRowToCopyFile(size_t row)
    {
        if (row != fRow+1)
            return;

        fRawsum.add(fBufferRow, false);
    }

    // Compressed version of the read row.
    //
    // Makes sure that the tile containing rowNum is available uncompressed
    // in fBuffer (reading and uncompressing it if needed) and copies the
    // requested row to bufferToRead.
    //
    //  rowNum       : index of the requested row
    //  bufferToRead : destination, must provide fTable.bytes_per_row bytes
    //
    // Returns false if rowNum is beyond the last row or if an error occurred
    // while not compiled with exceptions; otherwise the state of the stream
    // (good()). With exceptions enabled, a runtime_error is thrown if the
    // catalog entry of the tile is unusable or if an unknown column ordering
    // is found. In both cases badbit is set.
    //
    // Tiles are added to the checksum and written to the copy file only if
    // they are read one after the other. A copy file being open while tiles
    // are read out of order sets badbit.
    bool ReadBinaryRow(const size_t &rowNum, char *bufferToRead)
    {
        if (rowNum >= GetNumRows())
            return false;

        // fCurrentRow is -1 as long as no tile has been loaded. Dividing this
        // sentinel by the unsigned tile length does not yield a meaningful
        // tile index, hence the first access is tracked explicitly.
        const bool firstRead = (fCurrentRow == -1);

        const uint32_t requestedTile = rowNum/fNumRowsPerTile;
        const uint32_t currentTile   = fCurrentRow/fNumRowsPerTile;

        // Tiles read in sequence: very first tile, or successor of the current one
        const bool sequential = firstRead || (requestedTile == currentTile+1);

        fCurrentRow = rowNum;

        //should we read yet another chunk of data ?
        if (firstRead || requestedTile != currentTile)
        {
            // The catalog comes from the file: make sure that the requested
            // tile is described and that it fits into the compressed buffer
            // before anything is read into it.
            bool tileOk = requestedTile < fCatalog.size()  &&
                          requestedTile < fTileSize.size() &&
                          !fCatalog[requestedTile].empty();

            int64_t  sizeToRead = 0; // tile data plus tile header
            int64_t  tileStart  = 0; // start of the tile header, relative to the heap
            uint32_t offset     = 0; // 32 bits offset of the current tile

            if (tileOk)
            {
                sizeToRead = fTileSize[requestedTile] + sizeof(TileHeader);
                tileStart  = fCatalog[requestedTile][0].second - sizeof(TileHeader);

                tileOk = sizeToRead >= 0 && tileStart >= 0;
            }

            if (tileOk)
            {
                offset = (tileStart + fHeapFromDataStart)%4;

                tileOk = static_cast<uint64_t>(sizeToRead)+offset+8 <= fCompressedBuffer.size();
            }

            if (!tileOk)
            {
                clear(rdstate()|ios::badbit);
#ifdef __EXCEPTIONS
                throw runtime_error("Catalog entry of the requested tile does not fit the allocated buffers");
#else
                gLog << ___err___ << "ERROR - catalog entry of the requested tile does not fit the allocated buffers" << endl;
                return false;
#endif
            }

            //skip to the beginning of the tile
            seekg(fHeapOff+tileStart);

            //the tile header is read along with the data, but not checked
            ZeroBufferForChecksum(fCompressedBuffer, fCompressedBuffer.size()-(sizeToRead+offset+8));

            //read one tile from disk
            read(fCompressedBuffer.data()+offset, sizeToRead);

            if (sequential)
                fChkData.add(fCompressedBuffer);

            if (fCopy.is_open())
            {
                if (sequential && fCopy.good())
                {
                    fCopy.write(fCompressedBuffer.data()+offset, sizeToRead);
                    if (!fCopy)
                        clear(rdstate()|ios::badbit);
                }
                else
                    clear(rdstate()|ios::badbit); // a copy cannot be written out of order
            }

            // Number of rows in this tile. Computed from the first row of the
            // tile (not from the requested row), so that the result is also
            // correct if the tile is entered somewhere in its middle.
            const size_t firstRowOfTile = size_t(requestedTile)*fNumRowsPerTile;
            const size_t rowsLeft       = GetNumRows() - firstRowOfTile;

            const uint32_t thisRoundNumRows = rowsLeft < fNumRowsPerTile ? rowsLeft : fNumRowsPerTile;

            //uncompress it
            UncompressBuffer(requestedTile, thisRoundNumRows, offset+sizeof(TileHeader));

            // pointer to column (source buffer)
            const char *src = fTransposedBuffer.data();

            uint32_t i=0;
            for (auto it=fTable.sorted_cols.begin(); it!=fTable.sorted_cols.end(); ++it, ++i)
            {
                char *buffer = fBuffer.data() + it->offset; // pointer to column (destination buffer)

                switch (fColumnOrdering[i])
                {
                    case kOrderByRow:
                        // regular, "semi-transposed" copy
                        for (char *dest=buffer; dest<buffer+thisRoundNumRows*fTable.bytes_per_row; dest+=fTable.bytes_per_row) // row-by-row
                        {
                            memcpy(dest, src, it->bytes);
                            src += it->bytes;  // next column
                        }
                    break;

                    case kOrderByCol:
                        // transposed copy
                        for (char *elem=buffer; elem<buffer+it->bytes; elem+=it->size) // element-by-element (arrays)
                        {
                            for (char *dest=elem; dest<elem+thisRoundNumRows*fTable.bytes_per_row; dest+=fTable.bytes_per_row) // row-by-row
                            {
                                memcpy(dest, src, it->size);
                                src += it->size; // next element
                            }
                        }
                    break;

                    default:
                        clear(rdstate()|ios::badbit);
#ifdef __EXCEPTIONS
                        throw runtime_error("Unkown column ordering scheme found");
#else
                        gLog << ___err___ << "ERROR - unkown column ordering scheme" << endl;
                        return false;
#endif
                    break;
                };
            }
        }

        //Data loaded and uncompressed. Copy it to destination
        memcpy(bufferToRead, fBuffer.data()+fTable.bytes_per_row*(fCurrentRow%fNumRowsPerTile), fTable.bytes_per_row);
        return good();
    }

    // Read a bunch of uncompressed data: numElems elements of sizeOfElems
    // bytes each are copied from src to dest.
    // Returns the number of bytes copied.
    uint32_t UncompressUNCOMPRESSED(char*       dest,
                                    const char* src,
                                    uint32_t    numElems,
                                    uint32_t    sizeOfElems)
    {
        const uint32_t numBytes = numElems*sizeOfElems;

        memcpy(dest, src, numBytes);

        return numBytes;
    }

    // Read a bunch of data compressed with the Huffman algorithm.
    //
    // src starts with numChunks 32-bit sizes, followed by the encoded chunks
    // themselves. Each chunk is decoded into 16-bit words which are appended
    // to dest.
    //
    //  dest      : where the decoded data is written
    //  src       : size table followed by the encoded chunks
    //  numChunks : number of chunks to decode
    //
    // Returns the number of bytes written to dest.
    uint32_t UncompressHUFFMAN16(char*       dest,
                                 const char* src,
                                 uint32_t    numChunks)
    {
        vector<uint16_t> uncompressed;

        //the compressed sizes (one per chunk) are stored in front of the data
        const char* sizeTable = src;
        src += sizeof(uint32_t)*numChunks;

        //uncompress the chunks, one by one
        uint32_t sizeWritten = 0;
        for (uint32_t j=0;j<numChunks;j++)
        {
            // src points somewhere into a byte buffer and is not necessarily
            // aligned for uint32_t: fetch the size with memcpy rather than
            // through a casted pointer.
            uint32_t compressedSize = 0;
            memcpy(&compressedSize, sizeTable + j*sizeof(uint32_t), sizeof(uint32_t));

            Huffman::Decode(reinterpret_cast<const unsigned char*>(src), compressedSize, uncompressed);

            const size_t numBytes = uncompressed.size()*sizeof(uint16_t);

            memcpy(dest, uncompressed.data(), numBytes);

            sizeWritten += numBytes;
            dest        += numBytes;
            src         += compressedSize;
        }
        return sizeWritten;
    }

    // Apply the inverse transform of the integer smoothing, in place.
    // Starting with the third one, the integer mean of its two (already
    // restored) predecessors is added to every element.
    // Returns the size of the data in bytes.
    uint32_t UnApplySMOOTHING(int16_t*   data,
                              uint32_t   numElems)
    {
        for (uint32_t idx=2; idx<numElems; ++idx)
        {
            // computed as int, the division truncates towards zero
            const int mean = (data[idx-1] + data[idx-2])/2;

            data[idx] = data[idx] + mean;
        }

        return numElems*sizeof(uint16_t);
    }

    // Data has been read from disk. Uncompress it !
    //
    // Walks through the columns of the tile stored in fCompressedBuffer and
    // writes the uncompressed data, column after column, to
    // fTransposedBuffer. The ordering found in each block header is stored
    // in fColumnOrdering.
    //
    //  catalogCurrentRow : index of the tile (row of the catalog)
    //  thisRoundNumRows  : number of rows in this tile
    //  offset            : position of the first block in fCompressedBuffer
    void UncompressBuffer(const uint32_t &catalogCurrentRow,
                          const uint32_t &thisRoundNumRows,
                          const uint32_t offset)
    {
        char *dest = fTransposedBuffer.data();

        //uncompress column by column
        for (uint32_t i=0; i<fTable.sorted_cols.size(); i++)
        {
            const fits::Table::Column &col = fTable.sorted_cols[i];

            // nothing to do for empty columns
            if (col.num == 0)
                continue;

            // locate the block header of this column
            const int64_t compressedOffset = fTileOffsets[catalogCurrentRow][i]+offset;

            const BlockHeader* head = reinterpret_cast<BlockHeader*>(fCompressedBuffer.data()+compressedOffset);

            fColumnOrdering[i] = head->ordering;

            const bool byRow = (head->ordering==kOrderByRow);
            const bool byCol = (head->ordering==kOrderByCol);

            const uint32_t numRows = byRow ? thisRoundNumRows : col.num;
            const uint32_t numCols = byCol ? thisRoundNumRows : col.num;

            // the data follows the header and its list of processings
            const char *src = fCompressedBuffer.data()+compressedOffset+sizeof(BlockHeader)+sizeof(uint16_t)*head->numProcs;

            // The processings are undone in reverse order. Only the size
            // reported by the last one undone (index 0) advances dest.
            uint32_t sizeWritten = 0;
            for (int32_t step=head->numProcs-1; step>=0; step--)
            {
                sizeWritten = 0;

                switch (head->processings[step])
                {
                case kFactRaw:
                    sizeWritten = UncompressUNCOMPRESSED(dest, src, numRows*numCols, col.size);
                    break;

                case kFactSmoothing:
                    sizeWritten = UnApplySMOOTHING(reinterpret_cast<int16_t*>(dest), numRows*numCols);
                    break;

                case kFactHuffman16:
                    sizeWritten = UncompressHUFFMAN16(dest, src, numRows);
                    break;

                default:
                    clear(rdstate()|ios::badbit);
#ifdef __EXCEPTIONS
                    throw runtime_error("Unknown processing applied to data. Aborting");
#else
                    gLog << ___err___ << "ERROR - Unknown processing applied to data. Aborting" << endl;
                    return;
#endif
                }
            }

            //increment destination counter only when processing done.
            dest += sizeWritten;
        }
    }

    // Cross-check of the heap against the catalog.
    //
    // Walks through the heap, starting at fHeapOff, following the tile and
    // block headers found there. From these a catalog is rebuilt and compared,
    // together with the summed number of rows, to fCatalog and
    // fTable.num_rows. On any disagreement badbit is set and a runtime_error
    // is thrown (or, without exceptions, an error is logged).
    //
    // Not called at the moment: the call in InitCompressionReading() is
    // commented out.
    // NOTE(review): the walk only ends when a read fails or when something
    // other than "TILE" is found (which sets badbit), and the error paths
    // leave without seeking back to the initial position. Presumably the
    // stream state has to be restored before this check can be enabled —
    // confirm.
    void CheckIfFileIsConsistent()
    {
        //goto start of heap
        streamoff whereAreWe = tellg();
        seekg(fHeapOff);

        //init number of rows to zero
        uint64_t numRows = 0;

        //get number of columns from header
        size_t numCols = fTable.num_cols;

        // catalog rebuilt from the heap: one (size, offset in heap) pair per tile and column
        vector<vector<pair<int64_t, int64_t> > > catalog;

        TileHeader tileHead;
        BlockHeader columnHead;
        streamoff offsetInHeap = 0;
        //skip through the heap
        while (true)
        {
            read((char*)(&tileHead), sizeof(TileHeader));
            //end of file
            if (!good())
                break;
            //padding or corrupt data
            if (memcmp(tileHead.id, "TILE", 4))
            {
                clear(rdstate()|ios::badbit);
                break;
            }

            //a new tile begins here
            catalog.push_back(vector<pair<int64_t, int64_t> >(0));
            offsetInHeap += sizeof(TileHeader);

            //skip through the columns
            for (size_t i=0;i<numCols;i++)
            {
                //zero sized column do not have headers. Skip it
                if (fTable.sorted_cols[i].num == 0)
                {
                    catalog.back().push_back(make_pair(0,0));
                    continue;
                }
                //read column header
                read((char*)(&columnHead), sizeof(BlockHeader));
                //corrupted tile
                if (!good())
                    break;
                catalog.back().emplace_back((int64_t)(columnHead.size),offsetInHeap);
                // the size in the header is used as the distance to the next block
                offsetInHeap += columnHead.size;
                seekg(fHeapOff+offsetInHeap);
            }

            //if we ain't good, this means that something went wrong inside the current tile.
            if (!good())
            {
                catalog.pop_back();
                break;
            }

            //current tile is complete. Add rows
            numRows += tileHead.numRows;
        }

        // first compare the overall numbers: tiles and rows
        if (catalog.size() != fCatalog.size() ||
            numRows        != fTable.num_rows)
        {
                    clear(rdstate()|ios::badbit);
#ifdef __EXCEPTIONS
                    throw runtime_error("Heap data does not agree with header.");
#else
                    gLog << ___err___ << "ERROR - Heap data does not agree with header." << endl;
                    return;
#endif
        }

        // then compare the catalogs entry by entry
        for (uint32_t i=0;i<catalog.size(); i++)
            for (uint32_t j=0;j<numCols;j++)
            {
                if (catalog[i][j].first  != fCatalog[i][j].first ||
                    catalog[i][j].second != fCatalog[i][j].second)
                {
                    clear(rdstate()|ios::badbit);
#ifdef __EXCEPTIONS
                    throw runtime_error("Heap data does not agree with header.");
#else
                    gLog << ___err___ << "ERROR - Heap data does not agree with header." << endl;
                    return;
#endif
                }
            }
        //go back to start of heap
        seekg(whereAreWe);
    }

};//class zfits

#ifndef __MARS__
}; //namespace std
#endif

#endif 
