Index: /trunk/Mars/mcore/fits.h
===================================================================
--- /trunk/Mars/mcore/fits.h	(revision 17215)
+++ /trunk/Mars/mcore/fits.h	(revision 17216)
@@ -267,5 +267,5 @@
             num_cols      = Get<size_t>("TFIELDS");
             datasum       = is_compressed ? Get<int64_t>("ZDATASUM", -1) : Get<int64_t>("DATASUM", -1);
-
+//cout << "IS COMPRESSED =-========= " << is_compressed << " " << Get<size_t>("NAXIS1") << endl;
             size_t bytes = 0;
 
@@ -456,8 +456,8 @@
         streamoff GetHeapShift() const
         {
-            if (!HasKey("THEAP"))
+            if (!HasKey("ZHEAPPTR"))
                 return 0;
 
-            const size_t shift = Get<size_t>("THEAP");
+            const size_t shift = Get<size_t>("ZHEAPPTR");
             return shift <= total_bytes ? 0 : shift - total_bytes;
         }
Index: /trunk/Mars/mcore/ofits.h
===================================================================
--- /trunk/Mars/mcore/ofits.h	(revision 17215)
+++ /trunk/Mars/mcore/ofits.h	(revision 17216)
@@ -308,4 +308,5 @@
     }
 
+protected:
     struct Table
     {
@@ -363,7 +364,7 @@
         this->open(fname);
     }
-    ~ofits() { close(); }
-
-    void open(const char * filename, bool addEXTNAMEKey=true)
+    ~ofits() { if (is_open()) close(); }
+
+    virtual void open(const char * filename, bool addEXTNAMEKey=true)
     {
         fDataSum  = 0;
@@ -509,6 +510,45 @@
     }
 
+    string CommentFromType(char type)
+    {
+        string comment;
+
+        switch (type)
+        {
+        case 'L': comment = "1-byte BOOL]";  break;
+        case 'A': comment = "1-byte CHAR]";  break;
+        case 'B': comment = "1-byte BOOL]";  break;
+        case 'I': comment = "2-byte INT]";   break;
+        case 'J': comment = "4-byte INT]";   break;
+        case 'K': comment = "8-byte INT]";   break;
+        case 'E': comment = "4-byte FLOAT]"; break;
+        case 'D': comment = "8-byte FLOAT]"; break;
+        case 'Q': comment = "var. Length]"; break;
+        }
+
+        return comment;
+    }
+
+    uint32_t SizeFromType(char type)
+    {
+        size_t size = 0;
+
+        switch (type)
+        {
+        case 'L': size = 1; break;
+        case 'A': size = 1; break;
+        case 'B': size = 1; break;
+        case 'I': size = 2; break;
+        case 'J': size = 4; break;
+        case 'K': size = 8; break;
+        case 'E': size = 4; break;
+        case 'D': size = 8; break;
+        case 'Q': size = 16; break;
+        }
+
+        return size;
+    }
     //ETIENNE to be able to restore the file 1 to 1, I must restore the header keys myself
-    bool AddColumn(uint32_t cnt, char typechar, const string &name, const string &unit, const string &comment="", bool addHeaderKeys=true)
+    virtual bool AddColumn(uint32_t cnt, char typechar, const string &name, const string &unit, const string &comment="", bool addHeaderKeys=true)
     {
         if (tellp()<0)
@@ -550,5 +590,5 @@
         typechar = toupper(typechar);
 
-        static const string allow("LABIJKED");
+        static const string allow("LABIJKEDQ");
         if (std::find(allow.begin(), allow.end(), typechar)==allow.end())
         {
@@ -564,5 +604,9 @@
 
         ostringstream type;
-        type << cnt << typechar;
+        type << cnt;
+        if (typechar=='Q')
+            type << "QB";
+        else
+            type << typechar;
 
         fTable.num_cols++;
@@ -576,5 +620,6 @@
         typecom << "format of " << name << " [";
 
-        switch (typechar)
+        typecom << CommentFromType(typechar);
+/*        switch (typechar)
         {
         case 'L': typecom << "1-byte BOOL]";  break;
@@ -586,6 +631,7 @@
         case 'E': typecom << "4-byte FLOAT]"; break;
         case 'D': typecom << "8-byte FLOAT]"; break;
-        }
-
+        case 'Q': typecom << "var. Length]"; break;
+        }
+*/
         if (addHeaderKeys)
         {
@@ -595,7 +641,7 @@
                 SetStr(unitkey.str(), unit, unitcom.str());
         }
-        size_t size = 0;
-
-        switch (typechar)
+        size_t size = SizeFromType(typechar);
+
+/*        switch (typechar)
         {
         case 'L': size = 1; break;
@@ -607,6 +653,7 @@
         case 'E': size = 4; break;
         case 'D': size = 8; break;
-        }
-
+        case 'Q': size = 16; break;
+        }
+*/
         Table::Column col;
 
@@ -678,8 +725,10 @@
     {
         Checksum sum;
+        uint32_t count=0;
         for (auto it=fKeys.begin(); it!=fKeys.end(); it++)
         {
             it->Out(fout);
             sum += it->checksum;
+            count++;
         }
         fout.flush();
@@ -725,5 +774,15 @@
     }
 
-    bool WriteTableHeader(const char *name="DATA")
+    virtual bool WriteDrsOffsetsTable ()
+    {
+        return true;
+    }
+
+    virtual bool WriteCatalog()
+    {
+        return true;
+    }
+
+    virtual bool WriteTableHeader(const char *name="DATA")
     {
         if (tellp()>0)
@@ -738,4 +797,6 @@
 
         fHeaderSum = WriteFitsHeader();
+
+        WriteDrsOffsetsTable();
 
         if (!fManualExtName)
@@ -748,4 +809,6 @@
         WriteHeader();
 
+        WriteCatalog();
+
         return good();
     }
@@ -759,7 +822,7 @@
     }
 
-    uint32_t GetBytesPerRow() const { return fTable.bytes_per_row; }
-
-    bool WriteRow(const void *ptr, size_t cnt, bool byte_swap=true)
+    virtual uint32_t GetBytesPerRow() const { return fTable.bytes_per_row; }
+
+    virtual bool WriteRow(const void *ptr, size_t cnt, bool byte_swap=true)
     {
         // FIXME: Make sure that header was already written
@@ -830,5 +893,5 @@
 
     // Flushes the number of rows to the header on disk
-    void FlushNumRows()
+    virtual void FlushNumRows()
     {
         SetInt("NAXIS2", fTable.num_rows);
@@ -838,17 +901,15 @@
     size_t GetNumRows() const { return fTable.num_rows; }
 
-    bool close()
-    {
-        if (tellp()<0)
-            return false;
-
+    void AlignTo2880Bytes()
+    {
         if (tellp()%(80*36)>0)
         {
-            const vector<char> filler(80*36-tellp()%(80*36));
+            vector<char> filler(80*36-tellp()%(80*36));
             write(filler.data(), filler.size());
         }
-
-        // We don't have to jump back to the end of the file
-        SetInt("NAXIS2",  fTable.num_rows);
+    }
+
+    Checksum UpdateHeaderChecksum()
+    {
 
         ostringstream dataSumStr;
@@ -862,5 +923,18 @@
         SetStr("CHECKSUM", (sum+fDataSum).str());
 
-        const Checksum chk = WriteHeader();
+        return WriteHeader();
+    }
+    virtual bool close()
+    {
+        if (tellp()<0)
+            return false;
+
+        AlignTo2880Bytes();
+
+        // We don't have to jump back to the end of the file
+        SetInt("NAXIS2",  fTable.num_rows);
+
+
+        const Checksum chk = UpdateHeaderChecksum();
 
         ofstream::close();
Index: /trunk/Mars/mcore/zofits.h
===================================================================
--- /trunk/Mars/mcore/zofits.h	(revision 17216)
+++ /trunk/Mars/mcore/zofits.h	(revision 17216)
@@ -0,0 +1,960 @@
+/*
+ * zofits.h
+ *
+ *  FACT native compressed FITS writer
+ *      Author: lyard
+ */
+
+#include "ofits.h"
+
+#ifndef __MARS__
+namespace std
+{
+#else
+using namespace std;
+#endif
+
+class zofits : public ofits
+{
+    public:
+
+        //This has been duplicated from zfits. Should be be located one level up ?
+        //If so, where ?
+        enum CompressionProcess_t
+        {
+            kFactRaw       = 0x0,
+            kFactSmoothing = 0x1,
+            kFactHuffman16 = 0x2
+        };
+
+        enum RowOrdering_t
+        {
+            kOrderByCol = 'C',
+            kOrderByRow = 'R'
+        };
+
+        //TileHeaders are only written, but never read-back
+        //They are here to be able to recover raw data from binary if the header is corrupted
+        //Or to cross-check the data, if desired: the zfits method CheckIfFileIsConsistent can do this
+        struct TileHeader
+        {
+          char     id[4];
+          uint32_t numRows;
+          uint64_t size;
+          TileHeader(uint32_t nRows=0,
+                     uint64_t s=0) : id({'T', 'I', 'L', 'E'}),
+                                     numRows(nRows),
+                                     size(s)
+          { };
+        } __attribute__((__packed__));
+
+        //BlockHeaders are written before every compressed blob of data
+        struct BlockHeader
+        {
+            uint64_t      size;
+            char          ordering;
+            unsigned char numProcs;
+            BlockHeader(uint64_t      s=0,
+                        char          o=zfits::kOrderByRow,
+                        unsigned char n=1) : size(s),
+                                             ordering(o),
+                                             numProcs(n)
+            {}
+        } __attribute__((__packed__)) ;
+
+
+        //constructors
+        zofits(uint32_t numTiles=1000,
+               uint32_t rowPerTile=100) : ofits()
+        {
+            InitMemberVariables(numTiles, rowPerTile);
+            SetNumWorkingThreads(1);
+        }
+
+        zofits(const char* fname,
+               uint32_t numTiles=1000,
+               uint32_t rowPerTile=100) : ofits(fname)
+        {
+            InitMemberVariables(numTiles, rowPerTile);
+            SetNumWorkingThreads(1);
+        }
+
+        ~zofits()
+        {
+        }
+
+        //initialization of member variables
+        void InitMemberVariables(uint32_t nt=0, uint32_t rpt=0)
+        {
+            fCheckOffset  = 0;
+            fThreadLooper = 0;
+
+            fNumTiles       = nt;
+            fNumRowsPerTile = rpt;
+
+            fNumThreads  = 1;
+            fThreadIndex = 0;   ///< A variable to assign threads indices
+
+            fBuffer       = NULL;
+            fRealRowWidth = 0;
+
+            fCatalogOffset    =  0;
+            fStartCellsOffset = -1;
+            fDataOffset       = -1;
+        }
+
+        //whether or not a calibration was given to the file writer
+        bool IsOffsetCalibrated()
+        {
+            return (fOffsetCalibration.size() != 0);
+        }
+
+        //assign a given drs offset calibration
+        void SetDrsCalibration(const float* calib)
+        {
+            if (!IsOffsetCalibrated())
+                fOffsetCalibration.resize(1440*1024);
+
+            for (uint32_t i=0;i<1440*1024;i++)
+                fOffsetCalibration[i] = (int16_t)(calib[i]*4096.f/2000.f);
+        }
+
+        void SetDrsCalibration(const vector<float>& calib)
+        {
+            if (calib.size() != 1440*1024)
+#ifdef __EXCEPTIONS
+            throw runtime_error("Cannot load calibration with anything else than 1024 samples per pixel");
+#else
+            gLog << ___err___ << "ERROR - Cannot load calibration with anything else than 1024 samples per pixel");
+#endif
+            SetDrsCalibration(calib.data());
+        }
+
+        void LoadDrsCalibrationFromFile(const string& fileName)
+        {
+            factfits drsFile(fileName);
+            float* drsCalibFloat  = reinterpret_cast<float*>(drsFile.SetPtrAddress("BaselineMean"));
+
+            drsFile.GetNextRow();
+
+            SetDrsCalibration(drsCalibFloat);
+        }
+
+        //write the header of the binary table
+        bool WriteTableHeader(const char* name="DATA")
+        {
+            ofits::WriteTableHeader(name);
+
+            if (IsOffsetCalibrated())
+            {//retrieve the column storing the start cell offsets, if required.
+
+                for (auto it=fRealColumns.begin(); it!=fRealColumns.end(); it++)//Table.cols.begin(); it!= fTable.cols.end(); it++)
+                {
+                    if (it->col.name == "StartCellData")
+                        fStartCellsOffset = it->col.offset;
+                    if (it->col.name == "Data")
+                    {
+                        fNumSlices = it->col.num;
+                        fDataOffset = it->col.offset;
+                        if (fNumSlices % 1440 != 0)
+                        {
+#ifdef __EXCEPTIONS
+                            throw runtime_error("Number of data samples not a multiple of 1440.");
+#else
+                            gLog << ___err___ << "ERROR - Number of data samples not a multiple of 1440. Doing it uncalibrated." << endl;
+#endif
+                            fOffsetCalibration.resize(0);
+                        }
+                        fNumSlices /= 1440;
+                    }
+                }
+                if (fStartCellsOffset < 0)
+                {
+#ifdef __EXCEPTIONS
+                    throw runtime_error("FACT Calibration requested, but \"StartCellData\" column not found.");
+#else
+                    gLog << ___err___ << "ERROR - FACT Calibration requested, but \"StartCellData\" column not found. Doing it uncalibrated." << endl;
+#endif
+                    //throw away the calibration data
+                    fOffsetCalibration.resize(0);
+                }
+                if (fDataOffset < 0)
+                {
+#ifdef __EXCEPTIONS
+                    throw runtime_error("FACT Calibration requested, but \"Data\" column not found.");
+#else
+                    gLog << ___err___ << "ERROR - FACT Calibration requested, but \"Data\" column not found. Doing it uncalibrated." << endl;
+#endif
+                    //throw away the calibration data
+                    fOffsetCalibration.resize(0);
+                }
+            }
+        }
+
+        void open(const char* filename, bool addEXTNAMEKey=true)
+        {
+            ofits::open(filename, addEXTNAMEKey);
+
+            //add compression-related header entries
+            SetBool("ZTABLE", true, "Table is compressed");
+            SetInt("ZNAXIS1", 0, "Width of uncompressed rows");
+            SetInt("ZNAXIS2", 0, "Number of uncompressed rows");
+            SetInt("ZPCOUNT", 0, "");
+            SetInt("ZHEAPPTR", 0, "");
+            SetInt("ZTILELEN", fNumRowsPerTile, "Number of rows per tile");
+            SetInt("THEAP", 0, "");
+            SetStr("RAWSUM", "         0", "Checksum of raw littlen endian data");
+
+
+            fRawSum.reset();
+
+            //start the compression threads
+            pthread_mutex_init(&fMutex, NULL);
+
+            fThreadIndex = 0;
+            for (uint32_t i=0;i<fNumThreads;i++)
+                pthread_create(&(fThread[i]), NULL, threadFunction, this);
+
+            //wait for all threads to have started
+            while (fNumThreads != fThreadIndex)
+                usleep(1000);
+
+            //set the writing fence to the last thread (so that the first one can start writing right away)
+            fThreadIndex = fNumThreads-1;
+        }
+
+        bool WriteDrsOffsetsTable()
+        {
+            if (!IsOffsetCalibrated())
+                return false;
+
+            ofits c;
+            c.SetStr("XTENSION", "BINTABLE"            , "binary table extension");
+            c.SetInt("BITPIX"  , 8                     , "8-bit bytes");
+            c.SetInt("NAXIS"   , 2                     , "2-dimensional binary table");
+            c.SetInt("NAXIS1"  , 1024*1440*2           , "width of table in bytes");
+            c.SetInt("NAXIS2"  , 1                     , "number of rows in table");
+            c.SetInt("PCOUNT"  , 0                     , "size of special data area");
+            c.SetInt("GCOUNT"  , 1                     , "one data group (required keyword)");
+            c.SetInt("TFIELDS" , 1                     , "number of fields in each row");
+            c.SetStr("CHECKSUM", "0000000000000000"    , "Checksum for the whole HDU");
+            c.SetStr("DATASUM" ,  "         0"         , "Checksum for the data block");
+            c.SetStr("EXTNAME" , "ZDrsCellOffsets"     , "name of this binary table extension");
+            c.SetStr("TTYPE1"  , "OffsetCalibration"   , "label for field   1");
+            c.SetStr("TFORM1"  , "1474560I"            , "data format of field: 2-byte INTEGER");
+            c.End();
+
+            vector<char> swappedOffsets;
+            swappedOffsets.resize(1024*1440*sizeof(int16_t));
+            revcpy<sizeof(int16_t)>(swappedOffsets.data(), (char*)(fOffsetCalibration.data()), 1024*1440);
+
+            Checksum datasum;
+            datasum.add(swappedOffsets.data(), sizeof(int16_t)*1024*1440);
+
+            ostringstream dataSumStr;
+            dataSumStr << datasum.val();
+            c.SetStr("DATASUM", dataSumStr.str());
+
+            datasum += c.WriteHeader(*this);
+
+            const off_t here_I_am = tellp();
+
+            c.SetStr("CHECKSUM", datasum.str());
+            c.WriteHeader(*this);
+
+            seekp(here_I_am);
+
+            write(swappedOffsets.data(), swappedOffsets.size());
+
+            AlignTo2880Bytes();
+
+            return good();
+        }
+
+        uint32_t GetBytesPerRow() const
+        {
+            return fRealRowWidth;
+        }
+
+        bool WriteCatalog()
+        {
+            const uint32_t one_catalog_row_size = fTable.num_cols*2*sizeof(uint64_t);
+            const uint32_t total_catalog_size = fCatalog.size()*one_catalog_row_size;
+
+            vector<char> swapped_catalog(total_catalog_size);
+            uint32_t shift = 0;
+            for (auto it=fCatalog.begin(); it!=fCatalog.end(); it++)
+            {
+                revcpy<sizeof(uint64_t)>(swapped_catalog.data() + shift, (char*)(it->data()), fTable.num_cols*2);
+                shift += one_catalog_row_size;
+            }
+
+            if (fCatalogOffset == 0)
+            {
+                fCatalogOffset = tellp();
+            }
+
+            const off_t where_are_we = tellp();
+
+            seekp(fCatalogOffset);
+            write(swapped_catalog.data(), total_catalog_size);
+            if (where_are_we != fCatalogOffset)
+                seekp(where_are_we);
+
+            fCatalogSum.reset();
+            fCatalogSum.add(swapped_catalog.data(), total_catalog_size);
+
+            return good();
+        }
+
+        bool WriteRow(const void* ptr, size_t cnt, bool byte_swap=true)
+        {
+            if (cnt != fRealRowWidth)
+            {
+#ifdef __EXCEPTIONS
+                throw runtime_error("Wrong size of row given to WriteRow");
+#else
+                gLog << ___err___ << "ERROR - Wrong size of row given to WriteRow" << endl;
+                return false;
+#endif
+            }
+
+            if (fTable.num_rows >= fNumRowsPerTile*fNumTiles)
+            {
+#ifdef __EXCEPTIONS
+                throw runtime_error("Maximum number of rows exceeded for this file");
+#else
+                gLog << ___err___ << "ERROR - Maximum number of rows exceeded for this file" << endl;
+                return false;
+#endif
+            }
+
+            //copy current row to pool or rows waiting for compression
+            char* target_location = fBuffer + fRealRowWidth*(fTable.num_rows%fNumRowsPerTile);
+            memcpy(target_location, ptr, fRealRowWidth);
+
+            //for now, make an extra copy of the data, for RAWSUM checksuming.
+            //Ideally this should be moved to the threads, along with the drs-offset-calibration
+            //However, because the RAWSUM must be calculated before the tile is transposed, I am not sure whether
+            //one extra memcpy per row written is worse than 100 rows checksumed when the tile is full....
+            const uint32_t rawOffset = (fTable.num_rows*fRealRowWidth)%4;
+            char* buffer = fRawSumBuffer.data() + rawOffset;
+            auto ib = fRawSumBuffer.begin();
+            auto ie = fRawSumBuffer.rbegin();
+            *ib++ = 0;
+            *ib++ = 0;
+            *ib++ = 0;
+            *ib   = 0;
+
+            *ie++ = 0;
+            *ie++ = 0;
+            *ie++ = 0;
+            *ie   = 0;
+
+            memcpy(buffer, ptr, fRealRowWidth);
+
+            fRawSum.add(fRawSumBuffer, false);
+
+            if (IsOffsetCalibrated())
+            {
+
+                int16_t* startCell = reinterpret_cast<int16_t*>(target_location + fStartCellsOffset);
+                int16_t* data      = reinterpret_cast<int16_t*>(target_location + fDataOffset);
+
+                for (uint32_t ch=0; ch<1440; ch++)
+                {
+                    if (startCell[ch] < 0)
+                    {
+                        data += fNumSlices;
+                        continue;
+                    }
+
+                    const int16_t modStart = startCell[ch]%1024;
+                    const int16_t *off     = fOffsetCalibration.data() + ch*1024;
+
+                    const int16_t* cal        = off+modStart;
+                    const int16_t* end_stride = data+fNumSlices;
+
+                    if (modStart+fNumSlices > 1024)
+                    {
+                        while (cal < off+1024)
+                            *data++ -= *cal++;
+                        cal = off;
+                    }
+
+                    while (data<end_stride)
+                        *data++ -= *cal++;
+                }
+            }
+
+            fTable.num_rows++;
+
+            if (fTable.num_rows % fNumRowsPerTile == 0)
+            {//give a new tile to compress to a thread
+                while (fThreadStatus[fThreadLooper] == _THREAD_COMPRESS_)
+                    usleep(100000);
+
+                copyTransposeTile(fThreadLooper);
+
+                while (fThreadStatus[fThreadLooper] != _THREAD_WAIT_)
+                    usleep(100000);
+
+                fThreadNumRows[fThreadLooper] = fTable.num_rows;
+                fThreadStatus[fThreadLooper] = _THREAD_COMPRESS_;
+                fThreadLooper = (fThreadLooper+1)%fNumThreads;
+            }
+
+            return true;
+        }
+
+        void FlushNumRows()
+        {
+            SetInt("NAXIS2", fTable.num_rows/fNumRowsPerTile);
+            SetInt("ZNAXIS2", fTable.num_rows);
+            FlushHeader();
+        }
+
+        bool close()
+        {
+            if (tellp() < 0)
+                return false;
+
+            //wait for compression threads to finish
+            for (auto it=fThreadStatus.begin(); it!= fThreadStatus.end(); it++)
+                while (*it != _THREAD_WAIT_)
+                    usleep(100000);
+
+            for (auto it=fThreadStatus.begin(); it!= fThreadStatus.end(); it++)
+                *it = _THREAD_EXIT_;
+
+            for (auto it=fThread.begin(); it!= fThread.end(); it++)
+                pthread_join(*it, NULL);
+
+            pthread_mutex_destroy(&fMutex);
+
+            if (fTable.num_rows%fNumRowsPerTile != 0)
+            {
+                copyTransposeTile(0);
+                fThreadNumRows[0] = fTable.num_rows;
+                uint32_t numBytes = compressBuffer(0);
+                writeCompressedDataToDisk(0, numBytes);
+            }
+
+            AlignTo2880Bytes();
+
+            //update header keywords
+            SetInt("ZNAXIS1", fRealRowWidth);
+            SetInt("ZNAXIS2", fTable.num_rows);
+
+            uint64_t heap_offset = fCatalog.size()*fTable.num_cols*sizeof(uint64_t)*2;
+            SetInt("ZHEAPPTR", heap_offset);
+//            SetInt("THEAP"   , heap_offset);0
+
+            const uint32_t total_num_tiles_written = (fTable.num_rows + fNumRowsPerTile-1)/fNumRowsPerTile;
+
+            SetInt("THEAP", total_num_tiles_written*2*sizeof(int64_t)*fTable.num_cols);
+
+            SetInt("NAXIS1", 2*sizeof(int64_t)*fTable.num_cols);
+            SetInt("NAXIS2", total_num_tiles_written);
+
+            ostringstream str;
+            str << fRawSum.val();
+            SetStr("RAWSUM", str.str());
+
+            int64_t heap_size = 0;
+            int64_t compressed_offset = 0;
+
+            for (uint32_t i=0; i<total_num_tiles_written; i++)
+            {
+                compressed_offset += sizeof(TileHeader);
+                heap_size         += sizeof(TileHeader);
+                for (uint32_t j=0; j<fCatalog[i].size(); j++)
+                {
+                    heap_size += fCatalog[i][j].first;
+                    fCatalog[i][j].second = compressed_offset;
+                    compressed_offset += fCatalog[i][j].first;
+                    if (fCatalog[i][j].first == 0)
+                        fCatalog[i][j].second = 0;
+                }
+            }
+
+            //add to the heap size the size of the gap between the catalog and the actual heap
+            heap_size += (fCatalog.size() - total_num_tiles_written)*fTable.num_cols*sizeof(uint64_t)*2;
+
+            SetInt("PCOUNT", heap_size, "size of special data area");
+
+            //Just for updating the fCatalogSum value
+            WriteCatalog();
+
+            fDataSum += fCatalogSum;
+
+            const Checksum checksm = UpdateHeaderChecksum();
+
+            ofstream::close();
+
+            if ((checksm+fDataSum).valid())
+                return true;
+
+            ostringstream sout;
+            sout << "Checksum (" << std::hex << checksm.val() << ") invalid.";
+#ifdef __EXCEPTIONS
+            throw runtime_error(sout.str());
+#else
+            gLog << ___err___ << "ERROR - " << sout.str() << endl;
+            return false;
+#endif
+        }
+
+        //Overload of the ofits method. Just calls the zofits specific one with default, uncompressed options for this column
+        bool AddColumn(uint32_t cnt, char typechar, const string& name, const string& unit, const string& comment="", bool addHeaderKeys=true)
+        {
+            BlockHeader head;
+            vector<uint16_t> processing(1);
+            processing[0] = kFactRaw;
+            AddColumn(cnt, typechar, name, unit, head, processing, comment, addHeaderKeys);
+        }
+
+        bool AddColumn(uint32_t cnt, char typechar, const string& name, const string& unit, BlockHeader& header, vector<uint16_t>& comp_sequence, const string& comment="", bool addHeaderKeys=true)
+        {
+            if (!ofits::AddColumn(1, 'Q', name, unit, comment, addHeaderKeys))
+                return false;
+
+            Table::Column col;
+            size_t size = SizeFromType(typechar);
+
+            col.name   = name;
+            col.type   = typechar;
+            col.num    = cnt;
+            col.size   = size;
+            col.offset = fRealRowWidth;
+
+            fRealRowWidth += size*cnt;
+
+            fRealColumns.emplace_back(CompressedColumn(col, header, comp_sequence));
+
+            ostringstream strKey, strVal, strCom;
+            strKey << "ZFORM" << fRealColumns.size();
+            strVal << cnt << typechar;
+            strCom << "format of " << name << " [" << CommentFromType(typechar);
+            SetStr(strKey.str(), strVal.str(), strCom.str());
+
+            strKey.str("");
+            strVal.str("");
+            strCom.str("");
+            strKey << "ZCTYP" << fRealColumns.size();
+            strVal << "FACT";
+            strCom << "Comp. of FACT telescope";
+            SetStr(strKey.str(), strVal.str(), strCom.str());
+
+            return reallocateBuffers();
+        }
+
+        bool SetNumWorkingThreads(uint32_t num)
+        {
+            if (is_open())
+            {
+#ifdef __EXCEPTIONS
+                throw runtime_error("File must be closed before changing the number of compression threads");
+#else
+                gLog << ___err___ << "ERROR - File must be closed before changing the number of compression threads");
+#endif
+                return false;
+            }
+            if (num < 1 || num > 64)
+            {
+#ifdef __EXCEPTIONS
+                throw runtime_error("Number of threads must be between 1 and 64");
+#else
+                gLog << ___err___ << "ERROR - Number of threads must be between 1 and 64");
+#endif
+                return false;
+            }
+
+            fNumThreads = num;
+            fThreadStatus.resize(num);
+            fThread.resize(num);
+            fThreadNumRows.resize(num);
+            for (uint32_t i=0;i<num;i++)
+            {
+                fThreadNumRows[i] = 0;
+                fThreadStatus[i] = _THREAD_WAIT_;
+            }
+
+            return reallocateBuffers();
+        }
+
+    private:
+
+
+        bool reallocateBuffers()
+        {
+            fBufferVector.resize(fRealRowWidth*fNumRowsPerTile + 8); //8 more for checksuming
+            memset(fBufferVector.data(), 0, 4);
+            fBuffer = fBufferVector.data()+4;
+
+            fRawSumBuffer.resize(fRealRowWidth + 4-fRealRowWidth%4); //for checksuming
+
+
+            fTransposedBufferVector.resize(fNumThreads);
+            fCompressedBufferVector.resize(fNumThreads);
+            fTransposedBuffer.resize(fNumThreads);
+            fCompressedBuffer.resize(fNumThreads);
+            for (uint32_t i=0;i<fNumThreads;i++)
+            {
+                fTransposedBufferVector[i].resize(fRealRowWidth*fNumRowsPerTile);
+                fCompressedBufferVector[i].resize(fRealRowWidth*fNumRowsPerTile + fRealColumns.size() + sizeof(TileHeader) + 8);
+                memset(fCompressedBufferVector[i].data(), 0, 4);
+                TileHeader tileHeader;
+                memcpy(fCompressedBufferVector[i].data()+4, &tileHeader, sizeof(TileHeader));
+                fTransposedBuffer[i] = fTransposedBufferVector[i].data();
+                fCompressedBuffer[i] = fCompressedBufferVector[i].data()+4;
+            }
+
+            //give the catalog enough space
+            fCatalog.resize(fNumTiles);
+            for (uint32_t i=0;i<fNumTiles;i++)
+            {
+                fCatalog[i].resize(fRealColumns.size());
+                for (auto it=fCatalog[i].begin(); it!=fCatalog[i].end(); it++)
+                    *it = CatalogEntry(0,0);
+            }
+            return true;
+        }
+
+        bool writeCompressedDataToDisk(uint32_t threadID, uint32_t sizeToWrite)
+        {
+            char* checkSumPointer = fCompressedBuffer[threadID];
+            int32_t extraBytes = 0;
+            uint32_t sizeToChecksum = sizeToWrite;
+            if (fCheckOffset != 0)
+            {//should we extend the array to the left ?
+                sizeToChecksum += fCheckOffset;
+                checkSumPointer -= fCheckOffset;
+                memset(checkSumPointer, 0, fCheckOffset);
+            }
+            if (sizeToChecksum%4 != 0)
+            {//should we extend the array to the right ?
+                extraBytes = 4 - (sizeToChecksum%4);
+                memset(checkSumPointer+sizeToChecksum, 0,extraBytes);
+                sizeToChecksum += extraBytes;
+            }
+
+            //do the checksum
+            fDataSum.add(checkSumPointer, sizeToChecksum);
+
+            fCheckOffset = (4 - extraBytes)%4;
+            //write data to disk
+            write(fCompressedBuffer[threadID], sizeToWrite);
+            return good();
+        }
+
+        static void* threadFunction(void* context)
+        {
+            zofits* myself = static_cast<zofits*>(context);
+
+            uint32_t myID = 0;
+            pthread_mutex_lock(&(myself->fMutex));
+            myID = myself->fThreadIndex++;
+            pthread_mutex_unlock(&(myself->fMutex));
+            uint32_t threadToWaitForBeforeWriting = (myID == 0) ? myself->fNumThreads-1 : myID-1;
+
+            while (myself->fThreadStatus[myID] != _THREAD_EXIT_)
+            {
+                while (myself->fThreadStatus[myID] == _THREAD_WAIT_)
+                    usleep(100000);
+
+                if (myself->fThreadStatus[myID] != _THREAD_COMPRESS_)
+                    continue;
+                uint32_t numBytes = myself->compressBuffer(myID);
+                myself->fThreadStatus[myID] = _THREAD_WRITE_;
+
+                //wait for the previous data to be written
+                while (myself->fThreadIndex != threadToWaitForBeforeWriting)
+                    usleep(1000);
+                //do the actual writing to disk
+                pthread_mutex_lock(&(myself->fMutex));
+                myself->writeCompressedDataToDisk(myID, numBytes);
+                myself->fThreadIndex = myID;
+                pthread_mutex_unlock(&(myself->fMutex));
+                myself->fThreadStatus[myID] = _THREAD_WAIT_;
+            }
+            return NULL;
+        }
+
+        uint64_t compressBuffer(uint32_t threadIndex)
+        {
+            uint32_t thisRoundNumRows = (fThreadNumRows[threadIndex]%fNumRowsPerTile) ? fThreadNumRows[threadIndex]%fNumRowsPerTile : fNumRowsPerTile;
+            uint32_t offset=0;
+            uint32_t currentCatalogRow = (fThreadNumRows[threadIndex]-1)/fNumRowsPerTile;
+            uint64_t compressedOffset = sizeof(TileHeader); //skip the 'TILE' marker and tile size entry
+
+            //now compress each column one by one by calling compression on arrays
+            for (uint32_t i=0;i<fRealColumns.size();i++)
+            {
+                fCatalog[currentCatalogRow][i].second = compressedOffset;
+
+                if (fRealColumns[i].col.num == 0) continue;
+
+                BlockHeader& head = fRealColumns[i].head;
+                const vector<uint16_t>& sequence = fRealColumns[i].comp_sequence;
+
+                //set the default byte telling if uncompressed the compressed Flag
+                uint64_t previousOffset = compressedOffset;
+                //skip header data
+                compressedOffset += sizeof(BlockHeader) + sizeof(uint16_t)*sequence.size();
+
+                for (uint32_t j=0;j<sequence.size(); j++)
+                {
+                    switch (sequence[j])
+                    {
+                        case zfits::kFactRaw:
+                                compressedOffset += compressUNCOMPRESSED(&(fCompressedBuffer[threadIndex][compressedOffset]),
+                                                                         &(fTransposedBuffer[threadIndex][offset]),
+                                                                         thisRoundNumRows,
+                                                                         fRealColumns[i].col.size,
+                                                                         fRealColumns[i].col.num);
+                        break;
+                        case zfits::kFactSmoothing:
+                                applySMOOTHING(&(fCompressedBuffer[threadIndex][compressedOffset]),
+                                               &(fTransposedBuffer[threadIndex][offset]),
+                                               thisRoundNumRows,
+                                               fRealColumns[i].col.size,
+                                               fRealColumns[i].col.num);
+                        break;
+                        case zfits::kFactHuffman16:
+                            if (head.ordering == zfits::kOrderByCol)
+                                compressedOffset += compressHUFFMAN(&(fCompressedBuffer[threadIndex][compressedOffset]),
+                                                                    &(fTransposedBuffer[threadIndex][offset]),
+                                                                    thisRoundNumRows,
+                                                                    fRealColumns[i].col.size,
+                                                                    fRealColumns[i].col.num);
+                            else
+                                compressedOffset += compressHUFFMAN(&(fCompressedBuffer[threadIndex][compressedOffset]),
+                                                                    &(fTransposedBuffer[threadIndex][offset]),
+                                                                    fRealColumns[i].col.num,
+                                                                    fRealColumns[i].col.size,
+                                                                    thisRoundNumRows);
+                        break;
+                        default:
+                            cout << "ERROR: Unkown compression sequence entry: " << sequence[j] << endl;
+                        break;
+                    }
+                }
+
+                //check if compressed size is larger than uncompressed
+                if (sequence[0] != zfits::kFactRaw &&
+                    compressedOffset - previousOffset > fRealColumns[i].col.size*fRealColumns[i].col.num*thisRoundNumRows+sizeof(BlockHeader)+sizeof(uint16_t)*sequence.size())
+                {//if so set flag and redo it uncompressed
+                    //cout << "REDOING UNCOMPRESSED" << endl;
+                    compressedOffset = previousOffset + sizeof(BlockHeader) + 1;
+                    compressedOffset += compressUNCOMPRESSED(&(fCompressedBuffer[threadIndex][compressedOffset]), &(fTransposedBuffer[threadIndex][offset]), thisRoundNumRows, fRealColumns[i].col.size, fRealColumns[i].col.num);
+                    BlockHeader he;
+                    he.size = compressedOffset - previousOffset;
+                    he.numProcs = 1;
+                    he.ordering = zfits::kOrderByRow;
+                    memcpy(&(fCompressedBuffer[threadIndex][previousOffset]), (char*)(&he), sizeof(BlockHeader));
+                    fCompressedBuffer[threadIndex][previousOffset+sizeof(BlockHeader)] = zfits::kFactRaw;
+                    offset += thisRoundNumRows*fRealColumns[i].col.size*fRealColumns[i].col.num;
+                   fCatalog[currentCatalogRow][i].first = compressedOffset - fCatalog[currentCatalogRow][i].second;
+                   continue;
+                }
+                head.size = compressedOffset - previousOffset;
+                memcpy(&(fCompressedBuffer[threadIndex][previousOffset]), (char*)(&head), sizeof(BlockHeader));
+                memcpy(&(fCompressedBuffer[threadIndex][previousOffset+sizeof(BlockHeader)]), sequence.data(), sizeof(uint16_t)*sequence.size());
+
+                 offset += thisRoundNumRows*fRealColumns[i].col.size*fRealColumns[i].col.num;
+                fCatalog[currentCatalogRow][i].first = compressedOffset - fCatalog[currentCatalogRow][i].second;
+            }
+
+            TileHeader tHead(thisRoundNumRows, compressedOffset);
+            memcpy(fCompressedBuffer[threadIndex], &tHead, sizeof(TileHeader));
+            return compressedOffset;
+        }
+
+        void copyTransposeTile(uint32_t index)
+        {
+            uint32_t thisRoundNumRows = (fTable.num_rows%fNumRowsPerTile) ? fTable.num_rows%fNumRowsPerTile : fNumRowsPerTile;
+
+            //copy the tile and transpose it
+            uint32_t offset = 0;
+            for (uint32_t i=0;i<fRealColumns.size();i++)
+            {
+                switch (fRealColumns[i].head.ordering)
+                {
+                    case zfits::kOrderByRow:
+                        for (uint32_t k=0;k<thisRoundNumRows;k++)
+                        {//regular, "semi-transposed" copy
+                            memcpy(&(fTransposedBuffer[index][offset]), &fBuffer[k*fRealRowWidth + fRealColumns[i].col.offset], fRealColumns[i].col.size*fRealColumns[i].col.num);
+                            offset += fRealColumns[i].col.size*fRealColumns[i].col.num;
+                        }
+                    break;
+
+                    case zfits::kOrderByCol :
+                        for (int j=0;j<fRealColumns[i].col.num;j++)
+                            for (uint32_t k=0;k<thisRoundNumRows;k++)
+                            {//transposed copy
+                                memcpy(&(fTransposedBuffer[index][offset]), &fBuffer[k*fRealRowWidth + fRealColumns[i].col.offset + fRealColumns[i].col.size*j], fRealColumns[i].col.size);
+                                offset += fRealColumns[i].col.size;
+                            }
+                    break;
+                    default:
+                            cout << "Error: unknown column ordering: " << fRealColumns[i].head.ordering << endl;
+
+                };
+            }
+        }
+
+        /// Specific compression functions
+        uint32_t compressUNCOMPRESSED(char* dest, const char* src, uint32_t numRows, uint32_t sizeOfElems, uint32_t numRowElems)
+        {
+            memcpy(dest, src, numRows*sizeOfElems*numRowElems);
+            return numRows*sizeOfElems*numRowElems;
+        }
+
+        uint32_t compressHUFFMAN(char* dest, const char* src, uint32_t numRows, uint32_t sizeOfElems, uint32_t numRowElems)
+        {
+            string huffmanOutput;
+            uint32_t previousHuffmanSize = 0;
+            if (numRows < 2)
+            {//if we have less than 2 elems to compress, Huffman encoder does not work (and has no point). Just return larger size than uncompressed to trigger the raw storage.
+                return numRows*sizeOfElems*numRowElems + 1000;
+            }
+            if (sizeOfElems < 2 )
+            {
+                cout << "Fatal ERROR: HUFMANN can only encode short or longer types" << endl;
+                return 0;
+            }
+            uint32_t huffmanOffset = 0;
+            for (uint32_t j=0;j<numRowElems;j++)
+            {
+                Huffman::Encode(huffmanOutput,
+                                reinterpret_cast<const uint16_t*>(&src[j*sizeOfElems*numRows]),
+                                numRows*(sizeOfElems/2));
+                reinterpret_cast<uint32_t*>(&dest[huffmanOffset])[0] = huffmanOutput.size() - previousHuffmanSize;
+                huffmanOffset += sizeof(uint32_t);
+                previousHuffmanSize = huffmanOutput.size();
+            }
+            const size_t totalSize = huffmanOutput.size() + huffmanOffset;
+
+            //only copy if not larger than not-compressed size
+            if (totalSize < numRows*sizeOfElems*numRowElems)
+                memcpy(&dest[huffmanOffset], huffmanOutput.data(), huffmanOutput.size());
+
+            return totalSize;
+        }
+
+        uint32_t compressSMOOTHMAN(char* dest, char* src, uint32_t numRows, uint32_t sizeOfElems, uint32_t numRowElems)
+        {
+            uint32_t colWidth = numRowElems;
+            for (int j=colWidth*numRows-1;j>1;j--)
+                reinterpret_cast<int16_t*>(src)[j] = reinterpret_cast<int16_t*>(src)[j] - (reinterpret_cast<int16_t*>(src)[j-1]+reinterpret_cast<int16_t*>(src)[j-2])/2;
+            //call the huffman transposed
+            return compressHUFFMAN(dest, src, numRowElems, sizeOfElems, numRows);
+        }
+
+        uint32_t applySMOOTHING(char* dest, char* src, uint32_t numRows, uint32_t sizeOfElems, uint32_t numRowElems)
+        {
+            uint32_t colWidth = numRowElems;
+            for (int j=colWidth*numRows-1;j>1;j--)
+                reinterpret_cast<int16_t*>(src)[j] = reinterpret_cast<int16_t*>(src)[j] - (reinterpret_cast<int16_t*>(src)[j-1]+reinterpret_cast<int16_t*>(src)[j-2])/2;
+
+            return numRows*sizeOfElems*numRowElems;
+        }
+
+        //Offsets calibration stuff.
+        vector<int16_t> fOffsetCalibration; ///< The calibration itself
+        int32_t         fStartCellsOffset;  ///< Offset in bytes for the startcell data
+        int32_t         fDataOffset;        ///< Offset in bytes for the data
+        int32_t         fNumSlices;         ///< Number of samples per pixel per event
+
+        //Compressed data stuff
+        int32_t         fCheckOffset;       ///< offset to the data pointer to calculate the checksum
+        uint32_t        fNumTiles;
+        uint32_t        fNumRowsPerTile;
+
+
+        //thread related stuff
+        uint32_t          fNumThreads;    ///< The number of threads that will be used to compress
+        uint32_t          fThreadIndex;   ///< A variable to assign threads indices
+        vector<pthread_t> fThread;        ///< The thread handler of the compressor
+        vector<uint32_t>  fThreadNumRows; ///< Total number of rows for thread to compresswww.;wwwwww
+        vector<uint32_t>  fThreadStatus;  ///< Flag telling whether the buffer to be transposed (and compressed) is full or empty
+        int32_t           fThreadLooper;      ///< Which thread will deal with the upcoming bunch of data ?
+        pthread_mutex_t   fMutex;             ///< mutex for compressing threads
+
+        struct CatalogEntry
+        {
+            CatalogEntry(int64_t f=0, int64_t s=0) : first(f), second(s) {};
+            int64_t first;
+            int64_t second;
+        } __attribute__((__packed__));
+
+       // typedef pair<int64_t, int64_t> CatalogEntry;
+        typedef vector<CatalogEntry>   CatalogRow;
+        typedef vector<CatalogRow>     CatalogType;
+        CatalogType          fCatalog;
+        Checksum             fCatalogSum;
+        Checksum             fRawSum;
+        off_t                fCatalogOffset;
+        vector<char>         fBufferVector;
+        vector<char>         fRawSumBuffer;
+        vector<vector<char>> fTransposedBufferVector;
+        vector<vector<char>> fCompressedBufferVector;
+        char*                fBuffer;
+        vector<char*>        fTransposedBuffer;
+        vector<char*>        fCompressedBuffer;
+        uint32_t             fRealRowWidth;
+        struct CompressedColumn
+        {
+            CompressedColumn(Table::Column& c, BlockHeader& h, vector<uint16_t>& cs) : col(c),
+                                                                                       head(h),
+                                                                                       comp_sequence(cs)
+            {}
+            Table::Column    col;
+            BlockHeader      head;
+            vector<uint16_t> comp_sequence;
+        };
+        vector<CompressedColumn> fRealColumns;
+
+        //thread states. Not all used, but they do not hurt
+        static const uint32_t       _THREAD_WAIT_ = 0; ///< Thread doing nothing
+        static const uint32_t   _THREAD_COMPRESS_ = 1; ///< Thread working, compressing
+        static const uint32_t _THREAD_DECOMPRESS_ = 2; ///< Thread working, decompressing
+        static const uint32_t      _THREAD_WRITE_ = 3; ///< Thread writing data to disk
+        static const uint32_t       _THREAD_READ_ = 4; ///< Thread reading data from disk
+        static const uint32_t       _THREAD_EXIT_ = 5; ///< Thread exiting
+
+};
+
+#ifndef __MARS__
+}; //namespace std
+#endif
+
+#ifdef crappy_example_usage
+zofits zofitsfile(123456, 100);
+zofitsfile.SetNumWorkingThreads(numThreads);
+zofitsfile.open((fileNameOut).c_str());
+std::zofits::BlockHeader zoheader(0, zfits::kOrderByRow, 2);
+vector<uint16_t> smoothmanProcessings(2);
+smoothmanProcessings[0] = zfits::kFactSmoothing;
+smoothmanProcessings[1] = zfits::kFactHuffman16;
+
+zofitsfile.AddColumn(sortedColumns[i].num,
+                     sortedColumns[i].type,
+                     colName,
+                     "");
+
+zofitsfile.AddColumn(sortedColumns[i].num,
+                     sortedColumns[i].type,
+                     colName,
+                     "",
+                     zoheader,
+                     smoothmanProcessings);
+
+zofitsfile.SetStr("ZCHKSUM", i->second.value, i->second.comment);
+zofitsfile.SetDrsCalibration(drsCalibFloat);
+zofitsfile.WriteTableHeader(tableName.c_str());
+zofitsfile.WriteRow(buffer, rowWidth);
+zofitsfile.close();
+
+#endif
