Index: trunk/Mars/mcore/factofits.h
===================================================================
--- trunk/Mars/mcore/factofits.h	(revision 17221)
+++ trunk/Mars/mcore/factofits.h	(revision 17221)
@@ -0,0 +1,228 @@
+/*
+ * factofits.h
+ *
+ *  Created on: Oct 16, 2013
+ *      Author: lyard
+ */
+
+#ifndef FACTOFITS_H_
+#define FACTOFITS_H_
+
+#include "zofits.h"
+
+#ifndef __MARS__
+namespace std
+{
+#else
+using namespace std;
+#endif
+
+class factofits : public zofits
+{
+
+    public:
+
+        factofits(uint32_t numTiles=1000,
+                  uint32_t rowPerTile=100,
+                  uint64_t maxUsableMem=0) : zofits(numTiles, rowPerTile, maxUsableMem)
+        {
+            fStartCellsOffset = -1;
+            fDataOffset       = -1;
+        }
+        factofits(const char* fname,
+                  uint32_t numTiles=1000,
+                  uint32_t rowPerTile=100,
+                  uint64_t maxUsableMem=0) : zofits(fname, numTiles, rowPerTile, maxUsableMem)
+        {
+            fStartCellsOffset = -1;
+            fDataOffset       = -1;
+        }
+        virtual ~factofits()
+        {
+        }
+
+        //whether or not a calibration was given to the file writer
+        virtual bool IsOffsetCalibration()
+        {
+            return (fOffsetCalibration.size() != 0);
+        }
+
+        //assign a given drs offset calibration
+        void SetDrsCalibration(const float* calib)
+        {
+            if (!IsOffsetCalibration())
+                fOffsetCalibration.resize(1440*1024);
+
+            for (uint32_t i=0;i<1440*1024;i++)
+                fOffsetCalibration[i] = (int16_t)(calib[i]*4096.f/2000.f);
+        }
+
+        void SetDrsCalibration(const vector<float>& calib)
+        {
+            if (calib.size() != 1440*1024)
+    #ifdef __EXCEPTIONS
+            throw runtime_error("Cannot load calibration with anything else than 1024 samples per pixel");
+    #else
+            { gLog << ___err___ << "ERROR - Cannot load calibration with anything else than 1024 samples per pixel" << endl; return; }
+    #endif
+            SetDrsCalibration(calib.data());
+        }
+
+        void SetDrsCalibration(const vector<int16_t>& vec)
+        {
+            if (!IsOffsetCalibration())
+                fOffsetCalibration.resize(1440*1024);
+
+            for (uint32_t i=0;i<1440*1024;i++)
+                fOffsetCalibration[i] = vec[i];
+        }
+
+        bool WriteTableHeader(const char* name="DATA")
+        {
+            if (!zofits::WriteTableHeader(name))
+                return false;
+
+            if (IsOffsetCalibration())
+            {//retrieve the column storing the start cell offsets, if required.
+
+                for (auto it=fRealColumns.begin(); it!=fRealColumns.end(); it++)//Table.cols.begin(); it!= fTable.cols.end(); it++)
+                {
+                    if (it->col.name == "StartCellData")
+                        fStartCellsOffset = it->col.offset;
+                    if (it->col.name == "Data")
+                    {
+                        fNumSlices = it->col.num;
+                        fDataOffset = it->col.offset;
+                        if (fNumSlices % 1440 != 0)
+                        {
+#ifdef __EXCEPTIONS
+                            throw runtime_error("Number of data samples not a multiple of 1440.");
+#else
+                            gLog << ___err___ << "ERROR - Number of data samples not a multiple of 1440. Doing it uncalibrated." << endl;
+#endif
+                            fOffsetCalibration.resize(0);
+                        }
+                        fNumSlices /= 1440;
+                    }
+                }
+                if (fStartCellsOffset < 0)
+                {
+#ifdef __EXCEPTIONS
+                    throw runtime_error("FACT Calibration requested, but \"StartCellData\" column not found.");
+#else
+                    gLog << ___err___ << "ERROR - FACT Calibration requested, but \"StartCellData\" column not found. Doing it uncalibrated." << endl;
+#endif
+                    //throw away the calibration data
+                    fOffsetCalibration.resize(0);
+                }
+                if (fDataOffset < 0)
+                {
+#ifdef __EXCEPTIONS
+                    throw runtime_error("FACT Calibration requested, but \"Data\" column not found.");
+#else
+                    gLog << ___err___ << "ERROR - FACT Calibration requested, but \"Data\" column not found. Doing it uncalibrated." << endl;
+#endif
+                    //throw away the calibration data
+                    fOffsetCalibration.resize(0);
+                }
+            }
+
+            return true;
+        }
+
+        virtual bool WriteDrsOffsetsTable()
+        {
+            if (!IsOffsetCalibration())
+                return false;
+
+            ofits c;
+            c.SetStr("XTENSION", "BINTABLE"            , "binary table extension");
+            c.SetInt("BITPIX"  , 8                     , "8-bit bytes");
+            c.SetInt("NAXIS"   , 2                     , "2-dimensional binary table");
+            c.SetInt("NAXIS1"  , 1024*1440*2           , "width of table in bytes");
+            c.SetInt("NAXIS2"  , 1                     , "number of rows in table");
+            c.SetInt("PCOUNT"  , 0                     , "size of special data area");
+            c.SetInt("GCOUNT"  , 1                     , "one data group (required keyword)");
+            c.SetInt("TFIELDS" , 1                     , "number of fields in each row");
+            c.SetStr("CHECKSUM", "0000000000000000"    , "Checksum for the whole HDU");
+            c.SetStr("DATASUM" ,  "         0"         , "Checksum for the data block");
+            c.SetStr("EXTNAME" , "ZDrsCellOffsets"     , "name of this binary table extension");
+            c.SetStr("TTYPE1"  , "OffsetCalibration"   , "label for field   1");
+            c.SetStr("TFORM1"  , "1474560I"            , "data format of field: 2-byte INTEGER");
+            c.End();
+
+            vector<char> swappedOffsets;
+            swappedOffsets.resize(1024*1440*sizeof(int16_t));
+            revcpy<sizeof(int16_t)>(swappedOffsets.data(), (char*)(fOffsetCalibration.data()), 1024*1440);
+
+            Checksum datasum;
+            datasum.add(swappedOffsets.data(), sizeof(int16_t)*1024*1440);
+
+            ostringstream dataSumStr;
+            dataSumStr << datasum.val();
+            c.SetStr("DATASUM", dataSumStr.str());
+
+            datasum += c.WriteHeader(*this);
+
+            const off_t here_I_am = tellp();
+
+            c.SetStr("CHECKSUM", datasum.str());
+            c.WriteHeader(*this);
+
+            seekp(here_I_am);
+
+            write(swappedOffsets.data(), swappedOffsets.size());
+
+            AlignTo2880Bytes();
+
+            return good();
+        }
+
+        virtual void DrsOffsetCalibrate(char* target_location)
+        {
+            if (IsOffsetCalibration())
+            {
+                int16_t* startCell = reinterpret_cast<int16_t*>(target_location + fStartCellsOffset);
+                int16_t* data      = reinterpret_cast<int16_t*>(target_location + fDataOffset);
+
+                for (uint32_t ch=0; ch<1440; ch++)
+                {
+                    if (startCell[ch] < 0)
+                    {
+                        data += fNumSlices;
+                        continue;
+                    }
+
+                    const int16_t modStart = startCell[ch]%1024;
+                    const int16_t *off     = fOffsetCalibration.data() + ch*1024;
+
+                    const int16_t* cal        = off+modStart;
+                    const int16_t* end_stride = data+fNumSlices;
+
+                    if (modStart+fNumSlices > 1024)
+                    {
+                        while (cal < off+1024)
+                            *data++ -= *cal++;
+                        cal = off;
+                    }
+
+                    while (data<end_stride)
+                        *data++ -= *cal++;
+                }
+            }
+        }
+
+private:
+        //Offsets calibration stuff.
+        vector<int16_t> fOffsetCalibration; ///< The calibration itself
+        int32_t         fStartCellsOffset;  ///< Offset in bytes for the startcell data
+        int32_t         fDataOffset;        ///< Offset in bytes for the data
+        int32_t         fNumSlices;         ///< Number of samples per pixel per event
+
+}; //class factofits
+
+#ifndef __MARS__
+}; //namespace std
+#endif
+
+#endif /* FACTOFITS_H_ */
Index: trunk/Mars/mcore/ofits.h
===================================================================
--- trunk/Mars/mcore/ofits.h	(revision 17219)
+++ trunk/Mars/mcore/ofits.h	(revision 17221)
@@ -364,5 +364,5 @@
         this->open(fname);
     }
-    ~ofits() { if (is_open()) close(); }
+    virtual ~ofits() { if (is_open()) close(); }
 
     virtual void open(const char * filename, bool addEXTNAMEKey=true)
@@ -621,17 +621,5 @@
 
         typecom << CommentFromType(typechar);
-/*        switch (typechar)
-        {
-        case 'L': typecom << "1-byte BOOL]";  break;
-        case 'A': typecom << "1-byte CHAR]";  break;
-        case 'B': typecom << "1-byte BOOL]";  break;
-        case 'I': typecom << "2-byte INT]";   break;
-        case 'J': typecom << "4-byte INT]";   break;
-        case 'K': typecom << "8-byte INT]";   break;
-        case 'E': typecom << "4-byte FLOAT]"; break;
-        case 'D': typecom << "8-byte FLOAT]"; break;
-        case 'Q': typecom << "var. Length]"; break;
-        }
-*/
+
         if (addHeaderKeys)
         {
@@ -643,17 +631,4 @@
         size_t size = SizeFromType(typechar);
 
-/*        switch (typechar)
-        {
-        case 'L': size = 1; break;
-        case 'A': size = 1; break;
-        case 'B': size = 1; break;
-        case 'I': size = 2; break;
-        case 'J': size = 4; break;
-        case 'K': size = 8; break;
-        case 'E': size = 4; break;
-        case 'D': size = 8; break;
-        case 'Q': size = 16; break;
-        }
-*/
         Table::Column col;
 
@@ -672,4 +647,23 @@
 
         return true;
+    }
+
+    virtual bool AddColumn(uint32_t cnt, char typechar, const string& name, const string& unit, const BlockHeaderWriter& header, const string& comment="", bool addHeaderKeys=true)
+    {
+        return AddColumn(cnt, typechar, name, unit, comment, addHeaderKeys);
+    }
+    virtual bool AddColumn(const string& compressionScheme, uint32_t cnt, char typechar, const string& name, const string& unit,  const string& comment="", bool addHeaderKeys=true)
+    {
+        if (compressionScheme != "" &&
+            compressionScheme != "RAW")
+        {
+#ifdef __EXCEPTIONS
+            throw runtime_error("Trying to add a compressed column to an uncompressed file");
+#else
+            gLog << ___err___ << "ERROR - Trying to add a compressed column to an uncompressed file" << endl;
+            return false;
+#endif
+        }
+        return AddColumn(cnt, typechar, name, unit, comment, addHeaderKeys);
     }
 
Index: trunk/Mars/mcore/zfits.h
===================================================================
--- trunk/Mars/mcore/zfits.h	(revision 17219)
+++ trunk/Mars/mcore/zfits.h	(revision 17221)
@@ -12,4 +12,8 @@
 #include "huffman.h"
 
+#include "FITS.h"
+
+using namespace FITS;
+
 #ifndef __MARS__
 namespace std
@@ -20,17 +24,4 @@
 {
 public:
-
-    enum CompressionProcess_t
-    {
-        kFactRaw       = 0x0,
-        kFactSmoothing = 0x1,
-        kFactHuffman16 = 0x2
-    };
-
-    enum RowOrdering_t
-    {
-        kOrderByCol = 'C',
-        kOrderByRow = 'R'
-    };
 
     // Basic constructor
@@ -110,42 +101,9 @@
             return;
         }
-
         ReadBinaryRow(row, dest);
     }
 
 private:
-#ifndef __CINT__
-    //Structure helper for reading tiles headers
-    struct TileHeader
-    {
-      char     id[4];
-      uint32_t numRows;
-      uint64_t size;
-
-      TileHeader() {}
-
-      TileHeader(uint32_t nRows,
-                 uint64_t s) : id({'T', 'I', 'L', 'E'}),
-                                 numRows(nRows),
-                                 size(s)
-      { };
-    } __attribute__((__packed__));
-
-    //Structure helper for reading blocks headers and compresion schemes
-    struct BlockHeader
-    {
-        uint64_t      size;
-        char          ordering;
-        unsigned char numProcs;
-        uint16_t      processings[];
-
-        BlockHeader(uint64_t      s=0,
-                    char          o=kOrderByRow,
-                    unsigned char n=1) : size(s),
-                                         ordering(o),
-                                         numProcs(n)
-        {}
-    } __attribute__((__packed__));
-#endif
+
     // Do what it takes to initialize the compressed structured
     void InitCompressionReading()
@@ -240,5 +198,5 @@
         const streampos catalogStart = tellg();
 
-fChkData.reset();
+        fChkData.reset();
 
         //do the actual reading
@@ -501,6 +459,8 @@
                 default:
                     clear(rdstate()|ios::badbit);
+                    ostringstream str;
+                    str << "Unknown processing applied to data. Col " << i << " proc " << j << " out of " << (int)head->numProcs;
 #ifdef __EXCEPTIONS
-                    throw runtime_error("Unknown processing applied to data. Aborting");
+                    throw runtime_error(str.str());
 #else
                     gLog << ___err___ << "ERROR - Unknown processing applied to data. Aborting" << endl;
Index: trunk/Mars/mcore/zofits.h
===================================================================
--- trunk/Mars/mcore/zofits.h	(revision 17219)
+++ trunk/Mars/mcore/zofits.h	(revision 17221)
@@ -10,4 +10,12 @@
 #include "MemoryManager.h"
 
+#include "FITS.h"
+
+#ifdef USE_BOOST_THREADS
+#include <boost/thread.hpp>
+#endif
+
+using namespace FITS;
+
 #ifndef __MARS__
 namespace std
@@ -21,54 +29,9 @@
     public:
 
-        //This has been duplicated from zfits. Should be be located one level up ?
-        //If so, where ?
-        enum CompressionProcess_t
-        {
-            kFactRaw       = 0x0,
-            kFactSmoothing = 0x1,
-            kFactHuffman16 = 0x2
-        };
-
-        enum RowOrdering_t
-        {
-            kOrderByCol = 'C',
-            kOrderByRow = 'R'
-        };
-
-        //TileHeaders are only written, but never read-back
-        //They are here to be able to recover raw data from binary if the header is corrupted
-        //Or to cross-check the data, if desired: the zfits method CheckIfFileIsConsistent can do this
-        struct TileHeader
-        {
-          char     id[4];
-          uint32_t numRows;
-          uint64_t size;
-          TileHeader(uint32_t nRows=0,
-                     uint64_t s=0) : id({'T', 'I', 'L', 'E'}),
-                                     numRows(nRows),
-                                     size(s)
-          { };
-        } __attribute__((__packed__));
-
-        //BlockHeaders are written before every compressed blob of data
-        struct BlockHeader
-        {
-            uint64_t      size;
-            char          ordering;
-            unsigned char numProcs;
-            BlockHeader(uint64_t      s=0,
-                        char          o=zfits::kOrderByRow,
-                        unsigned char n=1) : size(s),
-                                             ordering(o),
-                                             numProcs(n)
-            {}
-        } __attribute__((__packed__)) ;
-
-
         struct WriteTarget
         {
             bool operator < (const WriteTarget& other)
             {
-                tile_num < other.tile_num;
+                return tile_num < other.tile_num;
             }
             uint32_t tile_num;
@@ -84,4 +47,5 @@
             }
             shared_ptr<MemoryChunk> src;
+            shared_ptr<MemoryChunk> transposed_src;
             WriteTarget             target;
             uint32_t                num_rows;
@@ -94,8 +58,8 @@
                uint64_t maxUsableMem=0) : ofits(),
                                           fMemPool(0, maxUsableMem),
-                                          fWriteToDiskQueue(bind(&zofits::WriteBufferToDisk, this, placeholders::_1), true)
+                                          fWriteToDiskQueue(bind(&zofits::WriteBufferToDisk, this, placeholders::_1), true, false)
         {
             InitMemberVariables(numTiles, rowPerTile, maxUsableMem);
-            SetNumWorkingThreads(1);
+            SetNumWorkingThreads(fNumQueues);
         }
 
@@ -105,11 +69,11 @@
                uint64_t maxUsableMem=0) : ofits(fname),
                                           fMemPool(0, maxUsableMem),
-                                          fWriteToDiskQueue(bind(&zofits::WriteBufferToDisk, this, placeholders::_1), true)
+                                          fWriteToDiskQueue(bind(&zofits::WriteBufferToDisk, this, placeholders::_1), true, false)
         {
             InitMemberVariables(numTiles, rowPerTile, maxUsableMem);
-            SetNumWorkingThreads(1);
-        }
-
-        ~zofits()
+            SetNumWorkingThreads(fNumQueues);
+        }
+
+        virtual ~zofits()
         {
         }
@@ -118,4 +82,7 @@
         void InitMemberVariables(uint32_t nt=0, uint32_t rpt=0, uint64_t maxUsableMem=0)
         {
+            if (nt == 0)
+                throw runtime_error("Cannot work with a catalog of size 0. sorry.");
+
             fCheckOffset  = 0;
 
@@ -123,56 +90,19 @@
             fNumRowsPerTile = rpt;
 
-            fNumQueues   = 0;
-            fQueueLooper = 0;
-
             fBuffer       = NULL;
             fRealRowWidth = 0;
+            fCatalogExtraRows = 0;
 
             fCatalogOffset    =  0;
-            fStartCellsOffset = -1;
-            fDataOffset       = -1;
 
             fMaxUsableMem = maxUsableMem;
-        }
-
-        //whether or not a calibration was given to the file writer
-        bool IsOffsetCalibrated()
-        {
-            return (fOffsetCalibration.size() != 0);
-        }
-
-        //assign a given drs offset calibration
-        void SetDrsCalibration(const float* calib)
-        {
-            if (!IsOffsetCalibrated())
-                fOffsetCalibration.resize(1440*1024);
-
-            for (uint32_t i=0;i<1440*1024;i++)
-                fOffsetCalibration[i] = (int16_t)(calib[i]*4096.f/2000.f);
-        }
-
-        void SetDrsCalibration(const vector<float>& calib)
-        {
-            if (calib.size() != 1440*1024)
-#ifdef __EXCEPTIONS
-            throw runtime_error("Cannot load calibration with anything else than 1024 samples per pixel");
-#else
-            gLog << ___err___ << "ERROR - Cannot load calibration with anything else than 1024 samples per pixel");
-#endif
-            SetDrsCalibration(calib.data());
-        }
-
-        void LoadDrsCalibrationFromFile(const string& fileName)
-        {
-            factfits drsFile(fileName);
-            float* drsCalibFloat  = reinterpret_cast<float*>(drsFile.SetPtrAddress("BaselineMean"));
-
-            drsFile.GetNextRow();
-
-            SetDrsCalibration(drsCalibFloat);
-        }
+#ifdef __EXCEPTIONS
+            fThreadsException = exception_ptr();
+#endif
+        }
+
 
         //write the header of the binary table
-        bool WriteTableHeader(const char* name="DATA")
+        virtual bool WriteTableHeader(const char* name="DATA")
         {
             if (!reallocateBuffers())
@@ -181,55 +111,17 @@
             ofits::WriteTableHeader(name);
 
-            //start the compression queues
-            for (auto it=fCompressionQueues.begin(); it!= fCompressionQueues.end(); it++)
-                it->start();
+            if (fNumQueues != 0)
+            {
+                //start the compression queues
+                for (auto it=fCompressionQueues.begin(); it!= fCompressionQueues.end(); it++)
+                    it->start();
+
+                fWriteToDiskQueue.start();
+            }
 
             //mark that no tile has been written so far
             fLatestWrittenTile = -1;
 
-            if (IsOffsetCalibrated())
-            {//retrieve the column storing the start cell offsets, if required.
-
-                for (auto it=fRealColumns.begin(); it!=fRealColumns.end(); it++)//Table.cols.begin(); it!= fTable.cols.end(); it++)
-                {
-                    if (it->col.name == "StartCellData")
-                        fStartCellsOffset = it->col.offset;
-                    if (it->col.name == "Data")
-                    {
-                        fNumSlices = it->col.num;
-                        fDataOffset = it->col.offset;
-                        if (fNumSlices % 1440 != 0)
-                        {
-#ifdef __EXCEPTIONS
-                            throw runtime_error("Number of data samples not a multiple of 1440.");
-#else
-                            gLog << ___err___ << "ERROR - Number of data samples not a multiple of 1440. Doing it uncalibrated." << endl;
-#endif
-                            fOffsetCalibration.resize(0);
-                        }
-                        fNumSlices /= 1440;
-                    }
-                }
-                if (fStartCellsOffset < 0)
-                {
-#ifdef __EXCEPTIONS
-                    throw runtime_error("FACT Calibration requested, but \"StartCellData\" column not found.");
-#else
-                    gLog << ___err___ << "ERROR - FACT Calibration requested, but \"StartCellData\" column not found. Doing it uncalibrated." << endl;
-#endif
-                    //throw away the calibration data
-                    fOffsetCalibration.resize(0);
-                }
-                if (fDataOffset < 0)
-                {
-#ifdef __EXCEPTIONS
-                    throw runtime_error("FACT Calibration requested, but \"Data\" column not found.");
-#else
-                    gLog << ___err___ << "ERROR - FACT Calibration requested, but \"Data\" column not found. Doing it uncalibrated." << endl;
-#endif
-                    //throw away the calibration data
-                    fOffsetCalibration.resize(0);
-                }
-            }
+            return good();
         }
 
@@ -246,55 +138,13 @@
             SetInt("ZTILELEN", fNumRowsPerTile, "Number of rows per tile");
             SetInt("THEAP", 0, "");
-            SetStr("RAWSUM", "         0", "Checksum of raw littlen endian data");
-
-
+            SetStr("RAWSUM", "         0", "Checksum of raw little endian data");
+            SetFloat("ZRATIO", 0, "Compression ratio");
+
+            fCatalogExtraRows = 0;
             fRawSum.reset();
         }
 
-        bool WriteDrsOffsetsTable()
-        {
-            if (!IsOffsetCalibrated())
-                return false;
-
-            ofits c;
-            c.SetStr("XTENSION", "BINTABLE"            , "binary table extension");
-            c.SetInt("BITPIX"  , 8                     , "8-bit bytes");
-            c.SetInt("NAXIS"   , 2                     , "2-dimensional binary table");
-            c.SetInt("NAXIS1"  , 1024*1440*2           , "width of table in bytes");
-            c.SetInt("NAXIS2"  , 1                     , "number of rows in table");
-            c.SetInt("PCOUNT"  , 0                     , "size of special data area");
-            c.SetInt("GCOUNT"  , 1                     , "one data group (required keyword)");
-            c.SetInt("TFIELDS" , 1                     , "number of fields in each row");
-            c.SetStr("CHECKSUM", "0000000000000000"    , "Checksum for the whole HDU");
-            c.SetStr("DATASUM" ,  "         0"         , "Checksum for the data block");
-            c.SetStr("EXTNAME" , "ZDrsCellOffsets"     , "name of this binary table extension");
-            c.SetStr("TTYPE1"  , "OffsetCalibration"   , "label for field   1");
-            c.SetStr("TFORM1"  , "1474560I"            , "data format of field: 2-byte INTEGER");
-            c.End();
-
-            vector<char> swappedOffsets;
-            swappedOffsets.resize(1024*1440*sizeof(int16_t));
-            revcpy<sizeof(int16_t)>(swappedOffsets.data(), (char*)(fOffsetCalibration.data()), 1024*1440);
-
-            Checksum datasum;
-            datasum.add(swappedOffsets.data(), sizeof(int16_t)*1024*1440);
-
-            ostringstream dataSumStr;
-            dataSumStr << datasum.val();
-            c.SetStr("DATASUM", dataSumStr.str());
-
-            datasum += c.WriteHeader(*this);
-
-            const off_t here_I_am = tellp();
-
-            c.SetStr("CHECKSUM", datasum.str());
-            c.WriteHeader(*this);
-
-            seekp(here_I_am);
-
-            write(swappedOffsets.data(), swappedOffsets.size());
-
-            AlignTo2880Bytes();
-
+        virtual bool WriteDrsOffsetsTable()
+        {
             return good();
         }
@@ -335,4 +185,24 @@
             return good();
         }
+        virtual void DrsOffsetCalibrate(char* )
+        {
+
+        }
+
+        void GrowCatalog()
+        {
+            uint32_t orig_catalog_size = fCatalog.size();
+
+            fCatalog.resize(fCatalog.size()*2);
+            for (uint32_t i=orig_catalog_size;i<fCatalog.size(); i++)
+            {
+                fCatalog[i].resize(fTable.num_cols);
+                for (auto it=(fCatalog[i].begin()); it!=fCatalog[i].end(); it++)
+                    *it = CatalogEntry(0,0);
+            }
+
+            fCatalogExtraRows += orig_catalog_size;
+            fNumTiles         += orig_catalog_size;
+        }
 
         bool WriteRow(const void* ptr, size_t cnt, bool byte_swap=true)
@@ -350,4 +220,5 @@
             if (fTable.num_rows >= fNumRowsPerTile*fNumTiles)
             {
+//                GrowCatalog();
 #ifdef __EXCEPTIONS
                 throw runtime_error("Maximum number of rows exceeded for this file");
@@ -384,35 +255,5 @@
             fRawSum.add(fRawSumBuffer, false);
 
-            if (IsOffsetCalibrated())
-            {
-
-                int16_t* startCell = reinterpret_cast<int16_t*>(target_location + fStartCellsOffset);
-                int16_t* data      = reinterpret_cast<int16_t*>(target_location + fDataOffset);
-
-                for (uint32_t ch=0; ch<1440; ch++)
-                {
-                    if (startCell[ch] < 0)
-                    {
-                        data += fNumSlices;
-                        continue;
-                    }
-
-                    const int16_t modStart = startCell[ch]%1024;
-                    const int16_t *off     = fOffsetCalibration.data() + ch*1024;
-
-                    const int16_t* cal        = off+modStart;
-                    const int16_t* end_stride = data+fNumSlices;
-
-                    if (modStart+fNumSlices > 1024)
-                    {
-                        while (cal < off+1024)
-                            *data++ -= *cal++;
-                        cal = off;
-                    }
-
-                    while (data<end_stride)
-                        *data++ -= *cal++;
-                }
-            }
+            DrsOffsetCalibrate(target_location);
 
             fTable.num_rows++;
@@ -423,11 +264,39 @@
                 SetNextCompression(compress_target);
 
-                if (!fCompressionQueues[fQueueLooper].post(compress_target))
-                    throw runtime_error("I could not post this buffer. This does not make sense...");
-
-                fQueueLooper = (fQueueLooper+1)%fNumQueues;
-            }
-
-            return true;
+                if (fNumQueues == 0)
+                { //no worker threads. do everything in-line
+                    uint64_t size_to_write = CompressBuffer(compress_target);
+
+                    WriteTarget write_target;
+                    write_target.size     = size_to_write;
+                    write_target.target   = compress_target.target.target;
+                    write_target.tile_num = compress_target.target.tile_num;
+
+                    if (!WriteBufferToDisk(write_target))
+                        throw runtime_error("Something went wrong while writing to disk");
+                }
+                else
+                {
+                    //if all queues are empty, use queue 0
+                     uint32_t min_index     = 0;
+                     uint32_t min_size      = numeric_limits<uint32_t>::max();
+                     uint32_t current_index = 0;
+
+                     for (auto it=fCompressionQueues.begin(); it!=fCompressionQueues.end(); it++)
+                     {
+                         if (it->size() < min_size)
+                         {
+                             min_index = current_index;
+                             min_size = it->size();
+                         }
+                         current_index++;
+                     }
+
+                    if (!fCompressionQueues[min_index].post(compress_target))
+                        throw runtime_error("I could not post this buffer. This does not make sense...");
+                }
+            }
+
+            return good();
         }
 
@@ -441,8 +310,8 @@
         void SetNextCompression(CompressionTarget& target)
         {
+            //get space for transposed data
             shared_ptr<MemoryChunk> transposed_data = fMemPool.malloc();
 
-            copyTransposeTile(fBuffer, transposed_data.get()->get());
-
+            //fill up write to disk target
             WriteTarget write_target;
             write_target.tile_num = (fTable.num_rows-1)/fNumRowsPerTile;
@@ -450,14 +319,69 @@
             write_target.target   = fMemPool.malloc();
 
-            target.src      = transposed_data;
+            //fill up compression target
+            target.src            = fSmartBuffer;
+            target.transposed_src      = transposed_data;
             target.target   = write_target;
             target.num_rows = fTable.num_rows;
+
+            //get a new buffer to host the incoming data
+            fSmartBuffer = fMemPool.malloc();
+            fBuffer      = fSmartBuffer.get()->get();
+        }
+
+        void ShrinkCatalog()
+        {
+            //did we write more rows than what the catalog could host ?
+            if (fCatalogExtraRows != 0)
+            {
+                //how many rows can the regular catalog host ?
+                const uint32_t max_regular_rows = (fCatalog.size() - fCatalogExtraRows)*fNumRowsPerTile;
+                //what's the shrink factor to be applied ?
+                const uint32_t shrink_factor = fTable.num_rows/max_regular_rows + ((fTable.num_rows%max_regular_rows) ? 1 : 0);
+
+                //shrink the catalog !
+                for (uint32_t i=0; i<fTable.num_rows/fNumRowsPerTile; i+= shrink_factor)
+                {//add the elements one by one, so that the empty ones at the end (i.e. fTable.num_rows%shrink_factor) do not create havoc
+                    const uint32_t target_catalog_row = i/shrink_factor;
+                    //move data from current row (i) to target row
+                    for (uint32_t j=0; j<fTable.num_cols; j++)
+                    {
+                        fCatalog[target_catalog_row][j].second = fCatalog[i][j].second;
+                        fCatalog[target_catalog_row][j].first  = 0;
+                        uint64_t last_size   = fCatalog[i][j].first;
+                        uint64_t last_offset = fCatalog[i][j].second;
+
+                        for (uint32_t k=1; k<shrink_factor; k++)
+                        {
+                           if (fCatalog[i+k][j].second != 0)
+                           {
+                               fCatalog[target_catalog_row][j].first +=  fCatalog[i+k][j].second - last_offset;
+                           }
+                           else
+                           {
+                               fCatalog[target_catalog_row][j].first += last_size;
+                               break;
+                           }
+                           last_size   = fCatalog[i+k][j].first;
+                           last_offset = fCatalog[i+k][j].second;
+                        }
+                    }
+                }
+
+                fCatalog.resize(fCatalog.size() - fCatalogExtraRows);
+
+                //update header keywords
+                const uint32_t new_num_rows_per_tiles = fNumRowsPerTile*shrink_factor;
+                const uint32_t new_num_tiles_written = (fTable.num_rows + new_num_rows_per_tiles-1)/new_num_rows_per_tiles;
+                SetInt("THEAP", new_num_tiles_written*2*sizeof(int64_t)*fTable.num_cols);
+                SetInt("NAXIS2", new_num_tiles_written);
+                SetInt("ZTILELEN", new_num_rows_per_tiles);
+                cout << "New num rows per tiles: " << new_num_rows_per_tiles << " shrink factor: " << shrink_factor << endl;
+                cout << "Num tiles written: " << new_num_tiles_written << endl;
+            }
         }
 
         bool close()
         {
-            if (tellp() < 0)
-                return false;
-
             for (auto it=fCompressionQueues.begin(); it != fCompressionQueues.end(); it++)
                 it->wait();
@@ -465,4 +389,21 @@
             fWriteToDiskQueue.wait();
 
+            if (tellp() < 0)
+            {
+#ifdef __EXCEPTIONS
+                throw runtime_error("Something went wrong while writing to disk...");
+#else
+                return false;
+#endif
+            }
+
+#ifdef __EXCEPTIONS
+            //check if something happened to the compression threads
+            if (fThreadsException != exception_ptr())
+            {
+                rethrow_exception(fThreadsException);
+            }
+#endif
+
             if (fTable.num_rows%fNumRowsPerTile != 0)
             {
@@ -470,5 +411,9 @@
                 SetNextCompression(compress_target);
 
+                //set number of threads to zero before calling CompressBuffer
+                int32_t backup_num_queues = fNumQueues;
+                fNumQueues = 0;
                 uint64_t size_to_write = CompressBuffer(compress_target);
+                fNumQueues = backup_num_queues;
 
                 WriteTarget write_target;
@@ -518,4 +463,7 @@
             }
 
+            float compression_ratio = (float)(fRealRowWidth*fTable.num_rows)/(float)heap_size;
+            SetFloat("ZRATIO", compression_ratio);
+
             //add to the heap size the size of the gap between the catalog and the actual heap
             heap_size += (fCatalog.size() - total_num_tiles_written)*fTable.num_cols*sizeof(uint64_t)*2;
@@ -523,4 +471,5 @@
             SetInt("PCOUNT", heap_size, "size of special data area");
 
+
             //Just for updating the fCatalogSum value
             WriteCatalog();
@@ -548,11 +497,14 @@
         bool AddColumn(uint32_t cnt, char typechar, const string& name, const string& unit, const string& comment="", bool addHeaderKeys=true)
         {
-            BlockHeader head;
-            vector<uint16_t> processing(1);
-            processing[0] = kFactRaw;
-            AddColumn(cnt, typechar, name, unit, head, processing, comment, addHeaderKeys);
-        }
-
-        bool AddColumn(uint32_t cnt, char typechar, const string& name, const string& unit, BlockHeader& header, vector<uint16_t>& comp_sequence, const string& comment="", bool addHeaderKeys=true)
+            BlockHeaderWriter head;
+            return AddColumn(cnt, typechar, name, unit, head, comment, addHeaderKeys);
+        }
+
+        bool AddColumn(const string& compressionScheme, uint32_t cnt, char typechar, const string& name, const string& unit,  const string& comment="", bool addHeaderKeys=true)
+        {
+            BlockHeaderWriter head(compressionScheme);
+            return AddColumn(cnt, typechar, name, unit, head, comment, addHeaderKeys);
+        }
+        bool AddColumn(uint32_t cnt, char typechar, const string& name, const string& unit, const BlockHeaderWriter& header, const string& comment="", bool addHeaderKeys=true)
         {
             if (!ofits::AddColumn(1, 'Q', name, unit, comment, addHeaderKeys))
@@ -570,5 +522,5 @@
             fRealRowWidth += size*cnt;
 
-            fRealColumns.emplace_back(CompressedColumn(col, header, comp_sequence));
+            fRealColumns.emplace_back(CompressedColumn(col, header));
 
             ostringstream strKey, strVal, strCom;
@@ -583,5 +535,5 @@
             strKey << "ZCTYP" << fRealColumns.size();
             strVal << "FACT";
-            strCom << "Comp. of FACT telescope";
+            strCom << "Compression type FACT";
             SetStr(strKey.str(), strVal.str(), strCom.str());
 
@@ -589,5 +541,26 @@
         }
 
-        bool SetNumWorkingThreads(uint32_t num)
+        bool AddColumnShort(const string& compressionScheme, uint32_t cnt, const string &name, const string &unit="", const string &comment="")
+        { return AddColumn(compressionScheme, cnt, 'I', name, unit, comment); }
+        bool AddColumnInt(const string& compressionScheme, uint32_t cnt, const string &name, const string &unit="", const string &comment="")
+        { return AddColumn(compressionScheme, cnt, 'J', name, unit, comment); }
+        bool AddColumnLong(const string& compressionScheme, uint32_t cnt, const string &name, const string &unit="", const string &comment="")
+        { return AddColumn(compressionScheme, cnt, 'K', name, unit, comment); }
+        bool AddColumnFloat(const string& compressionScheme, uint32_t cnt, const string &name, const string &unit="", const string &comment="")
+        { return AddColumn(compressionScheme, cnt, 'E', name, unit, comment); }
+        bool AddColumnDouble(const string& compressionScheme, uint32_t cnt, const string &name, const string &unit="", const string &comment="")
+        { return AddColumn(compressionScheme, cnt, 'D', name, unit, comment); }
+        bool AddColumnChar(const string& compressionScheme, uint32_t cnt, const string &name, const string &unit="", const string &comment="")
+        { return AddColumn(compressionScheme, cnt, 'A', name, unit, comment); }
+        bool AddColumnByte(const string& compressionScheme, uint32_t cnt, const string &name, const string &unit="", const string &comment="")
+        { return AddColumn(compressionScheme, cnt, 'B', name, unit, comment); }
+        bool AddColumnBool(const string& compressionScheme, uint32_t cnt, const string &name, const string &unit="", const string &comment="")
+        { return AddColumn(compressionScheme, cnt, 'L', name, unit, comment); }
+
+        static void SetNumThreads(int32_t num) { fNumQueues = num;}
+        static int32_t GetNumThreads() { return fNumQueues;}
+    protected:
+
+        bool SetNumWorkingThreads(int32_t num)
         {
             if (is_open())
@@ -596,19 +569,35 @@
                 throw runtime_error("File must be closed before changing the number of compression threads");
 #else
-                gLog << ___err___ << "ERROR - File must be closed before changing the number of compression threads");
+                gLog << ___err___ << "ERROR - File must be closed before changing the number of compression threads";
 #endif
                 return false;
             }
-            if (num < 1 || num > 64)
-            {
-#ifdef __EXCEPTIONS
-                throw runtime_error("Number of threads must be between 1 and 64");
-#else
-                gLog << ___err___ << "ERROR - Number of threads must be between 1 and 64");
+#ifdef USE_BOOST_THREADS
+            int32_t num_available_cores = boost::thread::hardware_concurrency();
+#else
+            int32_t num_available_cores = thread::hardware_concurrency();
+#endif
+
+            if (num_available_cores == 0)
+            {//could not detect number of available cores from system properties...
+                //Assuming that 5 cores are available (4 compression, 1 write)
+                num_available_cores = 5;
+            }
+            if (num > num_available_cores)
+            {
+                ostringstream str;
+                str << "Number of threads cannot be greater than physically available (" << num_available_cores << ")";
+#ifdef __EXCEPTIONS
+                throw runtime_error(str.str());
+#else
+                gLog << ___err___ << "ERROR - " << str.str();
 #endif
                 return false;
             }
 
-            if (fCompressionQueues.size() == num)
+            if (num == -1)
+                num = num_available_cores-2; // 1 for writing, one for the main thread
+
+            if (fCompressionQueues.size() == (uint32_t)num)
                 return true;
 
@@ -617,5 +606,5 @@
 
             //shrink
-            if (num < fCompressionQueues.size())
+            if ((uint32_t)num < fCompressionQueues.size())
             {
                 fCompressionQueues.resize(num, queue);
@@ -626,12 +615,8 @@
             fCompressionQueues.resize(num, queue);
 
-            fNumQueues   = num;
-            fQueueLooper = 0;
+            fNumQueues = num;
 
             return true;
         }
-
-
-    private:
 
         bool reallocateBuffers()
@@ -641,7 +626,5 @@
 
             fSmartBuffer = fMemPool.malloc();
-            fBuffer = fSmartBuffer.get()->get();
-//            memset(fBuffer, 0, 4);
-//            fBuffer += 4;
+            fBuffer      = fSmartBuffer.get()->get();
 
             fRawSumBuffer.resize(fRealRowWidth + 4-fRealRowWidth%4); //for checksuming
@@ -686,8 +669,28 @@
         }
 
-        bool CompressBuffer(const CompressionTarget& target)
-        {
-            //compress the buffer
-            uint64_t compressed_size = compressBuffer(target.target.target.get()->get(), target.src.get()->get(), target.num_rows);
+        uint32_t CompressBuffer(const CompressionTarget& target)
+        {
+            uint64_t compressed_size = 0;
+#ifdef __EXCEPTIONS
+            try
+            {
+#endif
+                //transpose the original data
+                copyTransposeTile(target.src.get()->get(), target.transposed_src.get()->get());
+
+                //compress the buffer
+                compressed_size = compressBuffer(target.target.target.get()->get(), target.transposed_src.get()->get(), target.num_rows);
+#ifdef __EXCEPTIONS
+            }
+            catch (...)
+            {
+                fThreadsException = current_exception();
+                if (fNumQueues == 0)
+                    rethrow_exception(fThreadsException);
+            }
+#endif
+
+            if (fNumQueues == 0)
+                return compressed_size;
 
             //post the result to the writing queue
@@ -699,5 +702,6 @@
 
             fWriteToDiskQueue.post(wt);
-            return true;
+
+            return compressed_size;
         }
 
@@ -705,5 +709,5 @@
         {
             //is this the tile we're supposed to write ?
-            if (target.tile_num != fLatestWrittenTile+1)
+            if (target.tile_num != (uint32_t)(fLatestWrittenTile+1))
                 return false;
 
@@ -711,7 +715,5 @@
 
             //write the buffer to disk.
-            writeCompressedDataToDisk(target.target.get()->get(), target.size);
-
-            return true;
+            return writeCompressedDataToDisk(target.target.get()->get(), target.size);
         }
 
@@ -736,63 +738,55 @@
                 if (fRealColumns[i].col.num == 0) continue;
 
-                BlockHeader& head = fRealColumns[i].head;
-                const vector<uint16_t>& sequence = fRealColumns[i].comp_sequence;
+                BlockHeaderWriter& head = fRealColumns[i].block_head;
 
                 //set the default byte telling if uncompressed the compressed Flag
                 uint64_t previousOffset = compressedOffset;
+
                 //skip header data
-                compressedOffset += sizeof(BlockHeader) + sizeof(uint16_t)*sequence.size();
-
-                for (uint32_t j=0;j<sequence.size(); j++)
+                compressedOffset += head.SizeOnDisk();
+
+                for (uint32_t j=0;j<head.NumProcs();j++)//sequence.size(); j++)
                 {
-                    switch (sequence[j])
+                    switch (head.Proc(j))
                     {
-                        case zfits::kFactRaw:
-                                compressedOffset += compressUNCOMPRESSED(dest + compressedOffset,
-                                                                         src  + offset,
-                                                                         thisRoundNumRows,
-                                                                         fRealColumns[i].col.size,
-                                                                         fRealColumns[i].col.num);
+                        case kFactRaw:
+                                compressedOffset += compressUNCOMPRESSED(dest + compressedOffset, src  + offset, thisRoundNumRows*fRealColumns[i].col.size*fRealColumns[i].col.num);
                         break;
-                        case zfits::kFactSmoothing:
-                                applySMOOTHING(dest + compressedOffset,
-                                               src  + offset,
-                                               thisRoundNumRows,
-                                               fRealColumns[i].col.size,
-                                               fRealColumns[i].col.num);
+                        case kFactSmoothing:
+                                applySMOOTHING(src + offset, thisRoundNumRows*fRealColumns[i].col.num);
                         break;
-                        case zfits::kFactHuffman16:
-                            if (head.ordering == zfits::kOrderByCol)
-                                compressedOffset += compressHUFFMAN(dest + compressedOffset,
-                                                                    src  + offset,
-                                                                    thisRoundNumRows,
-                                                                    fRealColumns[i].col.size,
-                                                                    fRealColumns[i].col.num);
+                        case kFactHuffman16:
+                            if (head.Ordering() == kOrderByCol)
+                                compressedOffset += compressHUFFMAN(dest + compressedOffset, src  + offset, thisRoundNumRows, fRealColumns[i].col.size, fRealColumns[i].col.num);
                             else
-                                compressedOffset += compressHUFFMAN(dest + compressedOffset,
-                                                                    src  + offset,
-                                                                    fRealColumns[i].col.num,
-                                                                    fRealColumns[i].col.size,
-                                                                    thisRoundNumRows);
+                                compressedOffset += compressHUFFMAN(dest + compressedOffset, src  + offset, fRealColumns[i].col.num, fRealColumns[i].col.size, thisRoundNumRows);
                         break;
                         default:
-                            cout << "ERROR: Unkown compression sequence entry: " << sequence[j] << endl;
-                        break;
+                        {
+                            ostringstream str;
+                            str << "Unkown compression sequence entry: " << head.Proc(j);
+#ifdef __EXCEPTIONS
+                            throw runtime_error(str.str());
+#else
+                            gLog << ___err___ << "ERROR - " << str.str();
+                            return 0;
+#endif
+                        }
                     }
                 }
 
-                //check if compressed size is larger than uncompressed
-                if (sequence[0] != zfits::kFactRaw &&
-                    compressedOffset - previousOffset > fRealColumns[i].col.size*fRealColumns[i].col.num*thisRoundNumRows+sizeof(BlockHeader)+sizeof(uint16_t)*sequence.size())
+               //check if compressed size is larger than uncompressed
+                if ((head.Proc(0) != kFactRaw) && (compressedOffset - previousOffset > fRealColumns[i].col.size*fRealColumns[i].col.num*thisRoundNumRows+head.SizeOnDisk()))// && two)
                 {//if so set flag and redo it uncompressed
-                    cout << "REDOING UNCOMPRESSED" << endl;
-                    compressedOffset = previousOffset + sizeof(BlockHeader) + 1;
-                    compressedOffset += compressUNCOMPRESSED(dest + compressedOffset, src + offset, thisRoundNumRows, fRealColumns[i].col.size, fRealColumns[i].col.num);
-                    BlockHeader he;
-                    he.size = compressedOffset - previousOffset;
-                    he.numProcs = 1;
-                    he.ordering = zfits::kOrderByRow;
-                    memcpy(dest + previousOffset, (char*)(&he), sizeof(BlockHeader));
-                    dest[previousOffset+sizeof(BlockHeader)] = zfits::kFactRaw;
+                    cout << "Redoing uncompressed ! " << endl;
+                    //de-smooth !
+                    if (head.Proc(0) == kFactSmoothing)
+                        UnApplySMOOTHING(src+offset, fRealColumns[i].col.num*thisRoundNumRows);
+
+                    BlockHeaderWriter he;
+                    compressedOffset = previousOffset + he.SizeOnDisk();
+                    compressedOffset += compressUNCOMPRESSED(dest + compressedOffset, src + offset, thisRoundNumRows*fRealColumns[i].col.size*fRealColumns[i].col.num);
+                    he.SetBlockSize(compressedOffset - previousOffset);
+                    he.Write(dest+previousOffset);
                     offset += thisRoundNumRows*fRealColumns[i].col.size*fRealColumns[i].col.num;
                     fCatalog[currentCatalogRow][i].first = compressedOffset - fCatalog[currentCatalogRow][i].second;
@@ -800,7 +794,6 @@
                 }
 
-                head.size = compressedOffset - previousOffset;
-                memcpy(dest + previousOffset, (char*)(&head), sizeof(BlockHeader));
-                memcpy(dest + previousOffset+sizeof(BlockHeader), sequence.data(), sizeof(uint16_t)*sequence.size());
+                head.SetBlockSize(compressedOffset - previousOffset);
+                head.Write(dest + previousOffset);
 
                 offset += thisRoundNumRows*fRealColumns[i].col.size*fRealColumns[i].col.num;
@@ -814,5 +807,5 @@
         }
 
-        void copyTransposeTile(const char* src, char* dest)//uint32_t index)
+        void copyTransposeTile(const char* src, char* dest)
         {
             uint32_t thisRoundNumRows = (fTable.num_rows%fNumRowsPerTile) ? fTable.num_rows%fNumRowsPerTile : fNumRowsPerTile;
@@ -821,7 +814,7 @@
             for (uint32_t i=0;i<fRealColumns.size();i++)
             {
-                switch (fRealColumns[i].head.ordering)
+                switch (fRealColumns[i].block_head.Ordering())
                 {
-                    case zfits::kOrderByRow:
+                    case kOrderByRow:
                         for (uint32_t k=0;k<thisRoundNumRows;k++)
                         {//regular, "semi-transposed" copy
@@ -831,6 +824,6 @@
                     break;
 
-                    case zfits::kOrderByCol :
-                        for (int j=0;j<fRealColumns[i].col.num;j++)
+                    case kOrderByCol :
+                        for (uint32_t j=0;j<fRealColumns[i].col.num;j++)
                             for (uint32_t k=0;k<thisRoundNumRows;k++)
                             {//transposed copy
@@ -840,5 +833,14 @@
                     break;
                     default:
-                            cout << "Error: unknown column ordering: " << fRealColumns[i].head.ordering << endl;
+                    {
+                            ostringstream str;
+                            str << "Unkown column ordering: " << fRealColumns[i].block_head.Ordering();
+#ifdef __EXCEPTIONS
+                            throw runtime_error(str.str());
+#else
+                            gLog << ___err___ << "ERROR - " << str.str();
+                            return;
+#endif
+                    }
                 };
             }
@@ -846,8 +848,8 @@
 
         /// Specific compression functions
-        uint32_t compressUNCOMPRESSED(char* dest, const char* src, uint32_t numRows, uint32_t sizeOfElems, uint32_t numRowElems)
-        {
-            memcpy(dest, src, numRows*sizeOfElems*numRowElems);
-            return numRows*sizeOfElems*numRowElems;
+        uint32_t compressUNCOMPRESSED(char* dest, const char* src, uint32_t size)
+        {
+            memcpy(dest, src, size);
+            return size;
         }
 
@@ -862,6 +864,10 @@
             if (sizeOfElems < 2 )
             {
-                cout << "Fatal ERROR: HUFMANN can only encode short or longer types" << endl;
+#ifdef __EXCEPTIONS
+                throw runtime_error("Fatal ERROR: HUFMANN can only encode short or longer types");
+#else
+                gLog << ___err___ << "ERROR - Fatal ERROR: HUFMANN can only encode short or longer types";
                 return 0;
+#endif
             }
             uint32_t huffmanOffset = 0;
@@ -884,19 +890,22 @@
         }
 
-        uint32_t applySMOOTHING(char* dest, char* src, uint32_t numRows, uint32_t sizeOfElems, uint32_t numRowElems)
-        {
-            uint32_t colWidth = numRowElems;
-            for (int j=colWidth*numRows-1;j>1;j--)
-                reinterpret_cast<int16_t*>(src)[j] = reinterpret_cast<int16_t*>(src)[j] - (reinterpret_cast<int16_t*>(src)[j-1]+reinterpret_cast<int16_t*>(src)[j-2])/2;
-
-            return numRows*sizeOfElems*numRowElems;
-        }
-
-        //Offsets calibration stuff.
-        vector<int16_t> fOffsetCalibration; ///< The calibration itself
-        int32_t         fStartCellsOffset;  ///< Offset in bytes for the startcell data
-        int32_t         fDataOffset;        ///< Offset in bytes for the data
-        int32_t         fNumSlices;         ///< Number of samples per pixel per event
-
+        uint32_t applySMOOTHING(char* data, uint32_t numElems)//uint32_t numRows, uint32_t sizeOfElems, uint32_t numRowElems)
+        {
+            int16_t* short_data = reinterpret_cast<int16_t*>(data);
+            for (int j=numElems-1;j>1;j--)
+                short_data[j] = short_data[j] - (short_data[j-1]+short_data[j-2])/2;
+
+            return numElems*sizeof(int16_t);
+        }
+        // Apply the inverse transform of the integer smoothing
+        uint32_t UnApplySMOOTHING(char*   data, uint32_t   numElems)
+        {
+            int16_t* short_data = reinterpret_cast<int16_t*>(data);
+            //un-do the integer smoothing
+            for (uint32_t j=2;j<numElems;j++)
+                short_data[j] = short_data[j] + (short_data[j-1]+short_data[j-2])/2;
+
+            return numElems*sizeof(uint16_t);
+        }
         //Compressed data stuff
         int32_t         fCheckOffset;       ///< offset to the data pointer to calculate the checksum
@@ -904,4 +913,6 @@
         uint32_t        fNumRowsPerTile;
 
+        MemoryManager        fMemPool;
+
         //thread related stuff
         vector<Queue<CompressionTarget>> fCompressionQueues;
@@ -909,8 +920,10 @@
 
         //thread related stuff
-        uint32_t          fNumQueues;    ///< The number of threads that will be used to compress
-        uint32_t          fQueueLooper;
+        static int32_t          fNumQueues;    ///< The number of threads that will be used to compress
+
         int32_t           fLatestWrittenTile;
-
+#ifdef __EXCEPTIONS
+        exception_ptr     fThreadsException;
+#endif
         struct CatalogEntry
         {
@@ -927,7 +940,6 @@
         off_t                fCatalogOffset;
         uint32_t             fRealRowWidth;
-
+        uint32_t             fCatalogExtraRows;
         vector<char>         fRawSumBuffer;
-        MemoryManager        fMemPool;
         uint64_t             fMaxUsableMem;
 
@@ -935,18 +947,17 @@
         char*                   fBuffer;
 
-
         struct CompressedColumn
         {
-            CompressedColumn(Table::Column& c, BlockHeader& h, vector<uint16_t>& cs) : col(c),
-                                                                                       head(h),
-                                                                                       comp_sequence(cs)
+            CompressedColumn(const Table::Column& c, const BlockHeaderWriter& h) : col(c),
+                                                                                   block_head(h)
             {}
-            Table::Column    col;
-            BlockHeader      head;
-            vector<uint16_t> comp_sequence;
+            Table::Column     col;
+            BlockHeaderWriter block_head;
         };
         vector<CompressedColumn> fRealColumns;
 
 };
+
+int32_t zofits::fNumQueues = 0;
 
 #ifndef __MARS__
@@ -958,8 +969,8 @@
 zofitsfile.SetNumWorkingThreads(numThreads);
 zofitsfile.open((fileNameOut).c_str());
-std::zofits::BlockHeader zoheader(0, zfits::kOrderByRow, 2);
+std::zofits::BlockHeader zoheader(0, kOrderByRow, 2);
 vector<uint16_t> smoothmanProcessings(2);
-smoothmanProcessings[0] = zfits::kFactSmoothing;
-smoothmanProcessings[1] = zfits::kFactHuffman16;
+smoothmanProcessings[0] = kFactSmoothing;
+smoothmanProcessings[1] = kFactHuffman16;
 
 zofitsfile.AddColumn(sortedColumns[i].num,
