Index: trunk/Mars/mcore/zfits.h
===================================================================
--- trunk/Mars/mcore/zfits.h	(revision 16811)
+++ trunk/Mars/mcore/zfits.h	(revision 16814)
@@ -13,4 +13,12 @@
 #include "fits.h"
 #include "huffman.h"
+
+
+#define FACT_RAW       0x0 // processing code: data stored as-is, restored with a plain memcpy (UncompressUNCOMPRESSED)
+#define FACT_SMOOTHING 0x1 // processing code: integer smoothing, undone by UnApplySMOOTHING
+#define FACT_HUFFMAN16 0x2 // processing code: Huffman coding, decoded by UncompressHUFFMAN16
+
+#define FACT_COL_MAJOR 'C' // block ordering flag: column data is copied back with the transposed (element-by-element) copy
+#define FACT_ROW_MAJOR 'R' // block ordering flag: column data is copied back row-by-row; default ordering
 
 
@@ -79,4 +87,6 @@
 
 private:
+
+    //Structure helper for reading tile headers
     typedef struct TileHeader
     {
@@ -84,15 +94,29 @@
       uint32_t numRows;
       uint64_t size;
-      TileHeader(uint32_t nRows=0,
-                 uint64_t s=0) : id({'T', 'I', 'L', 'E'}),
+
+      TileHeader() {}
+
+      TileHeader(uint32_t nRows,
+                 uint64_t s) : id({'T', 'I', 'L', 'E'}),
                                  numRows(nRows),
                                  size(s)
       { };
-      friend ostream& operator << (ostream& out, const TileHeader& h)
-      {
-          out << h.id[0] << h.id[1] << h.id[2] << h.id[3] << " num Rows: " << h.numRows << ", tile size: " << h.size;
-          return out;
-      }
     } __attribute__((__packed__)) TileHeader;
+
+    //Structure helper for reading block headers and compression schemes
+    typedef struct BlockHeader
+    {
+        uint64_t      size;          ///< block size -- NOTE(review): presumably in bytes; not read by the code visible here, confirm against the writer
+        char          ordering;      ///< FACT_ROW_MAJOR ('R') or FACT_COL_MAJOR ('C')
+        unsigned char numProcs;      ///< number of entries in processings[]
+        uint16_t      processings[]; ///< processing codes (FACT_RAW, FACT_SMOOTHING, FACT_HUFFMAN16); the reader expects the data right after these numProcs entries
+
+        BlockHeader(uint64_t      s=0,
+                    char          o=FACT_ROW_MAJOR,
+                    unsigned char n=1) : size(s),
+                                         ordering(o),
+                                         numProcs(n) // processings[] is not initialized by this constructor
+        {}
+    } __attribute__((__packed__)) BlockHeader;
 
     // Do what it takes to initialize the compressed structured
@@ -103,4 +127,19 @@
             return;
 
+        if (fTable.isCompressed)
+        for (auto it=fTable.sortedCols.begin(); it!= fTable.sortedCols.end(); it++)
+            if (it->comp != FACT)
+            {
+#ifdef __EXCEPTIONS
+                    throw runtime_error("ERROR: Only the FACT compression scheme is handled by this reader.");
+#else
+                    gLog << ___err___ << "ERROR: Only the FACT compression scheme is handled by this reader." << endl;
+                    return;
+#endif
+            }
+
+        fColumnOrdering.resize(fTable.sortedCols.size());
+        for (auto it=fColumnOrdering.begin(); it != fColumnOrdering.end(); it++)
+            (*it) = FACT_ROW_MAJOR;
         //Get compressed specific keywords
         fNumTiles       = fTable.isCompressed ? GetInt("NAXIS2") : 0;
@@ -129,4 +168,5 @@
     vector<char> fTransposedBuffer; ///<intermediate buffer to transpose the rows
     vector<char> fCompressedBuffer; ///<compressed rows
+    vector<char> fColumnOrdering; ///< ordering of the column's rows
 
     size_t fNumTiles;       ///< Total number of tiles
@@ -137,6 +177,6 @@
 
     vector<vector<pair<int64_t, int64_t> > > fCatalog;///< Catalog, i.e. the main table that points to the compressed data.
-    vector<size_t> fTileSize; ///< size in bytes of each compressed tile
-    vector<vector<size_t> > fTileOffsets; ///< offset from start of tile of a given compressed column
+    vector<size_t>                           fTileSize; ///< size in bytes of each compressed tile
+    vector<vector<size_t> >                  fTileOffsets; ///< offset from start of tile of a given compressed column
 
     void AllocateBuffers()
@@ -148,5 +188,5 @@
 
         fTransposedBuffer.resize(fTable.bytes_per_row*fNumRowsPerTile);
-        fCompressedBuffer.resize(fTable.bytes_per_row*fNumRowsPerTile + fTable.num_cols); //use a bit more memory for compression flags
+        fCompressedBuffer.resize(fTable.bytes_per_row*fNumRowsPerTile + fTable.num_cols*(sizeof(BlockHeader)+256)); //use a bit more memory for block headers
     }
 
@@ -266,30 +306,38 @@
             const char *src = fTransposedBuffer.data();
 
-            for (auto it=fTable.sortedCols.begin(); it!=fTable.sortedCols.end(); it++)
+            uint32_t i=0;
+            for (auto it=fTable.sortedCols.begin(); it!=fTable.sortedCols.end(); it++, i++)
             {
                 char *buffer = fBuffer.data() + it->offset; // pointer to column (destination buffer)
 
-                switch (it->comp)
+                switch (fColumnOrdering[i])
                 {
-                case UNCOMPRESSED:
-                case SMOOTHMAN:
-                    // regular, "semi-transposed" copy
-                    for (char *dest=buffer; dest<buffer+thisRoundNumRows*fTable.bytes_per_row; dest+=fTable.bytes_per_row) // row-by-row
-                    {
-                        memcpy(dest, src, it->bytes);
-                        src += it->bytes;  // next column
-                    }
-                    break;
-
-                default:
-                    // transposed copy
-                    for (char *elem=buffer; elem<buffer+it->bytes; elem+=it->size) // element-by-element (arrays)
-                    {
-                        for (char *dest=elem; dest<elem+thisRoundNumRows*fTable.bytes_per_row; dest+=fTable.bytes_per_row) // row-by-row
+                    case FACT_ROW_MAJOR:
+                        // regular, "semi-transposed" copy
+                        for (char *dest=buffer; dest<buffer+thisRoundNumRows*fTable.bytes_per_row; dest+=fTable.bytes_per_row) // row-by-row
                         {
-                            memcpy(dest, src, it->size);
-                            src += it->size; // next element
+                            memcpy(dest, src, it->bytes);
+                            src += it->bytes;  // next column
                         }
-                    }
+                    break;
+
+                    case FACT_COL_MAJOR:
+                        // transposed copy
+                        for (char *elem=buffer; elem<buffer+it->bytes; elem+=it->size) // element-by-element (arrays)
+                        {
+                            for (char *dest=elem; dest<elem+thisRoundNumRows*fTable.bytes_per_row; dest+=fTable.bytes_per_row) // row-by-row
+                            {
+                                memcpy(dest, src, it->size);
+                                src += it->size; // next element
+                            }
+                        }
+                    break;
+                    default:
+    #ifdef __EXCEPTIONS
+                        throw runtime_error("Unkown column ordering scheme");
+    #else
+                        gLog << ___err___ << "ERROR - unkown column ordering scheme" << endl;
+                        return;
+    #endif
                     break;
                 };
@@ -305,34 +353,25 @@
     uint32_t UncompressUNCOMPRESSED(char*       dest,
                                     const char* src,
-                                    uint32_t    numRows,
-                                    uint32_t    sizeOfElems,
-                                    uint32_t    numRowElems)
-    {
-        memcpy(dest, src, numRows*sizeOfElems*numRowElems);
-        return numRows*sizeOfElems*numRowElems;
+                                    uint32_t    numElems,
+                                    uint32_t    sizeOfElems)
+    {
+        memcpy(dest, src, numElems*sizeOfElems);
+        return numElems*sizeOfElems;
     }
 
     // Read a bunch of data compressed with the Huffman algorithm
-    uint32_t UncompressHUFFMAN(char*       dest,
-                               const char* src,
-                               uint32_t ,
-                               uint32_t    sizeOfElems,
-                               uint32_t    numRowElems)
-    {
-        if (sizeOfElems < 2)
-        {
-            cout << "Error, Huffman only works on shorts or longer types. (here: " << sizeOfElems << "). Aborting." << endl;
-            return -1;
-        }
-
+    uint32_t UncompressHUFFMAN16(char*       dest,
+                                 const char* src,
+                                 uint32_t    numChunks)
+    {
         vector<uint16_t> uncompressed;
 
         //read compressed sizes (one per row)
         const uint32_t* compressedSizes = reinterpret_cast<const uint32_t*>(src);
-        src += sizeof(uint32_t)*numRowElems;
+        src += sizeof(uint32_t)*numChunks;
 
         //uncompress the rows, one by one
         uint32_t sizeWritten = 0;
-        for (uint32_t j=0;j<numRowElems;j++)
+        for (uint32_t j=0;j<numChunks;j++)
         {
             Huffman::Decode(reinterpret_cast<const unsigned char*>(src), compressedSizes[j], uncompressed);
@@ -347,21 +386,13 @@
     }
 
-    //Read a bunch of data compressed with the smoothman algorithm
-    uint32_t UncompressSMOOTHMAN(int16_t*   dest,
-                                 const char* src,
-                                 uint32_t numRows,
-                                 uint32_t sizeOfElems,
-                                 uint32_t numRowElems)
-    {
-        //call huffman transposed
-        const uint32_t sizeWritten = UncompressHUFFMAN(reinterpret_cast<char*>(dest), src, numRowElems, sizeOfElems, numRows);
-
+    uint32_t UnApplySMOOTHING(int16_t*   data,
+                              uint32_t numElems)
+    {
         //un-do the integer smoothing
-        for (uint32_t j=2;j<numRowElems*numRows;j++)
-            dest[j] = dest[j] + (dest[j-1]+dest[j-2])/2;
-
-        return sizeWritten;
-    }
-
+        for (uint32_t j=2;j<numElems;j++)
+            data[j] = data[j] + (data[j-1]+data[j-2])/2;
+
+        return numElems*sizeof(uint16_t);
+    }
     // Data has been read from disk. Uncompress it !
     void UncompressBuffer(const uint32_t &catalogCurrentRow, const uint32_t &thisRoundNumRows)
@@ -377,26 +408,42 @@
 
             //get the compression flag
-            const int64_t compressedOffset = fTileOffsets[catalogCurrentRow][i];//fCatalog[catalogCurrentRow][i].second - fCatalog[catalogCurrentRow][0].second;
-            const char    compressedFlag   = fCompressedBuffer[compressedOffset];
-
-            //#define COMPRESSED_FLAG 0x1
-            //#define UNCOMPRESSED_FLAG 0x0
-
-            const char *src = fCompressedBuffer.data()+compressedOffset+1;
-
-            //if this bunch of data is not compressed, modify the compression flag
-            const uint32_t compression = compressedFlag==0 ? UNCOMPRESSED : col.comp;
-            switch (compression)
-            {
-                case UNCOMPRESSED:
-                    dest += UncompressUNCOMPRESSED(dest, src, thisRoundNumRows, col.size, col.num);
-                    break;
-
-                case SMOOTHMAN:
-                    dest += UncompressSMOOTHMAN(reinterpret_cast<int16_t*>(dest), src, thisRoundNumRows, col.size, col.num);
-                    break;
-
-                default:
-                    ;
+            const int64_t compressedOffset = fTileOffsets[catalogCurrentRow][i];
+
+            BlockHeader* head = reinterpret_cast<BlockHeader*>(&fCompressedBuffer[compressedOffset]);
+
+            fColumnOrdering[i] = head->ordering;
+
+            uint32_t numRows = (head->ordering==FACT_ROW_MAJOR) ? thisRoundNumRows : col.num;
+            uint32_t numCols = (head->ordering==FACT_COL_MAJOR) ? thisRoundNumRows : col.num;
+
+            const char *src = fCompressedBuffer.data()+compressedOffset+sizeof(BlockHeader)+sizeof(uint16_t)*head->numProcs;
+
+            for (uint32_t j=head->numProcs;j != 0; j--)
+            {
+                uint32_t sizeWritten=0;
+
+                switch (head->processings[j-1])
+                {
+                    case FACT_RAW:
+                            if (head->numProcs == 1)
+                                sizeWritten = UncompressUNCOMPRESSED(dest, src, numRows*numCols, col.size);
+                    break;
+                    case FACT_SMOOTHING:
+                            sizeWritten = UnApplySMOOTHING(reinterpret_cast<int16_t*>(dest), numRows*numCols);
+                    break;
+                    case FACT_HUFFMAN16:
+                            sizeWritten = UncompressHUFFMAN16(dest, src, numRows);
+                    break;
+                    default:
+#ifdef __EXCEPTIONS
+                    throw runtime_error("Unknown processing applied to data. Aborting");
+#else
+                    gLog << ___err___ << "ERROR - Unknown processing applied to data. Aborting" << endl;
+                    return;
+#endif
+                    break;
+                }
+                //increment destination counter only when processing done.
+                if (j==1) dest+= sizeWritten;
             }
         }
