source: trunk/Mars/mcore/zfits.h@ 16318

Last change on this file since 16318 was 16285, checked in by lyard, 12 years ago
added new files for fitsCompressor and compressed files read headers
File size: 11.8 KB
Line 
1/*
2 * zfits.h
3 *
4 * Created on: May 16, 2013
5 * Author: lyard
6 */
7
8#ifndef ZFITS_H_
9#define ZFITS_H_
10
11#include "fits.h"
12
13#include <stdexcept>
14#include "huffman.h"
15
16#define COMPRESSED_FLAG 0x1
17#define UNCOMPRESSED_FLAG 0x0
18
19#ifndef __MARS__
20namespace std
21{
22#else
23using namespace std;
24#endif
25
26class zfits : public fits
27{
28public:
29
30 /*
31 * Basic constructor
32 */
33 zfits(const string& fname,
34 const string& tableName="",
35 bool force=false,
36 bool mightFail=false) : fits(fname, tableName, force, mightFail),
37 fBuffer(0),
38 fTransposedBuffer(0),
39 fCompressedBuffer(0),
40 fNumTiles(0),
41 fNumRowsPerTile(0),
42 fHeapPtr(0),
43 fCurrentRow(-1)
44 {
45 InitCompressionReading();
46 }
47
48 /*
49 * Alternative contstructor
50 */
51 zfits(const string& fname,
52 const string& fout,
53 const string& tableName="",
54 bool force=false,
55 bool mightFail=false): fits(fname, fout, tableName, force, mightFail),
56 fBuffer(0),
57 fTransposedBuffer(0),
58 fCompressedBuffer(0),
59 fNumTiles(0),
60 fNumRowsPerTile(0),
61 fHeapPtr(0),
62 fCurrentRow(-1)
63 {
64 InitCompressionReading();
65 }
66
67 /*
68 * Default destructor
69 */
70 ~zfits()
71 {}
72
73 /*
74 * Skip the next row
75 */
76 virtual bool SkipNextRow()
77 {
78 if (fTable.isCompressed)
79 {
80 fRow++;
81 return true;
82 }
83 else
84 return fits::SkipNextRow();
85 }
86
87private:
88 /*
89 * Do what it takes to initialize the compressed structured
90 */
91 void InitCompressionReading()
92 {
93 //The constructor may have failed
94 if (!good()) return;
95
96 //Get compressed specific keywords
97 fNumTiles = fTable.isCompressed ? GetInt("NAXIS2") : 0;
98 fNumRowsPerTile = fTable.isCompressed ? GetInt("ZTILELEN") : 0;
99
100 //give it some space for uncompressing
101 AllocateBuffers();
102
103 //read the file's catalog
104 ReadCatalog();
105 }
106 /*
107 * Stage the requested row to internal buffer
108 * Does NOT return data to users
109 */
110 virtual void StageRow(size_t row, char* dest)
111 {
112 if (fTable.isCompressed)
113 ReadBinaryRow(row, dest);
114 else
115 fits::StageRow(row, dest);
116 }
117 /*
118 * Copy decompressed data to location requested by user
119 */
120 virtual void MoveColumnDataToUserSpace(char* dest, const char* src, const Table::Column& c)
121 {
122 if (fTable.isCompressed)
123 memcpy(dest, src, c.num*c.size);
124 else
125 fits::MoveColumnDataToUserSpace(dest, src, c);
126 }
127 vector<char> fBuffer; ///<store the uncompressed rows
128 vector<char> fTransposedBuffer; ///<intermediate buffer to transpose the rows
129 vector<char> fCompressedBuffer; ///<compressed rows
130 size_t fNumTiles; ///< Total number of tiles
131 size_t fNumRowsPerTile; ///< Number of rows per compressed tile
132 size_t fHeapPtr; ///< offset from the beginning of the file of the binary data
133 size_t fCurrentRow; ///< current row in memory.
134
135 vector<vector<pair<int64_t, int64_t> > > fCatalog;///< Catalog, i.e. the main table that points to the compressed data.
136
137 void AllocateBuffers()
138 {
139 if (!fTable.isCompressed)
140 return;
141
142 fBuffer.resize(fTable.bytes_per_row*fNumRowsPerTile);
143 fTransposedBuffer.resize(fTable.bytes_per_row*fNumRowsPerTile);
144 fCompressedBuffer.resize(fTable.bytes_per_row*fNumRowsPerTile + 1024*1024); //use a bit more memory, in case the compression algorithms uses more
145 }
146 /*
147 * Read catalog data. I.e. the address of the compressed data inside the heap
148 */
149 void ReadCatalog()
150 {
151 if (!fTable.isCompressed)
152 return;
153
154 char readBuf[16];
155 fCatalog.resize(fNumTiles);
156
157 for (uint32_t i=0;i<fNumTiles;i++)
158 for (uint32_t j=0;j<fTable.num_cols;j++)
159 {
160 read(readBuf, 2*sizeof(int64_t));
161
162 //swap the bytes
163 int64_t tempValues[2] = {0,0};
164 revcpy<8>((char*)(&tempValues[0]), readBuf, 2);
165
166 //add catalog entry
167 fCatalog[i].push_back(make_pair(tempValues[0], tempValues[1]));
168 }
169
170 //see if there is a gap before heap data
171 const off_t heapShift = ComputeGapFromDataToHeap(fTable);
172 fHeapPtr = tellg() + heapShift;
173 }
174 /*
175 * Compressed versin of the read row
176 */
177 virtual bool ReadBinaryRow(uint64_t rowNum, char* bufferToRead)
178 {
179 if (rowNum >= GetNumRows()) return false;
180
181 const int requestedTile = rowNum/fNumRowsPerTile;
182 const int currentTile = fCurrentRow/fNumRowsPerTile;
183 fCurrentRow = rowNum;
184
185 //should we read yet another chunk of data ?
186 if (requestedTile != currentTile)
187 {
188 //read yet another chunk from the file
189 //the size that we should read is in the catalog. we should sum up the sizes of all columns
190 int64_t sizeToRead = 0;
191 const uint32_t currentCatRow = fCurrentRow/fNumRowsPerTile;
192
193 for (uint32_t i=0;i<fCatalog[currentCatRow].size();i++)
194 sizeToRead += fCatalog[currentCatRow][i].first;
195
196 //skip to the beginning of the tile
197 seekg(fHeapPtr+fCatalog[currentCatRow][0].second);
198 read(fCompressedBuffer.data(), sizeToRead);
199
200 //uncompress it
201 UncompressBuffer();
202
203 //copy the tile and transpose it
204 uint32_t offset=0;
205 const uint32_t thisRoundNumRows = (GetNumRows()<fCurrentRow + fNumRowsPerTile) ? GetNumRows()%fNumRowsPerTile : fNumRowsPerTile;
206 for (uint32_t i=0;i<fTable.sortedCols.size();i++)
207 {
208 switch (fTable.sortedCols[i].comp)
209 {
210 case UNCOMPRESSED:
211 case SMOOTHMAN:
212 for (uint32_t k=0;k<thisRoundNumRows;k++)
213 {//regular, "semi-transposed" copy
214 memcpy(fBuffer.data()+k*fTable.bytes_per_row + fTable.sortedCols[i].offset, fTransposedBuffer.data()+offset, fTable.sortedCols[i].size*fTable.sortedCols[i].num);
215 offset += fTable.sortedCols[i].size*fTable.sortedCols[i].num;
216 }
217 break;
218 default :
219 for (uint32_t j=0;j<fTable.sortedCols[i].num;j++)
220 for (uint32_t k=0;k<thisRoundNumRows;k++)
221 {//transposed copy
222 memcpy(fBuffer.data()+k*fTable.bytes_per_row + fTable.sortedCols[i].offset + fTable.sortedCols[i].size*j, fTransposedBuffer.data()+offset, fTable.sortedCols[i].size);
223 offset += fTable.sortedCols[i].size;
224 }
225 break;
226 };
227 }
228 }
229 //Data loaded and uncompressed. Copy it to destination
230 memcpy(bufferToRead, fBuffer.data()+fTable.bytes_per_row*(fCurrentRow%fNumRowsPerTile), fTable.bytes_per_row);
231 return good();
232 }
233 /*
234 * Read a bunch of uncompressed data
235 */
236 uint32_t UncompressUNCOMPRESSED(char* dest,
237 const char* src,
238 uint32_t numRows,
239 uint32_t sizeOfElems,
240 uint32_t numRowElems)
241 {
242 memcpy(dest, src, numRows*sizeOfElems*numRowElems);
243 return numRows*sizeOfElems*numRowElems;
244 }
245 /*
246 * Read a bunch of data compressed with the Huffman algorithm
247 */
248 uint32_t UncompressHUFFMAN(char* dest,
249 const char* src,
250 uint32_t ,
251 uint32_t sizeOfElems,
252 uint32_t numRowElems)
253 {
254 if (sizeOfElems < 2)
255 {
256 cout << "Error, Huffman only works on shorts or longer types. (here: " << sizeOfElems << "). Aborting." << endl;
257 return -1;
258 }
259 vector<uint16_t> uncompressed;
260
261 //read compressed sizes (one per row)
262 const uint32_t* compressedSizes = reinterpret_cast<const uint32_t*>(src);
263 src += sizeof(uint32_t)*numRowElems;
264
265 //uncompress the rows, one by one
266 uint32_t sizeWritten = 0;
267 for (uint32_t j=0;j<numRowElems;j++)
268 {
269 Huffman::Decode(reinterpret_cast<const unsigned char*>(src), compressedSizes[j], uncompressed);
270 memcpy(dest, uncompressed.data(), uncompressed.size()*sizeof(uint16_t));
271 sizeWritten += uncompressed.size()*sizeof(uint16_t);
272 dest += uncompressed.size()*sizeof(uint16_t);
273 src += compressedSizes[j];
274 }
275 return sizeWritten;
276 }
277 /*
278 * Read a bunch of data compressed with the smoothman algorithm
279 */
280 uint32_t UncompressSMOOTHMAN(int16_t* dest,
281 const char* src,
282 uint32_t numRows,
283 uint32_t sizeOfElems,
284 uint32_t numRowElems)
285 {
286 //call huffman transposed
287 uint32_t sizeWritten = UncompressHUFFMAN(reinterpret_cast<char*>(dest), src, numRowElems, sizeOfElems, numRows);
288
289 //un-do the integer smoothing
290 for (uint32_t j=2;j<numRowElems*numRows;j++)
291 dest[j] = dest[j] + (dest[j-1]+dest[j-2])/2;
292
293 return sizeWritten;
294 }
295 /*
296 * Data has been read from disk. Uncompress it !
297 */
298 void UncompressBuffer()
299 {
300 uint32_t offset = 0;
301 int64_t compressedOffset = 0;
302 const uint32_t catalogCurrentRow = fCurrentRow/fNumRowsPerTile;
303 const uint32_t thisRoundNumRows = (GetNumRows()<fCurrentRow + fNumRowsPerTile) ? GetNumRows()%fNumRowsPerTile : fNumRowsPerTile;
304
305 //uncompress column by column
306 for (uint32_t i=0;i<fTable.sortedCols.size();i++)
307 {
308 compressedOffset = fCatalog[catalogCurrentRow][i].second - fCatalog[catalogCurrentRow][0].second;
309 uint32_t compression = fTable.sortedCols[i].comp;
310
311 if (fTable.sortedCols[i].num == 0) continue;
312
313 //get the compression flag
314 const char compressedFlag = fCompressedBuffer[compressedOffset++];
315
316 //if this bunch of data is not compressed, modify the compression flag
317 if (compressedFlag == UNCOMPRESSED_FLAG)
318 compression = UNCOMPRESSED;
319
320 switch (compression)
321 {
322 case UNCOMPRESSED:
323 offset += UncompressUNCOMPRESSED(fTransposedBuffer.data()+offset, fCompressedBuffer.data()+compressedOffset, thisRoundNumRows, fTable.sortedCols[i].size, fTable.sortedCols[i].num);
324 break;
325 case SMOOTHMAN:
326 offset += UncompressSMOOTHMAN(reinterpret_cast<int16_t*>(fTransposedBuffer.data()+offset), fCompressedBuffer.data()+compressedOffset, thisRoundNumRows, fTable.sortedCols[i].size, fTable.sortedCols[i].num);
327 break;
328 default:
329 ;
330 };
331 }
332 }
333
334};//class zfits
335
336#ifndef __MARS__
337}; //namespace std
338#endif
339
340#endif /* ZFITS_H_ */
Note: See TracBrowser for help on using the repository browser.