source: trunk/Mars/mcore/zfits.h@ 16418

Last change on this file since 16418 was 16418, checked in by lyard, 11 years ago
Fixed bugs from improvements
File size: 10.9 KB
Line 
1/*
2 * zfits.h
3 *
4 * Created on: May 16, 2013
5 * Author: lyard
6 */
7
8#ifndef MARS_ZFITS
9#define MARS_ZFITS
10
11#include <stdexcept>
12
13#include "fits.h"
14#include "huffman.h"
15
16
17#ifndef __MARS__
18namespace std
19{
20#endif
21
22class zfits : public fits
23{
24public:
25
26 // Basic constructor
27 zfits(const string& fname, const string& tableName="",
28 bool force=false) : fits(fname, tableName, force),
29 fBuffer(0),
30 fTransposedBuffer(0),
31 fCompressedBuffer(0),
32 fNumTiles(0),
33 fNumRowsPerTile(0),
34 fCurrentRow(-1),
35 fHeapOff(0)
36 {
37 InitCompressionReading();
38 }
39
40 // Alternative contstructor
41 zfits(const string& fname, const string& fout, const string& tableName,
42 bool force=false) : fits(fname, fout, tableName, force),
43 fBuffer(0),
44 fTransposedBuffer(0),
45 fCompressedBuffer(0),
46 fNumTiles(0),
47 fNumRowsPerTile(0),
48 fCurrentRow(-1),
49 fHeapOff(0)
50 {
51 InitCompressionReading();
52 }
53
54 // Skip the next row
55 bool SkipNextRow()
56 {
57 if (!fTable.isCompressed)
58 return fits::SkipNextRow();
59
60 fRow++;
61 return true;
62 }
63
64private:
65
66 // Do what it takes to initialize the compressed structured
67 void InitCompressionReading()
68 {
69 //The constructor may have failed
70 if (!good())
71 return;
72
73 //Get compressed specific keywords
74 fNumTiles = fTable.isCompressed ? GetInt("NAXIS2") : 0;
75 fNumRowsPerTile = fTable.isCompressed ? GetInt("ZTILELEN") : 0;
76
77 //give it some space for uncompressing
78 AllocateBuffers();
79
80 //read the file's catalog
81 ReadCatalog();
82 }
83
84 // Stage the requested row to internal buffer
85 // Does NOT return data to users
86 void StageRow(size_t row, char* dest)
87 {
88 if (!fTable.isCompressed)
89 {
90 fits::StageRow(row, dest);
91 return;
92 }
93
94 ReadBinaryRow(row, dest);
95 }
96
97 // Copy decompressed data to location requested by user
98 void MoveColumnDataToUserSpace(char* dest, const char* src, const Table::Column& c)
99 {
100 if (!fTable.isCompressed)
101 {
102 fits::MoveColumnDataToUserSpace(dest, src, c);
103 return;
104 }
105
106 memcpy(dest, src, c.num*c.size);
107 }
108
109 vector<char> fBuffer; ///<store the uncompressed rows
110 vector<char> fTransposedBuffer; ///<intermediate buffer to transpose the rows
111 vector<char> fCompressedBuffer; ///<compressed rows
112
113 size_t fNumTiles; ///< Total number of tiles
114 size_t fNumRowsPerTile; ///< Number of rows per compressed tile
115 size_t fCurrentRow; ///< current row in memory.
116
117 streamoff fHeapOff; ///< offset from the beginning of the file of the binary data
118
119 vector<vector<pair<int64_t, int64_t> > > fCatalog;///< Catalog, i.e. the main table that points to the compressed data.
120
121 void AllocateBuffers()
122 {
123 if (!fTable.isCompressed)
124 return;
125
126 fBuffer.resize(fTable.bytes_per_row*fNumRowsPerTile);
127
128 fTransposedBuffer.resize(fTable.bytes_per_row*fNumRowsPerTile);
129 fCompressedBuffer.resize(fTable.bytes_per_row*fNumRowsPerTile + 1024*1024); //use a bit more memory, in case the compression algorithms uses more
130 }
131
132 // Read catalog data. I.e. the address of the compressed data inside the heap
133 void ReadCatalog()
134 {
135 if (!fTable.isCompressed)
136 return;
137
138 char readBuf[16];
139 fCatalog.resize(fNumTiles);
140
141 for (uint32_t i=0;i<fNumTiles;i++)
142 for (uint32_t j=0;j<fTable.num_cols;j++)
143 {
144 read(readBuf, 2*sizeof(int64_t));
145
146 //swap the bytes
147 int64_t tempValues[2] = {0,0};
148 revcpy<8>(reinterpret_cast<char*>(&tempValues[0]), readBuf, 2);
149
150 //add catalog entry
151 fCatalog[i].emplace_back(tempValues[0], tempValues[1]);
152 }
153
154 //see if there is a gap before heap data
155 fHeapOff = tellg()+fTable.GetHeapShift();
156 }
157
158 // Compressed versin of the read row
159 virtual bool ReadBinaryRow(uint64_t rowNum, char* bufferToRead)
160 {
161 if (rowNum >= GetNumRows())
162 return false;
163
164 const int requestedTile = rowNum/fNumRowsPerTile;
165 const int currentTile = fCurrentRow/fNumRowsPerTile;
166
167 fCurrentRow = rowNum;
168
169 //should we read yet another chunk of data ?
170 if (requestedTile != currentTile)
171 {
172 //read yet another chunk from the file
173 //the size that we should read is in the catalog. we should sum up the sizes of all columns
174 const uint32_t currentCatRow = fCurrentRow/fNumRowsPerTile;
175
176 int64_t sizeToRead = 0;
177 for (uint32_t i=0;i<fCatalog[currentCatRow].size();i++)
178 sizeToRead += fCatalog[currentCatRow][i].first;
179
180 //skip to the beginning of the tile
181 seekg(fHeapOff+fCatalog[currentCatRow][0].second);
182 read(fCompressedBuffer.data(), sizeToRead);
183
184 //uncompress it
185 UncompressBuffer();
186
187 //copy the tile and transpose it
188 uint32_t offset=0;
189
190 const uint32_t thisRoundNumRows = (GetNumRows()<fCurrentRow + fNumRowsPerTile) ? GetNumRows()%fNumRowsPerTile : fNumRowsPerTile;
191 for (uint32_t i=0;i<fTable.sortedCols.size();i++)
192 {
193 switch (fTable.sortedCols[i].comp)
194 {
195 case UNCOMPRESSED:
196 case SMOOTHMAN:
197 //regular, "semi-transposed" copy
198 for (uint32_t k=0;k<thisRoundNumRows;k++)
199 {
200 memcpy(fBuffer.data()+k*fTable.bytes_per_row + fTable.sortedCols[i].offset, fTransposedBuffer.data()+offset, fTable.sortedCols[i].size*fTable.sortedCols[i].num);
201 offset += fTable.sortedCols[i].size*fTable.sortedCols[i].num;
202 }
203 break;
204
205 default:
206 //transposed copy
207 for (uint32_t j=0;j<fTable.sortedCols[i].num;j++)
208 for (uint32_t k=0;k<thisRoundNumRows;k++)
209 {
210 memcpy(fBuffer.data()+k*fTable.bytes_per_row + fTable.sortedCols[i].offset + fTable.sortedCols[i].size*j, fTransposedBuffer.data()+offset, fTable.sortedCols[i].size);
211 offset += fTable.sortedCols[i].size;
212 }
213 break;
214 };
215 }
216 }
217
218 //Data loaded and uncompressed. Copy it to destination
219 memcpy(bufferToRead, fBuffer.data()+fTable.bytes_per_row*(fCurrentRow%fNumRowsPerTile), fTable.bytes_per_row);
220 return good();
221 }
222
223 // Read a bunch of uncompressed data
224 uint32_t UncompressUNCOMPRESSED(char* dest,
225 const char* src,
226 uint32_t numRows,
227 uint32_t sizeOfElems,
228 uint32_t numRowElems)
229 {
230 memcpy(dest, src, numRows*sizeOfElems*numRowElems);
231 return numRows*sizeOfElems*numRowElems;
232 }
233
234 // Read a bunch of data compressed with the Huffman algorithm
235 uint32_t UncompressHUFFMAN(char* dest,
236 const char* src,
237 uint32_t ,
238 uint32_t sizeOfElems,
239 uint32_t numRowElems)
240 {
241 if (sizeOfElems < 2)
242 {
243 cout << "Error, Huffman only works on shorts or longer types. (here: " << sizeOfElems << "). Aborting." << endl;
244 return -1;
245 }
246
247 vector<uint16_t> uncompressed;
248
249 //read compressed sizes (one per row)
250 const uint32_t* compressedSizes = reinterpret_cast<const uint32_t*>(src);
251 src += sizeof(uint32_t)*numRowElems;
252
253 //uncompress the rows, one by one
254 uint32_t sizeWritten = 0;
255 for (uint32_t j=0;j<numRowElems;j++)
256 {
257 Huffman::Decode(reinterpret_cast<const unsigned char*>(src), compressedSizes[j], uncompressed);
258
259 memcpy(dest, uncompressed.data(), uncompressed.size()*sizeof(uint16_t));
260
261 sizeWritten += uncompressed.size()*sizeof(uint16_t);
262 dest += uncompressed.size()*sizeof(uint16_t);
263 src += compressedSizes[j];
264 }
265 return sizeWritten;
266 }
267
268 //Read a bunch of data compressed with the smoothman algorithm
269 uint32_t UncompressSMOOTHMAN(int16_t* dest,
270 const char* src,
271 uint32_t numRows,
272 uint32_t sizeOfElems,
273 uint32_t numRowElems)
274 {
275 //call huffman transposed
276 const uint32_t sizeWritten = UncompressHUFFMAN(reinterpret_cast<char*>(dest), src, numRowElems, sizeOfElems, numRows);
277
278 //un-do the integer smoothing
279 for (uint32_t j=2;j<numRowElems*numRows;j++)
280 dest[j] = dest[j] + (dest[j-1]+dest[j-2])/2;
281
282 return sizeWritten;
283 }
284
285 // Data has been read from disk. Uncompress it !
286 void UncompressBuffer()
287 {
288 const uint32_t catalogCurrentRow = fCurrentRow/fNumRowsPerTile;
289 const uint32_t thisRoundNumRows = (GetNumRows()<fCurrentRow + fNumRowsPerTile) ? GetNumRows()%fNumRowsPerTile : fNumRowsPerTile;
290
291 uint32_t offset = 0;
292 int64_t compressedOffset = 0;
293
294 //uncompress column by column
295 for (uint32_t i=0;i<fTable.sortedCols.size();i++)
296 {
297 compressedOffset = fCatalog[catalogCurrentRow][i].second - fCatalog[catalogCurrentRow][0].second;
298
299 if (fTable.sortedCols[i].num == 0)
300 continue;
301
302 //get the compression flag
303 const char compressedFlag = fCompressedBuffer[compressedOffset++];
304
305 //#define COMPRESSED_FLAG 0x1
306 //#define UNCOMPRESSED_FLAG 0x0
307
308 //if this bunch of data is not compressed, modify the compression flag
309 uint32_t compression = fTable.sortedCols[i].comp;
310 if (compressedFlag == 0)
311 compression = UNCOMPRESSED;
312
313 switch (compression)
314 {
315 case UNCOMPRESSED:
316 offset += UncompressUNCOMPRESSED(fTransposedBuffer.data()+offset, fCompressedBuffer.data()+compressedOffset, thisRoundNumRows, fTable.sortedCols[i].size, fTable.sortedCols[i].num);
317 break;
318
319 case SMOOTHMAN:
320 offset += UncompressSMOOTHMAN(reinterpret_cast<int16_t*>(fTransposedBuffer.data()+offset), fCompressedBuffer.data()+compressedOffset, thisRoundNumRows, fTable.sortedCols[i].size, fTable.sortedCols[i].num);
321 break;
322
323 default:
324 ;
325 };
326 }
327 }
328
329};//class zfits
330
331#ifndef __MARS__
332}; //namespace std
333#endif
334
335#endif
Note: See TracBrowser for help on using the repository browser.