source: trunk/Mars/mcore/zfits.h@ 16425

Last change on this file since 16425 was 16425, checked in by tbretz, 11 years ago
requestedTile and currentTile should be uint32_t in ReadBinaryRow; no need to have ReadBinaryRow virtual; thisRoundNumRows and currentCatRow doesn't need to be recalculated in UnCompressBuffer; Replaced indices by pointers where they are not needed and a clear advantage is visible; took some hidden magic out of UncompressBuffer (as some obsolete large scope variables) and a continue at a random place
File size: 10.8 KB
/*
 * zfits.h
 *
 *  Created on: May 16, 2013
 *      Author: lyard
 */

#ifndef MARS_ZFITS
#define MARS_ZFITS

#include <stdexcept>

#include "fits.h"
#include "huffman.h"


#ifndef __MARS__
namespace std
{
#endif

class zfits : public fits
{
public:

    // Basic constructor
    zfits(const string& fname, const string& tableName="",
          bool force=false) : fits(fname, tableName, force),
        fBuffer(0),
        fTransposedBuffer(0),
        fCompressedBuffer(0),
        fNumTiles(0),
        fNumRowsPerTile(0),
        fCurrentRow(-1),
        fHeapOff(0)
    {
        InitCompressionReading();
    }

    // Alternative constructor
    zfits(const string& fname, const string& fout, const string& tableName,
          bool force=false) : fits(fname, fout, tableName, force),
        fBuffer(0),
        fTransposedBuffer(0),
        fCompressedBuffer(0),
        fNumTiles(0),
        fNumRowsPerTile(0),
        fCurrentRow(-1),
        fHeapOff(0)
    {
        InitCompressionReading();
    }

    // Skip the next row
    bool SkipNextRow()
    {
        if (!fTable.isCompressed)
            return fits::SkipNextRow();

        fRow++;
        return true;
    }

private:

    // Do what it takes to initialize the compressed structure
    void InitCompressionReading()
    {
        //The constructor may have failed
        if (!good())
            return;

        //Get the compression-specific keywords
        fNumTiles       = fTable.isCompressed ? GetInt("NAXIS2")   : 0;
        fNumRowsPerTile = fTable.isCompressed ? GetInt("ZTILELEN") : 0;

        //give it some space for uncompressing
        AllocateBuffers();

        //read the file's catalog
        ReadCatalog();
    }

    // Stage the requested row to internal buffer
    // Does NOT return data to users
    void StageRow(size_t row, char* dest)
    {
        if (!fTable.isCompressed)
        {
            fits::StageRow(row, dest);
            return;
        }

        ReadBinaryRow(row, dest);
    }

    // Copy decompressed data to location requested by user
    void MoveColumnDataToUserSpace(char* dest, const char* src, const Table::Column& c)
    {
        if (!fTable.isCompressed)
        {
            fits::MoveColumnDataToUserSpace(dest, src, c);
            return;
        }

        memcpy(dest, src, c.num*c.size);
    }

    vector<char> fBuffer;           ///< store the uncompressed rows
    vector<char> fTransposedBuffer; ///< intermediate buffer to transpose the rows
    vector<char> fCompressedBuffer; ///< compressed rows

    size_t fNumTiles;       ///< Total number of tiles
    size_t fNumRowsPerTile; ///< Number of rows per compressed tile
    size_t fCurrentRow;     ///< current row in memory

    streamoff fHeapOff;     ///< offset from the beginning of the file of the binary data

    vector<vector<pair<int64_t, int64_t> > > fCatalog; ///< Catalog, i.e. the main table that points to the compressed data.
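    // Catalog entry layout, as filled by ReadCatalog() and used in ReadBinaryRow()
    // and UncompressBuffer():
    //   .first  = size in bytes of the compressed block of one column within one tile
    //   .second = offset of that block, counted from the start of the heap (fHeapOff)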

    void AllocateBuffers()
    {
        if (!fTable.isCompressed)
            return;

        fBuffer.resize(fTable.bytes_per_row*fNumRowsPerTile);

        fTransposedBuffer.resize(fTable.bytes_per_row*fNumRowsPerTile);
        fCompressedBuffer.resize(fTable.bytes_per_row*fNumRowsPerTile + 1024*1024); //use a bit more memory, in case the compression algorithm uses more
    }

    // Read the catalog data, i.e. the addresses of the compressed data inside the heap
    void ReadCatalog()
    {
        if (!fTable.isCompressed)
            return;

        char readBuf[16];
        fCatalog.resize(fNumTiles);

        for (uint32_t i=0; i<fNumTiles; i++)
            for (uint32_t j=0; j<fTable.num_cols; j++)
            {
                read(readBuf, 2*sizeof(int64_t));

                //swap the bytes
                int64_t tempValues[2] = {0, 0};
                revcpy<8>(reinterpret_cast<char*>(tempValues), readBuf, 2);

                //add catalog entry
                fCatalog[i].emplace_back(tempValues[0], tempValues[1]);
            }

        //see if there is a gap before heap data
        fHeapOff = tellg()+fTable.GetHeapShift();
    }
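
    // How compressed reading works: rows are grouped into tiles of fNumRowsPerTile
    // rows. When a row from a tile that is not yet in memory is requested, the whole
    // tile is read from the heap into fCompressedBuffer, decompressed column by
    // column into fTransposedBuffer by UncompressBuffer(), and finally rearranged
    // into row-major order in fBuffer, from which the individual rows are served.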

    // Compressed version of the read row
    bool ReadBinaryRow(const size_t &rowNum, char *bufferToRead)
    {
        if (rowNum >= GetNumRows())
            return false;

        const uint32_t requestedTile = rowNum/fNumRowsPerTile;
        const uint32_t currentTile   = fCurrentRow/fNumRowsPerTile;

        fCurrentRow = rowNum;

        //should we read yet another chunk of data ?
        if (requestedTile != currentTile)
        {
            //read yet another chunk from the file
            //the size that we should read is in the catalog: sum up the sizes of all columns
            const uint32_t currentCatRow = fCurrentRow/fNumRowsPerTile;

            int64_t sizeToRead = 0;
            for (uint32_t i=0; i<fCatalog[currentCatRow].size(); i++)
                sizeToRead += fCatalog[currentCatRow][i].first;

            //skip to the beginning of the tile
            seekg(fHeapOff+fCatalog[currentCatRow][0].second);
            read(fCompressedBuffer.data(), sizeToRead);

            const uint32_t thisRoundNumRows = (GetNumRows()<fCurrentRow + fNumRowsPerTile) ? GetNumRows()%fNumRowsPerTile : fNumRowsPerTile;

            //uncompress it
            UncompressBuffer(currentCatRow, thisRoundNumRows);

            // pointer to column (source buffer)
            const char *src = fTransposedBuffer.data();

            for (auto it=fTable.sortedCols.begin(); it!=fTable.sortedCols.end(); it++)
            {
                char *buffer = fBuffer.data() + it->offset; // pointer to column (destination buffer)

                switch (it->comp)
                {
                case UNCOMPRESSED:
                case SMOOTHMAN:
                    // regular, "semi-transposed" copy
                    for (char *dest=buffer; dest<buffer+thisRoundNumRows*fTable.bytes_per_row; dest+=fTable.bytes_per_row) // row-by-row
                    {
                        memcpy(dest, src, it->bytes);
                        src += it->bytes; // next column
                    }
                    break;

                default:
                    // transposed copy
                    for (char *elem=buffer; elem<buffer+it->bytes; elem+=it->size) // element-by-element (arrays)
                    {
                        for (char *dest=elem; dest<elem+thisRoundNumRows*fTable.bytes_per_row; dest+=fTable.bytes_per_row) // row-by-row
                        {
                            memcpy(dest, src, it->size);
                            src += it->size; // next element
                        }
                    }
                    break;
                };
            }
        }

        //Data loaded and uncompressed. Copy it to destination
        memcpy(bufferToRead, fBuffer.data()+fTable.bytes_per_row*(fCurrentRow%fNumRowsPerTile), fTable.bytes_per_row);
        return good();
    }

    // Read a bunch of uncompressed data
    uint32_t UncompressUNCOMPRESSED(char*       dest,
                                    const char* src,
                                    uint32_t    numRows,
                                    uint32_t    sizeOfElems,
                                    uint32_t    numRowElems)
    {
        memcpy(dest, src, numRows*sizeOfElems*numRowElems);
        return numRows*sizeOfElems*numRowElems;
    }
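
    // Expected layout of a Huffman-compressed block, as consumed below: an array of
    // numRowElems uint32_t values giving the compressed size of each encoded chunk,
    // followed by the concatenated Huffman bit-streams; every chunk decodes to
    // 16-bit values.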

    // Read a bunch of data compressed with the Huffman algorithm
    uint32_t UncompressHUFFMAN(char*       dest,
                               const char* src,
                               uint32_t    ,
                               uint32_t    sizeOfElems,
                               uint32_t    numRowElems)
    {
        if (sizeOfElems < 2)
        {
            cout << "Error: Huffman only works on shorts or longer types (here: " << sizeOfElems << "). Aborting." << endl;
            return -1;
        }

        vector<uint16_t> uncompressed;

        //read compressed sizes (one per row)
        const uint32_t* compressedSizes = reinterpret_cast<const uint32_t*>(src);
        src += sizeof(uint32_t)*numRowElems;

        //uncompress the rows, one by one
        uint32_t sizeWritten = 0;
        for (uint32_t j=0; j<numRowElems; j++)
        {
            Huffman::Decode(reinterpret_cast<const unsigned char*>(src), compressedSizes[j], uncompressed);

            memcpy(dest, uncompressed.data(), uncompressed.size()*sizeof(uint16_t));

            sizeWritten += uncompressed.size()*sizeof(uint16_t);
            dest        += uncompressed.size()*sizeof(uint16_t);
            src         += compressedSizes[j];
        }
        return sizeWritten;
    }
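
    // "Smoothman" data is Huffman-coded smoothed 16-bit values: on disk each value
    // (from the third one on) is stored as its difference to the average of the two
    // preceding values, so after Huffman-decoding the loop below adds that average
    // back in to restore the original samples.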

    //Read a bunch of data compressed with the smoothman algorithm
    uint32_t UncompressSMOOTHMAN(int16_t*    dest,
                                 const char* src,
                                 uint32_t    numRows,
                                 uint32_t    sizeOfElems,
                                 uint32_t    numRowElems)
    {
        //call huffman transposed
        const uint32_t sizeWritten = UncompressHUFFMAN(reinterpret_cast<char*>(dest), src, numRowElems, sizeOfElems, numRows);

        //un-do the integer smoothing
        for (uint32_t j=2; j<numRowElems*numRows; j++)
            dest[j] = dest[j] + (dest[j-1]+dest[j-2])/2;

        return sizeWritten;
    }
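
    // Within a tile, each column block starts with a one-byte compression flag
    // (0 means the block is stored uncompressed, see the flag defines below),
    // followed by the payload. Its position inside fCompressedBuffer is the
    // difference between the column's catalog offset and that of the first column.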

    // Data has been read from disk. Uncompress it !
    void UncompressBuffer(const uint32_t &catalogCurrentRow, const uint32_t &thisRoundNumRows)
    {
        char *dest = fTransposedBuffer.data();

        //uncompress column by column
        for (uint32_t i=0; i<fTable.sortedCols.size(); i++)
        {
            const Table::Column &col = fTable.sortedCols[i];
            if (col.num == 0)
                continue;

            //get the compression flag
            const int64_t compressedOffset = fCatalog[catalogCurrentRow][i].second - fCatalog[catalogCurrentRow][0].second;
            const char compressedFlag = fCompressedBuffer[compressedOffset];

            //#define COMPRESSED_FLAG 0x1
            //#define UNCOMPRESSED_FLAG 0x0

            const char *src = fCompressedBuffer.data()+compressedOffset+1;

            //if this bunch of data is not compressed, fall back to the uncompressed reader
            const uint32_t compression = compressedFlag==0 ? UNCOMPRESSED : col.comp;
            switch (compression)
            {
            case UNCOMPRESSED:
                dest += UncompressUNCOMPRESSED(dest, src, thisRoundNumRows, col.size, col.num);
                break;

            case SMOOTHMAN:
                dest += UncompressSMOOTHMAN(reinterpret_cast<int16_t*>(dest), src, thisRoundNumRows, col.size, col.num);
                break;

            default:
                ;
            }
        }
    }

};//class zfits

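// Minimal usage sketch (for illustration only; the file and column names are
// placeholders, and it assumes the fits base class exposes SetPtrAddress() and
// GetNextRow() as used elsewhere in mcore):
//
//     zfits file("20130516_042.fits.fz", "Events");
//     uint32_t eventNumber;
//     file.SetPtrAddress("EventNum", &eventNumber); // bind a column to a local variable
//     while (file.GetNextRow())                     // rows are decompressed tile by tile
//         cout << eventNumber << endl;
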
#ifndef __MARS__
}; //namespace std
#endif

#endif