source: trunk/FACT++/src/zfits.cc@ 14797

Last change on this file since 14797 was 14797, checked in by tbretz, 12 years ago
File size: 13.0 KB
Line 
1#include <fstream>
2#include <iostream>
3#include <algorithm>
4#include <map>
5#include <vector>
6
7#include "Configuration.h"
8
9#include "externals/fits.h"
10#include "externals/huffman.h"
11
12#include "Time.h"
13
14using namespace std;
15
16void SetupConfiguration(Configuration &conf)
17{
18 po::options_description control("zfits");
19 control.add_options()
20 ("in", var<string>()->required(), "")
21 ("out", var<string>(), "")
22 ("decompress,d", po_switch(), "")
23 ("force,f", var<string>(), "Force overwrite of output file")
24 ;
25
26 po::positional_options_description p;
27 p.add("in", 1); // The 1st positional options
28 p.add("out", 2); // The 2nd positional options
29
30 conf.AddOptions(control);
31 conf.SetArgumentPositions(p);
32}
33
34void PrintUsage()
35{
36 cout <<
37 "zfits - A fits compressor\n"
38 "\n"
39 "\n"
40 "Usage: zfits [-d] input.fits[.gz] [output.zf]\n";
41 cout << endl;
42}
43
44
45string ReplaceEnd(const string &str, const string &expr, const string &repl)
46{
47 string out(str);
48
49 const size_t p = out.rfind(expr);
50 if (p==out.size()-expr.length())
51 out.replace(p, expr.length(), repl);
52
53 return out;
54}
55
56
57string ReplaceExt(const string &name, bool decomp)
58{
59 if (decomp)
60 return ReplaceEnd(name, ".zfits", ".fits");
61
62 string out = ReplaceEnd(name, ".fits", ".zfits");
63 return ReplaceEnd(out, ".fits.gz", ".zfits");
64}
65
66int Compress(const string &ifile, const string &ofile)
67{
68 // when to print some info on the screen (every f percent)
69 float frac = 0.01;
70
71 // open a fits file
72 fits f(ifile);
73
74 // open output file
75 ofstream fout(ofile);
76
77 // counters for total size and compressed size
78 uint64_t tot = 0;
79 uint64_t com = 0;
80
81 // very simple timer
82 double sec = 0;
83
84 // Produce a lookup table with all informations about the
85 // columns in the same order as they are in the file
86 const fits::Table::Columns &cols= f.GetColumns();
87
88 struct col_t : fits::Table::Column
89 {
90 string name;
91 void *ptr;
92 };
93
94
95 map<size_t, col_t> columns;
96
97 size_t row_tot = 0;
98 for (auto it=cols.begin(); it!=cols.end(); it++)
99 {
100 col_t c;
101
102 c.offset = it->second.offset;
103 c.size = it->second.size;
104 c.num = it->second.num;
105 c.name = it->first;
106 c.ptr = f.SetPtrAddress(it->first);
107
108 columns[c.offset] = c;
109
110 row_tot += c.size*c.num;
111 }
112
113 // copy the header from the input to the output file
114 // and prefix the output file as a compressed fits file
115 string header;
116 header.resize(f.tellg());
117
118 f.seekg(0);
119 f.read((char*)header.c_str(), header.size());
120
121 char m[2];
122 m[0] = 'z'+128;
123 m[1] = 'f'+128;
124
125 const size_t hlen = 0;
126
127 size_t hs = header.size();
128
129 fout.write(m, 2); // magic number
130 fout.write((char*)&hlen, sizeof(size_t)); // length of possible header data (e.g. file version, compression algorithm)
131 fout.write((char*)&hs, sizeof(size_t)); // size of FITS header
132 fout.write(header.c_str(), header.size()); // uncompressed FITS header
133
134 tot += header.size();
135 com += header.size()+2+2*sizeof(size_t);
136
137 cout << fixed;
138
139 Time start;
140
141 // loop over all rows
142 vector<char> cache(row_tot);
143 while (f.GetNextRow())
144 {
145 // pointer to the start of the cache for the data of one row
146 char *out = cache.data();
147
148 // mask stroing which column have been compressed and which not
149 vector<uint8_t> mask(cols.size()/8 + 1);
150
151 // loop over all columns
152 uint32_t icol = 0;
153 for (auto it=columns.begin(); it!=columns.end(); it++, icol++)
154 {
155 // size of cell in bytes
156 const size_t len_col = it->second.size * it->second.num;
157
158 // get pointer to data
159 int16_t *ptr = (int16_t*)it->second.ptr;
160
161 // If the column is the data, preprocess the data
162 if (it->second.name=="Data")
163 { /*
164 // e.g. subtract the median (note that this is just
165 // to test the properties of the compression, because
166 // it re-sorts the real data and does not keep the
167 // subtracted offset in the output
168 int n = 30;
169 vector<int16_t> med(1440*n);
170 for (int i=0; i<1440*n; i++)
171 {
172 int16_t *chunk = ptr+i*300/n;
173 sort(chunk, chunk+300/n);
174
175 med[i] = chunk[300/n/2];
176
177 for (int j=0; j<300/n; j++)
178 chunk[j] -= med[i];
179 }
180
181 string buf;
182 int len = huffmans_encode(buf, (uint16_t*)med.data(), med.size());
183 com += buf.size();*/
184 }
185
186 // do not try to compress less than 32bytes
187 if (len_col>32 && it->second.size==2)
188 {
189 Time now;
190
191 // perform 16bit hoffman (option for 8bit missing, skip 64bit)
192 // (what to do with floats?)
193 string buf;
194 /*int len =*/ Huffman::Encode(buf, (uint16_t*)ptr, len_col/2);
195
196 sec += Time().UnixTime()-now.UnixTime();
197
198 // check if data was really compressed
199 if (buf.size()<len_col)
200 {
201 // copy compressed data into output cache
202 memcpy(out, buf.c_str(), buf.size());
203 out += buf.size();
204
205 // update mask
206 const uint64_t bit = (icol%8);
207 mask[icol/8] |= (1<<bit);
208
209 continue;
210 }
211 }
212
213 // just copy the data if it has not been compressed
214 memcpy(out, (char*)ptr, len_col);
215 out += len_col;
216 }
217
218 // calcualte size of output buffer
219 const size_t sz = out-cache.data();
220
221 // update counters
222 tot += row_tot;
223 com += sz + mask.size();
224
225 // write the compression mask and the (partly) copmpressed data stream
226 fout.write((char*)mask.data(), mask.size());
227 fout.write(cache.data(), sz);
228
229 //if (sz2<0 || memcmp(data, dest3.data(), 432000*2)!=0)
230 // cout << "grrrr" << endl;
231
232 const float proc = float(f.GetRow())/f.GetNumRows();
233 if (proc>frac)
234 {
235 const double elep = Time().UnixTime()-start.UnixTime();
236 cout << "\r" << setprecision(0) << setw(3) << 100*proc << "% [" << setw(3) << 100.*com/tot << "%] cpu:" << setprecision(1) << sec << "s in:" << tot/1000000/elep << "MB/s" << flush;
237 frac += 0.01;
238 }
239 }
240
241 const double elep = Time().UnixTime()-start.UnixTime();
242 cout << setprecision(0) << "\r100% [" << setw(3) << 100.*com/tot << "%] cpu:" << setprecision(1) << sec << "s in:" << tot/1000000/elep << "MB/s" << endl;
243
244 return 0;
245}
246
247template<size_t N>
248void revcpy(char *dest, const char *src, int num)
249{
250 const char *pend = src + num*N;
251 for (const char *ptr = src; ptr<pend; ptr+=N, dest+=N)
252 reverse_copy(ptr, ptr+N, dest);
253}
254
255int Decompress(const string &ifile, const string &ofile)
256{
257 // open a fits file
258 ifstream fin(ifile);
259
260 // open output file
261 ofstream fout(ofile);
262
263 // get and check magic number
264 unsigned char m[2];
265 fin.read((char*)m, 2);
266 if (m[0]!='z'+128 || m[1]!='f'+128)
267 throw runtime_error("File not a compressed fits file.");
268
269 // get length of additional header information
270 size_t hlen = 0;
271 fin.read((char*)&hlen, sizeof(size_t));
272 if (hlen>0)
273 throw runtime_error("Only Version-zero files supported.");
274
275 // get size of FITS header
276 size_t hs = 0;
277 fin.read((char*)&hs, sizeof(size_t));
278 if (!fin)
279 throw runtime_error("Could not access header size.");
280
281 // copy the header from the input to the output file
282 // and prefix the output file as a compressed fits file
283 string header;
284 header.resize(hs);
285
286 fin.read((char*)header.c_str(), header.size());
287 fout.write((char*)header.c_str(), header.size());
288 if (!fin)
289 throw runtime_error("Could not read full header");
290
291 string templ("tmpXXXXXX");
292 int fd = mkstemp((char*)templ.c_str());
293 const ssize_t rc = write(fd, header.c_str(), header.size());
294 close(fd);
295
296 if (rc<0)
297 throw runtime_error("Could not write to temporary file: "+string(strerror(errno)));
298
299 // open the output file to get the header parsed
300 fits info(templ);
301
302 remove(templ.c_str());
303
304 // get the maximum size of one row and a list
305 // of all columns ordered by their offset
306 size_t row_tot = 0;
307 const fits::Table::Columns &cols = info.GetColumns();
308 map<size_t, fits::Table::Column> columns;
309 for (auto it=cols.begin(); it!=cols.end(); it++)
310 {
311 columns[it->second.offset] = it->second;
312 row_tot += it->second.num*it->second.size;
313 }
314
315 // very simple timer
316 double sec = 0;
317 double frac = 0;
318
319 size_t com = 2+hs+2*sizeof(size_t);
320 size_t tot = hs;
321
322 const size_t masklen = cols.size()/8+1;
323
324 // loop over all rows
325 vector<char> buf(row_tot+masklen);
326 vector<char> swap(row_tot);
327 uint32_t offset = 0;
328
329 const uint64_t nrows = info.GetUInt("NAXIS2");
330
331 Time start;
332
333 cout << fixed;
334 for (uint32_t irow=0; irow<nrows; irow++)
335 {
336 fin.read(buf.data()+offset, buf.size()-offset);
337
338 const uint8_t *mask = reinterpret_cast<uint8_t*>(buf.data());
339 offset = masklen;
340
341 char *ptr = swap.data();
342
343 uint32_t icol = 0;
344 for (auto it=columns.begin(); it!= columns.end(); it++, icol++)
345 {
346 const size_t &num = it->second.num;
347 const size_t &size = it->second.size;
348
349 if (mask[icol/8]&(1<<(icol%8)))
350 {
351 Time now;
352
353 vector<uint16_t> out(num*size/2);
354 int len = Huffman::Decode((uint8_t*)buf.data()+offset, buf.size()-offset, out);
355 if (len<0)
356 throw runtime_error("Decoding failed.");
357
358 sec += Time().UnixTime()-now.UnixTime();
359
360 offset += len;
361
362 revcpy<2>(ptr, (char*)out.data(), num);
363 }
364 else
365 {
366 switch (size)
367 {
368 case 1: memcpy (ptr, buf.data()+offset, num*size); break;
369 case 2: revcpy<2>(ptr, buf.data()+offset, num); break;
370 case 4: revcpy<4>(ptr, buf.data()+offset, num); break;
371 case 8: revcpy<8>(ptr, buf.data()+offset, num); break;
372 }
373
374 offset += num*size;
375 }
376
377 ptr += num*size;
378 }
379
380 com += offset+masklen;
381 tot += row_tot;
382
383 fout.write((char*)swap.data(), swap.size());
384
385 memmove(buf.data(), buf.data()+offset, buf.size()-offset);
386 offset = buf.size()-offset;
387
388 if (!fout)
389 throw runtime_error("Error writing to output file");
390
391 const float proc = float(irow)/nrows;
392 if (proc>frac)
393 {
394 const double elep = Time().UnixTime()-start.UnixTime();
395 cout << "\r" << setprecision(0) << setw(3) << 100*proc << "% [" << setw(3) << 100.*com/tot << "%] cpu:" << setprecision(1) << sec << "s out:" << tot/1000000/elep << "MB/s" << flush;
396 frac += 0.01;
397 }
398 }
399
400 const double elep = Time().UnixTime()-start.UnixTime();
401 cout << setprecision(0) << "\r100% [" << setw(3) << 100.*com/tot << "%] cpu:" << setprecision(1) << sec << "s out:" << tot/1000000/elep << "MB/s" << endl;
402
403 return 0;
404}
405
406int main(int argc, const char **argv)
407{
408 Configuration conf(argv[0]);
409 conf.SetPrintUsage(PrintUsage);
410 SetupConfiguration(conf);
411
412 if (!conf.DoParse(argc, argv))
413 return 127;
414
415 const bool decomp = conf.Get<bool>("decompress");
416
417 const string ifile = conf.Get<string>("in");
418 const string ofile = conf.Has("out") ? conf.Get<string>("out") : ReplaceExt(ifile, decomp);
419
420 return decomp ? Decompress(ifile, ofile) : Compress(ifile, ofile);
421
422 /*
423 // reading and writing files which just contain the binary data
424 // For simplicity I assume ROI=300
425
426 ifstream finx( "20130117_082.fits.pu");
427 ofstream foutx("20130117_082.fits.puz");
428
429 while (1)
430 {
431 string str;
432 str.resize(432000*2);
433
434 finx.read((char*)str.c_str(), 432000*2);
435 if (!finx)
436 break;
437
438 // Preprocess the data, e.g. subtract median pixelwise
439 for (int i=0; i<1440; i++)
440 {
441 int16_t *chunk = (int16_t*)str.c_str()+i*300;
442 sort(chunk, chunk+300);
443
444 int16_t med = chunk[149];
445
446 for (int j=0; j<300; j++)
447 chunk[j] -= med;
448 }
449
450 // do huffman encoding on shorts
451 string buf;
452 int len = huffmans_encode(buf, (uint16_t*)str.c_str(), 432000);
453
454 // if the result is smaller than the original data write
455 // the result, otherwise the original data
456 if (buf.size()<432000*2)
457 foutx.write(buf.c_str(), buf.size());
458 else
459 foutx.write(str.c_str(), 432000);
460 }
461
462 return 0;
463 */
464}
Note: See TracBrowser for help on using the repository browser.