source: trunk/FACT++/src/zfits.cc@ 19435

Last change on this file since 19435 was 19177, checked in by tbretz, 6 years ago
Made it compile at ISDC
File size: 12.7 KB
Line 
1#include <fstream>
2#include <iostream>
3#include <algorithm>
4#include <map>
5#include <vector>
6
7#include "Configuration.h"
8
9#include "fits.h"
10#include "huffman.h"
11
12#include "Time.h"
13
14using namespace std;
15
16void SetupConfiguration(Configuration &conf)
17{
18 po::options_description control("zfits");
19 control.add_options()
20 ("in", var<string>()
21#if BOOST_VERSION >= 104200
22 ->required()
23#endif
24 )
25 ("out", var<string>(), "")
26 ("decompress,d", po_switch(), "")
27 ("force,f", var<string>(), "Force overwrite of output file")
28 ;
29
30 po::positional_options_description p;
31 p.add("in", 1); // The 1st positional options
32 p.add("out", 2); // The 2nd positional options
33
34 conf.AddOptions(control);
35 conf.SetArgumentPositions(p);
36}
37
38void PrintUsage()
39{
40 cout <<
41 "zfits - A fits compressor\n"
42 "\n"
43 "\n"
44 "Usage: zfits [-d] input.fits[.gz] [output.zf]\n";
45 cout << endl;
46}
47
48
49string ReplaceEnd(const string &str, const string &expr, const string &repl)
50{
51 string out(str);
52
53 const size_t p = out.rfind(expr);
54 if (p==out.size()-expr.length())
55 out.replace(p, expr.length(), repl);
56
57 return out;
58}
59
60
61string ReplaceExt(const string &name, bool decomp)
62{
63 if (decomp)
64 return ReplaceEnd(name, ".zfits", ".fits");
65
66 string out = ReplaceEnd(name, ".fits", ".zfits");
67 return ReplaceEnd(out, ".fits.gz", ".zfits");
68}
69
70struct col_t : fits::Table::Column
71{
72 string name;
73 void *ptr;
74};
75
76int Compress(const string &ifile, const string &ofile)
77{
78 // when to print some info on the screen (every f percent)
79 float frac = 0.01;
80
81 // open a fits file
82 fits f(ifile);
83
84 // open output file
85 ofstream fout(ofile);
86
87 // counters for total size and compressed size
88 uint64_t tot = 0;
89 uint64_t com = 0;
90
91 // very simple timer
92 double sec = 0;
93
94 // Produce a lookup table with all informations about the
95 // columns in the same order as they are in the file
96 const fits::Table::Columns &cols= f.GetColumns();
97
98 map<size_t, col_t> columns;
99
100 size_t row_tot = 0;
101 for (auto it=cols.begin(); it!=cols.end(); it++)
102 {
103 col_t c;
104
105 c.offset = it->second.offset;
106 c.size = it->second.size;
107 c.num = it->second.num;
108 c.name = it->first;
109 c.ptr = f.SetPtrAddress(it->first);
110
111 columns[c.offset] = c;
112
113 row_tot += c.size*c.num;
114 }
115
116 // copy the header from the input to the output file
117 // and prefix the output file as a compressed fits file
118 string header;
119 header.resize(f.tellg());
120
121 f.seekg(0);
122 f.read((char*)header.c_str(), header.size());
123
124 char m[2];
125 m[0] = static_cast<char>('z'+128);
126 m[1] = static_cast<char>('f'+128);
127
128 const size_t hlen = 0;
129
130 size_t hs = header.size();
131
132 fout.write(m, 2); // magic number
133 fout.write((char*)&hlen, sizeof(size_t)); // length of possible header data (e.g. file version, compression algorithm)
134 fout.write((char*)&hs, sizeof(size_t)); // size of FITS header
135 fout.write(header.c_str(), header.size()); // uncompressed FITS header
136
137 tot += header.size();
138 com += header.size()+2+2*sizeof(size_t);
139
140 cout << fixed;
141
142 Time start;
143
144 // loop over all rows
145 vector<char> cache(row_tot);
146 while (f.GetNextRow())
147 {
148 // pointer to the start of the cache for the data of one row
149 char *out = cache.data();
150
151 // mask stroing which column have been compressed and which not
152 vector<uint8_t> mask(cols.size()/8 + 1);
153
154 // loop over all columns
155 uint32_t icol = 0;
156 for (auto it=columns.begin(); it!=columns.end(); it++, icol++)
157 {
158 // size of cell in bytes
159 const size_t len_col = it->second.size * it->second.num;
160
161 // get pointer to data
162 int16_t *ptr = (int16_t*)it->second.ptr;
163
164 // If the column is the data, preprocess the data
165 /*
166 if (it->second.name=="Data")
167 {
168 int16_t *end = ptr+1440*300-4-(1440*300)%2;
169 int16_t *beg = ptr;
170
171 while (end>=beg)
172 {
173 const int16_t avg = (end[0] + end[1])/2;
174 end[2] -= avg;
175 end[3] -= avg;
176 end -=2;
177 }
178 }*/
179
180 // do not try to compress less than 32bytes
181 if (len_col>32 && it->second.size==2)
182 {
183 Time now;
184
185 // perform 16bit hoffman (option for 8bit missing, skip 64bit)
186 // (what to do with floats?)
187 string buf;
188 /*int len =*/ Huffman::Encode(buf, (uint16_t*)ptr, len_col/2);
189
190 sec += Time().UnixTime()-now.UnixTime();
191
192 // check if data was really compressed
193 if (buf.size()<len_col)
194 {
195 // copy compressed data into output cache
196 memcpy(out, buf.c_str(), buf.size());
197 out += buf.size();
198
199 // update mask
200 const uint64_t bit = (icol%8);
201 mask[icol/8] |= (1<<bit);
202
203 continue;
204 }
205 }
206
207 // just copy the data if it has not been compressed
208 memcpy(out, (char*)ptr, len_col);
209 out += len_col;
210 }
211
212 // calcualte size of output buffer
213 const size_t sz = out-cache.data();
214
215 // update counters
216 tot += row_tot;
217 com += sz + mask.size();
218
219 // write the compression mask and the (partly) copmpressed data stream
220 fout.write((char*)mask.data(), mask.size());
221 fout.write(cache.data(), sz);
222
223 //if (sz2<0 || memcmp(data, dest3.data(), 432000*2)!=0)
224 // cout << "grrrr" << endl;
225
226 const float proc = float(f.GetRow())/f.GetNumRows();
227 if (proc>frac)
228 {
229 const double elep = Time().UnixTime()-start.UnixTime();
230 cout << "\r" << setprecision(0) << setw(3) << 100*proc << "% [" << setprecision(1) << setw(5) << 100.*com/tot << "%] cpu:" << sec << "s in:" << tot/1000000/elep << "MB/s" << flush;
231 frac += 0.01;
232 }
233 }
234
235 const double elep = Time().UnixTime()-start.UnixTime();
236 cout << setprecision(0) << "\r100% [" << setprecision(1) << setw(5) << 100.*com/tot << "%] cpu:" << sec << "s in:" << tot/1000000/elep << "MB/s" << endl;
237
238 return 0;
239}
240
241template<size_t N>
242void revcpy(char *dest, const char *src, int num)
243{
244 const char *pend = src + num*N;
245 for (const char *ptr = src; ptr<pend; ptr+=N, dest+=N)
246 reverse_copy(ptr, ptr+N, dest);
247}
248
249int Decompress(const string &ifile, const string &ofile)
250{
251 // open a fits file
252 ifstream fin(ifile);
253
254 // open output file
255 ofstream fout(ofile);
256
257 // get and check magic number
258 unsigned char m[2];
259 fin.read((char*)m, 2);
260 if (m[0]!='z'+128 || m[1]!='f'+128)
261 throw runtime_error("File not a compressed fits file.");
262
263 // get length of additional header information
264 size_t hlen = 0;
265 fin.read((char*)&hlen, sizeof(size_t));
266 if (hlen>0)
267 throw runtime_error("Only Version-zero files supported.");
268
269 // get size of FITS header
270 size_t hs = 0;
271 fin.read((char*)&hs, sizeof(size_t));
272 if (!fin)
273 throw runtime_error("Could not access header size.");
274
275 // copy the header from the input to the output file
276 // and prefix the output file as a compressed fits file
277 string header;
278 header.resize(hs);
279
280 fin.read((char*)header.c_str(), header.size());
281 fout.write((char*)header.c_str(), header.size());
282 if (!fin)
283 throw runtime_error("Could not read full header");
284
285 string templ("tmpXXXXXX");
286 int fd = mkstemp((char*)templ.c_str());
287 const ssize_t rc = write(fd, header.c_str(), header.size());
288 close(fd);
289
290 if (rc<0)
291 throw runtime_error("Could not write to temporary file: "+string(strerror(errno)));
292
293 // open the output file to get the header parsed
294 fits info(templ);
295
296 remove(templ.c_str());
297
298 // get the maximum size of one row and a list
299 // of all columns ordered by their offset
300 size_t row_tot = 0;
301 const fits::Table::Columns &cols = info.GetColumns();
302 map<size_t, fits::Table::Column> columns;
303 for (auto it=cols.begin(); it!=cols.end(); it++)
304 {
305 columns[it->second.offset] = it->second;
306 row_tot += it->second.num*it->second.size;
307 }
308
309 // very simple timer
310 double sec = 0;
311 double frac = 0;
312
313 size_t com = 2+hs+2*sizeof(size_t);
314 size_t tot = hs;
315
316 const size_t masklen = cols.size()/8+1;
317
318 // loop over all rows
319 vector<char> buf(row_tot+masklen);
320 vector<char> swap(row_tot);
321 uint32_t offset = 0;
322
323 const uint64_t nrows = info.GetUInt("NAXIS2");
324
325 Time start;
326
327 cout << fixed;
328 for (uint32_t irow=0; irow<nrows; irow++)
329 {
330 fin.read(buf.data()+offset, buf.size()-offset);
331
332 const uint8_t *mask = reinterpret_cast<uint8_t*>(buf.data());
333 offset = masklen;
334
335 char *ptr = swap.data();
336
337 uint32_t icol = 0;
338 for (auto it=columns.begin(); it!= columns.end(); it++, icol++)
339 {
340 const size_t &num = it->second.num;
341 const size_t &size = it->second.size;
342
343 if (mask[icol/8]&(1<<(icol%8)))
344 {
345 Time now;
346
347 vector<uint16_t> out(num*size/2);
348 int len = Huffman::Decode((uint8_t*)buf.data()+offset, buf.size()-offset, out);
349 if (len<0)
350 throw runtime_error("Decoding failed.");
351
352 sec += Time().UnixTime()-now.UnixTime();
353
354 offset += len;
355
356 revcpy<2>(ptr, (char*)out.data(), num);
357 }
358 else
359 {
360 switch (size)
361 {
362 case 1: memcpy (ptr, buf.data()+offset, num*size); break;
363 case 2: revcpy<2>(ptr, buf.data()+offset, num); break;
364 case 4: revcpy<4>(ptr, buf.data()+offset, num); break;
365 case 8: revcpy<8>(ptr, buf.data()+offset, num); break;
366 }
367
368 offset += num*size;
369 }
370
371 ptr += num*size;
372 }
373
374 com += offset+masklen;
375 tot += row_tot;
376
377 fout.write((char*)swap.data(), swap.size());
378
379 memmove(buf.data(), buf.data()+offset, buf.size()-offset);
380 offset = buf.size()-offset;
381
382 if (!fout)
383 throw runtime_error("Error writing to output file");
384
385 const float proc = float(irow)/nrows;
386 if (proc>frac)
387 {
388 const double elep = Time().UnixTime()-start.UnixTime();
389 cout << "\r" << setprecision(0) << setw(3) << 100*proc << "% [" << setprecision(1) << setw(5) << 100.*com/tot << "%] cpu:" << sec << "s out:" << tot/1000000/elep << "MB/s" << flush;
390 frac += 0.01;
391 }
392 }
393
394 const double elep = Time().UnixTime()-start.UnixTime();
395 cout << setprecision(0) << "\r100% [" << setprecision(1) << setw(5) << 100.*com/tot << "%] cpu:" << sec << "s out:" << tot/1000000/elep << "MB/s" << endl;
396
397 return 0;
398}
399
400int main(int argc, const char **argv)
401{
402 Configuration conf(argv[0]);
403 conf.SetPrintUsage(PrintUsage);
404 SetupConfiguration(conf);
405
406 if (!conf.DoParse(argc, argv))
407 return 127;
408
409 const bool decomp = conf.Get<bool>("decompress");
410
411 const string ifile = conf.Get<string>("in");
412 const string ofile = conf.Has("out") ? conf.Get<string>("out") : ReplaceExt(ifile, decomp);
413
414 return decomp ? Decompress(ifile, ofile) : Compress(ifile, ofile);
415
416 /*
417 // reading and writing files which just contain the binary data
418 // For simplicity I assume ROI=300
419
420 ifstream finx( "20130117_082.fits.pu");
421 ofstream foutx("20130117_082.fits.puz");
422
423 while (1)
424 {
425 string str;
426 str.resize(432000*2);
427
428 finx.read((char*)str.c_str(), 432000*2);
429 if (!finx)
430 break;
431
432 // Preprocess the data, e.g. subtract median pixelwise
433 for (int i=0; i<1440; i++)
434 {
435 int16_t *chunk = (int16_t*)str.c_str()+i*300;
436 sort(chunk, chunk+300);
437
438 int16_t med = chunk[149];
439
440 for (int j=0; j<300; j++)
441 chunk[j] -= med;
442 }
443
444 // do huffman encoding on shorts
445 string buf;
446 int len = huffmans_encode(buf, (uint16_t*)str.c_str(), 432000);
447
448 // if the result is smaller than the original data write
449 // the result, otherwise the original data
450 if (buf.size()<432000*2)
451 foutx.write(buf.c_str(), buf.size());
452 else
453 foutx.write(str.c_str(), 432000);
454 }
455
456 return 0;
457 */
458}
Note: See TracBrowser for help on using the repository browser.