Context Navigation

source: trunk/FACT++/src/zfits.cc@ 19095

Visit:

Last change on this file since 19095 was 19025, checked in by tbretz, 6 years ago
Now it does!
File size: 12.6 KB

Line
1	#include <fstream>
2	#include <iostream>
3	#include <algorithm>
4	#include <map>
5	#include <vector>
6
7	#include "Configuration.h"
8
9	#include "fits.h"
10	#include "huffman.h"
11
12	#include "Time.h"
13
14	using namespace std;
15
16	void SetupConfiguration(Configuration &conf)
17	{
18	po::options_description control("zfits");
19	control.add_options()
20	("in", var<string>()->required(), "")
21	("out", var<string>(), "")
22	("decompress,d", po_switch(), "")
23	("force,f", var<string>(), "Force overwrite of output file")
24	;
25
26	po::positional_options_description p;
27	p.add("in", 1); // The 1st positional options
28	p.add("out", 2); // The 2nd positional options
29
30	conf.AddOptions(control);
31	conf.SetArgumentPositions(p);
32	}
33
34	void PrintUsage()
35	{
36	cout <<
37	"zfits - A fits compressor\n"
38	"\n"
39	"\n"
40	"Usage: zfits [-d] input.fits[.gz] [output.zf]\n";
41	cout << endl;
42	}
43
44
45	string ReplaceEnd(const string &str, const string &expr, const string &repl)
46	{
47	string out(str);
48
49	const size_t p = out.rfind(expr);
50	if (p==out.size()-expr.length())
51	out.replace(p, expr.length(), repl);
52
53	return out;
54	}
55
56
57	string ReplaceExt(const string &name, bool decomp)
58	{
59	if (decomp)
60	return ReplaceEnd(name, ".zfits", ".fits");
61
62	string out = ReplaceEnd(name, ".fits", ".zfits");
63	return ReplaceEnd(out, ".fits.gz", ".zfits");
64	}
65
66	int Compress(const string &ifile, const string &ofile)
67	{
68	// when to print some info on the screen (every f percent)
69	float frac = 0.01;
70
71	// open a fits file
72	fits f(ifile);
73
74	// open output file
75	ofstream fout(ofile);
76
77	// counters for total size and compressed size
78	uint64_t tot = 0;
79	uint64_t com = 0;
80
81	// very simple timer
82	double sec = 0;
83
84	// Produce a lookup table with all informations about the
85	// columns in the same order as they are in the file
86	const fits::Table::Columns &cols= f.GetColumns();
87
88	struct col_t : fits::Table::Column
89	{
90	string name;
91	void *ptr;
92	};
93
94
95	map<size_t, col_t> columns;
96
97	size_t row_tot = 0;
98	for (auto it=cols.begin(); it!=cols.end(); it++)
99	{
100	col_t c;
101
102	c.offset = it->second.offset;
103	c.size = it->second.size;
104	c.num = it->second.num;
105	c.name = it->first;
106	c.ptr = f.SetPtrAddress(it->first);
107
108	columns[c.offset] = c;
109
110	row_tot += c.size*c.num;
111	}
112
113	// copy the header from the input to the output file
114	// and prefix the output file as a compressed fits file
115	string header;
116	header.resize(f.tellg());
117
118	f.seekg(0);
119	f.read((char*)header.c_str(), header.size());
120
121	char m[2];
122	m[0] = static_cast<char>('z'+128);
123	m[1] = static_cast<char>('f'+128);
124
125	const size_t hlen = 0;
126
127	size_t hs = header.size();
128
129	fout.write(m, 2); // magic number
130	fout.write((char*)&hlen, sizeof(size_t)); // length of possible header data (e.g. file version, compression algorithm)
131	fout.write((char*)&hs, sizeof(size_t)); // size of FITS header
132	fout.write(header.c_str(), header.size()); // uncompressed FITS header
133
134	tot += header.size();
135	com += header.size()+2+2*sizeof(size_t);
136
137	cout << fixed;
138
139	Time start;
140
141	// loop over all rows
142	vector<char> cache(row_tot);
143	while (f.GetNextRow())
144	{
145	// pointer to the start of the cache for the data of one row
146	char *out = cache.data();
147
148	// mask stroing which column have been compressed and which not
149	vector<uint8_t> mask(cols.size()/8 + 1);
150
151	// loop over all columns
152	uint32_t icol = 0;
153	for (auto it=columns.begin(); it!=columns.end(); it++, icol++)
154	{
155	// size of cell in bytes
156	const size_t len_col = it->second.size * it->second.num;
157
158	// get pointer to data
159	int16_t ptr = (int16_t)it->second.ptr;
160
161	// If the column is the data, preprocess the data
162	/*
163	if (it->second.name=="Data")
164	{
165	int16_t end = ptr+1440300-4-(1440*300)%2;
166	int16_t *beg = ptr;
167
168	while (end>=beg)
169	{
170	const int16_t avg = (end[0] + end[1])/2;
171	end[2] -= avg;
172	end[3] -= avg;
173	end -=2;
174	}
175	}*/
176
177	// do not try to compress less than 32bytes
178	if (len_col>32 && it->second.size==2)
179	{
180	Time now;
181
182	// perform 16bit hoffman (option for 8bit missing, skip 64bit)
183	// (what to do with floats?)
184	string buf;
185	/int len =/ Huffman::Encode(buf, (uint16_t*)ptr, len_col/2);
186
187	sec += Time().UnixTime()-now.UnixTime();
188
189	// check if data was really compressed
190	if (buf.size()<len_col)
191	{
192	// copy compressed data into output cache
193	memcpy(out, buf.c_str(), buf.size());
194	out += buf.size();
195
196	// update mask
197	const uint64_t bit = (icol%8);
198	mask[icol/8] \|= (1<<bit);
199
200	continue;
201	}
202	}
203
204	// just copy the data if it has not been compressed
205	memcpy(out, (char*)ptr, len_col);
206	out += len_col;
207	}
208
209	// calcualte size of output buffer
210	const size_t sz = out-cache.data();
211
212	// update counters
213	tot += row_tot;
214	com += sz + mask.size();
215
216	// write the compression mask and the (partly) copmpressed data stream
217	fout.write((char*)mask.data(), mask.size());
218	fout.write(cache.data(), sz);
219
220	//if (sz2<0 \|\| memcmp(data, dest3.data(), 432000*2)!=0)
221	// cout << "grrrr" << endl;
222
223	const float proc = float(f.GetRow())/f.GetNumRows();
224	if (proc>frac)
225	{
226	const double elep = Time().UnixTime()-start.UnixTime();
227	cout << "\r" << setprecision(0) << setw(3) << 100proc << "% [" << setprecision(1) << setw(5) << 100.com/tot << "%] cpu:" << sec << "s in:" << tot/1000000/elep << "MB/s" << flush;
228	frac += 0.01;
229	}
230	}
231
232	const double elep = Time().UnixTime()-start.UnixTime();
233	cout << setprecision(0) << "\r100% [" << setprecision(1) << setw(5) << 100.*com/tot << "%] cpu:" << sec << "s in:" << tot/1000000/elep << "MB/s" << endl;
234
235	return 0;
236	}
237
238	template<size_t N>
239	void revcpy(char dest, const char src, int num)
240	{
241	const char pend = src + numN;
242	for (const char *ptr = src; ptr<pend; ptr+=N, dest+=N)
243	reverse_copy(ptr, ptr+N, dest);
244	}
245
246	int Decompress(const string &ifile, const string &ofile)
247	{
248	// open a fits file
249	ifstream fin(ifile);
250
251	// open output file
252	ofstream fout(ofile);
253
254	// get and check magic number
255	unsigned char m[2];
256	fin.read((char*)m, 2);
257	if (m[0]!='z'+128 \|\| m[1]!='f'+128)
258	throw runtime_error("File not a compressed fits file.");
259
260	// get length of additional header information
261	size_t hlen = 0;
262	fin.read((char*)&hlen, sizeof(size_t));
263	if (hlen>0)
264	throw runtime_error("Only Version-zero files supported.");
265
266	// get size of FITS header
267	size_t hs = 0;
268	fin.read((char*)&hs, sizeof(size_t));
269	if (!fin)
270	throw runtime_error("Could not access header size.");
271
272	// copy the header from the input to the output file
273	// and prefix the output file as a compressed fits file
274	string header;
275	header.resize(hs);
276
277	fin.read((char*)header.c_str(), header.size());
278	fout.write((char*)header.c_str(), header.size());
279	if (!fin)
280	throw runtime_error("Could not read full header");
281
282	string templ("tmpXXXXXX");
283	int fd = mkstemp((char*)templ.c_str());
284	const ssize_t rc = write(fd, header.c_str(), header.size());
285	close(fd);
286
287	if (rc<0)
288	throw runtime_error("Could not write to temporary file: "+string(strerror(errno)));
289
290	// open the output file to get the header parsed
291	fits info(templ);
292
293	remove(templ.c_str());
294
295	// get the maximum size of one row and a list
296	// of all columns ordered by their offset
297	size_t row_tot = 0;
298	const fits::Table::Columns &cols = info.GetColumns();
299	map<size_t, fits::Table::Column> columns;
300	for (auto it=cols.begin(); it!=cols.end(); it++)
301	{
302	columns[it->second.offset] = it->second;
303	row_tot += it->second.num*it->second.size;
304	}
305
306	// very simple timer
307	double sec = 0;
308	double frac = 0;
309
310	size_t com = 2+hs+2*sizeof(size_t);
311	size_t tot = hs;
312
313	const size_t masklen = cols.size()/8+1;
314
315	// loop over all rows
316	vector<char> buf(row_tot+masklen);
317	vector<char> swap(row_tot);
318	uint32_t offset = 0;
319
320	const uint64_t nrows = info.GetUInt("NAXIS2");
321
322	Time start;
323
324	cout << fixed;
325	for (uint32_t irow=0; irow<nrows; irow++)
326	{
327	fin.read(buf.data()+offset, buf.size()-offset);
328
329	const uint8_t mask = reinterpret_cast<uint8_t>(buf.data());
330	offset = masklen;
331
332	char *ptr = swap.data();
333
334	uint32_t icol = 0;
335	for (auto it=columns.begin(); it!= columns.end(); it++, icol++)
336	{
337	const size_t &num = it->second.num;
338	const size_t &size = it->second.size;
339
340	if (mask[icol/8]&(1<<(icol%8)))
341	{
342	Time now;
343
344	vector<uint16_t> out(num*size/2);
345	int len = Huffman::Decode((uint8_t*)buf.data()+offset, buf.size()-offset, out);
346	if (len<0)
347	throw runtime_error("Decoding failed.");
348
349	sec += Time().UnixTime()-now.UnixTime();
350
351	offset += len;
352
353	revcpy<2>(ptr, (char*)out.data(), num);
354	}
355	else
356	{
357	switch (size)
358	{
359	case 1: memcpy (ptr, buf.data()+offset, num*size); break;
360	case 2: revcpy<2>(ptr, buf.data()+offset, num); break;
361	case 4: revcpy<4>(ptr, buf.data()+offset, num); break;
362	case 8: revcpy<8>(ptr, buf.data()+offset, num); break;
363	}
364
365	offset += num*size;
366	}
367
368	ptr += num*size;
369	}
370
371	com += offset+masklen;
372	tot += row_tot;
373
374	fout.write((char*)swap.data(), swap.size());
375
376	memmove(buf.data(), buf.data()+offset, buf.size()-offset);
377	offset = buf.size()-offset;
378
379	if (!fout)
380	throw runtime_error("Error writing to output file");
381
382	const float proc = float(irow)/nrows;
383	if (proc>frac)
384	{
385	const double elep = Time().UnixTime()-start.UnixTime();
386	cout << "\r" << setprecision(0) << setw(3) << 100proc << "% [" << setprecision(1) << setw(5) << 100.com/tot << "%] cpu:" << sec << "s out:" << tot/1000000/elep << "MB/s" << flush;
387	frac += 0.01;
388	}
389	}
390
391	const double elep = Time().UnixTime()-start.UnixTime();
392	cout << setprecision(0) << "\r100% [" << setprecision(1) << setw(5) << 100.*com/tot << "%] cpu:" << sec << "s out:" << tot/1000000/elep << "MB/s" << endl;
393
394	return 0;
395	}
396
397	int main(int argc, const char **argv)
398	{
399	Configuration conf(argv[0]);
400	conf.SetPrintUsage(PrintUsage);
401	SetupConfiguration(conf);
402
403	if (!conf.DoParse(argc, argv))
404	return 127;
405
406	const bool decomp = conf.Get<bool>("decompress");
407
408	const string ifile = conf.Get<string>("in");
409	const string ofile = conf.Has("out") ? conf.Get<string>("out") : ReplaceExt(ifile, decomp);
410
411	return decomp ? Decompress(ifile, ofile) : Compress(ifile, ofile);
412
413	/*
414	// reading and writing files which just contain the binary data
415	// For simplicity I assume ROI=300
416
417	ifstream finx( "20130117_082.fits.pu");
418	ofstream foutx("20130117_082.fits.puz");
419
420	while (1)
421	{
422	string str;
423	str.resize(432000*2);
424
425	finx.read((char)str.c_str(), 4320002);
426	if (!finx)
427	break;
428
429	// Preprocess the data, e.g. subtract median pixelwise
430	for (int i=0; i<1440; i++)
431	{
432	int16_t chunk = (int16_t)str.c_str()+i*300;
433	sort(chunk, chunk+300);
434
435	int16_t med = chunk[149];
436
437	for (int j=0; j<300; j++)
438	chunk[j] -= med;
439	}
440
441	// do huffman encoding on shorts
442	string buf;
443	int len = huffmans_encode(buf, (uint16_t*)str.c_str(), 432000);
444
445	// if the result is smaller than the original data write
446	// the result, otherwise the original data
447	if (buf.size()<432000*2)
448	foutx.write(buf.c_str(), buf.size());
449	else
450	foutx.write(str.c_str(), 432000);
451	}
452
453	return 0;
454	*/
455	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: