| 1 | #include <fstream>
|
|---|
| 2 | #include <iostream>
|
|---|
| 3 | #include <algorithm>
|
|---|
| 4 | #include <map>
|
|---|
| 5 | #include <vector>
|
|---|
| 6 |
|
|---|
| 7 | #include "Configuration.h"
|
|---|
| 8 |
|
|---|
| 9 | #include "fits.h"
|
|---|
| 10 | #include "huffman.h"
|
|---|
| 11 |
|
|---|
| 12 | #include "Time.h"
|
|---|
| 13 |
|
|---|
| 14 | using namespace std;
|
|---|
| 15 |
|
|---|
| 16 | void SetupConfiguration(Configuration &conf)
|
|---|
| 17 | {
|
|---|
| 18 | po::options_description control("zfits");
|
|---|
| 19 | control.add_options()
|
|---|
| 20 | ("in", var<string>()
|
|---|
| 21 | #if BOOST_VERSION >= 104200
|
|---|
| 22 | ->required()
|
|---|
| 23 | #endif
|
|---|
| 24 | )
|
|---|
| 25 | ("out", var<string>(), "")
|
|---|
| 26 | ("decompress,d", po_switch(), "")
|
|---|
| 27 | ("force,f", var<string>(), "Force overwrite of output file")
|
|---|
| 28 | ;
|
|---|
| 29 |
|
|---|
| 30 | po::positional_options_description p;
|
|---|
| 31 | p.add("in", 1); // The 1st positional options
|
|---|
| 32 | p.add("out", 2); // The 2nd positional options
|
|---|
| 33 |
|
|---|
| 34 | conf.AddOptions(control);
|
|---|
| 35 | conf.SetArgumentPositions(p);
|
|---|
| 36 | }
|
|---|
| 37 |
|
|---|
| 38 | void PrintUsage()
|
|---|
| 39 | {
|
|---|
| 40 | cout <<
|
|---|
| 41 | "zfits - A fits compressor\n"
|
|---|
| 42 | "\n"
|
|---|
| 43 | "\n"
|
|---|
| 44 | "Usage: zfits [-d] input.fits[.gz] [output.zf]\n";
|
|---|
| 45 | cout << endl;
|
|---|
| 46 | }
|
|---|
| 47 |
|
|---|
| 48 |
|
|---|
| 49 | string ReplaceEnd(const string &str, const string &expr, const string &repl)
|
|---|
| 50 | {
|
|---|
| 51 | string out(str);
|
|---|
| 52 |
|
|---|
| 53 | const size_t p = out.rfind(expr);
|
|---|
| 54 | if (p==out.size()-expr.length())
|
|---|
| 55 | out.replace(p, expr.length(), repl);
|
|---|
| 56 |
|
|---|
| 57 | return out;
|
|---|
| 58 | }
|
|---|
| 59 |
|
|---|
| 60 |
|
|---|
| 61 | string ReplaceExt(const string &name, bool decomp)
|
|---|
| 62 | {
|
|---|
| 63 | if (decomp)
|
|---|
| 64 | return ReplaceEnd(name, ".zfits", ".fits");
|
|---|
| 65 |
|
|---|
| 66 | string out = ReplaceEnd(name, ".fits", ".zfits");
|
|---|
| 67 | return ReplaceEnd(out, ".fits.gz", ".zfits");
|
|---|
| 68 | }
|
|---|
| 69 |
|
|---|
| 70 | struct col_t : fits::Table::Column
|
|---|
| 71 | {
|
|---|
| 72 | string name;
|
|---|
| 73 | void *ptr;
|
|---|
| 74 | };
|
|---|
| 75 |
|
|---|
| 76 | int Compress(const string &ifile, const string &ofile)
|
|---|
| 77 | {
|
|---|
| 78 | // when to print some info on the screen (every f percent)
|
|---|
| 79 | float frac = 0.01;
|
|---|
| 80 |
|
|---|
| 81 | // open a fits file
|
|---|
| 82 | fits f(ifile);
|
|---|
| 83 |
|
|---|
| 84 | // open output file
|
|---|
| 85 | ofstream fout(ofile);
|
|---|
| 86 |
|
|---|
| 87 | // counters for total size and compressed size
|
|---|
| 88 | uint64_t tot = 0;
|
|---|
| 89 | uint64_t com = 0;
|
|---|
| 90 |
|
|---|
| 91 | // very simple timer
|
|---|
| 92 | double sec = 0;
|
|---|
| 93 |
|
|---|
| 94 | // Produce a lookup table with all informations about the
|
|---|
| 95 | // columns in the same order as they are in the file
|
|---|
| 96 | const fits::Table::Columns &cols= f.GetColumns();
|
|---|
| 97 |
|
|---|
| 98 | map<size_t, col_t> columns;
|
|---|
| 99 |
|
|---|
| 100 | size_t row_tot = 0;
|
|---|
| 101 | for (auto it=cols.begin(); it!=cols.end(); it++)
|
|---|
| 102 | {
|
|---|
| 103 | col_t c;
|
|---|
| 104 |
|
|---|
| 105 | c.offset = it->second.offset;
|
|---|
| 106 | c.size = it->second.size;
|
|---|
| 107 | c.num = it->second.num;
|
|---|
| 108 | c.name = it->first;
|
|---|
| 109 | c.ptr = f.SetPtrAddress(it->first);
|
|---|
| 110 |
|
|---|
| 111 | columns[c.offset] = c;
|
|---|
| 112 |
|
|---|
| 113 | row_tot += c.size*c.num;
|
|---|
| 114 | }
|
|---|
| 115 |
|
|---|
| 116 | // copy the header from the input to the output file
|
|---|
| 117 | // and prefix the output file as a compressed fits file
|
|---|
| 118 | string header;
|
|---|
| 119 | header.resize(f.tellg());
|
|---|
| 120 |
|
|---|
| 121 | f.seekg(0);
|
|---|
| 122 | f.read((char*)header.c_str(), header.size());
|
|---|
| 123 |
|
|---|
| 124 | char m[2];
|
|---|
| 125 | m[0] = static_cast<char>('z'+128);
|
|---|
| 126 | m[1] = static_cast<char>('f'+128);
|
|---|
| 127 |
|
|---|
| 128 | const size_t hlen = 0;
|
|---|
| 129 |
|
|---|
| 130 | size_t hs = header.size();
|
|---|
| 131 |
|
|---|
| 132 | fout.write(m, 2); // magic number
|
|---|
| 133 | fout.write((char*)&hlen, sizeof(size_t)); // length of possible header data (e.g. file version, compression algorithm)
|
|---|
| 134 | fout.write((char*)&hs, sizeof(size_t)); // size of FITS header
|
|---|
| 135 | fout.write(header.c_str(), header.size()); // uncompressed FITS header
|
|---|
| 136 |
|
|---|
| 137 | tot += header.size();
|
|---|
| 138 | com += header.size()+2+2*sizeof(size_t);
|
|---|
| 139 |
|
|---|
| 140 | cout << fixed;
|
|---|
| 141 |
|
|---|
| 142 | Time start;
|
|---|
| 143 |
|
|---|
| 144 | // loop over all rows
|
|---|
| 145 | vector<char> cache(row_tot);
|
|---|
| 146 | while (f.GetNextRow())
|
|---|
| 147 | {
|
|---|
| 148 | // pointer to the start of the cache for the data of one row
|
|---|
| 149 | char *out = cache.data();
|
|---|
| 150 |
|
|---|
| 151 | // mask stroing which column have been compressed and which not
|
|---|
| 152 | vector<uint8_t> mask(cols.size()/8 + 1);
|
|---|
| 153 |
|
|---|
| 154 | // loop over all columns
|
|---|
| 155 | uint32_t icol = 0;
|
|---|
| 156 | for (auto it=columns.begin(); it!=columns.end(); it++, icol++)
|
|---|
| 157 | {
|
|---|
| 158 | // size of cell in bytes
|
|---|
| 159 | const size_t len_col = it->second.size * it->second.num;
|
|---|
| 160 |
|
|---|
| 161 | // get pointer to data
|
|---|
| 162 | int16_t *ptr = (int16_t*)it->second.ptr;
|
|---|
| 163 |
|
|---|
| 164 | // If the column is the data, preprocess the data
|
|---|
| 165 | /*
|
|---|
| 166 | if (it->second.name=="Data")
|
|---|
| 167 | {
|
|---|
| 168 | int16_t *end = ptr+1440*300-4-(1440*300)%2;
|
|---|
| 169 | int16_t *beg = ptr;
|
|---|
| 170 |
|
|---|
| 171 | while (end>=beg)
|
|---|
| 172 | {
|
|---|
| 173 | const int16_t avg = (end[0] + end[1])/2;
|
|---|
| 174 | end[2] -= avg;
|
|---|
| 175 | end[3] -= avg;
|
|---|
| 176 | end -=2;
|
|---|
| 177 | }
|
|---|
| 178 | }*/
|
|---|
| 179 |
|
|---|
| 180 | // do not try to compress less than 32bytes
|
|---|
| 181 | if (len_col>32 && it->second.size==2)
|
|---|
| 182 | {
|
|---|
| 183 | Time now;
|
|---|
| 184 |
|
|---|
| 185 | // perform 16bit hoffman (option for 8bit missing, skip 64bit)
|
|---|
| 186 | // (what to do with floats?)
|
|---|
| 187 | string buf;
|
|---|
| 188 | /*int len =*/ Huffman::Encode(buf, (uint16_t*)ptr, len_col/2);
|
|---|
| 189 |
|
|---|
| 190 | sec += Time().UnixTime()-now.UnixTime();
|
|---|
| 191 |
|
|---|
| 192 | // check if data was really compressed
|
|---|
| 193 | if (buf.size()<len_col)
|
|---|
| 194 | {
|
|---|
| 195 | // copy compressed data into output cache
|
|---|
| 196 | memcpy(out, buf.c_str(), buf.size());
|
|---|
| 197 | out += buf.size();
|
|---|
| 198 |
|
|---|
| 199 | // update mask
|
|---|
| 200 | const uint64_t bit = (icol%8);
|
|---|
| 201 | mask[icol/8] |= (1<<bit);
|
|---|
| 202 |
|
|---|
| 203 | continue;
|
|---|
| 204 | }
|
|---|
| 205 | }
|
|---|
| 206 |
|
|---|
| 207 | // just copy the data if it has not been compressed
|
|---|
| 208 | memcpy(out, (char*)ptr, len_col);
|
|---|
| 209 | out += len_col;
|
|---|
| 210 | }
|
|---|
| 211 |
|
|---|
| 212 | // calcualte size of output buffer
|
|---|
| 213 | const size_t sz = out-cache.data();
|
|---|
| 214 |
|
|---|
| 215 | // update counters
|
|---|
| 216 | tot += row_tot;
|
|---|
| 217 | com += sz + mask.size();
|
|---|
| 218 |
|
|---|
| 219 | // write the compression mask and the (partly) copmpressed data stream
|
|---|
| 220 | fout.write((char*)mask.data(), mask.size());
|
|---|
| 221 | fout.write(cache.data(), sz);
|
|---|
| 222 |
|
|---|
| 223 | //if (sz2<0 || memcmp(data, dest3.data(), 432000*2)!=0)
|
|---|
| 224 | // cout << "grrrr" << endl;
|
|---|
| 225 |
|
|---|
| 226 | const float proc = float(f.GetRow())/f.GetNumRows();
|
|---|
| 227 | if (proc>frac)
|
|---|
| 228 | {
|
|---|
| 229 | const double elep = Time().UnixTime()-start.UnixTime();
|
|---|
| 230 | cout << "\r" << setprecision(0) << setw(3) << 100*proc << "% [" << setprecision(1) << setw(5) << 100.*com/tot << "%] cpu:" << sec << "s in:" << tot/1000000/elep << "MB/s" << flush;
|
|---|
| 231 | frac += 0.01;
|
|---|
| 232 | }
|
|---|
| 233 | }
|
|---|
| 234 |
|
|---|
| 235 | const double elep = Time().UnixTime()-start.UnixTime();
|
|---|
| 236 | cout << setprecision(0) << "\r100% [" << setprecision(1) << setw(5) << 100.*com/tot << "%] cpu:" << sec << "s in:" << tot/1000000/elep << "MB/s" << endl;
|
|---|
| 237 |
|
|---|
| 238 | return 0;
|
|---|
| 239 | }
|
|---|
| 240 |
|
|---|
| 241 | template<size_t N>
|
|---|
| 242 | void revcpy(char *dest, const char *src, int num)
|
|---|
| 243 | {
|
|---|
| 244 | const char *pend = src + num*N;
|
|---|
| 245 | for (const char *ptr = src; ptr<pend; ptr+=N, dest+=N)
|
|---|
| 246 | reverse_copy(ptr, ptr+N, dest);
|
|---|
| 247 | }
|
|---|
| 248 |
|
|---|
| 249 | int Decompress(const string &ifile, const string &ofile)
|
|---|
| 250 | {
|
|---|
| 251 | // open a fits file
|
|---|
| 252 | ifstream fin(ifile);
|
|---|
| 253 |
|
|---|
| 254 | // open output file
|
|---|
| 255 | ofstream fout(ofile);
|
|---|
| 256 |
|
|---|
| 257 | // get and check magic number
|
|---|
| 258 | unsigned char m[2];
|
|---|
| 259 | fin.read((char*)m, 2);
|
|---|
| 260 | if (m[0]!='z'+128 || m[1]!='f'+128)
|
|---|
| 261 | throw runtime_error("File not a compressed fits file.");
|
|---|
| 262 |
|
|---|
| 263 | // get length of additional header information
|
|---|
| 264 | size_t hlen = 0;
|
|---|
| 265 | fin.read((char*)&hlen, sizeof(size_t));
|
|---|
| 266 | if (hlen>0)
|
|---|
| 267 | throw runtime_error("Only Version-zero files supported.");
|
|---|
| 268 |
|
|---|
| 269 | // get size of FITS header
|
|---|
| 270 | size_t hs = 0;
|
|---|
| 271 | fin.read((char*)&hs, sizeof(size_t));
|
|---|
| 272 | if (!fin)
|
|---|
| 273 | throw runtime_error("Could not access header size.");
|
|---|
| 274 |
|
|---|
| 275 | // copy the header from the input to the output file
|
|---|
| 276 | // and prefix the output file as a compressed fits file
|
|---|
| 277 | string header;
|
|---|
| 278 | header.resize(hs);
|
|---|
| 279 |
|
|---|
| 280 | fin.read((char*)header.c_str(), header.size());
|
|---|
| 281 | fout.write((char*)header.c_str(), header.size());
|
|---|
| 282 | if (!fin)
|
|---|
| 283 | throw runtime_error("Could not read full header");
|
|---|
| 284 |
|
|---|
| 285 | string templ("tmpXXXXXX");
|
|---|
| 286 | int fd = mkstemp((char*)templ.c_str());
|
|---|
| 287 | const ssize_t rc = write(fd, header.c_str(), header.size());
|
|---|
| 288 | close(fd);
|
|---|
| 289 |
|
|---|
| 290 | if (rc<0)
|
|---|
| 291 | throw runtime_error("Could not write to temporary file: "+string(strerror(errno)));
|
|---|
| 292 |
|
|---|
| 293 | // open the output file to get the header parsed
|
|---|
| 294 | fits info(templ);
|
|---|
| 295 |
|
|---|
| 296 | remove(templ.c_str());
|
|---|
| 297 |
|
|---|
| 298 | // get the maximum size of one row and a list
|
|---|
| 299 | // of all columns ordered by their offset
|
|---|
| 300 | size_t row_tot = 0;
|
|---|
| 301 | const fits::Table::Columns &cols = info.GetColumns();
|
|---|
| 302 | map<size_t, fits::Table::Column> columns;
|
|---|
| 303 | for (auto it=cols.begin(); it!=cols.end(); it++)
|
|---|
| 304 | {
|
|---|
| 305 | columns[it->second.offset] = it->second;
|
|---|
| 306 | row_tot += it->second.num*it->second.size;
|
|---|
| 307 | }
|
|---|
| 308 |
|
|---|
| 309 | // very simple timer
|
|---|
| 310 | double sec = 0;
|
|---|
| 311 | double frac = 0;
|
|---|
| 312 |
|
|---|
| 313 | size_t com = 2+hs+2*sizeof(size_t);
|
|---|
| 314 | size_t tot = hs;
|
|---|
| 315 |
|
|---|
| 316 | const size_t masklen = cols.size()/8+1;
|
|---|
| 317 |
|
|---|
| 318 | // loop over all rows
|
|---|
| 319 | vector<char> buf(row_tot+masklen);
|
|---|
| 320 | vector<char> swap(row_tot);
|
|---|
| 321 | uint32_t offset = 0;
|
|---|
| 322 |
|
|---|
| 323 | const uint64_t nrows = info.GetUInt("NAXIS2");
|
|---|
| 324 |
|
|---|
| 325 | Time start;
|
|---|
| 326 |
|
|---|
| 327 | cout << fixed;
|
|---|
| 328 | for (uint32_t irow=0; irow<nrows; irow++)
|
|---|
| 329 | {
|
|---|
| 330 | fin.read(buf.data()+offset, buf.size()-offset);
|
|---|
| 331 |
|
|---|
| 332 | const uint8_t *mask = reinterpret_cast<uint8_t*>(buf.data());
|
|---|
| 333 | offset = masklen;
|
|---|
| 334 |
|
|---|
| 335 | char *ptr = swap.data();
|
|---|
| 336 |
|
|---|
| 337 | uint32_t icol = 0;
|
|---|
| 338 | for (auto it=columns.begin(); it!= columns.end(); it++, icol++)
|
|---|
| 339 | {
|
|---|
| 340 | const size_t &num = it->second.num;
|
|---|
| 341 | const size_t &size = it->second.size;
|
|---|
| 342 |
|
|---|
| 343 | if (mask[icol/8]&(1<<(icol%8)))
|
|---|
| 344 | {
|
|---|
| 345 | Time now;
|
|---|
| 346 |
|
|---|
| 347 | vector<uint16_t> out(num*size/2);
|
|---|
| 348 | int len = Huffman::Decode((uint8_t*)buf.data()+offset, buf.size()-offset, out);
|
|---|
| 349 | if (len<0)
|
|---|
| 350 | throw runtime_error("Decoding failed.");
|
|---|
| 351 |
|
|---|
| 352 | sec += Time().UnixTime()-now.UnixTime();
|
|---|
| 353 |
|
|---|
| 354 | offset += len;
|
|---|
| 355 |
|
|---|
| 356 | revcpy<2>(ptr, (char*)out.data(), num);
|
|---|
| 357 | }
|
|---|
| 358 | else
|
|---|
| 359 | {
|
|---|
| 360 | switch (size)
|
|---|
| 361 | {
|
|---|
| 362 | case 1: memcpy (ptr, buf.data()+offset, num*size); break;
|
|---|
| 363 | case 2: revcpy<2>(ptr, buf.data()+offset, num); break;
|
|---|
| 364 | case 4: revcpy<4>(ptr, buf.data()+offset, num); break;
|
|---|
| 365 | case 8: revcpy<8>(ptr, buf.data()+offset, num); break;
|
|---|
| 366 | }
|
|---|
| 367 |
|
|---|
| 368 | offset += num*size;
|
|---|
| 369 | }
|
|---|
| 370 |
|
|---|
| 371 | ptr += num*size;
|
|---|
| 372 | }
|
|---|
| 373 |
|
|---|
| 374 | com += offset+masklen;
|
|---|
| 375 | tot += row_tot;
|
|---|
| 376 |
|
|---|
| 377 | fout.write((char*)swap.data(), swap.size());
|
|---|
| 378 |
|
|---|
| 379 | memmove(buf.data(), buf.data()+offset, buf.size()-offset);
|
|---|
| 380 | offset = buf.size()-offset;
|
|---|
| 381 |
|
|---|
| 382 | if (!fout)
|
|---|
| 383 | throw runtime_error("Error writing to output file");
|
|---|
| 384 |
|
|---|
| 385 | const float proc = float(irow)/nrows;
|
|---|
| 386 | if (proc>frac)
|
|---|
| 387 | {
|
|---|
| 388 | const double elep = Time().UnixTime()-start.UnixTime();
|
|---|
| 389 | cout << "\r" << setprecision(0) << setw(3) << 100*proc << "% [" << setprecision(1) << setw(5) << 100.*com/tot << "%] cpu:" << sec << "s out:" << tot/1000000/elep << "MB/s" << flush;
|
|---|
| 390 | frac += 0.01;
|
|---|
| 391 | }
|
|---|
| 392 | }
|
|---|
| 393 |
|
|---|
| 394 | const double elep = Time().UnixTime()-start.UnixTime();
|
|---|
| 395 | cout << setprecision(0) << "\r100% [" << setprecision(1) << setw(5) << 100.*com/tot << "%] cpu:" << sec << "s out:" << tot/1000000/elep << "MB/s" << endl;
|
|---|
| 396 |
|
|---|
| 397 | return 0;
|
|---|
| 398 | }
|
|---|
| 399 |
|
|---|
| 400 | int main(int argc, const char **argv)
|
|---|
| 401 | {
|
|---|
| 402 | Configuration conf(argv[0]);
|
|---|
| 403 | conf.SetPrintUsage(PrintUsage);
|
|---|
| 404 | SetupConfiguration(conf);
|
|---|
| 405 |
|
|---|
| 406 | if (!conf.DoParse(argc, argv))
|
|---|
| 407 | return 127;
|
|---|
| 408 |
|
|---|
| 409 | const bool decomp = conf.Get<bool>("decompress");
|
|---|
| 410 |
|
|---|
| 411 | const string ifile = conf.Get<string>("in");
|
|---|
| 412 | const string ofile = conf.Has("out") ? conf.Get<string>("out") : ReplaceExt(ifile, decomp);
|
|---|
| 413 |
|
|---|
| 414 | return decomp ? Decompress(ifile, ofile) : Compress(ifile, ofile);
|
|---|
| 415 |
|
|---|
| 416 | /*
|
|---|
| 417 | // reading and writing files which just contain the binary data
|
|---|
| 418 | // For simplicity I assume ROI=300
|
|---|
| 419 |
|
|---|
| 420 | ifstream finx( "20130117_082.fits.pu");
|
|---|
| 421 | ofstream foutx("20130117_082.fits.puz");
|
|---|
| 422 |
|
|---|
| 423 | while (1)
|
|---|
| 424 | {
|
|---|
| 425 | string str;
|
|---|
| 426 | str.resize(432000*2);
|
|---|
| 427 |
|
|---|
| 428 | finx.read((char*)str.c_str(), 432000*2);
|
|---|
| 429 | if (!finx)
|
|---|
| 430 | break;
|
|---|
| 431 |
|
|---|
| 432 | // Preprocess the data, e.g. subtract median pixelwise
|
|---|
| 433 | for (int i=0; i<1440; i++)
|
|---|
| 434 | {
|
|---|
| 435 | int16_t *chunk = (int16_t*)str.c_str()+i*300;
|
|---|
| 436 | sort(chunk, chunk+300);
|
|---|
| 437 |
|
|---|
| 438 | int16_t med = chunk[149];
|
|---|
| 439 |
|
|---|
| 440 | for (int j=0; j<300; j++)
|
|---|
| 441 | chunk[j] -= med;
|
|---|
| 442 | }
|
|---|
| 443 |
|
|---|
| 444 | // do huffman encoding on shorts
|
|---|
| 445 | string buf;
|
|---|
| 446 | int len = huffmans_encode(buf, (uint16_t*)str.c_str(), 432000);
|
|---|
| 447 |
|
|---|
| 448 | // if the result is smaller than the original data write
|
|---|
| 449 | // the result, otherwise the original data
|
|---|
| 450 | if (buf.size()<432000*2)
|
|---|
| 451 | foutx.write(buf.c_str(), buf.size());
|
|---|
| 452 | else
|
|---|
| 453 | foutx.write(str.c_str(), 432000);
|
|---|
| 454 | }
|
|---|
| 455 |
|
|---|
| 456 | return 0;
|
|---|
| 457 | */
|
|---|
| 458 | }
|
|---|