| 1 | #include <fstream>
|
|---|
| 2 | #include <iostream>
|
|---|
| 3 | #include <algorithm>
|
|---|
| 4 | #include <map>
|
|---|
| 5 | #include <vector>
|
|---|
| 6 |
|
|---|
| 7 | #include "Configuration.h"
|
|---|
| 8 |
|
|---|
| 9 | #include "fits.h"
|
|---|
| 10 | #include "huffman.h"
|
|---|
| 11 |
|
|---|
| 12 | #include "Time.h"
|
|---|
| 13 |
|
|---|
| 14 | using namespace std;
|
|---|
| 15 |
|
|---|
| 16 | void SetupConfiguration(Configuration &conf)
|
|---|
| 17 | {
|
|---|
| 18 | po::options_description control("zfits");
|
|---|
| 19 | control.add_options()
|
|---|
| 20 | ("in", var<string>()->required(), "")
|
|---|
| 21 | ("out", var<string>(), "")
|
|---|
| 22 | ("decompress,d", po_switch(), "")
|
|---|
| 23 | ("force,f", var<string>(), "Force overwrite of output file")
|
|---|
| 24 | ;
|
|---|
| 25 |
|
|---|
| 26 | po::positional_options_description p;
|
|---|
| 27 | p.add("in", 1); // The 1st positional options
|
|---|
| 28 | p.add("out", 2); // The 2nd positional options
|
|---|
| 29 |
|
|---|
| 30 | conf.AddOptions(control);
|
|---|
| 31 | conf.SetArgumentPositions(p);
|
|---|
| 32 | }
|
|---|
| 33 |
|
|---|
| 34 | void PrintUsage()
|
|---|
| 35 | {
|
|---|
| 36 | cout <<
|
|---|
| 37 | "zfits - A fits compressor\n"
|
|---|
| 38 | "\n"
|
|---|
| 39 | "\n"
|
|---|
| 40 | "Usage: zfits [-d] input.fits[.gz] [output.zf]\n";
|
|---|
| 41 | cout << endl;
|
|---|
| 42 | }
|
|---|
| 43 |
|
|---|
| 44 |
|
|---|
| 45 | string ReplaceEnd(const string &str, const string &expr, const string &repl)
|
|---|
| 46 | {
|
|---|
| 47 | string out(str);
|
|---|
| 48 |
|
|---|
| 49 | const size_t p = out.rfind(expr);
|
|---|
| 50 | if (p==out.size()-expr.length())
|
|---|
| 51 | out.replace(p, expr.length(), repl);
|
|---|
| 52 |
|
|---|
| 53 | return out;
|
|---|
| 54 | }
|
|---|
| 55 |
|
|---|
| 56 |
|
|---|
| 57 | string ReplaceExt(const string &name, bool decomp)
|
|---|
| 58 | {
|
|---|
| 59 | if (decomp)
|
|---|
| 60 | return ReplaceEnd(name, ".zfits", ".fits");
|
|---|
| 61 |
|
|---|
| 62 | string out = ReplaceEnd(name, ".fits", ".zfits");
|
|---|
| 63 | return ReplaceEnd(out, ".fits.gz", ".zfits");
|
|---|
| 64 | }
|
|---|
| 65 |
|
|---|
| 66 | int Compress(const string &ifile, const string &ofile)
|
|---|
| 67 | {
|
|---|
| 68 | // when to print some info on the screen (every f percent)
|
|---|
| 69 | float frac = 0.01;
|
|---|
| 70 |
|
|---|
| 71 | // open a fits file
|
|---|
| 72 | fits f(ifile);
|
|---|
| 73 |
|
|---|
| 74 | // open output file
|
|---|
| 75 | ofstream fout(ofile);
|
|---|
| 76 |
|
|---|
| 77 | // counters for total size and compressed size
|
|---|
| 78 | uint64_t tot = 0;
|
|---|
| 79 | uint64_t com = 0;
|
|---|
| 80 |
|
|---|
| 81 | // very simple timer
|
|---|
| 82 | double sec = 0;
|
|---|
| 83 |
|
|---|
| 84 | // Produce a lookup table with all informations about the
|
|---|
| 85 | // columns in the same order as they are in the file
|
|---|
| 86 | const fits::Table::Columns &cols= f.GetColumns();
|
|---|
| 87 |
|
|---|
| 88 | struct col_t : fits::Table::Column
|
|---|
| 89 | {
|
|---|
| 90 | string name;
|
|---|
| 91 | void *ptr;
|
|---|
| 92 | };
|
|---|
| 93 |
|
|---|
| 94 |
|
|---|
| 95 | map<size_t, col_t> columns;
|
|---|
| 96 |
|
|---|
| 97 | size_t row_tot = 0;
|
|---|
| 98 | for (auto it=cols.begin(); it!=cols.end(); it++)
|
|---|
| 99 | {
|
|---|
| 100 | col_t c;
|
|---|
| 101 |
|
|---|
| 102 | c.offset = it->second.offset;
|
|---|
| 103 | c.size = it->second.size;
|
|---|
| 104 | c.num = it->second.num;
|
|---|
| 105 | c.name = it->first;
|
|---|
| 106 | c.ptr = f.SetPtrAddress(it->first);
|
|---|
| 107 |
|
|---|
| 108 | columns[c.offset] = c;
|
|---|
| 109 |
|
|---|
| 110 | row_tot += c.size*c.num;
|
|---|
| 111 | }
|
|---|
| 112 |
|
|---|
| 113 | // copy the header from the input to the output file
|
|---|
| 114 | // and prefix the output file as a compressed fits file
|
|---|
| 115 | string header;
|
|---|
| 116 | header.resize(f.tellg());
|
|---|
| 117 |
|
|---|
| 118 | f.seekg(0);
|
|---|
| 119 | f.read((char*)header.c_str(), header.size());
|
|---|
| 120 |
|
|---|
| 121 | char m[2];
|
|---|
| 122 | m[0] = static_cast<char>('z'+128);
|
|---|
| 123 | m[1] = static_cast<char>('f'+128);
|
|---|
| 124 |
|
|---|
| 125 | const size_t hlen = 0;
|
|---|
| 126 |
|
|---|
| 127 | size_t hs = header.size();
|
|---|
| 128 |
|
|---|
| 129 | fout.write(m, 2); // magic number
|
|---|
| 130 | fout.write((char*)&hlen, sizeof(size_t)); // length of possible header data (e.g. file version, compression algorithm)
|
|---|
| 131 | fout.write((char*)&hs, sizeof(size_t)); // size of FITS header
|
|---|
| 132 | fout.write(header.c_str(), header.size()); // uncompressed FITS header
|
|---|
| 133 |
|
|---|
| 134 | tot += header.size();
|
|---|
| 135 | com += header.size()+2+2*sizeof(size_t);
|
|---|
| 136 |
|
|---|
| 137 | cout << fixed;
|
|---|
| 138 |
|
|---|
| 139 | Time start;
|
|---|
| 140 |
|
|---|
| 141 | // loop over all rows
|
|---|
| 142 | vector<char> cache(row_tot);
|
|---|
| 143 | while (f.GetNextRow())
|
|---|
| 144 | {
|
|---|
| 145 | // pointer to the start of the cache for the data of one row
|
|---|
| 146 | char *out = cache.data();
|
|---|
| 147 |
|
|---|
| 148 | // mask stroing which column have been compressed and which not
|
|---|
| 149 | vector<uint8_t> mask(cols.size()/8 + 1);
|
|---|
| 150 |
|
|---|
| 151 | // loop over all columns
|
|---|
| 152 | uint32_t icol = 0;
|
|---|
| 153 | for (auto it=columns.begin(); it!=columns.end(); it++, icol++)
|
|---|
| 154 | {
|
|---|
| 155 | // size of cell in bytes
|
|---|
| 156 | const size_t len_col = it->second.size * it->second.num;
|
|---|
| 157 |
|
|---|
| 158 | // get pointer to data
|
|---|
| 159 | int16_t *ptr = (int16_t*)it->second.ptr;
|
|---|
| 160 |
|
|---|
| 161 | // If the column is the data, preprocess the data
|
|---|
| 162 | /*
|
|---|
| 163 | if (it->second.name=="Data")
|
|---|
| 164 | {
|
|---|
| 165 | int16_t *end = ptr+1440*300-4-(1440*300)%2;
|
|---|
| 166 | int16_t *beg = ptr;
|
|---|
| 167 |
|
|---|
| 168 | while (end>=beg)
|
|---|
| 169 | {
|
|---|
| 170 | const int16_t avg = (end[0] + end[1])/2;
|
|---|
| 171 | end[2] -= avg;
|
|---|
| 172 | end[3] -= avg;
|
|---|
| 173 | end -=2;
|
|---|
| 174 | }
|
|---|
| 175 | }*/
|
|---|
| 176 |
|
|---|
| 177 | // do not try to compress less than 32bytes
|
|---|
| 178 | if (len_col>32 && it->second.size==2)
|
|---|
| 179 | {
|
|---|
| 180 | Time now;
|
|---|
| 181 |
|
|---|
| 182 | // perform 16bit hoffman (option for 8bit missing, skip 64bit)
|
|---|
| 183 | // (what to do with floats?)
|
|---|
| 184 | string buf;
|
|---|
| 185 | /*int len =*/ Huffman::Encode(buf, (uint16_t*)ptr, len_col/2);
|
|---|
| 186 |
|
|---|
| 187 | sec += Time().UnixTime()-now.UnixTime();
|
|---|
| 188 |
|
|---|
| 189 | // check if data was really compressed
|
|---|
| 190 | if (buf.size()<len_col)
|
|---|
| 191 | {
|
|---|
| 192 | // copy compressed data into output cache
|
|---|
| 193 | memcpy(out, buf.c_str(), buf.size());
|
|---|
| 194 | out += buf.size();
|
|---|
| 195 |
|
|---|
| 196 | // update mask
|
|---|
| 197 | const uint64_t bit = (icol%8);
|
|---|
| 198 | mask[icol/8] |= (1<<bit);
|
|---|
| 199 |
|
|---|
| 200 | continue;
|
|---|
| 201 | }
|
|---|
| 202 | }
|
|---|
| 203 |
|
|---|
| 204 | // just copy the data if it has not been compressed
|
|---|
| 205 | memcpy(out, (char*)ptr, len_col);
|
|---|
| 206 | out += len_col;
|
|---|
| 207 | }
|
|---|
| 208 |
|
|---|
| 209 | // calcualte size of output buffer
|
|---|
| 210 | const size_t sz = out-cache.data();
|
|---|
| 211 |
|
|---|
| 212 | // update counters
|
|---|
| 213 | tot += row_tot;
|
|---|
| 214 | com += sz + mask.size();
|
|---|
| 215 |
|
|---|
| 216 | // write the compression mask and the (partly) copmpressed data stream
|
|---|
| 217 | fout.write((char*)mask.data(), mask.size());
|
|---|
| 218 | fout.write(cache.data(), sz);
|
|---|
| 219 |
|
|---|
| 220 | //if (sz2<0 || memcmp(data, dest3.data(), 432000*2)!=0)
|
|---|
| 221 | // cout << "grrrr" << endl;
|
|---|
| 222 |
|
|---|
| 223 | const float proc = float(f.GetRow())/f.GetNumRows();
|
|---|
| 224 | if (proc>frac)
|
|---|
| 225 | {
|
|---|
| 226 | const double elep = Time().UnixTime()-start.UnixTime();
|
|---|
| 227 | cout << "\r" << setprecision(0) << setw(3) << 100*proc << "% [" << setprecision(1) << setw(5) << 100.*com/tot << "%] cpu:" << sec << "s in:" << tot/1000000/elep << "MB/s" << flush;
|
|---|
| 228 | frac += 0.01;
|
|---|
| 229 | }
|
|---|
| 230 | }
|
|---|
| 231 |
|
|---|
| 232 | const double elep = Time().UnixTime()-start.UnixTime();
|
|---|
| 233 | cout << setprecision(0) << "\r100% [" << setprecision(1) << setw(5) << 100.*com/tot << "%] cpu:" << sec << "s in:" << tot/1000000/elep << "MB/s" << endl;
|
|---|
| 234 |
|
|---|
| 235 | return 0;
|
|---|
| 236 | }
|
|---|
| 237 |
|
|---|
| 238 | template<size_t N>
|
|---|
| 239 | void revcpy(char *dest, const char *src, int num)
|
|---|
| 240 | {
|
|---|
| 241 | const char *pend = src + num*N;
|
|---|
| 242 | for (const char *ptr = src; ptr<pend; ptr+=N, dest+=N)
|
|---|
| 243 | reverse_copy(ptr, ptr+N, dest);
|
|---|
| 244 | }
|
|---|
| 245 |
|
|---|
| 246 | int Decompress(const string &ifile, const string &ofile)
|
|---|
| 247 | {
|
|---|
| 248 | // open a fits file
|
|---|
| 249 | ifstream fin(ifile);
|
|---|
| 250 |
|
|---|
| 251 | // open output file
|
|---|
| 252 | ofstream fout(ofile);
|
|---|
| 253 |
|
|---|
| 254 | // get and check magic number
|
|---|
| 255 | unsigned char m[2];
|
|---|
| 256 | fin.read((char*)m, 2);
|
|---|
| 257 | if (m[0]!='z'+128 || m[1]!='f'+128)
|
|---|
| 258 | throw runtime_error("File not a compressed fits file.");
|
|---|
| 259 |
|
|---|
| 260 | // get length of additional header information
|
|---|
| 261 | size_t hlen = 0;
|
|---|
| 262 | fin.read((char*)&hlen, sizeof(size_t));
|
|---|
| 263 | if (hlen>0)
|
|---|
| 264 | throw runtime_error("Only Version-zero files supported.");
|
|---|
| 265 |
|
|---|
| 266 | // get size of FITS header
|
|---|
| 267 | size_t hs = 0;
|
|---|
| 268 | fin.read((char*)&hs, sizeof(size_t));
|
|---|
| 269 | if (!fin)
|
|---|
| 270 | throw runtime_error("Could not access header size.");
|
|---|
| 271 |
|
|---|
| 272 | // copy the header from the input to the output file
|
|---|
| 273 | // and prefix the output file as a compressed fits file
|
|---|
| 274 | string header;
|
|---|
| 275 | header.resize(hs);
|
|---|
| 276 |
|
|---|
| 277 | fin.read((char*)header.c_str(), header.size());
|
|---|
| 278 | fout.write((char*)header.c_str(), header.size());
|
|---|
| 279 | if (!fin)
|
|---|
| 280 | throw runtime_error("Could not read full header");
|
|---|
| 281 |
|
|---|
| 282 | string templ("tmpXXXXXX");
|
|---|
| 283 | int fd = mkstemp((char*)templ.c_str());
|
|---|
| 284 | const ssize_t rc = write(fd, header.c_str(), header.size());
|
|---|
| 285 | close(fd);
|
|---|
| 286 |
|
|---|
| 287 | if (rc<0)
|
|---|
| 288 | throw runtime_error("Could not write to temporary file: "+string(strerror(errno)));
|
|---|
| 289 |
|
|---|
| 290 | // open the output file to get the header parsed
|
|---|
| 291 | fits info(templ);
|
|---|
| 292 |
|
|---|
| 293 | remove(templ.c_str());
|
|---|
| 294 |
|
|---|
| 295 | // get the maximum size of one row and a list
|
|---|
| 296 | // of all columns ordered by their offset
|
|---|
| 297 | size_t row_tot = 0;
|
|---|
| 298 | const fits::Table::Columns &cols = info.GetColumns();
|
|---|
| 299 | map<size_t, fits::Table::Column> columns;
|
|---|
| 300 | for (auto it=cols.begin(); it!=cols.end(); it++)
|
|---|
| 301 | {
|
|---|
| 302 | columns[it->second.offset] = it->second;
|
|---|
| 303 | row_tot += it->second.num*it->second.size;
|
|---|
| 304 | }
|
|---|
| 305 |
|
|---|
| 306 | // very simple timer
|
|---|
| 307 | double sec = 0;
|
|---|
| 308 | double frac = 0;
|
|---|
| 309 |
|
|---|
| 310 | size_t com = 2+hs+2*sizeof(size_t);
|
|---|
| 311 | size_t tot = hs;
|
|---|
| 312 |
|
|---|
| 313 | const size_t masklen = cols.size()/8+1;
|
|---|
| 314 |
|
|---|
| 315 | // loop over all rows
|
|---|
| 316 | vector<char> buf(row_tot+masklen);
|
|---|
| 317 | vector<char> swap(row_tot);
|
|---|
| 318 | uint32_t offset = 0;
|
|---|
| 319 |
|
|---|
| 320 | const uint64_t nrows = info.GetUInt("NAXIS2");
|
|---|
| 321 |
|
|---|
| 322 | Time start;
|
|---|
| 323 |
|
|---|
| 324 | cout << fixed;
|
|---|
| 325 | for (uint32_t irow=0; irow<nrows; irow++)
|
|---|
| 326 | {
|
|---|
| 327 | fin.read(buf.data()+offset, buf.size()-offset);
|
|---|
| 328 |
|
|---|
| 329 | const uint8_t *mask = reinterpret_cast<uint8_t*>(buf.data());
|
|---|
| 330 | offset = masklen;
|
|---|
| 331 |
|
|---|
| 332 | char *ptr = swap.data();
|
|---|
| 333 |
|
|---|
| 334 | uint32_t icol = 0;
|
|---|
| 335 | for (auto it=columns.begin(); it!= columns.end(); it++, icol++)
|
|---|
| 336 | {
|
|---|
| 337 | const size_t &num = it->second.num;
|
|---|
| 338 | const size_t &size = it->second.size;
|
|---|
| 339 |
|
|---|
| 340 | if (mask[icol/8]&(1<<(icol%8)))
|
|---|
| 341 | {
|
|---|
| 342 | Time now;
|
|---|
| 343 |
|
|---|
| 344 | vector<uint16_t> out(num*size/2);
|
|---|
| 345 | int len = Huffman::Decode((uint8_t*)buf.data()+offset, buf.size()-offset, out);
|
|---|
| 346 | if (len<0)
|
|---|
| 347 | throw runtime_error("Decoding failed.");
|
|---|
| 348 |
|
|---|
| 349 | sec += Time().UnixTime()-now.UnixTime();
|
|---|
| 350 |
|
|---|
| 351 | offset += len;
|
|---|
| 352 |
|
|---|
| 353 | revcpy<2>(ptr, (char*)out.data(), num);
|
|---|
| 354 | }
|
|---|
| 355 | else
|
|---|
| 356 | {
|
|---|
| 357 | switch (size)
|
|---|
| 358 | {
|
|---|
| 359 | case 1: memcpy (ptr, buf.data()+offset, num*size); break;
|
|---|
| 360 | case 2: revcpy<2>(ptr, buf.data()+offset, num); break;
|
|---|
| 361 | case 4: revcpy<4>(ptr, buf.data()+offset, num); break;
|
|---|
| 362 | case 8: revcpy<8>(ptr, buf.data()+offset, num); break;
|
|---|
| 363 | }
|
|---|
| 364 |
|
|---|
| 365 | offset += num*size;
|
|---|
| 366 | }
|
|---|
| 367 |
|
|---|
| 368 | ptr += num*size;
|
|---|
| 369 | }
|
|---|
| 370 |
|
|---|
| 371 | com += offset+masklen;
|
|---|
| 372 | tot += row_tot;
|
|---|
| 373 |
|
|---|
| 374 | fout.write((char*)swap.data(), swap.size());
|
|---|
| 375 |
|
|---|
| 376 | memmove(buf.data(), buf.data()+offset, buf.size()-offset);
|
|---|
| 377 | offset = buf.size()-offset;
|
|---|
| 378 |
|
|---|
| 379 | if (!fout)
|
|---|
| 380 | throw runtime_error("Error writing to output file");
|
|---|
| 381 |
|
|---|
| 382 | const float proc = float(irow)/nrows;
|
|---|
| 383 | if (proc>frac)
|
|---|
| 384 | {
|
|---|
| 385 | const double elep = Time().UnixTime()-start.UnixTime();
|
|---|
| 386 | cout << "\r" << setprecision(0) << setw(3) << 100*proc << "% [" << setprecision(1) << setw(5) << 100.*com/tot << "%] cpu:" << sec << "s out:" << tot/1000000/elep << "MB/s" << flush;
|
|---|
| 387 | frac += 0.01;
|
|---|
| 388 | }
|
|---|
| 389 | }
|
|---|
| 390 |
|
|---|
| 391 | const double elep = Time().UnixTime()-start.UnixTime();
|
|---|
| 392 | cout << setprecision(0) << "\r100% [" << setprecision(1) << setw(5) << 100.*com/tot << "%] cpu:" << sec << "s out:" << tot/1000000/elep << "MB/s" << endl;
|
|---|
| 393 |
|
|---|
| 394 | return 0;
|
|---|
| 395 | }
|
|---|
| 396 |
|
|---|
| 397 | int main(int argc, const char **argv)
|
|---|
| 398 | {
|
|---|
| 399 | Configuration conf(argv[0]);
|
|---|
| 400 | conf.SetPrintUsage(PrintUsage);
|
|---|
| 401 | SetupConfiguration(conf);
|
|---|
| 402 |
|
|---|
| 403 | if (!conf.DoParse(argc, argv))
|
|---|
| 404 | return 127;
|
|---|
| 405 |
|
|---|
| 406 | const bool decomp = conf.Get<bool>("decompress");
|
|---|
| 407 |
|
|---|
| 408 | const string ifile = conf.Get<string>("in");
|
|---|
| 409 | const string ofile = conf.Has("out") ? conf.Get<string>("out") : ReplaceExt(ifile, decomp);
|
|---|
| 410 |
|
|---|
| 411 | return decomp ? Decompress(ifile, ofile) : Compress(ifile, ofile);
|
|---|
| 412 |
|
|---|
| 413 | /*
|
|---|
| 414 | // reading and writing files which just contain the binary data
|
|---|
| 415 | // For simplicity I assume ROI=300
|
|---|
| 416 |
|
|---|
| 417 | ifstream finx( "20130117_082.fits.pu");
|
|---|
| 418 | ofstream foutx("20130117_082.fits.puz");
|
|---|
| 419 |
|
|---|
| 420 | while (1)
|
|---|
| 421 | {
|
|---|
| 422 | string str;
|
|---|
| 423 | str.resize(432000*2);
|
|---|
| 424 |
|
|---|
| 425 | finx.read((char*)str.c_str(), 432000*2);
|
|---|
| 426 | if (!finx)
|
|---|
| 427 | break;
|
|---|
| 428 |
|
|---|
| 429 | // Preprocess the data, e.g. subtract median pixelwise
|
|---|
| 430 | for (int i=0; i<1440; i++)
|
|---|
| 431 | {
|
|---|
| 432 | int16_t *chunk = (int16_t*)str.c_str()+i*300;
|
|---|
| 433 | sort(chunk, chunk+300);
|
|---|
| 434 |
|
|---|
| 435 | int16_t med = chunk[149];
|
|---|
| 436 |
|
|---|
| 437 | for (int j=0; j<300; j++)
|
|---|
| 438 | chunk[j] -= med;
|
|---|
| 439 | }
|
|---|
| 440 |
|
|---|
| 441 | // do huffman encoding on shorts
|
|---|
| 442 | string buf;
|
|---|
| 443 | int len = huffmans_encode(buf, (uint16_t*)str.c_str(), 432000);
|
|---|
| 444 |
|
|---|
| 445 | // if the result is smaller than the original data write
|
|---|
| 446 | // the result, otherwise the original data
|
|---|
| 447 | if (buf.size()<432000*2)
|
|---|
| 448 | foutx.write(buf.c_str(), buf.size());
|
|---|
| 449 | else
|
|---|
| 450 | foutx.write(str.c_str(), 432000);
|
|---|
| 451 | }
|
|---|
| 452 |
|
|---|
| 453 | return 0;
|
|---|
| 454 | */
|
|---|
| 455 | }
|
|---|