00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013 #ifdef HAVE_CONFIG_H
00014 #include "config.h"
00015 #endif
00016
00017 #include <stdlib.h>
00018 #include <string.h>
00019
00020 #include"WordBitCompress.h"
00021 #include"HtMaxMin.h"
00022
00023 inline static int bitcount(unsigned int maxval)
00024 {
00025 unsigned int mv = maxval;
00026 int nbits;
00027 for (nbits = 0; mv; nbits++)
00028 mv >>= 1;
00029 return nbits;
00030 }
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040 int
00041 log2(unsigned int v)
00042 {
00043 int res;
00044 for(res=-1;v;res++){v>>=1;}
00045 return(res);
00046 }
00047
00048
00049 #define pow2(x) (1<<(x))
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087 class WordInterval {
00088 public:
00089 inline void SizeFromBits() {
00090 size = ((nbits > 0 ? pow2(nbits - 1) : 0));
00091 }
00092
00093
00094
00095
00096
00097 int nbits;
00098
00099
00100
00101 unsigned int size;
00102
00103
00104
00105 unsigned int low;
00106 };
00107
00108 class VlengthCoder {
00109 public:
00110
00111
00112
00113 VlengthCoder(WordBitStream & nbs);
00114
00115 ~VlengthCoder() {
00116 if(intervals) delete [] intervals;
00117 }
00118
00119 void PutUints(unsigned int *vals, int n);
00120 void PutUintsPrepare(unsigned int *vals, int n);
00121 void GetUints(unsigned int *vals, int n);
00122
00123
00124 inline void PutUint(unsigned int val) {
00125 unsigned int low = 0;
00126 int interval = 0;
00127 FindInterval(val, interval, low);
00128
00129 bs.PutUint(interval, interval_bits);
00130
00131 const int bitsremaining = (intervals[interval].nbits > 0 ? intervals[interval].nbits - 1 : 0);
00132 val -= low;
00133 bs.PutUint(val, bitsremaining);
00134 }
00135
00136
00137 inline unsigned int GetUint() {
00138 int interval = bs.GetUint(interval_bits);
00139 const int bitsremaining =
00140 (intervals[interval].nbits > 0 ? intervals[interval].nbits - 1 : 0);
00141 unsigned int val = bs.GetUint(bitsremaining);
00142 val += intervals[interval].low;
00143 return (val);
00144 }
00145
00146
00147
00148 inline void FindInterval(const unsigned int v, int &interval, unsigned int &low) {
00149 int i0 = 0;
00150 int i1 = nintervals;
00151 int i;
00152 for (;;) {
00153 if (i1 == i0 + 1) {
00154 break;
00155 }
00156 i = (i0 + i1) >> 1;
00157 low = intervals[i].low;
00158 if (v < low) {
00159 i1 = i;
00160 continue;
00161 } else {
00162 i0 = i;
00163 continue;
00164 }
00165
00166 }
00167
00168 low = intervals[i0].low;
00169 interval = i0;
00170 }
00171
00172 void GenerateLowBoundaries(WordInterval *intervals, int nintervals);
00173
00174 private:
00175 int interval_bits;
00176
00177
00178
00179 WordInterval *intervals;
00180 int nintervals;
00181
00182 WordBitStream & bs;
00183 };
00184
00185 VlengthCoder::VlengthCoder(WordBitStream & nbs):bs(nbs)
00186 {
00187 interval_bits = 0;
00188 nintervals = 0;
00189 intervals = NULL;
00190 }
00191
00192
00193 static int qsort_uint_cmp(const void *a, const void *b)
00194 {
00195 if ((*((unsigned int *) a)) > (*((unsigned int *) b)))
00196 return 1;
00197 else if ((*((unsigned int *) a)) < (*((unsigned int *) b)))
00198 return -1;
00199 else
00200 return 0;
00201
00202
00203
00204 }
00205
00206
00207 static void qsort_uint(unsigned int *v, int n)
00208 {
00209 qsort((void *) v, (unsigned int) n, sizeof(unsigned int), &qsort_uint_cmp);
00210 }
00211
00212 void VlengthCoder::PutUints(unsigned int *vals, int n)
00213 {
00214 PutUintsPrepare(vals, n);
00215
00216 int i;
00217 bs.PutUint(interval_bits, WORD_CMPR_LOG32_BITS);
00218 for (i = 0; i < nintervals; i++) {
00219 bs.PutUint(intervals[i].nbits, WORD_CMPR_LOG32_BITS);
00220 }
00221
00222 for (i = 0; i < n; i++)
00223 PutUint(vals[i]);
00224 }
00225
00226 void VlengthCoder::PutUintsPrepare(unsigned int *vals, int n)
00227 {
00228 unsigned int *sorted = new unsigned int[n];
00229 memcpy((void *) sorted, (void *) vals, n * sizeof(unsigned int));
00230 qsort_uint(sorted, n);
00231
00232 int nbits = bitcount(sorted[n - 1]);
00233
00234
00235
00236
00237 interval_bits = bitcount((n * nbits) / (10 * WORD_CMPR_LOG32_BITS));
00238
00239 if (interval_bits >= nbits) {
00240 interval_bits = nbits - 1;
00241 }
00242
00243 if (interval_bits < 1) {
00244 interval_bits = 1;
00245 }
00246
00247 nintervals = (1 << interval_bits);
00248 int i;
00249
00250
00251
00252 intervals = new WordInterval[nintervals + 1];
00253
00254
00255 unsigned int lboundary = 0;
00256 unsigned int boundary;
00257 for (i = 0; i < nintervals - 1; i++) {
00258 boundary = sorted[(n * (i + 1)) / nintervals];
00259 intervals[i].nbits = 1 + log2(boundary - lboundary);
00260 intervals[i].SizeFromBits();
00261 lboundary += intervals[i].size;
00262 }
00263 boundary = sorted[n - 1];
00264 intervals[i].nbits = 1 + log2(boundary - lboundary) + 1;
00265 intervals[i].SizeFromBits();
00266
00267 GenerateLowBoundaries(intervals, nintervals);
00268
00269 delete [] sorted;
00270 }
00271
00272 void VlengthCoder::GetUints(unsigned int *vals, int n)
00273 {
00274 int i;
00275 interval_bits = bs.GetUint(WORD_CMPR_LOG32_BITS);
00276 nintervals = pow2(interval_bits);
00277
00278
00279
00280 intervals = new WordInterval[nintervals + 1];
00281
00282 for (i = 0; i < nintervals; i++) {
00283 intervals[i].nbits = bs.GetUint(WORD_CMPR_LOG32_BITS);
00284 intervals[i].SizeFromBits();
00285 }
00286 GenerateLowBoundaries(intervals, nintervals);
00287
00288 for (i = 0; i < n; i++)
00289 vals[i] = GetUint();
00290 }
00291
00292 void VlengthCoder::GenerateLowBoundaries(WordInterval *intervals, int nintervals)
00293 {
00294 unsigned int low = 0;
00295 for (int j = 0; j <= nintervals; j++) {
00296 intervals[j].low = low;
00297 if (j < nintervals) {
00298 low += intervals[j].size;
00299 }
00300 }
00301 }
00302
00303
00304
00305
00306
00307 void
00308 WordBitStream::PutUint(unsigned int v, int n)
00309 {
00310 if(freezeon) {
00311 freeze_bitcount += n;
00312 return;
00313 }
00314
00315 if(n <= 0) return;
00316
00317 int relative_bitpos = bitpos & 0x07;
00318
00319 if(relative_bitpos + n < 8) {
00320
00321
00322 buff[buff_idx] |= (v << relative_bitpos);
00323 BitposIncr(n);
00324 return;
00325 } else {
00326 const int central = ((relative_bitpos + n) >> 3) - 1;
00327
00328
00329 const int bits_in_first_byte = 8 - relative_bitpos;
00330 buff[buff_idx] |= ((v & 0xff) << relative_bitpos) & 0xff;
00331 BitposIncr(bits_in_first_byte);
00332 v >>= bits_in_first_byte;
00333
00334
00335 for(int i = central; i ;i--) {
00336 buff[buff_idx] = v & 0xff ;
00337 BitposIncr(8);
00338 v >>= 8;
00339 }
00340
00341
00342 const int bits_in_last_byte = n - ((central << 3) + bits_in_first_byte);
00343
00344 if(bits_in_last_byte > 0) {
00345 buff[buff_idx] = v & ((1 << bits_in_last_byte) - 1);
00346 BitposIncr(bits_in_last_byte);
00347 }
00348 }
00349 }
00350
00351 unsigned int
00352 WordBitStream::GetUint(int n)
00353 {
00354 if(n <= 0) return 0;
00355
00356 unsigned int res = 0;
00357
00358 int relative_bitpos = bitpos & 0x07;
00359
00360 if(relative_bitpos + n < 8) {
00361
00362 res = (buff[bitpos >> 3] >> relative_bitpos) & ((1 << n) - 1);
00363 bitpos += n;
00364 return res;
00365 } else {
00366 int bytepos = bitpos >> 3;
00367 const int central = ((relative_bitpos + n) >> 3) - 1;
00368
00369
00370 res = (buff[bytepos] >> relative_bitpos) & 0xff;
00371 const int bits_in_first_byte = 8 - relative_bitpos;
00372 bytepos++;
00373
00374
00375 if(central) {
00376 unsigned int tmp_res = 0;
00377 for(int i = central - 1; i >= 0; i--) {
00378 tmp_res |= buff[bytepos + i] & 0xff;
00379 if(i) tmp_res <<= 8;
00380 }
00381 bytepos += central;
00382 res |= tmp_res << bits_in_first_byte;
00383 }
00384
00385
00386 const int bits_in_last_byte = n - ((central << 3) + bits_in_first_byte);
00387 if(bits_in_last_byte > 0) {
00388 res |= ((unsigned int)(buff[bytepos] & ((1 << bits_in_last_byte) - 1))) << (bits_in_first_byte + ((bytepos - (bitpos>>3) - 1) << 3));
00389 }
00390
00391 bitpos+=n;
00392 return res;
00393 }
00394 }
00395
00396 void
00397 WordBitStream::PutZone(unsigned char *vals, int n)
00398 {
00399 int limit = (n + 7) / 8;
00400 for(int i = 0; i < limit; i++) {
00401 const int remains = n - (8 * i);
00402 PutUint(vals[i], remains > 8 ? 8 : remains);
00403 }
00404 }
00405
00406 void
00407 WordBitStream::GetZone(unsigned char *vals, int n)
00408 {
00409 int limit = (n + 7) / 8;
00410 for(int i = 0; i < limit; i++) {
00411 const int remains = n - (8 * i);
00412 vals[i] = GetUint(remains > 8 ? 8 : remains);
00413 }
00414 }
00415
00416
00417
00418
00419
00420
00421
00422
00423
00424
00425
00426
00427
00428
00429
00430
00431
00432
00433 void
00434 WordBitCompress::PutUint(unsigned int v, int n)
00435 {
00436 int nbits = bitcount(v);
00437 WordBitStream::PutUint(nbits, bitcount(n));
00438 if(nbits)
00439 WordBitStream::PutUint(v, nbits);
00440 }
00441
00442 unsigned int
00443 WordBitCompress::GetUint(int n)
00444 {
00445 int nbits = WordBitStream::GetUint(bitcount(n));
00446 if(!nbits)
00447 return 0;
00448 else
00449 return WordBitStream::GetUint(nbits);
00450 }
00451
00452 int
00453 WordBitCompress::PutUints(unsigned int *vals, int n)
00454 {
00455 int cpos = Length();
00456
00457 if(n >= (1 << WORD_CMPR_NBITS_NVALS)) {
00458 fprintf(stderr, "WordBitCompress::PutUints: : overflow: n >= %d\n", (1 << WORD_CMPR_NBITS_NVALS));
00459 abort();
00460 }
00461
00462 PutUint(n, WORD_CMPR_NBITS_NVALS);
00463 if(n == 0)
00464 return Length() - cpos;
00465
00466 int max_nbits = bitcount(HtMaxMin::max_v(vals, n));
00467
00468 int sdecr;
00469 int sfixed;
00470
00471
00472
00473
00474
00475
00476 if(n > 15 && max_nbits > 3) {
00477 Freeze();
00478 PutUintsDecr(vals, n);
00479 sdecr = Length();
00480 UnFreeze();
00481
00482 Freeze();
00483 PutUintsFixed(vals, n);
00484 sfixed = Length();
00485 UnFreeze();
00486 } else {
00487
00488
00489
00490
00491 sdecr = 4242;
00492 sfixed = 0;
00493 }
00494
00495
00496
00497
00498
00499 if(sdecr < sfixed) {
00500 WordBitStream::PutUint(WORD_CMPR_MODEL_DECR, WORD_CMPR_MODEL_BITS);
00501 PutUintsDecr(vals, n);
00502 } else {
00503 WordBitStream::PutUint(WORD_CMPR_MODEL_FIXED, WORD_CMPR_MODEL_BITS);
00504 PutUintsFixed(vals,n);
00505 }
00506
00507 return Length() - cpos;
00508 }
00509
00510 int
00511 WordBitCompress::GetUints(unsigned int **valsp)
00512 {
00513 int n = GetUint(WORD_CMPR_NBITS_NVALS);
00514
00515 if(!n) {
00516 *valsp = NULL;
00517 return 0;
00518 }
00519
00520 unsigned int *vals = new unsigned int[n];
00521
00522 int model = WordBitStream::GetUint(WORD_CMPR_MODEL_BITS);
00523
00524 switch(model) {
00525 case WORD_CMPR_MODEL_DECR:
00526 GetUintsDecr(vals, n);
00527 break;
00528 case WORD_CMPR_MODEL_FIXED:
00529 GetUintsFixed(vals, n);
00530 break;
00531 default:
00532 fprintf(stderr, "WordBitCompress::GetUints invalid compression model %d\n", model);
00533 abort();
00534 break;
00535 }
00536
00537 *valsp = vals;
00538
00539 return n;
00540 }
00541
00542 int
00543 WordBitCompress::GetUints(unsigned int **valsp, int* vals_sizep)
00544 {
00545 int n = GetUint(WORD_CMPR_NBITS_NVALS);
00546
00547 if(!n) {
00548 return 0;
00549 }
00550
00551 while(n >= *vals_sizep) {
00552 (*vals_sizep) *= 2;
00553 (*valsp) = (unsigned int*)realloc(*valsp, (*vals_sizep) * sizeof(unsigned int));
00554 }
00555
00556 int model = WordBitStream::GetUint(WORD_CMPR_MODEL_BITS);
00557
00558 switch(model) {
00559 case WORD_CMPR_MODEL_DECR:
00560 GetUintsDecr(*valsp, n);
00561 break;
00562 case WORD_CMPR_MODEL_FIXED:
00563 GetUintsFixed(*valsp, n);
00564 break;
00565 default:
00566 fprintf(stderr, "WordBitCompress::GetUints invalid compression model %d\n", model);
00567 abort();
00568 break;
00569 }
00570
00571 return n;
00572 }
00573
00574 int WordBitCompress::PutUchars(unsigned char *vals, int n)
00575 {
00576 int cpos = Length();
00577
00578 if(n >= (1 << WORD_CMPR_NBITS_NVALS)) {
00579 fprintf(stderr, "WordBitCompress::PutUchars: : overflow: n >= %d\n", (1 << WORD_CMPR_NBITS_NVALS));
00580 abort();
00581 }
00582
00583 PutUint(n, WORD_CMPR_NBITS_NVALS);
00584 if (n == 0) {
00585 return 0;
00586 }
00587
00588 int max_nbits = bitcount(HtMaxMin::max_v(vals, n));
00589
00590 if(max_nbits >= (1 << WORD_CMPR_LOG8_BITS)) {
00591 fprintf(stderr, "WordBitCompress::PutUchars: : overflow: max_nbits >= %d\n", (1 << WORD_CMPR_LOG8_BITS));
00592 abort();
00593 }
00594
00595 WordBitStream::PutUint(max_nbits, WORD_CMPR_LOG8_BITS);
00596
00597 for(int i = 0; i < n; i++) {
00598 WordBitStream::PutUint(vals[i] & 0xff, max_nbits);
00599 #if 0
00600 unsigned char v = vals[i];
00601 for (int j = 0; j < max_nbits; j++) {
00602 Put(v & (1 << j));
00603 }
00604 #endif
00605 }
00606 return Length() - cpos;
00607 }
00608
00609 int
00610 WordBitCompress::GetUchars(unsigned char **valsp)
00611 {
00612 int n = GetUint(WORD_CMPR_NBITS_NVALS);
00613 if (!n) {
00614 *valsp = NULL;
00615 return 0;
00616 }
00617
00618 int nbits = WordBitStream::GetUint(WORD_CMPR_LOG8_BITS);
00619 int i;
00620 unsigned char *vals = new unsigned char[n];
00621
00622 for (i = 0; i < n; i++)
00623 vals[i] = WordBitStream::GetUint(nbits);
00624
00625 *valsp = vals;
00626 return n;
00627 }
00628
00629 int
00630 WordBitCompress::GetUchars(unsigned char **valsp, int *vals_sizep)
00631 {
00632 int n = GetUint(WORD_CMPR_NBITS_NVALS);
00633 if (!n) {
00634 return 0;
00635 }
00636
00637 while(n >= *vals_sizep) {
00638 (*vals_sizep) *= 2;
00639 (*valsp) = (unsigned char*)realloc(*valsp, (*vals_sizep) * sizeof(unsigned char));
00640 }
00641
00642 int nbits = WordBitStream::GetUint(WORD_CMPR_LOG8_BITS);
00643 int i;
00644
00645 for (i = 0; i < n; i++)
00646 (*valsp)[i] = WordBitStream::GetUint(nbits);
00647
00648 return n;
00649 }
00650
00651 void WordBitCompress::PutUintsFixed(unsigned int *vals, int n)
00652 {
00653 int max_nbits = bitcount(HtMaxMin::max_v(vals, n));
00654
00655 if(max_nbits > (1 << WORD_CMPR_LOG32_BITS)) {
00656 fprintf(stderr, "WordBitCompress::PutUintsFixed: : overflow: %d > %d\n", max_nbits, (1 << WORD_CMPR_LOG32_BITS));
00657 abort();
00658 }
00659
00660 PutUint(max_nbits, WORD_CMPR_LOG32_BITS);
00661
00662 for (int i = 0; i < n; i++)
00663 WordBitStream::PutUint(vals[i], max_nbits);
00664 }
00665
00666 void WordBitCompress::GetUintsFixed(unsigned int *vals, int n)
00667 {
00668 int nbits = GetUint(WORD_CMPR_LOG32_BITS);
00669
00670 int i;
00671 for (i = 0; i < n; i++)
00672 vals[i] = WordBitStream::GetUint(nbits);
00673 }
00674
00675 void WordBitCompress::PutUintsDecr(unsigned int *vals, int n)
00676 {
00677 VlengthCoder coder(*this);
00678 coder.PutUints(vals, n);
00679 }
00680
00681 void WordBitCompress::GetUintsDecr(unsigned int *vals, int n)
00682 {
00683 VlengthCoder coder(*this);
00684 coder.GetUints(vals, n);
00685 }