NCBI C++ ToolKit
encoding.h
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef BMENCODING_H__INCLUDED__
2 #define BMENCODING_H__INCLUDED__
3 /*
4 Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 For more information please visit: http://bitmagic.io
19 */
20 
21 /*! \file encoding.h
22  \brief Encoding utilities for serialization (internal)
23 */
24 
25 
26 #include <memory.h>
27 #include "bmutil.h"
28 
29 
30 #ifdef _MSC_VER
31 #pragma warning (push)
32 #pragma warning (disable : 4702)
33 #endif
34 
35 namespace bm
36 {
37 
38 
39 // ----------------------------------------------------------------
40 
41 /*!
42  \brief Memory encoding.
43 
44  Class for encoding data into memory.
45  Class handles aligment issues with the integer data types.
46 
47  \ingroup gammacode
48 */
49 class encoder
50 {
51 public:
52  typedef unsigned char* position_type;
53 public:
54  encoder(unsigned char* buf, size_t size) BMNOEXCEPT;
55  void put_8(unsigned char c) BMNOEXCEPT;
57  void put_16(const bm::short_t* s, unsigned count) BMNOEXCEPT;
60  void put_32(const bm::word_t* w, unsigned count) BMNOEXCEPT;
64 
65  void put_8_16_32(unsigned w,
66  unsigned char c8, unsigned char c16, unsigned char c32) BMNOEXCEPT;
67  void put_prefixed_array_32(unsigned char c,
68  const bm::word_t* w, unsigned count) BMNOEXCEPT;
69  void put_prefixed_array_16(unsigned char c,
70  const bm::short_t* s, unsigned count,
71  bool encode_count) BMNOEXCEPT;
72  void memcpy(const unsigned char* src, size_t count) BMNOEXCEPT;
73  size_t size() const BMNOEXCEPT;
74  unsigned char* get_pos() const BMNOEXCEPT;
75  void set_pos(unsigned char* buf_pos) BMNOEXCEPT;
76 private:
77  unsigned char* buf_;
78  unsigned char* start_;
79  size_t size_;
80 };
81 
82 // ----------------------------------------------------------------
83 /**
84  Base class for all decoding functionality
85  \ingroup gammacode
86 */
88 {
89 public:
90  decoder_base(const unsigned char* buf) BMNOEXCEPT { buf_ = start_ = buf; }
91 
92  /// Reads character from the decoding buffer.
93  unsigned char get_8() BMNOEXCEPT { return *buf_++; }
94 
95  /// Returns size of the current decoding stream.
96  size_t size() const BMNOEXCEPT { return size_t(buf_ - start_); }
97 
98  /// change current position
99  void seek(int delta) BMNOEXCEPT { buf_ += delta; }
100 
101  /// read bytes from the decode buffer
102  void memcpy(unsigned char* dst, size_t count) BMNOEXCEPT;
103 
104  /// Return current buffer pointer
105  const unsigned char* get_pos() const BMNOEXCEPT { return buf_; }
106 
107  /// Set current buffer pointer
108  void set_pos(const unsigned char* pos) BMNOEXCEPT { buf_ = pos; }
109 
110  /// Read h-64-bit
111  bm::id64_t get_h64() BMNOEXCEPT;
112 
113 protected:
114  const unsigned char* buf_;
115  const unsigned char* start_;
116 };
117 
118 
119 // ----------------------------------------------------------------
120 /**
121  Class for decoding data from memory buffer.
122  Properly handles aligment issues with integer data types.
123  \ingroup gammacode
124 */
125 class decoder : public decoder_base
126 {
127 public:
128  decoder(const unsigned char* buf) BMNOEXCEPT;
129  bm::short_t get_16() BMNOEXCEPT;
130  bm::word_t get_24() BMNOEXCEPT;
131  bm::word_t get_32() BMNOEXCEPT;
132  bm::id64_t get_48() BMNOEXCEPT;
133  bm::id64_t get_64() BMNOEXCEPT;
134  void get_32(bm::word_t* w, unsigned count) BMNOEXCEPT;
135  bool get_32_OR(bm::word_t* w, unsigned count) BMNOEXCEPT;
136  void get_32_AND(bm::word_t* w, unsigned count) BMNOEXCEPT;
137  void get_16(bm::short_t* s, unsigned count) BMNOEXCEPT;
138 };
139 
140 // ----------------------------------------------------------------
141 /**
142  Class for decoding data from memory buffer.
143  Properly handles aligment issues with integer data types.
144  Converts data to big endian architecture
145  (presumed it was encoded as little endian)
146  \ingroup gammacode
147 */
149 
150 
151 // ----------------------------------------------------------------
152 /**
153  Class for decoding data from memory buffer.
154  Properly handles aligment issues with integer data types.
155  Converts data to little endian architecture
156  (presumed it was encoded as big endian)
157  \ingroup gammacode
158 */
160 {
161 public:
162  decoder_little_endian(const unsigned char* buf);
163  bm::short_t get_16();
164  bm::word_t get_24();
165  bm::word_t get_32();
166  bm::id64_t get_48();
167  bm::id64_t get_64();
168  void get_32(bm::word_t* w, unsigned count);
169  bool get_32_OR(bm::word_t* w, unsigned count);
170  void get_32_AND(bm::word_t* w, unsigned count);
171  void get_16(bm::short_t* s, unsigned count);
172 };
173 
174 
175 /**
176  Byte based writer for un-aligned bit streaming
177 
178  @ingroup gammacode
179  @sa encoder
180 */
181 template<class TEncoder>
182 class bit_out
183 {
184 public:
185  bit_out(TEncoder& dest)
186  : dest_(dest), used_bits_(0), accum_(0)
187  {}
188 
189  ~bit_out() { flush(); }
190 
191  /// issue single bit into encode bit-stream
192  void put_bit(unsigned value) BMNOEXCEPT;
193 
194  /// issue count bits out of value
195  void put_bits(unsigned value, unsigned count) BMNOEXCEPT;
196 
197  /// issue 0 into output stream
198  void put_zero_bit() BMNOEXCEPT;
199 
200  /// issue specified number of 0s
201  void put_zero_bits(unsigned count) BMNOEXCEPT;
202 
203  /// Elias Gamma encode the specified value
204  void gamma(unsigned value) BMNOEXCEPT;
205 
206  /// Binary Interpolative array decode
207  void bic_encode_u16(const bm::gap_word_t* arr, unsigned sz,
209  {
210  bic_encode_u16_cm(arr, sz, lo, hi);
211  }
212 
213  /// Binary Interpolative encoding (array of 16-bit ints)
214  void bic_encode_u16_rg(const bm::gap_word_t* arr, unsigned sz,
215  bm::gap_word_t lo,
217 
218  /// Binary Interpolative encoding (array of 16-bit ints)
219  /// cm - "center-minimal"
220  void bic_encode_u16_cm(const bm::gap_word_t* arr, unsigned sz,
221  bm::gap_word_t lo,
223 
224  /// Binary Interpolative encoding (array of 32-bit ints)
225  /// cm - "center-minimal"
226  void bic_encode_u32_cm(const bm::word_t* arr, unsigned sz,
228 
229  /// Flush the incomplete 32-bit accumulator word
230  void flush() BMNOEXCEPT { if (used_bits_) flush_accum(); }
231 
232 private:
234  {
235  dest_.put_32(accum_);
236  used_bits_ = accum_ = 0;
237  }
238 private:
239  bit_out(const bit_out&);
241 
242 private:
243  TEncoder& dest_; ///< Bit stream target
244  unsigned used_bits_; ///< Bits used in the accumulator
245  unsigned accum_; ///< write bit accumulator
246 };
247 
248 
249 /**
250  Byte based reader for un-aligned bit streaming
251 
252  @ingroup gammacode
253  @sa encoder
254 */
255 template<class TDecoder>
256 class bit_in
257 {
258 public:
260  : src_(decoder),
261  used_bits_(unsigned(sizeof(accum_) * 8)),
262  accum_(0)
263  {}
264 
265  /// decode unsigned value using Elias Gamma coding
266  unsigned gamma() BMNOEXCEPT;
267 
268  /// read number of bits out of the stream
269  unsigned get_bits(unsigned count) BMNOEXCEPT;
270 
271  /// read 1 bit
272  unsigned get_bit() BMNOEXCEPT;
273 
274  /// Binary Interpolative array decode
275  void bic_decode_u16(bm::gap_word_t* arr, unsigned sz,
277  {
278  if (sz)
279  bic_decode_u16_cm(arr, sz, lo, hi);
280  }
281 
282  void bic_decode_u16_bitset(bm::word_t* block, unsigned sz,
284  {
285  if (sz)
286  bic_decode_u16_cm_bitset(block, sz, lo, hi);
287  }
288  void bic_decode_u16_dry(unsigned sz,
290  {
291  if (sz)
292  bic_decode_u16_cm_dry(sz, lo, hi);
293  }
294 
295 
296  /// Binary Interpolative array decode
297  void bic_decode_u16_rg(bm::gap_word_t* arr, unsigned sz,
299  /// Binary Interpolative array decode
300  void bic_decode_u16_cm(bm::gap_word_t* arr, unsigned sz,
302 
303  /// Binary Interpolative array decode (32-bit)
304  void bic_decode_u32_cm(bm::word_t* arr, unsigned sz,
306 
307 
308  /// Binary Interpolative array decode into bitset (32-bit based)
309  void bic_decode_u16_rg_bitset(bm::word_t* block, unsigned sz,
311 
312  /// Binary Interpolative array decode into /dev/null
313  void bic_decode_u16_rg_dry(unsigned sz,
315 
316  /// Binary Interpolative array decode into bitset (32-bit based)
317  void bic_decode_u16_cm_bitset(bm::word_t* block, unsigned sz,
318  bm::gap_word_t lo,
320 
321  /// Binary Interpolative array decode into /dev/null
322  void bic_decode_u16_cm_dry(unsigned sz,
324 
325 private:
326  bit_in(const bit_in&);
328 private:
329  TDecoder& src_; ///< Source of bytes
330  unsigned used_bits_; ///< Bits used in the accumulator
331  unsigned accum_; ///< read bit accumulator
332 };
333 
334 
335 /**
336  Functor for Elias Gamma encoding
337  @ingroup gammacode
338 */
339 template<typename T, typename TBitIO>
341 {
342 public:
343  gamma_encoder(TBitIO& bout) : bout_(bout)
344  {}
345 
346  /** Encode word */
347  void operator()(T value) { bout_.gamma(value); }
348 private:
351 private:
352  TBitIO& bout_;
353 };
354 
355 
356 /**
357  Elias Gamma decoder
358  @ingroup gammacode
359 */
360 template<typename T, typename TBitIO>
361 class gamma_decoder
362 {
363 public:
364  gamma_decoder(TBitIO& bin) : bin_(bin) {}
365 
366  /**
367  Start encoding sequence
368  */
369  void start() {}
370 
371  /**
372  Stop decoding sequence
373  */
374  void stop() {}
375 
376  /**
377  Decode word
378  */
379  T operator()(void) { return (T)bin_.gamma(); }
380 private:
383 private:
384  TBitIO& bin_;
385 };
386 
387 
388 // ----------------------------------------------------------------
389 // Implementation details.
390 // ----------------------------------------------------------------
391 
392 /*!
393  \fn encoder::encoder(unsigned char* buf, unsigned size)
394  \brief Construction.
395  \param buf - memory buffer pointer.
396  \param size - size of the buffer
397 */
398 inline encoder::encoder(unsigned char* buf, size_t a_size) BMNOEXCEPT
399 : buf_(buf), start_(buf)
400 {
401  size_ = a_size;
402 }
403 /*!
404  \brief Encode 8-bit prefix + an array
405 */
406 inline void encoder::put_prefixed_array_32(unsigned char c,
407  const bm::word_t* w,
408  unsigned count) BMNOEXCEPT
409 {
410  put_8(c);
411  put_32(w, count);
412 }
413 
414 /*!
415  \brief Encode 8-bit prefix + an array
416 */
417 inline void encoder::put_prefixed_array_16(unsigned char c,
418  const bm::short_t* s,
419  unsigned count,
420  bool encode_count) BMNOEXCEPT
421 {
422  put_8(c);
423  if (encode_count)
424  put_16((bm::short_t) count);
425  put_16(s, count);
426 }
427 
428 
429 /*!
430  \fn void encoder::put_8(unsigned char c)
431  \brief Puts one character into the encoding buffer.
432  \param c - character to encode
433 */
435 {
436  *buf_++ = c;
437 }
438 
439 /*!
440  \fn encoder::put_16(bm::short_t s)
441  \brief Puts short word (16 bits) into the encoding buffer.
442  \param s - short word to encode
443 */
445 {
446 #if (BM_UNALIGNED_ACCESS_OK == 1)
447  ::memcpy(buf_, &s, sizeof(bm::short_t)); // optimizer takes care of it
448  buf_ += sizeof(bm::short_t);
449 #else
450  *buf_++ = (unsigned char) s;
451  s >>= 8;
452  *buf_++ = (unsigned char) s;
453 #endif
454 }
455 
456 /*!
457  \brief Method puts array of short words (16 bits) into the encoding buffer.
458 */
459 inline void encoder::put_16(const bm::short_t* s, unsigned count) BMNOEXCEPT
460 {
461  BM_ASSERT(count);
462 #if (BM_UNALIGNED_ACCESS_OK == 1)
463  ::memcpy(buf_, s, sizeof(bm::short_t)*count);
464  buf_ += sizeof(bm::short_t) * count;
465 #else
466  unsigned char* buf = buf_;
467  const bm::short_t* s_end = s + count;
468  do
469  {
470  bm::short_t w16 = *s++;
471  unsigned char a = (unsigned char) w16;
472  unsigned char b = (unsigned char) (w16 >> 8);
473 
474  *buf++ = a;
475  *buf++ = b;
476 
477  } while (s < s_end);
478 
479  buf_ = (unsigned char*)buf;
480 #endif
481 }
482 
483 /*!
484  \brief but gat plus value based on its VBR evaluation
485 */
486 inline
487 void encoder::put_8_16_32(unsigned w,
488  unsigned char c8,
489  unsigned char c16,
490  unsigned char c32) BMNOEXCEPT
491 {
492  if (w < 256)
493  {
494  put_8(c8);
495  put_8((unsigned char)w);
496  }
497  else
498  {
499  if (w < 65536)
500  {
501  put_8(c16);
502  put_16((unsigned short) w);
503  }
504  else
505  {
506  put_8(c32);
507  put_32(w);
508  }
509  }
510 }
511 
512 /*!
513  \brief copy bytes into target buffer or just rewind if src is NULL
514 */
515 inline
516 void encoder::memcpy(const unsigned char* src, size_t count) BMNOEXCEPT
517 {
518  BM_ASSERT((buf_ + count) < (start_ + size_));
519  if (src)
520  ::memcpy(buf_, src, count);
521  buf_ += count;
522 }
523 
524 
525 /*!
526  \fn unsigned encoder::size() const
527  \brief Returns size of the current encoding stream.
528 */
530 {
531  return size_t(buf_ - start_);
532 }
533 
534 /**
535  \brief Get current memory stream position
536 */
538 {
539  return buf_;
540 }
541 
542 /**
543  \brief Set current memory stream position
544 */
546 {
547  buf_ = buf_pos;
548 }
549 
550 /*!
551  \fn void encoder::put_24(bm::word_t w)
552  \brief Puts 24 bits word into encoding buffer.
553  \param w - word to encode.
554 */
556 {
557  BM_ASSERT((w & ~(0xFFFFFFU)) == 0);
558 
559  buf_[0] = (unsigned char)w;
560  buf_[1] = (unsigned char)(w >> 8);
561  buf_[2] = (unsigned char)(w >> 16);
562  buf_ += 3;
563 }
564 
565 
566 /*!
567  \fn void encoder::put_32(bm::word_t w)
568  \brief Puts 32 bits word into encoding buffer.
569  \param w - word to encode.
570 */
572 {
573 #if (BM_UNALIGNED_ACCESS_OK == 1)
574  ::memcpy(buf_, &w, sizeof(bm::word_t));
575  buf_ += sizeof(bm::word_t);
576 #else
577  *buf_++ = (unsigned char) w;
578  *buf_++ = (unsigned char) (w >> 8);
579  *buf_++ = (unsigned char) (w >> 16);
580  *buf_++ = (unsigned char) (w >> 24);
581 #endif
582 }
583 
584 /*!
585  \fn void encoder::put_48(bm::id64_t w)
586  \brief Puts 48 bits word into encoding buffer.
587  \param w - word to encode.
588 */
590 {
591  BM_ASSERT((w & ~(0xFFFFFFFFFFFFUL)) == 0);
592  *buf_++ = (unsigned char)w;
593  *buf_++ = (unsigned char)(w >> 8);
594  *buf_++ = (unsigned char)(w >> 16);
595  *buf_++ = (unsigned char)(w >> 24);
596  *buf_++ = (unsigned char)(w >> 32);
597  *buf_++ = (unsigned char)(w >> 40);
598 }
599 
600 
601 /*!
602  \fn void encoder::put_64(bm::id64_t w)
603  \brief Puts 64 bits word into encoding buffer.
604  \param w - word to encode.
605 */
607 {
608 #if (BM_UNALIGNED_ACCESS_OK == 1)
609  ::memcpy(buf_, &w, sizeof(bm::id64_t));
610  buf_ += sizeof(bm::id64_t);
611 #else
612  *buf_++ = (unsigned char) w;
613  *buf_++ = (unsigned char) (w >> 8);
614  *buf_++ = (unsigned char) (w >> 16);
615  *buf_++ = (unsigned char) (w >> 24);
616  *buf_++ = (unsigned char) (w >> 32);
617  *buf_++ = (unsigned char) (w >> 40);
618  *buf_++ = (unsigned char) (w >> 48);
619  *buf_++ = (unsigned char) (w >> 56);
620 #endif
621 }
622 
623 /*!
624  \fn void encoder::put_h64(bm::id64_t w)
625  \brief Puts 64 bits word into encoding buffer with h-compression
626  \param w - word to encode.
627 */
629 {
630  unsigned h_mask = bm::compute_h64_mask(w);
631  put_8((unsigned char) h_mask);
632  for (unsigned i = 0; w && (i < 8); ++i, w >>= 8)
633  {
634  if ((unsigned char) w)
635  put_8((unsigned char) w);
636  } // for i
637 }
638 
639 
640 
641 /*!
642  \brief Encodes array of 32-bit words
643 */
644 inline void encoder::put_32(const bm::word_t* w, unsigned count) BMNOEXCEPT
645 {
646 #if (BM_UNALIGNED_ACCESS_OK == 1)
647  // use memcpy() because compilers now understand it as an idiom and inline
648  ::memcpy(buf_, w, sizeof(bm::word_t) * count);
649  buf_ += sizeof(bm::word_t) * count;
650 #else
651  unsigned char* buf = buf_;
652  const bm::word_t* w_end = w + count;
653  do
654  {
655  bm::word_t w32 = *w++;
656  unsigned char a = (unsigned char) w32;
657  unsigned char b = (unsigned char) (w32 >> 8);
658  unsigned char c = (unsigned char) (w32 >> 16);
659  unsigned char d = (unsigned char) (w32 >> 24);
660 
661  *buf++ = a;
662  *buf++ = b;
663  *buf++ = c;
664  *buf++ = d;
665  } while (w < w_end);
666 
667  buf_ = (unsigned char*)buf;
668 #endif
669 }
670 
671 
672 // ---------------------------------------------------------------------
673 
674 
675 /*!
676  Load bytes from the decode buffer
677 */
678 inline
679 void decoder_base::memcpy(unsigned char* dst, size_t count) BMNOEXCEPT
680 {
681  if (dst)
682  ::memcpy(dst, buf_, count);
683  buf_ += count;
684 }
685 
686 /*!
687  \fn bm::id64_t decoder_base::get_h64()
688  \brief Reads 64-bit word from the decoding buffer.
689 */
690 inline
692 {
693  bm::id64_t w = 0;
694  unsigned h_mask = (unsigned char) *buf_++;
695  for (unsigned i = 0; h_mask && (i < 8); ++i)
696  {
697  if (h_mask & (1u<<i))
698  {
699  h_mask &= ~(1u<<i);
700  unsigned char a = (unsigned char) *buf_++;
701  w |= (bm::id64_t(a) << (i*8));
702  }
703  } // for i
704  return w;
705 }
706 
707 
708 /*!
709  \fn decoder::decoder(const unsigned char* buf)
710  \brief Construction
711  \param buf - pointer to the decoding memory.
712 */
713 inline decoder::decoder(const unsigned char* buf) BMNOEXCEPT
714 : decoder_base(buf)
715 {
716 }
717 
718 /*!
719  \fn bm::short_t decoder::get_16()
720  \brief Reads 16-bit word from the decoding buffer.
721 */
723 {
724 #if (BM_UNALIGNED_ACCESS_OK == 1)
725  bm::short_t a;
726  ::memcpy(&a, buf_, sizeof(bm::short_t));
727 #else
728  bm::short_t a = (bm::short_t)(buf_[0] + ((bm::short_t)buf_[1] << 8));
729 #endif
730  buf_ += sizeof(a);
731  return a;
732 }
733 
734 /*!
735  \fn bm::word_t decoder::get_24()
736  \brief Reads 32-bit word from the decoding buffer.
737 */
739 {
740  bm::word_t a = buf_[0] + ((unsigned)buf_[1] << 8) +
741  ((unsigned)buf_[2] << 16);
742  buf_ += 3;
743  return a;
744 }
745 
746 
747 /*!
748  \fn bm::word_t decoder::get_32()
749  \brief Reads 32-bit word from the decoding buffer.
750 */
752 {
753 #if (BM_UNALIGNED_ACCESS_OK == 1)
754  bm::word_t a;
755  ::memcpy(&a, buf_, sizeof(bm::word_t));
756 #else
757  bm::word_t a = buf_[0]+ ((unsigned)buf_[1] << 8) +
758  ((unsigned)buf_[2] << 16) + ((unsigned)buf_[3] << 24);
759 #endif
760  buf_+=sizeof(a);
761  return a;
762 }
763 
764 /*!
765  \fn bm::word_t decoder::get_48()
766  \brief Reads 64-bit word from the decoding buffer.
767 */
768 inline
770 {
771  bm::id64_t a = buf_[0] +
772  ((bm::id64_t)buf_[1] << 8) +
773  ((bm::id64_t)buf_[2] << 16) +
774  ((bm::id64_t)buf_[3] << 24) +
775  ((bm::id64_t)buf_[4] << 32) +
776  ((bm::id64_t)buf_[5] << 40);
777  buf_ += 6;
778  return a;
779 }
780 
781 /*!
782  \fn bm::id64_t decoder::get_64()
783  \brief Reads 64-bit word from the decoding buffer.
784 */
785 inline
787 {
788 #if (BM_UNALIGNED_ACCESS_OK == 1)
789  bm::id64_t a;
790  ::memcpy(&a, buf_, sizeof(bm::id64_t));
791 #else
792  bm::id64_t a = buf_[0]+
793  ((bm::id64_t)buf_[1] << 8) +
794  ((bm::id64_t)buf_[2] << 16) +
795  ((bm::id64_t)buf_[3] << 24) +
796  ((bm::id64_t)buf_[4] << 32) +
797  ((bm::id64_t)buf_[5] << 40) +
798  ((bm::id64_t)buf_[6] << 48) +
799  ((bm::id64_t)buf_[7] << 56);
800 #endif
801  buf_ += sizeof(a);
802  return a;
803 }
804 
805 
806 /*!
807  \fn void decoder::get_32(bm::word_t* w, unsigned count)
808  \brief Reads block of 32-bit words from the decoding buffer.
809  \param w - pointer on memory block to read into.
810  \param count - size of memory block in words.
811 */
812 inline void decoder::get_32(bm::word_t* w, unsigned count) BMNOEXCEPT
813 {
814  if (!w)
815  {
816  seek(int(count * sizeof(bm::word_t)));
817  return;
818  }
819 #if (BM_UNALIGNED_ACCESS_OK == 1)
820  ::memcpy(w, buf_, count * sizeof(bm::word_t));
821  seek(int(count * sizeof(bm::word_t)));
822  return;
823 #else
824  const unsigned char* buf = buf_;
825  const bm::word_t* w_end = w + count;
826  do
827  {
828  bm::word_t a = buf[0]+ ((unsigned)buf[1] << 8) +
829  ((unsigned)buf[2] << 16) + ((unsigned)buf[3] << 24);
830  *w++ = a;
831  buf += sizeof(a);
832  } while (w < w_end);
833  buf_ = (unsigned char*)buf;
834 #endif
835 }
836 
837 /*!
838  \brief Reads block of 32-bit words from the decoding buffer and ORs
839  to the destination
840  \param w - pointer on memory block to read into
841  \param count - should match bm::set_block_size
842 */
843 inline
844 bool decoder::get_32_OR(bm::word_t* w, unsigned count) BMNOEXCEPT
845 {
846  if (!w)
847  {
848  seek(int(count * sizeof(bm::word_t)));
849  return false;
850  }
851 #if defined(BMAVX2OPT)
852  __m256i* buf_start = (__m256i*)buf_;
853  seek(int(count * sizeof(bm::word_t)));
854  __m256i* buf_end = (__m256i*)buf_;
855 
856  return bm::avx2_or_arr_unal((__m256i*)w, buf_start, buf_end);
857 #elif defined(BMSSE42OPT) || defined(BMSSE2OPT)
858  __m128i* buf_start = (__m128i*)buf_;
859  seek(int(count * sizeof(bm::word_t)));
860  __m128i* buf_end = (__m128i*)buf_;
861 
862  return bm::sse2_or_arr_unal((__m128i*)w, buf_start, buf_end);
863 #else
864  bm::word_t acc = 0;
865  const bm::word_t not_acc = acc = ~acc;
866 
867  for (unsigned i = 0; i < count; i+=4)
868  {
869  acc &= (w[i+0] |= get_32());
870  acc &= (w[i+1] |= get_32());
871  acc &= (w[i+2] |= get_32());
872  acc &= (w[i+3] |= get_32());
873  }
874  return acc == not_acc;
875 #endif
876 }
877 
878 /*!
879  \brief Reads block of 32-bit words from the decoding buffer and ANDs
880  to the destination
881  \param w - pointer on memory block to read into
882  \param count - should match bm::set_block_size
883 */
884 inline
885 void decoder::get_32_AND(bm::word_t* w, unsigned count) BMNOEXCEPT
886 {
887  if (!w)
888  {
889  seek(int(count * sizeof(bm::word_t)));
890  return;
891  }
892 #if defined(BMAVX2OPT)
893  __m256i* buf_start = (__m256i*)buf_;
894  seek(int(count * sizeof(bm::word_t)));
895  __m256i* buf_end = (__m256i*)buf_;
896 
897  bm::avx2_and_arr_unal((__m256i*)w, buf_start, buf_end);
898 #elif defined(BMSSE42OPT) || defined(BMSSE2OPT)
899  __m128i* buf_start = (__m128i*)buf_;
900  seek(int(count * sizeof(bm::word_t)));
901  __m128i* buf_end = (__m128i*)buf_;
902 
903  bm::sse2_and_arr_unal((__m128i*)w, buf_start, buf_end);
904 #else
905  for (unsigned i = 0; i < count; i+=4)
906  {
907  w[i+0] &= get_32();
908  w[i+1] &= get_32();
909  w[i+2] &= get_32();
910  w[i+3] &= get_32();
911  }
912 #endif
913 }
914 
915 
916 
917 /*!
918  \fn void decoder::get_16(bm::short_t* s, unsigned count)
919  \brief Reads block of 32-bit words from the decoding buffer.
920  \param s - pointer on memory block to read into.
921  \param count - size of memory block in words.
922 */
923 inline void decoder::get_16(bm::short_t* s, unsigned count) BMNOEXCEPT
924 {
925  BM_ASSERT(count);
926  if (!s)
927  {
928  seek(int(count * sizeof(bm::short_t)));
929  return;
930  }
931 #if (BM_UNALIGNED_ACCESS_OK == 1)
932  ::memcpy(s, buf_, sizeof(bm::short_t) * count);
933  buf_ += sizeof(bm::short_t) * count;
934 #else
935  const unsigned char* buf = buf_;
936  const bm::short_t* s_end = s + count;
937  do
938  {
939  bm::short_t a = (bm::short_t)(buf[0] + ((bm::short_t)buf[1] << 8));
940  *s++ = a;
941  buf += sizeof(a);
942  } while (s < s_end);
943  buf_ = (unsigned char*)buf;
944 #endif
945 
946 }
947 
948 
949 
950 // ---------------------------------------------------------------------
951 
953 : decoder_base(buf)
954 {
955 }
956 
957 inline
959 {
960  bm::short_t v1 = bm::short_t(buf_[0]);
962  bm::short_t a = bm::short_t((v1 << 8) + v2);
963  buf_ += sizeof(a);
964  return a;
965 }
966 
968 {
969  // TODO: validate if this is a correct for cross endian opts
970  bm::word_t a = buf_[0] + ((unsigned)buf_[1] << 8) +
971  ((unsigned)buf_[2] << 16);
972  buf_ += 3;
973  return a;
974 }
975 
976 inline
978 {
979  bm::word_t a = ((unsigned)buf_[0] << 24)+ ((unsigned)buf_[1] << 16) +
980  ((unsigned)buf_[2] << 8) + ((unsigned)buf_[3]);
981  buf_+=sizeof(a);
982  return a;
983 }
984 
985 inline
987 {
988  bm::id64_t a = buf_[0] +
989  ((bm::id64_t)buf_[1] << 8) +
990  ((bm::id64_t)buf_[2] << 16) +
991  ((bm::id64_t)buf_[3] << 24) +
992  ((bm::id64_t)buf_[4] << 32) +
993  ((bm::id64_t)buf_[5] << 40);
994  buf_ += 6;
995  return a;
996 }
997 
998 inline
1000 {
1001  bm::id64_t a = buf_[0]+
1002  ((bm::id64_t)buf_[1] << 56) +
1003  ((bm::id64_t)buf_[2] << 48) +
1004  ((bm::id64_t)buf_[3] << 40) +
1005  ((bm::id64_t)buf_[4] << 32) +
1006  ((bm::id64_t)buf_[5] << 24) +
1007  ((bm::id64_t)buf_[6] << 16) +
1008  ((bm::id64_t)buf_[7] << 8);
1009  buf_+=sizeof(a);
1010  return a;
1011 }
1012 
1013 inline
1015 {
1016  if (!w)
1017  {
1018  seek(int(count * sizeof(bm::word_t)));
1019  return;
1020  }
1021 
1022  const unsigned char* buf = buf_;
1023  const bm::word_t* w_end = w + count;
1024  do
1025  {
1026  bm::word_t a = ((unsigned)buf[0] << 24)+ ((unsigned)buf[1] << 16) +
1027  ((unsigned)buf[2] << 8) + ((unsigned)buf[3]);
1028  *w++ = a;
1029  buf += sizeof(a);
1030  } while (w < w_end);
1031  buf_ = (unsigned char*)buf;
1032 }
1033 
1034 inline
1036 {
1037  if (!w)
1038  {
1039  seek(int(count * sizeof(bm::word_t)));
1040  return false;
1041  }
1042 
1043  bm::word_t acc = 0;
1044  const bm::word_t not_acc = acc = ~acc;
1045 
1046  for (unsigned i = 0; i < count; i+=4)
1047  {
1048  acc &= (w[i+0] |= get_32());
1049  acc &= (w[i+1] |= get_32());
1050  acc &= (w[i+2] |= get_32());
1051  acc &= (w[i+3] |= get_32());
1052  }
1053  return acc == not_acc;
1054 }
1055 
1056 inline
1058 {
1059  for (unsigned i = 0; i < count; i+=4)
1060  {
1061  w[i+0] &= get_32();
1062  w[i+1] &= get_32();
1063  w[i+2] &= get_32();
1064  w[i+3] &= get_32();
1065  }
1066 }
1067 
1068 
1069 inline
1071 {
1072  if (!s)
1073  {
1074  seek(int(count * sizeof(bm::short_t)));
1075  return;
1076  }
1077 
1078  const unsigned char* buf = buf_;
1079  const bm::short_t* s_end = s + count;
1080  do
1081  {
1082  bm::short_t v1 = bm::short_t(buf_[0]);
1084  bm::short_t a = bm::short_t((v1 << 8) + v2);
1085  *s++ = a;
1086  buf += sizeof(a);
1087  } while (s < s_end);
1088  buf_ = (unsigned char*)buf;
1089 }
1090 
1091 // ----------------------------------------------------------------------
1092 //
1093 
1094 template<typename TEncoder>
1096 {
1097  BM_ASSERT(value <= 1);
1098  accum_ |= (value << used_bits_);
1099  if (++used_bits_ == (sizeof(accum_) * 8))
1100  flush_accum();
1101 }
1102 
1103 // ----------------------------------------------------------------------
1104 
1105 template<typename TEncoder>
1106 void bit_out<TEncoder>::put_bits(unsigned value, unsigned count) BMNOEXCEPT
1107 {
1108  unsigned used = used_bits_;
1109  unsigned acc = accum_;
1110 
1111  {
1112  unsigned mask = ~0u;
1113  mask >>= (sizeof(accum_) * 8) - count;
1114  value &= mask;
1115  }
1116  for (;count;)
1117  {
1118  unsigned free_bits = unsigned(sizeof(accum_) * 8) - used;
1119  BM_ASSERT(free_bits);
1120  acc |= value << used;
1121 
1122  if (count <= free_bits)
1123  {
1124  used += count;
1125  break;
1126  }
1127  else
1128  {
1129  value >>= free_bits;
1130  count -= free_bits;
1131  dest_.put_32(acc);
1132  acc = used = 0;
1133  continue;
1134  }
1135  }
1136  if (used == (sizeof(accum_) * 8))
1137  {
1138  dest_.put_32(acc);
1139  acc = used = 0;
1140  }
1141  used_bits_ = used;
1142  accum_ = acc;
1143 }
1144 
1145 // ----------------------------------------------------------------------
1146 
1147 template<typename TEncoder>
1149 {
1150  if (++used_bits_ == (sizeof(accum_) * 8))
1151  flush_accum();
1152 }
1153 
1154 // ----------------------------------------------------------------------
1155 
1156 template<typename TEncoder>
1158 {
1159  unsigned used = used_bits_;
1160  unsigned free_bits = (sizeof(accum_) * 8) - used;
1161  if (count >= free_bits)
1162  {
1163  flush_accum();
1164  count -= free_bits;
1165  used = 0;
1166 
1167  for ( ;count >= sizeof(accum_) * 8; count -= sizeof(accum_) * 8)
1168  {
1169  dest_.put_32(0);
1170  }
1171  used += count;
1172  }
1173  else
1174  {
1175  used += count;
1176  }
1177  accum_ |= (1u << used);
1178  if (++used == (sizeof(accum_) * 8))
1179  flush_accum();
1180  else
1181  used_bits_ = used;
1182 }
1183 
1184 // ----------------------------------------------------------------------
1185 
1186 template<typename TEncoder>
1188 {
1189  BM_ASSERT(value);
1190 
1191  unsigned logv = bm::bit_scan_reverse32(value);
1192 
1193  // Put zeroes + 1 bit
1194 
1195  unsigned used = used_bits_;
1196  unsigned acc = accum_;
1197  const unsigned acc_bits = (sizeof(acc) * 8);
1198  unsigned free_bits = acc_bits - used;
1199 
1200  {
1201  unsigned count = logv;
1202  if (count >= free_bits)
1203  {
1204  dest_.put_32(acc);
1205  acc = used ^= used;
1206  count -= free_bits;
1207 
1208  for ( ;count >= acc_bits; count -= acc_bits)
1209  {
1210  dest_.put_32(0);
1211  }
1212  used += count;
1213  }
1214  else
1215  {
1216  used += count;
1217  }
1218  acc |= (1 << used);
1219  if (++used == acc_bits)
1220  {
1221  dest_.put_32(acc);
1222  acc = used ^= used;
1223  }
1224  }
1225 
1226  // Put the value bits
1227  //
1228  {
1229  unsigned mask = (~0u);
1230  mask >>= acc_bits - logv;
1231  value &= mask;
1232  }
1233  for (;logv;)
1234  {
1235  acc |= value << used;
1236  free_bits = acc_bits - used;
1237  if (logv <= free_bits)
1238  {
1239  used += logv;
1240  break;
1241  }
1242  else
1243  {
1244  value >>= free_bits;
1245  logv -= free_bits;
1246  dest_.put_32(acc);
1247  acc = used ^= used;
1248  continue;
1249  }
1250  } // for
1251 
1252  used_bits_ = used;
1253  accum_ = acc;
1254 }
1255 
1256 // ----------------------------------------------------------------------
1257 
1258 template<typename TEncoder>
1260  const bm::gap_word_t* arr,
1261  unsigned sz,
1263 {
1264  for (;sz;)
1265  {
1266  BM_ASSERT(lo <= hi);
1267  unsigned mid_idx = sz >> 1;
1268  bm::gap_word_t val = arr[mid_idx];
1269 
1270  // write the interpolated value
1271  // write(x, r) where x=(arr[mid] - lo - mid) r=(hi - lo - sz + 1);
1272  {
1273  unsigned r = hi - lo - sz + 1;
1274  if (r)
1275  {
1276  unsigned value = val - lo - mid_idx;
1277  unsigned logv = bm::bit_scan_reverse32(r);
1278  put_bits(value, logv+1);
1279  }
1280  }
1281 
1282  bic_encode_u16_rg(arr, mid_idx, lo, gap_word_t(val-1));
1283  // tail recursion
1284  // bic_encode_u16(arr + mid_idx + 1, sz - mid_idx - 1, gap_word_t(val+1), hi);
1285  arr += mid_idx + 1;
1286  sz -= mid_idx + 1;
1287  lo = gap_word_t(val + 1);
1288  } // for sz
1289 }
1290 
1291 // ----------------------------------------------------------------------
1292 
1293 template<typename TEncoder>
1295  unsigned sz,
1296  bm::word_t lo,
1298 {
1299  for (;sz;)
1300  {
1301  BM_ASSERT(lo <= hi);
1302  unsigned mid_idx = sz >> 1;
1303  bm::word_t val = arr[mid_idx];
1304 
1305  // write the interpolated value
1306  // write(x, r) where x=(arr[mid] - lo - mid) r=(hi - lo - sz + 1);
1307  {
1308  unsigned r = hi - lo - sz + 1;
1309  if (r)
1310  {
1311  unsigned value = val - lo - mid_idx;
1312 
1313  unsigned n = r + 1;
1314  unsigned logv = bm::bit_scan_reverse32(n);
1315  unsigned c = (unsigned)(1ull << (logv + 1)) - n;
1316  int64_t half_c = c >> 1; // c / 2;
1317  int64_t half_r = r >> 1; // r / 2;
1318  int64_t lo1 = half_r - half_c;
1319  int64_t hi1 = half_r + half_c + 1;
1320  lo1 -= (n & 1);
1321  logv += (value <= lo1 || value >= hi1);
1322 
1323  put_bits(value, logv);
1324  }
1325  }
1326 
1327  bic_encode_u32_cm(arr, mid_idx, lo, val-1);
1328  // tail recursive call:
1329  // bic_encode_u32_cm(arr + mid_idx + 1, sz - mid_idx - 1, val+1, hi);
1330  arr += mid_idx + 1;
1331  sz -= mid_idx + 1;
1332  lo = val + 1;
1333  } // for sz
1334 }
1335 
1336 // ----------------------------------------------------------------------
1337 
1338 #if 0
1339 /**
1340  Shuffle structure for BIC
1341  @internal
1342 */
1343 template<unsigned N>
1344 struct bic_encode_stack_u16
1345 {
1346  bm::gap_word_t val_[N];
1347  bm::gap_word_t mid_[N];
1348  bm::gap_word_t lo_[N];
1349  bm::gap_word_t r_[N];
1350 
1351  unsigned stack_size_ = 0;
1352 
1353  void bic_shuffle(const bm::gap_word_t* arr,
1355  {
1356  for (;sz;)
1357  {
1358  BM_ASSERT(lo <= hi);
1359  bm::gap_word_t mid_idx = sz >> 1;
1360  bm::gap_word_t val = arr[mid_idx];
1361  unsigned r = hi - lo - sz + 1;
1362  if (r)
1363  {
1364  unsigned s = stack_size_++;
1365  r_[s] = (bm::gap_word_t)r;
1366  val_[s] = val;
1367  mid_[s] = mid_idx;
1368  lo_[s] = lo;
1369  }
1370 
1371  bic_shuffle(arr, mid_idx, lo, bm::gap_word_t(val-1));
1372  // tail recursive call:
1373  // bic_shuffle(arr + mid_idx + 1, sz - mid_idx - 1, val+1, hi);
1374  arr += mid_idx + 1;
1375  sz -= mid_idx + 1;
1376  lo = bm::gap_word_t(val + 1);
1377  } // for sz
1378  }
1379 };
1380 
1381 template<typename TEncoder>
1383  unsigned sz_i,
1384  bm::gap_word_t lo_i,
1386 {
1387  BM_ASSERT(sz_i <= 65535);
1388 
1389  bic_encode_stack_u16<bm::bie_cut_off> u16_stack;
1390  // BIC re-ordering
1391  u16_stack.bic_shuffle(arr, bm::gap_word_t(sz_i), lo_i, hi_i);
1392 
1393  BM_ASSERT(sz_i == u16_stack.stack_size_);
1394  for (unsigned i = 0; i < sz_i; ++i)
1395  {
1396  bm::gap_word_t val = u16_stack.val_[i];
1397  bm::gap_word_t mid_idx = u16_stack.mid_[i];
1398  bm::gap_word_t lo = u16_stack.lo_[i];
1399  unsigned r = u16_stack.r_[i];
1400 
1401  unsigned value = val - lo - mid_idx;
1402  unsigned n = r + 1;
1403  unsigned logv = bm::bit_scan_reverse32(n);
1404  unsigned c = (unsigned)(1ull << (logv + 1)) - n;
1405 
1406  int64_t half_c = c >> 1; // c / 2;
1407  int64_t half_r = r >> 1; // r / 2;
1408  int64_t lo1 = half_r - half_c;
1409  int64_t hi1 = half_r + half_c + 1;
1410  lo1 -= (n & 1);
1411  logv += (value <= lo1 || value >= hi1);
1412 
1413  put_bits(value, logv);
1414  } // for i
1415 }
1416 #endif
1417 
1418 
1419 template<typename TEncoder>
1421  unsigned sz,
1422  bm::gap_word_t lo,
1424 {
1425  for (;sz;)
1426  {
1427  BM_ASSERT(lo <= hi);
1428  unsigned mid_idx = sz >> 1;
1429  bm::gap_word_t val = arr[mid_idx];
1430 
1431  // write the interpolated value
1432  // write(x, r) where x=(arr[mid] - lo - mid) r=(hi - lo - sz + 1);
1433  {
1434  unsigned r = hi - lo - sz + 1;
1435  if (r)
1436  {
1437  unsigned value = val - lo - mid_idx;
1438 
1439  unsigned n = r + 1;
1440  unsigned logv = bm::bit_scan_reverse32(n);
1441  unsigned c = (unsigned)(1ull << (logv + 1)) - n;
1442  unsigned half_c = c >> 1; // c / 2;
1443  unsigned half_r = r >> 1; // r / 2;
1444  int64_t lo1 = (int64_t(half_r) - half_c - (n & 1u));
1445  unsigned hi1 = (half_r + half_c);
1446  logv += (value <= lo1 || value > hi1);
1447 
1448  put_bits(value, logv);
1449  }
1450  }
1451 
1452  bic_encode_u16_cm(arr, mid_idx, lo, bm::gap_word_t(val-1));
1453  // tail recursive call:
1454  // bic_encode_u32_cm(arr + mid_idx + 1, sz - mid_idx - 1, val+1, hi);
1455  arr += ++mid_idx;
1456  sz -= mid_idx;
1457  lo = bm::gap_word_t(val + 1);
1458  } // for sz
1459 }
1460 
1461 
1462 
1463 
1464 
1465 
1466 // ----------------------------------------------------------------------
1467 //
1468 // ----------------------------------------------------------------------
1469 
1470 
1471 template<class TDecoder>
1473  bm::gap_word_t lo,
1475 {
1476  BM_ASSERT(sz);
1477  do // for (;sz;)
1478  {
1479  BM_ASSERT(lo <= hi);
1480 
1481  unsigned val;
1482  // read the value
1483  {
1484  unsigned r = hi - lo - sz + 1;
1485  if (r)
1486  {
1487  unsigned logv = bm::bit_scan_reverse32(r) + 1;
1488  val = get_bits(logv);
1489  BM_ASSERT(val <= r);
1490  }
1491  else
1492  {
1493  val = 0;
1494  }
1495  }
1496  unsigned mid_idx = sz >> 1;
1497  val += lo + mid_idx;
1498 
1499  BM_ASSERT(val < 65536);
1500  BM_ASSERT(mid_idx < 65536);
1501 
1502  arr[mid_idx] = bm::gap_word_t(val);
1503  if (sz <= 1)
1504  return;
1505 
1506  bic_decode_u16_rg(arr, mid_idx, lo, bm::gap_word_t(val - 1));
1507  arr += mid_idx + 1;
1508  sz -= mid_idx + 1;
1509  lo = bm::gap_word_t(val + 1);
1510  } while (sz); // for sz
1511 }
1512 
1513 // ----------------------------------------------------------------------
1514 
1515 template<class TDecoder>
1517  bm::word_t lo,
1519 {
1520  BM_ASSERT(sz);
1521  do
1522  {
1523  BM_ASSERT(lo <= hi);
1524  // read the interpolated value
1525  // x = read(r)+ lo + mid, where r = (hi - lo - sz + 1);
1526  unsigned val = hi - lo - sz + 1;
1527  if (val)
1528  {
1529  unsigned logv = bm::bit_scan_reverse32(val+1);
1530 
1531  unsigned c = unsigned((1ull << (logv + 1)) - val - 1);
1532  int64_t half_c = c >> 1;
1533  int64_t half_r = val >> 1;
1534  int64_t lo1 = half_r - half_c - ((val + 1) & 1);
1535  int64_t hi1 = half_r + half_c + 1;
1536 
1537  val = get_bits(logv);
1538  if (val <= lo1 || val >= hi1)
1539  val += (get_bit() << logv);
1540  }
1541 
1542  unsigned mid_idx = sz >> 1;
1543  val += lo + mid_idx;
1544  arr[mid_idx] = val;
1545 
1546  if (sz <= 1)
1547  return;
1548 
1549  bic_decode_u32_cm(arr, mid_idx, lo, val-1);
1550  // tail recursive call:
1551  // bic_decode_u32_cm(arr + mid_idx + 1, sz - mid_idx - 1, val + 1, hi);
1552  arr += ++mid_idx;
1553  sz -= mid_idx;
1554  lo = val + 1;
1555  } while (sz);// for sz
1556 }
1557 
1558 // ----------------------------------------------------------------------
1559 
1560 template<class TDecoder>
1562  bm::gap_word_t lo,
1564 {
1565  BM_ASSERT(sz);
1566  do
1567  {
1568  BM_ASSERT(lo <= hi);
1569  // read the interpolated value
1570  // x = read(r)+ lo + mid, where r = (hi - lo - sz + 1);
1571  unsigned val = hi - lo - sz + 1;
1572  if (val)
1573  {
1574  unsigned logv = bm::bit_scan_reverse32(val+1);
1575 
1576  unsigned c = unsigned((1ull << (logv + 1)) - val - 1);
1577  int64_t half_c = c >> 1; // c / 2;
1578  int64_t half_r = val >> 1; // r / 2;
1579  int64_t lo1 = half_r - half_c - ((val + 1) & 1);
1580  int64_t hi1 = half_r + half_c + 1;
1581  val = get_bits(logv);
1582  if (val <= lo1 || val >= hi1)
1583  val += (get_bit() << logv);
1584  }
1585 
1586  unsigned mid_idx = sz >> 1;
1587  val += lo + mid_idx;
1588  arr[mid_idx] = bm::gap_word_t(val);
1589 
1590  if (sz <= 1)
1591  return;
1592 
1593  bic_decode_u16_cm(arr, mid_idx, lo, bm::gap_word_t(val-1));
1594  // tail recursive call:
1595  // bic_decode_u16_cm(arr + mid_idx + 1, sz - mid_idx - 1, val + 1, hi);
1596  arr += ++mid_idx;
1597  sz -= mid_idx;
1598  lo = bm::gap_word_t(val + 1);
1599  } while (sz);// for sz
1600 }
1601 
1602 // ----------------------------------------------------------------------
1603 
1604 template<class TDecoder>
1606  bm::gap_word_t lo,
1608 {
1609  BM_ASSERT(sz);
1610  do
1611  {
1612  BM_ASSERT(lo <= hi);
1613  // read the interpolated value
1614  // x = read(r)+ lo + mid, where r = (hi - lo - sz + 1);
1615  unsigned val = hi - lo - sz + 1;
1616  if (val)
1617  {
1618  unsigned logv = bm::bit_scan_reverse32(val+1);
1619 
1620  unsigned c = unsigned((1ull << (logv + 1)) - val - 1);
1621  int64_t half_c = c >> 1; // c / 2;
1622  int64_t half_r = val >> 1; // r / 2;
1623  int64_t lo1 = half_r - half_c - ((val + 1) & 1);
1624  int64_t hi1 = half_r + half_c + 1;
1625 
1626  val = get_bits(logv);
1627  if (val <= lo1 || val >= hi1)
1628  val += (get_bit() << logv);
1629  }
1630 
1631  unsigned mid_idx = sz >> 1;
1632  val += lo + mid_idx;
1633 
1634  // set bit in the target block
1635  {
1636  unsigned nword = (val >> bm::set_word_shift);
1637  block[nword] |= (1u << (val & bm::set_word_mask));
1638  }
1639 
1640  if (sz <= 1)
1641  return;
1642 
1643  bic_decode_u16_cm_bitset(block, mid_idx, lo, bm::gap_word_t(val-1));
1644  // tail recursive call:
1645  // bic_decode_u32_cm(block, sz - mid_idx - 1, val + 1, hi);
1646  sz -= ++mid_idx;
1647  lo = bm::gap_word_t(val + 1);
1648  } while (sz);
1649 }
1650 
1651 // ----------------------------------------------------------------------
1652 
1653 template<class TDecoder>
1655  bm::gap_word_t lo,
1657 {
1658  BM_ASSERT(sz);
1659  do
1660  {
1661  BM_ASSERT(lo <= hi);
1662 
1663  unsigned val;
1664 
1665  // read the interpolated value
1666  // x = read(r)+ lo + mid, where r = (hi - lo - sz + 1);
1667  {
1668  unsigned r = hi - lo - sz + 1;
1669  if (r)
1670  {
1671  unsigned logv = bm::bit_scan_reverse32(r+1);
1672 
1673  unsigned c = unsigned((1ull << (logv + 1)) - r - 1);
1674  int64_t half_c = c >> 1; // c / 2;
1675  int64_t half_r = r >> 1; // r / 2;
1676  int64_t lo1 = half_r - half_c - ((r + 1) & 1);
1677  int64_t hi1 = half_r + half_c + 1;
1678  r = get_bits(logv);
1679  if (r <= lo1 || r >= hi1)
1680  r += (get_bit() << logv);
1681  }
1682  val = r;
1683  }
1684 
1685  unsigned mid_idx = sz >> 1;
1686  val += lo + mid_idx;
1687 
1688  if (sz <= 1)
1689  return;
1690 
1691  bic_decode_u16_cm_dry(mid_idx, lo, bm::gap_word_t(val-1));
1692  // tail recursive call:
1693  // bic_decode_u32_cm_dry(sz - mid_idx - 1, val + 1, hi);
1694  sz -= mid_idx + 1;
1695  lo = bm::gap_word_t(val + 1);
1696  } while (sz); // for sz
1697 }
1698 
1699 
1700 // ----------------------------------------------------------------------
1701 
1702 template<class TDecoder>
1704  bm::gap_word_t lo,
1706 {
1707  BM_ASSERT(sz);
1708  do //for (;sz;)
1709  {
1710  BM_ASSERT(lo <= hi);
1711 
1712  unsigned val;
1713  // read the value
1714  {
1715  unsigned r = hi - lo - sz + 1;
1716  if (r)
1717  {
1718  unsigned logv = bm::bit_scan_reverse32(r) + 1;
1719  val = get_bits(logv);
1720  BM_ASSERT(val <= r);
1721  }
1722  else
1723  {
1724  val = 0;
1725  }
1726  }
1727  unsigned mid_idx = sz >> 1;
1728  val += lo + mid_idx;
1729  BM_ASSERT(val < 65536);
1730  BM_ASSERT(mid_idx < 65536);
1731 
1732  // set bit in the target block
1733  {
1734  unsigned nword = (val >> bm::set_word_shift);
1735  block[nword] |= (1u << (val & bm::set_word_mask));
1736  }
1737 
1738  if (sz == 1)
1739  return;
1740  bic_decode_u16_rg_bitset(block, mid_idx, lo, bm::gap_word_t(val - 1));
1741  // tail recursion of:
1742  //bic_decode_u16_bitset(block, sz - mid_idx - 1, bm::gap_word_t(val + 1), hi);
1743  sz -= mid_idx + 1;
1744  lo = bm::gap_word_t(val + 1);
1745  } while(sz); // for sz
1746 }
1747 
1748 // ----------------------------------------------------------------------
1749 
1750 template<class TDecoder>
1752  bm::gap_word_t lo,
1754 {
1755  for (;sz;)
1756  {
1757  BM_ASSERT(lo <= hi);
1758 
1759  unsigned val;
1760  // read the value
1761  {
1762  unsigned r = hi - lo - sz + 1;
1763  if (r)
1764  {
1765  unsigned logv = bm::bit_scan_reverse32(r) + 1;
1766  val = get_bits(logv);
1767  BM_ASSERT(val <= r);
1768  }
1769  else
1770  {
1771  val = 0;
1772  }
1773  }
1774  unsigned mid_idx = sz >> 1;
1775  val += lo + mid_idx;
1776  BM_ASSERT(val < 65536);
1777  BM_ASSERT(mid_idx < 65536);
1778 
1779  if (sz == 1)
1780  return;
1781  bic_decode_u16_rg_dry(mid_idx, lo, bm::gap_word_t(val - 1));
1782  sz -= mid_idx + 1;
1783  lo = bm::gap_word_t(val + 1);
1784  } // for sz
1785 }
1786 
1787 
1788 
1789 // ----------------------------------------------------------------------
1790 
1791 template<class TDecoder>
1793 {
1794  unsigned acc = accum_;
1795  unsigned used = used_bits_;
1796 
1797  if (used == (sizeof(acc) * 8))
1798  {
1799  acc = src_.get_32();
1800  used ^= used;
1801  }
1802  unsigned zero_bits = 0;
1803  while (true)
1804  {
1805  if (acc == 0)
1806  {
1807  zero_bits = unsigned(zero_bits +(sizeof(acc) * 8) - used);
1808  used = 0;
1809  acc = src_.get_32();
1810  continue;
1811  }
1812  unsigned first_bit_idx =
1813  #if defined(BM_x86) && (defined(__GNUG__) || defined(_MSC_VER)) && !(defined(__arm64__) || defined(__arm__))
1814  bm::bsf_asm32(acc);
1815  #else
1816  bm::bit_scan_fwd(acc);
1817  #endif
1818  acc >>= first_bit_idx;
1819  zero_bits += first_bit_idx;
1820  used += first_bit_idx;
1821  break;
1822  } // while
1823 
1824  // eat the border bit
1825  //
1826  if (used == (sizeof(acc) * 8))
1827  {
1828  acc = src_.get_32();
1829  used = 1;
1830  }
1831  else
1832  {
1833  ++used;
1834  }
1835  acc >>= 1;
1836 
1837  // get the value
1838  unsigned current;
1839 
1840  unsigned free_bits = unsigned((sizeof(acc) * 8) - used);
1841  if (zero_bits <= free_bits)
1842  {
1843  take_accum:
1844  current =
1845  (acc & block_set_table<true>::_left[zero_bits]) | (1 << zero_bits);
1846  acc >>= zero_bits;
1847  used += zero_bits;
1848  goto ret;
1849  }
1850 
1851  if (used == (sizeof(acc) * 8))
1852  {
1853  acc = src_.get_32();
1854  used ^= used;
1855  goto take_accum;
1856  }
1857 
1858  // take the part
1859  current = acc;
1860  // read the next word
1861  acc = src_.get_32();
1862  used = zero_bits - free_bits;
1863  current |=
1864  ((acc & block_set_table<true>::_left[used]) << free_bits) |
1865  (1 << zero_bits);
1866 
1867  acc >>= used;
1868 ret:
1869  accum_ = acc;
1870  used_bits_ = used;
1871  return current;
1872 }
1873 
1874 // ----------------------------------------------------------------------
1875 
1876 template<class TDecoder>
1877 unsigned bit_in<TDecoder>::get_bits(unsigned count) BMNOEXCEPT
1878 {
1879  BM_ASSERT(count);
1880  const unsigned maskFF = ~0u;
1881  unsigned acc = accum_;
1882  unsigned used = used_bits_;
1883 
1884  unsigned value = 0;
1885  unsigned free_bits = unsigned((sizeof(acc) * 8) - used);
1886  if (count <= free_bits)
1887  {
1888  take_accum:
1889  value = acc & (maskFF >> (32 - count));
1890  acc >>= count;
1891  used += count;
1892  goto ret;
1893  }
1894  if (used == (sizeof(acc) * 8))
1895  {
1896  acc = src_.get_32();
1897  used = 0;
1898  goto take_accum;
1899  }
1900  value = acc;
1901  acc = src_.get_32();
1902  used = count - free_bits;
1903  value |= ((acc & (maskFF >> (32 - used))) << free_bits);
1904  acc >>= used;
1905 ret:
1906  accum_ = acc;
1907  used_bits_ = used;
1908  return value;
1909 }
1910 
1911 // ----------------------------------------------------------------------
1912 
1913 template<class TDecoder>
1915 {
1916  const unsigned mask = (~0u) >> (32 - 1); // 100000...
1917  unsigned value = accum_ & mask;
1918  if (unsigned free_bits = unsigned(32u - used_bits_); free_bits)
1919  {
1920  accum_ >>= 1; ++used_bits_;
1921  }
1922  else
1923  {
1924  BM_ASSERT(used_bits_ == (sizeof(accum_) * 8));
1925  unsigned a = src_.get_32();
1926  value = a & mask;
1927  used_bits_ = 1;
1928  accum_ = (a >> 1);
1929  }
1930  return value;
1931 }
1932 
1933 // ----------------------------------------------------------------------
1934 
1935 } // namespace bm
1936 
1937 #ifdef _MSC_VER
1938 #pragma warning(pop)
1939 #endif
1940 
1941 #endif
ncbi::TMaskedQueryRegions mask
#define BMNOEXCEPT
Definition: bmdef.h:82
#define BMFORCEINLINE
Definition: bmdef.h:213
#define BM_ASSERT
Definition: bmdef.h:139
Bit manipulation primitives (internal)
Byte based reader for un-aligned bit streaming.
Definition: encoding.h:257
unsigned used_bits_
Bits used in the accumulator.
Definition: encoding.h:330
bit_in & operator=(const bit_in &)
unsigned gamma() noexcept
decode unsigned value using Elias Gamma coding
Definition: encoding.h:1792
void bic_decode_u32_cm(bm::word_t *arr, unsigned sz, bm::word_t lo, bm::word_t hi) noexcept
Binary Interpolative array decode (32-bit)
Definition: encoding.h:1516
bit_in(const bit_in &)
unsigned accum_
read bit accumulator
Definition: encoding.h:331
void bic_decode_u16_rg_dry(unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) noexcept
Binary Interpolative array decode into /dev/null.
Definition: encoding.h:1751
void bic_decode_u16_rg(bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) noexcept
Binary Interpolative array decode.
Definition: encoding.h:1472
void bic_decode_u16_dry(unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) noexcept
Definition: encoding.h:288
TDecoder & src_
Source of bytes.
Definition: encoding.h:329
void bic_decode_u16_cm(bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) noexcept
Binary Interpolative array decode.
Definition: encoding.h:1561
unsigned get_bit() noexcept
read 1 bit
Definition: encoding.h:1914
void bic_decode_u16_cm_dry(unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) noexcept
Binary Interpolative array decode into /dev/null.
Definition: encoding.h:1654
void bic_decode_u16_cm_bitset(bm::word_t *block, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) noexcept
Binary Interpolative array decode into bitset (32-bit based)
Definition: encoding.h:1605
void bic_decode_u16_bitset(bm::word_t *block, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) noexcept
Definition: encoding.h:282
void bic_decode_u16_rg_bitset(bm::word_t *block, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) noexcept
Binary Interpolative array decode into bitset (32-bit based)
Definition: encoding.h:1703
bit_in(TDecoder &decoder) noexcept
Definition: encoding.h:259
unsigned get_bits(unsigned count) noexcept
read number of bits out of the stream
Definition: encoding.h:1877
Byte based writer for un-aligned bit streaming.
Definition: encoding.h:183
bit_out(const bit_out &)
void bic_encode_u16_rg(const bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) noexcept
Binary Interpolative encoding (array of 16-bit ints)
Definition: encoding.h:1259
TEncoder & dest_
Bit stream target.
Definition: encoding.h:243
bit_out(TEncoder &dest)
Definition: encoding.h:185
bit_out & operator=(const bit_out &)
void put_zero_bits(unsigned count) noexcept
issue specified number of 0s
Definition: encoding.h:1157
void bic_encode_u16_cm(const bm::gap_word_t *arr, unsigned sz, bm::gap_word_t lo, bm::gap_word_t hi) noexcept
Binary Interpolative encoding (array of 16-bit ints) cm - "center-minimal".
Definition: encoding.h:1420
void gamma(unsigned value) noexcept
Elias Gamma encode the specified value.
Definition: encoding.h:1187
unsigned accum_
write bit accumulator
Definition: encoding.h:245
void bic_encode_u32_cm(const bm::word_t *arr, unsigned sz, bm::word_t lo, bm::word_t hi) noexcept
Binary Interpolative encoding (array of 32-bit ints) cm - "center-minimal".
Definition: encoding.h:1294
void flush() noexcept
Flush the incomplete 32-bit accumulator word.
Definition: encoding.h:230
unsigned used_bits_
Bits used in the accumulator.
Definition: encoding.h:244
void put_zero_bit() noexcept
issue 0 into output stream
Definition: encoding.h:1148
void put_bit(unsigned value) noexcept
issue single bit into encode bit-stream
Definition: encoding.h:1095
void put_bits(unsigned value, unsigned count) noexcept
issue count bits out of value
Definition: encoding.h:1106
void flush_accum() noexcept
Definition: encoding.h:233
Base class for all decoding functionality.
Definition: encoding.h:88
const unsigned char * get_pos() const noexcept
Return current buffer pointer.
Definition: encoding.h:105
size_t size() const noexcept
Returns size of the current decoding stream.
Definition: encoding.h:96
bm::id64_t get_h64() noexcept
Read h-64-bit.
Definition: encoding.h:691
unsigned char get_8() noexcept
Reads character from the decoding buffer.
Definition: encoding.h:93
decoder_base(const unsigned char *buf) noexcept
Definition: encoding.h:90
void memcpy(unsigned char *dst, size_t count) noexcept
read bytes from the decode buffer
Definition: encoding.h:679
void set_pos(const unsigned char *pos) noexcept
Set current buffer pointer.
Definition: encoding.h:108
void seek(int delta) noexcept
change current position
Definition: encoding.h:99
const unsigned char * buf_
Definition: encoding.h:114
Class for decoding data from memory buffer.
Definition: encoding.h:160
decoder_little_endian(const unsigned char *buf)
Definition: encoding.h:952
bm::short_t get_16()
Definition: encoding.h:958
bool get_32_OR(bm::word_t *w, unsigned count)
Definition: encoding.h:1035
void get_32_AND(bm::word_t *w, unsigned count)
Definition: encoding.h:1057
Class for decoding data from memory buffer.
Definition: encoding.h:126
void get_32_AND(bm::word_t *w, unsigned count) noexcept
Reads block of 32-bit words from the decoding buffer and ANDs to the destination.
Definition: encoding.h:885
decoder(const unsigned char *buf) noexcept
Construction.
Definition: encoding.h:713
bm::id64_t get_48() noexcept
Reads 64-bit word from the decoding buffer.
Definition: encoding.h:769
bool get_32_OR(bm::word_t *w, unsigned count) noexcept
Reads block of 32-bit words from the decoding buffer and ORs to the destination.
Definition: encoding.h:844
bm::id64_t get_64() noexcept
Reads 64-bit word from the decoding buffer.
Definition: encoding.h:786
bm::word_t get_24() noexcept
Reads 32-bit word from the decoding buffer.
Definition: encoding.h:738
bm::short_t get_16() noexcept
Reads 16-bit word from the decoding buffer.
Definition: encoding.h:722
bm::word_t get_32() noexcept
Reads 32-bit word from the decoding buffer.
Definition: encoding.h:751
Memory encoding.
Definition: encoding.h:50
unsigned char * position_type
Definition: encoding.h:52
void put_prefixed_array_32(unsigned char c, const bm::word_t *w, unsigned count) noexcept
Encode 8-bit prefix + an array.
Definition: encoding.h:406
void put_16(bm::short_t s) noexcept
Puts short word (16 bits) into the encoding buffer.
Definition: encoding.h:444
void put_24(bm::word_t w) noexcept
Puts 24 bits word into encoding buffer.
Definition: encoding.h:555
void put_32(bm::word_t w) noexcept
Puts 32 bits word into encoding buffer.
Definition: encoding.h:571
void put_64(bm::id64_t w) noexcept
Puts 64 bits word into encoding buffer.
Definition: encoding.h:606
void put_48(bm::id64_t w) noexcept
Puts 48 bits word into encoding buffer.
Definition: encoding.h:589
size_t size() const noexcept
Returns size of the current encoding stream.
Definition: encoding.h:529
void put_8(unsigned char c) noexcept
Puts one character into the encoding buffer.
Definition: encoding.h:434
void set_pos(unsigned char *buf_pos) noexcept
Set current memory stream position.
Definition: encoding.h:545
void put_h64(bm::id64_t w) noexcept
Puts 64 bits word into encoding buffer with h-compression.
Definition: encoding.h:628
encoder(unsigned char *buf, size_t size) noexcept
Construction.
Definition: encoding.h:398
size_t size_
Definition: encoding.h:79
void memcpy(const unsigned char *src, size_t count) noexcept
copy bytes into target buffer or just rewind if src is NULL
Definition: encoding.h:516
void put_prefixed_array_16(unsigned char c, const bm::short_t *s, unsigned count, bool encode_count) noexcept
Encode 8-bit prefix + an array.
Definition: encoding.h:417
unsigned char * get_pos() const noexcept
Get current memory stream position.
Definition: encoding.h:537
unsigned char * start_
Definition: encoding.h:78
unsigned char * buf_
Definition: encoding.h:77
void put_8_16_32(unsigned w, unsigned char c8, unsigned char c16, unsigned char c32) noexcept
but gat plus value based on its VBR evaluation
Definition: encoding.h:487
Elias Gamma decoder.
Definition: bmgamma.h:43
gamma_decoder & operator=(const gamma_decoder &)
void stop()
Stop decoding sequence.
Definition: encoding.h:374
void start()
Start encoding sequence.
Definition: encoding.h:369
T operator()(void)
Decode word.
Definition: encoding.h:379
gamma_decoder(TBitIO &bin)
Definition: encoding.h:364
gamma_decoder(const gamma_decoder &)
Functor for Elias Gamma encoding.
Definition: encoding.h:341
gamma_encoder(TBitIO &bout)
Definition: encoding.h:343
TBitIO & bout_
Definition: encoding.h:352
gamma_encoder(const gamma_encoder &)
void operator()(T value)
Encode word.
Definition: encoding.h:347
gamma_encoder & operator=(const gamma_encoder &)
char value[7]
Definition: config.c:431
#define T(s)
Definition: common.h:230
bool avx2_or_arr_unal(__m256i *dst, const __m256i *src, const __m256i *src_end)
OR array elements against another unaligned array dst |= *src.
Definition: bmavx2.h:888
unsigned avx2_and_arr_unal(__m256i *dst, const __m256i *src, const __m256i *src_end)
AND array elements against another array (unaligned) dst &= *src.
Definition: bmavx2.h:777
const CVect2< U > & v2
Definition: globals.hpp:440
bool sse2_or_arr_unal(__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end) BMNOEXCEPT
OR array elements against another array (unaligned) dst |= *src.
Definition: bmsse_util.h:426
unsigned sse2_and_arr_unal(__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end) BMNOEXCEPT
AND array elements against another array (unaligned) dst &= *src.
Definition: bmsse_util.h:259
decoder decoder_big_endian
Class for decoding data from memory buffer.
Definition: encoding.h:148
char * buf
int i
yy_size_t n
#include<zmmintrin.h>
Definition: bm.h:78
unsigned int word_t
Definition: bmconst.h:39
const unsigned set_word_shift
Definition: bmconst.h:72
unsigned long long int id64_t
Definition: bmconst.h:35
T bit_scan_fwd(T v) noexcept
Definition: bmutil.h:297
unsigned compute_h64_mask(unsigned long long w) noexcept
Сompute mask of bytes presense in 64-bit word.
Definition: bmutil.h:556
unsigned short gap_word_t
Definition: bmconst.h:78
const unsigned set_word_mask
Definition: bmconst.h:73
unsigned short short_t
Definition: bmconst.h:40
unsigned bit_scan_reverse32(unsigned w) noexcept
Definition: bmutil.h:304
unsigned int a
Definition: ncbi_localip.c:102
Int4 delta(size_t dimension_, const Int4 *score_)
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
int64x2_t __m128i
Definition: sse2neon.h:200
signed __int64 int64_t
Definition: stdint.h:135
Structure keeps all-left/right ON bits masks.
Definition: bmconst.h:364
#define N
Definition: crc32.c:57
#define const
Definition: zconf.h:230
Modified on Sat Dec 02 09:19:36 2023 by modify_doxy.py rev. 669887