NCBI C++ ToolKit
|
Search Toolkit Book for bm::serializer
Bit-vector serialization class. More...
#include <util/bitset/bmserial.h>
Classes | |
struct | bookmark_state |
Bookmark state structure. More... | |
Public Types | |
typedef BV | bvector_type |
typedef bvector_type::allocator_type | allocator_type |
typedef bvector_type::blocks_manager_type | blocks_manager_type |
typedef bvector_type::statistics | statistics_type |
typedef bvector_type::block_idx_type | block_idx_type |
typedef bvector_type::size_type | size_type |
typedef byte_buffer< allocator_type > | buffer |
typedef bm::bv_ref_vector< BV > | bv_ref_vector_type |
typedef bm::xor_sim_model< BV > | xor_sim_model_type |
typedef xor_sim_model_type::block_match_chain_type | block_match_chain_type |
Public Member Functions | |
serializer (const allocator_type &alloc=allocator_type(), bm::word_t *temp_block=0) | |
Constructor. More... | |
serializer (bm::word_t *temp_block) | |
~serializer () | |
Compression level settings | |
void | set_compression_level (unsigned clevel) noexcept |
Set compression level. More... | |
unsigned | get_compression_level () const noexcept |
Get current compression level. More... | |
Serialization Methods | |
typedef bm::bit_out< bm::encoder > | bit_out_type |
typedef bm::gamma_encoder< bm::gap_word_t, bit_out_type > | gamma_encoder_func |
typedef bm::heap_vector< bm::gap_word_t, allocator_type, true > | block_arridx_type |
typedef bm::heap_vector< unsigned, allocator_type, true > | sblock_arridx_type |
typedef allocator_type::allocator_pool_type | allocator_pool_type |
bm::id64_t | digest0_ |
unsigned | bit_model_d0_size_ |
memory (bytes) by d0 method (bytes) More... | |
unsigned | bit_model_0run_size_ |
memory (bytes) by run-0 method (bytes) More... | |
block_arridx_type | bit_idx_arr_ |
sblock_arridx_type | sb_bit_idx_arr_ |
unsigned | scores_ [bm::block_waves] |
unsigned char | models_ [bm::block_waves] |
unsigned | mod_size_ |
allocator_type | alloc_ |
size_type * | compression_stat_ |
bool | allow_stat_reset_ = true |
controls zeroing of telemetry More... | |
bool | gap_serial_ |
bool | byte_order_serial_ |
bool | sb_bookmarks_ |
Bookmarks flag. More... | |
unsigned | sb_range_ |
Desired bookmarks interval. More... | |
bm::word_t * | temp_block_ |
unsigned | compression_level_ |
bool | own_temp_block_ |
bool | optimize_ |
flag to optimize the input vector More... | |
bool | free_ |
flag to free the input vector More... | |
allocator_pool_type | pool_ |
unsigned char * | enc_header_pos_ |
pos of top level header to roll back More... | |
unsigned char | header_flag_ |
set of masks used to save More... | |
const bv_ref_vector_type * | ref_vect_ |
ref.vector for XOR compression More... | |
const xor_sim_model_type * | sim_model_ |
similarity model matrix More... | |
bm::xor_scanner< BV > | xor_scan_ |
scanner for XOR similarity More... | |
size_type | ref_idx_ |
current reference index More... | |
bm::word_t * | xor_tmp_block_ |
tmp area for xor product More... | |
bm::word_t * | xor_tmp1_ |
bm::word_t * | xor_tmp2_ |
unsigned | sparse_cutoff_ |
number of bits per blocks to consider sparse More... | |
size_type | serialize (const BV &bv, unsigned char *buf, size_t buf_size) |
Bitvector serialization into memory block. More... | |
void | serialize (const BV &bv, typename serializer< BV >::buffer &buf, const statistics_type *bv_stat=0) |
Bitvector serialization into buffer object (resized automatically) More... | |
void | optimize_serialize_destroy (BV &bv, typename serializer< BV >::buffer &buf) |
Bitvector serialization into buffer object (resized automatically) Input bit-vector gets optimized and then destroyed, content is NOT guaranteed after this operation. More... | |
const size_type * | get_compression_stat () const noexcept |
Return serialization counter vector. More... | |
void | allow_stat_reset (bool allow) noexcept |
Enable/disable statistics reset on each serialization. More... | |
void | reset_compression_stats () noexcept |
Reset all accumulated compression statistics. More... | |
void | gap_length_serialization (bool value) noexcept |
Set GAP length serialization (serializes GAP levels of the original vector) More... | |
void | byte_order_serialization (bool value) noexcept |
Set byte-order serialization (for cross platform compatibility) More... | |
void | set_bookmarks (bool enable, unsigned bm_interval=256) noexcept |
Add skip-markers to serialization BLOB for faster range decode at the expense of some BLOB size increase. More... | |
void | set_sparse_cutoff (unsigned cutoff) noexcept |
Fine tuning for Binary Interpolative Compression (levels 5+) The parameter sets average population count per block (64Kbits) below which block is considered very sparse. More... | |
void | set_ref_vectors (const bv_ref_vector_type *ref_vect) |
Attach collection of reference vectors for XOR serialization (no transfer of ownership for the pointers) More... | |
bool | compute_sim_model (xor_sim_model_type &sim_model, const bv_ref_vector_type &ref_vect, const bm::xor_sim_params &params) |
Calculate XOR similarity model for ref_vector; the reference vector must be associated beforehand. More... | |
void | set_sim_model (const xor_sim_model_type *sim_model) noexcept |
Attach XOR similarity model (must be computed by the same ref vector) More... | |
void | set_curr_ref_idx (size_type ref_idx) noexcept |
Set current index in ref.vector collection (not a row idx or plain idx) More... | |
void | encode_header (const BV &bv, bm::encoder &enc) noexcept |
Encode serialization header information. More... | |
void | encode_gap_block (const bm::gap_word_t *gap_block, bm::encoder &enc) |
void | gamma_gap_block (const bm::gap_word_t *gap_block, bm::encoder &enc) noexcept |
void | gamma_gap_array (const bm::gap_word_t *gap_block, unsigned arr_len, bm::encoder &enc, bool inverted=false) noexcept |
Encode GAP block as delta-array with Elias Gamma coder. More... | |
void | encode_bit_array (const bm::word_t *block, bm::encoder &enc, bool inverted) noexcept |
Encode bit-block as an array of bits. More... | |
void | gamma_gap_bit_block (const bm::word_t *block, bm::encoder &enc) noexcept |
void | gamma_arr_bit_block (const bm::word_t *block, bm::encoder &enc, bool inverted) noexcept |
void | bienc_arr_bit_block (const bm::word_t *block, bm::encoder &enc, bool inverted) noexcept |
void | bienc_arr_sblock (const BV &bv, unsigned sb, bm::encoder &enc) noexcept |
void | bienc_gap_bit_block (const bm::word_t *block, bm::encoder &enc) noexcept |
encode bit-block as interpolated bit block of gaps More... | |
void | interpolated_arr_bit_block (const bm::word_t *block, bm::encoder &enc, bool inverted) noexcept |
void | interpolated_gap_bit_block (const bm::word_t *block, bm::encoder &enc) noexcept |
encode bit-block as interpolated gap block More... | |
void | interpolated_gap_array (const bm::gap_word_t *gap_block, unsigned arr_len, bm::encoder &enc, bool inverted) noexcept |
Encode GAP block as an array with binary interpolated coder. More... | |
void | interpolated_gap_array_v0 (const bm::gap_word_t *gap_block, unsigned arr_len, bm::encoder &enc, bool inverted) noexcept |
void | interpolated_encode_gap_block (const bm::gap_word_t *gap_block, bm::encoder &enc) noexcept |
void | encode_bit_interval (const bm::word_t *blk, bm::encoder &enc, unsigned size_control) noexcept |
Encode BIT block with repeatable runs of zeroes. More... | |
void | encode_bit_digest (const bm::word_t *blk, bm::encoder &enc, bm::id64_t d0) noexcept |
Encode bit-block using digest (hierarchical compression) More... | |
void | encode_xor_match_chain (bm::encoder &enc, const block_match_chain_type &mchain) noexcept |
Encode XOR match chain. More... | |
unsigned char | find_gap_best_encoding (const bm::gap_word_t *gap_block) noexcept |
Determine best representation for GAP block based on current set compression level. More... | |
unsigned char | find_bit_best_encoding (const bm::word_t *block) noexcept |
Determine best representation for a bit-block. More... | |
unsigned char | find_bit_best_encoding_l5 (const bm::word_t *block) noexcept |
Determine best representation for a bit-block (level 5) More... | |
void | reset_models () noexcept |
void | add_model (unsigned char mod, unsigned score) noexcept |
void | xor_tmp_product (const bm::word_t *s_block, const block_match_chain_type &mchain, unsigned i, unsigned j) noexcept |
Compute digest based XOR product, place into tmp XOR block. More... | |
static void | process_bookmark (block_idx_type nb, bookmark_state &bookm, bm::encoder &enc) noexcept |
Check if bookmark needs to be placed and if so, encode it into serialization BLOB. More... | |
serializer (const serializer &) | |
serializer & | operator= (const serializer &) |
Bit-vector serialization class.
Class designed to convert sparse bit-vectors into a single block of memory ready for file or database storage or network transfer.
This class can be reused for multiple serializations (but not across threads). Class reuse offers some performance advantage (helps avoid temp memory reallocations).
Definition at line 75 of file bmserial.h.
|
private |
Definition at line 441 of file bmserial.h.
typedef bvector_type::allocator_type bm::serializer< BV >::allocator_type |
Definition at line 79 of file bmserial.h.
|
private |
Definition at line 437 of file bmserial.h.
|
private |
Definition at line 439 of file bmserial.h.
typedef bvector_type::block_idx_type bm::serializer< BV >::block_idx_type |
Definition at line 82 of file bmserial.h.
typedef xor_sim_model_type::block_match_chain_type bm::serializer< BV >::block_match_chain_type |
Definition at line 90 of file bmserial.h.
typedef bvector_type::blocks_manager_type bm::serializer< BV >::blocks_manager_type |
Definition at line 80 of file bmserial.h.
typedef byte_buffer<allocator_type> bm::serializer< BV >::buffer |
Definition at line 85 of file bmserial.h.
typedef bm::bv_ref_vector<BV> bm::serializer< BV >::bv_ref_vector_type |
Definition at line 86 of file bmserial.h.
typedef BV bm::serializer< BV >::bvector_type |
Definition at line 78 of file bmserial.h.
|
private |
Definition at line 438 of file bmserial.h.
|
private |
Definition at line 440 of file bmserial.h.
typedef bvector_type::size_type bm::serializer< BV >::size_type |
Definition at line 83 of file bmserial.h.
typedef bvector_type::statistics bm::serializer< BV >::statistics_type |
Definition at line 81 of file bmserial.h.
typedef bm::xor_sim_model<BV> bm::serializer< BV >::xor_sim_model_type |
Definition at line 87 of file bmserial.h.
bm::serializer< BV >::serializer | ( | const allocator_type & | alloc = allocator_type() , |
bm::word_t * | temp_block = 0 |
||
) |
Constructor.
alloc | - memory allocator |
temp_block | - temporary block for various operations (if NULL it will be allocated and managed by serializer class). Temp block is used as scratch memory during serialization; use of an external temp block avoids unnecessary re-allocations. |
Temp block attached is not owned by the class and NOT deallocated on destruction.
Definition at line 1167 of file bmserial.h.
References bm::serializer< BV >::alloc_, bm::serializer< BV >::bit_idx_arr_, bm::serializer< BV >::compression_stat_, bm::serializer< BV >::free_, bm::gap_max_bits, bm::serializer< BV >::optimize_, bm::serializer< BV >::own_temp_block_, bm::heap_vector< Val, BVAlloc, trivial_type >::resize(), bm::serializer< BV >::temp_block_, bm::serializer< BV >::xor_tmp1_, and bm::serializer< BV >::xor_tmp2_.
bm::serializer< BV >::serializer | ( | bm::word_t * | temp_block | ) |
Definition at line 1200 of file bmserial.h.
References bm::serializer< BV >::alloc_, bm::serializer< BV >::bit_idx_arr_, bm::serializer< BV >::compression_stat_, bm::serializer< BV >::free_, bm::gap_max_bits, bm::serializer< BV >::optimize_, bm::serializer< BV >::own_temp_block_, bm::heap_vector< Val, BVAlloc, trivial_type >::resize(), bm::serializer< BV >::temp_block_, bm::serializer< BV >::xor_tmp1_, and bm::serializer< BV >::xor_tmp2_.
bm::serializer< BV >::~serializer |
Definition at line 1232 of file bmserial.h.
|
private |
|
protectednoexcept |
Definition at line 1652 of file bmserial.h.
|
inlinenoexcept |
Enable/disable statistics reset on each serialization.
Definition at line 202 of file bmserial.h.
|
protectednoexcept |
Definition at line 2327 of file bmserial.h.
References bm::bit_block_convert_to_arr().
|
protectednoexcept |
Definition at line 2411 of file bmserial.h.
References bm::bit_out< TEncoder >::bic_encode_u32_cm(), BM_ASSERT, bm::convert_sub_to_arr(), len, bm::sblock_flag_len16, bm::sblock_flag_max16, bm::sblock_flag_max24, bm::sblock_flag_max32, bm::sblock_flag_min16, bm::sblock_flag_min24, bm::sblock_flag_min32, bm::sblock_flag_sb16, bm::sblock_flag_sb32, bm::set_sblock_bienc, and bm::set_sub_total_bits.
|
protectednoexcept |
encode bit-block as interpolated bit block of gaps
Definition at line 2352 of file bmserial.h.
References bm::bit_out< TEncoder >::bic_encode_u16(), bm::bit_to_gap(), BM_ASSERT, bm::bit_out< TEncoder >::flush(), bm::gap_max_bits, head, len, bm::set_block_bitgap_bienc, and bm::set_block_size.
|
noexcept |
Set byte-order serialization (for cross platform compatibility)
value | - TRUE serialization format includes byte-order marker |
Definition at line 1278 of file bmserial.h.
References rapidjson::value.
Referenced by bm::serialize(), CDataFrame< MAX_SIZE >::Serialize(), and StressTest().
bool bm::serializer< BV >::compute_sim_model | ( | xor_sim_model_type & | sim_model, |
const bv_ref_vector_type & | ref_vect, | ||
const bm::xor_sim_params & | params | ||
) |
Calculate XOR similarity model for ref_vector; the reference vector must be associated beforehand.
sim_model | - [out] similarity model to compute |
ref_vect | - [in] reference vectors |
params | - parameters to regulate search depth |
Definition at line 1313 of file bmserial.h.
Referenced by SerializationCompressionLevelsTest().
|
protectednoexcept |
Encode bit-block as an array of bits.
Definition at line 2283 of file bmserial.h.
References bm::bit_block_convert_to_arr(), bm::set_block_arrbit, and bm::set_block_arrbit_inv.
|
protectednoexcept |
Encode bit-block using digest (hierarchical compression)
Definition at line 2108 of file bmserial.h.
References bm::bmi_blsi_u64(), bm::bmi_bslr_u64(), bm::set_block_bit, bm::set_block_bit_digest0, bm::set_block_digest_wave_size, bm::set_block_size, t, and bm::word_bitcount64().
|
protectednoexcept |
Encode BIT block with repeatable runs of zeroes.
Definition at line 2056 of file bmserial.h.
References BM_ASSERT, i, bm::set_block_bit_0runs, and bm::set_block_size.
|
protected |
Encode GAP block
Definition at line 1996 of file bmserial.h.
References BM_ASSERT, BM_FALLTHROUGH, bm::gap_convert_to_arr(), bm::gap_equiv_len, invert, bm::encoder::put_16(), bm::encoder::put_8(), bm::set_block_arrgap, bm::set_block_arrgap_bienc, bm::set_block_arrgap_bienc_inv, bm::set_block_arrgap_egamma, bm::set_block_arrgap_egamma_inv, bm::set_block_arrgap_inv, bm::set_block_bit_1bit, bm::set_block_gap, and bm::set_block_gap_bienc.
|
protectednoexcept |
Encode serialization header information.
Definition at line 1333 of file bmserial.h.
References bm::BM_HM_64_BIT, bm::BM_HM_DEFAULT, bm::BM_HM_HXOR, bm::BM_HM_NO_BO, bm::BM_HM_NO_GAPL, bm::BM_HM_RESIZE, bm::globals< T >::byte_order(), bm::gap_levels, and bm::id_max.
|
protectednoexcept |
Encode XOR match chain.
Definition at line 2161 of file bmserial.h.
References BM_ASSERT, bm::check_pair_vect_vbr(), and bm::set_block_xor_chain.
|
protectednoexcept |
Determine best representation for a bit-block.
Definition at line 1777 of file bmserial.h.
References bm::best_representation(), bm::bit_block_change_bc(), bm::bit_block_count(), bm::bit_count_nonzero_size(), bm::block_waves, BM_ASSERT, bm::calc_block_digest0(), bm::compute_s_block_descr(), bm::e_bit_0, bm::e_bit_1, bm::e_bit_bit, bm::e_bit_end, bm::e_bit_GAP, bm::e_bit_IINT, bm::e_bit_INT, bm::gap_equiv_len, bm::gap_max_bits, bm::gap_max_buff_len, i, bm::block_waves_xor_descr::sb_bc, bm::block_waves_xor_descr::sb_gc, bm::set_block_aone, bm::set_block_arrbit, bm::set_block_arrbit_inv, bm::set_block_arrgap_egamma, bm::set_block_arrgap_egamma_inv, bm::set_block_azero, bm::set_block_bit, bm::set_block_bit_0runs, bm::set_block_bit_1bit, bm::set_block_bit_digest0, bm::set_block_digest_wave_size, bm::set_block_gap_egamma, bm::set_block_size, and bm::word_bitcount64().
|
protectednoexcept |
Determine best representation for a bit-block (level 5)
Definition at line 1661 of file bmserial.h.
References bm::bit_block_change_bc(), bm::bit_count_nonzero_size(), bm::calc_block_digest0(), bm::gap_max_bits, bm::gap_max_buff_len, i, int, bm::set_block_aone, bm::set_block_arr_bienc, bm::set_block_arr_bienc_inv, bm::set_block_arrbit, bm::set_block_arrbit_inv, bm::set_block_arrgap, bm::set_block_arrgap_bienc, bm::set_block_arrgap_bienc_inv, bm::set_block_arrgap_egamma, bm::set_block_arrgap_egamma_inv, bm::set_block_arrgap_inv, bm::set_block_azero, bm::set_block_bit, bm::set_block_bit_0runs, bm::set_block_bit_1bit, bm::set_block_bit_digest0, bm::set_block_bitgap_bienc, bm::set_block_gap_bienc, bm::set_block_gap_egamma, bm::set_block_size, and bm::word_bitcount64().
|
protectednoexcept |
Determine best representation for GAP block based on current set compression level.
Definition at line 1945 of file bmserial.h.
References bm::gap_bit_count_unr(), bm::gap_length(), bm::gap_max_bits, len, bm::set_block_arrgap, bm::set_block_arrgap_bienc, bm::set_block_arrgap_bienc_inv, bm::set_block_arrgap_egamma, bm::set_block_arrgap_egamma_inv, bm::set_block_arrgap_inv, bm::set_block_bit_1bit, bm::set_block_gap, bm::set_block_gap_bienc, and bm::set_block_gap_egamma.
|
protectednoexcept |
Definition at line 2311 of file bmserial.h.
References bm::bit_block_convert_to_arr(), bm::set_block_bit, and bm::set_block_size.
|
protectednoexcept |
Encode GAP block as delta-array with Elias Gamma coder.
Definition at line 1484 of file bmserial.h.
References bm::bit_out< TEncoder >::gamma(), i, prev(), bm::set_block_arrgap, bm::set_block_arrgap_egamma, bm::set_block_arrgap_egamma_inv, and bm::set_block_arrgap_inv.
|
protectednoexcept |
Definition at line 2302 of file bmserial.h.
References bm::bit_to_gap(), BM_ASSERT, bm::gap_equiv_len, and len.
|
protectednoexcept |
Encode GAP block with Elias Gamma coder
Definition at line 1445 of file bmserial.h.
References bm::for_each_dgap(), bm::gap_length(), len, bm::set_block_gap, and bm::set_block_gap_egamma.
|
noexcept |
Set GAP length serialization (serializes GAP levels of the original vector)
value | - when TRUE serialized vector includes GAP levels parameters |
Definition at line 1272 of file bmserial.h.
References rapidjson::value.
Referenced by bm::serialize(), bm::compressed_collection_serializer< CBC >::serialize(), CDataFrame< MAX_SIZE >::Serialize(), bm::sparse_vector_serializer< SV >::sparse_vector_serializer(), and StressTest().
|
inlinenoexcept |
Get current compression level.
Definition at line 133 of file bmserial.h.
|
inlinenoexcept |
Return serialization counter vector.
Definition at line 196 of file bmserial.h.
Referenced by SerializationCompressionLevelsTest(), SparseSerializationTest(), TestSignedSparseVectorSerial(), TestSparseVectorSerial(), TestSparseVectorSerialization2(), and TestStrSparseVectorSerial().
|
protectednoexcept |
Definition at line 2502 of file bmserial.h.
References bm::bit_out< TEncoder >::bic_encode_u16(), bm::bit_block_convert_to_arr(), BM_ASSERT, bm::bit_out< TEncoder >::flush(), bm::set_block_arr_bienc, bm::set_block_arr_bienc_8bh, bm::set_block_arr_bienc_inv, and bm::set_block_size.
|
protectednoexcept |
Encode GAP block using binary interpolated encoder
Definition at line 1385 of file bmserial.h.
References bm::bit_out< TEncoder >::bic_encode_u16(), BM_ASSERT, bm::bit_out< TEncoder >::flush(), bm::gap_length(), head, len, bm::set_block_gap, bm::set_block_gap_bienc, and bm::set_block_gap_bienc_v2.
|
protectednoexcept |
Encode GAP block as an array with binary interpolated coder.
Definition at line 1577 of file bmserial.h.
References bm::bit_out< TEncoder >::bic_encode_u16(), BM_ASSERT, bm::bit_out< TEncoder >::flush(), bm::set_block_arrgap, bm::set_block_arrgap_bienc_inv_v2, bm::set_block_arrgap_bienc_v2, and bm::set_block_arrgap_inv.
|
protectednoexcept |
Definition at line 1529 of file bmserial.h.
References bm::bit_out< TEncoder >::bic_encode_u16(), BM_ASSERT, bm::bit_out< TEncoder >::flush(), bm::bit_out< TEncoder >::gamma(), bm::set_block_arrgap, bm::set_block_arrgap_bienc, bm::set_block_arrgap_bienc_inv, and bm::set_block_arrgap_inv.
|
protectednoexcept |
encode bit-block as interpolated gap block
Definition at line 2342 of file bmserial.h.
References bm::bit_to_gap(), BM_ASSERT, bm::gap_max_bits, and len.
|
private |
void bm::serializer< BV >::optimize_serialize_destroy | ( | BV & | bv, |
typename serializer< BV >::buffer & | buf | ||
) |
Bitvector serialization into buffer object (resized automatically) Input bit-vector gets optimized and then destroyed, content is NOT guaranteed after this operation.
Effectively it moves data into the buffer.
This operation exists because it is faster to do all three operations in one single pass. This is a destructive serialization!
bv | - input/output bitvector |
buf | - output buffer object |
Definition at line 2265 of file bmserial.h.
References buf, bm::serialize(), and st().
Referenced by SerializationTest().
|
staticprotectednoexcept |
Check if bookmark needs to be placed and if so, encode it into serialization BLOB.
nb | - block idx |
bookm | - bookmark state structure |
enc | - BLOB encoder |
Definition at line 2593 of file bmserial.h.
References BM_ASSERT, bm::set_nb_bookmark16, bm::set_nb_bookmark24, bm::set_nb_bookmark32, bm::set_nb_sync_mark16, bm::set_nb_sync_mark24, bm::set_nb_sync_mark32, bm::set_nb_sync_mark48, bm::set_nb_sync_mark64, bm::set_nb_sync_mark8, and U.
|
noexcept |
Reset all accumulated compression statistics.
Definition at line 1244 of file bmserial.h.
References i.
|
inlineprotectednoexcept |
Definition at line 383 of file bmserial.h.
void bm::serializer< BV >::serialize | ( | const BV & | bv, |
typename serializer< BV >::buffer & | buf, | ||
const statistics_type * | bv_stat = 0 |
||
) |
Bitvector serialization into buffer object (resized automatically)
bv | - input bitvector |
buf | - output buffer object |
bv_stat | - input (optional) bit-vector statistics object if NULL, serialize will compute the statistics |
Definition at line 2241 of file bmserial.h.
References BM_ASSERT, buf, and bm::serialize().
serializer< BV >::size_type bm::serializer< BV >::serialize | ( | const BV & | bv, |
unsigned char * | buf, | ||
size_t | buf_size | ||
) |
Bitvector serialization into memory block.
bv | - input bitvector |
buf | - out buffer (pre-allocated). No range checking is done in this method. It is the responsibility of the caller to allocate a sufficient amount of memory using information from the calc_stat() function. |
buf_size | - size of the output buffer |
Definition at line 2703 of file bmserial.h.
References bm::bit_block_find(), BM_ASSERT, bm::BM_HM_SPARSE, BM_IS_GAP, BM_SER_NEXT_GRP, BMGAP_PTR, buf, bm::block_match_chain< BLOCK_IDX >::chain_size, bm::check_block_one(), bm::check_block_zero(), bm::e_no_xor_match, bm::e_xor_match_BC, bm::e_xor_match_EQ, bm::e_xor_match_GC, bm::e_xor_match_iBC, FULL_BLOCK_FAKE_ADDR, bm::get_block_coord(), i, bm::block_match_chain< BLOCK_IDX >::match, bm::block_match_chain< BLOCK_IDX >::nb, bm::encoder::put_16(), bm::encoder::put_32(), bm::encoder::put_64(), bm::encoder::put_8(), bm::encoder::put_8_16_32(), bm::encoder::put_prefixed_array_32(), bm::block_match_chain< BLOCK_IDX >::ref_idx, bm::set_block_16one, bm::set_block_16zero, bm::set_block_1one, bm::set_block_1zero, bm::set_block_32one, bm::set_block_32zero, bm::set_block_64one, bm::set_block_64zero, bm::set_block_8one, bm::set_block_8zero, bm::set_block_aone, bm::set_block_arr_bienc, bm::set_block_arr_bienc_inv, bm::set_block_arrbit, bm::set_block_arrbit_inv, bm::set_block_arrgap_bienc, bm::set_block_arrgap_bienc_inv, bm::set_block_arrgap_egamma, bm::set_block_arrgap_egamma_inv, bm::set_block_azero, bm::set_block_bit, bm::set_block_bit_0runs, bm::set_block_bit_1bit, bm::set_block_bit_digest0, bm::set_block_bitgap_bienc, bm::set_block_end, bm::set_block_gap_bienc, bm::set_block_gap_egamma, bm::set_block_ref_eq, bm::set_block_size, bm::set_block_xor_ref16, bm::set_block_xor_ref16_um, bm::set_block_xor_ref32, bm::set_block_xor_ref32_um, bm::set_block_xor_ref8, bm::set_block_xor_ref8_um, bm::encoder::set_pos(), bm::set_sub_array_size, bm::set_total_blocks, bm::encoder::size(), and bm::block_match_chain< BLOCK_IDX >::xor_d64.
Referenced by CheckRangeDeserial(), bm::compute_serialization_size(), DesrializationTest2(), MutationOperationsTest(), bm::print_bvector_stat(), SerializationCompressionLevelsTest(), SerializationOperation(), SerializationTest(), bm::serialize(), bm::compressed_collection_serializer< CBC >::serialize(), CDataFrame< MAX_SIZE >::Serialize(), SparseSerializationTest(), and StressTest().
|
noexcept |
Add skip-markers to serialization BLOB for faster range decode at the expense of some BLOB size increase.
enable | - TRUE serialization will add bookmark codes |
bm_interval | - bookmark interval (in number of blocks); suggested values are between 4 and 512 (block size is 64K bits). A smaller interval means more bookmarks are added to the skip list, which allows faster range deserialization at the expense of somewhat increased BLOB size. |
Definition at line 1284 of file bmserial.h.
Referenced by CheckRangeDeserial(), SerializationCompressionLevelsTest(), SerializationOperation(), bm::sparse_vector_serializer< SV >::set_bookmarks(), and StressTest().
|
noexcept |
Set compression level.
Higher compression takes more time to process.
clevel | - compression level (0-6) 0 - take as is 1, 2 - apply light weight RLE/GAP encodings, limited depth hierarchical compression, intervals encoding 3 - variant of 2 with different cut-offs 4 - delta transforms plus Elias Gamma encoding where possible (legacy) 5 - Binary Interpolative Coding (BIC) - light settings 6 - Binary Interpolative Coding (BIC) - harder settings |
Definition at line 1251 of file bmserial.h.
References bm::set_compression_max, bm::sparse_max_l5, and bm::sparse_max_l6.
Referenced by SerializationCompressionLevelsTest().
|
noexcept |
Set current index in ref.vector collection (not a row idx or plain idx)
Definition at line 1327 of file bmserial.h.
Referenced by SerializationCompressionLevelsTest().
void bm::serializer< BV >::set_ref_vectors | ( | const bv_ref_vector_type * | ref_vect | ) |
Attach collection of reference vectors for XOR serialization (no transfer of ownership for the pointers)
Definition at line 1299 of file bmserial.h.
References bm::set_block_size.
Referenced by SerializationCompressionLevelsTest().
|
noexcept |
Attach XOR similarity model (must be computed by the same ref vector)
Definition at line 1321 of file bmserial.h.
Referenced by SerializationCompressionLevelsTest().
|
noexcept |
Fine tuning for Binary Interpolative Compression (levels 5+) The parameter sets average population count per block (64Kbits) below which block is considered very sparse.
If super block (group of 256 blocks) is very sparse it applies block size expansion (for the compression purposes) to improve compression rates.
Definition at line 1262 of file bmserial.h.
References BM_ASSERT, and bm::sparse_max_l6.
|
protectednoexcept |
Compute digest based XOR product, place into tmp XOR block.
Definition at line 2206 of file bmserial.h.
References bm::bit_block_xor(), BM_IS_GAP, BMGAP_PTR, bm::gap_convert_to_bitset(), and i.
|
private |
Definition at line 453 of file bmserial.h.
Referenced by bm::serializer< BV >::serializer().
|
private |
controls zeroing of telemetry
Definition at line 455 of file bmserial.h.
Referenced by bm::serializer< bvector_type >::allow_stat_reset().
|
private |
Definition at line 447 of file bmserial.h.
Referenced by bm::serializer< BV >::serializer().
|
private |
memory (bytes) by run-0 method (bytes)
Definition at line 446 of file bmserial.h.
|
private |
memory (bytes) by d0 method (bytes)
Definition at line 445 of file bmserial.h.
|
private |
Definition at line 457 of file bmserial.h.
|
private |
Definition at line 463 of file bmserial.h.
Referenced by bm::serializer< bvector_type >::get_compression_level().
|
private |
Definition at line 454 of file bmserial.h.
Referenced by bm::serializer< bvector_type >::get_compression_stat(), and bm::serializer< BV >::serializer().
|
private |
Definition at line 444 of file bmserial.h.
|
private |
pos of top level header to roll back
Definition at line 471 of file bmserial.h.
|
private |
flag to free the input vector
Definition at line 467 of file bmserial.h.
Referenced by bm::serializer< BV >::serializer().
|
private |
Definition at line 456 of file bmserial.h.
|
private |
set of masks used to save
Definition at line 472 of file bmserial.h.
|
private |
Definition at line 451 of file bmserial.h.
Referenced by bm::serializer< bvector_type >::reset_models().
|
private |
Definition at line 450 of file bmserial.h.
|
private |
flag to optimize the input vector
Definition at line 466 of file bmserial.h.
Referenced by bm::serializer< BV >::serializer().
|
private |
Definition at line 464 of file bmserial.h.
Referenced by bm::serializer< BV >::serializer().
|
private |
Definition at line 468 of file bmserial.h.
|
private |
current reference index
Definition at line 479 of file bmserial.h.
|
private |
ref.vector for XOR compression
Definition at line 476 of file bmserial.h.
|
private |
Definition at line 448 of file bmserial.h.
|
private |
Bookmarks flag.
Definition at line 459 of file bmserial.h.
|
private |
Desired bookmarks interval.
Definition at line 460 of file bmserial.h.
|
private |
Definition at line 449 of file bmserial.h.
|
private |
similarity model matrix
Definition at line 477 of file bmserial.h.
|
private |
number of bits per blocks to consider sparse
Definition at line 484 of file bmserial.h.
|
private |
Definition at line 462 of file bmserial.h.
Referenced by bm::serializer< BV >::serializer().
|
private |
scanner for XOR similarity
Definition at line 478 of file bmserial.h.
|
private |
Definition at line 481 of file bmserial.h.
Referenced by bm::serializer< BV >::serializer().
|
private |
Definition at line 482 of file bmserial.h.
Referenced by bm::serializer< BV >::serializer().
|
private |
tmp area for xor product
Definition at line 480 of file bmserial.h.