NCBI C++ ToolKit
Classes | Public Types | Public Member Functions | List of all members
bm::serializer< BV > Class Template Reference

Search Toolkit Book for bm::serializer

Bit-vector serialization class. More...

#include <util/bitset/bmserial.h>

+ Inheritance diagram for bm::serializer< BV >:
+ Collaboration diagram for bm::serializer< BV >:

Classes

struct  bookmark_state
 Bookmark state structure. More...
 

Public Types

typedef BV bvector_type
 
typedef bvector_type::allocator_type allocator_type
 
typedef bvector_type::blocks_manager_type blocks_manager_type
 
typedef bvector_type::statistics statistics_type
 
typedef bvector_type::block_idx_type block_idx_type
 
typedef bvector_type::size_type size_type
 
typedef byte_buffer< allocator_type > buffer
 
typedef bm::bv_ref_vector< BV > bv_ref_vector_type
 
typedef bm::xor_sim_model< BV > xor_sim_model_type
 
typedef xor_sim_model_type::block_match_chain_type block_match_chain_type
 

Public Member Functions

 serializer (const allocator_type &alloc=allocator_type(), bm::word_t *temp_block=0)
 Constructor. More...
 
 serializer (bm::word_t *temp_block)
 
 ~serializer ()
 
Compression level settings
void set_compression_level (unsigned clevel) noexcept
 Set compression level. More...
 
unsigned get_compression_level () const noexcept
 Get current compression level. More...
 

Serialization Methods

typedef bm::bit_out< bm::encoder > bit_out_type
 
typedef bm::gamma_encoder< bm::gap_word_t, bit_out_type > gamma_encoder_func
 
typedef bm::heap_vector< bm::gap_word_t, allocator_type, true > block_arridx_type
 
typedef bm::heap_vector< unsigned, allocator_type, true > sblock_arridx_type
 
typedef allocator_type::allocator_pool_type allocator_pool_type
 
bm::id64_t digest0_
 
unsigned bit_model_d0_size_
 memory (bytes) by d0 method (bytes) More...
 
unsigned bit_model_0run_size_
 memory (bytes) by run-0 method (bytes) More...
 
block_arridx_type bit_idx_arr_
 
sblock_arridx_type sb_bit_idx_arr_
 
unsigned scores_ [bm::block_waves]
 
unsigned char models_ [bm::block_waves]
 
unsigned mod_size_
 
allocator_type alloc_
 
size_type * compression_stat_
 
bool allow_stat_reset_ = true
 controls zeroing of telemetry More...
 
bool gap_serial_
 
bool byte_order_serial_
 
bool sb_bookmarks_
 Bookmarks flag. More...
 
unsigned sb_range_
 Desired bookmarks interval. More...
 
bm::word_t * temp_block_
 
unsigned compression_level_
 
bool own_temp_block_
 
bool optimize_
 flag to optimize the input vector More...
 
bool free_
 flag to free the input vector More...
 
allocator_pool_type pool_
 
unsigned char * enc_header_pos_
 pos of top level header to roll back More...
 
unsigned char header_flag_
 set of masks used to save More...
 
const bv_ref_vector_type * ref_vect_
 ref.vector for XOR compression More...
 
const xor_sim_model_type * sim_model_
 similarity model matrix More...
 
bm::xor_scanner< BV > xor_scan_
 scanner for XOR similarity More...
 
size_type ref_idx_
 current reference index More...
 
bm::word_t * xor_tmp_block_
 tmp area for xor product More...
 
bm::word_t * xor_tmp1_
 
bm::word_t * xor_tmp2_
 
unsigned sparse_cutoff_
 number of bits per blocks to consider sparse More...
 
size_type serialize (const BV &bv, unsigned char *buf, size_t buf_size)
 Bitvector serialization into memory block. More...
 
void serialize (const BV &bv, typename serializer< BV >::buffer &buf, const statistics_type *bv_stat=0)
 Bitvector serialization into buffer object (resized automatically) More...
 
void optimize_serialize_destroy (BV &bv, typename serializer< BV >::buffer &buf)
 Bitvector serialization into buffer object (resized automatically) Input bit-vector gets optimized and then destroyed, content is NOT guaranteed after this operation. More...
 
const size_type * get_compression_stat () const noexcept
 Return serialization counter vector. More...
 
void allow_stat_reset (bool allow) noexcept
 Enable/disable statistics reset on each serialization. More...
 
void reset_compression_stats () noexcept
 Reset all accumulated compression statistics. More...
 
void gap_length_serialization (bool value) noexcept
 Set GAP length serialization (serializes GAP levels of the original vector) More...
 
void byte_order_serialization (bool value) noexcept
 Set byte-order serialization (for cross platform compatibility) More...
 
void set_bookmarks (bool enable, unsigned bm_interval=256) noexcept
 Add skip-markers to serialization BLOB for faster range decode at the expense of some BLOB size increase. More...
 
void set_sparse_cutoff (unsigned cutoff) noexcept
 Fine tuning for Binary Interpolative Compression (levels 5+) The parameter sets average population count per block (64Kbits) below which block is considered very sparse. More...
 
void set_ref_vectors (const bv_ref_vector_type *ref_vect)
 Attach collection of reference vectors for XOR serialization (no transfer of ownership for the pointers) More...
 
bool compute_sim_model (xor_sim_model_type &sim_model, const bv_ref_vector_type &ref_vect, const bm::xor_sim_params &params)
 Calculate XOR similarity model for ref_vector; reference vector must be associated before. More...
 
void set_sim_model (const xor_sim_model_type *sim_model) noexcept
 Attach XOR similarity model (must be computed by the same ref vector) More...
 
void set_curr_ref_idx (size_type ref_idx) noexcept
 Set current index in ref.vector collection (not a row idx or plain idx) More...
 
void encode_header (const BV &bv, bm::encoder &enc) noexcept
 Encode serialization header information. More...
 
void encode_gap_block (const bm::gap_word_t *gap_block, bm::encoder &enc)
 
void gamma_gap_block (const bm::gap_word_t *gap_block, bm::encoder &enc) noexcept
 
void gamma_gap_array (const bm::gap_word_t *gap_block, unsigned arr_len, bm::encoder &enc, bool inverted=false) noexcept
 Encode GAP block as delta-array with Elias Gamma coder. More...
 
void encode_bit_array (const bm::word_t *block, bm::encoder &enc, bool inverted) noexcept
 Encode bit-block as an array of bits. More...
 
void gamma_gap_bit_block (const bm::word_t *block, bm::encoder &enc) noexcept
 
void gamma_arr_bit_block (const bm::word_t *block, bm::encoder &enc, bool inverted) noexcept
 
void bienc_arr_bit_block (const bm::word_t *block, bm::encoder &enc, bool inverted) noexcept
 
void bienc_arr_sblock (const BV &bv, unsigned sb, bm::encoder &enc) noexcept
 
void bienc_gap_bit_block (const bm::word_t *block, bm::encoder &enc) noexcept
 encode bit-block as interpolated bit block of gaps More...
 
void interpolated_arr_bit_block (const bm::word_t *block, bm::encoder &enc, bool inverted) noexcept
 
void interpolated_gap_bit_block (const bm::word_t *block, bm::encoder &enc) noexcept
 encode bit-block as interpolated gap block More...
 
void interpolated_gap_array (const bm::gap_word_t *gap_block, unsigned arr_len, bm::encoder &enc, bool inverted) noexcept
 Encode GAP block as an array with binary interpolated coder. More...
 
void interpolated_gap_array_v0 (const bm::gap_word_t *gap_block, unsigned arr_len, bm::encoder &enc, bool inverted) noexcept
 
void interpolated_encode_gap_block (const bm::gap_word_t *gap_block, bm::encoder &enc) noexcept
 
void encode_bit_interval (const bm::word_t *blk, bm::encoder &enc, unsigned size_control) noexcept
 Encode BIT block with repeatable runs of zeroes. More...
 
void encode_bit_digest (const bm::word_t *blk, bm::encoder &enc, bm::id64_t d0) noexcept
 Encode bit-block using digest (hierarchical compression) More...
 
void encode_xor_match_chain (bm::encoder &enc, const block_match_chain_type &mchain) noexcept
 Encode XOR match chain. More...
 
unsigned char find_gap_best_encoding (const bm::gap_word_t *gap_block) noexcept
 Determine best representation for GAP block based on current set compression level. More...
 
unsigned char find_bit_best_encoding (const bm::word_t *block) noexcept
 Determine best representation for a bit-block. More...
 
unsigned char find_bit_best_encoding_l5 (const bm::word_t *block) noexcept
 Determine best representation for a bit-block (level 5) More...
 
void reset_models () noexcept
 
void add_model (unsigned char mod, unsigned score) noexcept
 
void xor_tmp_product (const bm::word_t *s_block, const block_match_chain_type &mchain, unsigned i, unsigned j) noexcept
 Compute digest based XOR product, place into tmp XOR block. More...
 
static void process_bookmark (block_idx_type nb, bookmark_state &bookm, bm::encoder &enc) noexcept
 Check if bookmark needs to be placed and if so, encode it into serialization BLOB. More...
 
 serializer (const serializer &)
 
serializer & operator= (const serializer &)
 

Detailed Description

template<class BV>
class bm::serializer< BV >

Bit-vector serialization class.

Class designed to convert sparse bit-vectors into a single block of memory ready for file or database storage or network transfer.

This class can be reused for multiple serializations (but not across threads). Class reuse offers some performance advantage (helps with temp memory reallocations).

Definition at line 75 of file bmserial.h.

Member Typedef Documentation

◆ allocator_pool_type

template<class BV >
typedef allocator_type::allocator_pool_type bm::serializer< BV >::allocator_pool_type
private

Definition at line 441 of file bmserial.h.

◆ allocator_type

template<class BV >
typedef bvector_type::allocator_type bm::serializer< BV >::allocator_type

Definition at line 79 of file bmserial.h.

◆ bit_out_type

template<class BV >
typedef bm::bit_out<bm::encoder> bm::serializer< BV >::bit_out_type
private

Definition at line 437 of file bmserial.h.

◆ block_arridx_type

template<class BV >
typedef bm::heap_vector<bm::gap_word_t, allocator_type, true> bm::serializer< BV >::block_arridx_type
private

Definition at line 439 of file bmserial.h.

◆ block_idx_type

template<class BV >
typedef bvector_type::block_idx_type bm::serializer< BV >::block_idx_type

Definition at line 82 of file bmserial.h.

◆ block_match_chain_type

Definition at line 90 of file bmserial.h.

◆ blocks_manager_type

template<class BV >
typedef bvector_type::blocks_manager_type bm::serializer< BV >::blocks_manager_type

Definition at line 80 of file bmserial.h.

◆ buffer

template<class BV >
typedef byte_buffer<allocator_type> bm::serializer< BV >::buffer

Definition at line 85 of file bmserial.h.

◆ bv_ref_vector_type

template<class BV >
typedef bm::bv_ref_vector<BV> bm::serializer< BV >::bv_ref_vector_type

Definition at line 86 of file bmserial.h.

◆ bvector_type

template<class BV >
typedef BV bm::serializer< BV >::bvector_type

Definition at line 78 of file bmserial.h.

◆ gamma_encoder_func

template<class BV >
typedef bm::gamma_encoder<bm::gap_word_t, bit_out_type> bm::serializer< BV >::gamma_encoder_func
private

Definition at line 438 of file bmserial.h.

◆ sblock_arridx_type

template<class BV >
typedef bm::heap_vector<unsigned, allocator_type, true> bm::serializer< BV >::sblock_arridx_type
private

Definition at line 440 of file bmserial.h.

◆ size_type

template<class BV >
typedef bvector_type::size_type bm::serializer< BV >::size_type

Definition at line 83 of file bmserial.h.

◆ statistics_type

template<class BV >
typedef bvector_type::statistics bm::serializer< BV >::statistics_type

Definition at line 81 of file bmserial.h.

◆ xor_sim_model_type

template<class BV >
typedef bm::xor_sim_model<BV> bm::serializer< BV >::xor_sim_model_type

Definition at line 87 of file bmserial.h.

Constructor & Destructor Documentation

◆ serializer() [1/3]

template<class BV >
bm::serializer< BV >::serializer ( const allocator_type & alloc = allocator_type(),
bm::word_t * temp_block = 0 
)

Constructor.

Parameters
alloc- memory allocator
temp_block- temporary block for various operations (if NULL it will be allocated and managed by serializer class) Temp block is used as a scratch memory during serialization, use of external temp block allows to avoid unnecessary re-allocations.

Temp block attached is not owned by the class and NOT deallocated on destruction.

Definition at line 1167 of file bmserial.h.

References bm::serializer< BV >::alloc_, bm::serializer< BV >::bit_idx_arr_, bm::serializer< BV >::compression_stat_, bm::serializer< BV >::free_, bm::gap_max_bits, bm::serializer< BV >::optimize_, bm::serializer< BV >::own_temp_block_, bm::heap_vector< Val, BVAlloc, trivial_type >::resize(), bm::serializer< BV >::temp_block_, bm::serializer< BV >::xor_tmp1_, and bm::serializer< BV >::xor_tmp2_.

◆ serializer() [2/3]

template<class BV >
bm::serializer< BV >::serializer ( bm::word_t * temp_block)

◆ ~serializer()

template<class BV >
bm::serializer< BV >::~serializer

Definition at line 1232 of file bmserial.h.

◆ serializer() [3/3]

template<class BV >
bm::serializer< BV >::serializer ( const serializer< BV > &  )
private

Member Function Documentation

◆ add_model()

template<class BV >
void bm::serializer< BV >::add_model ( unsigned char  mod,
unsigned  score 
)
protected noexcept

Definition at line 1652 of file bmserial.h.

References BM_ASSERT, and mod().

◆ allow_stat_reset()

template<class BV >
void bm::serializer< BV >::allow_stat_reset ( bool  allow)
inline noexcept

Enable/disable statistics reset on each serialization.

Definition at line 202 of file bmserial.h.

◆ bienc_arr_bit_block()

template<class BV >
void bm::serializer< BV >::bienc_arr_bit_block ( const bm::word_t * block,
bm::encoder & enc,
bool  inverted 
)
protected noexcept

Definition at line 2327 of file bmserial.h.

References bm::bit_block_convert_to_arr().

◆ bienc_arr_sblock()

template<class BV >
void bm::serializer< BV >::bienc_arr_sblock ( const BV &  bv,
unsigned  sb,
bm::encoder & enc 
)
protected noexcept

◆ bienc_gap_bit_block()

template<class BV >
void bm::serializer< BV >::bienc_gap_bit_block ( const bm::word_t * block,
bm::encoder & enc 
)
protected noexcept

◆ byte_order_serialization()

template<class BV >
void bm::serializer< BV >::byte_order_serialization ( bool  value)
noexcept

Set byte-order serialization (for cross platform compatibility)

Parameters
value- TRUE serialization format includes byte-order marker

Definition at line 1278 of file bmserial.h.

References rapidjson::value.

Referenced by bm::serialize(), CDataFrame< MAX_SIZE >::Serialize(), and StressTest().

◆ compute_sim_model()

template<class BV >
bool bm::serializer< BV >::compute_sim_model ( xor_sim_model_type & sim_model,
const bv_ref_vector_type & ref_vect,
const bm::xor_sim_params & params 
)

Calculate XOR similarity model for ref_vector; reference vector must be associated before.

Parameters
sim_model- [out] similarity model to compute
ref_vect- [in] reference vectors
params- parameters to regulate search depth
Returns
true - if similarity model created successfully
See also
set_ref_vectors

Definition at line 1313 of file bmserial.h.

Referenced by SerializationCompressionLevelsTest().

◆ encode_bit_array()

template<class BV >
void bm::serializer< BV >::encode_bit_array ( const bm::word_t * block,
bm::encoder & enc,
bool  inverted 
)
protected noexcept

Encode bit-block as an array of bits.

Definition at line 2283 of file bmserial.h.

References bm::bit_block_convert_to_arr(), bm::set_block_arrbit, and bm::set_block_arrbit_inv.

◆ encode_bit_digest()

template<class BV >
void bm::serializer< BV >::encode_bit_digest ( const bm::word_t * blk,
bm::encoder & enc,
bm::id64_t  d0 
)
protected noexcept

Encode bit-block using digest (hierarchical compression)

Definition at line 2108 of file bmserial.h.

References bm::bmi_blsi_u64(), bm::bmi_bslr_u64(), bm::set_block_bit, bm::set_block_bit_digest0, bm::set_block_digest_wave_size, bm::set_block_size, t, and bm::word_bitcount64().

◆ encode_bit_interval()

template<class BV >
void bm::serializer< BV >::encode_bit_interval ( const bm::word_t * blk,
bm::encoder & enc,
unsigned  size_control 
)
protected noexcept

Encode BIT block with repeatable runs of zeroes.

Definition at line 2056 of file bmserial.h.

References BM_ASSERT, i, bm::set_block_bit_0runs, and bm::set_block_size.

◆ encode_gap_block()

template<class BV >
void bm::serializer< BV >::encode_gap_block ( const bm::gap_word_t * gap_block,
bm::encoder & enc 
)
protected

◆ encode_header()

template<class BV >
void bm::serializer< BV >::encode_header ( const BV &  bv,
bm::encoder & enc 
)
protected noexcept

◆ encode_xor_match_chain()

template<class BV >
void bm::serializer< BV >::encode_xor_match_chain ( bm::encoder & enc,
const block_match_chain_type & mchain 
)
protected noexcept

Encode XOR match chain.

Definition at line 2161 of file bmserial.h.

References BM_ASSERT, bm::check_pair_vect_vbr(), and bm::set_block_xor_chain.

◆ find_bit_best_encoding()

template<class BV >
unsigned char bm::serializer< BV >::find_bit_best_encoding ( const bm::word_t * block)
protected noexcept

◆ find_bit_best_encoding_l5()

template<class BV >
unsigned char bm::serializer< BV >::find_bit_best_encoding_l5 ( const bm::word_t * block)
protected noexcept

◆ find_gap_best_encoding()

template<class BV >
unsigned char bm::serializer< BV >::find_gap_best_encoding ( const bm::gap_word_t * gap_block)
protected noexcept

Determine best representation for GAP block based on current set compression level.

Returns
set_block_bit, set_block_bit_1bit, set_block_arrgap set_block_arrgap_egamma, set_block_arrgap_bienc set_block_arrgap_inv, set_block_arrgap_egamma_inv set_block_arrgap_bienc_inv, set_block_gap_egamma set_block_gap_bienc

Definition at line 1945 of file bmserial.h.

References bm::gap_bit_count_unr(), bm::gap_length(), bm::gap_max_bits, len, bm::set_block_arrgap, bm::set_block_arrgap_bienc, bm::set_block_arrgap_bienc_inv, bm::set_block_arrgap_egamma, bm::set_block_arrgap_egamma_inv, bm::set_block_arrgap_inv, bm::set_block_bit_1bit, bm::set_block_gap, bm::set_block_gap_bienc, and bm::set_block_gap_egamma.

◆ gamma_arr_bit_block()

template<class BV >
void bm::serializer< BV >::gamma_arr_bit_block ( const bm::word_t * block,
bm::encoder & enc,
bool  inverted 
)
protected noexcept

◆ gamma_gap_array()

template<class BV >
void bm::serializer< BV >::gamma_gap_array ( const bm::gap_word_t * gap_block,
unsigned  arr_len,
bm::encoder & enc,
bool  inverted = false 
)
protected noexcept

Encode GAP block as delta-array with Elias Gamma coder.

Definition at line 1484 of file bmserial.h.

References bm::bit_out< TEncoder >::gamma(), i, prev(), bm::set_block_arrgap, bm::set_block_arrgap_egamma, bm::set_block_arrgap_egamma_inv, and bm::set_block_arrgap_inv.

◆ gamma_gap_bit_block()

template<class BV >
void bm::serializer< BV >::gamma_gap_bit_block ( const bm::word_t * block,
bm::encoder & enc 
)
protected noexcept

Definition at line 2302 of file bmserial.h.

References bm::bit_to_gap(), BM_ASSERT, bm::gap_equiv_len, and len.

◆ gamma_gap_block()

template<class BV >
void bm::serializer< BV >::gamma_gap_block ( const bm::gap_word_t * gap_block,
bm::encoder & enc 
)
protected noexcept

Encode GAP block with Elias Gamma coder

Definition at line 1445 of file bmserial.h.

References bm::for_each_dgap(), bm::gap_length(), len, bm::set_block_gap, and bm::set_block_gap_egamma.

◆ gap_length_serialization()

template<class BV >
void bm::serializer< BV >::gap_length_serialization ( bool  value)
noexcept

Set GAP length serialization (serializes GAP levels of the original vector)

Parameters
value- when TRUE serialized vector includes GAP levels parameters

Definition at line 1272 of file bmserial.h.

References rapidjson::value.

Referenced by bm::serialize(), bm::compressed_collection_serializer< CBC >::serialize(), CDataFrame< MAX_SIZE >::Serialize(), bm::sparse_vector_serializer< SV >::sparse_vector_serializer(), and StressTest().

◆ get_compression_level()

template<class BV >
unsigned bm::serializer< BV >::get_compression_level ( ) const
inline noexcept

Get current compression level.

Definition at line 133 of file bmserial.h.

◆ get_compression_stat()

template<class BV >
const size_type* bm::serializer< BV >::get_compression_stat ( ) const
inline noexcept

◆ interpolated_arr_bit_block()

template<class BV >
void bm::serializer< BV >::interpolated_arr_bit_block ( const bm::word_t * block,
bm::encoder & enc,
bool  inverted 
)
protected noexcept

◆ interpolated_encode_gap_block()

template<class BV >
void bm::serializer< BV >::interpolated_encode_gap_block ( const bm::gap_word_t * gap_block,
bm::encoder & enc 
)
protected noexcept

◆ interpolated_gap_array()

template<class BV >
void bm::serializer< BV >::interpolated_gap_array ( const bm::gap_word_t * gap_block,
unsigned  arr_len,
bm::encoder & enc,
bool  inverted 
)
protected noexcept

◆ interpolated_gap_array_v0()

template<class BV >
void bm::serializer< BV >::interpolated_gap_array_v0 ( const bm::gap_word_t * gap_block,
unsigned  arr_len,
bm::encoder & enc,
bool  inverted 
)
protected noexcept

◆ interpolated_gap_bit_block()

template<class BV >
void bm::serializer< BV >::interpolated_gap_bit_block ( const bm::word_t * block,
bm::encoder & enc 
)
protected noexcept

encode bit-block as interpolated gap block

Definition at line 2342 of file bmserial.h.

References bm::bit_to_gap(), BM_ASSERT, bm::gap_max_bits, and len.

◆ operator=()

template<class BV >
serializer& bm::serializer< BV >::operator= ( const serializer< BV > &  )
private

◆ optimize_serialize_destroy()

template<class BV >
void bm::serializer< BV >::optimize_serialize_destroy ( BV &  bv,
typename serializer< BV >::buffer & buf 
)

Bitvector serialization into buffer object (resized automatically) Input bit-vector gets optimized and then destroyed, content is NOT guaranteed after this operation.

Effectively it moves data into the buffer.

The reason this operation exists is because it is faster to do all three operations in one single pass. This is a destructive serialization!

Parameters
bv- input/output bitvector
buf- output buffer object

Definition at line 2265 of file bmserial.h.

References buf, bm::serialize(), and st().

Referenced by SerializationTest().

◆ process_bookmark()

template<class BV >
void bm::serializer< BV >::process_bookmark ( block_idx_type  nb,
bookmark_state & bookm,
bm::encoder & enc 
)
static protected noexcept

Check if bookmark needs to be placed and if so, encode it into serialization BLOB.

Parameters
nb- block idx
bookm- bookmark state structure
enc- BLOB encoder

Definition at line 2593 of file bmserial.h.

References BM_ASSERT, bm::set_nb_bookmark16, bm::set_nb_bookmark24, bm::set_nb_bookmark32, bm::set_nb_sync_mark16, bm::set_nb_sync_mark24, bm::set_nb_sync_mark32, bm::set_nb_sync_mark48, bm::set_nb_sync_mark64, bm::set_nb_sync_mark8, and U.

◆ reset_compression_stats()

template<class BV >
void bm::serializer< BV >::reset_compression_stats
noexcept

Reset all accumulated compression statistics.

Definition at line 1244 of file bmserial.h.

References i.

◆ reset_models()

template<class BV >
void bm::serializer< BV >::reset_models ( )
inline protected noexcept

Definition at line 383 of file bmserial.h.

◆ serialize() [1/2]

template<class BV >
void bm::serializer< BV >::serialize ( const BV &  bv,
typename serializer< BV >::buffer & buf,
const statistics_type * bv_stat = 0 
)

Bitvector serialization into buffer object (resized automatically)

Parameters
bv- input bitvector
buf- output buffer object
bv_stat- input (optional) bit-vector statistics object if NULL, serialize will compute the statistics

Definition at line 2241 of file bmserial.h.

References BM_ASSERT, buf, and bm::serialize().

◆ serialize() [2/2]

template<class BV >
serializer< BV >::size_type bm::serializer< BV >::serialize ( const BV &  bv,
unsigned char *  buf,
size_t  buf_size 
)

Bitvector serialization into memory block.

Parameters
bv- input bitvector
buf- out buffer (pre-allocated) No range checking is done in this method. It is the responsibility of the caller to allocate a sufficient amount of memory using information from the calc_stat() function.
buf_size- size of the output buffer
Returns
Size of serialization block.
See also
calc_stat

Definition at line 2703 of file bmserial.h.

References bm::bit_block_find(), BM_ASSERT, bm::BM_HM_SPARSE, BM_IS_GAP, BM_SER_NEXT_GRP, BMGAP_PTR, buf, bm::block_match_chain< BLOCK_IDX >::chain_size, bm::check_block_one(), bm::check_block_zero(), bm::e_no_xor_match, bm::e_xor_match_BC, bm::e_xor_match_EQ, bm::e_xor_match_GC, bm::e_xor_match_iBC, FULL_BLOCK_FAKE_ADDR, bm::get_block_coord(), i, bm::block_match_chain< BLOCK_IDX >::match, bm::block_match_chain< BLOCK_IDX >::nb, bm::encoder::put_16(), bm::encoder::put_32(), bm::encoder::put_64(), bm::encoder::put_8(), bm::encoder::put_8_16_32(), bm::encoder::put_prefixed_array_32(), bm::block_match_chain< BLOCK_IDX >::ref_idx, bm::set_block_16one, bm::set_block_16zero, bm::set_block_1one, bm::set_block_1zero, bm::set_block_32one, bm::set_block_32zero, bm::set_block_64one, bm::set_block_64zero, bm::set_block_8one, bm::set_block_8zero, bm::set_block_aone, bm::set_block_arr_bienc, bm::set_block_arr_bienc_inv, bm::set_block_arrbit, bm::set_block_arrbit_inv, bm::set_block_arrgap_bienc, bm::set_block_arrgap_bienc_inv, bm::set_block_arrgap_egamma, bm::set_block_arrgap_egamma_inv, bm::set_block_azero, bm::set_block_bit, bm::set_block_bit_0runs, bm::set_block_bit_1bit, bm::set_block_bit_digest0, bm::set_block_bitgap_bienc, bm::set_block_end, bm::set_block_gap_bienc, bm::set_block_gap_egamma, bm::set_block_ref_eq, bm::set_block_size, bm::set_block_xor_ref16, bm::set_block_xor_ref16_um, bm::set_block_xor_ref32, bm::set_block_xor_ref32_um, bm::set_block_xor_ref8, bm::set_block_xor_ref8_um, bm::encoder::set_pos(), bm::set_sub_array_size, bm::set_total_blocks, bm::encoder::size(), and bm::block_match_chain< BLOCK_IDX >::xor_d64.

Referenced by CheckRangeDeserial(), bm::compute_serialization_size(), DesrializationTest2(), MutationOperationsTest(), bm::print_bvector_stat(), SerializationCompressionLevelsTest(), SerializationOperation(), SerializationTest(), bm::serialize(), bm::compressed_collection_serializer< CBC >::serialize(), CDataFrame< MAX_SIZE >::Serialize(), SparseSerializationTest(), and StressTest().

◆ set_bookmarks()

template<class BV >
void bm::serializer< BV >::set_bookmarks ( bool  enable,
unsigned  bm_interval = 256 
)
noexcept

Add skip-markers to serialization BLOB for faster range decode at the expense of some BLOB size increase.

Parameters
enable- if TRUE, serialization will add bookmark codes
bm_interval- bookmark interval in (number of blocks) suggested values between 4 and 512 (block size is 64K bits) smaller interval means more bookmarks added to the skip list allows faster range deserialization at the expense of somewhat increased BLOB size.

Definition at line 1284 of file bmserial.h.

Referenced by CheckRangeDeserial(), SerializationCompressionLevelsTest(), SerializationOperation(), bm::sparse_vector_serializer< SV >::set_bookmarks(), and StressTest().

◆ set_compression_level()

template<class BV >
void bm::serializer< BV >::set_compression_level ( unsigned  clevel)
noexcept

Set compression level.

Higher compression takes more time to process.

Parameters
clevel- compression level (0-6) 0 - take as is 1, 2 - apply light weight RLE/GAP encodings, limited depth hierarchical compression, intervals encoding 3 - variant of 2 with different cut-offs 4 - delta transforms plus Elias Gamma encoding where possible (legacy) 5 - Binary Interpolative Coding (BIC) - light settings 6 - Binary Interpolative Coding (BIC) - harder settings
See also
get_compression_level

Definition at line 1251 of file bmserial.h.

References bm::set_compression_max, bm::sparse_max_l5, and bm::sparse_max_l6.

Referenced by SerializationCompressionLevelsTest().

◆ set_curr_ref_idx()

template<class BV >
void bm::serializer< BV >::set_curr_ref_idx ( size_type  ref_idx)
noexcept

Set current index in ref.vector collection (not a row idx or plain idx)

Definition at line 1327 of file bmserial.h.

Referenced by SerializationCompressionLevelsTest().

◆ set_ref_vectors()

template<class BV >
void bm::serializer< BV >::set_ref_vectors ( const bv_ref_vector_type * ref_vect)

Attach collection of reference vectors for XOR serialization (no transfer of ownership for the pointers)

Definition at line 1299 of file bmserial.h.

References bm::set_block_size.

Referenced by SerializationCompressionLevelsTest().

◆ set_sim_model()

template<class BV >
void bm::serializer< BV >::set_sim_model ( const xor_sim_model_type * sim_model)
noexcept

Attach XOR similarity model (must be computed by the same ref vector)

Definition at line 1321 of file bmserial.h.

Referenced by SerializationCompressionLevelsTest().

◆ set_sparse_cutoff()

template<class BV >
void bm::serializer< BV >::set_sparse_cutoff ( unsigned  cutoff)
noexcept

Fine tuning for Binary Interpolative Compression (levels 5+) The parameter sets average population count per block (64Kbits) below which block is considered very sparse.

If super block (group of 256 blocks) is very sparse it applies block size expansion (for the compression purposes) to improve compression rates.

Definition at line 1262 of file bmserial.h.

References BM_ASSERT, and bm::sparse_max_l6.

◆ xor_tmp_product()

template<class BV >
void bm::serializer< BV >::xor_tmp_product ( const bm::word_t * s_block,
const block_match_chain_type & mchain,
unsigned  i,
unsigned  j 
)
protected noexcept

Compute digest based XOR product, place into tmp XOR block.

Definition at line 2206 of file bmserial.h.

References bm::bit_block_xor(), BM_IS_GAP, BMGAP_PTR, bm::gap_convert_to_bitset(), and i.

Member Data Documentation

◆ alloc_

template<class BV >
allocator_type bm::serializer< BV >::alloc_
private

Definition at line 453 of file bmserial.h.

Referenced by bm::serializer< BV >::serializer().

◆ allow_stat_reset_

template<class BV >
bool bm::serializer< BV >::allow_stat_reset_ = true
private

controls zeroing of telemetry

Definition at line 455 of file bmserial.h.

Referenced by bm::serializer< bvector_type >::allow_stat_reset().

◆ bit_idx_arr_

template<class BV >
block_arridx_type bm::serializer< BV >::bit_idx_arr_
private

Definition at line 447 of file bmserial.h.

Referenced by bm::serializer< BV >::serializer().

◆ bit_model_0run_size_

template<class BV >
unsigned bm::serializer< BV >::bit_model_0run_size_
private

memory (bytes) by run-0 method (bytes)

Definition at line 446 of file bmserial.h.

◆ bit_model_d0_size_

template<class BV >
unsigned bm::serializer< BV >::bit_model_d0_size_
private

memory (bytes) by d0 method (bytes)

Definition at line 445 of file bmserial.h.

◆ byte_order_serial_

template<class BV >
bool bm::serializer< BV >::byte_order_serial_
private

Definition at line 457 of file bmserial.h.

◆ compression_level_

template<class BV >
unsigned bm::serializer< BV >::compression_level_
private

Definition at line 463 of file bmserial.h.

Referenced by bm::serializer< bvector_type >::get_compression_level().

◆ compression_stat_

template<class BV >
size_type* bm::serializer< BV >::compression_stat_
private

◆ digest0_

template<class BV >
bm::id64_t bm::serializer< BV >::digest0_
private

Definition at line 444 of file bmserial.h.

◆ enc_header_pos_

template<class BV >
unsigned char* bm::serializer< BV >::enc_header_pos_
private

pos of top level header to roll back

Definition at line 471 of file bmserial.h.

◆ free_

template<class BV >
bool bm::serializer< BV >::free_
private

flag to free the input vector

Definition at line 467 of file bmserial.h.

Referenced by bm::serializer< BV >::serializer().

◆ gap_serial_

template<class BV >
bool bm::serializer< BV >::gap_serial_
private

Definition at line 456 of file bmserial.h.

◆ header_flag_

template<class BV >
unsigned char bm::serializer< BV >::header_flag_
private

set of masks used to save

Definition at line 472 of file bmserial.h.

◆ mod_size_

template<class BV >
unsigned bm::serializer< BV >::mod_size_
private

Definition at line 451 of file bmserial.h.

Referenced by bm::serializer< bvector_type >::reset_models().

◆ models_

template<class BV >
unsigned char bm::serializer< BV >::models_[bm::block_waves]
private

Definition at line 450 of file bmserial.h.

◆ optimize_

template<class BV >
bool bm::serializer< BV >::optimize_
private

flag to optimize the input vector

Definition at line 466 of file bmserial.h.

Referenced by bm::serializer< BV >::serializer().

◆ own_temp_block_

template<class BV >
bool bm::serializer< BV >::own_temp_block_
private

Definition at line 464 of file bmserial.h.

Referenced by bm::serializer< BV >::serializer().

◆ pool_

template<class BV >
allocator_pool_type bm::serializer< BV >::pool_
private

Definition at line 468 of file bmserial.h.

◆ ref_idx_

template<class BV >
size_type bm::serializer< BV >::ref_idx_
private

current reference index

Definition at line 479 of file bmserial.h.

◆ ref_vect_

template<class BV >
const bv_ref_vector_type* bm::serializer< BV >::ref_vect_
private

ref.vector for XOR compression

Definition at line 476 of file bmserial.h.

◆ sb_bit_idx_arr_

template<class BV >
sblock_arridx_type bm::serializer< BV >::sb_bit_idx_arr_
private

Definition at line 448 of file bmserial.h.

◆ sb_bookmarks_

template<class BV >
bool bm::serializer< BV >::sb_bookmarks_
private

Bookmarks flag.

Definition at line 459 of file bmserial.h.

◆ sb_range_

template<class BV >
unsigned bm::serializer< BV >::sb_range_
private

Desired bookmarks interval.

Definition at line 460 of file bmserial.h.

◆ scores_

template<class BV >
unsigned bm::serializer< BV >::scores_[bm::block_waves]
private

Definition at line 449 of file bmserial.h.

◆ sim_model_

template<class BV >
const xor_sim_model_type* bm::serializer< BV >::sim_model_
private

similarity model matrix

Definition at line 477 of file bmserial.h.

◆ sparse_cutoff_

template<class BV >
unsigned bm::serializer< BV >::sparse_cutoff_
private

number of bits per blocks to consider sparse

Definition at line 484 of file bmserial.h.

◆ temp_block_

template<class BV >
bm::word_t* bm::serializer< BV >::temp_block_
private

Definition at line 462 of file bmserial.h.

Referenced by bm::serializer< BV >::serializer().

◆ xor_scan_

template<class BV >
bm::xor_scanner<BV> bm::serializer< BV >::xor_scan_
private

scanner for XOR similarity

Definition at line 478 of file bmserial.h.

◆ xor_tmp1_

template<class BV >
bm::word_t* bm::serializer< BV >::xor_tmp1_
private

Definition at line 481 of file bmserial.h.

Referenced by bm::serializer< BV >::serializer().

◆ xor_tmp2_

template<class BV >
bm::word_t* bm::serializer< BV >::xor_tmp2_
private

Definition at line 482 of file bmserial.h.

Referenced by bm::serializer< BV >::serializer().

◆ xor_tmp_block_

template<class BV >
bm::word_t* bm::serializer< BV >::xor_tmp_block_
private

tmp area for xor product

Definition at line 480 of file bmserial.h.


The documentation for this class was generated from the following file:
Modified on Fri Sep 20 14:58:19 2024 by modify_doxy.py rev. 669887