1 #ifndef BMALGO_IMPL__H__INCLUDED__
2 #define BMALGO_IMPL__H__INCLUDED__
26 #pragma warning( push )
27 #pragma warning( disable : 4311 4312 4127)
119 template <
class VCBT,
class size_type>
130 for (
unsigned i = 0;
i <
size; ++
i)
140 for (size_type
i = 0;
i <
size; ++
i)
157 template <
class BII,
class size_type>
167 for (
unsigned i = 0;
i <
size; ++
i)
173 for (size_type
i = 0;
i <
size; ++
i)
373 dmd.
result += (*gfunc)(blk, arg_blk);
701 return unsigned(dmd.
result);
771 const typename BV::blocks_manager_type& bman1 = bv1.get_blocks_manager();
772 const typename BV::blocks_manager_type& bman2 = bv2.get_blocks_manager();
774 bool is_all_and =
true;
777 bm::word_t*** blk_root = bman1.top_blocks_root();
778 typename BV::size_type block_idx = 0;
784 unsigned top_block_size = bman1.top_block_size();
785 unsigned ebs2 = bman2.top_block_size();
787 if (ebs2 > top_block_size)
790 top_size = top_block_size;
792 for (
i = 0;
i < top_size; ++
i)
794 bm::word_t** blk_blk = (blk_root && (
i < top_block_size)) ? blk_root[
i] : 0;
803 const bm::word_t*
const* bvbb = bman2.get_topblock(
i);
813 arg_blk = bman2.get_block(
i, j);
825 blk = bman1.get_block(
i, j);
826 if (blk == 0 && is_all_and)
829 arg_blk = bman2.get_block(
i, j);
831 if (!blk && !arg_blk)
856 const typename BV::blocks_manager_type& bman1 = bv1.get_blocks_manager();
857 const typename BV::blocks_manager_type& bman2 = bv2.get_blocks_manager();
859 if (!bman1.is_init() || !bman2.is_init())
862 bm::word_t*** blk_root = bman1.top_blocks_root();
863 bm::word_t*** blk_root_arg = bman2.top_blocks_root();
864 typename BV::size_type
count = 0;
866 unsigned top_block_size =
867 bm::min_value(bman1.top_block_size(),bman2.top_block_size());
869 for (
unsigned i = 0;
i < top_block_size; ++
i)
873 if ((blk_blk = blk_root[
i]) == 0 || (blk_blk_arg= blk_root_arg[
i]) == 0)
883 (blk_blk[j] && blk_blk_arg[j]) ?
886 (blk_blk[j+1] && blk_blk_arg[j+1]) ?
889 (blk_blk[j+2] && blk_blk_arg[j+2]) ?
892 (blk_blk[j+3] && blk_blk_arg[j+3]) ?
927 const typename BV::blocks_manager_type& bman1 = bv1.get_blocks_manager();
928 const typename BV::blocks_manager_type& bman2 = bv2.get_blocks_manager();
930 bool is_all_and =
true;
933 bm::word_t*** blk_root = bman1.top_blocks_root();
934 unsigned block_idx = 0;
942 unsigned top_block_size = bman1.top_block_size();
943 unsigned ebs2 = bman2.top_block_size();
945 if (ebs2 > top_block_size)
948 top_size = top_block_size;
950 for (
i = 0;
i < top_size; ++
i)
952 bm::word_t** blk_blk = (blk_root && (
i < top_block_size)) ? blk_root[
i] : 0;
962 const bm::word_t*
const* bvbb = bman2.get_topblock(
i);
974 arg_blk = bman2.get_block(
i, j);
984 bool all_resolved =
false;
990 all_resolved =
false;
994 }
while (it < dmit_end);
1004 blk = bman1.get_block(
i, j);
1005 if (blk == 0 && is_all_and)
1008 arg_blk = bman2.get_block(
i, j);
1010 if (blk == 0 && arg_blk == 0)
1021 bool all_resolved =
true;
1027 all_resolved =
false;
1031 }
while (it < dmit_end);
1046 template<
typename It,
typename SIZE_TYPE>
1052 for (right =
first; right !=
last; ++right)
1079 template<
class BV,
class It>
1082 typedef typename BV::size_type size_type;
1083 typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
1084 if (!bman.is_init())
1087 size_type max_id = 0;
1093 if (max_id >= bv.size())
1096 bv.resize(max_id + 1);
1105 bman.check_allocate_block(nblock,
1107 bv.get_new_blocks_strat(),
1112 if (block_type == 1)
1122 unsigned new_block_len =
1124 if (new_block_len > threshold)
1126 bman.extend_gap_block(nblock, gap_blk);
1136 size_type pos = *
first;
1140 blk[nword] |= (1u << nbit);
1160 template<
class BV,
class It>
1163 typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
1164 if (!bman.is_init())
1167 typename BV::size_type max_id = 0;
1174 if (max_id >= bv.size())
1177 bv.resize(max_id + 1);
1186 bman.check_allocate_block(nblock,
1188 bv.get_new_blocks_strat(),
1193 if (block_type == 1)
1207 unsigned new_block_len =
1209 if (new_block_len > threshold)
1211 bman.extend_gap_block(nblock, gap_blk);
1224 blk[nword] ^= (1u << nbit);
1247 template<
class BV,
class It>
1250 typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
1251 if (!bman.is_init())
1254 typename BV::size_type max_id = 0;
1261 if (max_id >= bv.size())
1264 bv.resize(max_id + 1);
1273 bman.check_allocate_block(nblock,
1275 bv.get_new_blocks_strat(),
1281 if (block_type == 1)
1295 unsigned new_block_len =
1297 if (new_block_len > threshold)
1299 bman.extend_gap_block(nblock, gap_blk);
1332 template<
class BV,
class It>
1335 typename BV::size_type
prev = 0;
1338 typename BV::size_type
id = *
first;
1340 bv.set_bit_and(
id,
true);
1343 bv.set_range(
prev,
id-1,
false);
1364 template<
class BV,
class It>
1391 const typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
1393 if (!bman.is_init())
1396 bm::word_t*** blk_root = bman.top_blocks_root();
1397 typename BV::blocks_manager_type::block_count_change_func func(bman);
1398 typename BV::blocks_manager_type::block_idx_type
st = 0;
1401 typename BV::size_type intervals = func.count();
1404 intervals -= last_bit_set;
1422 template<
typename BV,
class It>
1425 typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
1426 if (!bman.is_init())
1429 unsigned inp_word_size =
sizeof(*first);
1430 size_t array_size = size_t(
last -
first);
1431 size_t bit_cnt = array_size * inp_word_size * 8;
1436 if (bit_cnt >= bv.size())
1439 bv.set_range((
typename BV::size_type)bit_cnt, bv.size() - 1,
false);
1440 switch (inp_word_size)
1444 size_t word_cnt = array_size / 4;
1448 bman.check_allocate_block(
i,
1453 if (block_type == 1)
1455 blk = bman.deoptimize_block(
i);
1464 tmp = b1 | (b2 << 8u) | (b3 << 16u) | (b4 << 24u);
1466 }
while (wrd_ptr < wrd_end);
1471 size_t to_convert = size_t(
last -
first);
1472 for (
size_t j = 0; j < to_convert / 4; ++j)
1476 tmp = b1 | (b2 << 8u) | (b3 << 16u) | (b4 << 24u);
1498 size_t word_cnt = array_size / 2;
1502 bman.check_allocate_block(
i,
1508 blk = bman.deoptimize_block(
i);
1515 tmp = b1 | (b2 << 16);
1517 }
while (wrd_ptr < wrd_end);
1522 size_t to_convert = size_t(
last -
first);
1523 for (
unsigned j = 0; j < to_convert / 2; ++j)
1526 tmp = b1 | (b2 << 16u);
1544 size_t word_cnt = array_size;
1548 bman.check_allocate_block(
i,
1553 if (block_type == 1)
1554 blk = bman.deoptimize_block(
i);
1562 }
while (wrd_ptr < wrd_end);
1596 template<
typename Func,
typename SIZE_TYPE>
1614 ret = bit_functor.add_bits(offs, bits,
cnt);
1620 }
while (block < block_end);
1636 template<
typename Func,
typename SIZE_TYPE>
1638 unsigned left,
unsigned right,
1648 unsigned sz = right - left + 1;
1649 ret = bit_functor.add_range(
offset + left, sz);
1654 unsigned cnt, nword, nbit, bitcount, temp;
1660 if ((*word >> nbit) & 1u)
1662 bits[0] = (
unsigned char)nbit;
1663 ret = bit_functor.add_bits(
offset + (nword * 32), bits, 1);
1668 bitcount = right - left + 1u;
1671 unsigned right_margin = nbit + right - left;
1672 if (right_margin < 32)
1676 unsigned mask = mask_r & mask_l;
1678 temp = (*word &
mask);
1681 ret = bit_functor.add_bits(
offset + (nword * 32), bits,
cnt);
1685 temp = *word & mask_r;
1689 ret = bit_functor.add_bits(
offset + (nword * 32), bits,
cnt);
1693 bitcount -= 32 - nbit;
1698 bitcount = right - left + 1u;
1703 for ( ;bitcount >= 128;
1710 ret = bit_functor.add_bits(
offset + (nword * 32), bits,
cnt);
1716 for ( ;bitcount >= 32; bitcount-=32, ++word)
1722 ret = bit_functor.add_bits(
offset + (nword * 32), bits,
cnt);
1734 temp = *word & mask_l;
1738 ret = bit_functor.add_bits(
offset + (nword * 32), bits,
cnt);
1759 template<
typename T,
typename Func,
typename SIZE_TYPE>
1763 const T* pcurr =
buf + 1;
1764 const T* pend =
buf + (*
buf >> 3);
1768 ret = bit_functor.add_range(
offset, *pcurr + 1);
1773 for (++pcurr; (pcurr <= pend) && (ret >= 0); pcurr += 2)
1775 T prev = *(pcurr-1);
1794 template<
typename T,
typename Func,
typename SIZE_TYPE>
1797 unsigned left,
unsigned right,
1809 if (right <= *pcurr)
1811 ret = bit_functor.add_range(
offset + left, (right + 1)-left);
1814 ret = bit_functor.add_range(
offset + left, (*pcurr + 1)-left);
1821 for (++pcurr; pcurr <= pend; pcurr += 2)
1823 T prev = *(pcurr-1);
1824 if (right <= *pcurr)
1828 ret = bit_functor.add_range(
offset +
prev + 1,
unsigned(sz));
1843 template<
typename T,
typename N,
typename F>
1845 N top_size,
N nb_from,
N nb_to,
F&
f)
1848 if (nb_from > nb_to)
1855 if (i_from >= top_size)
1857 if (i_to >= top_size)
1859 i_to = unsigned(top_size-1);
1863 for (
unsigned i = i_from;
i <= i_to; ++
i)
1865 T** blk_blk = root[
i];
1870 unsigned j = (
i == i_from) ? j_from : 0;
1871 if (!j && (
i != i_to))
1873 N base_idx = bm::get_super_block_start<N>(
i);
1882 N base_idx = bm::get_block_start<N>(
i, j);
1886 if ((
i == i_to) && (j == j_to))
1893 unsigned j = (
i == i_from) ? j_from : 0;
1899 N base_idx = bm::get_block_start<N>(
i, j);
1900 if (0 != (block = blk_blk[j]))
1911 if ((
i == i_to) && (j == j_to))
1924 template<
class BV,
class Func>
1926 typename BV::size_type left,
1927 typename BV::size_type right,
1930 typedef typename BV::size_type size_type;
1931 typedef typename BV::block_idx_type block_idx_type;
1933 const typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
1934 bm::word_t*** blk_root = bman.top_blocks_root();
1943 const bm::word_t* block = bman.get_block_ptr(i0, j0);
1947 if (nblock_left == nblock_right)
1955 nbit_left, nbit_right, bit_functor);
1965 if (nbit_left && block)
1984 block_idx_type top_blocks_size = bman.top_block_size();
1986 nblock_left, nblock_right-1, bit_functor);
1993 block = bman.get_block_ptr(i0, j0);
2001 0, nbit_right, bit_functor);
2015 template<
typename BV,
typename VECT>
2020 typename BV::size_type from, to, idx;
2022 from = sb * sb_max_bc;
2023 to = (sb+1) * sb_max_bc;
2027 typename BV::enumerator en = bv.get_enumerator(from);
2028 for (; en.valid(); ++en)
2042 #pragma warning( pop )
#define IS_FULL_BLOCK(addr)
#define BLOCK_ADDR_SAN(addr)
#define FULL_BLOCK_FAKE_ADDR
#define BM_VECT_ALIGN_ATTR
#define FULL_SUB_BLOCK_REAL_ADDR
Bit manipulation primitives (internal)
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
NCBI_NS_STD::string::size_type SIZE_TYPE
bm::id_t bit_block_count(const bm::word_t *block) noexcept
Bitcount for bit block.
bm::id_t bit_operation_sub_count(const bm::word_t *src1, const bm::word_t *src2) noexcept
Performs bitblock SUB operation and calculates bitcount of the result.
int for_each_bit_blk(const bm::word_t *block, SIZE_TYPE offset, Func &bit_functor)
for-each visitor, calls a visitor functor for each 1 bit group
unsigned short bitscan(V w, B *bits) noexcept
Templated Bitscan with dynamic dispatch for best type.
unsigned short bitscan_wave(const bm::word_t *w_ptr, unsigned char *bits) noexcept
Unpacks word wave (Nx 32-bit words)
bm::id_t bit_operation_xor_any(const bm::word_t *src1, const bm::word_t *src2) noexcept
Performs bitblock XOR operation test.
bool bit_is_all_zero(const bm::word_t *start) noexcept
Returns "true" if all bits in the block are 0.
bm::id_t bit_operation_or_any(const bm::word_t *src1, const bm::word_t *src2) noexcept
Performs bitblock OR operation test.
bm::id_t bit_operation_and_any(const bm::word_t *src1, const bm::word_t *src2) noexcept
Performs bitblock AND operation test.
bm::id_t bit_operation_sub_any(const bm::word_t *src1, const bm::word_t *src2) noexcept
Performs bitblock test of SUB operation.
bm::id_t bit_operation_and_count(const bm::word_t *src1, const bm::word_t *src2) noexcept
Performs bitblock AND operation and calculates bitcount of the result.
set_operation
Codes of set operations.
@ BM_BIT
No GAP compression strategy. All new blocks are bit blocks.
void distance_operation(const BV &bv1, const BV &bv2, distance_metric_descriptor *dmit, distance_metric_descriptor *dmit_end) noexcept
Distance computing template function.
BV::size_type distance_and_operation(const BV &bv1, const BV &bv2) noexcept
Distance AND computing template function.
void distance_operation_any(const BV &bv1, const BV &bv2, distance_metric_descriptor *dmit, distance_metric_descriptor *dmit_end) noexcept
Distance screening template function.
distance_metric
Distance metrics codes defined for vectors A and B.
distance_metric operation2metric(set_operation op) noexcept
Convert set operation into compatible distance metric.
@ COUNT_XOR
(A ^ B).count()
@ COUNT_SUB_AB
(A - B).count()
@ COUNT_AND
(A & B).count()
@ COUNT_OR
(A | B).count()
@ COUNT_SUB_BA
(B - A).count()
bm::id_t gap_bitset_sub_any(const unsigned *block, const T *buf) noexcept
Compute bitcount test of bit block SUB masked by GAP block.
unsigned gap_operation_any_xor(const gap_word_t *vect1, const gap_word_t *vect2) noexcept
GAP XOR operation test.
unsigned gap_limit(const T *buf, const T *glevel_len) noexcept
Returs GAP block capacity limit.
unsigned gap_count_or(const gap_word_t *vect1, const gap_word_t *vect2) noexcept
GAP bitcount OR operation test.
int for_each_gap_blk(const T *buf, SIZE_TYPE offset, Func &bit_functor)
for-each visitor, calls a special visitor functor for each 1 bit range
bm::id_t gap_bitset_xor_count(const unsigned *block, const T *buf) noexcept
Compute bitcount of bit block XOR masked by GAP block.
gap_word_t * gap_operation_or(const gap_word_t *vect1, const gap_word_t *vect2, gap_word_t *tmp_buf, unsigned &dsize) noexcept
GAP OR operation.
bm::id_t gap_bitset_sub_count(const unsigned *block, const T *buf) noexcept
Compute bitcount of bit block SUB masked by GAP block.
unsigned gap_count_xor(const gap_word_t *vect1, const gap_word_t *vect2) noexcept
GAP bitcount XOR operation test.
unsigned gap_test_unr(const T *buf, const unsigned pos) noexcept
Tests if bit = pos is true. Analog of bm::gap_test with SIMD unrolling.
bool gap_is_all_zero(const bm::gap_word_t *buf) noexcept
Checks if GAP block is all-zero.
void gap_convert_to_bitset(unsigned *dest, const T *buf, unsigned len=0) noexcept
GAP block to bitblock conversion.
unsigned gap_count_sub(const gap_word_t *vect1, const gap_word_t *vect2) noexcept
GAP bitcount SUB (AND NOT) operation test.
bm::id_t gap_bitset_xor_any(const unsigned *block, const T *buf) noexcept
Compute bitcount test of bit block XOR masked by GAP block.
unsigned gap_bit_count_unr(const T *buf) noexcept
Calculates number of bits ON in GAP buffer. Loop unrolled version.
unsigned gap_operation_any_and(const gap_word_t *vect1, const gap_word_t *vect2) noexcept
GAP AND operation test.
bm::id_t gap_bitset_or_count(const unsigned *block, const T *buf) noexcept
Compute bitcount of bit block OR masked by GAP block.
bm::id_t gap_bitset_and_count(const unsigned *block, const T *pcurr) noexcept
Compute bitcount of bit block AND masked by GAP block.
int for_each_gap_blk_range(const T *buf, SIZE_TYPE offset, unsigned left, unsigned right, Func &bit_functor)
for-each visitor, calls a special visitor functor for each 1 bit range
unsigned gap_set_value(unsigned val, T *buf, unsigned pos, unsigned *is_set) noexcept
Sets or clears bit in the GAP buffer.
unsigned gap_count_and(const gap_word_t *vect1, const gap_word_t *vect2) noexcept
GAP bitcount AND operation test.
bm::id_t gap_bitset_and_any(const unsigned *block, const T *pcurr) noexcept
Bitcount test of bit block AND masked by GAP block.
unsigned gap_operation_any_sub(const gap_word_t *vect1, const gap_word_t *vect2) noexcept
GAP SUB operation test.
bm::id_t gap_bitset_or_any(const unsigned *block, const T *buf) noexcept
Compute bitcount test of bit block OR masked by GAP block.
unsigned int
A callback function used to compare two keys in a database.
void combine_and_sorted(BV &bv, It first, It last)
AND Combine bitvector and the iterable sequence.
void export_array(BV &bv, It first, It last)
Export bitset from an array of binary data representing the bit vector.
void combine_and(BV &bv, It first, It last)
AND Combine bitvector and the iterable sequence.
void combine_sub(BV &bv, It first, It last)
SUB Combine bitvector and the iterable sequence.
void combine_xor(BV &bv, It first, It last)
XOR Combine bitvector and the iterable sequence.
void combine_or(BV &bv, It first, It last)
OR Combine bitvector and the iterable sequence.
BV::size_type count_intervals(const BV &bv)
Compute number of bit intervals (GAPs) in the bitvector.
const unsigned set_array_mask
void combine_any_operation_with_block(const bm::word_t *blk, unsigned gap, const bm::word_t *arg_blk, unsigned arg_gap, distance_metric_descriptor *dmit, distance_metric_descriptor *dmit_end) noexcept
Internal function computes different existense of distance metric.
It block_range_scan(It first, It last, SIZE_TYPE nblock, SIZE_TYPE *max_id) noexcept
Internal algorithms scans the input for the block range limit.
bm::id_t(* bit_operation_count_func_type)(const bm::word_t *, const bm::word_t *)
int for_each_bit_range_no_check(const BV &bv, typename BV::size_type left, typename BV::size_type right, Func &bit_functor)
Implementation of for_each_bit_range without boilerplave checks.
const unsigned set_block_mask
const unsigned set_sub_array_size
void get_block_coord(BI_TYPE nb, unsigned &i, unsigned &j) noexcept
Recalc linear bvector block index into 2D matrix coordinates.
const unsigned set_total_blocks
int for_each_bit_block_range(T ***root, N top_size, N nb_from, N nb_to, F &f)
void convert_sub_to_arr(const BV &bv, unsigned sb, VECT &vect)
convert sub-blocks to an array of set 1s (32-bit)
const unsigned set_word_shift
const unsigned set_sub_total_bits
const unsigned set_block_size
unsigned long long int id64_t
unsigned gap_bfind(const T *buf, unsigned pos, unsigned *is_set) noexcept
const unsigned gap_max_buff_len
unsigned mask_r_u32(unsigned nbit) noexcept
unsigned combine_count_and_operation_with_block(const bm::word_t *blk, const bm::word_t *arg_blk) noexcept
Internal function computes AND distance.
void distance_stage(const distance_metric_descriptor *dmit, const distance_metric_descriptor *dmit_end, bool *is_all_and) noexcept
Staging function for distance operation.
const unsigned short set_bitscan_wave_size
Size of bit decode wave in words.
const unsigned set_array_shift
unsigned short gap_word_t
const unsigned gap_max_bits
void for_each_block(T ***root, unsigned size1, F &f, BLOCK_IDX start)
const unsigned set_block_shift
const unsigned set_word_mask
T min_value(T v1, T v2) noexcept
Get minimum of 2 values.
const unsigned bits_in_block
void combine_count_operation_with_block(const bm::word_t *blk, const bm::word_t *arg_blk, distance_metric_descriptor *dmit, distance_metric_descriptor *dmit_end) noexcept
Internal function computes different distance metrics.
bool is_const_set_operation(set_operation op) noexcept
Returns true if set operation is constant (bitcount)
unsigned mask_l_u32(unsigned nbit) noexcept
double value_type
The numeric datatype used by the parser.
const struct ncbi::grid::netcache::search::fields::SIZE size
#define F(x)
Make a parametrized function appear to have only one variable.
double f(double x_, const double &y_)
static SLJIT_INLINE sljit_ins st(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
functor-adaptor for back-inserter
int add_range(size_type offset, size_type size)
int add_bits(size_type offset, const unsigned char *bits, unsigned size)
bit_visitor_back_inserter_adaptor(BII bi)
functor-adaptor for C-style callbacks
VCBT bit_visitor_callback_type
bit_visitor_callback_adaptor(void *h, bit_visitor_callback_type cb_func)
int add_bits(size_type offset, const unsigned char *bits, unsigned size)
bit_visitor_callback_type func_
int add_range(size_type offset, size_type size)
Distance metric descriptor, holds metric code and result.
void reset() noexcept
Sets metric result to 0.
distance_metric_descriptor(distance_metric m) noexcept
distance_metric_descriptor() noexcept
static bit_operation_count_func_type bit_operation_count(unsigned i)