1 #ifndef BMAGGREGATOR__H__INCLUDED__
2 #define BMAGGREGATOR__H__INCLUDED__
26 #ifndef BM__H__INCLUDED__
29 # error missing include (bm.h or bm64.h)
120 template<
typename BV>
222 template<
class Opt = bm::agg_run_options<> >
360 typename bvector_type::optmode opt = bvector_type::opt_compress)
BMNOEXCEPT
426 template<
class TPipe>
449 template<
typename BII>
532 template<
typename BII>
595 size_t src_sub_size);
678 bool init_clear =
true);
720 unsigned i,
unsigned j);
725 unsigned i,
unsigned j);
727 const size_t* src_idx,
729 unsigned i,
unsigned j);
733 unsigned i,
unsigned j,
const arena& ar);
787 return new(p) arena();
799 return new(p) arg_groups();
873 template<
typename Agg,
typename It>
878 int pipeline_size = 0;
879 for (It it =
first; it !=
last; ++it, ++pipeline_size)
891 for (It it =
first; it !=
last; ++it, ++w)
894 auto op_st = agg.get_operation_status();
895 if (op_st != Agg::operation_status::op_done)
897 op_st = agg.run_step(
i, j);
898 pipeline_size -= (op_st == Agg::operation_status::op_done);
901 if (pipeline_size <= 0)
915 template<
typename BV>
927 template<
typename BV>
940 template<
typename BV>
949 template<
typename BV>
953 ar_->reset_all_blocks();
954 operation_ = top_block_size_ = 0;
955 operation_status_ = operation_status::op_undefined;
956 count_ = 0; bcache_ptr_ = 0; gap_cache_cnt_ = 0;
961 template<
typename BV>
966 range_gap_blk_[0] = 0;
967 is_single_bit_ =
false;
973 template<
typename BV>
976 range_from_ = from; range_to_ = to;
978 typename bvector_type::block_idx_type
980 if (nb_from == nb_to)
982 gap_init_range_block<gap_word_t>(
991 range_gap_blk_[0] = 0;
998 template<
typename BV>
1012 template<
typename BV>
1015 return ag_.add(bv, agr_group);
1020 template<
typename BV>
1024 combine_or(bv_target, ag_.arg_bv0.data(), ag_.arg_bv0.size());
1029 template<
typename BV>
1035 combine_and_sub(bv_target,
1036 ag_.arg_bv0.data(), ag_.arg_bv0.size(),
1043 template<
typename BV>
1047 return combine_and_sub(bv_target,
1048 ag_.arg_bv0.data(), ag_.arg_bv0.size(),
1049 ag_.arg_bv1.data(), ag_.arg_bv1.size(),
1055 template<
typename BV>
1059 return combine_and_sub(bv_target,
1060 ag_.arg_bv0.data(), ag_.arg_bv0.size(),
1061 ag_.arg_bv1.data(), ag_.arg_bv1.size(),
1067 template<
typename BV>
template<
typename BII>
1070 return combine_and_sub(bi,
1071 ag_.arg_bv0.data(), ag_.arg_bv0.size(),
1072 ag_.arg_bv1.data(), ag_.arg_bv1.size());
1078 template<
typename BV>
1081 return find_first_and_sub(idx,
1082 ag_.arg_bv0.data(), ag_.arg_bv0.size(),
1083 ag_.arg_bv1.data(), ag_.arg_bv1.size());
1088 template<
typename BV>
1093 ar_->reset_all_blocks();
1094 combine_shift_right_and(bv_target, ag_.arg_bv0.data(), ag_.arg_bv0.size(),
1100 template<
typename BV>
1111 ar_->reset_or_blocks();
1112 unsigned top_blocks = resize_target(bv_target, bv_src, src_size);
1113 for (
unsigned i = 0;
i < top_blocks; ++
i)
1115 unsigned set_array_max =
1116 find_effective_sub_block_size(
i, bv_src, src_size,
false);
1117 for (
unsigned j = 0; j < set_array_max; ++j)
1126 template<
typename BV>
1144 ar_->reset_and_blocks();
1145 unsigned top_blocks = resize_target(bv_target, bv_src, src_size);
1146 for (
unsigned i = 0;
i < top_blocks; ++
i)
1149 unsigned set_array_max =
1150 find_effective_sub_block_size(
i, bv_src, src_size,
true);
1151 for (
unsigned j = 0; j < set_array_max; ++j)
1161 template<
typename BV>
1168 bool global_found =
false;
1170 if (!bv_src_and || !src_and_size)
1178 unsigned top_blocks = resize_target(bv_target, bv_src_and, src_and_size);
1179 unsigned top_blocks2 = resize_target(bv_target, bv_src_sub, src_sub_size,
false);
1181 if (top_blocks2 > top_blocks)
1182 top_blocks = top_blocks2;
1184 for (
unsigned i = 0;
i < top_blocks; ++
i)
1186 const unsigned set_array_max =
1187 find_effective_sub_block_size(
i, bv_src_and, src_and_size,
1188 bv_src_sub, src_sub_size);
1189 for (
unsigned j = 0; j < set_array_max; ++j)
1193 bv_src_and, src_and_size,
1194 bv_src_sub, src_sub_size,
1195 &is_res_full, !any);
1198 bman_target.check_alloc_top_subblock(
i);
1201 bman_target.validate_top_full(
i);
1207 bool found = digest;
1210 bman_target.opt_copy_bit_block(
i, j, tb_ar_->tb1,
1211 bvector_type::opt_compress, tb_ar_->tb_opt);
1215 global_found |= found;
1219 return global_found;
1224 template<
typename BV>
template<
typename BII>
1229 bool global_found =
false;
1231 if (!bv_src_and || !src_and_size)
1234 unsigned top_blocks = 0;
1237 for (
unsigned i = 0;
i < src_and_size; ++
i)
1241 unsigned arg_top_blocks = bv->get_blocks_manager().top_block_size();
1242 if (arg_top_blocks > top_blocks)
1243 top_blocks = arg_top_blocks;
1245 for (
unsigned i = 0;
i < src_sub_size; ++
i)
1249 unsigned arg_top_blocks = bv->get_blocks_manager().top_block_size();
1250 if (arg_top_blocks > top_blocks)
1251 top_blocks = arg_top_blocks;
1255 for (
unsigned i = 0;
i < top_blocks; ++
i)
1257 const unsigned set_array_max =
1258 find_effective_sub_block_size(
i, bv_src_and, src_and_size,
1259 bv_src_sub, src_sub_size);
1260 for (
unsigned j = 0; j < set_array_max; ++j)
1264 bv_src_and, src_and_size,
1265 bv_src_sub, src_sub_size,
1266 &is_res_full,
true);
1276 bool found = digest;
1277 global_found |= found;
1283 return global_found;
1291 template<
typename BV>
template<
class TPipe>
1297 size_t pipe_size = pipe_args.
size();
1303 bcache_ptr_ = &pipe.get_bcache();
1305 unsigned top_blocks = pipe.get_top_blocks();
1308 if (pipe.bv_or_target_)
1309 pipe.bv_or_target_->get_blocks_manager().reserve_top_blocks(top_blocks);
1311 unsigned i_from(0), j_from(0), i_to(0), j_to(0);
1314 typename bvector_type::block_idx_type nb;
1322 size_t batch_size = pipe.get_options().batch_size;
1324 batch_size = pipe.compute_run_batch();
1326 for (
size_t batch_from(0), batch_to(0); batch_from < pipe_size;
1327 batch_from = batch_to)
1329 batch_to = batch_from + batch_size;
1330 if (batch_to > pipe_size)
1331 batch_to = pipe_size;
1334 for (
unsigned i = i_from;
i < top_blocks; ++
i)
1337 if constexpr(TPipe::options_type::is_masks())
1348 for (; j < sub_size; ++j)
1350 size_t p = batch_from;
1351 for (; p < batch_to; ++p)
1362 if constexpr (TPipe::options_type::is_compute_counts())
1365 if (pipe.count_res_vect_[p] >= pipe.search_count_limit_)
1371 bv_src_and, src_and_size,
1372 bv_src_sub, src_sub_size,
1376 if (digest || is_res_full)
1378 if (pipe.bv_or_target_)
1381 pipe.bv_or_target_->get_blocks_manager();
1385 bman.check_alloc_top_subblock(
i);
1387 pipe.bv_or_target_->combine_operation_block_or(
1388 i, j, blk, arg_blk);
1390 if constexpr (!TPipe::options_type::is_make_results())
1392 if constexpr (TPipe::options_type::is_compute_counts())
1397 pipe.count_res_vect_[p]+=
1404 pipe.get_bv_res_vector();
1410 bv_targets_vect[p] = bv_target;
1411 typename bvector_type::blocks_manager_type& bman =
1412 bv_target->get_blocks_manager();
1414 bman.reserve_top_blocks(top_blocks);
1417 bv_target->get_blocks_manager();
1420 bman.check_alloc_top_subblock(
i);
1421 bman.set_block_ptr(
i, j,
1424 bman.validate_top_full(
i);
1425 if constexpr (TPipe::options_type::is_compute_counts())
1430 if constexpr (TPipe::options_type::is_compute_counts())
1431 pipe.count_res_vect_[p] +=
1433 bman.opt_copy_bit_block(
i, j, tb_ar_->tb1,
1434 bvector_type::opt_compress, tb_ar_->tb_opt);
1440 if (pipe.bv_or_target_ && p == pipe_size)
1443 pipe.bv_or_target_->get_blocks_manager();
1445 bman.optimize_block(
i, j, blk,
1446 tb_ar_->tb_opt, bvector_type::opt_compress, 0);
1457 template<
typename BV>
1462 unsigned top_blocks = max_top_blocks(bv_src_and, src_and_size);
1463 unsigned top_blocks2 = max_top_blocks(bv_src_sub, src_sub_size);
1465 if (top_blocks2 > top_blocks)
1466 top_blocks = top_blocks2;
1477 if (nblock_from == nblock_to)
1480 unsigned i = top_from;
1483 bv_src_and, src_and_size,
1484 bv_src_sub, src_sub_size,
1490 unsigned block_bit_idx = 0;
1499 if (top_to < top_blocks)
1500 top_blocks = top_to+1;
1507 for (
unsigned i = top_from;
i < top_blocks; ++
i)
1520 set_array_max = find_effective_sub_block_size(
i, bv_src_and, src_and_size,
true);
1525 unsigned set_array_max2 =
1526 find_effective_sub_block_size(
i, bv_src_sub, src_sub_size,
false);
1529 if (set_array_max2 < set_array_max)
1530 set_array_max = set_array_max2;
1533 for (; j < set_array_max; ++j)
1537 bv_src_and, src_and_size,
1538 bv_src_sub, src_sub_size,
1539 &is_res_full,
false);
1542 unsigned block_bit_idx = 0;
1555 template<
typename BV>
1568 unsigned max_size = 1;
1569 for (
size_t k = 0; k < src_size; ++k)
1573 const typename bvector_type::blocks_manager_type& bman_arg =
1574 bv->get_blocks_manager();
1575 const bm::word_t*
const* blk_blk_arg = bman_arg.get_topblock(
i);
1578 if (top_null_as_zero)
1604 template<
typename BV>
1611 unsigned set_array_max = find_effective_sub_block_size(
i, bv_src1, src_size1,
true);
1612 if (set_array_max && src_size2)
1614 unsigned set_array_max2 =
1615 find_effective_sub_block_size(
i, bv_src2, src_size2,
false);
1616 if (set_array_max2 > set_array_max)
1617 set_array_max = set_array_max2;
1619 return set_array_max;
1625 template<
typename BV>
1631 typename bvector_type::blocks_manager_type& bman_target =
1632 bv_target.get_blocks_manager();
1634 ar_->reset_or_blocks();
1635 bm::word_t* blk = sort_input_blocks_or( bv_src, src_size,
i, j);
1641 bman_target.check_alloc_top_subblock(
i);
1642 bman_target.set_block_ptr(
i, j, blk);
1644 bman_target.validate_top_full(
i);
1648 size_t arg_blk_count = ar_->v_arg_or_blk.size();
1649 size_t arg_blk_gap_count = ar_->v_arg_or_blk_gap.size();
1650 if (arg_blk_count || arg_blk_gap_count)
1652 bool all_one = process_bit_blocks_or(bman_target,
i, j, *ar_);
1655 if (arg_blk_gap_count)
1656 process_gap_blocks_or(*ar_);
1658 bman_target.opt_copy_bit_block(
i, j, tb_ar_->tb1,
1659 opt_mode_, tb_ar_->tb_opt);
1667 template<
typename BV>
1671 size_t src_and_size)
1675 bm::word_t* blk = sort_input_blocks_and( bv_src, src_and_size,
i, j);
1680 size_t arg_blk_and_count = ar_->v_arg_and_blk.size();
1681 size_t arg_blk_and_gap_count = ar_->v_arg_and_blk_gap.size();
1682 if (arg_blk_and_count || arg_blk_and_gap_count)
1684 if (!arg_blk_and_gap_count && (arg_blk_and_count == 1))
1690 bman_target.check_alloc_top_subblock(
i);
1691 bman_target.set_block_ptr(
i, j, blk);
1693 bman_target.validate_top_full(
i);
1699 bm::id64_t digest = process_bit_blocks_and(*ar_, ~0ull,
true);
1705 if (arg_blk_and_gap_count)
1706 digest = process_gap_blocks_and(*ar_, digest);
1710 bman_target.opt_copy_bit_block(
i, j, tb_ar_->tb1,
1711 opt_mode_, tb_ar_->tb_opt);
1718 template<
typename BV>
1721 unsigned i,
unsigned j,
1724 int* is_result_full,
bool find_all)
1728 is_single_bit_ =
false;
1729 *is_result_full = 0;
1730 bm::word_t* blk = sort_input_blocks_and( bv_src_and, src_and_size,
i, j);
1736 size_t arg_blk_and_count = ar_->v_arg_and_blk.size();
1737 size_t arg_blk_and_gap_count = ar_->v_arg_and_blk_gap.size();
1738 if (!(arg_blk_and_count | arg_blk_and_gap_count))
1741 ar_->reset_or_blocks();
1744 blk = sort_input_blocks_or( bv_src_sub, src_sub_size,
i, j);
1751 if (!arg_blk_and_gap_count && (arg_blk_and_count == 1))
1755 *is_result_full = 1;
1764 digest_type digest = process_bit_blocks_and(*ar_, ~0ull, find_all);
1767 digest = process_bit_blocks_sub(*ar_, digest);
1779 size_t arg_blk_gap_count = ar_->v_arg_and_blk_gap.size();
1780 for (
size_t k = 0; k < arg_blk_gap_count; ++k)
1783 arg_blk_gap_count = ar_->v_arg_or_blk_gap.size();
1784 for (
size_t k = 0; k < arg_blk_gap_count; ++k)
1795 digest = process_gap_blocks_and(*ar_, digest);
1798 digest = process_gap_blocks_sub(*ar_, digest);
1800 is_single_bit_ =
false;
1807 template<
typename BV>
1812 for (
size_t k = 0; k < arg_blk_gap_count; ++k)
1818 template<
typename BV>
1826 for (
size_t k = 0; (k < arg_blk_gap_count) && digest; ++k)
1837 for (++k; k < arg_blk_gap_count; ++k)
1852 template<
typename BV>
1862 for (
size_t k = 0; k < arg_blk_gap_count; ++k)
1868 for (
size_t k = 0; digest && (k < arg_blk_gap_count); ++k)
1879 for (++k; k < arg_blk_gap_count; ++k)
1894 template<
typename BV>
1899 for (
size_t i = 0;
i < arg_blk_gap_count &&
b; ++
i)
1906 template<
typename BV>
1911 for (
size_t i = 0;
i < arg_blk_gap_count; ++
i)
1923 template<
typename BV>
1925 unsigned i,
unsigned j,
1941 size_t unroll_factor,
len, len_unr;
1944 len = arg_blk_count - k;
1945 len_unr =
len - (
len % unroll_factor);
1946 for( ;k < len_unr; k+=unroll_factor)
1961 len = arg_blk_count - k;
1962 len_unr =
len - (
len % unroll_factor);
1963 for( ;k < len_unr; k+=unroll_factor)
1975 for (; k < arg_blk_count; ++k)
1992 template<
typename BV>
2006 if (range_set_ && (nb_from == nb_to))
2013 if (arg_blk_count > 1)
2021 args[k], args[k+1], digest);
2031 switch (arg_blk_count)
2046 const size_t unroll_factor = 4;
2047 for (; k + unroll_factor < arg_blk_count; k += unroll_factor)
2050 args[k], args[k+1], args[k+2], args[k+3],
2063 for (; k + unroll_factor < arg_blk_count; k += unroll_factor)
2066 args[k+2][nword] & args[k+3][nword];
2070 for (; k + 2 < arg_blk_count; k += 2)
2078 for (; k < arg_blk_count; ++k)
2079 acc &= args[k][nword];
2089 for (; k + 2 < arg_blk_count; k += 2)
2094 case 0:
return digest;
2097 &single_bit_idx_, digest);
2098 if (is_single_bit_) { ++k;
goto sbit_check; }
2103 for (; k < arg_blk_count; ++k)
2108 case 0:
return digest;
2111 &single_bit_idx_, digest);
2113 { ++k;
goto sbit_check; }
2123 template<
typename BV>
2132 const size_t unroll_factor = 4;
2138 for (; k + unroll_factor < arg_blk_count; k += unroll_factor)
2141 args[k], args[k+1],args[k+2], args[k+3],
2149 &single_bit_idx_, digest);
2154 const unsigned mask =
2156 const unsigned nword =
2159 for (; k + unroll_factor < arg_blk_count; k += unroll_factor)
2161 acc = args[k][nword] | args[k+1][nword] |
2162 args[k+2][nword] | args[k+3][nword];
2166 for (; k < arg_blk_count; ++k)
2167 acc |= args[k][nword];
2176 for (; k + 2 < arg_blk_count; k += 2)
2181 case 0:
return digest;
2184 &single_bit_idx_, digest);
2185 if (is_single_bit_) { ++k;
goto sbit_check; }
2190 for (; k < arg_blk_count; ++k)
2195 case 0:
return digest;
2199 { ++k;
goto sbit_check; }
2209 template<
typename BV>
2214 typename bvector_type::blocks_manager_type& bman_target = bv_target.get_blocks_manager();
2217 if (bman_target.is_init())
2218 bman_target.deinit_tree();
2221 unsigned top_blocks = bman_target.top_block_size();
2223 bool need_realloc =
false;
2226 for (
unsigned i = 0;
i < src_size; ++
i)
2230 const typename bvector_type::blocks_manager_type& bman_arg =
2231 bv->get_blocks_manager();
2232 unsigned arg_top_blocks = bman_arg.top_block_size();
2233 if (arg_top_blocks > top_blocks)
2235 need_realloc =
true;
2236 top_blocks = arg_top_blocks;
2239 if (arg_size >
size)
2244 bman_target.reserve_top_blocks(top_blocks);
2245 if (!bman_target.is_init())
2246 bman_target.init_tree();
2247 if (
size > bv_target.size())
2248 bv_target.resize(
size);
2255 template<
typename BV>
2260 unsigned top_blocks = 1;
2261 for (
unsigned i = 0;
i < src_size; ++
i)
2265 const typename bvector_type::blocks_manager_type& bman_arg =
2266 bv->get_blocks_manager();
2267 unsigned arg_top_blocks = bman_arg.top_block_size();
2268 if (arg_top_blocks > top_blocks)
2269 top_blocks = arg_top_blocks;
2277 template<
typename BV>
2281 unsigned i,
unsigned j)
2283 auto bit_arr = ar_->v_arg_or_blk.resize_no_copy(src_size);
2284 auto gap_arr = ar_->v_arg_or_blk_gap.resize_no_copy(src_size);
2286 size_t bc(0), gc(0);
2288 for (
size_t k = 0; k < src_size; ++k)
2291 bv_src[k]->get_blocks_manager().get_block_ptr(
i, j);
2302 bit_arr[bc++] = arg_blk;
2306 ar_->v_arg_or_blk.resize_no_check(bc);
2307 ar_->v_arg_or_blk_gap.resize_no_check(gc);
2314 template<
typename BV>
2318 unsigned i,
unsigned j)
2320 ar_->v_arg_tmp_blk.resize_no_copy(src_size);
2322 auto blocks_arr = ar_->v_arg_tmp_blk.data();
2323 for (
size_t k = 0; k < src_size; ++k)
2326 bv_src[k]->get_blocks_manager().get_block_ptr(
i, j);
2331 bool has_full_blk =
false;
2332 auto bit_arr = ar_->v_arg_and_blk.resize_no_copy(src_size + 1);
2333 auto gap_arr = ar_->v_arg_and_blk_gap.resize_no_copy(src_size + 1);
2334 size_t bc(0), gc(0);
2336 for (
size_t k = 0; k < src_size; ++k)
2342 gap_arr[gc++] = gap_blk;
2348 has_full_blk =
true;
2351 bit_arr[bc++] = arg_blk;
2354 if (range_gap_blk_[0])
2357 gap_arr[gc++] = range_gap_blk_;
2359 ar_->v_arg_and_blk_gap.resize_no_check(gc);
2361 if (has_full_blk && (!bc && !gc))
2363 ar_->v_arg_and_blk.resize_no_check(bc);
2370 template<
typename BV>
2372 const size_t* src_idx,
2374 unsigned i,
unsigned j)
2379 size_t bv_idx = src_idx[k];
2380 auto cnt = bcache_ptr_->cnt_vect_[bv_idx];
2383 bm::word_t* bit_blk = bcache_ptr_->blk_vect_[bv_idx];
2389 bcache_ptr_->blk_vect_[bv_idx] = bit_blk;
2396 bcache_ptr_->blk_ij_vect_[bv_idx] = pair_ij;
2406 template<
typename BV>
2420 for (
unsigned i = 1;
i < src_size; ++
i)
2424 bv_target.bit_or(*bv);
2430 template<
typename BV>
2445 for (
unsigned i = 1;
i < src_size; ++
i)
2449 bv_target.bit_and(*bv);
2455 template<
typename BV>
2458 size_t src_and_size,
2460 size_t src_sub_size)
2465 combine_and_horizontal(bv_target, bv_src_and, src_and_size);
2467 for (
unsigned i = 0;
i < src_sub_size; ++
i)
2477 template<
typename BV>
2482 top_block_size_ = resize_target(bv_target, bv_src, src_size);
2485 ar_->carry_overs.resize(src_size);
2486 for (
unsigned i = 0;
i < src_size; ++
i)
2487 ar_->carry_overs[
i] = 0;
2492 template<
typename BV>
2503 prepare_shift_right_and(bv_target, bv_src_and, src_and_size);
2507 if (
i > top_block_size_)
2509 if (!any_carry_overs(ar_->carry_overs.data(), src_and_size))
2517 combine_shift_right_and(
i, j, bv_target, bv_src_and, src_and_size);
2525 return bool(count_);
2527 return bv_target.any();
2532 template<
typename BV>
2538 bm::word_t* blk = temp_blk_ ? temp_blk_ : tb_ar_->tb1;
2539 unsigned char* carry_overs = ar_->carry_overs.data();
2543 bool blk_zero =
false;
2548 const bm::word_t* arg_blk = bman_arg.get_block(
i, j);
2569 for (
unsigned k = 1; k < src_size; ++k)
2571 unsigned carry_over = carry_overs[k];
2572 if (!digest && !carry_over)
2577 blk_zero = !blk_zero;
2579 const bm::word_t* arg_blk = get_arg_block(bv_src, k,
i, j);
2580 carry_overs[k] = (
unsigned char)
2581 process_shift_right_and(blk, arg_blk, digest, carry_over);
2582 BM_ASSERT(carry_overs[k] == 0 || carry_overs[k] == 1);
2602 bman_target.opt_copy_bit_block(
i, j, blk, opt_mode_, tb_ar_->tb_opt);
2611 template<
typename BV>
2618 BM_ASSERT(carry_over == 1 || carry_over == 0);
2630 blk[0] = carry_over;
2651 blk[0] = carry_over & arg_blk[0];
2672 template<
typename BV>
2677 return bv_src[k]->get_blocks_manager().get_block(
i, j);
2682 template<
typename BV>
2687 unsigned acc = carry_overs[0];
2688 for (
size_t i = 1;
i < co_size; ++
i)
2689 acc |= carry_overs[
i];
2695 template<
typename BV>
2701 temp_blk_ = temp_block;
2705 case BM_NOT_DEFINED:
2707 case BM_SHIFT_R_AND:
2708 prepare_shift_right_and(*bv_target, ag_.arg_bv0.data(), ag_.arg_bv0.size());
2709 operation_status_ = operation_status::op_prepared;
2718 template<
typename BV>
2722 BM_ASSERT(operation_status_ == operation_status::op_prepared
2723 || operation_status_ == operation_status::op_in_progress);
2728 case BM_NOT_DEFINED:
2731 case BM_SHIFT_R_AND:
2733 if (
i > top_block_size_)
2735 if (!this->any_carry_overs(ar_->carry_overs.data(), ag_.arg_bv0.size()))
2737 operation_status_ = operation_status::op_done;
2738 return operation_status_;
2742 this->combine_shift_right_and(
i, j, *bv_target_,
2743 ag_.arg_bv0.data(), ag_.arg_bv0.size());
2744 operation_status_ = operation_status::op_in_progress;
2752 return operation_status_;
2760 template<
typename BV>
template<
class Opt>
2763 size_t sz = arg_vect_.size();
2765 for (
size_t i = 0;
i < sz; ++
i)
2766 free_arg_group(
arr[
i]);
2767 sz = bv_res_vect_.size();
2769 for (
size_t i = 0;
i < sz; ++
i)
2774 sz = bcache_.blk_vect_.size();
2775 bm::word_t** blk_arr = bcache_.blk_vect_.data();
2776 for (
size_t i = 0;
i < sz; ++
i)
2782 template<
typename BV>
template<
class Opt>
2788 if constexpr (Opt::is_make_results())
2791 size_t sz = arg_vect_.size();
2792 bv_res_vect_.resize(sz);
2794 for (
size_t i = 0;
i < sz; ++
i)
2797 if constexpr (Opt::is_compute_counts())
2799 size_t sz = arg_vect_.size();
2800 count_res_vect_.resize(sz);
2801 size_type* cnt_arr = count_res_vect_.data();
2802 ::memset(cnt_arr, 0, sz *
sizeof(cnt_arr[0]));
2806 size_t pipe_size = pipe_args.
size();
2808 for (
size_t p = 0; p < pipe_size; ++p)
2811 complete_arg_group(ag);
2815 unsigned top_blocks1 = max_top_blocks(bv_src_and, src_and_size);
2816 if (top_blocks1 > top_blocks_)
2817 top_blocks_ = top_blocks1;
2821 unsigned top_blocks2 = max_top_blocks(bv_src_sub, src_sub_size);
2822 if (top_blocks2 > top_blocks_)
2823 top_blocks_ = top_blocks2;
2826 is_complete_ =
true;
2828 BM_ASSERT(bcache_.bv_inp_vect_.size() == bcache_.cnt_vect_.size());
2829 BM_ASSERT(bcache_.bv_inp_vect_.size() == bcache_.blk_vect_.size());
2830 BM_ASSERT(bcache_.bv_inp_vect_.size() == bcache_.blk_ij_vect_.size());
2835 template<
typename BV>
template<
class Opt>
2849 template<
typename BV>
template<
class Opt>
2856 for (
size_t k = 0; k <
size; ++k)
2858 bool found(
false);
size_t bv_idx(0);
2862 const bvector_type** bv_arr = bcache_.bv_inp_vect_.data();
2864 bm::find_ptr((
void**)bv_arr, bcache_.bv_inp_vect_.size(),
2868 bcache_.cnt_vect_[bv_idx]++;
2871 bv_idx = bcache_.bv_inp_vect_.size();
2872 bcache_.bv_inp_vect_.push_back(bv);
2873 bcache_.cnt_vect_.push_back(0);
2874 bcache_.blk_vect_.push_back(0);
2886 template<
typename BV>
template<
class Opt>
2892 arg_vect_.push_back(arg);
2898 template<
typename BV>
template<
class Opt>
2901 const size_t cache_size = 256 * 1024;
2906 const float bv_struct_overhead = (64 * 8);
2907 const float cached_vect = float(cache_size) / float(block_size + bv_struct_overhead);
2909 size_t bv_count = unique_vectors();
2910 size_t args_total = arg_vect_.size();
2911 if ((bv_count < cached_vect) || (args_total < 2))
2914 size_t avg_vect_per_group = total_vect_ / args_total;
2916 const float reuse_coeff = 0.7f;
2917 float f_batch_size =
2918 (1+reuse_coeff)*(
float(avg_vect_per_group) / float(cached_vect) + 0.99f);
2919 size_t batch_size = size_t(f_batch_size);
2929 template<
typename BV>
#define BM_DECLARE_TEMP_BLOCK(x)
#define BM_ASSERT_THROW(x, xerrcode)
#define FULL_BLOCK_FAKE_ADDR
#define FULL_BLOCK_REAL_ADDR
Bit manipulation primitives (internal)
Pipeline vector for running a group of aggregation operations on a family of vectors.
const bv_vector_type & get_all_input_vect() const noexcept
bool is_complete_
ready to run state flag
size_t compute_run_batch() const noexcept
Function looks at the pipeline and applies heuristics to suggest an optimal run_batch parameter.
run_options & options() noexcept
Set pipeline run options.
void complete_arg_sub_group(index_vector_type &idx_vect, const bvector_type_const_ptr *bv_src, size_t size)
size_t total_vect_
total number of vector mentions in all groups
const count_vector_type & get_all_input_cnt_vect() const noexcept
bv_count_vector_type & get_bv_count_vector() noexcept
Return results vector count used for pipeline execution.
const run_options & get_options() const noexcept
Get pipeline run options.
bool is_complete() const noexcept
return true if pipeline is ready for execution (complete)
void set_search_count_limit(size_type limit) noexcept
Set search limit for results.
void set_or_target(bvector_type *bv_or) noexcept
Attach OR (aggregator vector).
void complete_arg_group(arg_groups *ag)
size_type size() const noexcept
Return size() of the pipeline.
bvector_type * bv_or_target_
OR target bit-vector ptr.
unsigned top_blocks_
top-level structure size, max of all bvectors
const arg_vector_type & get_args_vector() const noexcept
Return argument vector used for pipeline execution.
bv_count_vector_type count_res_vect_
results (counts)
arg_groups * add()
Add new arguments group.
size_type search_count_limit_
search limit by count
pipeline(const pipeline &)=delete
pipeline & operator=(const pipeline &)=delete
pipeline_bcache & get_bcache() noexcept
void complete()
Prepare pipeline for the execution (resize and init internal structures) Once complete,...
pipeline_bcache bcache_
blocks cache structure
arg_vector_type arg_vect_
input arg. groups
run_options options_
execution parameters
unsigned get_top_blocks() const noexcept
Return number of top blocks after complete.
bvect_vector_type bv_res_vect_
results (bit-vector ptrs)
size_t unique_vectors() const noexcept
Return number of unique vectors in the pipeline (after complete())
bvect_vector_type & get_bv_res_vector() noexcept
Return results vector used for pipeline execution.
Algorithms for fast aggregation of a group of bit-vectors.
BV::block_idx_type block_idx_type
void combine_or(unsigned i, unsigned j, bvector_type &bv_target, const bvector_type_const_ptr *bv_src, size_t src_size)
bm::word_t * cache_gap_block(const bm::word_t *arg_blk, const size_t *src_idx, size_t k, unsigned i, unsigned j)
bvector_type * check_create_target()
unsigned top_block_size_
operation top block (i) size
bool compute_count_
compute search result count
bool test_gap_blocks_sub(size_t block_count, unsigned bit_idx)
arg_groups * arg_groups_type_ptr
digest_type process_bit_blocks_sub(const arena &ar, digest_type digest)
pipeline_bcache * bcache_ptr_
bm::heap_vector< bm::pair< unsigned, unsigned >, allocator_type, true > block_ij_vector_type
void combine_and(unsigned i, unsigned j, bvector_type &bv_target, const bvector_type_const_ptr *bv_src, size_t src_size)
void combine_or(bvector_type &bv_target, const bvector_type_const_ptr *bv_src, size_t src_size)
Aggregate group of vectors using logical OR.
int operation_
operation code (default: not defined)
static const bm::word_t * get_arg_block(const bvector_type_const_ptr *bv_src, unsigned k, unsigned i, unsigned j) noexcept
allocator_pool_type pool_
pool for operations with cyclic mem.use
arg_groups ag_
aggregator argument groups
bm::word_t * temp_blk_
external temp block ptr
bm::word_t * sort_input_blocks_and(const bvector_type_const_ptr *bv_src, size_t src_size, unsigned i, unsigned j)
bm::heap_vector< bvector_type *, allocator_type, true > bvect_vector_type
bm::heap_vector< bvector_type_const_ptr, allocator_type, true > bv_vector_type
void combine_shift_right_and(bvector_type &bv_target)
Aggregate added group of vectors using SHIFT-RIGHT and logical AND Operation does NOT perform an expl...
bvector_type * bv_target_
target bit-vector
bm::heap_vector< size_type, allocator_type, true > bv_count_vector_type
static unsigned process_shift_right_and(bm::word_t *blk, const bm::word_t *arg_blk, digest_type &digest, unsigned carry_over) noexcept
BV::allocator_type allocator_type
bool is_single_bit_
single bit flag
bm::gap_word_t range_gap_blk_[5]
temp GAP range block
static void free_arg_group(arg_groups *arg)
operation_status run_step(unsigned i, unsigned j)
Run a step of the current aggregation operation.
static bool any_carry_overs(const unsigned char *carry_overs, size_t co_size) noexcept
void combine_and_sub(TPipe &pipe)
Run AND-SUB: AND (groups1) AND NOT ( OR(group0)) for a pipeline.
void stage(bm::word_t *temp_block)
Prepare operation, create internal resources, analyse dependencies.
bool combine_and_sub(bvector_type &bv_target)
Aggregate added group of vectors using fused logical AND-SUB Operation does NOT perform an explicit r...
bm::heap_vector< size_t, allocator_type, true > index_vector_type
void reset()
Reset aggregate groups, forget all attached vectors.
void set_operation(int op_code) noexcept
Set operation code for the aggregator.
bvector_type::blocks_manager_type blocks_manager_type
arena * ar_
data arena ptr
digest_type combine_and_sub(unsigned i, unsigned j, const bvector_type_const_ptr *bv_src_and, size_t src_and_size, const bvector_type_const_ptr *bv_src_sub, size_t src_sub_size, int *is_result_full, bool find_all)
void combine_or(bvector_type &bv_target)
Aggregate added group of vectors using logical OR Operation does NOT perform an explicit reset of arg...
bm::heap_vector< unsigned, allocator_type, true > count_vector_type
digest_type process_bit_blocks_and(const arena &ar, digest_type digest, bool find_all)
bool combine_and_sub(bvector_type &bv_target, bool any)
Aggregate added group of vectors using fused logical AND-SUB Operation does NOT perform an explicit r...
void combine_and(bvector_type &bv_target)
Aggregate added group of vectors using logical AND Operation does NOT perform an explicit reset of ar...
digest_type process_gap_blocks_sub(const arena &ar, digest_type digest)
bm::heap_vector< const bm::word_t *, allocator_type, true > block_ptr_vector_type
size_type range_from_
search from
bool combine_shift_right_and(bvector_type &bv_target, const bvector_type_const_ptr *bv_src_and, size_t src_and_size, bool any)
Fusion aggregate group of vectors using SHIFT right with AND.
static arg_groups * construct_arg_group()
const bvector_type * bvector_type_const_ptr
const bvector_type * get_target() const noexcept
static unsigned find_effective_sub_block_size(unsigned i, const bvector_type_const_ptr *bv_src1, size_t src_size1, const bvector_type_const_ptr *bv_src2, size_t src_size2) noexcept
int get_operation() const noexcept
Get current operation code.
bool find_first_and_sub(size_type &idx)
Aggregate added group of vectors using fused logical AND-SUB, find the first match.
bm::id64_t get_cache_gap_hits() const noexcept
size_t add(const bvector_type *bv, unsigned agr_group=0)
Attach source bit-vector to an argument group (0 or 1).
bool combine_shift_right_and(unsigned i, unsigned j, bvector_type &bv_target, const bvector_type_const_ptr *bv_src, size_t src_size)
void reset_range_hint() noexcept
Reset range hint to false.
operation
Codes for aggregation operations which can be pipelined for efficient execution.
bm::word_t * sort_input_blocks_or(const bvector_type_const_ptr *bv_src, size_t src_size, unsigned i, unsigned j)
bm::heap_vector< unsigned char, allocator_type, true > uchar_vector_type
bm::word_t * get_temp_block() noexcept
bool combine_and_sub(BII bi, const bvector_type_const_ptr *bv_src_and, size_t src_and_size, const bvector_type_const_ptr *bv_src_sub, size_t src_sub_size)
void combine_and_horizontal(bvector_type &bv_target, const bvector_type_const_ptr *bv_src, size_t src_size)
Horizontal AND aggregation (potentially slower) method.
bm::heap_vector< bm::word_t *, allocator_type, true > blockptr_vector_type
digest_type process_gap_blocks_and(const arena &ar, digest_type digest)
static unsigned find_effective_sub_block_size(unsigned i, const bvector_type_const_ptr *bv_src, size_t src_size, bool top_null_as_zero) noexcept
bool test_gap_blocks_and(size_t block_count, unsigned bit_idx)
void set_optimization(typename bvector_type::optmode opt=bvector_type::opt_compress) noexcept
set on-the-fly bit-block compression By default aggregator does not try to optimize result,...
bool range_set_
pipeline blocks cache ptr
size_type range_to_
search to
bool combine_and_sub_bi(BII bi)
Aggregate added group of vectors using fused logical AND-SUB.
bool find_first_and_sub(size_type &idx, const bvector_type_const_ptr *bv_src_and, size_t src_and_size, const bvector_type_const_ptr *bv_src_sub, size_t src_sub_size)
void prepare_shift_right_and(bvector_type &bv_target, const bvector_type_const_ptr *bv_src, size_t src_size)
void process_gap_blocks_or(const arena &ar)
void set_compute_count(bool count_mode) noexcept
tb_arena * tb_ar_
data arena ptr (heap allocated)
static unsigned resize_target(bvector_type &bv_target, const bvector_type_const_ptr *bv_src, size_t src_size, bool init_clear=true)
size_type count_
search result count
bvector_type::allocator_type::allocator_pool_type allocator_pool_type
static arena * construct_arena()
bm::heap_vector< arg_groups_type_ptr, allocator_type, true > arg_vector_type
static unsigned max_top_blocks(const bvector_type_const_ptr *bv_src, size_t src_size) noexcept
bool combine_and_sub(bvector_type &bv_target, const bvector_type_const_ptr *bv_src_and, size_t src_and_size, const bvector_type_const_ptr *bv_src_sub, size_t src_sub_size, bool any)
Fusion aggregate group of vectors using logical AND MINUS another set.
bm::heap_vector< const bm::gap_word_t *, allocator_type, true > gap_block_ptr_vector_type
void combine_or_horizontal(bvector_type &bv_target, const bvector_type_const_ptr *bv_src, size_t src_size)
Horizontal OR aggregation (potentially slower) method.
operation_status operation_status_
static void free_arena(arena *ar) noexcept
void combine_and_sub_horizontal(bvector_type &bv_target, const bvector_type_const_ptr *bv_src_and, size_t src_and_size, const bvector_type_const_ptr *bv_src_sub, size_t src_sub_size)
Horizontal AND-SUB aggregation (potentially slower) method.
bm::id64_t gap_cache_cnt_
void combine_and(bvector_type &bv_target, const bvector_type_const_ptr *bv_src, size_t src_size)
Aggregate group of vectors using logical AND.
bool set_range_hint(size_type from, size_type to) noexcept
Set search hint for the range where results need to be searched (experimental, for internal use).
bvector_type::optmode opt_mode_
perform search result optimization
operation_status get_operation_status() const
bool process_bit_blocks_or(blocks_manager_type &bman_target, unsigned i, unsigned j, const arena &ar)
value_type * data() const noexcept
void resize(size_type new_size)
vector resize
size_type size() const noexcept
void reset() noexcept
Quick resize to zero.
void push_back(const value_type &v)
push new element to the back of the vector
static vector< string > arr
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
bm::id_t bit_block_count(const bm::word_t *block) noexcept
Bitcount for bit block.
bool bit_find_first(const bm::word_t *block, unsigned *pos) noexcept
BIT block find the first set bit.
bm::id64_t bit_block_and_3way(bm::word_t *dst, const bm::word_t *src1, const bm::word_t *src2, bm::id64_t digest) noexcept
digest based bit-block AND
bm::id64_t digest_mask(unsigned from, unsigned to) noexcept
Compute digest mask for [from..to] positions.
unsigned word_bitcount64(bm::id64_t x) noexcept
int for_each_bit_blk(const bm::word_t *block, SIZE_TYPE offset, Func &bit_functor)
for-each visitor, calls a visitor functor for each 1 bit group
bool bit_block_or(bm::word_t *dst, const bm::word_t *src) noexcept
Plain bitblock OR operation. Function does not analyse availability of source and destination blocks.
bm::id64_t bit_block_and(bm::word_t *dst, const bm::word_t *src) noexcept
Plain bitblock AND operation. Function does not analyse availability of source and destination blocks...
void block_init_digest0(bm::word_t *const block, bm::id64_t digest) noexcept
Init block with 000111000 pattern based on digest.
bool bit_block_or_3way(bm::word_t *dst, const bm::word_t *src1, const bm::word_t *src2) noexcept
3 way (target | source1 | source2) bitblock OR operation. Function does not analyse availability of s...
bool is_bits_one(const bm::wordop_t *start) noexcept
Returns "true" if all bits in the block are 1.
bool bit_is_all_zero(const bm::word_t *start) noexcept
Returns "true" if all bits in the block are 0.
void bit_block_set(bm::word_t *dst, bm::word_t value) noexcept
Bitblock memset operation.
bool bit_block_or_5way(bm::word_t *dst, const bm::word_t *src1, const bm::word_t *src2, const bm::word_t *src3, const bm::word_t *src4) noexcept
5 way (target | source1 | source2 | source3 | source4) bitblock OR operation. Function does not analyse availability of sou...
void bit_block_copy(bm::word_t *dst, const bm::word_t *src) noexcept
Bitblock copy operation.
bm::id64_t bit_block_and_2way(bm::word_t *dst, const bm::word_t *src1, const bm::word_t *src2, bm::id64_t digest) noexcept
digest based bit-block AND
bool bit_block_shift_r1_and_unr(bm::word_t *block, bm::word_t co_flag, const bm::word_t *mask_block, bm::id64_t *digest) noexcept
Right bit-shift bitblock by 1 bit (reference) + AND.
bm::id64_t bit_block_sub_3way(bm::word_t *dst, const bm::word_t *src0, const bm::word_t *src1, bm::id64_t digest) noexcept
digest based bit-block SUB 3-way
bool bit_find_first_if_1(const bm::word_t *block, unsigned *first, bm::id64_t digest) noexcept
BIT block find the first set bit if only 1 bit is set.
bm::id64_t bit_block_sub_5way(bm::word_t *dst, const bm::word_t *src0, const bm::word_t *src1, const bm::word_t *src2, const bm::word_t *src3, bm::id64_t digest) noexcept
digest based bit-block SUB 5-way
bm::id64_t bit_block_init_and_2way(bm::word_t *dst, const bm::word_t *src1, const bm::word_t *src2, bm::id64_t digest) noexcept
digest based bit-block AND (0 elements of digest will be zeroed)
bm::id64_t calc_block_digest0(const bm::word_t *const block) noexcept
Compute digest for 64 non-zero areas.
bm::id64_t bit_block_and_5way(bm::word_t *dst, const bm::word_t *src0, const bm::word_t *src1, const bm::word_t *src2, const bm::word_t *src3, bm::id64_t digest) noexcept
digest based bit-block AND 5-way
bm::id64_t bit_block_sub(bm::word_t *dst, const bm::word_t *src) noexcept
Plain bitblock SUB (AND NOT) operation. Function does not analyse availability of source and destinat...
@ READWRITE
mutable (read-write object)
@ BM_GAP
GAP compression is ON.
void gap_add_to_bitset(unsigned *dest, const T *pcurr, unsigned len) noexcept
Adds(OR) GAP block to bitblock.
unsigned gap_test_unr(const T *buf, const unsigned pos) noexcept
Tests if bit = pos is true. Analog of bm::gap_test with SIMD unrolling.
void gap_convert_to_bitset(unsigned *dest, const T *buf, unsigned len=0) noexcept
GAP block to bitblock conversion.
void gap_and_to_bitset(unsigned *dest, const T *pcurr) noexcept
ANDs GAP block to bitblock.
void gap_sub_to_bitset(unsigned *dest, const T *pcurr) noexcept
SUB (AND NOT) GAP block to bitblock.
bm::agg_run_options< agg_disable_result_bvectors, agg_disable_counts > agg_opt_disable_bvects_and_counts
Pre-defined aggregator options to disable both intermediate results and counts.
void combine_and(BV &bv, It first, It last)
AND Combine bitvector and the iterable sequence.
const bool agg_disable_search_masks
const bool agg_produce_result_bvectors
bm::agg_run_options< agg_disable_result_bvectors, agg_compute_counts > agg_opt_only_counts
Pre-defined aggregator options for counts-only (results dropped) operation.
void combine_or(BV &bv, It first, It last)
OR Combine bitvector and the iterable sequence.
void aggregator_pipeline_execute(It first, It last)
Experimental method to run multiple aggregators in sync.
const bool agg_disable_counts
bm::agg_run_options< agg_produce_result_bvectors, agg_compute_counts > agg_opt_bvect_and_counts
Pre-defined aggregator options for results plus counts operation.
const bool agg_disable_result_bvectors
const bool agg_compute_counts
const unsigned set_array_mask
const unsigned set_block_mask
void aligned_free(void *ptr) BMNOEXCEPT
Aligned free.
const unsigned set_sub_array_size
void get_block_coord(BI_TYPE nb, unsigned &i, unsigned &j) noexcept
Recalc linear bvector block index into 2D matrix coordinates.
bm::id_t block_to_global_index(unsigned i, unsigned j, unsigned block_idx) noexcept
calculate bvector<> global bit-index from block-local coords
void * aligned_new_malloc(size_t size)
Aligned malloc (unlike classic malloc it throws bad_alloc exception)
bool find_ptr(const void *const *p_arr, size_t arr_size, const void *ptr, size_t *idx) noexcept
Scan search for pointer value in unordered array.
const unsigned set_word_shift
const unsigned set_block_size
unsigned long long int id64_t
const unsigned set_array_shift
unsigned short gap_word_t
const unsigned gap_max_bits
const unsigned set_top_array_size
const unsigned set_block_shift
const unsigned set_word_mask
const unsigned bits_in_block
const struct ncbi::grid::netcache::search::fields::SIZE size
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
Aggregation options to control execution. Default settings are to support only result bit-vector filte...
static constexpr bool is_make_results() noexcept
make result(target) vectors (aggregation search results) (Default: true) when false is used - means w...
static constexpr bool is_compute_counts() noexcept
Compute counts for the target vectors, when set to true, population count is computed for each result...
static constexpr bool is_masks() noexcept
Support for masking operations (Default: false)
Temporary operations vectors.
gap_block_ptr_vector_type v_arg_and_blk_gap
source GAP blocks list (AND)
block_ptr_vector_type v_arg_and_blk
source blocks list (AND)
block_ptr_vector_type v_arg_or_blk
source blocks list (OR)
gap_block_ptr_vector_type v_arg_or_blk_gap
source GAP blocks list (OR)
block_ptr_vector_type v_arg_tmp_blk
source blocks list
uchar_vector_type carry_overs
shift carry over flags
size_t add(const bvector_type *bv, unsigned agr_group)
Add bit-vector pointer to its aggregation group.
bv_vector_type arg_bv0
arg group 0
bv_vector_type arg_bv1
arg group 1
void reset()
Reset argument groups to zero.
index_vector_type arg_idx0
indexes of vectors for arg group 0
index_vector_type arg_idx1
Block cache for pipeline execution.
blockptr_vector_type blk_vect_
cached block ptrs for bv_inp_vect_
count_vector_type cnt_vect_
usage count for bv_inp (all groups)
bv_vector_type bv_inp_vect_
all input vectors from all groups
block_ij_vector_type blk_ij_vect_
current block coords
Aggregation options for runtime control.
size_t batch_size
Batch size sets number of argument groups processed at a time Default: 0 means this parameter will be...
Allocated blocks of scratch memory.
bm::bit_block_t tb_opt
temp block for results optimization
functor-adaptor for back-inserter