1 #ifndef BMRANDOM__H__INCLUDED__
2 #define BMRANDOM__H__INCLUDED__
25 #ifndef BM__H__INCLUDED__
28 # error missing include (bm.h or bm64.h)
105 unsigned bit_list_size,
116 typename bvector_type::rs_index_type
rsi_;
136 delete [] sub_block_;
144 if (sample_count == 0)
151 bv_in.build_rs_index(&rsi_, &bv_nb_);
154 if (in_count <= sample_count)
160 float pick_ratio = float(sample_count) / float(in_count);
161 if (pick_ratio < 0.054f)
167 simple_pick(bv_out, bv_in, sample_count,
first,
last);
171 if (sample_count > in_count/2)
175 size_type delta_count = in_count - sample_count;
177 get_subset(tmp_bv, bv_in, in_count, delta_count);
178 bv_out.bit_sub(bv_in, tmp_bv);
181 get_subset(bv_out, bv_in, in_count, sample_count);
193 std::random_device rd;
195 std::mt19937_64 mt_rand(rd());
197 std::mt19937 mt_rand(rd());
199 std::uniform_int_distribution<size_type> dist(
first,
last);
208 bool b = bv_in.find(ridx, fidx);
213 bool is_set = bv_out.set_bit_conditional(fidx,
true,
false);
214 sample_count -= is_set;
219 b = bv_in.find(fidx, fidx);
224 is_set = bv_out.set_bit_conditional(fidx,
true,
false);
225 sample_count -= is_set;
239 bv_out.resize(bv_in.size());
244 bm::word_t* tmp_block = bman_out.check_allocate_tempblock();
247 bool b = bv_nb_.find_range(first_nb, last_nb);
252 std::random_device rd;
254 std::mt19937_64 mt_rand(rd());
256 std::mt19937 mt_rand(rd());
258 std::uniform_int_distribution<size_type> dist_nb(first_nb, last_nb);
260 size_type curr_sample_count = sample_count;
261 for (
unsigned take_count = 0; curr_sample_count; curr_sample_count -= take_count)
267 BM_ASSERT(ridx >= first_nb && ridx <= last_nb);
269 b = bv_nb_.find(ridx, nb);
272 b = bv_nb_.find(first_nb, nb);
282 bv_nb_.clear_bit_no_check(nb);
286 unsigned bc = rsi_.count(nb);
288 take_count = compute_take_count(bc, in_count, sample_count);
289 if (take_count > curr_sample_count)
290 take_count = unsigned(curr_sample_count);
297 const bm::word_t* blk_src = bman_in.get_block(i0, j0);
301 bm::word_t* blk_out = bman_out.get_block_ptr(i0, j0);
305 blk_out = bman_out.deoptimize_block(nb);
309 blk_out = bman_out.get_allocator().alloc_bit_block();
310 bman_out.set_block(nb, blk_out);
312 if (take_count == bc)
337 get_random_array(blk_out, bit_list_, arr_len, take_count);
348 get_block_subset(blk_out, blk_src, take_count);
361 float block_percent = float(bc) / float(in_count);
362 float bits_to_take = float(sample_count) * block_percent;
363 bits_to_take += 0.99f;
364 unsigned to_take = unsigned(bits_to_take);
368 to_take = unsigned(sample_count);
379 for (
unsigned rounds = 0; take_count && rounds < 10; ++rounds)
388 if (blk_src[
i] && (blk_out[
i] != blk_src[
i]))
390 new_count = process_word(blk_out, blk_src,
i, take_count);
391 take_count -= new_count;
403 sub_block_[
i] = blk_src[
i] & ~blk_out[
i];
413 get_random_array(blk_out, bit_list_, arr_len, take_count);
423 unsigned new_bits,
mask;
426 mask = unsigned(rand());
430 std::random_device rd;
432 std::mt19937_64 mt_rand(rd());
434 std::mt19937 mt_rand(rd());
436 unsigned src_rand = blk_src[nword] &
mask;
437 new_bits = src_rand & ~blk_out[nword];
443 if (new_count > take_count)
447 unsigned char blist[64];
450 std::shuffle(blist, blist + arr_size, mt_rand);
452 for (
unsigned j = 0; j < take_count; ++j)
454 value |= (1u << blist[j]);
457 new_count = take_count;
460 BM_ASSERT((new_bits & ~blk_src[nword]) == 0);
463 blk_out[nword] |= new_bits;
473 unsigned bit_list_size,
476 std::random_device rd;
478 std::mt19937_64 mt_rand(rd());
480 std::mt19937 mt_rand(rd());
483 for (
unsigned i = 0;
i <
count; ++
i)
Bit manipulation primitives (internal)
void get_block_subset(bm::word_t *blk_out, const bm::word_t *blk_src, unsigned count)
static unsigned process_word(bm::word_t *blk_out, const bm::word_t *blk_src, unsigned nword, unsigned take_count) noexcept
random_subset & operator=(const random_subset &)
bvector_type bv_nb_
blocks vector
void get_subset(BV &bv_out, const BV &bv_in, size_type in_count, size_type sample_count)
bm::gap_word_t bit_list_[bm::gap_max_bits]
BV::blocks_manager_type blocks_manager_type
static void get_random_array(bm::word_t *blk_out, bm::gap_word_t *bit_list, unsigned bit_list_size, unsigned count)
void sample(BV &bv_out, const BV &bv_in, size_type sample_count)
Get random subset of input vector.
static unsigned compute_take_count(unsigned bc, size_type in_count, size_type sample_count) noexcept
void simple_pick(BV &bv_out, const BV &bv_in, size_type sample_count, size_type first, size_type last)
simple picking algorithm for small number of items in [first,last] range
bvector_type::rs_index_type rsi_
RS index (block counts)
random_subset(const random_subset &)
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
bm::id_t word_bitcount(bm::id_t w) noexcept
unsigned bit_block_convert_to_arr(T *dest, const unsigned *src, bool inverted) noexcept
Convert bit block into an array of ints corresponding to 1 bits.
unsigned short bitscan(V w, B *bits) noexcept
Templated Bitscan with dynamic dispatch for best type.
void bit_block_set(bm::word_t *dst, bm::word_t value) noexcept
Bitblock memset operation.
void bit_block_copy(bm::word_t *dst, const bm::word_t *src) noexcept
Bitblock copy operation.
void set_bit(unsigned *dest, unsigned bitpos) noexcept
Set 1 bit in a block.
unsigned bit_list(T w, B *bits) noexcept
Unpacks word into list of ON bit indexes.
void gap_convert_to_bitset(unsigned *dest, const T *buf, unsigned len=0) noexcept
GAP block to bitblock conversion.
D gap_convert_to_arr(D *dest, const T *buf, unsigned dest_len, bool invert=false) noexcept
Convert gap block into array of ints corresponding to 1 bits.
void get_block_coord(BI_TYPE nb, unsigned &i, unsigned &j) noexcept
Recalc linear bvector block index into 2D matrix coordinates.
const unsigned set_block_size
unsigned short gap_word_t
const unsigned gap_max_bits
const GenericPointer< typename T::ValueType > T2 value