NCBI C++ ToolKit
|
Processor specific optimizations for SSE2 instructions (internals) More...
Classes | |
class | bm::sse_empty_guard |
SSE2 reinitialization guard class. More... | |
Functions | |
bm::id_t | bm::sse2_bit_count (const __m128i *block, const __m128i *block_end) |
bool | bm::sse2_is_all_zero (const __m128i *block) noexcept |
check if block is all zero bits More... | |
bool | bm::sse2_is_all_one (const __m128i *block) noexcept |
check if block is all ONE bits More... | |
bool | bm::sse2_is_digest_zero (const __m128i *block) noexcept |
check if digest stride is all zero bits More... | |
void | bm::sse2_block_set_digest (__m128i *dst, unsigned value) noexcept |
set digest stride to 0xFF.. or 0x0 value More... | |
void | bm::sse2_bit_block_xor (bm::word_t *target_block, const bm::word_t *block, const bm::word_t *xor_block, bm::id64_t digest) noexcept |
Build partial XOR product of 2 bit-blocks using digest mask. More... | |
void | bm::sse2_bit_block_xor_2way (bm::word_t *target_block, const bm::word_t *xor_block, bm::id64_t digest) noexcept |
Build partial XOR product of 2 bit-blocks using digest mask. More... | |
bool | bm::sse2_and_digest (__m128i *dst, const __m128i *src) noexcept |
AND block digest stride dst &= *src. More... | |
bool | bm::sse2_and_or_digest_2way (__m128i *dst, const __m128i *src1, const __m128i *src2) noexcept |
AND-OR block digest stride dst |= *src1 & src2. More... | |
bool | bm::sse2_and_digest_5way (__m128i *dst, const __m128i *src1, const __m128i *src2, const __m128i *src3, const __m128i *src4) noexcept |
AND block digest stride. More... | |
bool | bm::sse2_and_digest_3way (__m128i *dst, const __m128i *src1, const __m128i *src2) noexcept |
AND block digest stride. More... | |
bool | bm::sse2_and_digest_2way (__m128i *dst, const __m128i *src1, const __m128i *src2) noexcept |
AND block digest stride dst = *src1 & src2. More... | |
bool | bm::sse2_sub_digest (__m128i *dst, const __m128i *src) noexcept |
SUB (AND NOT) block digest stride dst &= ~*src. More... | |
bool | bm::sse2_sub_digest_2way (__m128i *dst, const __m128i *src1, const __m128i *src2) noexcept |
2-operand SUB (AND NOT) block digest stride dst = src1 & ~*src2 More... | |
bool | bm::sse2_bit_find_first (const __m128i *block, unsigned off, unsigned *pos) noexcept |
Find first non-zero bit. More... | |
bool | bm::sse2_bit_find_first_diff (const __m128i *block1, const __m128i *block2, unsigned *pos) noexcept |
Find first bit which is different between two bit-blocks. More... | |
bool | bm::sse2_shift_r1 (__m128i *block, unsigned *empty_acc, unsigned co1) noexcept |
block shift right by 1 More... | |
bool | bm::sse2_shift_l1 (__m128i *block, unsigned *empty_acc, unsigned co1) noexcept |
block shift left by 1 More... | |
unsigned | bm::sse2_gap_bfind (const unsigned short *buf, unsigned pos, unsigned *is_set) |
Hybrid binary search, starts as binary, then switches to linear scan. More... | |
unsigned | bm::sse2_gap_test (const unsigned short *buf, unsigned pos) |
Hybrid binary search, starts as binary, then switches to scan. More... | |
void | bm::sse2_xor_arr_2_mask (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end, bm::word_t mask) BMNOEXCEPT |
XOR array elements to specified mask dst = *src ^ mask. More... | |
void | bm::sse2_andnot_arr_2_mask (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end, bm::word_t mask) BMNOEXCEPT |
Inverts array elements and ANDs them with the specified mask: dst = ~*src & mask. More... | |
unsigned | bm::sse2_and_block (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src) BMNOEXCEPT |
AND two blocks: dst &= *src. More... | |
unsigned | bm::sse2_and_arr_unal (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end) BMNOEXCEPT |
AND array elements against another array (unaligned) dst &= *src. More... | |
bool | bm::sse2_or_block (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src) BMNOEXCEPT |
OR array elements against another array dst |= *src. More... | |
bool | bm::sse2_or_arr_unal (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end) BMNOEXCEPT |
OR array elements against another array (unaligned) dst |= *src. More... | |
bool | bm::sse2_or_block_2way (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src1, const __m128i *BMRESTRICT src2) BMNOEXCEPT |
OR 2 blocks and copy the result to the destination: dst = *src1 | src2. More... | |
bool | bm::sse2_or_block_3way (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src1, const __m128i *BMRESTRICT src2) BMNOEXCEPT |
OR array elements against another 2 arrays dst |= *src1 | src2. More... | |
bool | bm::sse2_or_block_5way (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src1, const __m128i *BMRESTRICT src2, const __m128i *BMRESTRICT src3, const __m128i *BMRESTRICT src4) BMNOEXCEPT |
OR array elements against another 4 arrays: dst |= *src1 | src2 | src3 | src4. More... | |
unsigned | bm::sse2_xor_block (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src) BMNOEXCEPT |
XOR block against another dst ^= *src. More... | |
unsigned | bm::sse2_xor_block_2way (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src1, const __m128i *BMRESTRICT src2) BMNOEXCEPT |
3 operand XOR dst = *src1 ^ src2 More... | |
unsigned | bm::sse2_sub_block (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src) BMNOEXCEPT |
AND-NOT (SUB) array elements against another array dst &= ~*src. More... | |
void | bm::sse2_set_block (__m128i *BMRESTRICT dst, bm::word_t value) BMNOEXCEPT |
SSE2 block memset dst = value. More... | |
void | bm::sse2_copy_block (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src) BMNOEXCEPT |
SSE2 block copy dst = *src. More... | |
void | bm::sse2_copy_block_unalign (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src) BMNOEXCEPT |
SSE2 block copy (unaligned SRC) dst = *src. More... | |
void | bm::sse2_stream_block (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src) BMNOEXCEPT |
SSE2 block copy dst = *src. More... | |
void | bm::sse2_stream_block_unalign (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src) BMNOEXCEPT |
SSE2 block copy (unaligned src) dst = *src. More... | |
void | bm::sse2_invert_block (__m128i *BMRESTRICT dst) BMNOEXCEPT |
Invert bit block: dst = ~*dst (presumably implemented as dst ^= all-ones mask). More... | |
unsigned | bm::sse2_lower_bound_scan_u32 (const unsigned *BMRESTRICT arr, unsigned target, unsigned from, unsigned to) BMNOEXCEPT |
Lower bound (greater or equal) linear scan in an ascending sorted array. More... | |
Processor specific optimizations for SSE2 instructions (internals)
|
inline |
AND array elements against another array (unaligned) dst &= *src.
Definition at line 259 of file bmsse_util.h.
References _mm_and_si128(), _mm_load_si128(), _mm_loadu_si128(), _mm_or_si128(), _mm_setzero_si128(), _mm_store_si128(), BM_ALIGN16, and BM_ALIGN16ATTR.
Referenced by bm::decoder::get_32_AND().
|
inline |
AND two blocks: dst &= *src.
Definition at line 126 of file bmsse_util.h.
References _mm_and_si128(), _mm_load_si128(), _mm_or_si128(), _mm_setzero_si128(), _mm_store_si128(), BM_ALIGN16, BM_ALIGN16ATTR, BMRESTRICT, and bm::set_block_size.
AND block digest stride dst &= *src.
Definition at line 415 of file bmsse2.h.
References _mm_and_si128(), _mm_cmpeq_epi8(), _mm_load_si128(), _mm_movemask_epi8(), _mm_or_si128(), _mm_setzero_si128(), and _mm_store_si128().
|
inline noexcept |
AND block digest stride dst = *src1 & src2.
Definition at line 681 of file bmsse2.h.
References _mm_and_si128(), _mm_cmpeq_epi8(), _mm_load_si128(), _mm_movemask_epi8(), _mm_or_si128(), _mm_setzero_si128(), and _mm_store_si128().
|
inline noexcept |
AND block digest stride.
Definition at line 599 of file bmsse2.h.
References _mm_and_si128(), _mm_cmpeq_epi8(), _mm_load_si128(), _mm_movemask_epi8(), _mm_or_si128(), _mm_setzero_si128(), and _mm_store_si128().
|
inline noexcept |
AND block digest stride.
Definition at line 519 of file bmsse2.h.
References _mm_and_si128(), _mm_cmpeq_epi8(), _mm_load_si128(), _mm_movemask_epi8(), _mm_or_si128(), _mm_setzero_si128(), and _mm_store_si128().
|
inline noexcept |
AND-OR block digest stride dst |= *src1 & src2.
Definition at line 464 of file bmsse2.h.
References _mm_and_si128(), _mm_cmpeq_epi8(), _mm_load_si128(), _mm_movemask_epi8(), _mm_or_si128(), _mm_setzero_si128(), and _mm_store_si128().
|
inline |
Inverts array elements and ANDs them with the specified mask: dst = ~*src & mask.
Definition at line 102 of file bmsse_util.h.
References _mm_andnot_si128(), _mm_load_si128(), _mm_set1_epi32(), _mm_store_si128(), and mask.
|
inline noexcept |
Build partial XOR product of 2 bit-blocks using digest mask.
target_block | - target := block ^ xor_block |
block | - arg1 |
xor_block | - arg2 |
digest | - mask for each block wave to XOR (1) or just copy (0) |
Definition at line 289 of file bmsse2.h.
References _mm_load_si128(), _mm_store_si128(), _mm_xor_si128(), bm::block_waves, i, mask, and bm::set_block_digest_wave_size.
|
inline noexcept |
Build partial XOR product of 2 bit-blocks using digest mask.
target_block | - target ^= xor_block |
xor_block | - arg1 |
digest | - mask for each block wave to XOR (if 1) |
Definition at line 360 of file bmsse2.h.
References _mm_load_si128(), _mm_store_si128(), _mm_xor_si128(), bm::bmi_blsi_u64(), bm::bmi_bslr_u64(), bm::set_block_digest_wave_size, t, and bm::word_bitcount64().
SSE2-optimized bit-counting function; implements a parallel bit-counting algorithm for the SSE2 instruction set.
unsigned CalcBitCount32(unsigned b) { b = (b & 0x55555555) + (b >> 1 & 0x55555555); b = (b & 0x33333333) + (b >> 2 & 0x33333333); b = (b + (b >> 4)) & 0x0F0F0F0F; b = b + (b >> 8); b = (b + (b >> 16)) & 0x0000003F; return b; }
Definition at line 66 of file bmsse2.h.
References _mm_add_epi32(), _mm_and_si128(), _mm_load_si128(), _mm_set_epi32(), _mm_srli_epi32, _mm_store_si128(), _mm_xor_si128(), b, BM_ALIGN16, BM_ALIGN16ATTR, and tmp1.
Find first non-zero bit.
Definition at line 992 of file bmsse2.h.
References _mm_cmpeq_epi32(), _mm_cmpeq_epi8(), _mm_load_si128(), _mm_movemask_epi8(), _mm_or_si128(), _mm_setzero_si128(), _mm_store_si128(), bm::bit_scan_forward32(), BM_ALIGN32, BM_ALIGN32ATTR, BM_ASSERT, mask, and bm::set_block_size.
|
inline noexcept |
Find first bit which is different between two bit-blocks.
Definition at line 1048 of file bmsse2.h.
References _mm_cmpeq_epi32(), _mm_cmpeq_epi8(), _mm_load_si128(), _mm_movemask_epi8(), _mm_or_si128(), _mm_setzero_si128(), _mm_store_si128(), _mm_xor_si128(), bm::bit_scan_forward32(), BM_ALIGN32, BM_ALIGN32ATTR, BM_ASSERT, mask, and bm::set_block_size.
|
inline noexcept |
set digest stride to 0xFF.. or 0x0 value
Definition at line 268 of file bmsse2.h.
References _mm_set1_epi32(), _mm_store_si128(), and rapidjson::value.
|
inline |
SSE2 block copy dst = *src.
Definition at line 838 of file bmsse_util.h.
References _mm_load_si128(), _mm_store_si128(), BMRESTRICT, and bm::set_block_size.
|
inline |
SSE2 block copy (unaligned SRC) dst = *src.
Definition at line 879 of file bmsse_util.h.
References _mm_loadu_si128(), _mm_store_si128(), BMRESTRICT, and bm::set_block_size.
|
inline |
Hybrid binary search, starts as binary, then switches to linear scan.
buf | - GAP buffer pointer. |
pos | - index of the element. |
is_set | - output. GAP value (0 or 1). |
Definition at line 1445 of file bmsse2.h.
References BM_ASSERT, buf, ncbi::grid::netcache::search::fields::size, and bm::sse2_gap_find().
Referenced by bm::sse2_gap_test().
|
inline |
Hybrid binary search, starts as binary, then switches to scan.
Definition at line 1502 of file bmsse2.h.
References buf, and bm::sse2_gap_bfind().
|
inline |
Invert bit block: dst = ~*dst (presumably implemented as dst ^= all-ones mask).
Definition at line 1006 of file bmsse_util.h.
References _mm_load_si128(), _mm_set1_epi32(), _mm_store_si128(), _mm_xor_si128(), BMRESTRICT, and bm::set_block_size.
check if block is all ONE bits
Definition at line 219 of file bmsse2.h.
References _mm_and_si128(), _mm_cmpeq_epi8(), _mm_load_si128(), _mm_movemask_epi8(), _mm_set_epi32(), BMRESTRICT, and bm::set_block_size.
check if block is all zero bits
Definition at line 194 of file bmsse2.h.
References _mm_cmpeq_epi8(), _mm_load_si128(), _mm_movemask_epi8(), _mm_or_si128(), _mm_setzero_si128(), BMRESTRICT, and bm::set_block_size.
check if digest stride is all zero bits
Definition at line 244 of file bmsse2.h.
References _mm_cmpeq_epi8(), _mm_load_si128(), _mm_movemask_epi8(), _mm_or_si128(), and _mm_setzero_si128().
|
inline |
Lower bound (greater or equal) linear scan in an ascending sorted array.
Definition at line 1099 of file bmsse_util.h.
References _mm_cmpeq_epi32(), _mm_cmpgt_epi32(), _mm_loadu_si128(), _mm_movemask_epi8(), _mm_or_si128(), _mm_set1_epi32(), _mm_sub_epi32(), arr, bm::bit_scan_forward32(), BMRESTRICT, len, and mask.
Referenced by TestSIMDUtils().
|
inline |
OR array elements against another array (unaligned) dst |= *src.
Definition at line 426 of file bmsse_util.h.
References _mm_and_si128(), _mm_cmpeq_epi8(), _mm_load_si128(), _mm_loadu_si128(), _mm_movemask_epi8(), _mm_or_si128(), _mm_set1_epi32(), and _mm_store_si128().
Referenced by bm::decoder::get_32_OR().
|
inline |
OR array elements against another array dst |= *src.
Definition at line 372 of file bmsse_util.h.
References _mm_and_si128(), _mm_cmpeq_epi8(), _mm_load_si128(), _mm_movemask_epi8(), _mm_or_si128(), _mm_set1_epi32(), _mm_store_si128(), BMRESTRICT, and bm::set_block_size.
|
inline |
OR 2 blocks and copy the result to the destination: dst = *src1 | src2.
Definition at line 478 of file bmsse_util.h.
References _mm_and_si128(), _mm_cmpeq_epi8(), _mm_load_si128(), _mm_movemask_epi8(), _mm_or_si128(), _mm_set1_epi32(), _mm_store_si128(), BMRESTRICT, and bm::set_block_size.
|
inline |
OR array elements against another 2 arrays dst |= *src1 | src2.
Definition at line 524 of file bmsse_util.h.
References _mm_and_si128(), _mm_cmpeq_epi8(), _mm_load_si128(), _mm_movemask_epi8(), _mm_or_si128(), _mm_set1_epi32(), _mm_store_si128(), BMRESTRICT, and bm::set_block_size.
|
inline |
OR array elements against another 4 arrays: dst |= *src1 | src2 | src3 | src4.
Definition at line 575 of file bmsse_util.h.
References _mm_and_si128(), _mm_cmpeq_epi8(), _MM_HINT_T0, _mm_load_si128(), _mm_movemask_epi8(), _mm_or_si128(), _mm_prefetch(), _mm_set1_epi32(), _mm_stream_si128(), BMRESTRICT, and bm::set_block_size.
|
inline |
SSE2 block memset dst = value.
Definition at line 809 of file bmsse_util.h.
References _mm_set1_epi32(), _mm_store_si128(), BMRESTRICT, bm::set_block_size, and rapidjson::value.
block shift left by 1
Definition at line 1165 of file bmsse2.h.
References _mm_and_si128(), _mm_cmpeq_epi8(), _mm_cvtsi128_si32(), _mm_load_si128(), _mm_movemask_epi8(), _mm_or_si128(), _mm_set1_epi32(), _mm_set_epi32(), _mm_slli_epi32(), _mm_srli_epi32, _mm_srli_si128(), _mm_store_si128(), and bm::set_block_size.
block shift right by 1
Definition at line 1112 of file bmsse2.h.
References _mm_and_si128(), _mm_cmpeq_epi8(), _mm_cvtsi128_si32(), _mm_load_si128(), _mm_movemask_epi8(), _mm_or_si128(), _mm_set1_epi32(), _mm_set_epi32(), _mm_shuffle_epi32, _mm_slli_epi32(), _mm_slli_si128(), _mm_srli_epi32, _mm_store_si128(), and bm::set_block_size.
|
inline |
SSE2 block copy dst = *src.
Definition at line 921 of file bmsse_util.h.
References _mm_load_si128(), _mm_stream_si128(), BMRESTRICT, and bm::set_block_size.
|
inline |
SSE2 block copy (unaligned src) dst = *src.
Definition at line 962 of file bmsse_util.h.
References _mm_loadu_si128(), _mm_stream_si128(), BMRESTRICT, and bm::set_block_size.
|
inline |
AND-NOT (SUB) array elements against another array dst &= ~*src.
Definition at line 744 of file bmsse_util.h.
References _mm_andnot_si128(), _mm_load_si128(), _mm_or_si128(), _mm_setzero_si128(), _mm_store_si128(), BM_ALIGN16, BM_ALIGN16ATTR, BMRESTRICT, and bm::set_block_size.
SUB (AND NOT) block digest stride dst &= ~*src.
Definition at line 731 of file bmsse2.h.
References _mm_andnot_si128(), _mm_cmpeq_epi8(), _mm_load_si128(), _mm_movemask_epi8(), _mm_or_si128(), _mm_setzero_si128(), and _mm_store_si128().
|
inline noexcept |
2-operand SUB (AND NOT) block digest stride dst = src1 & ~*src2
Definition at line 780 of file bmsse2.h.
References _mm_andnot_si128(), _mm_cmpeq_epi8(), _mm_load_si128(), _mm_movemask_epi8(), _mm_or_si128(), _mm_setzero_si128(), and _mm_store_si128().
|
inline |
XOR array elements to specified mask dst = *src ^ mask.
Definition at line 78 of file bmsse_util.h.
References _mm_load_si128(), _mm_set1_epi32(), _mm_store_si128(), _mm_xor_si128(), and mask.
|
inline |
XOR block against another dst ^= *src.
Definition at line 646 of file bmsse_util.h.
References _mm_load_si128(), _mm_or_si128(), _mm_setzero_si128(), _mm_store_si128(), _mm_xor_si128(), BM_ALIGN16, BM_ALIGN16ATTR, BMRESTRICT, and bm::set_block_size.
|
inline |
3 operand XOR dst = *src1 ^ src2
Definition at line 693 of file bmsse_util.h.
References _mm_load_si128(), _mm_or_si128(), _mm_setzero_si128(), _mm_store_si128(), _mm_xor_si128(), BM_ALIGN16, BM_ALIGN16ATTR, BMRESTRICT, and bm::set_block_size.