NCBI C++ ToolKit
|
Processor specific optimizations for SSE4.2 instructions (internals) More...
Functions | |
bm::id_t | bm::sse42_bit_block_calc_count_change (const __m128i *block, const __m128i *block_end, unsigned *bit_count) |
bool | bm::sse2_sub_digest_5way (__m128i *dst, const __m128i *src1, const __m128i *src2, const __m128i *src3, const __m128i *src4) noexcept |
SUB block digest stride. More... | |
bool | bm::sse2_sub_digest_3way (__m128i *dst, const __m128i *src1, const __m128i *src2) noexcept |
SUB block digest stride. More... | |
bm::id_t | bm::sse4_bit_count (const __m128i *block, const __m128i *block_end) noexcept |
bool | bm::sse4_is_all_zero (const __m128i *block) noexcept |
check if block is all zero bits More... | |
bool | bm::sse4_is_digest_zero (const __m128i *block) noexcept |
check if digest stride is all zero bits More... | |
void | bm::sse4_block_set_digest (__m128i *dst, unsigned value) noexcept |
set digest stride to 0xFF.. or 0x0 value More... | |
unsigned | bm::sse4_and_block (__m128i *dst, const __m128i *src) noexcept |
AND of two blocks: dst &= *src. More... | |
bool | bm::sse4_and_digest (__m128i *dst, const __m128i *src) noexcept |
AND block digest stride dst &= *src. More... | |
bool | bm::sse4_and_digest_2way (__m128i *dst, const __m128i *src1, const __m128i *src2) noexcept |
AND block digest stride dst = *src1 & *src2. More... | |
bool | bm::sse4_and_or_digest_2way (__m128i *dst, const __m128i *src1, const __m128i *src2) noexcept |
AND-OR block digest stride dst |= *src1 & *src2. More... | |
bool | bm::sse4_and_digest_3way (__m128i *dst, const __m128i *src1, const __m128i *src2) noexcept |
AND block digest stride. More... | |
bool | bm::sse4_and_digest_5way (__m128i *dst, const __m128i *src1, const __m128i *src2, const __m128i *src3, const __m128i *src4) noexcept |
AND block digest stride. More... | |
bool | bm::sse4_sub_digest (__m128i *dst, const __m128i *src) noexcept |
SUB (AND NOT) block digest stride dst &= ~*src. More... | |
bool | bm::sse4_sub_digest_2way (__m128i *dst, const __m128i *src1, const __m128i *src2) noexcept |
2-operand SUB (AND NOT) block digest stride dst = *src1 & ~*src2. More... | |
bool | bm::sse4_sub_digest_5way (__m128i *dst, const __m128i *src1, const __m128i *src2, const __m128i *src3, const __m128i *src4) noexcept |
SUB block digest stride. More... | |
bool | bm::sse4_sub_digest_3way (__m128i *dst, const __m128i *src1, const __m128i *src2) noexcept |
SUB block digest stride. More... | |
bool | bm::sse4_is_all_one (const __m128i *block) noexcept |
check if block is all ONE bits More... | |
bool | bm::sse42_test_all_one_wave (const void *ptr) noexcept |
check if SSE wave is all 0xFFFF...FFF More... | |
bool | bm::sse42_test_all_zero_wave (const void *ptr) noexcept |
check if wave of pointers is all NULL More... | |
bool | bm::sse42_test_all_zero_wave2 (const void *ptr0, const void *ptr1) noexcept |
check if 2 waves of pointers are all NULL More... | |
bool | bm::sse42_test_all_eq_wave2 (const void *ptr0, const void *ptr1) noexcept |
check if 2 waves of pointers are the same (null or FULL) More... | |
unsigned | bm::sse42_bit_block_calc_change (const __m128i *block, unsigned size) noexcept |
void | bm::sse42_bit_block_calc_xor_change (const __m128i *block, const __m128i *xor_block, unsigned size, unsigned *gc, unsigned *bc) noexcept |
bool | bm::sse42_bit_find_first_diff (const __m128i *block1, const __m128i *block2, unsigned *pos) noexcept |
Find first bit which is different between two bit-blocks. More... | |
bool | bm::sse42_bit_find_first (const __m128i *block, unsigned off, unsigned *pos) noexcept |
Find first non-zero bit. More... | |
unsigned | bm::sse4_gap_find (const bm::gap_word_t *pbuf, const bm::gap_word_t pos, const unsigned size) noexcept |
unsigned | bm::sse42_gap_bfind (const unsigned short *buf, unsigned pos, unsigned *is_set) noexcept |
Hybrid binary search, starts as binary, then switches to linear scan. More... | |
unsigned | bm::sse42_gap_test (const unsigned short *buf, unsigned pos) noexcept |
Hybrid binary search to test GAP value, starts as binary, then switches to scan. More... | |
int | bm::sse42_cmpge_u32 (__m128i vect4, unsigned value) noexcept |
Experimental (test) function to do SIMD vector search (lower bound) in sorted, growing array. More... | |
bool | bm::sse42_shift_l1 (__m128i *block, unsigned *empty_acc, unsigned co1) noexcept |
block shift left by 1 More... | |
bool | bm::sse42_shift_r1 (__m128i *block, unsigned *empty_acc, unsigned co1) noexcept |
block shift right by 1 More... | |
bool | bm::sse42_shift_r1_and (__m128i *block, bm::word_t co1, const __m128i *mask_block, bm::id64_t *digest) noexcept |
block shift right by 1 plus AND More... | |
void | bm::sse42_bit_block_xor (bm::word_t *target_block, const bm::word_t *block, const bm::word_t *xor_block, bm::id64_t digest) noexcept |
Build partial XOR product of 2 bit-blocks using digest mask. More... | |
void | bm::sse42_bit_block_xor_2way (bm::word_t *target_block, const bm::word_t *xor_block, bm::id64_t digest) noexcept |
Build partial XOR product of 2 bit-blocks using digest mask. More... | |
Processor specific optimizations for SSE4.2 instructions (internals)
|
inline noexcept |
SUB block digest stride.
Definition at line 911 of file bmsse2.h.
References _mm_and_si128(), _mm_cmpeq_epi8(), _mm_load_si128(), _mm_movemask_epi8(), _mm_or_si128(), _mm_set1_epi32(), _mm_setzero_si128(), _mm_store_si128(), and _mm_xor_si128().
|
inline noexcept |
SUB block digest stride.
Definition at line 828 of file bmsse2.h.
References _mm_and_si128(), _mm_cmpeq_epi8(), _mm_load_si128(), _mm_movemask_epi8(), _mm_or_si128(), _mm_set1_epi32(), _mm_setzero_si128(), _mm_store_si128(), and _mm_xor_si128().
SSE4.2 calculate number of bit changes from 0 to 1
Definition at line 948 of file bmsse4.h.
References _mm_extract_epi32, _mm_insert_epi32, _mm_load_si128(), _mm_or_si128(), _mm_popcnt_u32(), _mm_popcnt_u64(), _mm_slli_epi32(), _mm_slli_si128(), _mm_srli_epi32, _mm_store_si128(), _mm_xor_si128(), BM_ALIGN32, BM_ALIGN32ATTR, and ncbi::grid::netcache::search::fields::size.
|
inline |
SSE4.2 optimized bitcounting and number of GAPs
Definition at line 1237 of file bmavx512.h.
References _mm_extract_epi32, _mm_load_si128(), _mm_popcnt_u32(), _mm_srli_epi32, _mm_xor_si128(), and b.
|
inline noexcept |
SSE4.2 calculate number of bit changes from 0 to 1 of a XOR product
Definition at line 1025 of file bmsse4.h.
References _mm_extract_epi32, _mm_insert_epi32, _mm_load_si128(), _mm_or_si128(), _mm_popcnt_u32(), _mm_popcnt_u64(), _mm_slli_epi32(), _mm_slli_si128(), _mm_srli_epi32, _mm_store_si128(), _mm_xor_si128(), BM_ALIGN32, BM_ALIGN32ATTR, and ncbi::grid::netcache::search::fields::size.
|
inline noexcept |
Build partial XOR product of 2 bit-blocks using digest mask.
target_block | - target := block ^ xor_block |
block | - arg1 |
xor_block | - arg2 |
digest | - mask for each block wave to XOR (1) or just copy (0) |
Definition at line 1988 of file bmsse4.h.
References _mm_load_si128(), _mm_store_si128(), _mm_xor_si128(), bm::block_waves, i, mask, and bm::set_block_digest_wave_size.
|
inline noexcept |
Build partial XOR product of 2 bit-blocks using digest mask.
target_block | - target ^= xor_block |
xor_block | - arg1 |
digest | - mask for each block wave to XOR (if 1) |
Definition at line 2058 of file bmsse4.h.
References _mm_load_si128(), _mm_popcnt_u64(), _mm_store_si128(), _mm_xor_si128(), bm::bmi_blsi_u64(), bm::bmi_bslr_u64(), bm::set_block_digest_wave_size, and t.
|
inline noexcept |
Find first non-zero bit.
Definition at line 1274 of file bmsse4.h.
References _mm_cmpeq_epi32(), _mm_load_si128(), _mm_movemask_epi8(), _mm_or_si128(), _mm_setzero_si128(), _mm_store_si128(), _mm_test_all_zeros(), BM_ALIGN32, BM_ALIGN32ATTR, BM_ASSERT, BM_BSF32, mask, and bm::set_block_size.
|
inline noexcept |
Find first bit which is different between two bit-blocks.
Definition at line 1218 of file bmsse4.h.
References _mm_cmpeq_epi32(), _mm_load_si128(), _mm_movemask_epi8(), _mm_or_si128(), _mm_setzero_si128(), _mm_store_si128(), _mm_test_all_zeros(), _mm_xor_si128(), BM_ALIGN32, BM_ALIGN32ATTR, BM_ASSERT, BM_BSF32, mask, and bm::set_block_size.
Experimental (test) function to do SIMD vector search (lower bound) in sorted, growing array.
Definition at line 1527 of file bmsse4.h.
References _mm_cmpeq_epi32(), _mm_cmpgt_epi32(), _mm_movemask_epi8(), _mm_or_si128(), _mm_set1_epi32(), _mm_sub_epi32(), BM_BSF32, mask, and rapidjson::value.
Referenced by TestSIMDUtils().
|
inline noexcept |
Hybrid binary search, starts as binary, then switches to linear scan.
buf | - GAP buffer pointer. |
pos | - index of the element. |
is_set | - output. GAP value (0 or 1). |
Definition at line 1390 of file bmsse4.h.
References BM_ASSERT, BMRESTRICT, buf, ncbi::grid::netcache::search::fields::size, and bm::sse4_gap_find().
|
inline noexcept |
Hybrid binary search to test GAP value, starts as binary, then switches to scan.
Definition at line 1461 of file bmsse4.h.
References BM_ASSERT, BMRESTRICT, buf, ncbi::grid::netcache::search::fields::size, and bm::sse4_gap_find().
block shift left by 1
Definition at line 1773 of file bmsse4.h.
References _mm_and_si128(), _mm_extract_epi32, _mm_insert_epi32, _mm_load_si128(), _mm_or_si128(), _mm_set1_epi32(), _mm_slli_epi32(), _mm_srli_epi32, _mm_srli_si128(), _mm_store_si128(), _mm_testz_si128(), and bm::set_block_size.
block shift right by 1
Definition at line 1823 of file bmsse4.h.
References _mm_extract_epi32, _mm_insert_epi32, _mm_load_si128(), _mm_or_si128(), _mm_set1_epi32(), _mm_slli_epi32(), _mm_slli_si128(), _mm_srli_epi32, _mm_store_si128(), _mm_testz_si128(), and bm::set_block_size.
|
inline noexcept |
block shift right by 1 plus AND
Definition at line 1873 of file bmsse4.h.
References _mm_and_si128(), _mm_extract_epi32, _mm_insert_epi32, _mm_load_si128(), _mm_or_si128(), _mm_popcnt_u32(), _mm_popcnt_u64(), _mm_set1_epi32(), _mm_slli_epi32(), _mm_slli_si128(), _mm_srli_epi32, _mm_store_si128(), _mm_testz_si128(), _mm_xor_si128(), BM_ASSERT, i, bm::id_max, bm::set_block_digest_wave_size, and t.
check if 2 waves of pointers are the same (null or FULL)
Definition at line 934 of file bmsse4.h.
References _mm_loadu_si128(), _mm_testz_si128(), and _mm_xor_si128().
Referenced by bm::bvector< Alloc >::combine_operation_or().
check if SSE wave is all 0xFFFF...FFF
Definition at line 899 of file bmsse4.h.
References _mm_loadu_si128(), and _mm_test_all_ones().
check if wave of pointers is all NULL
Definition at line 910 of file bmsse4.h.
References _mm_loadu_si128(), and _mm_testz_si128().
Referenced by bm::bvector< Alloc >::combine_operation_and(), bm::bvector< Alloc >::combine_operation_sub(), bm::blocks_manager< Alloc >::deallocate_top_subblock(), bm::for_each_bit(), and bm::for_each_nzblock().
check if 2 waves of pointers are all NULL
Definition at line 921 of file bmsse4.h.
References _mm_loadu_si128(), _mm_or_si128(), and _mm_testz_si128().
Referenced by bm::bvector< Alloc >::combine_operation_xor().
AND of two blocks: dst &= *src.
Definition at line 294 of file bmsse4.h.
References _mm_and_si128(), _mm_load_si128(), _mm_or_si128(), _mm_setzero_si128(), _mm_store_si128(), _mm_testz_si128(), BMRESTRICT, and bm::set_block_size.
AND block digest stride dst &= *src.
Definition at line 341 of file bmsse4.h.
References _mm_and_si128(), _mm_load_si128(), _mm_or_si128(), _mm_store_si128(), and _mm_testz_si128().
|
inline noexcept |
AND block digest stride dst = *src1 & *src2.
Definition at line 389 of file bmsse4.h.
References _mm_and_si128(), _mm_load_si128(), _mm_or_si128(), _mm_store_si128(), and _mm_testz_si128().
|
inline noexcept |
AND block digest stride.
Definition at line 491 of file bmsse4.h.
References _mm_and_si128(), _mm_load_si128(), _mm_or_si128(), _mm_store_si128(), and _mm_testz_si128().
|
inline noexcept |
AND block digest stride.
Definition at line 552 of file bmsse4.h.
References _mm_and_si128(), _mm_load_si128(), _mm_or_si128(), _mm_store_si128(), and _mm_testz_si128().
|
inline noexcept |
AND-OR block digest stride dst |= *src1 & *src2.
Definition at line 438 of file bmsse4.h.
References _mm_and_si128(), _mm_load_si128(), _mm_or_si128(), _mm_store_si128(), and _mm_testz_si128().
SSE4.2 optimized bit counting.
Definition at line 93 of file bmsse4.h.
References _mm_popcnt_u32(), _mm_popcnt_u64(), and b.
|
inline noexcept |
set digest stride to 0xFF.. or 0x0 value
Definition at line 276 of file bmsse4.h.
References _mm_set1_epi32(), _mm_store_si128(), and rapidjson::value.
|
inline noexcept |
SSE4.2 check for one to two (variable len) 128 bit SSE lines for gap search results (8 elements)
Definition at line 1340 of file bmsse4.h.
References _mm_andnot_si128(), _mm_cmpeq_epi16(), _mm_cmpeq_epi64(), _mm_loadu_si128(), _mm_movemask_epi8(), _mm_or_si128(), _mm_popcnt_u32(), _mm_set1_epi16(), _mm_setzero_si128(), _mm_slli_epi16(), _mm_slli_epi64(), _mm_slli_si128(), _mm_subs_epu16(), BM_ASSERT, BMRESTRICT, and ncbi::grid::netcache::search::fields::size.
Referenced by bm::sse42_gap_bfind(), bm::sse42_gap_test(), and TestSIMDUtils().
check if block is all ONE bits
Definition at line 874 of file bmsse4.h.
References _mm_and_si128(), _mm_load_si128(), _mm_test_all_ones(), BMRESTRICT, and bm::set_block_size.
Referenced by TestSIMDUtils().
check if block is all zero bits
Definition at line 232 of file bmsse4.h.
References _mm_cmpeq_epi8(), _mm_load_si128(), _mm_or_si128(), _mm_setzero_si128(), _mm_test_all_ones(), BMRESTRICT, and bm::set_block_size.
Referenced by TestSIMDUtils().
check if digest stride is all zero bits
Definition at line 257 of file bmsse4.h.
References _mm_load_si128(), _mm_or_si128(), and _mm_test_all_zeros().
SUB (AND NOT) block digest stride dst &= ~*src.
Definition at line 636 of file bmsse4.h.
References _mm_andnot_si128(), _mm_load_si128(), _mm_or_si128(), _mm_store_si128(), and _mm_testz_si128().
|
inline noexcept |
2-operand SUB (AND NOT) block digest stride dst = *src1 & ~*src2.
Definition at line 685 of file bmsse4.h.
References _mm_andnot_si128(), _mm_load_si128(), _mm_or_si128(), _mm_store_si128(), and _mm_testz_si128().
|
inline noexcept |
SUB block digest stride.
Definition at line 814 of file bmsse4.h.
References _mm_and_si128(), _mm_load_si128(), _mm_or_si128(), _mm_set1_epi32(), _mm_store_si128(), _mm_testz_si128(), and _mm_xor_si128().
|
inline noexcept |
SUB block digest stride.
Definition at line 732 of file bmsse4.h.
References _mm_and_si128(), _mm_load_si128(), _mm_or_si128(), _mm_set1_epi32(), _mm_store_si128(), _mm_testz_si128(), and _mm_xor_si128().