NCBI C++ ToolKit
Functions
SSE4.2 functions (internal)

Processor specific optimizations for SSE4.2 instructions (internals) More...

Functions

bm::id_t bm::sse42_bit_block_calc_count_change (const __m128i *block, const __m128i *block_end, unsigned *bit_count)
 
bool bm::sse2_sub_digest_5way (__m128i *dst, const __m128i *src1, const __m128i *src2, const __m128i *src3, const __m128i *src4) noexcept
 SUB block digest stride. More...
 
bool bm::sse2_sub_digest_3way (__m128i *dst, const __m128i *src1, const __m128i *src2) noexcept
 SUB block digest stride. More...
 
bm::id_t bm::sse4_bit_count (const __m128i *block, const __m128i *block_end) noexcept
 
bool bm::sse4_is_all_zero (const __m128i *block) noexcept
 check if block is all zero bits More...
 
bool bm::sse4_is_digest_zero (const __m128i *block) noexcept
 check if digest stride is all zero bits More...
 
void bm::sse4_block_set_digest (__m128i *dst, unsigned value) noexcept
 set digest stride to 0xFF.. or 0x0 value More...
 
unsigned bm::sse4_and_block (__m128i *dst, const __m128i *src) noexcept
 AND blocks: dst &= *src. More...
 
bool bm::sse4_and_digest (__m128i *dst, const __m128i *src) noexcept
 AND block digest stride dst &= *src. More...
 
bool bm::sse4_and_digest_2way (__m128i *dst, const __m128i *src1, const __m128i *src2) noexcept
 AND block digest stride dst = *src1 & src2. More...
 
bool bm::sse4_and_or_digest_2way (__m128i *dst, const __m128i *src1, const __m128i *src2) noexcept
 AND-OR block digest stride dst |= *src1 & src2. More...
 
bool bm::sse4_and_digest_3way (__m128i *dst, const __m128i *src1, const __m128i *src2) noexcept
 AND block digest stride. More...
 
bool bm::sse4_and_digest_5way (__m128i *dst, const __m128i *src1, const __m128i *src2, const __m128i *src3, const __m128i *src4) noexcept
 AND block digest stride. More...
 
bool bm::sse4_sub_digest (__m128i *dst, const __m128i *src) noexcept
 SUB (AND NOT) block digest stride dst &= ~*src. More...
 
bool bm::sse4_sub_digest_2way (__m128i *dst, const __m128i *src1, const __m128i *src2) noexcept
 2-operand SUB (AND NOT) block digest stride dst = src1 & ~*src2 More...
 
bool bm::sse4_sub_digest_5way (__m128i *dst, const __m128i *src1, const __m128i *src2, const __m128i *src3, const __m128i *src4) noexcept
 SUB block digest stride. More...
 
bool bm::sse4_sub_digest_3way (__m128i *dst, const __m128i *src1, const __m128i *src2) noexcept
 SUB block digest stride. More...
 
bool bm::sse4_is_all_one (const __m128i *block) noexcept
 check if block is all ONE bits More...
 
bool bm::sse42_test_all_one_wave (const void *ptr) noexcept
 check if SSE wave is all 0xFFFF...FFF More...
 
bool bm::sse42_test_all_zero_wave (const void *ptr) noexcept
 check if wave of pointers is all NULL More...
 
bool bm::sse42_test_all_zero_wave2 (const void *ptr0, const void *ptr1) noexcept
 check if 2 waves of pointers are all NULL More...
 
bool bm::sse42_test_all_eq_wave2 (const void *ptr0, const void *ptr1) noexcept
 check if wave of 2 pointers are the same (null or FULL) More...
 
unsigned bm::sse42_bit_block_calc_change (const __m128i *block, unsigned size) noexcept
 
void bm::sse42_bit_block_calc_xor_change (const __m128i *block, const __m128i *xor_block, unsigned size, unsigned *gc, unsigned *bc) noexcept
 
bool bm::sse42_bit_find_first_diff (const __m128i *block1, const __m128i *block2, unsigned *pos) noexcept
 Find first bit which is different between two bit-blocks. More...
 
bool bm::sse42_bit_find_first (const __m128i *block, unsigned off, unsigned *pos) noexcept
 Find first non-zero bit. More...
 
unsigned bm::sse4_gap_find (const bm::gap_word_t *pbuf, const bm::gap_word_t pos, const unsigned size) noexcept
 
unsigned bm::sse42_gap_bfind (const unsigned short *buf, unsigned pos, unsigned *is_set) noexcept
 Hybrid binary search, starts as binary, then switches to linear scan. More...
 
unsigned bm::sse42_gap_test (const unsigned short *buf, unsigned pos) noexcept
 Hybrid binary search to test GAP value, starts as binary, then switches to scan. More...
 
int bm::sse42_cmpge_u32 (__m128i vect4, unsigned value) noexcept
 Experimental (test) function to do SIMD vector search (lower bound) in sorted, growing array. More...
 
bool bm::sse42_shift_l1 (__m128i *block, unsigned *empty_acc, unsigned co1) noexcept
 block shift left by 1 More...
 
bool bm::sse42_shift_r1 (__m128i *block, unsigned *empty_acc, unsigned co1) noexcept
 block shift right by 1 More...
 
bool bm::sse42_shift_r1_and (__m128i *block, bm::word_t co1, const __m128i *mask_block, bm::id64_t *digest) noexcept
 block shift right by 1 plus AND More...
 
void bm::sse42_bit_block_xor (bm::word_t *target_block, const bm::word_t *block, const bm::word_t *xor_block, bm::id64_t digest) noexcept
 Build partial XOR product of 2 bit-blocks using digest mask. More...
 
void bm::sse42_bit_block_xor_2way (bm::word_t *target_block, const bm::word_t *xor_block, bm::id64_t digest) noexcept
 Build partial XOR product of 2 bit-blocks using digest mask. More...
 

Detailed Description

Processor specific optimizations for SSE4.2 instructions (internals)

Function Documentation

◆ sse2_sub_digest_3way()

bool bm::sse2_sub_digest_3way ( __m128i *  dst,
const __m128i *  src1,
const __m128i *  src2 
)
inline noexcept

SUB block digest stride.

Returns
true if stride is all zero

Definition at line 911 of file bmsse2.h.

References _mm_and_si128(), _mm_cmpeq_epi8(), _mm_load_si128(), _mm_movemask_epi8(), _mm_or_si128(), _mm_set1_epi32(), _mm_setzero_si128(), _mm_store_si128(), and _mm_xor_si128().

◆ sse2_sub_digest_5way()

bool bm::sse2_sub_digest_5way ( __m128i *  dst,
const __m128i *  src1,
const __m128i *  src2,
const __m128i *  src3,
const __m128i *  src4 
)
inline noexcept

SUB block digest stride.

Returns
true if stride is all zero

Definition at line 828 of file bmsse2.h.

References _mm_and_si128(), _mm_cmpeq_epi8(), _mm_load_si128(), _mm_movemask_epi8(), _mm_or_si128(), _mm_set1_epi32(), _mm_setzero_si128(), _mm_store_si128(), and _mm_xor_si128().

◆ sse42_bit_block_calc_change()

unsigned bm::sse42_bit_block_calc_change ( const __m128i *  block,
unsigned  size 
)
inline noexcept

◆ sse42_bit_block_calc_count_change()

bm::id_t bm::sse42_bit_block_calc_count_change ( const __m128i *  block,
const __m128i *  block_end,
unsigned *  bit_count 
)
inline

SSE4.2 optimized bitcounting and number of GAPs

Definition at line 1237 of file bmavx512.h.

References _mm_extract_epi32, _mm_load_si128(), _mm_popcnt_u32(), _mm_srli_epi32, _mm_xor_si128(), and b.

◆ sse42_bit_block_calc_xor_change()

void bm::sse42_bit_block_calc_xor_change ( const __m128i *  block,
const __m128i *  xor_block,
unsigned  size,
unsigned *  gc,
unsigned *  bc 
)
inline noexcept

◆ sse42_bit_block_xor()

void bm::sse42_bit_block_xor ( bm::word_t *  target_block,
const bm::word_t *  block,
const bm::word_t *  xor_block,
bm::id64_t  digest 
)
inline noexcept

Build partial XOR product of 2 bit-blocks using digest mask.

Parameters
target_block- target := block ^ xor_block
block- arg1
xor_block- arg2
digest- mask for each block wave to XOR (1) or just copy (0)

Definition at line 1988 of file bmsse4.h.

References _mm_load_si128(), _mm_store_si128(), _mm_xor_si128(), bm::block_waves, i, mask, and bm::set_block_digest_wave_size.

◆ sse42_bit_block_xor_2way()

void bm::sse42_bit_block_xor_2way ( bm::word_t *  target_block,
const bm::word_t *  xor_block,
bm::id64_t  digest 
)
inline noexcept

Build partial XOR product of 2 bit-blocks using digest mask.

Parameters
target_block- target ^= xor_block
xor_block- arg1
digest- mask for each block wave to XOR (if 1)

Definition at line 2058 of file bmsse4.h.

References _mm_load_si128(), _mm_popcnt_u64(), _mm_store_si128(), _mm_xor_si128(), bm::bmi_blsi_u64(), bm::bmi_bslr_u64(), bm::set_block_digest_wave_size, and t.

◆ sse42_bit_find_first()

bool bm::sse42_bit_find_first ( const __m128i *  block,
unsigned  off,
unsigned *  pos 
)
inline noexcept

◆ sse42_bit_find_first_diff()

bool bm::sse42_bit_find_first_diff ( const __m128i *  block1,
const __m128i *  block2,
unsigned *  pos 
)
inline noexcept

◆ sse42_cmpge_u32()

int bm::sse42_cmpge_u32 ( __m128i  vect4,
unsigned  value 
)
inline noexcept

Experimental (test) function to do SIMD vector search (lower bound) in sorted, growing array.

Definition at line 1527 of file bmsse4.h.

References _mm_cmpeq_epi32(), _mm_cmpgt_epi32(), _mm_movemask_epi8(), _mm_or_si128(), _mm_set1_epi32(), _mm_sub_epi32(), BM_BSF32, mask, and rapidjson::value.

Referenced by TestSIMDUtils().

◆ sse42_gap_bfind()

unsigned bm::sse42_gap_bfind ( const unsigned short *  buf,
unsigned  pos,
unsigned *  is_set 
)
inline noexcept

Hybrid binary search, starts as binary, then switches to linear scan.

Parameters
buf- GAP buffer pointer.
pos- index of the element.
is_set- output. GAP value (0 or 1).
Returns
GAP index.

Definition at line 1390 of file bmsse4.h.

References BM_ASSERT, BMRESTRICT, buf, ncbi::grid::netcache::search::fields::size, and bm::sse4_gap_find().

◆ sse42_gap_test()

unsigned bm::sse42_gap_test ( const unsigned short *  buf,
unsigned  pos 
)
inline noexcept

Hybrid binary search to test GAP value, starts as binary, then switches to scan.

Returns
test result

Definition at line 1461 of file bmsse4.h.

References BM_ASSERT, BMRESTRICT, buf, ncbi::grid::netcache::search::fields::size, and bm::sse4_gap_find().

◆ sse42_shift_l1()

bool bm::sse42_shift_l1 ( __m128i *  block,
unsigned *  empty_acc,
unsigned  co1 
)
inline noexcept

◆ sse42_shift_r1()

bool bm::sse42_shift_r1 ( __m128i *  block,
unsigned *  empty_acc,
unsigned  co1 
)
inline noexcept

◆ sse42_shift_r1_and()

bool bm::sse42_shift_r1_and ( __m128i *  block,
bm::word_t  co1,
const __m128i *  mask_block,
bm::id64_t *  digest 
)
inline noexcept

◆ sse42_test_all_eq_wave2()

bool bm::sse42_test_all_eq_wave2 ( const void *  ptr0,
const void *  ptr1 
)
inline noexcept

check if wave of 2 pointers are the same (null or FULL)

Definition at line 934 of file bmsse4.h.

References _mm_loadu_si128(), _mm_testz_si128(), and _mm_xor_si128().

Referenced by bm::bvector< Alloc >::combine_operation_or().

◆ sse42_test_all_one_wave()

bool bm::sse42_test_all_one_wave ( const void *  ptr)
inline noexcept

check if SSE wave is all 0xFFFF...FFF

Definition at line 899 of file bmsse4.h.

References _mm_loadu_si128(), and _mm_test_all_ones().

◆ sse42_test_all_zero_wave()

bool bm::sse42_test_all_zero_wave ( const void *  ptr)
inline noexcept

◆ sse42_test_all_zero_wave2()

bool bm::sse42_test_all_zero_wave2 ( const void *  ptr0,
const void *  ptr1 
)
inline noexcept

check if 2 waves of pointers are all NULL

Definition at line 921 of file bmsse4.h.

References _mm_loadu_si128(), _mm_or_si128(), and _mm_testz_si128().

Referenced by bm::bvector< Alloc >::combine_operation_xor().

◆ sse4_and_block()

unsigned bm::sse4_and_block ( __m128i *  dst,
const __m128i *  src 
)
inline noexcept

AND blocks: dst &= *src.

Returns
0 if no bits were set

Definition at line 294 of file bmsse4.h.

References _mm_and_si128(), _mm_load_si128(), _mm_or_si128(), _mm_setzero_si128(), _mm_store_si128(), _mm_testz_si128(), BMRESTRICT, and bm::set_block_size.

◆ sse4_and_digest()

bool bm::sse4_and_digest ( __m128i *  dst,
const __m128i *  src 
)
inline noexcept

AND block digest stride dst &= *src.

Returns
true if stride is all zero

Definition at line 341 of file bmsse4.h.

References _mm_and_si128(), _mm_load_si128(), _mm_or_si128(), _mm_store_si128(), and _mm_testz_si128().

◆ sse4_and_digest_2way()

bool bm::sse4_and_digest_2way ( __m128i *  dst,
const __m128i *  src1,
const __m128i *  src2 
)
inline noexcept

AND block digest stride dst = *src1 & src2.

Returns
true if stride is all zero

Definition at line 389 of file bmsse4.h.

References _mm_and_si128(), _mm_load_si128(), _mm_or_si128(), _mm_store_si128(), and _mm_testz_si128().

◆ sse4_and_digest_3way()

bool bm::sse4_and_digest_3way ( __m128i *  dst,
const __m128i *  src1,
const __m128i *  src2 
)
inline noexcept

AND block digest stride.

Returns
true if stride is all zero

Definition at line 491 of file bmsse4.h.

References _mm_and_si128(), _mm_load_si128(), _mm_or_si128(), _mm_store_si128(), and _mm_testz_si128().

◆ sse4_and_digest_5way()

bool bm::sse4_and_digest_5way ( __m128i *  dst,
const __m128i *  src1,
const __m128i *  src2,
const __m128i *  src3,
const __m128i *  src4 
)
inline noexcept

AND block digest stride.

Returns
true if stride is all zero

Definition at line 552 of file bmsse4.h.

References _mm_and_si128(), _mm_load_si128(), _mm_or_si128(), _mm_store_si128(), and _mm_testz_si128().

◆ sse4_and_or_digest_2way()

bool bm::sse4_and_or_digest_2way ( __m128i *  dst,
const __m128i *  src1,
const __m128i *  src2 
)
inline noexcept

AND-OR block digest stride dst |= *src1 & src2.

Returns
true if stride is all zero

Definition at line 438 of file bmsse4.h.

References _mm_and_si128(), _mm_load_si128(), _mm_or_si128(), _mm_store_si128(), and _mm_testz_si128().

◆ sse4_bit_count()

bm::id_t bm::sse4_bit_count ( const __m128i *  block,
const __m128i *  block_end 
)
inline noexcept

SSE4.2 optimized bit counting.

Definition at line 93 of file bmsse4.h.

References _mm_popcnt_u32(), _mm_popcnt_u64(), and b.

◆ sse4_block_set_digest()

void bm::sse4_block_set_digest ( __m128i *  dst,
unsigned  value 
)
inline noexcept

set digest stride to 0xFF.. or 0x0 value

Definition at line 276 of file bmsse4.h.

References _mm_set1_epi32(), _mm_store_si128(), and rapidjson::value.

◆ sse4_gap_find()

unsigned bm::sse4_gap_find ( const bm::gap_word_t *  pbuf,
const bm::gap_word_t  pos,
const unsigned  size 
)
inline noexcept

◆ sse4_is_all_one()

bool bm::sse4_is_all_one ( const __m128i *  block)
inline noexcept

check if block is all ONE bits

Definition at line 874 of file bmsse4.h.

References _mm_and_si128(), _mm_load_si128(), _mm_test_all_ones(), BMRESTRICT, and bm::set_block_size.

Referenced by TestSIMDUtils().

◆ sse4_is_all_zero()

bool bm::sse4_is_all_zero ( const __m128i *  block)
inline noexcept

check if block is all zero bits

Definition at line 232 of file bmsse4.h.

References _mm_cmpeq_epi8(), _mm_load_si128(), _mm_or_si128(), _mm_setzero_si128(), _mm_test_all_ones(), BMRESTRICT, and bm::set_block_size.

Referenced by TestSIMDUtils().

◆ sse4_is_digest_zero()

bool bm::sse4_is_digest_zero ( const __m128i *  block)
inline noexcept

check if digest stride is all zero bits

Definition at line 257 of file bmsse4.h.

References _mm_load_si128(), _mm_or_si128(), and _mm_test_all_zeros().

◆ sse4_sub_digest()

bool bm::sse4_sub_digest ( __m128i *  dst,
const __m128i *  src 
)
inline noexcept

SUB (AND NOT) block digest stride dst &= ~*src.

Returns
true if stride is all zero

Definition at line 636 of file bmsse4.h.

References _mm_andnot_si128(), _mm_load_si128(), _mm_or_si128(), _mm_store_si128(), and _mm_testz_si128().

◆ sse4_sub_digest_2way()

bool bm::sse4_sub_digest_2way ( __m128i *  dst,
const __m128i *  src1,
const __m128i *  src2 
)
inline noexcept

2-operand SUB (AND NOT) block digest stride dst = src1 & ~*src2

Returns
true if stride is all zero

Definition at line 685 of file bmsse4.h.

References _mm_andnot_si128(), _mm_load_si128(), _mm_or_si128(), _mm_store_si128(), and _mm_testz_si128().

◆ sse4_sub_digest_3way()

bool bm::sse4_sub_digest_3way ( __m128i *  dst,
const __m128i *  src1,
const __m128i *  src2 
)
inline noexcept

SUB block digest stride.

Returns
true if stride is all zero

Definition at line 814 of file bmsse4.h.

References _mm_and_si128(), _mm_load_si128(), _mm_or_si128(), _mm_set1_epi32(), _mm_store_si128(), _mm_testz_si128(), and _mm_xor_si128().

◆ sse4_sub_digest_5way()

bool bm::sse4_sub_digest_5way ( __m128i *  dst,
const __m128i *  src1,
const __m128i *  src2,
const __m128i *  src3,
const __m128i *  src4 
)
inline noexcept

SUB block digest stride.

Returns
true if stride is all zero

Definition at line 732 of file bmsse4.h.

References _mm_and_si128(), _mm_load_si128(), _mm_or_si128(), _mm_set1_epi32(), _mm_store_si128(), _mm_testz_si128(), and _mm_xor_si128().

Modified on Mon May 06 04:49:40 2024 by modify_doxy.py rev. 669887