NCBI C++ ToolKit
Classes | Functions
SSE2 functions

Processor specific optimizations for SSE2 instructions (internals) More...

Classes

class  bm::sse_empty_guard
 SSE2 reinitialization guard class. More...
 

Functions

bm::id_t bm::sse2_bit_count (const __m128i *block, const __m128i *block_end)
 
bool bm::sse2_is_all_zero (const __m128i *block) noexcept
 check if block is all zero bits More...
 
bool bm::sse2_is_all_one (const __m128i *block) noexcept
 check if block is all ONE bits More...
 
bool bm::sse2_is_digest_zero (const __m128i *block) noexcept
 check if digest stride is all zero bits More...
 
void bm::sse2_block_set_digest (__m128i *dst, unsigned value) noexcept
 set digest stride to 0xFF.. or 0x0 value More...
 
void bm::sse2_bit_block_xor (bm::word_t *target_block, const bm::word_t *block, const bm::word_t *xor_block, bm::id64_t digest) noexcept
 Build partial XOR product of 2 bit-blocks using digest mask. More...
 
void bm::sse2_bit_block_xor_2way (bm::word_t *target_block, const bm::word_t *xor_block, bm::id64_t digest) noexcept
 Build partial XOR product of 2 bit-blocks using digest mask. More...
 
bool bm::sse2_and_digest (__m128i *dst, const __m128i *src) noexcept
 AND block digest stride dst &= *src. More...
 
bool bm::sse2_and_or_digest_2way (__m128i *dst, const __m128i *src1, const __m128i *src2) noexcept
 AND-OR block digest stride dst |= *src1 & src2. More...
 
bool bm::sse2_and_digest_5way (__m128i *dst, const __m128i *src1, const __m128i *src2, const __m128i *src3, const __m128i *src4) noexcept
 AND block digest stride. More...
 
bool bm::sse2_and_digest_3way (__m128i *dst, const __m128i *src1, const __m128i *src2) noexcept
 AND block digest stride. More...
 
bool bm::sse2_and_digest_2way (__m128i *dst, const __m128i *src1, const __m128i *src2) noexcept
 AND block digest stride dst = *src1 & src2. More...
 
bool bm::sse2_sub_digest (__m128i *dst, const __m128i *src) noexcept
 SUB (AND NOT) block digest stride dst &= ~*src. More...
 
bool bm::sse2_sub_digest_2way (__m128i *dst, const __m128i *src1, const __m128i *src2) noexcept
 2-operand SUB (AND NOT) block digest stride dst = src1 & ~*src2 More...
 
bool bm::sse2_bit_find_first (const __m128i *block, unsigned off, unsigned *pos) noexcept
 Find first non-zero bit. More...
 
bool bm::sse2_bit_find_first_diff (const __m128i *block1, const __m128i *block2, unsigned *pos) noexcept
 Find first bit which is different between two bit-blocks. More...
 
bool bm::sse2_shift_r1 (__m128i *block, unsigned *empty_acc, unsigned co1) noexcept
 block shift right by 1 More...
 
bool bm::sse2_shift_l1 (__m128i *block, unsigned *empty_acc, unsigned co1) noexcept
 block shift left by 1 More...
 
unsigned bm::sse2_gap_bfind (const unsigned short *buf, unsigned pos, unsigned *is_set)
 Hybrid binary search, starts as binary, then switches to linear scan. More...
 
unsigned bm::sse2_gap_test (const unsigned short *buf, unsigned pos)
 Hybrid binary search, starts as binary, then switches to scan. More...
 
void bm::sse2_xor_arr_2_mask (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end, bm::word_t mask) BMNOEXCEPT
 XOR array elements to specified mask dst = *src ^ mask. More...
 
void bm::sse2_andnot_arr_2_mask (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end, bm::word_t mask) BMNOEXCEPT
 Inverts array elements and ANDs them with the specified mask dst = ~*src & mask. More...
 
unsigned bm::sse2_and_block (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src) BMNOEXCEPT
 AND two blocks dst &= *src. More...
 
unsigned bm::sse2_and_arr_unal (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end) BMNOEXCEPT
 AND array elements against another array (unaligned) dst &= *src. More...
 
bool bm::sse2_or_block (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src) BMNOEXCEPT
 OR array elements against another array dst |= *src. More...
 
bool bm::sse2_or_arr_unal (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src, const __m128i *BMRESTRICT src_end) BMNOEXCEPT
 OR array elements against another array (unaligned) dst |= *src. More...
 
bool bm::sse2_or_block_2way (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src1, const __m128i *BMRESTRICT src2) BMNOEXCEPT
 OR 2 blocks and copy result to the destination dst = *src1 | src2. More...
 
bool bm::sse2_or_block_3way (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src1, const __m128i *BMRESTRICT src2) BMNOEXCEPT
 OR array elements against another 2 arrays dst |= *src1 | src2. More...
 
bool bm::sse2_or_block_5way (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src1, const __m128i *BMRESTRICT src2, const __m128i *BMRESTRICT src3, const __m128i *BMRESTRICT src4) BMNOEXCEPT
 OR array elements against another 4 arrays dst |= *src1 | src2 | src3 | src4. More...
 
unsigned bm::sse2_xor_block (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src) BMNOEXCEPT
 XOR block against another dst ^= *src. More...
 
unsigned bm::sse2_xor_block_2way (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src1, const __m128i *BMRESTRICT src2) BMNOEXCEPT
 3 operand XOR dst = *src1 ^ src2 More...
 
unsigned bm::sse2_sub_block (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src) BMNOEXCEPT
 AND-NOT (SUB) array elements against another array dst &= ~*src. More...
 
void bm::sse2_set_block (__m128i *BMRESTRICT dst, bm::word_t value) BMNOEXCEPT
 SSE2 block memset dst = value. More...
 
void bm::sse2_copy_block (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src) BMNOEXCEPT
 SSE2 block copy dst = *src. More...
 
void bm::sse2_copy_block_unalign (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src) BMNOEXCEPT
 SSE2 block copy (unaligned SRC) dst = *src. More...
 
void bm::sse2_stream_block (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src) BMNOEXCEPT
 SSE2 block copy dst = *src. More...
 
void bm::sse2_stream_block_unalign (__m128i *BMRESTRICT dst, const __m128i *BMRESTRICT src) BMNOEXCEPT
 SSE2 block copy (unaligned src) dst = *src. More...
 
void bm::sse2_invert_block (__m128i *BMRESTRICT dst) BMNOEXCEPT
 Invert bit block dst = ~*dst (equivalently, dst ^= all-ones mask). More...
 
unsigned bm::sse2_lower_bound_scan_u32 (const unsigned *BMRESTRICT arr, unsigned target, unsigned from, unsigned to) BMNOEXCEPT
 lower bound (greater or equal) linear scan in ascending order sorted array More...
 

Detailed Description

Processor specific optimizations for SSE2 instructions (internals)

Function Documentation

◆ sse2_and_arr_unal()

unsigned bm::sse2_and_arr_unal ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src,
const __m128i *BMRESTRICT  src_end 
)
inline

AND array elements against another array (unaligned) dst &= *src.

Returns
0 if no bits were set

Definition at line 259 of file bmsse_util.h.

References _mm_and_si128(), _mm_load_si128(), _mm_loadu_si128(), _mm_or_si128(), _mm_setzero_si128(), _mm_store_si128(), BM_ALIGN16, and BM_ALIGN16ATTR.

Referenced by bm::decoder::get_32_AND().

◆ sse2_and_block()

unsigned bm::sse2_and_block ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src 
)
inline

AND two blocks dst &= *src.

Returns
0 if no bits were set

Definition at line 126 of file bmsse_util.h.

References _mm_and_si128(), _mm_load_si128(), _mm_or_si128(), _mm_setzero_si128(), _mm_store_si128(), BM_ALIGN16, BM_ALIGN16ATTR, BMRESTRICT, and bm::set_block_size.

◆ sse2_and_digest()

bool bm::sse2_and_digest ( __m128i *  dst,
const __m128i *  src 
)
inline noexcept

AND block digest stride dst &= *src.

Returns
true if stride is all zero

Definition at line 415 of file bmsse2.h.

References _mm_and_si128(), _mm_cmpeq_epi8(), _mm_load_si128(), _mm_movemask_epi8(), _mm_or_si128(), _mm_setzero_si128(), and _mm_store_si128().

◆ sse2_and_digest_2way()

bool bm::sse2_and_digest_2way ( __m128i *  dst,
const __m128i *  src1,
const __m128i *  src2 
)
inline noexcept

AND block digest stride dst = *src1 & src2.

Returns
true if stride is all zero

Definition at line 681 of file bmsse2.h.

References _mm_and_si128(), _mm_cmpeq_epi8(), _mm_load_si128(), _mm_movemask_epi8(), _mm_or_si128(), _mm_setzero_si128(), and _mm_store_si128().

◆ sse2_and_digest_3way()

bool bm::sse2_and_digest_3way ( __m128i *  dst,
const __m128i *  src1,
const __m128i *  src2 
)
inline noexcept

AND block digest stride.

Returns
true if stride is all zero

Definition at line 599 of file bmsse2.h.

References _mm_and_si128(), _mm_cmpeq_epi8(), _mm_load_si128(), _mm_movemask_epi8(), _mm_or_si128(), _mm_setzero_si128(), and _mm_store_si128().

◆ sse2_and_digest_5way()

bool bm::sse2_and_digest_5way ( __m128i *  dst,
const __m128i *  src1,
const __m128i *  src2,
const __m128i *  src3,
const __m128i *  src4 
)
inline noexcept

AND block digest stride.

Returns
true if stride is all zero

Definition at line 519 of file bmsse2.h.

References _mm_and_si128(), _mm_cmpeq_epi8(), _mm_load_si128(), _mm_movemask_epi8(), _mm_or_si128(), _mm_setzero_si128(), and _mm_store_si128().

◆ sse2_and_or_digest_2way()

bool bm::sse2_and_or_digest_2way ( __m128i *  dst,
const __m128i *  src1,
const __m128i *  src2 
)
inline noexcept

AND-OR block digest stride dst |= *src1 & src2.

Returns
true if stride is all zero

Definition at line 464 of file bmsse2.h.

References _mm_and_si128(), _mm_cmpeq_epi8(), _mm_load_si128(), _mm_movemask_epi8(), _mm_or_si128(), _mm_setzero_si128(), and _mm_store_si128().

◆ sse2_andnot_arr_2_mask()

void bm::sse2_andnot_arr_2_mask ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src,
const __m128i *BMRESTRICT  src_end,
bm::word_t  mask 
)
inline

Inverts array elements and ANDs them with the specified mask dst = ~*src & mask.

Definition at line 102 of file bmsse_util.h.

References _mm_andnot_si128(), _mm_load_si128(), _mm_set1_epi32(), _mm_store_si128(), and mask.

◆ sse2_bit_block_xor()

void bm::sse2_bit_block_xor ( bm::word_t *  target_block,
const bm::word_t *  block,
const bm::word_t *  xor_block,
bm::id64_t  digest 
)
inline noexcept

Build partial XOR product of 2 bit-blocks using digest mask.

Parameters
target_block - target := block ^ xor_block
block - arg1
xor_block - arg2
digest - mask for each block wave to XOR (1) or just copy (0)

Definition at line 289 of file bmsse2.h.

References _mm_load_si128(), _mm_store_si128(), _mm_xor_si128(), bm::block_waves, i, mask, and bm::set_block_digest_wave_size.

◆ sse2_bit_block_xor_2way()

void bm::sse2_bit_block_xor_2way ( bm::word_t *  target_block,
const bm::word_t *  xor_block,
bm::id64_t  digest 
)
inline noexcept

Build partial XOR product of 2 bit-blocks using digest mask.

Parameters
target_block - target ^= xor_block
xor_block - arg1
digest - mask for each block wave to XOR (if 1)

Definition at line 360 of file bmsse2.h.

References _mm_load_si128(), _mm_store_si128(), _mm_xor_si128(), bm::bmi_blsi_u64(), bm::bmi_bslr_u64(), bm::set_block_digest_wave_size, t, and bm::word_bitcount64().

◆ sse2_bit_count()

bm::id_t bm::sse2_bit_count ( const __m128i *  block,
const __m128i *  block_end 
)
inline

SSE2-optimized bit-counting function; implements the parallel bit-counting algorithm for the SSE2 instruction set. The equivalent scalar algorithm for a single 32-bit word is:

unsigned CalcBitCount32(unsigned b)
{
    b = (b & 0x55555555) + (b >> 1 & 0x55555555);
    b = (b & 0x33333333) + (b >> 2 & 0x33333333);
    b = (b + (b >> 4)) & 0x0F0F0F0F;
    b = b + (b >> 8);
    b = (b + (b >> 16)) & 0x0000003F;
    return b;
}

Definition at line 66 of file bmsse2.h.

References _mm_add_epi32(), _mm_and_si128(), _mm_load_si128(), _mm_set_epi32(), _mm_srli_epi32, _mm_store_si128(), _mm_xor_si128(), b, BM_ALIGN16, BM_ALIGN16ATTR, and tmp1.

◆ sse2_bit_find_first()

bool bm::sse2_bit_find_first ( const __m128i *  block,
unsigned  off,
unsigned *  pos 
)
inline noexcept

◆ sse2_bit_find_first_diff()

bool bm::sse2_bit_find_first_diff ( const __m128i *  block1,
const __m128i *  block2,
unsigned *  pos 
)
inline noexcept

◆ sse2_block_set_digest()

void bm::sse2_block_set_digest ( __m128i *  dst,
unsigned  value 
)
inline noexcept

set digest stride to 0xFF.. or 0x0 value

Definition at line 268 of file bmsse2.h.

References _mm_set1_epi32(), _mm_store_si128(), and rapidjson::value.

◆ sse2_copy_block()

void bm::sse2_copy_block ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src 
)
inline

SSE2 block copy dst = *src.

Definition at line 838 of file bmsse_util.h.

References _mm_load_si128(), _mm_store_si128(), BMRESTRICT, and bm::set_block_size.

◆ sse2_copy_block_unalign()

void bm::sse2_copy_block_unalign ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src 
)
inline

SSE2 block copy (unaligned SRC) dst = *src.

Definition at line 879 of file bmsse_util.h.

References _mm_loadu_si128(), _mm_store_si128(), BMRESTRICT, and bm::set_block_size.

◆ sse2_gap_bfind()

unsigned bm::sse2_gap_bfind ( const unsigned short *  buf,
unsigned  pos,
unsigned *  is_set 
)
inline

Hybrid binary search, starts as binary, then switches to linear scan.

Parameters
buf - GAP buffer pointer.
pos - index of the element.
is_set - output. GAP value (0 or 1).
Returns
GAP index.

Definition at line 1445 of file bmsse2.h.

References BM_ASSERT, buf, ncbi::grid::netcache::search::fields::size, and bm::sse2_gap_find().

Referenced by bm::sse2_gap_test().

◆ sse2_gap_test()

unsigned bm::sse2_gap_test ( const unsigned short *  buf,
unsigned  pos 
)
inline

Hybrid binary search, starts as binary, then switches to scan.

Definition at line 1502 of file bmsse2.h.

References buf, and bm::sse2_gap_bfind().

◆ sse2_invert_block()

void bm::sse2_invert_block ( __m128i *BMRESTRICT  dst)
inline

Invert bit block dst = ~*dst (equivalently, dst ^= all-ones mask).

Definition at line 1006 of file bmsse_util.h.

References _mm_load_si128(), _mm_set1_epi32(), _mm_store_si128(), _mm_xor_si128(), BMRESTRICT, and bm::set_block_size.

◆ sse2_is_all_one()

bool bm::sse2_is_all_one ( const __m128i *  block)
inline noexcept

check if block is all ONE bits

Definition at line 219 of file bmsse2.h.

References _mm_and_si128(), _mm_cmpeq_epi8(), _mm_load_si128(), _mm_movemask_epi8(), _mm_set_epi32(), BMRESTRICT, and bm::set_block_size.

◆ sse2_is_all_zero()

bool bm::sse2_is_all_zero ( const __m128i *  block)
inline noexcept

check if block is all zero bits

Definition at line 194 of file bmsse2.h.

References _mm_cmpeq_epi8(), _mm_load_si128(), _mm_movemask_epi8(), _mm_or_si128(), _mm_setzero_si128(), BMRESTRICT, and bm::set_block_size.

◆ sse2_is_digest_zero()

bool bm::sse2_is_digest_zero ( const __m128i *  block)
inline noexcept

check if digest stride is all zero bits

Definition at line 244 of file bmsse2.h.

References _mm_cmpeq_epi8(), _mm_load_si128(), _mm_movemask_epi8(), _mm_or_si128(), and _mm_setzero_si128().

◆ sse2_lower_bound_scan_u32()

unsigned bm::sse2_lower_bound_scan_u32 ( const unsigned *BMRESTRICT  arr,
unsigned  target,
unsigned  from,
unsigned  to 
)
inline

lower bound (greater or equal) linear scan in ascending order sorted array

Definition at line 1099 of file bmsse_util.h.

References _mm_cmpeq_epi32(), _mm_cmpgt_epi32(), _mm_loadu_si128(), _mm_movemask_epi8(), _mm_or_si128(), _mm_set1_epi32(), _mm_sub_epi32(), arr, bm::bit_scan_forward32(), BMRESTRICT, len, and mask.

Referenced by TestSIMDUtils().

◆ sse2_or_arr_unal()

bool bm::sse2_or_arr_unal ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src,
const __m128i *BMRESTRICT  src_end 
)
inline

OR array elements against another array (unaligned) dst |= *src.

Returns
true if all bits are 1

Definition at line 426 of file bmsse_util.h.

References _mm_and_si128(), _mm_cmpeq_epi8(), _mm_load_si128(), _mm_loadu_si128(), _mm_movemask_epi8(), _mm_or_si128(), _mm_set1_epi32(), and _mm_store_si128().

Referenced by bm::decoder::get_32_OR().

◆ sse2_or_block()

bool bm::sse2_or_block ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src 
)
inline

OR array elements against another array dst |= *src.

Returns
true if all bits are 1

Definition at line 372 of file bmsse_util.h.

References _mm_and_si128(), _mm_cmpeq_epi8(), _mm_load_si128(), _mm_movemask_epi8(), _mm_or_si128(), _mm_set1_epi32(), _mm_store_si128(), BMRESTRICT, and bm::set_block_size.

◆ sse2_or_block_2way()

bool bm::sse2_or_block_2way ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src1,
const __m128i *BMRESTRICT  src2 
)
inline

OR 2 blocks and copy result to the destination dst = *src1 | src2.

Returns
true if all bits are 1

Definition at line 478 of file bmsse_util.h.

References _mm_and_si128(), _mm_cmpeq_epi8(), _mm_load_si128(), _mm_movemask_epi8(), _mm_or_si128(), _mm_set1_epi32(), _mm_store_si128(), BMRESTRICT, and bm::set_block_size.

◆ sse2_or_block_3way()

bool bm::sse2_or_block_3way ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src1,
const __m128i *BMRESTRICT  src2 
)
inline

OR array elements against another 2 arrays dst |= *src1 | src2.

Returns
true if all bits are 1

Definition at line 524 of file bmsse_util.h.

References _mm_and_si128(), _mm_cmpeq_epi8(), _mm_load_si128(), _mm_movemask_epi8(), _mm_or_si128(), _mm_set1_epi32(), _mm_store_si128(), BMRESTRICT, and bm::set_block_size.

◆ sse2_or_block_5way()

bool bm::sse2_or_block_5way ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src1,
const __m128i *BMRESTRICT  src2,
const __m128i *BMRESTRICT  src3,
const __m128i *BMRESTRICT  src4 
)
inline

OR array elements against another 4 arrays dst |= *src1 | src2 | src3 | src4.

Returns
true if all bits are 1

Definition at line 575 of file bmsse_util.h.

References _mm_and_si128(), _mm_cmpeq_epi8(), _MM_HINT_T0, _mm_load_si128(), _mm_movemask_epi8(), _mm_or_si128(), _mm_prefetch(), _mm_set1_epi32(), _mm_stream_si128(), BMRESTRICT, and bm::set_block_size.

◆ sse2_set_block()

void bm::sse2_set_block ( __m128i *BMRESTRICT  dst,
bm::word_t  value 
)
inline

SSE2 block memset dst = value.

Definition at line 809 of file bmsse_util.h.

References _mm_set1_epi32(), _mm_store_si128(), BMRESTRICT, bm::set_block_size, and rapidjson::value.

◆ sse2_shift_l1()

bool bm::sse2_shift_l1 ( __m128i *  block,
unsigned *  empty_acc,
unsigned  co1 
)
inline noexcept

◆ sse2_shift_r1()

bool bm::sse2_shift_r1 ( __m128i *  block,
unsigned *  empty_acc,
unsigned  co1 
)
inline noexcept

◆ sse2_stream_block()

void bm::sse2_stream_block ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src 
)
inline

SSE2 block copy dst = *src.

Definition at line 921 of file bmsse_util.h.

References _mm_load_si128(), _mm_stream_si128(), BMRESTRICT, and bm::set_block_size.

◆ sse2_stream_block_unalign()

void bm::sse2_stream_block_unalign ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src 
)
inline

SSE2 block copy (unaligned src) dst = *src.

Definition at line 962 of file bmsse_util.h.

References _mm_loadu_si128(), _mm_stream_si128(), BMRESTRICT, and bm::set_block_size.

◆ sse2_sub_block()

unsigned bm::sse2_sub_block ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src 
)
inline

AND-NOT (SUB) array elements against another array dst &= ~*src.

Returns
0 if no bits were set

Definition at line 744 of file bmsse_util.h.

References _mm_andnot_si128(), _mm_load_si128(), _mm_or_si128(), _mm_setzero_si128(), _mm_store_si128(), BM_ALIGN16, BM_ALIGN16ATTR, BMRESTRICT, and bm::set_block_size.

◆ sse2_sub_digest()

bool bm::sse2_sub_digest ( __m128i *  dst,
const __m128i *  src 
)
inline noexcept

SUB (AND NOT) block digest stride dst &= ~*src.

Returns
true if stride is all zero

Definition at line 731 of file bmsse2.h.

References _mm_andnot_si128(), _mm_cmpeq_epi8(), _mm_load_si128(), _mm_movemask_epi8(), _mm_or_si128(), _mm_setzero_si128(), and _mm_store_si128().

◆ sse2_sub_digest_2way()

bool bm::sse2_sub_digest_2way ( __m128i *  dst,
const __m128i *  src1,
const __m128i *  src2 
)
inline noexcept

2-operand SUB (AND NOT) block digest stride dst = src1 & ~*src2

Returns
true if stride is all zero

Definition at line 780 of file bmsse2.h.

References _mm_andnot_si128(), _mm_cmpeq_epi8(), _mm_load_si128(), _mm_movemask_epi8(), _mm_or_si128(), _mm_setzero_si128(), and _mm_store_si128().

◆ sse2_xor_arr_2_mask()

void bm::sse2_xor_arr_2_mask ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src,
const __m128i *BMRESTRICT  src_end,
bm::word_t  mask 
)
inline

XOR array elements to specified mask dst = *src ^ mask.

Definition at line 78 of file bmsse_util.h.

References _mm_load_si128(), _mm_set1_epi32(), _mm_store_si128(), _mm_xor_si128(), and mask.

◆ sse2_xor_block()

unsigned bm::sse2_xor_block ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src 
)
inline

XOR block against another dst ^= *src.

Returns
0 if no bits were set

Definition at line 646 of file bmsse_util.h.

References _mm_load_si128(), _mm_or_si128(), _mm_setzero_si128(), _mm_store_si128(), _mm_xor_si128(), BM_ALIGN16, BM_ALIGN16ATTR, BMRESTRICT, and bm::set_block_size.

◆ sse2_xor_block_2way()

unsigned bm::sse2_xor_block_2way ( __m128i *BMRESTRICT  dst,
const __m128i *BMRESTRICT  src1,
const __m128i *BMRESTRICT  src2 
)
inline

3 operand XOR dst = *src1 ^ src2

Returns
0 if no bits were set

Definition at line 693 of file bmsse_util.h.

References _mm_load_si128(), _mm_or_si128(), _mm_setzero_si128(), _mm_store_si128(), _mm_xor_si128(), BM_ALIGN16, BM_ALIGN16ATTR, BMRESTRICT, and bm::set_block_size.

Modified on Fri Sep 20 14:57:31 2024 by modify_doxy.py rev. 669887