NCBI C++ ToolKit
Namespaces | Macros | Functions
bmsse2.h File Reference

Compute functions for SSE2 SIMD instruction set (internal) More...

#include <mmintrin.h>
#include <emmintrin.h>
#include "bmdef.h"
#include "bmutil.h"
#include "bmsse_util.h"
+ Include dependency graph for bmsse2.h:

Go to the source code of this file.

Go to the SVN repository for this file.

Namespaces

 bm
 #include<zmmintrin.h>
 

Macros

#define VECT_XOR_ARR_2_MASK(dst, src, src_end, mask)    sse2_xor_arr_2_mask((__m128i*)(dst), (__m128i*)(src), (__m128i*)(src_end), (bm::word_t)mask)
 
#define VECT_ANDNOT_ARR_2_MASK(dst, src, src_end, mask)    sse2_andnot_arr_2_mask((__m128i*)(dst), (__m128i*)(src), (__m128i*)(src_end), (bm::word_t)mask)
 
#define VECT_BITCOUNT(first, last)    sse2_bit_count((__m128i*) (first), (__m128i*) (last))
 
#define VECT_BITCOUNT_AND(first, last, mask)    sse2_bit_count_op((__m128i*) (first), (__m128i*) (last), (__m128i*) (mask), sse2_and)
 
#define VECT_BITCOUNT_OR(first, last, mask)    sse2_bit_count_op((__m128i*) (first), (__m128i*) (last), (__m128i*) (mask), sse2_or)
 
#define VECT_BITCOUNT_XOR(first, last, mask)    sse2_bit_count_op((__m128i*) (first), (__m128i*) (last), (__m128i*) (mask), sse2_xor)
 
#define VECT_BITCOUNT_SUB(first, last, mask)    sse2_bit_count_op((__m128i*) (first), (__m128i*) (last), (__m128i*) (mask), sse2_sub)
 
#define VECT_INVERT_BLOCK(first)    sse2_invert_block((__m128i*)first);
 
#define VECT_AND_BLOCK(dst, src)    sse2_and_block((__m128i*) dst, (__m128i*) (src))
 
#define VECT_AND_DIGEST(dst, src)    sse2_and_digest((__m128i*) dst, (const __m128i*) (src))
 
#define VECT_AND_OR_DIGEST_2WAY(dst, src1, src2)    sse2_and_or_digest_2way((__m128i*) dst, (const __m128i*) (src1), (const __m128i*) (src2))
 
#define VECT_AND_DIGEST_5WAY(dst, src1, src2, src3, src4)    sse2_and_digest_5way((__m128i*) dst, (const __m128i*) (src1), (const __m128i*) (src2), (const __m128i*) (src3), (const __m128i*) (src4))
 
#define VECT_AND_DIGEST_3WAY(dst, src1, src2)    sse2_and_digest_3way((__m128i*) dst, (const __m128i*) (src1), (const __m128i*) (src2))
 
#define VECT_AND_DIGEST_2WAY(dst, src1, src2)    sse2_and_digest_2way((__m128i*) dst, (const __m128i*) (src1), (const __m128i*) (src2))
 
#define VECT_OR_BLOCK(dst, src)    sse2_or_block((__m128i*) dst, (__m128i*) (src))
 
#define VECT_OR_BLOCK_2WAY(dst, src1, src2)    sse2_or_block_2way((__m128i*) (dst), (__m128i*) (src1), (__m128i*) (src2))
 
#define VECT_OR_BLOCK_3WAY(dst, src1, src2)    sse2_or_block_3way((__m128i*) (dst), (__m128i*) (src1), (__m128i*) (src2))
 
#define VECT_OR_BLOCK_5WAY(dst, src1, src2, src3, src4)    sse2_or_block_5way((__m128i*) (dst), (__m128i*) (src1), (__m128i*) (src2), (__m128i*) (src3), (__m128i*) (src4))
 
#define VECT_SUB_BLOCK(dst, src)    sse2_sub_block((__m128i*) dst, (__m128i*) (src))
 
#define VECT_SUB_DIGEST(dst, src)    sse2_sub_digest((__m128i*) dst, (const __m128i*) (src))
 
#define VECT_SUB_DIGEST_2WAY(dst, src1, src2)    sse2_sub_digest_2way((__m128i*) dst, (const __m128i*) (src1), (const __m128i*) (src2))
 
#define VECT_SUB_DIGEST_5WAY(dst, src1, src2, src3, src4)    sse2_sub_digest_5way((__m128i*) dst, (const __m128i*) (src1), (const __m128i*) (src2), (const __m128i*) (src3), (const __m128i*) (src4))
 
#define VECT_SUB_DIGEST_3WAY(dst, src1, src2)    sse2_sub_digest_3way((__m128i*) dst, (const __m128i*) (src1), (const __m128i*) (src2))
 
#define VECT_XOR_BLOCK(dst, src)    sse2_xor_block((__m128i*) dst, (__m128i*) (src))
 
#define VECT_XOR_BLOCK_2WAY(dst, src1, src2)    sse2_xor_block_2way((__m128i*) (dst), (const __m128i*) (src1), (const __m128i*) (src2))
 
#define VECT_COPY_BLOCK(dst, src)    sse2_copy_block((__m128i*) dst, (__m128i*) (src))
 
#define VECT_COPY_BLOCK_UNALIGN(dst, src)    sse2_copy_block_unalign((__m128i*) dst, (__m128i*) (src))
 
#define VECT_STREAM_BLOCK(dst, src)    sse2_stream_block((__m128i*) dst, (__m128i*) (src))
 
#define VECT_STREAM_BLOCK_UNALIGN(dst, src)    sse2_stream_block_unalign((__m128i*) dst, (__m128i*) (src))
 
#define VECT_SET_BLOCK(dst, value)    sse2_set_block((__m128i*) dst, value)
 
#define VECT_IS_ZERO_BLOCK(dst)    sse2_is_all_zero((__m128i*) dst)
 
#define VECT_IS_ONE_BLOCK(dst)    sse2_is_all_one((__m128i*) dst)
 
#define VECT_IS_DIGEST_ZERO(start)    sse2_is_digest_zero((__m128i*)start)
 
#define VECT_BLOCK_SET_DIGEST(dst, val)    sse2_block_set_digest((__m128i*)dst, val)
 
#define VECT_LOWER_BOUND_SCAN_U32(arr, target, from, to)    sse2_lower_bound_scan_u32(arr, target, from, to)
 
#define VECT_SHIFT_R1(b, acc, co)    sse2_shift_r1((__m128i*)b, acc, co)
 
#define VECT_BIT_FIND_FIRST(src, off, pos)    sse2_bit_find_first((__m128i*) src, off, pos)
 
#define VECT_BIT_FIND_DIFF(src1, src2, pos)    sse2_bit_find_first_diff((__m128i*) src1, (__m128i*) (src2), pos)
 
#define VECT_BIT_BLOCK_XOR(t, src, src_xor, d)    sse2_bit_block_xor(t, src, src_xor, d)
 
#define VECT_BIT_BLOCK_XOR_2WAY(t, src_xor, d)    sse2_bit_block_xor_2way(t, src_xor, d)
 
#define VECT_GAP_BFIND(buf, pos, is_set)    sse2_gap_bfind(buf, pos, is_set)
 
#define VECT_GAP_TEST(buf, pos)    sse2_gap_test(buf, pos)
 

Functions

bm::id_t bm::sse2_bit_count (const __m128i *block, const __m128i *block_end)
 
template<class Func >
bm::id_t bm::sse2_bit_count_op (const __m128i *block, const __m128i *block_end, const __m128i *mask_block, Func sse2_func)
 
bool bm::sse2_is_all_zero (const __m128i *block) noexcept
 check if block is all zero bits More...
 
bool bm::sse2_is_all_one (const __m128i *block) noexcept
 check if block is all ONE bits More...
 
bool bm::sse2_is_digest_zero (const __m128i *block) noexcept
 check if digest stride is all zero bits More...
 
void bm::sse2_block_set_digest (__m128i *dst, unsigned value) noexcept
 set digest stride to 0xFF.. or 0x0 value More...
 
void bm::sse2_bit_block_xor (bm::word_t *target_block, const bm::word_t *block, const bm::word_t *xor_block, bm::id64_t digest) noexcept
 Build partial XOR product of 2 bit-blocks using digest mask. More...
 
void bm::sse2_bit_block_xor_2way (bm::word_t *target_block, const bm::word_t *xor_block, bm::id64_t digest) noexcept
 Build partial XOR product of 2 bit-blocks using digest mask. More...
 
bool bm::sse2_and_digest (__m128i *dst, const __m128i *src) noexcept
 AND block digest stride dst &= *src. More...
 
bool bm::sse2_and_or_digest_2way (__m128i *dst, const __m128i *src1, const __m128i *src2) noexcept
 AND-OR block digest stride dst |= *src1 & *src2. More...
 
bool bm::sse2_and_digest_5way (__m128i *dst, const __m128i *src1, const __m128i *src2, const __m128i *src3, const __m128i *src4) noexcept
 AND block digest stride. More...
 
bool bm::sse2_and_digest_3way (__m128i *dst, const __m128i *src1, const __m128i *src2) noexcept
 AND block digest stride. More...
 
bool bm::sse2_and_digest_2way (__m128i *dst, const __m128i *src1, const __m128i *src2) noexcept
 AND block digest stride dst = *src1 & *src2. More...
 
bool bm::sse2_sub_digest (__m128i *dst, const __m128i *src) noexcept
 SUB (AND NOT) block digest stride dst &= ~*src. More...
 
bool bm::sse2_sub_digest_2way (__m128i *dst, const __m128i *src1, const __m128i *src2) noexcept
 2-operand SUB (AND NOT) block digest stride dst = *src1 & ~*src2. More...
 
bool bm::sse2_sub_digest_5way (__m128i *dst, const __m128i *src1, const __m128i *src2, const __m128i *src3, const __m128i *src4) noexcept
 SUB block digest stride. More...
 
bool bm::sse2_sub_digest_3way (__m128i *dst, const __m128i *src1, const __m128i *src2) noexcept
 SUB block digest stride. More...
 
bool bm::sse2_bit_find_first (const __m128i *block, unsigned off, unsigned *pos) noexcept
 Find first non-zero bit. More...
 
bool bm::sse2_bit_find_first_diff (const __m128i *block1, const __m128i *block2, unsigned *pos) noexcept
 Find first bit which is different between two bit-blocks. More...
 
bool bm::sse2_shift_r1 (__m128i *block, unsigned *empty_acc, unsigned co1) noexcept
 block shift right by 1 More...
 
bool bm::sse2_shift_l1 (__m128i *block, unsigned *empty_acc, unsigned co1) noexcept
 block shift left by 1 More...
 
bm::id_t bm::sse2_bit_block_calc_count_change (const __m128i *block, const __m128i *block_end, unsigned *bit_count)
 
unsigned bm::sse2_gap_find (const bm::gap_word_t *pbuf, const bm::gap_word_t pos, unsigned size)
 
unsigned bm::sse2_gap_bfind (const unsigned short *buf, unsigned pos, unsigned *is_set)
 Hybrid binary search, starts as binary, then switches to linear scan. More...
 
unsigned bm::sse2_gap_test (const unsigned short *buf, unsigned pos)
 Hybrid binary search, starts as binary, then switches to scan. More...
 

Detailed Description

Compute functions for SSE2 SIMD instruction set (internal)

Definition in file bmsse2.h.

Macro Definition Documentation

◆ VECT_AND_BLOCK

#define VECT_AND_BLOCK (   dst,
  src 
)     sse2_and_block((__m128i*) dst, (__m128i*) (src))

Definition at line 1541 of file bmsse2.h.

◆ VECT_AND_DIGEST

#define VECT_AND_DIGEST (   dst,
  src 
)     sse2_and_digest((__m128i*) dst, (const __m128i*) (src))

Definition at line 1544 of file bmsse2.h.

◆ VECT_AND_DIGEST_2WAY

#define VECT_AND_DIGEST_2WAY (   dst,
  src1,
  src2 
)     sse2_and_digest_2way((__m128i*) dst, (const __m128i*) (src1), (const __m128i*) (src2))

Definition at line 1556 of file bmsse2.h.

◆ VECT_AND_DIGEST_3WAY

#define VECT_AND_DIGEST_3WAY (   dst,
  src1,
  src2 
)     sse2_and_digest_3way((__m128i*) dst, (const __m128i*) (src1), (const __m128i*) (src2))

Definition at line 1553 of file bmsse2.h.

◆ VECT_AND_DIGEST_5WAY

#define VECT_AND_DIGEST_5WAY (   dst,
  src1,
  src2,
  src3,
  src4 
)     sse2_and_digest_5way((__m128i*) dst, (const __m128i*) (src1), (const __m128i*) (src2), (const __m128i*) (src3), (const __m128i*) (src4))

Definition at line 1550 of file bmsse2.h.

◆ VECT_AND_OR_DIGEST_2WAY

#define VECT_AND_OR_DIGEST_2WAY (   dst,
  src1,
  src2 
)     sse2_and_or_digest_2way((__m128i*) dst, (const __m128i*) (src1), (const __m128i*) (src2))

Definition at line 1547 of file bmsse2.h.

◆ VECT_ANDNOT_ARR_2_MASK

#define VECT_ANDNOT_ARR_2_MASK (   dst,
  src,
  src_end,
  mask 
)     sse2_andnot_arr_2_mask((__m128i*)(dst), (__m128i*)(src), (__m128i*)(src_end), (bm::word_t)mask)

Definition at line 1520 of file bmsse2.h.

◆ VECT_BIT_BLOCK_XOR

#define VECT_BIT_BLOCK_XOR (   t,
  src,
  src_xor,
  d 
)     sse2_bit_block_xor(t, src, src_xor, d)

Definition at line 1632 of file bmsse2.h.

◆ VECT_BIT_BLOCK_XOR_2WAY

#define VECT_BIT_BLOCK_XOR_2WAY (   t,
  src_xor,
  d 
)     sse2_bit_block_xor_2way(t, src_xor, d)

Definition at line 1635 of file bmsse2.h.

◆ VECT_BIT_FIND_DIFF

#define VECT_BIT_FIND_DIFF (   src1,
  src2,
  pos 
)     sse2_bit_find_first_diff((__m128i*) src1, (__m128i*) (src2), pos)

Definition at line 1629 of file bmsse2.h.

◆ VECT_BIT_FIND_FIRST

#define VECT_BIT_FIND_FIRST (   src,
  off,
  pos 
)     sse2_bit_find_first((__m128i*) src, off, pos)

Definition at line 1626 of file bmsse2.h.

◆ VECT_BITCOUNT

#define VECT_BITCOUNT (   first,
  last 
)     sse2_bit_count((__m128i*) (first), (__m128i*) (last))

Definition at line 1523 of file bmsse2.h.

◆ VECT_BITCOUNT_AND

#define VECT_BITCOUNT_AND (   first,
  last,
  mask 
)     sse2_bit_count_op((__m128i*) (first), (__m128i*) (last), (__m128i*) (mask), sse2_and)

Definition at line 1526 of file bmsse2.h.

◆ VECT_BITCOUNT_OR

#define VECT_BITCOUNT_OR (   first,
  last,
  mask 
)     sse2_bit_count_op((__m128i*) (first), (__m128i*) (last), (__m128i*) (mask), sse2_or)

Definition at line 1529 of file bmsse2.h.

◆ VECT_BITCOUNT_SUB

#define VECT_BITCOUNT_SUB (   first,
  last,
  mask 
)     sse2_bit_count_op((__m128i*) (first), (__m128i*) (last), (__m128i*) (mask), sse2_sub)

Definition at line 1535 of file bmsse2.h.

◆ VECT_BITCOUNT_XOR

#define VECT_BITCOUNT_XOR (   first,
  last,
  mask 
)     sse2_bit_count_op((__m128i*) (first), (__m128i*) (last), (__m128i*) (mask), sse2_xor)

Definition at line 1532 of file bmsse2.h.

◆ VECT_BLOCK_SET_DIGEST

#define VECT_BLOCK_SET_DIGEST (   dst,
  val 
)     sse2_block_set_digest((__m128i*)dst, val)

Definition at line 1616 of file bmsse2.h.

◆ VECT_COPY_BLOCK

#define VECT_COPY_BLOCK (   dst,
  src 
)     sse2_copy_block((__m128i*) dst, (__m128i*) (src))

Definition at line 1592 of file bmsse2.h.

◆ VECT_COPY_BLOCK_UNALIGN

#define VECT_COPY_BLOCK_UNALIGN (   dst,
  src 
)     sse2_copy_block_unalign((__m128i*) dst, (__m128i*) (src))

Definition at line 1595 of file bmsse2.h.

◆ VECT_GAP_BFIND

#define VECT_GAP_BFIND (   buf,
  pos,
  is_set 
)     sse2_gap_bfind(buf, pos, is_set)

Definition at line 1638 of file bmsse2.h.

◆ VECT_GAP_TEST

#define VECT_GAP_TEST (   buf,
  pos 
)     sse2_gap_test(buf, pos)

Definition at line 1641 of file bmsse2.h.

◆ VECT_INVERT_BLOCK

#define VECT_INVERT_BLOCK (   first)     sse2_invert_block((__m128i*)first);

Definition at line 1538 of file bmsse2.h.

◆ VECT_IS_DIGEST_ZERO

#define VECT_IS_DIGEST_ZERO (   start)     sse2_is_digest_zero((__m128i*)start)

Definition at line 1613 of file bmsse2.h.

◆ VECT_IS_ONE_BLOCK

#define VECT_IS_ONE_BLOCK (   dst)     sse2_is_all_one((__m128i*) dst)

Definition at line 1610 of file bmsse2.h.

◆ VECT_IS_ZERO_BLOCK

#define VECT_IS_ZERO_BLOCK (   dst)     sse2_is_all_zero((__m128i*) dst)

Definition at line 1607 of file bmsse2.h.

◆ VECT_LOWER_BOUND_SCAN_U32

#define VECT_LOWER_BOUND_SCAN_U32 (   arr,
  target,
  from,
  to 
)     sse2_lower_bound_scan_u32(arr, target, from, to)

Definition at line 1619 of file bmsse2.h.

◆ VECT_OR_BLOCK

#define VECT_OR_BLOCK (   dst,
  src 
)     sse2_or_block((__m128i*) dst, (__m128i*) (src))

Definition at line 1559 of file bmsse2.h.

◆ VECT_OR_BLOCK_2WAY

#define VECT_OR_BLOCK_2WAY (   dst,
  src1,
  src2 
)     sse2_or_block_2way((__m128i*) (dst), (__m128i*) (src1), (__m128i*) (src2))

Definition at line 1562 of file bmsse2.h.

◆ VECT_OR_BLOCK_3WAY

#define VECT_OR_BLOCK_3WAY (   dst,
  src1,
  src2 
)     sse2_or_block_3way((__m128i*) (dst), (__m128i*) (src1), (__m128i*) (src2))

Definition at line 1565 of file bmsse2.h.

◆ VECT_OR_BLOCK_5WAY

#define VECT_OR_BLOCK_5WAY (   dst,
  src1,
  src2,
  src3,
  src4 
)     sse2_or_block_5way((__m128i*) (dst), (__m128i*) (src1), (__m128i*) (src2), (__m128i*) (src3), (__m128i*) (src4))

Definition at line 1568 of file bmsse2.h.

◆ VECT_SET_BLOCK

#define VECT_SET_BLOCK (   dst,
  value 
)     sse2_set_block((__m128i*) dst, value)

Definition at line 1604 of file bmsse2.h.

◆ VECT_SHIFT_R1

#define VECT_SHIFT_R1 (   b,
  acc,
  co 
)     sse2_shift_r1((__m128i*)b, acc, co)

Definition at line 1622 of file bmsse2.h.

◆ VECT_STREAM_BLOCK

#define VECT_STREAM_BLOCK (   dst,
  src 
)     sse2_stream_block((__m128i*) dst, (__m128i*) (src))

Definition at line 1598 of file bmsse2.h.

◆ VECT_STREAM_BLOCK_UNALIGN

#define VECT_STREAM_BLOCK_UNALIGN (   dst,
  src 
)     sse2_stream_block_unalign((__m128i*) dst, (__m128i*) (src))

Definition at line 1601 of file bmsse2.h.

◆ VECT_SUB_BLOCK

#define VECT_SUB_BLOCK (   dst,
  src 
)     sse2_sub_block((__m128i*) dst, (__m128i*) (src))

Definition at line 1571 of file bmsse2.h.

◆ VECT_SUB_DIGEST

#define VECT_SUB_DIGEST (   dst,
  src 
)     sse2_sub_digest((__m128i*) dst, (const __m128i*) (src))

Definition at line 1574 of file bmsse2.h.

◆ VECT_SUB_DIGEST_2WAY

#define VECT_SUB_DIGEST_2WAY (   dst,
  src1,
  src2 
)     sse2_sub_digest_2way((__m128i*) dst, (const __m128i*) (src1), (const __m128i*) (src2))

Definition at line 1577 of file bmsse2.h.

◆ VECT_SUB_DIGEST_3WAY

#define VECT_SUB_DIGEST_3WAY (   dst,
  src1,
  src2 
)     sse2_sub_digest_3way((__m128i*) dst, (const __m128i*) (src1), (const __m128i*) (src2))

Definition at line 1583 of file bmsse2.h.

◆ VECT_SUB_DIGEST_5WAY

#define VECT_SUB_DIGEST_5WAY (   dst,
  src1,
  src2,
  src3,
  src4 
)     sse2_sub_digest_5way((__m128i*) dst, (const __m128i*) (src1), (const __m128i*) (src2), (const __m128i*) (src3), (const __m128i*) (src4))

Definition at line 1580 of file bmsse2.h.

◆ VECT_XOR_ARR_2_MASK

#define VECT_XOR_ARR_2_MASK (   dst,
  src,
  src_end,
  mask 
)     sse2_xor_arr_2_mask((__m128i*)(dst), (__m128i*)(src), (__m128i*)(src_end), (bm::word_t)mask)

Definition at line 1517 of file bmsse2.h.

◆ VECT_XOR_BLOCK

#define VECT_XOR_BLOCK (   dst,
  src 
)     sse2_xor_block((__m128i*) dst, (__m128i*) (src))

Definition at line 1586 of file bmsse2.h.

◆ VECT_XOR_BLOCK_2WAY

#define VECT_XOR_BLOCK_2WAY (   dst,
  src1,
  src2 
)     sse2_xor_block_2way((__m128i*) (dst), (const __m128i*) (src1), (const __m128i*) (src2))

Definition at line 1589 of file bmsse2.h.

Modified on Fri Jan 05 07:24:27 2024 by modify_doxy.py rev. 669887