1 #ifndef BMDBG__H__INCLUDED__
2 #define BMDBG__H__INCLUDED__
45 #pragma warning( push )
46 #pragma warning( disable : 4311 4312 4127)
55 unsigned len = (*gap_buf >> 3);
56 tout <<
"[" << *gap_buf <<
" len=" <<
len <<
"] ";
57 for (
unsigned i = 0;
i <
len; ++
i)
60 tout << *gap_buf <<
"; ";
69 unsigned len = gap_len ? gap_len : (*gap_buf >> 3);
70 tout <<
"[" " len=" <<
len <<
"] ";
71 unsigned i = gap_len ? 0 : 1;
74 tout << gap_buf[
i] <<
"; ";
94 for (
unsigned i = 0;
i <
l; ++
i)
100 for (
unsigned i = 0;
i <
l; ++
i)
112 template<
typename TOut>
116 unsigned len = gap_len ? gap_len : (*gap_buf >> 3);
117 tout <<
"[" " len=" <<
len <<
"] ";
118 unsigned i = gap_len ? 0 : 1;
121 unsigned v = gap_buf[
i];
127 tout <<
" gamma_bits=" << total <<
" src_bits =" <<
len * 16;
142 std::ifstream fin(fname.c_str(),
std::ios::in | std::ios::binary);
147 fin.seekg(0, std::ios::end);
148 fsize = (size_t)fin.tellg();
156 fin.seekg(0, std::ios::beg);
157 fin.read((
char*) &
data[0], std::streamsize(fsize));
169 std::ifstream bv_file (fname,
std::ios::in | std::ios::binary);
172 std::cerr <<
"Cannot open file: " << fname << std::endl;
175 bv_file.seekg(0, std::ios_base::end);
176 unsigned length = (unsigned)bv_file.tellg();
179 std::cerr <<
"Empty file:" << fname << std::endl;
185 bv_file.seekg(0, std::ios::beg);
187 char*
buffer =
new char[length];
189 bv_file.read(
buffer, length);
199 std::ofstream bfile (fname,
std::ios::out | std::ios::binary);
202 std::cerr <<
"Cannot open file: " << fname << std::endl;
205 typename TBV::statistics st1;
208 unsigned char* blob =
new unsigned char[st1.max_serialize_mem];
212 bfile.write((
char*)blob, std::streamsize(blob_size));
219 void SaveBlob(
const char* name_prefix,
unsigned num,
const char* ext,
220 const unsigned char* blob,
size_t blob_size)
222 std::stringstream fname_str;
223 fname_str << name_prefix <<
"-" << num << ext;
226 const char* fname = s.c_str();
227 std::ofstream bfile (fname,
std::ios::out | std::ios::binary);
230 std::cerr <<
"Cannot open file: " << fname << std::endl;
233 bfile.write((
char*)blob, std::streamsize(blob_size));
238 template<
typename V,
typename TOut>
241 for (
unsigned i = 0;
i <
sizeof(V)*8;
i++)
243 tout << (unsigned)((
val >>
i) & 1);
244 if (
i == 15 && (
sizeof(V)*8 > 16)) tout <<
"-";
248 template<
typename TOut>
254 template<
typename TOut>
260 const unsigned*
row = distance[
i];
264 tout << std::setw(4) << std::setfill(
'0') <<
row[j] <<
" ";
270 template<
typename TM,
typename TOut>
278 if (
i < 10) tout <<
" ";
279 for (
unsigned j = 0; j <
columns; ++j)
283 tout << std::setw(4) << std::setfill(
'0') <<
row[j] <<
" ";
301 unsigned bit_idx = 0;
310 value |= (1 << bit_idx);
316 if (bit_idx ==
sizeof(
unsigned) * 8)
322 template<
class BV,
typename TOut>
325 const unsigned sz = 128000;
326 unsigned* bc_arr =
new unsigned[sz];
327 for(
unsigned x = 0; x < sz; ++x) bc_arr[x] = 0;
330 unsigned last_block = bv.count_blocks(bc_arr);
333 for (
unsigned i = 0;
i <= last_block; ++
i)
338 for (;
i <= last_block; ++
i)
340 tout << std::setw(5) << std::setfill(
'0') << bc_arr[
i] <<
" ";
342 if (++j == 10)
break;
344 tout <<
" | " << sum << std::endl;
346 tout <<
"Total=" << sum << std::endl;
350 template<
typename TOut>
353 static unsigned sum = 0;
354 static unsigned row_idx = 0;
355 static unsigned prev = 0;
373 tout << std::setw(5) << std::setfill(
'0') <<
count <<
" ";
380 tout <<
" | " << sum << std::endl;
384 template<
class BV,
typename TOut>
387 typename BV::statistics
st;
393 auto ssize =
buf.size();
395 tout <<
" - Blocks: [ "
396 <<
"B:" <<
st.bit_blocks
397 <<
", G:" <<
st.gap_blocks <<
"] "
399 <<
", mem = " <<
st.memory_used <<
" " << (
st.memory_used / (1024 * 1024)) <<
"MB "
400 <<
", max smem:" <<
st.max_serialize_mem <<
" " << (
st.max_serialize_mem / (1024 * 1024)) <<
"MB "
401 <<
" compressed = " << ssize <<
" " << (ssize / (1024 * 1024)) <<
"MB "
407 template<
class BV,
typename TOut>
410 const typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
415 int total_gap_eff = 0;
422 typename BV::block_idx_type nb;
423 typename BV::block_idx_type nb_prev = 0;
424 for (nb = 0; nb <
blocks; ++nb)
428 const bm::word_t* blk = bman.get_block(i0, j0);
437 tout <<
"[Alert!" << nb <<
"]";
441 typename BV::block_idx_type start = nb;
445 blk = bman.get_block(i0, j0);
450 tout <<
"[Alert!" << nb <<
"]";
464 tout <<
"{F." << start <<
":" << nb <<
"}";
469 if ((nb-1) != nb_prev)
471 tout <<
".." << (size_t)nb-nb_prev <<
"..";
481 unsigned raw_size=bc*2;
482 unsigned cmr_len=
len*2;
483 size_t mem_eff = raw_size - cmr_len;
484 total_gap_eff += unsigned(mem_eff);
488 tout <<
" [GAP " << nb <<
"(" <<
i <<
"," << j <<
")"
489 <<
"=" << bc <<
":" << level <<
"-L" <<
len <<
"(" << mem_eff <<
")]";
503 tout <<
" (BIT " << nb <<
"=" << bc <<
"[" << zw <<
"])";
514 tout << std::endl <<
"gap_efficiency=" << total_gap_eff << std::endl;
522 unsigned char*
buf = 0;
523 typename BV::size_type blob_size = 0;
529 typename BV::statistics
st;
532 buf =
new unsigned char[
st.max_serialize_mem];
533 blob_size = (unsigned)bvs.
serialize(bv, (
unsigned char*)
buf,
st.max_serialize_mem);
546 template<
class SV,
typename TOut>
547 void print_svector_xor_stat(TOut& toutconst SV& sv)
550 typename SV::size_type sz = sv.size();
555 for (
typename SV::size_type nb = 0; nb < nb_max; ++nb)
557 tout <<
"nb = " << nb << std::endl;
562 auto planes = sv.planes();
563 for (
unsigned i = 0;
i < planes; ++
i)
565 const typename SV::bvector_type* bv = sv.get_plane(
i);
568 const typename SV::bvector_type::blocks_manager_type& bman = bv->get_blocks_manager();
569 const bm::word_t* block = bman.get_block_ptr(i0, j0);
575 bm::compute_complexity_descr(block, x_descr);
577 bm::bit_block_change_bc32(block, &gc, &bc);
578 unsigned best_metric, block_metric;
579 block_metric = best_metric = gc < bc ? gc : bc;
581 bool kb_found =
false;
583 for (
unsigned k =
i + 1; k < planes; ++k)
585 const typename SV::bvector_type* bv_x = sv.get_plane(
i);
588 const typename SV::bvector_type::blocks_manager_type& bman_x = bv_x->get_blocks_manager();
589 const bm::word_t* block_x = bman_x.get_block_ptr(i0, j0);
595 bm::compute_xor_complexity_descr(block, block_x, x_descr);
598 bm::bit_block_xor_product(tb, block, block_x, kb_d64);
599 unsigned kb_bc, kb_gc;
600 bm::bit_block_change_bc32(tb, &kb_gc, &kb_bc);
620 tout <<
"XOR match " <<
"metric gain = " << std::endl;
629 template<
class SV,
typename TOut>
632 typedef typename SV::bvector_type bvector_type;
642 typename bvector_type::size_type cnt_and = dmit->
result;
645 typename bvector_type::size_type cnt_or = dmit->
result;
646 if (cnt_and == 0 || cnt_or == 0)
652 d = double(cnt_and) / double(cnt_or);
654 unsigned res = unsigned(d * 100);
655 if (res > 100) res = 100;
663 similarity_batch_type sbatch;
673 typename similarity_batch_type::vector_type& sim_vec = sbatch.descr_vect_;
676 for (
size_t k = 0; k < sim_vec.size(); ++k)
678 unsigned sim = sim_vec[k].similarity();
681 const typename SV::bvector_type* bv1 = sim_vec[k].get_first();
682 const typename SV::bvector_type* bv2 = sim_vec[k].get_second();
686 typename SV::bvector_type bvx(*bv2);
690 if (bv_size_x < bv_size2)
692 size_t diff = bv_size2 - bv_size_x;
695 size_t sz10p = bv_size2 / 10;
698 tout <<
"[" << sim_vec[k].get_first_idx()
699 <<
", " << sim_vec[k].get_second_idx()
701 <<
" size(" << sim_vec[k].get_second_idx() <<
")="
703 <<
" size(x)=" << bv_size_x
713 typename SV::statistics
st;
714 svect.calc_stat(&
st);
716 tout <<
"size = " << svect.size() << std::endl;
718 tout <<
"Bit blocks: " <<
st.bit_blocks << std::endl;
719 tout <<
"GAP blocks: " <<
st.gap_blocks << std::endl;
720 tout <<
"GAP levels counts:";
725 case 0: tout <<
"[ I: " <<
st.gap_levels[
g] <<
"] ";
break;
726 case 1: tout <<
"[ II: " <<
st.gap_levels[
g] <<
"] ";
break;
727 case 2: tout <<
"[ III:" <<
st.gap_levels[
g] <<
"] ";
break;
728 case 3: tout <<
"[ IV: " <<
st.gap_levels[
g] <<
"] ";
break;
730 tout <<
"[ " <<
g <<
": " <<
st.gap_levels[
g] <<
"] ";
break;
735 tout <<
"Max serialize mem:" <<
st.max_serialize_mem <<
" "
736 << (
st.max_serialize_mem / (1024 * 1024)) <<
"MB" << std::endl;
737 tout <<
"Memory used: " <<
st.memory_used <<
" "
738 << (
st.memory_used / (1024 * 1024)) <<
"MB" << std::endl;
740 auto eff_max_element = svect.effective_vector_max();
741 size_t std_vect_size =
sizeof(
typename SV::value_type) * svect.size() * eff_max_element;
742 tout <<
"Projected mem usage for vector<value_type>:"
743 << std_vect_size <<
" "
744 << std_vect_size / (1024 * 1024) <<
"MB"
746 if (
sizeof(
typename SV::value_type) > 4 && (eff_max_element == 1))
748 tout <<
"Projected mem usage for vector<long long>:"
749 <<
sizeof(
long long) * svect.size() << std::endl;
752 tout <<
"\nplanes:" << std::endl;
754 size_t ssize(0), octet_ssize(0);
756 typename SV::bvector_type bv_join;
757 auto planes = svect.get_bmatrix().rows();
759 unsigned octet_cnt(0), octet(0);
760 for (
unsigned i = 0;
i < planes; ++
i)
762 const typename SV::bvector_type* bv_plane = svect.get_slice(
i);
763 tout <<
i <<
"-" << octet_cnt <<
":";
767 bool any_else =
false;
768 for (
unsigned j =
i+1; j < planes; ++j)
770 if (svect.get_slice(j))
781 bv_join |= *bv_plane;
784 octet_ssize += pssize;
788 tout <<
"--------------------" << std::endl;
789 tout <<
"octet N = " << octet <<
790 " compressed = " << octet_ssize <<
791 " " << octet_ssize/(1024*1024) <<
"MB" << std::endl;
792 octet_cnt = 0; octet_ssize = 0;
801 tout <<
"-------------------- END of OCTETS\n";
803 const typename SV::bvector_type* bv_null = svect.get_null_bvector();
806 tout <<
"NULL plane:\n";
808 typename SV::size_type not_null_cnt = bv_null->count();
809 tout <<
" - Bitcount: " << not_null_cnt << std::endl;
811 tout <<
"Projected mem usage for std::vector<pair<unsigned, value_type> >:"
812 << ((
sizeof(
typename SV::value_type) +
sizeof(
unsigned)) * not_null_cnt) <<
" "
813 << ((
sizeof(
typename SV::value_type) +
sizeof(
unsigned)) * not_null_cnt) / (1024 * 1024) <<
"MB"
818 tout <<
"NO NULL plane:\n";
821 tout <<
" Total serialized size (planes): " << ssize
823 <<
" " << ssize / (1024 * 1024) <<
" MB" << std::endl;
828 double fr = double(bv_join_cnt) / double (svect.size());
829 tout <<
"Non-zero elements: " << bv_join_cnt <<
" "
832 size_t non_zero_mem = size_t(bv_join_cnt) *
sizeof(
typename SV::value_type);
833 tout <<
"Projected mem usage for non-zero elements: " << non_zero_mem <<
" "
834 << non_zero_mem / (1024*1024) <<
" MB"
840 template<
class SV,
typename TOut>
843 typename SV::octet_freq_matrix_type octet_stat_matr;
845 str_svect.calc_octet_stat(octet_stat_matr);
847 for (
unsigned i = 0;
i < octet_stat_matr.rows(); ++
i)
850 = octet_stat_matr.row(
i);
852 for (
unsigned j = 0; j < octet_stat_matr.cols(); ++j)
865 for (
unsigned j = 0; j < octet_stat_matr.cols(); ++j)
875 tout <<
"\t total= " <<
cnt;
890 std::ofstream fout(fname.c_str(), std::ios::binary);
893 size_t sz = vect.size();
894 fout.write((
char*)&sz,
sizeof(sz));
899 fout.write((
char*)vect.data(),
913 std::ifstream fin(fname.c_str(),
std::ios::in | std::ios::binary);
917 fin.read((
char*) &sz,
sizeof(sz));
939 typename CBC::buffer_type sbuf;
943 std::ofstream fout(fname.c_str(), std::ios::binary);
948 const char*
buf = (
char*)sbuf.buf();
949 fout.write(
buf, sbuf.size());
959 *blob_size = sbuf.size();
969 std::vector<unsigned char>
buffer;
996 size_t* sv_blob_size=0,
bool use_xor =
true)
1006 std::ofstream fout(fname.c_str(), std::ios::binary);
1011 const char*
buf = (
char*)sv_lay.
buf();
1012 fout.write(
buf, std::streamsize(sv_lay.
size()));
1022 *sv_blob_size = sv_lay.
size();
1030 std::vector<unsigned char>
buffer;
1059 template<
class SV,
class V>
1062 if (sv.size() != vect.size())
1066 for (
size_t i = 0;
i < vect.size(); ++
i)
1068 unsigned v1 = sv[(unsigned)
i];
1069 unsigned v2 = vect[
i];
1077 template<
class SV,
class BV>
1080 typename SV::back_insert_iterator bit = sv.get_back_inserter();
1081 typename BV::enumerator en = bv.first();
1082 for (; en.valid(); ++en)
1084 auto v = en.value();
1095 size_t getCurrentRSS( )
1099 if ( (
fp = fopen(
"/proc/self/statm",
"r" )) ==
NULL )
1101 if ( fscanf(
fp,
"%*s%ld", &rss ) != 1 )
1107 return (
size_t)rss * (size_t)sysconf( _SC_PAGESIZE);
1117 #pragma warning( pop )
#define BM_DECLARE_TEMP_BLOCK(x)
#define IS_FULL_BLOCK(addr)
#define IS_VALID_ADDR(addr)
Serialization for sparse_vector<>
Bitvector Bit-vector container with runtime compression of bits.
void calc_stat(struct bm::bvector< Alloc >::statistics *st) const noexcept
Calculates bitvector statistics.
size_type count() const noexcept
population count (count of ON bits)
Deserializer for compressed collections.
int deserialize(CBC &buffer_coll, const unsigned char *buf, bm::word_t *temp_block=0)
Serializer for compressed collections.
void serialize(const CBC &buffer_coll, buffer_type &buf, bm::word_t *temp_block=0)
Serialize compressed collection into memory buffer.
Bit-vector serialization class.
size_type serialize(const BV &bv, unsigned char *buf, size_t buf_size)
Bitvector serialization into memory block.
Serialize sparse vector into a memory buffer(s) structure.
void set_xor_ref(bool is_enabled) noexcept
Turn ON and OFF XOR compression of sparse vectors. Enables XOR reference compression for the sparse ve...
void serialize(const SV &sv, sparse_vector_serial_layout< SV > &sv_layout)
Serialize sparse vector into a memory buffer(s) structure.
std::ofstream out("events_result.xml")
main entry point for tests
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static const char * str(char *buf, int n)
static const column_t columns[]
bm::id_t bit_block_count(const bm::word_t *block) noexcept
Bitcount for bit block.
size_t serialize(const BV &bv, unsigned char *buf, bm::word_t *temp_block=0, unsigned serialization_flags=0)
Saves bitvector into memory.
size_t deserialize(BV &bv, const unsigned char *buf, bm::word_t *temp_block=0, const bm::bv_ref_vector< BV > *ref_vect=0)
Bitvector deserialization from a memory BLOB.
@ COUNT_AND
(A & B).count()
@ COUNT_OR
(A | B).count()
unsigned gap_bit_count(const T *buf, unsigned dsize=0) noexcept
Calculates number of bits ON in GAP buffer.
unsigned gap_control_sum(const T *buf) noexcept
Calculates sum of all words in GAP block. (For debugging purposes)
T gap_level(const T *buf) noexcept
Returns GAP block capacity level.
bm::gap_word_t gap_length(const bm::gap_word_t *buf) noexcept
Returns GAP block length.
int sparse_vector_deserialize(SV &sv, const unsigned char *buf, bm::word_t *temp_block=0)
Deserialize sparse vector.
const unsigned set_array_mask
const unsigned set_block_plane_cnt
void print_svector_stat(TOut &tout, const SV &svect, bool print_sim=false)
int svector_check(const SV &sv, const V &vect)
void PrintDGap(TOut &tout, const bm::gap_word_t *gap_buf, unsigned gap_len=0)
int file_save_compressed_collection(const CBC &cbc, const std::string &fname, size_t *blob_size=0)
unsigned BinStrLR(const char *str)
Binary code string converted to number. Bits are expected left to right.
unsigned PrintGammaCode(TOut &tout, unsigned value)
void print_bc(TOut &tout, unsigned i, unsigned count)
void SaveBVector(const char *fname, const TBV &bvector)
void print_blocks_count(TOut &tout, const BV &bv)
void PrintTMatrix(TOut &tout, const TM &tmatrix, unsigned cols=0, bool binary=false)
void PrintGap(TOut &tout, const bm::gap_word_t *gap_buf)
void get_block_coord(BI_TYPE nb, unsigned &i, unsigned &j) noexcept
Recalc linear bvector block index into 2D matrix coordinates.
int file_load_compressed_collection(CBC &cbc, const std::string &fname)
void print_str_svector_stat(TOut &tout, const SV &str_svect)
size_t print_bvector_stat(TOut &tout, const BV &bvect)
const unsigned set_total_blocks
void PrintDGapGamma(TOut &tout, const bm::gap_word_t *gap_buf, unsigned gap_len=0)
void PrintDistanceMatrix(TOut &tout, const unsigned distance[bm::set_block_plane_cnt][bm::set_block_plane_cnt])
int load_vector(VECT &vect, const std::string &fname)
const unsigned bie_cut_off
const unsigned gap_levels
size_t compute_serialization_size(const BV &bv)
void convert_bv2sv(SV &sv, const BV &bv)
const unsigned set_block_size
unsigned long long int id64_t
int read_dump_file(const std::string &fname, VT &data)
Read dump file into an STL container (vector of some basic type)
void PrintBits32(TOut &tout, unsigned val)
void build_jaccard_similarity_batch(SIMBATCH &sbatch, const SV &sv)
Utility function to build jaccard similarity batch for sparse_vector<>
unsigned int iLog2(unsigned int value)
const unsigned set_array_shift
void print_stat(TOut &tout, const BV &bv, typename BV::block_idx_type blocks=0)
unsigned short gap_word_t
void LoadBVector(const char *fname, TBV &bvector, unsigned *file_size=0)
const unsigned set_block_shift
int file_save_svector(const SV &sv, const std::string &fname, size_t *sv_blob_size=0, bool use_xor=true)
void PrintBinary(TOut &tout, V val)
int save_vector(const VECT &vect, const std::string &fname)
void SaveBlob(const char *name_prefix, unsigned num, const char *ext, const unsigned char *blob, size_t blob_size)
int file_load_svector(SV &sv, const std::string &fname)
double value_type
The numeric datatype used by the parser.
const GenericPointer< typename T::ValueType > T2 value
std::istream & in(std::istream &in_, double &x_)
static SLJIT_INLINE sljit_ins st(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
static SLJIT_INLINE sljit_ins l(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
#define row(bind, expected)
static DP_BlockInfo * blocks
Structure to compute XOR gap-count profile by sub-block waves.
Distance metric descriptor, holds metric code and result.
layout class for serialization buffer structure
const unsigned char * buf() const noexcept
Return serialization buffer pointer.
size_t size() const noexcept
return current serialized size
Mini-matrix for bit transposition purposes.
static unsigned cols() noexcept
static unsigned rows() noexcept
const T * row(unsigned row_idx) const noexcept
int g(Seg_Gsm *spe, Seq_Mtf *psm, Thd_Gsm *tdg)