NCBI C++ ToolKit
bmdbg.h
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef BMDBG__H__INCLUDED__
2 #define BMDBG__H__INCLUDED__
3 /*
4 Copyright(c) 2002-2017 Anatoliy Kuznetsov(anatoliy_kuznetsov at yahoo.com)
5 
6 Licensed under the Apache License, Version 2.0 (the "License");
7 you may not use this file except in compliance with the License.
8 You may obtain a copy of the License at
9 
10  http://www.apache.org/licenses/LICENSE-2.0
11 
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 
18 For more information please visit: http://bitmagic.io
19 */
20 
21 /*! \file bmdbg.h
22  \brief Debugging functions (internal). Poorly documented, not well written.
23 */
24 
25 
26 #include <cstdio>
27 #include <stdlib.h>
28 #include <cassert>
29 #include <memory>
30 #include <time.h>
31 
32 #include <iostream>
33 #include <sstream>
34 #include <fstream>
35 #include <iomanip>
36 #include <vector>
37 
38 #include "bmalgo_similarity.h"
39 #include "bmsparsevec_serial.h"
40 #include "bmdef.h"
41 
42 
43 
44 #ifdef _MSC_VER
45 #pragma warning( push )
46 #pragma warning( disable : 4311 4312 4127)
47 #endif
48 
49 namespace bm
50 {
51 
52 template<class TOut>
53 void PrintGap(TOut& tout, const bm::gap_word_t* gap_buf)
54 {
55  unsigned len = (*gap_buf >> 3);
56  tout << "[" << *gap_buf << " len=" << len << "] ";
57  for (unsigned i = 0; i < len; ++i)
58  {
59  ++gap_buf;
60  tout << *gap_buf << "; ";
61  }
62  tout << std::endl;
63 }
64 
65 template<class TOut>
66 void PrintDGap(TOut& tout, const bm::gap_word_t* gap_buf, unsigned gap_len=0)
67 {
68 
69  unsigned len = gap_len ? gap_len : (*gap_buf >> 3);
70  tout << "[" " len=" << len << "] ";
71  unsigned i = gap_len ? 0 : 1;
72  for (; i < len; ++i)
73  {
74  tout << gap_buf[i] << "; ";
75  }
76  tout << std::endl;
77 }
78 
/// Integer base-2 logarithm, rounded down.
///
/// @return index of the highest set bit of value; 0 for both 0 and 1
inline unsigned int iLog2(unsigned int value)
{
    unsigned int result = 0;
    // count how many times the value can be halved before it reaches zero
    for (value >>= 1; value != 0; value >>= 1)
        ++result;
    return result;
}
85 
86 template<class TOut>
87 unsigned PrintGammaCode(TOut& tout, unsigned value)
88 {
89  unsigned bits = 0;
90  // Elias gamma encode
91  {
92  unsigned l = iLog2(value);
93  //tout << "log2=" << l << endl;
94  for (unsigned i = 0; i < l; ++i)
95  {
96  tout << 0;
97  ++bits;
98  }
99  tout << 1; ++bits;
100  for (unsigned i = 0; i < l; ++i)
101  {
102  if (value & 1 << i)
103  tout << 1;
104  else
105  tout << 0;
106  ++bits;
107  }
108  }
109  return bits;
110 }
111 
112 template<typename TOut>
113 void PrintDGapGamma(TOut& tout, const bm::gap_word_t* gap_buf, unsigned gap_len=0)
114 {
115  unsigned total = 0;
116  unsigned len = gap_len ? gap_len : (*gap_buf >> 3);
117  tout << "[" " len=" << len << "] ";
118  unsigned i = gap_len ? 0 : 1;
119  for (; i < len; ++i)
120  {
121  unsigned v = gap_buf[i];
122 
123  unsigned bits = PrintGammaCode(tout, v+1);
124  tout << "; ";
125  total += bits;
126  }
127  tout << " gamma_bits=" << total << " src_bits =" << len * 16;
128  tout << std::endl;
129 
130 }
131 
/// Read dump file into an STL container (vector of some basic type)
///
/// Only whole elements are read: when the file size is not a multiple of
/// sizeof(VT::value_type) the trailing partial element is ignored, so the
/// read never writes past the container storage.
///
/// @param fname  file to read (opened in binary mode)
/// @param data   destination container, resized to the number of whole elements
///
/// @return 0 - if reading went well (an empty file gives an empty container)
///        -1 - file cannot be opened
///        -2 - file size cannot be determined or the read failed
///             (data is emptied in this case)
///
template<class VT>
int read_dump_file(const std::string& fname, VT& data)
{
    typedef typename VT::value_type value_type;

    std::ifstream fin(fname.c_str(), std::ios::in | std::ios::binary);
    if (!fin.good())
    {
        return -1;
    }

    fin.seekg(0, std::ios::end);
    const std::streamoff end_pos = fin.tellg();
    if (end_pos < 0) // tellg() reports failure as -1
    {
        data.resize(0);
        return -2;
    }

    const size_t fsize = size_t(end_pos);
    const size_t elem_cnt = fsize / sizeof(value_type);
    data.resize(elem_cnt);

    if (!elem_cnt)
    {
        return 0; // empty input (or less than one whole element)
    }

    fin.seekg(0, std::ios::beg);
    // read exactly the bytes the container can hold
    fin.read((char*) &data[0], std::streamsize(elem_cnt * sizeof(value_type)));
    if (!fin.good())
    {
        data.resize(0);
        return -2;
    }
    return 0;
}
165 
166 template<class TBV>
167 void LoadBVector(const char* fname, TBV& bvector, unsigned* file_size=0)
168 {
169  std::ifstream bv_file (fname, std::ios::in | std::ios::binary);
170  if (!bv_file.good())
171  {
172  std::cerr << "Cannot open file: " << fname << std::endl;
173  exit(1);
174  }
175  bv_file.seekg(0, std::ios_base::end);
176  unsigned length = (unsigned)bv_file.tellg();
177  if (length == 0)
178  {
179  std::cerr << "Empty file:" << fname << std::endl;
180  exit(1);
181  }
182  if (file_size)
183  *file_size = length;
184 
185  bv_file.seekg(0, std::ios::beg);
186 
187  char* buffer = new char[length];
188 
189  bv_file.read(buffer, length);
190 
191  bm::deserialize(bvector, (unsigned char*)buffer);
192 
193  delete [] buffer;
194 }
195 
196 template<class TBV>
197 void SaveBVector(const char* fname, const TBV& bvector)
198 {
199  std::ofstream bfile (fname, std::ios::out | std::ios::binary);
200  if (!bfile.good())
201  {
202  std::cerr << "Cannot open file: " << fname << std::endl;
203  exit(1);
204  }
205  typename TBV::statistics st1;
206  bvector.calc_stat(&st1);
207 
208  unsigned char* blob = new unsigned char[st1.max_serialize_mem];
209  size_t blob_size = bm::serialize(bvector, blob);
210 
211 
212  bfile.write((char*)blob, std::streamsize(blob_size));
213  bfile.close();
214 
215  delete [] blob;
216 }
217 
/// Write a memory BLOB to the file "<name_prefix>-<num><ext>".
///
/// Failure to open or to write the file prints a message to std::cerr and
/// terminates the process with exit(1).
///
/// @param name_prefix  first part of the file name
/// @param num          number appended after a '-'
/// @param ext          file name tail, appended verbatim (no '.' is added)
/// @param blob         data to write
/// @param blob_size    number of bytes to write
inline
void SaveBlob(const char* name_prefix, unsigned num, const char* ext,
              const unsigned char* blob, size_t blob_size)
{
    std::stringstream fname_str;
    fname_str << name_prefix << "-" << num << ext;

    const std::string fname = fname_str.str();
    std::ofstream bfile (fname.c_str(), std::ios::out | std::ios::binary);
    if (!bfile.good())
    {
        std::cerr << "Cannot open file: " << fname << std::endl;
        exit(1);
    }
    bfile.write((const char*)blob, std::streamsize(blob_size));
    bfile.close();
    if (!bfile.good()) // write or close failed: the file is incomplete
    {
        std::cerr << "Cannot write file: " << fname << std::endl;
        exit(1);
    }
}
236 
237 
/// Print all bits of val as 0/1 digits, lowest bit first.
///
/// For types wider than 16 bits a "-" is printed after bit 15.
///
/// @param tout  output stream
/// @param val   integral value to print
template<typename V, typename TOut>
void PrintBinary(TOut& tout, V val)
{
    const unsigned total_bits = unsigned(sizeof(V) * 8);
    const bool wide = total_bits > 16;

    unsigned pos = 0;
    while (pos < total_bits)
    {
        tout << (unsigned)((val >> pos) & 1);
        if (wide && pos == 15)
            tout << "-";
        ++pos;
    }
}
247 
/// Print the bits of an unsigned word; thin wrapper forwarding to PrintBinary.
///
/// @param tout  output stream
/// @param val   word to print
template<typename TOut>
void PrintBits32(TOut& tout, unsigned val)
{
    const unsigned word = val;
    PrintBinary(tout, word);
}
253 
254 template<typename TOut>
255 void PrintDistanceMatrix(TOut& tout,
256  const unsigned distance[bm::set_block_plane_cnt][bm::set_block_plane_cnt])
257 {
258  for (unsigned i = 0; i < bm::set_block_plane_cnt; ++i)
259  {
260  const unsigned* row = distance[i];
261  tout << i << ": ";
262  for (unsigned j = i; j < bm::set_block_plane_cnt; ++j)
263  {
264  tout << std::setw(4) << std::setfill('0') << row[j] << " ";
265  }
266  tout << std::endl;
267  }
268 }
269 
/// Print a matrix row by row.
///
/// Each row is prefixed with its index (plus one extra space for indexes
/// below 10). Cells are printed either as zero-padded width-4 numbers or,
/// in binary mode, through PrintBinary without separators.
///
/// @param tout     output stream
/// @param tmatrix  matrix providing rows(), cols(), row(i) and value_type
/// @param cols     number of columns to print, 0 = tmatrix.cols()
/// @param binary   true to print cells as bit strings
template<typename TM, typename TOut>
void PrintTMatrix(TOut& tout, const TM& tmatrix, unsigned cols=0, bool binary = false)
{
    const unsigned width = (cols != 0) ? cols : tmatrix.cols();

    for (unsigned r = 0; r < tmatrix.rows(); ++r)
    {
        const typename TM::value_type* cells = tmatrix.row(r);

        tout << r << ": ";
        if (r < 10)
            tout << " ";

        for (unsigned c = 0; c < width; ++c)
        {
            if (binary)
                PrintBinary(tout, cells[c]);
            else
                tout << std::setw(4) << std::setfill('0') << cells[c] << " ";
        }
        tout << std::endl;
    }
}
293 
/// Binary code string converted to number
/// Bits are expected left to right
///
/// The first character maps to bit 0, the second to bit 1 and so on.
/// Parsing stops at the end of the string or after sizeof(unsigned)*8 bits.
/// Characters other than '0' and '1' trigger assert(0); when asserts are
/// compiled out they are skipped without consuming a bit position.
///
/// @param str  zero-terminated string of '0'/'1' characters (NULL gives 0)
/// @return assembled value
///
inline
unsigned BinStrLR(const char* str)
{
    unsigned value = 0;
    if (!str)
        return value;

    const unsigned max_bits = unsigned(sizeof(unsigned) * 8);
    for (unsigned bit_idx = 0; *str && bit_idx < max_bits; ++str)
    {
        switch(*str)
        {
        case '0':
            ++bit_idx;
            break;
        case '1':
            // unsigned literal: shifting a signed 1 into the top bit is undefined
            value |= (1u << bit_idx);
            ++bit_idx;
            break;
        default:
            assert(0);
        }
    }
    return value;
}
321 
/// Print per-block counters of a bit-vector, 10 values per output row.
///
/// Counters are obtained with bv.count_blocks() into a zero-filled array of
/// 128000 entries. Each row starts with the index of its first block and
/// ends with " | " and the running sum; the grand total follows.
///
/// @param tout  output stream
/// @param bv    bit-vector providing count_blocks(unsigned*)
template<class BV, typename TOut>
void print_blocks_count(TOut& tout, const BV& bv)
{
    const unsigned sz = 128000;
    // vector owns the counters, so nothing leaks if printing throws
    std::vector<unsigned> bc_arr(sz, 0u);

    unsigned last_block = bv.count_blocks(bc_arr.data());
    if (last_block >= sz) // never read past the counters array
        last_block = sz - 1;
    unsigned sum = 0;

    for (unsigned i = 0; i <= last_block; ++i)
    {
        tout << i << ":";

        // one output row: up to 10 counters; i stays on the last printed
        // block when the row is full, the outer ++i then moves to the next
        unsigned j = 0;
        for (; i <= last_block; ++i)
        {
            tout << std::setw(5) << std::setfill('0') << bc_arr[i] << " ";
            sum += bc_arr[i];
            if (++j == 10) break;
        }
        tout << " | " << sum << std::endl;
    }
    tout << "Total=" << sum << std::endl;
}
349 
/// Incremental printer of (block index, count) pairs, 10 values per row.
///
/// State (running sum, column position, last seen index) is kept in
/// function-local statics. A call with i == 0 resets the sum and the column
/// position. When the new index leaves a gap after the last seen one, a
/// single zero entry for the index right after the last seen one is printed
/// first.
///
/// @param tout   output stream
/// @param i      block index
/// @param count  value to print for the block
template<typename TOut>
void print_bc(TOut& tout, unsigned i, unsigned count)
{
    static unsigned running_total = 0;
    static unsigned column = 0;
    static unsigned last_idx = 0;

    if (i != 0)
    {
        if (last_idx + 1 < i)
            print_bc(tout, last_idx + 1, 0); // filler entry for the gap
        last_idx = i;
    }
    else
    {
        running_total = 0;
        column = 0;
    }

    // a new row starts with the index of its first entry
    if (column == 0)
        tout << i << ":";

    tout << std::setw(5) << std::setfill('0') << count << " ";
    running_total += count;

    if (++column == 10)
    {
        column = 0;
        tout << " | " << running_total << std::endl;
    }
}
383 
384 template<class BV, typename TOut>
385 size_t print_bvector_stat(TOut& tout, const BV& bvect)
386 {
387  typename BV::statistics st;
388  bvect.calc_stat(&st);
389 
390  typename serializer<BV>::buffer buf;
391  bm::serializer<BV> ser;
392  ser.serialize(bvect, buf, &st);
393  auto ssize = buf.size();
394 
395  tout << " - Blocks: [ "
396  << "B:" << st.bit_blocks
397  << ", G:" << st.gap_blocks << "] "
398  << " count() = " << bvect.count()
399  << ", mem = " << st.memory_used << " " << (st.memory_used / (1024 * 1024)) << "MB "
400  << ", max smem:" << st.max_serialize_mem << " " << (st.max_serialize_mem / (1024 * 1024)) << "MB "
401  << " compressed = " << ssize << " " << (ssize / (1024 * 1024)) << "MB "
402  << std::endl;
403  return ssize;
404 }
405 
406 
407 template<class BV, typename TOut>
408 void print_stat(TOut& tout, const BV& bv, typename BV::block_idx_type blocks = 0)
409 {
410  const typename BV::blocks_manager_type& bman = bv.get_blocks_manager();
411 
412  bm::id_t count = 0; (void)count;
413  int printed = 0;
414 
415  int total_gap_eff = 0;
416 
417  if (!blocks)
418  {
420  }
421 
422  typename BV::block_idx_type nb;
423  typename BV::block_idx_type nb_prev = 0;
424  for (nb = 0; nb < blocks; ++nb)
425  {
426  unsigned i0, j0;
427  bm::get_block_coord(nb, i0, j0);
428  const bm::word_t* blk = bman.get_block(i0, j0);
429 
430  if (!blk)
431  continue;
432 
433  if (IS_FULL_BLOCK(blk))
434  {
435  if (BM_IS_GAP(blk)) // gap block
436  {
437  tout << "[Alert!" << nb << "]";
438  assert(0);
439  }
440 
441  typename BV::block_idx_type start = nb;
442  for(auto i = nb+1; i < bm::set_total_blocks; ++i, ++nb)
443  {
444  bm::get_block_coord(nb, i0, j0);
445  blk = bman.get_block(i0, j0);
446  if (IS_FULL_BLOCK(blk))
447  {
448  if (BM_IS_GAP(blk)) // gap block
449  {
450  tout << "[Alert!" << nb << "]";
451  assert(0);
452  --nb;
453  break;
454  }
455 
456  }
457  else
458  {
459  --nb;
460  break;
461  }
462  }
463 
464  tout << "{F." << start << ":" << nb << "}";
465  ++printed;
466  }
467  else
468  {
469  if ((nb-1) != nb_prev)
470  {
471  tout << ".." << (size_t)nb-nb_prev << "..";
472  }
473 
474  if (BM_IS_GAP(blk))
475  {
476  unsigned bc = bm::gap_bit_count(BMGAP_PTR(blk));
477  /*unsigned sum = */bm::gap_control_sum(BMGAP_PTR(blk));
478  unsigned level = bm::gap_level(BMGAP_PTR(blk));
479  count += bc;
480  unsigned len = bm::gap_length(BMGAP_PTR(blk))-1;
481  unsigned raw_size=bc*2;
482  unsigned cmr_len=len*2;
483  size_t mem_eff = raw_size - cmr_len;
484  total_gap_eff += unsigned(mem_eff);
485 
486  unsigned i,j;
487  bm::get_block_coord(nb, i, j);
488  tout << " [GAP " << nb << "(" << i << "," << j << ")"
489  << "=" << bc << ":" << level << "-L" << len << "(" << mem_eff << ")]";
490  ++printed;
491  }
492  else // bitset
493  {
494  unsigned bc = bm::bit_block_count(blk);
495 
496  unsigned zw = 0;
497  for (unsigned i = 0; i < bm::set_block_size; ++i)
498  {
499  zw += (blk[i] == 0);
500  }
501 
502  count += bc;
503  tout << " (BIT " << nb << "=" << bc << "[" << zw << "])";
504  ++printed;
505  }
506  }
507  if (printed == 10)
508  {
509  printed = 0;
510  printf("\n");
511  }
512  nb_prev = nb;
513  } // for nb
514  tout << std::endl << "gap_efficiency=" << total_gap_eff << std::endl;
515 
516 }
517 
518 template<class BV>
519 size_t compute_serialization_size(const BV& bv)
520 {
522  unsigned char* buf = 0;
523  typename BV::size_type blob_size = 0;
524  try
525  {
526  bm::serializer<BV> bvs(typename BV::allocator_type(), tb);
527  //bvs.set_compression_level(4);
528 
529  typename BV::statistics st;
530  bv.calc_stat(&st);
531 
532  buf = new unsigned char[st.max_serialize_mem];
533  blob_size = (unsigned)bvs.serialize(bv, (unsigned char*)buf, st.max_serialize_mem);
534  }
535  catch (...)
536  {
537  delete [] buf;
538  throw;
539  }
540 
541  delete [] buf;
542  return blob_size;
543 }
544 
545 #if 0
// Disabled experimental dump (compiled out by the enclosing #if 0 / #endif).
//
// For every block index nb below (sv.size() >> bm::set_block_shift) and for
// every plane i of the sparse vector it takes the plane's block at (i0, j0),
// skips invalid-address and GAP blocks, computes the block metrics with
// bm::bit_block_change_bc32() and then scans planes k > i for a block that
// works as an XOR filter lowering the metric below bm::bie_cut_off.
// "XOR match" is printed when such a candidate was found.
//
// NOTE(review): as visible here the code would not compile if enabled:
//  - the parameter list lacks a comma between "tout" and "const SV& sv";
//  - "tb" and "x_descr" are used but not declared in the visible lines.
// NOTE(review): the inner loop over k fetches sv.get_plane(i), not
// sv.get_plane(k) - looks unintended, confirm before re-enabling.
546 template<class SV, typename TOut>
547 void print_svector_xor_stat(TOut& toutconst SV& sv)
548 {
550  typename SV::size_type sz = sv.size();
551  if (!sz)
552  return;
553  typename SV::size_type nb_max = (sz >> bm::set_block_shift);
554 
555  for (typename SV::size_type nb = 0; nb < nb_max; ++nb)
556  {
557  tout << "nb = " << nb << std::endl;
558 
559  unsigned i0 = unsigned(nb >> bm::set_array_shift);
560  unsigned j0 = unsigned(nb & bm::set_array_mask);
561 
562  auto planes = sv.planes();
563  for (unsigned i = 0; i < planes; ++i)
564  {
565  const typename SV::bvector_type* bv = sv.get_plane(i);
566  if (!bv)
567  continue;
568  const typename SV::bvector_type::blocks_manager_type& bman = bv->get_blocks_manager();
569  const bm::word_t* block = bman.get_block_ptr(i0, j0);
570  if (!IS_VALID_ADDR(block) || BM_IS_GAP(block))
571  continue;
572 
573  // compute block complexity
575  bm::compute_complexity_descr(block, x_descr);
576  unsigned gc, bc;
577  bm::bit_block_change_bc32(block, &gc, &bc);
578  unsigned best_metric, block_metric;
579  block_metric = best_metric = gc < bc ? gc : bc;
580 
581  bool kb_found = false;
582  bm::id64_t d64 = 0;
583  for (unsigned k = i + 1; k < planes; ++k)
584  {
585  const typename SV::bvector_type* bv_x = sv.get_plane(i);
586  if (!bv_x)
587  continue;
588  const typename SV::bvector_type::blocks_manager_type& bman_x = bv_x->get_blocks_manager();
589  const bm::word_t* block_x = bman_x.get_block_ptr(i0, j0);
590  if (!IS_VALID_ADDR(block_x) || BM_IS_GAP(block_x))
591  continue;
592 
593  // evaluate potential key block as XOR filter
594  bm::id64_t kb_d64 =
595  bm::compute_xor_complexity_descr(block, block_x, x_descr);
596  if (kb_d64) // candidate XOR filter found
597  {
598  bm::bit_block_xor_product(tb, block, block_x, kb_d64);
599  unsigned kb_bc, kb_gc;
600  bm::bit_block_change_bc32(tb, &kb_gc, &kb_bc);
601  if (kb_gc < best_metric && kb_gc < bm::bie_cut_off)
602  {
603  d64 = kb_d64;
604  best_metric = kb_gc;
605  kb_found = true;
606  //*kb_j = j0;
607  }
608  if (kb_bc < best_metric && kb_bc < bm::bie_cut_off)
609  {
610  d64 = kb_d64;
611  best_metric = kb_bc;
612  kb_found = true;
613  //*kb_j = j0;
614  }
615  }
616  } // for k
617 
618  if (kb_found)
619  {
620  tout << "XOR match " << "metric gain = " << std::endl;
621  }
622  tout << std::endl;
623  } // for i
624 
625  } // for nb
626 }
627 #endif
628 
629 template<class SV, typename TOut>
630 void print_svector_stat(TOut& tout, const SV& svect, bool print_sim = false)
631 {
632  typedef typename SV::bvector_type bvector_type;
633  /// Functor to compute jaccard similarity
634  /// \internal
635  struct Jaccard_Func
636  {
637  unsigned operator () (distance_metric_descriptor* dmit,
638  distance_metric_descriptor* /*dmit_end*/)
639  {
640  double d;
641  BM_ASSERT(dmit->metric == COUNT_AND);
642  typename bvector_type::size_type cnt_and = dmit->result;
643  ++dmit;
644  BM_ASSERT(dmit->metric == COUNT_OR);
645  typename bvector_type::size_type cnt_or = dmit->result;
646  if (cnt_and == 0 || cnt_or == 0)
647  {
648  d = 0.0;
649  }
650  else
651  {
652  d = double(cnt_and) / double(cnt_or);
653  }
654  unsigned res = unsigned(d * 100);
655  if (res > 100) res = 100;
656  return res;
657  }
658  };
659 
661  typedef bm::similarity_batch<similarity_descriptor_type> similarity_batch_type;
662 
663  similarity_batch_type sbatch;
664 
665  bm::build_jaccard_similarity_batch(sbatch, svect);
666 
667  if (print_sim)
668  {
669  sbatch.calculate();
670  sbatch.sort();
671  }
672 
673  typename similarity_batch_type::vector_type& sim_vec = sbatch.descr_vect_;
674  if (print_sim)
675  {
676  for (size_t k = 0; k < sim_vec.size(); ++k)
677  {
678  unsigned sim = sim_vec[k].similarity();
679  if (sim > 10)
680  {
681  const typename SV::bvector_type* bv1 = sim_vec[k].get_first();
682  const typename SV::bvector_type* bv2 = sim_vec[k].get_second();
683 
684  auto bv_size2 = compute_serialization_size(*bv2);
685 
686  typename SV::bvector_type bvx(*bv2);
687  bvx ^= *bv1;
688 
689  auto bv_size_x = compute_serialization_size(bvx);
690  if (bv_size_x < bv_size2) // true savings
691  {
692  size_t diff = bv_size2 - bv_size_x;
693 
694  // compute 10% cut-off
695  size_t sz10p = bv_size2 / 10;
696  if (diff > sz10p)
697  {
698  tout << "[" << sim_vec[k].get_first_idx()
699  << ", " << sim_vec[k].get_second_idx()
700  << "] = " << sim
701  << " size(" << sim_vec[k].get_second_idx() << ")="
702  << bv_size2
703  << " size(x)=" << bv_size_x
704  << " diff=" << diff
705  << std:: endl;
706  }
707  }
708  }
709  } // for k
710  }
711 
712 
713  typename SV::statistics st;
714  svect.calc_stat(&st);
715 
716  tout << "size = " << svect.size() << std::endl;
717 
718  tout << "Bit blocks: " << st.bit_blocks << std::endl;
719  tout << "GAP blocks: " << st.gap_blocks << std::endl;
720  tout << "GAP levels counts:";
721  for (unsigned g = 0; g < bm::gap_levels; ++g)
722  {
723  switch (g)
724  {
725  case 0: tout << "[ I: " << st.gap_levels[g] << "] "; break;
726  case 1: tout << "[ II: " << st.gap_levels[g] << "] "; break;
727  case 2: tout << "[ III:" << st.gap_levels[g] << "] "; break;
728  case 3: tout << "[ IV: " << st.gap_levels[g] << "] "; break;
729  default:
730  tout << "[ " << g << ": " << st.gap_levels[g] << "] "; break;
731  }
732  } // for
733  tout << std::endl;
734 
735  tout << "Max serialize mem:" << st.max_serialize_mem << " "
736  << (st.max_serialize_mem / (1024 * 1024)) << "MB" << std::endl;
737  tout << "Memory used: " << st.memory_used << " "
738  << (st.memory_used / (1024 * 1024)) << "MB" << std::endl;
739 
740  auto eff_max_element = svect.effective_vector_max();
741  size_t std_vect_size = sizeof(typename SV::value_type) * svect.size() * eff_max_element;
742  tout << "Projected mem usage for vector<value_type>:"
743  << std_vect_size << " "
744  << std_vect_size / (1024 * 1024) << "MB"
745  << std::endl;
746  if (sizeof(typename SV::value_type) > 4 && (eff_max_element == 1))
747  {
748  tout << "Projected mem usage for vector<long long>:"
749  << sizeof(long long) * svect.size() << std::endl;
750  }
751 
752  tout << "\nplanes:" << std::endl;
753 
754  size_t ssize(0), octet_ssize(0);
755 
756  typename SV::bvector_type bv_join; // global OR of all planes
757  auto planes = svect.get_bmatrix().rows();
758 
759  unsigned octet_cnt(0), octet(0);
760  for (unsigned i = 0; i < planes; ++i)
761  {
762  const typename SV::bvector_type* bv_plane = svect.get_slice(i);
763  tout << i << "-" << octet_cnt << ":";
764  if (bv_plane == 0)
765  {
766  tout << "NULL\n";
767  bool any_else = false;
768  for (unsigned j = i+1; j < planes; ++j) // look ahead
769  {
770  if (svect.get_slice(j))
771  {
772  any_else = true;
773  break;
774  }
775  }
776  if (!any_else)
777  break;
778  }
779  else
780  {
781  bv_join |= *bv_plane;
782  auto pssize = bm::print_bvector_stat(tout,*bv_plane);
783  ssize += pssize;
784  octet_ssize += pssize;
785  }
786  if (octet_cnt == 7)
787  {
788  tout << "--------------------" << std::endl;
789  tout << "octet N = " << octet <<
790  " compressed = " << octet_ssize <<
791  " " << octet_ssize/(1024*1024) << "MB" << std::endl;
792  octet_cnt = 0; octet_ssize = 0;
793  octet++;
794  tout << std::endl;
795  }
796  else
797  {
798  octet_cnt++;
799  }
800  } // for i
801  tout << "-------------------- END of OCTETS\n";
802 
803  const typename SV::bvector_type* bv_null = svect.get_null_bvector();
804  if (bv_null)
805  {
806  tout << "NULL plane:\n";
807  ssize += print_bvector_stat(tout,*bv_null);
808  typename SV::size_type not_null_cnt = bv_null->count();
809  tout << " - Bitcount: " << not_null_cnt << std::endl;
810 
811  tout << "Projected mem usage for std::vector<pair<unsigned, value_type> >:"
812  << ((sizeof(typename SV::value_type) + sizeof(unsigned)) * not_null_cnt) << " "
813  << ((sizeof(typename SV::value_type) + sizeof(unsigned)) * not_null_cnt) / (1024 * 1024) << "MB"
814  << std::endl;
815  }
816  else
817  {
818  tout << "NO NULL plane:\n";
819  }
820 
821  tout << " Total serialized size (planes): " << ssize
822  << std::endl
823  << " " << ssize / (1024 * 1024) << " MB" << std::endl;
824 
825  if (svect.size())
826  {
827  bm::id64_t bv_join_cnt = bv_join.count();
828  double fr = double(bv_join_cnt) / double (svect.size());
829  tout << "Non-zero elements: " << bv_join_cnt << " "
830  << "ratio=" << fr
831  << std::endl;
832  size_t non_zero_mem = size_t(bv_join_cnt) * sizeof(typename SV::value_type);
833  tout << "Projected mem usage for non-zero elements: " << non_zero_mem << " "
834  << non_zero_mem / (1024*1024) << " MB"
835  << std::endl;
836  }
837 }
838 
839 
/// Print which characters occur at every octet position of a string vector.
///
/// The octet frequency matrix is obtained with calc_octet_stat(). For every
/// matrix row having at least one non-zero cell a line
/// "<row> : <characters>\t total= <count>" is printed, where the characters
/// are the column indexes of the non-zero cells. Rows without non-zero
/// cells are skipped.
///
/// @param tout       output stream
/// @param str_svect  string sparse vector to describe
template<class SV, typename TOut>
void print_str_svector_stat(TOut& tout, const SV& str_svect)
{
    typename SV::octet_freq_matrix_type octet_stat_matr;

    str_svect.calc_octet_stat(octet_stat_matr);

    for (unsigned i = 0; i < octet_stat_matr.rows(); ++i)
    {
        const auto* row = octet_stat_matr.row(i);

        bool any = false;
        for (unsigned j = 0; j < octet_stat_matr.cols(); ++j)
        {
            if (row[j]) // letter is present
            {
                any = true;
                break;
            }
        }
        if (!any)
            continue;

        tout << i << " : ";
        unsigned cnt = 0;
        for (unsigned j = 0; j < octet_stat_matr.cols(); ++j)
        {
            if (row[j]) // letter is present
            {
                tout << char(j);
                ++cnt;
            }
        } // for j

        // cnt is at least 1 here: rows without letters were skipped above
        tout << "\t total= " << cnt;
        tout << std::endl;
    } // for i
}
884 
// Save std::vector to a binary file
//
// File layout: element count as size_t, followed by the raw element bytes
// (the second part is omitted for an empty vector).
//
// returns 0 - on success
//        -1 - file cannot be opened or a write failed
template<class VECT>
int save_vector(const VECT& vect, const std::string& fname)
{
    typedef typename VECT::value_type value_type;

    std::ofstream fout(fname.c_str(), std::ios::binary);
    if (!fout.good())
        return -1;

    const size_t elem_cnt = vect.size();
    fout.write(reinterpret_cast<const char*>(&elem_cnt), sizeof(elem_cnt));
    if (!fout.good())
        return -1;

    if (elem_cnt != 0)
    {
        const std::streamsize payload =
            (std::streamsize) (elem_cnt * sizeof(value_type));
        fout.write(reinterpret_cast<const char*>(vect.data()), payload);
        if (!fout.good())
            return -1;
    }

    fout.close();
    return 0;
}
907 
// Load std::vector from a binary file written by save_vector()
//
// Expected layout: element count as size_t, followed by the raw element
// bytes. The element count is checked against the number of bytes actually
// present in the file before the vector is resized, so a truncated or
// corrupt file cannot trigger an oversized allocation.
//
// returns 0 - on success
//        -1 - file cannot be opened, or the element data is missing or
//             cannot be read (vect is emptied in the latter cases)
//        -2 - the element count header cannot be read
template<class VECT>
int load_vector(VECT& vect, const std::string& fname)
{
    typedef typename VECT::value_type value_type;

    std::ifstream fin(fname.c_str(), std::ios::in | std::ios::binary);
    if (!fin.good())
        return -1;
    size_t sz;
    fin.read((char*) &sz, sizeof(sz));
    if (!fin.good())
        return -2;

    // find out how many payload bytes follow the header
    const std::streamoff data_pos = fin.tellg();
    fin.seekg(0, std::ios::end);
    const std::streamoff end_pos = fin.tellg();
    if (data_pos < 0 || end_pos < data_pos)
        return -2;
    const size_t avail = size_t(end_pos - data_pos);
    if (sz > avail / sizeof(value_type)) // header promises more than the file holds
    {
        vect.resize(0);
        return -1;
    }
    fin.seekg(data_pos, std::ios::beg);

    vect.resize(sz);
    if (sz)
    {
        fin.read((char*)vect.data(), (std::streamsize) (sz*sizeof(value_type)));
        if (!fin.good())
        {
            vect.resize(0); // do not hand back partially read data
            return -1;
        }
    }
    fin.close();
    return 0;
}
930 
931 
932 
933 // save compressed collection to disk
934 //
935 template<class CBC>
936 int file_save_compressed_collection(const CBC& cbc, const std::string& fname, size_t* blob_size = 0)
937 {
939  typename CBC::buffer_type sbuf;
940 
941  cbcs.serialize(cbc, sbuf);
942 
943  std::ofstream fout(fname.c_str(), std::ios::binary);
944  if (!fout.good())
945  {
946  return -1;
947  }
948  const char* buf = (char*)sbuf.buf();
949  fout.write(buf, sbuf.size());
950  if (!fout.good())
951  {
952  return -1;
953  }
954 
955  fout.close();
956 
957  if (blob_size)
958  {
959  *blob_size = sbuf.size();
960  }
961  return 0;
962 }
963 
964 // load compressed collection from disk
965 //
966 template<class CBC>
967 int file_load_compressed_collection(CBC& cbc, const std::string& fname)
968 {
969  std::vector<unsigned char> buffer;
970 
971  // read the input buffer, validate errors
972  auto ret = bm::read_dump_file(fname, buffer);
973  if (ret != 0)
974  {
975  return -2;
976  }
977  if (buffer.size() == 0)
978  {
979  return -3;
980  }
981 
982  const unsigned char* buf = &buffer[0];
983 
985  cbcd.deserialize(cbc, buf);
986 
987  return 0;
988 }
989 
990 
991 
992 // save sparse_vector dump to disk
993 //
994 template<class SV>
995 int file_save_svector(const SV& sv, const std::string& fname,
996  size_t* sv_blob_size=0, bool use_xor = true)
997 {
998  BM_ASSERT(!fname.empty());
999 
1001 
1002  bm::sparse_vector_serializer<SV> sv_serializer;
1003  sv_serializer.set_xor_ref(use_xor);
1004 
1005  sv_serializer.serialize(sv, sv_lay);
1006  std::ofstream fout(fname.c_str(), std::ios::binary);
1007  if (!fout.good())
1008  {
1009  return -1;
1010  }
1011  const char* buf = (char*)sv_lay.buf();
1012  fout.write(buf, std::streamsize(sv_lay.size()));
1013  if (!fout.good())
1014  {
1015  return -1;
1016  }
1017 
1018  fout.close();
1019 
1020  if (sv_blob_size)
1021  {
1022  *sv_blob_size = sv_lay.size();
1023  }
1024  return 0;
1025 }
1026 
1027 template<class SV>
1028 int file_load_svector(SV& sv, const std::string& fname)
1029 {
1030  std::vector<unsigned char> buffer;
1031 
1032  // read the input buffer, validate errors
1033  auto ret = bm::read_dump_file(fname, buffer);
1034  if (ret != 0)
1035  {
1036  return -2;
1037  }
1038  if (buffer.size() == 0)
1039  {
1040  return -3;
1041  }
1042 
1043  const unsigned char* buf = &buffer[0];
1045  auto res = bm::sparse_vector_deserialize(sv, buf, tb);
1046  if (res != 0)
1047  {
1048  return -4;
1049  }
1050  return 0;
1051 }
1052 
1053 
// compare-check if sparse vector exactly corresponds to vector
//
// Elements are compared in their own types (no narrowing to unsigned), so
// values wider than 32 bits are checked in full.
//
// returns 0 - if equal
//         1 - no size match
//         2 - element match fails
template<class SV, class V>
int svector_check(const SV& sv, const V& vect)
{
    if (sv.size() != vect.size())
    {
        return 1;
    }
    for (size_t i = 0; i < vect.size(); ++i)
    {
        const auto v1 = sv[static_cast<typename SV::size_type>(i)];
        const auto v2 = vect[i];
        if (v1 != v2)
            return 2;
    } // for i
    return 0;
}
1075 
1076 
/// Append the index of every set bit of a bit-vector to a sparse vector.
///
/// Values are pushed through the sparse vector's back inserter in
/// enumeration order; the inserter is flushed once at the end.
///
/// @param sv  target sparse vector
/// @param bv  source bit-vector
template<class SV, class BV>
void convert_bv2sv(SV& sv, const BV& bv)
{
    typename SV::back_insert_iterator inserter = sv.get_back_inserter();
    typename BV::enumerator cursor = bv.first();
    while (cursor.valid())
    {
        auto bit_idx = cursor.value();
        inserter = bit_idx;
        ++cursor;
    }
    inserter.flush();
}
1089 
1090 #if 0
// Disabled helper (compiled out by the enclosing #if 0 / #endif).
// NOTE(review): uses sysconf()/_SC_PAGESIZE, for which no header is included
// in the visible part of this file, and is not declared inline although it
// sits in a header - both need attention before re-enabling.
1091 /**
1092  Get the current resident set size: the second field of /proc/self/statm
1093  multiplied by the page size. Returns 0 when the file cannot be opened
1093  or parsed.
1093  @internal
1094  */
1095 size_t getCurrentRSS( )
1096 {
1097  long rss = 0L;
1098  FILE* fp = NULL;
1099  if ( (fp = fopen( "/proc/self/statm", "r" )) == NULL )
1100  return (size_t)0L; /* Can't open? */
1101  if ( fscanf( fp, "%*s%ld", &rss ) != 1 )
1102  {
1103  fclose( fp );
1104  return (size_t)0L; /* Can't read? */
1105  }
1106  fclose( fp );
1107  return (size_t)rss * (size_t)sysconf( _SC_PAGESIZE);
1108 }
1109 #endif
1110 
1111 
1112 } // namespace
1113 
1114 
1115 
1116 #ifdef _MSC_VER
1117 #pragma warning( pop )
1118 #endif
1119 
1120 #endif
#define BM_DECLARE_TEMP_BLOCK(x)
Definition: bm.h:47
Definitions(internal)
#define IS_FULL_BLOCK(addr)
Definition: bmdef.h:162
#define IS_VALID_ADDR(addr)
Definition: bmdef.h:161
#define BMGAP_PTR(ptr)
Definition: bmdef.h:189
#define BM_IS_GAP(ptr)
Definition: bmdef.h:191
#define BM_ASSERT
Definition: bmdef.h:139
Serialization for sparse_vector<>
Bitvector Bit-vector container with runtime compression of bits.
Definition: bm.h:115
void calc_stat(struct bm::bvector< Alloc >::statistics *st) const noexcept
Calculates bitvector statistics.
Definition: bm.h:3978
size_type count() const noexcept
population count (count of ON bits)
Definition: bm.h:2401
Deserializer for compressed collections.
int deserialize(CBC &buffer_coll, const unsigned char *buf, bm::word_t *temp_block=0)
Serializer for compressed collections.
void serialize(const CBC &buffer_coll, buffer_type &buf, bm::word_t *temp_block=0)
Serialize compressed collection into memory buffer.
Bit-vector serialization class.
Definition: bmserial.h:76
size_type serialize(const BV &bv, unsigned char *buf, size_t buf_size)
Bitvector serialization into memory block.
Definition: bmserial.h:2703
Serialize sparse vector into a memory buffer(s) structure.
void set_xor_ref(bool is_enabled) noexcept
Turn ON and OFF XOR compression of sparse vectors Enables XOR reference compression for the sparse ve...
void serialize(const SV &sv, sparse_vector_serial_layout< SV > &sv_layout)
Serialize sparse vector into a memory buffer(s) structure.
static const char fp[]
Definition: des.c:87
std::ofstream out("events_result.xml")
main entry point for tests
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:61
static const char * str(char *buf, int n)
Definition: stats.c:84
static const column_t columns[]
Definition: utf8_2.c:22
char data[12]
Definition: iconv.c:80
string
Definition: cgiapp.hpp:690
#define NULL
Definition: ncbistd.hpp:225
const CVect2< U > & v2
Definition: globals.hpp:440
bm::id_t bit_block_count(const bm::word_t *block) noexcept
Bitcount for bit block.
Definition: bmfunc.h:5051
size_t serialize(const BV &bv, unsigned char *buf, bm::word_t *temp_block=0, unsigned serialization_flags=0)
Saves bitvector into memory.
Definition: bmserial.h:3071
size_t deserialize(BV &bv, const unsigned char *buf, bm::word_t *temp_block=0, const bm::bv_ref_vector< BV > *ref_vect=0)
Bitvector deserialization from a memory BLOB.
Definition: bmserial.h:3137
@ COUNT_AND
(A & B).count()
Definition: bmalgo_impl.h:59
@ COUNT_OR
(A | B).count()
Definition: bmalgo_impl.h:61
unsigned gap_bit_count(const T *buf, unsigned dsize=0) noexcept
Calculates number of bits ON in GAP buffer.
Definition: bmfunc.h:2299
unsigned gap_control_sum(const T *buf) noexcept
Calculates sum of all words in GAP block. (For debugging purposes)
Definition: bmfunc.h:4521
T gap_level(const T *buf) noexcept
Returns GAP block capacity level.
Definition: bmfunc.h:1649
bm::gap_word_t gap_length(const bm::gap_word_t *buf) noexcept
Returns GAP block length.
Definition: bmfunc.h:1603
int sparse_vector_deserialize(SV &sv, const unsigned char *buf, bm::word_t *temp_block=0)
Deserialize sparse vector.
exit(2)
char * buf
int i
int len
#include<zmmintrin.h>
Definition: bm.h:78
const unsigned set_array_mask
Definition: bmconst.h:97
const unsigned set_block_plane_cnt
Definition: bmconst.h:64
void print_svector_stat(TOut &tout, const SV &svect, bool print_sim=false)
Definition: bmdbg.h:630
int svector_check(const SV &sv, const V &vect)
Definition: bmdbg.h:1060
void PrintDGap(TOut &tout, const bm::gap_word_t *gap_buf, unsigned gap_len=0)
Definition: bmdbg.h:66
int file_save_compressed_collection(const CBC &cbc, const std::string &fname, size_t *blob_size=0)
Definition: bmdbg.h:936
unsigned BinStrLR(const char *str)
Binary code string converted to number Bits are expected left to right.
Definition: bmdbg.h:298
unsigned int word_t
Definition: bmconst.h:39
unsigned PrintGammaCode(TOut &tout, unsigned value)
Definition: bmdbg.h:87
void print_bc(TOut &tout, unsigned i, unsigned count)
Definition: bmdbg.h:351
void SaveBVector(const char *fname, const TBV &bvector)
Definition: bmdbg.h:197
void print_blocks_count(TOut &tout, const BV &bv)
Definition: bmdbg.h:323
void PrintTMatrix(TOut &tout, const TM &tmatrix, unsigned cols=0, bool binary=false)
Definition: bmdbg.h:271
void PrintGap(TOut &tout, const bm::gap_word_t *gap_buf)
Definition: bmdbg.h:53
void get_block_coord(BI_TYPE nb, unsigned &i, unsigned &j) noexcept
Recalc linear bvector block index into 2D matrix coordinates.
Definition: bmfunc.h:180
int file_load_compressed_collection(CBC &cbc, const std::string &fname)
Definition: bmdbg.h:967
void print_str_svector_stat(TOut &tout, const SV &str_svect)
Definition: bmdbg.h:841
size_t print_bvector_stat(TOut &tout, const BV &bvect)
Definition: bmdbg.h:385
const unsigned set_total_blocks
Definition: bmconst.h:111
void PrintDGapGamma(TOut &tout, const bm::gap_word_t *gap_buf, unsigned gap_len=0)
Definition: bmdbg.h:113
void PrintDistanceMatrix(TOut &tout, const unsigned distance[bm::set_block_plane_cnt][bm::set_block_plane_cnt])
Definition: bmdbg.h:255
int load_vector(VECT &vect, const std::string &fname)
Definition: bmdbg.h:911
const unsigned bie_cut_off
Definition: bmconst.h:88
const unsigned gap_levels
Definition: bmconst.h:85
size_t compute_serialization_size(const BV &bv)
Definition: bmdbg.h:519
void convert_bv2sv(SV &sv, const BV &bv)
Definition: bmdbg.h:1078
const unsigned set_block_size
Definition: bmconst.h:55
unsigned long long int id64_t
Definition: bmconst.h:35
int read_dump_file(const std::string &fname, VT &data)
Read dump file into an STL container (vector of some basic type)
Definition: bmdbg.h:137
void PrintBits32(TOut &tout, unsigned val)
Definition: bmdbg.h:249
void build_jaccard_similarity_batch(SIMBATCH &sbatch, const SV &sv)
Utility function to build jaccard similarity batch for sparse_vector<>
unsigned int id_t
Definition: bmconst.h:38
unsigned int iLog2(unsigned int value)
Definition: bmdbg.h:79
const unsigned set_array_shift
Definition: bmconst.h:96
void print_stat(TOut &tout, const BV &bv, typename BV::block_idx_type blocks=0)
Definition: bmdbg.h:408
unsigned short gap_word_t
Definition: bmconst.h:78
void LoadBVector(const char *fname, TBV &bvector, unsigned *file_size=0)
Definition: bmdbg.h:167
const unsigned set_block_shift
Definition: bmconst.h:56
int file_save_svector(const SV &sv, const std::string &fname, size_t *sv_blob_size=0, bool use_xor=true)
Definition: bmdbg.h:995
void PrintBinary(TOut &tout, V val)
Definition: bmdbg.h:239
int save_vector(const VECT &vect, const std::string &fname)
Definition: bmdbg.h:888
void SaveBlob(const char *name_prefix, unsigned num, const char *ext, const unsigned char *blob, size_t blob_size)
Definition: bmdbg.h:219
int file_load_svector(SV &sv, const std::string &fname)
Definition: bmdbg.h:1028
double value_type
The numeric datatype used by the parser.
Definition: muParserDef.h:228
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
std::istream & in(std::istream &in_, double &x_)
static unsigned cnt[256]
#define count
static uint8_t * buffer
Definition: pcre2test.c:1016
#define VT(vt)
static SLJIT_INLINE sljit_ins st(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
static SLJIT_INLINE sljit_ins l(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
#define assert(x)
Definition: srv_diag.hpp:58
#define row(bind, expected)
Definition: string_bind.c:73
static DP_BlockInfo * blocks
Structure to compute XOR gap-count profile by sub-block waves.
Definition: bmxor.h:230
Distance metric descriptor, holds metric code and result.
Definition: bmalgo_impl.h:87
layout class for serialization buffer structure
const unsigned char * buf() const noexcept
Return serialization buffer pointer.
size_t size() const noexcept
return current serialized size
Mini-matrix for bit transposition purposes.
Definition: bmtrans.h:41
static unsigned cols() noexcept
Definition: bmtrans.h:62
static unsigned rows() noexcept
Definition: bmtrans.h:61
const T * row(unsigned row_idx) const noexcept
Definition: bmtrans.h:64
int g(Seg_Gsm *spe, Seq_Mtf *psm, Thd_Gsm *tdg)
Definition: thrddgri.c:44
Modified on Fri Sep 20 14:57:43 2024 by modify_doxy.py rev. 669887