NCBI C++ ToolKit
bamgraph.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: bamgraph.cpp 101886 2024-02-28 18:12:53Z vasilche $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Eugene Vasilchenko
27  *
28  * File Description:
29  * Make alignment density graphs from BAM files.
30  *
31  */
32 
33 #include <ncbi_pch.hpp>
37 #include <sra/error_codes.hpp>
39 #include <objects/seq/seq__.hpp>
42 #include <serial/serial.hpp>
43 #include <serial/typeinfo.hpp>
44 #include <cmath>
45 #include <numeric>
46 
48 
49 #define NCBI_USE_ERRCODE_X BAM2Graph
51 
53 
54 class CSeq_entry;
55 
56 static const int kDefaultOutlierMax_Byte = 50;
57 static const int kDefaultOutlierMax_Int = 1000000;
58 static const Uint8 kDefaultMinMapQuality = 4;
59 #define DEFAULT_BAI_SUFFIX ".bai"
60 
62  : m_MinMapQuality(-1),
63  m_GraphType(eGraphType_linear),
64  m_GraphValueType(eGraphValueType_byte),
65  m_GraphBinSize(kDefaultGraphBinSize),
66  m_OutlierMax(0),
67  m_OutlierDetails(false),
68  m_RawAccess(false),
69  m_Estimated(false)
70 {
71 }
72 
73 
75 {
76 }
77 
78 
80 {
81  m_RefLabel = label;
82 }
83 
84 
86 {
87  m_RefId = SerialClone(id);
88 }
89 
90 
91 void CBam2Seq_graph::SetGraphTitle(const string& title)
92 {
93  m_GraphTitle = title;
94 }
95 
96 
97 void CBam2Seq_graph::SetAnnotName(const string& name)
98 {
99  m_AnnotName = name;
100 }
101 
102 
104 {
105  m_Seq_inst = inst;
106 }
107 
108 
110 {
111  m_GraphType = type;
112 }
113 
114 
116 {
118 }
119 
120 
122 {
124 }
125 
126 
128 {
129  m_MinMapQuality = qual;
130 }
131 
132 
134 {
135  m_GraphBinSize = bin_size;
136 }
137 
138 
140 {
141  m_OutlierMax = x;
142 }
143 
144 
146 {
147  if ( m_OutlierMax > 1 ) {
148  return m_OutlierMax;
149  }
152  }
153  else {
154  return kDefaultOutlierMax_Int;
155  }
156 }
157 
158 
160 {
161  m_OutlierDetails = details;
162 }
163 
164 
165 void CBam2Seq_graph::SetRawAccess(bool raw_access)
166 {
167  m_RawAccess = raw_access;
168 }
169 
170 
171 void CBam2Seq_graph::SetEstimated(bool estimated)
172 {
173  m_Estimated = estimated;
174 }
175 
176 
178  const string& bam_file,
179  const string& bam_index)
180 {
181  if ( GetEstimated() ) {
182  return CollectEstimatedCoverage(bam_file, bam_index);
183  }
184  if ( GetRawAccess() ) {
185  return CollectRawAccessCoverage(bam_file, bam_index);
186  }
187  CBamDb db(mgr, bam_file, bam_index);
188  return CollectCoverage(db);
189 }
190 
191 
193 {
194  if ( GetEstimated() ) {
195  return CollectEstimatedCoverage(db);
196  }
197  if ( GetRawAccess() ) {
198  return CollectRawAccessCoverage(db);
199  }
200  vector<Uint8> ret;
201  TSeqPos bin_cnt = 0;
202  int align_cnt = 0;
203  double align_cov = 0;
204  ret.reserve(1024);
205 
206  TSeqPos bin_size = GetGraphBinSize();
207  TSeqPos min_pos = kInvalidSeqPos, max_pos = 0;
208  TSeqPos max_align_span = 0;
209  int min_qual = GetMinMapQuality();
210 
211  TSeqPos ref_length = db.GetRefSeqLength(GetRefLabel());
212 
213  typedef map<TSeqPos, Int8> TCovLevelChangeMap;
214  TCovLevelChangeMap cov_level_change_map;
215  const TSeqPos kMapBinCountThreshold = 20;
216 
217  const TSeqPos kWarnLongAlignThreshold = 10000;
218  const size_t kWarnLongAlignCount = 10;
219  size_t invalid_align_count = 0;
220  size_t long_align_count = 0;
221  for ( CBamAlignIterator ait(db, GetRefLabel(), 0); ait; ++ait ) {
222  if ( min_qual > 0 && ait.GetMapQuality() < min_qual ) {
223  continue;
224  }
225  ++align_cnt;
226  TSeqPos size = ait.GetCIGARRefSize();
227  if ( size == 0 ) {
228  continue;
229  }
230  TSeqPos pos = ait.GetRefSeqPos();
231  //x_AddCoverage(pos, size);
232 
233  TSeqPos end = pos + size;
234  if ( end > ref_length ) {
235  if ( ++invalid_align_count <= kWarnLongAlignCount ) {
236  ERR_POST_X(5, Warning << "CBam2Seq_graph: "
237  "alignment is out of refseq bounds " <<
238  GetRefLabel() << " @ " << pos << ": " << size
239  << ", CIGAR: "<< ait.GetCIGAR());
240  }
241  else if ( invalid_align_count == kWarnLongAlignCount+1 ) {
242  ERR_POST_X(6, Warning << "CBam2Seq_graph: "
243  "there are more alignments out of refseq bounds...");
244  }
245  --align_cnt;
246  continue;
247  }
248  if ( pos < min_pos ) {
249  min_pos = pos;
250  }
251  if ( end > max_pos ) {
252  max_pos = end;
253  }
254  if ( size > max_align_span ) {
255  max_align_span = size;
256  }
257  align_cov += size;
258  if ( size > kWarnLongAlignThreshold ) {
259  if ( ++long_align_count <= kWarnLongAlignCount ) {
260  ERR_POST_X(3, Warning << "CBam2Seq_graph: "
261  "alignment is too long at " <<
262  GetRefLabel() << " @ " << pos << ": " << size
263  << ", CIGAR: "<< ait.GetCIGAR());
264  }
265  else if ( long_align_count == kWarnLongAlignCount+1 ) {
266  ERR_POST_X(4, Warning << "CBam2Seq_graph: "
267  "there are more very long alignments...");
268  }
269  }
270  _ASSERT(end > pos);
271  TSeqPos end_bin = (end - 1) / bin_size;
272  if ( end_bin >= bin_cnt ) {
273  bin_cnt = end_bin + 1;
274  size_t cap = ret.capacity();
275  while ( bin_cnt > cap ) {
276  LOG_POST_X(1, Info<<"CBam2Seq_graph: "
277  "Cap "<<cap<<" at "<<align_cnt<<" aligns ");
278  cap *= 2;
279  }
280  ret.reserve(cap);
281  ret.resize(bin_cnt);
282  }
283  TSeqPos begin_bin = pos / bin_size;
284  if ( begin_bin == end_bin ) {
285  ret[begin_bin] += size;
286  }
287  else {
288  TSeqPos begin_bin_coverage = (begin_bin + 1) * bin_size - pos;
289  ret[begin_bin] += begin_bin_coverage;
290  ++begin_bin;
291  TSeqPos end_bin_coverage = end - end_bin * bin_size;
292  ret[end_bin] += end_bin_coverage;
293  // all intermediate bins are fully covered
294  if ( end_bin > begin_bin + kMapBinCountThreshold ) {
295  // keep long alignment spans in a separate map
296  cov_level_change_map[begin_bin] += bin_size;
297  cov_level_change_map[end_bin] -= bin_size;
298  }
299  else {
300  for ( TSeqPos bin = begin_bin; bin < end_bin; ++bin ) {
301  ret[bin] += bin_size;
302  }
303  }
304  }
305  }
306  if ( !cov_level_change_map.empty() ) {
307  TSeqPos bin = cov_level_change_map.begin()->first;
308  Uint8 level = 0;
309  ITERATE ( TCovLevelChangeMap, it, cov_level_change_map ) {
310  TSeqPos next_bin = it->first;
311  for ( ; bin < next_bin; ++bin ) {
312  ret[bin] += level;
313  }
314  level += it->second;
315  }
316  }
317  m_TotalRange.SetFrom(min_pos).SetToOpen(max_pos);
318  m_AlignCount = align_cnt;
319  m_MaxAlignSpan = max_align_span;
320  LOG_POST_X(2, Info<<"CBam2Seq_graph: "
321  "Total aligns: "<<align_cnt<<
322  " total size: "<<align_cov<<" "<<
323  " max align span: "<<max_align_span);
324  return ret;
325 }
326 
327 
329 {
330  vector<Uint8> ret;
331  TSeqPos bin_cnt = 0;
332  int align_cnt = 0;
333  double align_cov = 0;
334  ret.reserve(1024);
335 
336  TSeqPos bin_size = GetGraphBinSize();
337  TSeqPos min_pos = kInvalidSeqPos, max_pos = 0;
338  TSeqPos max_align_span = 0;
339  int min_qual = GetMinMapQuality();
340 
341  size_t ref_index = bam_raw_db.GetRefIndex(GetRefLabel());
342  TSeqPos ref_length = bam_raw_db.GetRefSeqLength(ref_index);
343 
344  typedef map<TSeqPos, Int8> TCovLevelChangeMap;
345  TCovLevelChangeMap cov_level_change_map;
346  const TSeqPos kMapBinCountThreshold = 20;
347 
348  const TSeqPos kWarnLongAlignThreshold = 10000;
349  const size_t kWarnLongAlignCount = 10;
350  size_t invalid_align_count = 0;
351  size_t long_align_count = 0;
352  for ( CBamRawAlignIterator ait(bam_raw_db, GetRefLabel(), 0); ait; ++ait ) {
353  if ( min_qual > 0 && ait.GetMapQuality() < min_qual ) {
354  continue;
355  }
356  ++align_cnt;
357  TSeqPos size = ait.GetCIGARRefSize();
358  if ( size == 0 ) {
359  continue;
360  }
361  TSeqPos pos = ait.GetRefSeqPos();
362  //x_AddCoverage(pos, size);
363 
364  TSeqPos end = pos + size;
365  if ( end > ref_length ) {
366  if ( ++invalid_align_count <= kWarnLongAlignCount ) {
367  ERR_POST_X(5, Warning << "CBam2Seq_graph: "
368  "alignment is out of refseq bounds " <<
369  GetRefLabel() << " @ " << pos << ": " << size
370  << ", CIGAR: "<< ait.GetCIGAR());
371  }
372  else if ( invalid_align_count == kWarnLongAlignCount+1 ) {
373  ERR_POST_X(6, Warning << "CBam2Seq_graph: "
374  "there are more alignments out of refseq bounds...");
375  }
376  --align_cnt;
377  continue;
378  }
379  if ( pos < min_pos ) {
380  min_pos = pos;
381  }
382  if ( end > max_pos ) {
383  max_pos = end;
384  }
385  if ( size > max_align_span ) {
386  max_align_span = size;
387  }
388  align_cov += size;
389  if ( size > kWarnLongAlignThreshold ) {
390  if ( ++long_align_count <= kWarnLongAlignCount ) {
391  ERR_POST_X(3, Warning << "CBam2Seq_graph: "
392  "alignment is too long at " <<
393  GetRefLabel() << " @ " << pos << ": " << size
394  << ", CIGAR: "<< ait.GetCIGAR());
395  }
396  else if ( long_align_count == kWarnLongAlignCount+1 ) {
397  ERR_POST_X(4, Warning << "CBam2Seq_graph: "
398  "there are more very long alignments...");
399  }
400  }
401  _ASSERT(end > pos);
402  TSeqPos end_bin = (end - 1) / bin_size;
403  if ( end_bin >= bin_cnt ) {
404  bin_cnt = end_bin + 1;
405  size_t cap = ret.capacity();
406  while ( bin_cnt > cap ) {
407  LOG_POST_X(1, Info<<"CBam2Seq_graph: "
408  "Cap "<<cap<<" at "<<align_cnt<<" aligns ");
409  cap *= 2;
410  }
411  ret.reserve(cap);
412  ret.resize(bin_cnt);
413  }
414  TSeqPos begin_bin = pos / bin_size;
415  if ( begin_bin == end_bin ) {
416  ret[begin_bin] += size;
417  }
418  else {
419  TSeqPos begin_bin_coverage = (begin_bin + 1) * bin_size - pos;
420  ret[begin_bin] += begin_bin_coverage;
421  ++begin_bin;
422  TSeqPos end_bin_coverage = end - end_bin * bin_size;
423  ret[end_bin] += end_bin_coverage;
424  // all intermediate bins are fully covered
425  if ( end_bin > begin_bin + kMapBinCountThreshold ) {
426  // keep long alignment spans in a separate map
427  cov_level_change_map[begin_bin] += bin_size;
428  cov_level_change_map[end_bin] -= bin_size;
429  }
430  else {
431  for ( TSeqPos bin = begin_bin; bin < end_bin; ++bin ) {
432  ret[bin] += bin_size;
433  }
434  }
435  }
436  }
437  if ( !cov_level_change_map.empty() ) {
438  TSeqPos bin = cov_level_change_map.begin()->first;
439  Uint8 level = 0;
440  ITERATE ( TCovLevelChangeMap, it, cov_level_change_map ) {
441  TSeqPos next_bin = it->first;
442  for ( ; bin < next_bin; ++bin ) {
443  ret[bin] += level;
444  }
445  level += it->second;
446  }
447  }
448  m_TotalRange.SetFrom(min_pos).SetToOpen(max_pos);
449  m_AlignCount = align_cnt;
450  m_MaxAlignSpan = max_align_span;
451  LOG_POST_X(2, Info<<"CBam2Seq_graph: "
452  "Total aligns: "<<align_cnt<<
453  " total size: "<<align_cov<<" "<<
454  " max align span: "<<max_align_span);
455  return ret;
456 }
457 
458 
460  const CBamIndex& bam_index)
461 {
462  auto bin_size = bam_index.GetMinBinSize();
463  m_GraphBinSize = bin_size;
464  size_t ref_index = header.GetRefIndex(GetRefLabel());
465  TSeqPos length = header.GetRefLength(ref_index);
466  vector<uint64_t> ret = bam_index.CollectEstimatedCoverage(ref_index);
467  if ( length == 0 || length == kInvalidSeqPos ) {
468  length = TSeqPos(ret.size())*bin_size;
469  }
470  m_TotalRange.SetFrom(0).SetToOpen(length);
471  m_AlignCount = 0;
472  m_MaxAlignSpan = 0;
473  return ret;
474 }
475 
476 
478 {
479  return CollectEstimatedCoverage(db.GetHeader(), db.GetIndex());
480 }
481 
482 
483 vector<Uint8> CBam2Seq_graph::CollectEstimatedCoverage(const string& bam_file,
484  const string& bam_ind)
485 {
486  CBamHeader header(bam_file);
487  CBamIndex bam_index(bam_ind.empty()? bam_file+".bai": bam_ind);
488  return CollectEstimatedCoverage(header, bam_index);
489 }
490 
491 
493 {
494  if ( db.UsesRawIndex() ) {
495  return CollectEstimatedCoverage(db.GetRawDb());
496  }
498 }
499 
500 
501 vector<Uint8> CBam2Seq_graph::CollectRawAccessCoverage(const string& bam_file,
502  const string& bam_ind)
503 {
504  CBamRawDb bam_raw_db(bam_file, bam_ind.empty()? bam_file+".bai": bam_ind);
505  return CollectRawAccessCoverage(bam_raw_db);
506 }
507 
508 
510 {
511  if ( db.UsesRawIndex() ) {
512  return CollectRawAccessCoverage(db.GetRawDb());
513  }
515 }
516 
517 
519 {
520  return CollectEstimatedCoverage(db);
521 }
522 
523 
524 static void sx_SetTitle(CSeq_graph& graph, CSeq_annot& annot,
525  string title, string name)
526 {
527  if ( name.empty() ) {
528  name = "BAM coverage";
529  }
530  if ( title.empty() ) {
531  title = name;
532  }
533  graph.SetTitle(title);
534  CRef<CAnnotdesc> desc(new CAnnotdesc);
535  desc->SetName(name);
536  annot.SetDesc().Set().push_back(desc);
537 }
538 
539 
541  const string& bam_file)
542 {
543  CRef<CSeq_annot> annot(new CSeq_annot);
544  CRef<CSeq_graph> graph(new CSeq_graph);
545  annot->SetData().SetGraph().push_back(graph);
546  sx_SetTitle(*graph, *annot, GetGraphTitle(), GetAnnotName());
547 
548  CRef<CAnnotdesc> desc(new CAnnotdesc);
549  CUser_object& user_desc = desc->SetUser();
550  user_desc.SetType().SetStr("BAM coverage");
551  annot->SetDesc().Set().push_back(desc);
552  if ( GetEstimated() ) {
553  user_desc.AddField("Estimated", true);
554  }
555  user_desc.AddField("MinMapQuality", GetMinMapQuality());
556 
557  Uint8 min_cov = kMax_UI8, max_cov = 0, sum_cov = 0;
558  size_t val_count = 0;
559  ITERATE ( vector<Uint8>, it, cov ) {
560  Uint8 c = *it;
561  if ( c != 0 ) {
562  ++val_count;
563  sum_cov += c;
564  if ( c < min_cov ) {
565  min_cov = c;
566  }
567  if ( c > max_cov ) {
568  max_cov = c;
569  }
570  }
571  }
572  double avg_cov;
573  if ( val_count == 0 ) {
574  // avoid division by zero
575  min_cov = 1;
576  max_cov = 2;
577  avg_cov = 0;
578  }
579  else {
580  avg_cov = double(sum_cov)/val_count;
581  }
582  double cov_mul = 1./GetGraphBinSize();
583  user_desc.AddField("SourceFile", bam_file);
584  if ( m_AlignCount ) {
585  if ( m_AlignCount < kMax_Int ) {
586  user_desc.AddField("AlignCount", int(m_AlignCount));
587  }
588  else {
589  user_desc.AddField("AlignCount", double(m_AlignCount));
590  }
591  }
592  if ( m_MaxAlignSpan ) {
593  user_desc.AddField("MaxAlignSpan", int(m_MaxAlignSpan));
594  }
595  user_desc.AddField("MinCoverage", min_cov*cov_mul);
596  user_desc.AddField("MaxCoverage", max_cov*cov_mul);
597  user_desc.AddField("AvgCoverage", avg_cov*cov_mul);
598 
599  CUser_field::TData::TFields* outliers = 0;
600  if ( max_cov > avg_cov*GetOutlierMax() ) {
601  max_cov = Uint8(avg_cov*GetOutlierMax());
602  user_desc.AddField("LimitCoverage", max_cov*cov_mul);
603  if ( GetOutlierDetails() ) {
604  outliers =
605  &user_desc.SetFieldRef("Outliers")->SetData().SetFields();
606  }
607  }
608 
609  graph->SetLoc().SetWhole(*SerialClone(*m_RefId));
610  graph->SetComp(GetGraphBinSize());
611  graph->SetNumval(TSeqPos(cov.size()));
612  CByte_graph::TValues* vvb = 0;
613  CInt_graph::TValues* vvi = 0;
614  int MAX = 0;
616  MAX = 254;
617  CByte_graph& bytes = graph->SetGraph().SetByte();
618  bytes.SetAxis(0);
619  bytes.SetMin(1);
620  bytes.SetMax(MAX);
621  vvb = &bytes.SetValues();
622  vvb->reserve(cov.size());
623  }
624  else {
625  MAX = kMax_Int-1;
626  CInt_graph& ints = graph->SetGraph().SetInt();
627  ints.SetAxis(0);
628  ints.SetMin(1);
629  ints.SetMax(MAX);
630  vvi = &ints.SetValues();
631  vvi->reserve(cov.size());
632  }
633 
635  user_desc.AddField("Logarithmic", true);
636 
637  // logarithmic:
638  // value = log(bin_cov/bin_size) = log(bin_cov) - log(bin_size)
639  // 1 -> log(min_cov) - log(bin_size)
640  // MAX -> log(max_cov) - log(bin_size)
641  // v = 1 + (log(cov)-log(min_cov))*(253/(log(max_cov)-log(min_cov))) =
642  // 1 + (log(cov)-log(min_cov))*byte_mul;
643  // x = log(min_cov) + (v-1)*(log(max_cov)-log(min_cov))/253 - log(bin)=
644  // v / byte_mul + log(min_cov) - 1/byte_mul - log(bin_size);
645  double base = log(double(min_cov));
646  double byte_mul = double(MAX-1)/(log(double(max_cov))-base);
647  graph->SetA(1./byte_mul);
648  graph->SetB(log(double(min_cov))-log(double(GetGraphBinSize()))-1./byte_mul);
649  ITERATE ( vector<Uint8>, it, cov ) {
650  Uint8 c = *it;
651  int b;
652  if ( c < min_cov ) {
653  b = 0;
654  }
655  else if ( c > max_cov ) {
656  b = MAX+1;
657  }
658  else {
659  b = 1 + int((log(double(c))-base)*byte_mul+.5);
660  }
661  if ( vvb )
662  vvb->push_back(b);
663  else
664  vvi->push_back(b);
665  }
666  }
667  else {
668  // linear:
669  // value = average bin coverage = bin_cov / bin_size
670  // 1 -> min_cov / bin_size
671  // MAX -> max_cov / bin_size
672  // v = 1 + (cov-min_cov) * ((MAX-1)/(max_cov-min_cov)) =
673  // 1 + (cov-min_cov) * byte_mul;
674  // x*bin_size = min_cov + (v-1)*((max_cov-min_cov)/(max-1)) =
675  // v / byte_mul + min_cov - 1 / byte_mul;
676  // x = v / (byte_mul*bin_size) + (min_cov - 1 / byte_mul)/bin_size;
677  double byte_mul = double(MAX-1)/(max_cov-min_cov);
678  graph->SetA(cov_mul/byte_mul);
679  graph->SetB(cov_mul*(min_cov - 1./byte_mul));
680  ITERATE ( vector<Uint8>, it, cov ) {
681  Uint8 c = *it;
682  int b;
683  if ( c < min_cov ) {
684  b = 0;
685  }
686  else if ( c > max_cov ) {
687  b = MAX+1;
688  if ( outliers ) {
689  CRef<CUser_field> field(new CUser_field);
690  field->SetLabel().SetId(int(it-cov.begin()));
691  field->SetData().SetReal(double(c));
692  outliers->push_back(field);
693  }
694  }
695  else {
696  b = 1 + int((c-min_cov)*byte_mul+.5);
697  }
698  if ( vvb )
699  vvb->push_back(b);
700  else
701  vvi->push_back(b);
702  }
703  }
704  return annot;
705 }
706 
707 
709  const string& bam_file,
710  const string& bam_index)
711 {
712  return MakeSeq_annot(CollectCoverage(mgr, bam_file, bam_index), bam_file);
713 }
714 
715 
717  const string& bam_file)
718 {
719  return MakeSeq_annot(CollectCoverage(db), bam_file);
720 }
721 
722 
724  const string& bam_file)
725 {
726  return MakeSeq_annot(CollectCoverage(db), bam_file);
727 }
728 
729 
731  const string& bam_file)
732 {
733  return MakeSeq_annot(mgr, bam_file, bam_file+DEFAULT_BAI_SUFFIX);
734 }
735 
736 
738 {
739  CRef<CSeq_entry> entry(new CSeq_entry);
740  CBioseq& seq = entry->SetSeq();
741  seq.SetAnnot().push_back(annot);
742  if ( m_RefId ) {
744  seq.SetId().push_back(id);
745  }
746  if ( 1 ) {
748  id->SetLocal().SetStr(GetRefLabel());
749  if ( !m_RefId || !GetRefId().Equals(*id) ) {
750  seq.SetId().push_back(id);
751  }
752  }
753  if ( m_Seq_inst ) {
754  seq.SetInst(*m_Seq_inst);
755  }
756  else {
757  CSeq_inst& inst = seq.SetInst();
761  }
762  return entry;
763 }
764 
765 
767  const string& bam_file,
768  const string& bam_index)
769 {
770  return MakeSeq_entry(MakeSeq_annot(mgr, bam_file, bam_index));
771 }
772 
773 
775  const string& bam_file)
776 {
777  return MakeSeq_entry(MakeSeq_annot(db, bam_file));
778 }
779 
780 
782 {
783  return MakeSeq_entry(MakeSeq_annot(db, db.GetDbName()));
784 }
785 
786 
788  const string& bam_file)
789 {
790  return MakeSeq_entry(MakeSeq_annot(db, bam_file));
791 }
792 
793 
795  const string& bam_file)
796 {
797  return MakeSeq_entry(MakeSeq_annot(mgr, bam_file));
798 }
799 
800 
static void sx_SetTitle(CSeq_graph &graph, CSeq_annot &annot, string title, string name)
Definition: bamgraph.cpp:524
static const int kDefaultOutlierMax_Int
Definition: bamgraph.cpp:57
static const Uint8 kDefaultMinMapQuality
Definition: bamgraph.cpp:58
static const int kDefaultOutlierMax_Byte
Definition: bamgraph.cpp:56
NCBI_DEFINE_ERR_SUBCODE_X(6)
#define DEFAULT_BAI_SUFFIX
Definition: bamgraph.cpp:59
CAnnotdesc –.
Definition: Annotdesc.hpp:66
const string & GetRefLabel(void) const
Label of the reference sequence in the BAM file.
Definition: bamgraph.hpp:217
CBam2Seq_graph(void)
Definition: bamgraph.cpp:61
void SetGraphBinSize(TSeqPos bin_size)
Definition: bamgraph.cpp:133
void SetRawAccess(bool raw_access=true)
Definition: bamgraph.cpp:165
EGraphValueType m_GraphValueType
Definition: bamgraph.hpp:200
vector< Uint8 > CollectRawAccessCoverage(const CBamHeader &header, const CBamIndex &bam_index)
EGraphType m_GraphType
Definition: bamgraph.hpp:199
int GetMinMapQuality(void) const
Minimal map quality of alignments to include in graph.
Definition: bamgraph.cpp:121
void SetGraphTitle(const string &title)
Definition: bamgraph.cpp:91
~CBam2Seq_graph(void)
Definition: bamgraph.cpp:74
bool GetEstimated(void) const
Make an estimated graph using the BAM index only; the bin size will be derived from the index.
Definition: bamgraph.hpp:271
bool GetOutlierDetails(void) const
Definition: bamgraph.hpp:259
void SetOutlierMax(double x)
Definition: bamgraph.cpp:139
void SetEstimated(bool estimated=true)
Definition: bamgraph.cpp:171
TSeqPos m_GraphBinSize
Definition: bamgraph.hpp:201
const string & GetAnnotName(void) const
Annot name of generated Seq-graph.
Definition: bamgraph.hpp:235
string m_GraphTitle
Definition: bamgraph.hpp:195
void SetGraphValueType(EGraphValueType type)
Definition: bamgraph.cpp:115
Uint8 m_AlignCount
Definition: bamgraph.hpp:209
vector< Uint8 > CollectCoverage(CBamMgr &mgr, const string &bam_file, const string &bam_index)
Generate raw align coverage for BAM file using BAM file index.
Definition: bamgraph.cpp:177
bool m_OutlierDetails
Definition: bamgraph.hpp:203
bool GetRawAccess(void) const
try to use raw BAM file access for efficiency
Definition: bamgraph.hpp:265
double GetOutlierMax(void) const
Limit too big graph values by a multiple of their average.
Definition: bamgraph.cpp:145
void SetAnnotName(const string &name)
Definition: bamgraph.cpp:97
void SetOutlierDetails(bool details=true)
Definition: bamgraph.cpp:159
CRef< CSeq_annot > MakeSeq_annot(CBamMgr &mgr, const string &bam_file, const string &bam_index)
Generate Seq-annot for BAM file using BAM file index.
Definition: bamgraph.cpp:708
CRef< CSeq_id > m_RefId
Definition: bamgraph.hpp:194
string m_RefLabel
Definition: bamgraph.hpp:193
vector< Uint8 > CollectEstimatedCoverage(const CBamHeader &header, const CBamIndex &bam_index)
Definition: bamgraph.cpp:459
CRef< CSeq_entry > MakeSeq_entry(CBamMgr &mgr, const string &bam_file, const string &bam_index)
Generate Seq-entry for BAM file.
Definition: bamgraph.cpp:766
TSeqPos m_MaxAlignSpan
Definition: bamgraph.hpp:210
CRef< CSeq_inst > m_Seq_inst
Definition: bamgraph.hpp:197
string m_AnnotName
Definition: bamgraph.hpp:196
double m_OutlierMax
Definition: bamgraph.hpp:202
void SetRefLabel(const string &ref_label)
Definition: bamgraph.cpp:79
void SetGraphType(EGraphType type)
Definition: bamgraph.cpp:109
void SetRefId(const CSeq_id &ref_id)
Definition: bamgraph.cpp:85
const string & GetGraphTitle(void) const
Title of generated Seq-graph.
Definition: bamgraph.hpp:229
EGraphType
Type of graph coverage axis - linear or logarithmic.
Definition: bamgraph.hpp:102
@ eGraphType_logarithmic
Definition: bamgraph.hpp:104
const CSeq_id & GetRefId(void) const
Seq-id for the reference sequence in generated entry.
Definition: bamgraph.hpp:223
EGraphType GetGraphType(void) const
Definition: bamgraph.hpp:241
void SetSeq_inst(CRef< CSeq_inst > inst)
Use specified Seq-inst object for the virtual sequence.
Definition: bamgraph.cpp:103
EGraphValueType GetGraphValueType(void) const
Definition: bamgraph.hpp:247
EGraphValueType
Type of graph values - byte (0-255) or int.
Definition: bamgraph.hpp:110
void SetMinMapQuality(int qual)
Definition: bamgraph.cpp:127
CRange< TSeqPos > m_TotalRange
Definition: bamgraph.hpp:208
TSeqPos GetGraphBinSize(void) const
Definition: bamgraph.hpp:253
bool UsesRawIndex() const
Definition: bamread.hpp:216
const string & GetDbName(void) const
Definition: bamread.hpp:225
TSeqPos GetRefSeqLength(const string &str) const
Definition: bamread.cpp:1023
const string & GetIndexName(void) const
Definition: bamread.hpp:229
CBamRawDb & GetRawDb()
Definition: bamread.hpp:220
size_t GetRefIndex(const string &name) const
Definition: bamindex.cpp:1608
TSeqPos GetRefLength(size_t index) const
Definition: bamindex.hpp:94
vector< uint64_t > CollectEstimatedCoverage(size_t ref_index, TIndexLevel min_index_level, TIndexLevel max_index_level) const
Definition: bamindex.cpp:1482
size_t GetRefIndex(const string &ref_label) const
Definition: bamindex.hpp:1026
const CBamHeader & GetHeader() const
Definition: bamindex.hpp:1010
const CBamIndex & GetIndex() const
Definition: bamindex.hpp:1014
TSeqPos GetRefSeqLength(size_t ref_index) const
Definition: bamindex.hpp:1034
CByte_graph –.
Definition: Byte_graph.hpp:66
CInt_graph –.
Definition: Int_graph.hpp:66
Definition: Seq_entry.hpp:56
CUser_object & AddField(const string &label, const string &value, EParseField parse=eParse_String)
add a data field to the user object that holds a given value
CRef< CUser_field > SetFieldRef(const string &str, const string &delim=".", const string &obj_subtype=kEmptyStr, NStr::ECase use_case=NStr::eCase)
Definition: map.hpp:338
#define false
Definition: bool.h:36
static int type
Definition: getdata.c:31
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
Definition: ncbimisc.hpp:878
#define LOG_POST_X(err_subcode, message)
Definition: ncbidiag.hpp:553
#define ERR_POST_X(err_subcode, message)
Error posting with default error code and given error subcode.
Definition: ncbidiag.hpp:550
void Warning(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1191
void Info(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1185
C * SerialClone(const C &src)
Create on heap a clone of the source object.
Definition: serialbase.hpp:512
#define kMax_UI8
Definition: ncbi_limits.h:222
#define kMax_Int
Definition: ncbi_limits.h:184
uint64_t Uint8
8-byte (64-bit) unsigned integer
Definition: ncbitype.h:105
position_type GetToOpen(void) const
Definition: range.hpp:138
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
static const char label[]
void SetFrom(TFrom value)
Assign a value to From data member.
Definition: Range_.hpp:231
vector< CRef< CUser_field > > TFields
void SetLabel(TLabel &value)
Assign a value to Label data member.
void SetType(TType &value)
Assign a value to Type data member.
void SetData(TData &value)
Assign a value to Data data member.
@ e_Local
local use
Definition: Seq_id_.hpp:95
void SetA(TA value)
Assign a value to A data member.
void SetMin(TMin value)
Assign a value to Min data member.
void SetTitle(const TTitle &value)
Assign a value to Title data member.
Definition: Seq_graph_.hpp:784
vector< char > TValues
Definition: Byte_graph_.hpp:89
void SetAxis(TAxis value)
Assign a value to Axis data member.
Definition: Int_graph_.hpp:394
void SetNumval(TNumval value)
Assign a value to Numval data member.
void SetComp(TComp value)
Assign a value to Comp data member.
TValues & SetValues(void)
Assign a value to Values data member.
void SetGraph(TGraph &value)
Assign a value to Graph data member.
Definition: Seq_graph_.cpp:250
void SetB(TB value)
Assign a value to B data member.
void SetMax(TMax value)
Assign a value to Max data member.
void SetMax(TMax value)
Assign a value to Max data member.
Definition: Int_graph_.hpp:300
vector< int > TValues
Definition: Int_graph_.hpp:88
void SetLoc(TLoc &value)
Assign a value to Loc data member.
Definition: Seq_graph_.cpp:224
void SetAxis(TAxis value)
Assign a value to Axis data member.
void SetMin(TMin value)
Assign a value to Min data member.
Definition: Int_graph_.hpp:347
TValues & SetValues(void)
Assign a value to Values data member.
Definition: Int_graph_.hpp:431
TSeq & SetSeq(void)
Select the variant.
Definition: Seq_entry_.cpp:108
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_annot_.cpp:244
TId & SetId(void)
Assign a value to Id data member.
Definition: Bioseq_.hpp:296
void SetDesc(TDesc &value)
Assign a value to Desc data member.
Definition: Seq_annot_.cpp:223
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
Definition: Bioseq_.hpp:372
TName & SetName(void)
Select the variant.
Definition: Annotdesc_.hpp:508
TUser & SetUser(void)
Select the variant.
Definition: Annotdesc_.cpp:190
void SetInst(TInst &value)
Assign a value to Inst data member.
Definition: Bioseq_.cpp:86
void SetRepr(TRepr value)
Assign a value to Repr data member.
Definition: Seq_inst_.hpp:574
void SetLength(TLength value)
Assign a value to Length data member.
Definition: Seq_inst_.hpp:668
void SetMol(TMol value)
Assign a value to Mol data member.
Definition: Seq_inst_.hpp:621
@ eRepr_virtual
no seq data
Definition: Seq_inst_.hpp:93
@ eMol_na
just a nucleic acid
Definition: Seq_inst_.hpp:113
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
Definition of all error codes used in SRA C++ support libraries.
const struct ncbi::grid::netcache::search::fields::SIZE size
static bool Equals(const CVariation::TPlacements &p1, const CVariation::TPlacements &p2)
#define MAX(a, b)
returns larger of a and b.
Definition: ncbi_std.h:117
constexpr TSeqPos GetMinBinSize() const
Definition: bamindex.hpp:208
Definition: type.c:6
#define _ASSERT
Modified on Tue Apr 23 07:38:42 2024 by modify_doxy.py rev. 669887