NCBI C++ ToolKit
bamgraph.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef SRA__READER__BAM__BAMGRAPH__HPP
2 #define SRA__READER__BAM__BAMGRAPH__HPP
3 /* $Id: bamgraph.hpp 101492 2023-12-19 18:33:20Z vasilche $
4  * ===========================================================================
5  *
6  * PUBLIC DOMAIN NOTICE
7  * National Center for Biotechnology Information
8  *
9  * This software/database is a "United States Government Work" under the
10  * terms of the United States Copyright Act. It was written as part of
11  * the author's official duties as a United States Government employee and
12  * thus cannot be copyrighted. This software/database is freely available
13  * to the public for use. The National Library of Medicine and the U.S.
14  * Government have not placed any restriction on its use or reproduction.
15  *
16  * Although all reasonable efforts have been taken to ensure the accuracy
17  * and reliability of the software and data, the NLM and the U.S.
18  * Government do not and cannot warrant the performance or results that
19  * may be obtained by using this software or data. The NLM and the U.S.
20  * Government disclaim all warranties, express or implied, including
21  * warranties of performance, merchantability or fitness for any particular
22  * purpose.
23  *
24  * Please cite the author in any work or product based on this material.
25  *
26  * ===========================================================================
27  *
28  * Authors: Eugene Vasilchenko
29  *
30  * File Description:
31  * Make alignment density graphs from BAM files.
32  *
33  */
34 
35 #include <corelib/ncbistd.hpp>
37 #include <util/range.hpp>
39 #include <objects/seq/Seq_inst.hpp>
42 
45 
46 class CSeq_entry;
47 class CSeq_graph;
48 class CSeq_inst;
49 class CBamMgr;
50 class CBamDb;
51 class CBamRawDb;
52 class CBamHeader;
53 class CBamIndex;
54 
55 /////////////////////////////////////////////////////////////////////////////
56 /// CBam2Seq_graph
57 ///
58 /// Generate Seq-graph object with sequence coverage in BAM file.
59 /// The graph is generated in Seq-annot with descriptors,
60 /// or Seq-entry with virtual sequence and the Seq-annot.
61 ///
62 /// The graph has 2 special values:
63 /// 0 - no alignments in the bin at all.
64 /// max (255 for byte graph, and kMax_Int = 0x7fffffff = 2147483647 for int) -
65 /// values exceeding the limit (see also SetOutlierMax() and GetOutlierMax()).
66 /// All remaining values will be scaled to the range [1..max-1].
67 /// The outlier values (if any) will be encoded in Seq-annot descriptors,
68 /// field "Outliers", with a data field for each outlier stored as a User-field
69 /// that has the outlier bin number in its 'label id' and the value in its 'data real'.
70 /////////////////////////////////////////////////////////////////////////////
71 
73 {
74 public:
75  CBam2Seq_graph(void);
76  ~CBam2Seq_graph(void);
77 
78  /// Label of the reference sequence in the BAM file
79  const string& GetRefLabel(void) const;
80  void SetRefLabel(const string& ref_label);
81 
82  /// Seq-id for the reference sequence in generated entry
83  const CSeq_id& GetRefId(void) const;
84  void SetRefId(const CSeq_id& ref_id);
85 
86  /// Title of generated Seq-graph
87  const string& GetGraphTitle(void) const;
88  void SetGraphTitle(const string& title);
89 
90  /// Annot name of generated Seq-graph
91  const string& GetAnnotName(void) const;
92  void SetAnnotName(const string& name);
93 
94  /// Use specified Seq-inst object for the virtual sequence
95  void SetSeq_inst(CRef<CSeq_inst> inst);
96 
97  /// Minimal map quality of alignments to include in graph
98  int GetMinMapQuality(void) const;
99  void SetMinMapQuality(int qual);
100 
101  /// Type of graph coverage axis - linear or logarithmic
102  enum EGraphType {
103  eGraphType_linear, // default
104  eGraphType_logarithmic
105  };
106  EGraphType GetGraphType(void) const;
107  void SetGraphType(EGraphType type);
108 
109  /// Type of graph values - byte (0-255) or int
112  eGraphValueTyps_int
113  };
114  EGraphValueType GetGraphValueType(void) const;
115  void SetGraphValueType(EGraphValueType type);
116 
117  /// Size of sequence bins in bases corresponding to one graph value
118  enum {
119  kGraphBinSizeFromBAI = 1<<14, // bin size of BAI index
120  kGraphBinSizeFromIndex = 0, // bin size determined by used BAM index
121  kDefaultGraphBinSize = 1000,
122  // kEstimatedGraphBinSize is deprecated after adding support
123  // for CSI indexes, because the index bin size becomes variable,
124  // use either kGraphBinSizeFromBAI or kGraphBinSizeFromIndex
125  kEstimatedGraphBinSize NCBI_STD_DEPRECATED("Use either kGraphBinSizeFromIndex or kGraphBinSizeFromBAI") = 1<<14
126  };
127  TSeqPos GetGraphBinSize(void) const;
128  void SetGraphBinSize(TSeqPos bin_size);
129 
130  /// Limit overly large graph values to a multiple of their average
131  double GetOutlierMax(void) const;
132  void SetOutlierMax(double x);
133  bool GetOutlierDetails(void) const;
134  void SetOutlierDetails(bool details = true);
135 
136  /// try to use raw BAM file access for efficiency
137  bool GetRawAccess(void) const;
138  void SetRawAccess(bool raw_access = true);
139 
140  /// make estimated graph using BAM index only
141  /// the bin size will be derived from index
142  bool GetEstimated(void) const;
143  void SetEstimated(bool estimated = true);
144 
145  /// Generate raw align coverage for BAM file using BAM file index
146  vector<Uint8> CollectCoverage(CBamMgr& mgr,
147  const string& bam_file,
148  const string& bam_index);
149  vector<Uint8> CollectCoverage(CBamDb& db);
150  vector<Uint8> CollectCoverage(CBamRawDb& db);
151  vector<Uint8> CollectEstimatedCoverage(const CBamHeader& header,
152  const CBamIndex& bam_index);
153  vector<Uint8> CollectEstimatedCoverage(CBamRawDb& db);
154  vector<Uint8> CollectEstimatedCoverage(CBamDb& db);
155  vector<Uint8> CollectEstimatedCoverage(const string& bam_file,
156  const string& bam_index);
157  vector<Uint8> CollectRawAccessCoverage(const CBamHeader& header,
158  const CBamIndex& bam_index);
159  vector<Uint8> CollectRawAccessCoverage(CBamRawDb& db);
160  vector<Uint8> CollectRawAccessCoverage(CBamDb& db);
161  vector<Uint8> CollectRawAccessCoverage(const string& bam_file,
162  const string& bam_index);
163 
164  /// Generate Seq-annot for BAM file using BAM file index
165  CRef<CSeq_annot> MakeSeq_annot(CBamMgr& mgr,
166  const string& bam_file,
167  const string& bam_index);
168  /// Generate Seq-annot for BAM file using default BAM file index (.bai)
169  CRef<CSeq_annot> MakeSeq_annot(CBamMgr& mgr,
170  const string& bam_file);
171  CRef<CSeq_annot> MakeSeq_annot(CBamDb& db,
172  const string& bam_file);
173  CRef<CSeq_annot> MakeSeq_annot(CBamRawDb& db,
174  const string& bam_file);
175  CRef<CSeq_annot> MakeSeq_annot(const vector<Uint8>& cov,
176  const string& bam_file);
177  /// Generate Seq-entry for BAM file
178  CRef<CSeq_entry> MakeSeq_entry(CBamMgr& mgr,
179  const string& bam_file,
180  const string& bam_index);
181  /// Generate Seq-entry for BAM file using default BAM file index (.bai)
182  CRef<CSeq_entry> MakeSeq_entry(CBamMgr& mgr,
183  const string& bam_file);
184  CRef<CSeq_entry> MakeSeq_entry(CBamDb& db,
185  const string& bam_file);
186  CRef<CSeq_entry> MakeSeq_entry(CBamDb& db);
187  CRef<CSeq_entry> MakeSeq_entry(CBamRawDb& db,
188  const string& bam_file);
189  CRef<CSeq_entry> MakeSeq_entry(CRef<CSeq_annot> annot);
190 
191 private:
192  // parameters
193  string m_RefLabel;
195  string m_GraphTitle;
196  string m_AnnotName;
202  double m_OutlierMax;
206 
207  // statistics
211 };
212 
213 
214 /////////////////////////////////////////////////////////////////////////////
215 // Inline methods for class CBam2Seq_graph
216 
/// Return the label of the reference sequence in the BAM file.
/// The result is a reference to the stored m_RefLabel member (no copy is made).
217 inline const string& CBam2Seq_graph::GetRefLabel(void) const
218 {
219  return m_RefLabel;
220 }
221 
222 
/// Return the Seq-id used for the reference sequence in the generated entry.
/// m_RefId is dereferenced unconditionally here.
/// NOTE(review): presumably SetRefId() must be called before this getter;
/// confirm what happens when m_RefId has not been set.
223 inline const CSeq_id& CBam2Seq_graph::GetRefId(void) const
224 {
225  return *m_RefId;
226 }
227 
228 
/// Return the title of the generated Seq-graph.
/// The result is a reference to the stored m_GraphTitle member (no copy is made).
229 inline const string& CBam2Seq_graph::GetGraphTitle(void) const
230 {
231  return m_GraphTitle;
232 }
233 
234 
/// Return the annot name of the generated Seq-graph.
/// The result is a reference to the stored m_AnnotName member (no copy is made).
235 inline const string& CBam2Seq_graph::GetAnnotName(void) const
236 {
237  return m_AnnotName;
238 }
239 
240 
242 {
243  return m_GraphType;
244 }
245 
246 
248 {
249  return m_GraphValueType;
250 }
251 
252 
254 {
255  return m_GraphBinSize;
256 }
257 
258 
/// Return the current value of the m_OutlierDetails flag.
/// NOTE(review): presumably this is the value last passed to
/// SetOutlierDetails(); its effect on the generated output is not visible here.
259 inline bool CBam2Seq_graph::GetOutlierDetails(void) const
260 {
261  return m_OutlierDetails;
262 }
263 
264 
/// Return the m_RawAccess flag: whether to try to use raw BAM file access
/// for efficiency.
265 inline bool CBam2Seq_graph::GetRawAccess(void) const
266 {
267  return m_RawAccess;
268 }
269 
270 
/// Return the m_Estimated flag: whether to make an estimated graph using the
/// BAM index only (in that mode the bin size is derived from the index).
271 inline bool CBam2Seq_graph::GetEstimated(void) const
272 {
273  return m_Estimated;
274 }
275 
276 
279 
280 #endif // SRA__READER__BAM__BAMGRAPH__HPP
CBam2Seq_graph.
Definition: bamgraph.hpp:73
const string & GetRefLabel(void) const
Label of the reference sequence in the BAM file.
Definition: bamgraph.hpp:217
EGraphValueType m_GraphValueType
Definition: bamgraph.hpp:200
vector< Uint8 > CollectRawAccessCoverage(const CBamHeader &header, const CBamIndex &bam_index)
EGraphType m_GraphType
Definition: bamgraph.hpp:199
bool GetEstimated(void) const
make estimated graph using BAM index only the bin size will be derived from index
Definition: bamgraph.hpp:271
bool GetOutlierDetails(void) const
Definition: bamgraph.hpp:259
TSeqPos m_GraphBinSize
Definition: bamgraph.hpp:201
const string & GetAnnotName(void) const
Annot name of generated Seq-graph.
Definition: bamgraph.hpp:235
string m_GraphTitle
Definition: bamgraph.hpp:195
Uint8 m_AlignCount
Definition: bamgraph.hpp:209
bool m_OutlierDetails
Definition: bamgraph.hpp:203
bool GetRawAccess(void) const
try to use raw BAM file access for efficiency
Definition: bamgraph.hpp:265
CRef< CSeq_id > m_RefId
Definition: bamgraph.hpp:194
string m_RefLabel
Definition: bamgraph.hpp:193
TSeqPos m_MaxAlignSpan
Definition: bamgraph.hpp:210
CRef< CSeq_inst > m_Seq_inst
Definition: bamgraph.hpp:197
string m_AnnotName
Definition: bamgraph.hpp:196
double m_OutlierMax
Definition: bamgraph.hpp:202
const string & GetGraphTitle(void) const
Title of generated Seq-graph.
Definition: bamgraph.hpp:229
EGraphType
Type of graph coverage axis - linear or logarithmic.
Definition: bamgraph.hpp:102
const CSeq_id & GetRefId(void) const
Seq-id for the reference sequence in generated entry.
Definition: bamgraph.hpp:223
EGraphType GetGraphType(void) const
Definition: bamgraph.hpp:241
EGraphValueType GetGraphValueType(void) const
Definition: bamgraph.hpp:247
EGraphValueType
Type of graph values - byte (0-255) or int.
Definition: bamgraph.hpp:110
CRange< TSeqPos > m_TotalRange
Definition: bamgraph.hpp:208
TSeqPos GetGraphBinSize(void) const
Definition: bamgraph.hpp:253
Definition: Seq_entry.hpp:56
Include a standard set of the NCBI C++ Toolkit most basic headers.
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define NCBI_STD_DEPRECATED(message)
uint64_t Uint8
8-byte (64-bit) unsigned integer
Definition: ncbitype.h:105
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define NCBI_BAMREAD_EXPORT
Definition: ncbi_export.h:1235
Definition: type.c:6
Modified on Fri May 24 14:52:26 2024 by modify_doxy.py rev. 669887