NCBI C++ ToolKit
vcf_reader.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: vcf_reader.hpp 93699 2021-05-13 21:23:28Z stakhovv $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Frank Ludwig
27  *
28  * File Description:
29  * VCF file reader
30  *
31  */
32 
33 #ifndef OBJTOOLS_READERS___VCFREADER__HPP
34 #define OBJTOOLS_READERS___VCFREADER__HPP
35 
36 #include <corelib/ncbistd.hpp>
41 
42 
44 
45 BEGIN_SCOPE(objects) // namespace ncbi::objects::
46 
47 class CVcfData;
48 class CDbtag;
49 class CReaderListener;
50 
51 // ----------------------------------------------------------------------------
// NOTE(review): listing line 52 (the declaration header) and listing lines
// 55-59 (the enumerators) are absent from this extract. The cross-reference
// index at the end of this page places enum ESpecType here, with
// eType_Integer, eType_Float, eType_Flag, eType_Character and eType_String on
// listing lines 55, 56, 57, 58 and 59 respectively. Their numeric values are
// not visible here -- confirm against the real header before relying on them.
53 // ----------------------------------------------------------------------------
54 {
60 };
// Converts a string to an ESpecType value. Only declared here; the
// cross-reference index places the definition at vcf_reader.cpp:101.
61 ESpecType SpecType( const string& );
62 
63 // ----------------------------------------------------------------------------
// NOTE(review): listing line 64 (the declaration header) and listing lines
// 67-70 (the enumerators) are absent from this extract. The cross-reference
// index at the end of this page places enum ESpecNumber here, with
// eNumber_CountAlleles, eNumber_CountGenotypes, eNumber_CountUnknown and
// eNumber_CountAllAlleles on listing lines 67, 68, 69 and 70 respectively.
// Their numeric values are not visible here -- confirm against the real header.
65 // ----------------------------------------------------------------------------
66 {
71 };
// Converts a string to an ESpecNumber value. Only declared here; the
// cross-reference index places the definition at vcf_reader.cpp:129.
72 ESpecNumber SpecNumber( const string& );
73 
74 // ----------------------------------------------------------------------------
// NOTE(review): listing line 75 (the class header) is absent from this
// extract. The cross-reference index identifies the constructor below as
// CVcfInfoSpec(string id, string numvals, string type, string description),
// so this is the body of class CVcfInfoSpec.
76 // ----------------------------------------------------------------------------
77 {
78 public:
// NOTE(review): listing line 79 is absent from this extract and is not
// mentioned in the cross-reference index; its content is unknown from here.
80 
// Four-argument constructor (its name line, listing line 81, is absent).
// Stores id and description as given, and converts the textual numvals and
// type through SpecNumber() and SpecType() before storing them.
82  string id,
83  string numvals,
84  string type,
85  string description ) :
86  m_id( id ),
87  m_numvals( SpecNumber( numvals ) ),
88  m_type( SpecType( type ) ),
89  m_description( description )
90  {};
91 
92  string m_id;
// Holds the ESpecNumber result of SpecNumber(), stored as a plain int.
93  int m_numvals;
// NOTE(review): listing line 94 is absent; the cross-reference index gives it
// as "ESpecType m_type".
95  string m_description;
96 };
97 
98 // ----------------------------------------------------------------------------
// NOTE(review): listing line 99 (the class header) is absent from this
// extract. The cross-reference index identifies the constructor below as
// CVcfFilterSpec(string id, string description), so this is the body of
// class CVcfFilterSpec.
100 // ----------------------------------------------------------------------------
101 {
102 public:
// NOTE(review): listing line 103 is absent from this extract and is not
// mentioned in the cross-reference index; its content is unknown from here.
104 
// Two-argument constructor (its name line, listing line 105, is absent).
// Stores id and description unchanged.
106  string id,
107  string description ) :
108  m_id( id ),
109  m_description( description )
110  {};
111 
112  string m_id;
// NOTE(review): listing line 113 is absent; the cross-reference index gives
// it as "string m_description".
114 };
115 
116 // ----------------------------------------------------------------------------
// NOTE(review): listing line 117 (the class header) is absent from this
// extract. The cross-reference index identifies the constructor below as
// CVcfFormatSpec(string id, string numvals, string type, string description),
// so this is the body of class CVcfFormatSpec.
118 // ----------------------------------------------------------------------------
119 {
120 public:
// NOTE(review): listing line 121 is absent from this extract and is not
// mentioned in the cross-reference index; its content is unknown from here.
122 
// Four-argument constructor (its name line, listing line 123, is absent).
// Stores id and description as given, and converts the textual numvals and
// type through SpecNumber() and SpecType() before storing them.
124  string id,
125  string numvals,
126  string type,
127  string description ) :
128  m_id( id ),
129  m_numvals( SpecNumber( numvals ) ),
130  m_type( SpecType( type ) ),
131  m_description( description )
132  {};
133 
134  string m_id;
// NOTE(review): listing lines 135-137 are absent. The cross-reference index
// gives line 136 as "ESpecType m_type" and line 137 as "string m_description";
// line 135 presumably declares m_numvals (it is initialized above) -- confirm.
138 };
139 
140 
141 // ----------------------------------------------------------------------------
// NOTE(review): listing line 142 (the class header) is absent from this
// extract. The constructor and destructor below name the class CVcfReader;
// the cross-reference index also lists the NCBI_XOBJREAD_EXPORT macro, which
// presumably decorates the missing header line -- confirm.
// The class derives publicly from CReaderBase and overrides several of its
// virtual functions.
143 // ----------------------------------------------------------------------------
144  : public CReaderBase
145 {
146  //
147  // object management:
148  //
149 public:
// Flag constants: fNormal is 0 and fUseSetFormat is bit 8.
150  enum {
151  fNormal = 0,
152  fUseSetFormat = 1<<8,
153  };
154 
// Constructor taking an int (default 0) and an optional CReaderListener
// pointer (default nullptr). NOTE(review): the int is presumably a bit mask of
// the flag constants above -- the parameter is unnamed here, so confirm.
155  CVcfReader(
156  int = 0,
157  CReaderListener* = nullptr);
158  virtual ~CVcfReader();
159 
160  //
161  // object interface:
162  //
163 public:
// Override of the base ReadSeqAnnot taking a line reader and an optional
// error listener. NOTE(review): listing line 164 (the return type) is absent;
// the cross-reference index shows the base's stream overload returning
// CRef<CSeq_annot>, so presumably this one does too.
165  ReadSeqAnnot(
166  ILineReader&,
167  ILineErrorListener* =nullptr ) override;
168 
169  //
170  // helpers:
171  //
172 protected:
// NOTE(review): listing line 173 is absent. The cross-reference index lists
// the base's "virtual CRef<CSeq_annot> xCreateSeqAnnot()", so an override of
// it is presumably declared on that line -- confirm.
174 
// Overrides of the CReaderBase hooks listed in the cross-reference index.
175  void xGetData(
176  ILineReader&,
177  TReaderData&) override;
178 
179  void xProcessData(
180  const TReaderData&,
181  CSeq_annot&) override;
182 
// NOTE(review): listing line 184 (the function name) is absent. The
// cross-reference index lists the base's
// "virtual bool xIsCommentLine(const CTempString&)", which matches this
// signature, so this is presumably that override.
183  bool
185  const CTempString& ) override;
186 
// Line-level handlers. Each takes the text of one line and the annotation
// being worked on, and returns bool; the meaning of the return value is not
// visible in this header.
187  virtual bool
188  xProcessTrackLine(
189  const string&,
190  CSeq_annot&);
191 
192  virtual bool
193  xProcessMetaLine(
194  const string&,
195  CSeq_annot&);
196 
// Unlike its siblings this handler returns void and has a bool& parameter.
197  virtual void
198  xSetFileFormat(
199  const string&,
200  CSeq_annot&,
201  bool&);
202 
203  virtual bool
204  xProcessMetaLineInfo(
205  const string&,
206  CSeq_annot&);
207 
208  virtual bool
209  xProcessMetaLineFilter(
210  const string&,
211  CSeq_annot&);
212 
213  virtual bool
214  xProcessMetaLineFormat(
215  const string&,
216  CSeq_annot&);
217 
218  virtual bool
219  xProcessHeaderLine(
220  const string&,
221  CSeq_annot& );
222 
223  virtual bool
224  xProcessDataLine(
225  const string&,
226  CSeq_annot&);
227 
228  virtual bool
229  xAssignVcfMeta(
230  CSeq_annot&);
231 
// Feature-building helpers. Each takes a CVcfData record and a
// CRef<CSeq_feat>; the xAssignVariant{Snv,Mnv,Del,Ins,Delins} group takes an
// additional unsigned int whose meaning is not visible in this header.
232  virtual bool
233  xAssignVariationAlleleSet(
234  const CVcfData&,
235  CRef<CSeq_feat> );
236 
237  virtual bool
238  xAssignFeatureLocationSet(
239  const CVcfData&,
240  CRef<CSeq_feat> );
241 
242  virtual bool
243  xAssignVariationIds(
244  CVcfData&,
245  CRef<CSeq_feat> );
246 
247  virtual bool
248  xAssignVariantSnv(
249  const CVcfData&,
250  unsigned int,
251  CRef<CSeq_feat> );
252 
253  virtual bool
254  xAssignVariantMnv(
255  const CVcfData&,
256  unsigned int,
257  CRef<CSeq_feat> );
258 
259  virtual bool
260  xAssignVariantDel(
261  const CVcfData&,
262  unsigned int,
263  CRef<CSeq_feat> );
264 
265  virtual bool
266  xAssignVariantIns(
267  const CVcfData&,
268  unsigned int,
269  CRef<CSeq_feat> );
270 
271  virtual bool
272  xAssignVariantDelins(
273  const CVcfData&,
274  unsigned int,
275  CRef<CSeq_feat> );
276 
// NOTE(review): listing line 280 (the last parameter and closing parenthesis)
// is absent from this extract; by analogy with the neighbouring declarations
// it is presumably "CRef<CSeq_feat> );" -- confirm.
277  virtual bool
278  xAssignVariantProps(
279  CVcfData&,
281 
// NOTE(review): listing line 284 (the end of this declaration) is absent from
// this extract. This helper is non-virtual and returns void.
282  void xAssignVariantSource(
283  CVcfData&,
285 
286  virtual bool
287  xProcessScore(
288  CVcfData&,
289  CRef<CSeq_feat> );
290 
291  virtual bool
292  xProcessFilter(
293  CVcfData&,
294  CRef<CSeq_feat> );
295 
// NOTE(review): listing line 299 (the end of this declaration) is absent from
// this extract.
296  virtual bool
297  xProcessInfo(
298  CVcfData&,
300 
301  virtual bool
302  xProcessFormat(
303  CVcfData&,
304  CRef<CSeq_feat> );
305 
// Take a CVcfData record by non-const reference plus an optional error
// listener (default nullptr); xParseData additionally takes a string.
306  virtual bool
307  xParseData(
308  const string&,
309  CVcfData&,
310  ILineErrorListener* =nullptr);
311 
312  virtual bool
313  xNormalizeData(
314  CVcfData&,
315  ILineErrorListener* =nullptr);
316 
317  //
318  // data:
319  //
320 private:
// Private const helper taking a list of id strings and a CRef<CDbtag>.
321  bool
322  xAssigndbSNPTag(
323  const vector<string>& ids,
324  CRef<CDbtag> pDbtag) const;
325 
326 protected:
327  static const double mMaxSupportedVersion;
// NOTE(review): listing lines 328-332 are absent from this extract. The
// cross-reference index gives them as:
//   328  double mActualVersion
//   329  CRef< CAnnotdesc > m_Meta
//   330  map< string, CVcfInfoSpec > m_InfoSpecs
//   331  map< string, CVcfFormatSpec > m_FormatSpecs
//   332  map< string, CVcfFilterSpec > m_FilterSpecs
333  vector<string> m_MetaDirectives;
334  vector<string> m_GenotypeHeaders;
// NOTE(review): listing lines 335-336 are absent from this extract. The
// cross-reference index gives them as:
//   335  CMessageListenerLenient m_ErrorsPrivate
//   336  bool m_MetaHandled
337 };
338 
341 
342 #endif // OBJTOOLS_READERS___VCFREADER__HPP
User-defined methods of the data storage class.
Definition: Dbtag.hpp:53
Defines and provides stubs for a general interface to a variety of file readers.
Definition: reader_base.hpp:63
virtual void xProcessData(const TReaderData &, CSeq_annot &)
virtual bool xIsCommentLine(const CTempString &)
virtual CRef< CSeq_annot > xCreateSeqAnnot()
virtual void xGetData(ILineReader &, TReaderData &)
virtual CRef< CSeq_annot > ReadSeqAnnot(CNcbiIstream &istr, ILineErrorListener *pErrors=nullptr)
Read an object from a given input stream, render it as a single Seq-annot.
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
string m_description
Definition: vcf_reader.hpp:113
CVcfFilterSpec(string id, string description)
Definition: vcf_reader.hpp:105
ESpecType m_type
Definition: vcf_reader.hpp:136
string m_description
Definition: vcf_reader.hpp:137
CVcfFormatSpec(string id, string numvals, string type, string description)
Definition: vcf_reader.hpp:123
ESpecType m_type
Definition: vcf_reader.hpp:94
string m_description
Definition: vcf_reader.hpp:95
CVcfInfoSpec(string id, string numvals, string type, string description)
Definition: vcf_reader.hpp:81
vector< string > m_MetaDirectives
Definition: vcf_reader.hpp:333
bool m_MetaHandled
Definition: vcf_reader.hpp:336
CRef< CAnnotdesc > m_Meta
Definition: vcf_reader.hpp:329
map< string, CVcfFormatSpec > m_FormatSpecs
Definition: vcf_reader.hpp:331
static const double mMaxSupportedVersion
Definition: vcf_reader.hpp:327
vector< string > m_GenotypeHeaders
Definition: vcf_reader.hpp:334
map< string, CVcfInfoSpec > m_InfoSpecs
Definition: vcf_reader.hpp:330
map< string, CVcfFilterSpec > m_FilterSpecs
Definition: vcf_reader.hpp:332
double mActualVersion
Definition: vcf_reader.hpp:328
CMessageListenerLenient m_ErrorsPrivate
Definition: vcf_reader.hpp:335
Abstract base class for lightweight line-by-line reading.
Definition: line_reader.hpp:54
Include a standard set of the NCBI C++ Toolkit most basic headers.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define NCBI_XOBJREAD_EXPORT
Definition: ncbi_export.h:1315
Definition: type.c:6
ESpecNumber
Definition: vcf_reader.hpp:66
@ eNumber_CountUnknown
Definition: vcf_reader.hpp:69
@ eNumber_CountGenotypes
Definition: vcf_reader.hpp:68
@ eNumber_CountAlleles
Definition: vcf_reader.hpp:67
@ eNumber_CountAllAlleles
Definition: vcf_reader.hpp:70
ESpecType SpecType(const string &)
Definition: vcf_reader.cpp:101
ESpecNumber SpecNumber(const string &)
Definition: vcf_reader.cpp:129
ESpecType
Definition: vcf_reader.hpp:54
@ eType_Integer
Definition: vcf_reader.hpp:55
@ eType_Float
Definition: vcf_reader.hpp:56
@ eType_String
Definition: vcf_reader.hpp:59
@ eType_Character
Definition: vcf_reader.hpp:58
@ eType_Flag
Definition: vcf_reader.hpp:57
Modified on Tue Feb 27 05:54:58 2024 by modify_doxy.py rev. 669887