NCBI C++ ToolKit
gff3_reader.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1  /* $Id: gff3_reader.hpp 99211 2023-02-27 16:15:10Z ludwigf $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Frank Ludwig
27  *
28  * File Description:
29  * GFF3 file reader
30  *
31  */
32 
33 #ifndef OBJTOOLS_READERS___GFF3_READER__HPP
34 #define OBJTOOLS_READERS___GFF3_READER__HPP
35 
36 #include <corelib/ncbistd.hpp>
38 
40 BEGIN_SCOPE(objects) // namespace ncbi::objects::
41 
43 
44 // ============================================================================
46 // ============================================================================
47  : public CGff2Record
48 {
49 public:
52 
53  bool AssignFromGff(
54  const string& ) override;
55 
56 protected:
58  const string& );
59 };
60 
61 // ============================================================================
63 // ============================================================================
64 {
67  using LIST_IDS = list<string>;
69 
// Empties both the mAlignments and mIds containers. Afterwards the
// bool conversion declared next to this method yields false, because
// mAlignments is empty.
70  void Reset() { mAlignments.clear(); mIds.clear(); };
// True when mAlignments holds at least one entry; mIds is not consulted.
// NOTE(review): the conversion is not marked explicit, so it also applies
// in implicit (e.g. arithmetic) contexts — confirm that is intended.
71  operator bool() const { return !mAlignments.empty(); };
72 };
73 
74 // ============================================================================
76 // ============================================================================
77  : public CGff2Reader
78 {
79  friend class CGff3ReadRecord;
80 
81 public:
82  //
83  // object management:
84  //
85 public:
// Unnamed enum of flag constants. fGeneXrefs is the single bit 12
// (0x1 << 12 == 0x1000). The "range 12..23" note below presumably means
// bit positions 12 through 23 are set aside for flags declared here —
// TODO confirm against the base reader's flag definitions.
86  enum {
87  //range 12..23
88  fGeneXrefs = (0x1 << 12),
89  };
90  //typedef unsigned int TReaderFlags;
91 
93  TReaderFlags uFlags,
94  const string& name = "",
95  const string& title = "",
97  CReaderListener* = nullptr);
98 
100  unsigned int uFlags,
101  CReaderListener*);
102 
103  virtual ~CGff3Reader();
104 
106  ReadSeqAnnot(
107  ILineReader& lr,
108  ILineErrorListener* pErrors=nullptr) override;
109 
110  TSeqPos SequenceSize() const;
111 
112  TSeqPos GetSequenceSize(
113  const string&) const;
114 
// Returns a copy of the mpLocations shared_ptr, so the caller shares
// ownership of the CGff3LocationMerger with this reader.
// NOTE(review): where mpLocations is initialized is not visible in this
// header; the result may be null if it has not been set — confirm.
115  shared_ptr<CGff3LocationMerger> GetLocationMerger() {
116  return mpLocations;
117  };
118 
119 protected:
120  void xProcessData(
121  const TReaderData&,
122  CSeq_annot&) override;
123 
// Override of the base-class record factory: returns a default-constructed
// CGff3ReadRecord allocated with raw new.
// NOTE(review): the caller presumably takes ownership of the returned
// pointer and must release it — confirm against the base reader's usage.
124  CGff3ReadRecord* x_CreateRecord() override { return new CGff3ReadRecord(); };
125 
126  virtual bool xInitializeFeature(
127  const CGff2Record&,
128  CRef<CSeq_feat> );
129 
130  bool xUpdateAnnotFeature(
131  const CGff2Record&,
132  CSeq_annot&,
133  ILineErrorListener*) override;
134 
135  bool xAddFeatureToAnnot(
137  CSeq_annot& ) override;
138 
139  virtual bool xUpdateAnnotExon(
140  const CGff2Record&,
142  CSeq_annot&,
144 
145  virtual bool xJoinLocationIntoRna(
146  const CGff2Record&,
148 
149  virtual bool xUpdateAnnotCds(
150  const CGff2Record&,
152  CSeq_annot&,
154 
155  virtual bool xUpdateAnnotGene(
156  const CGff2Record&,
158  CSeq_annot&,
160 
161  virtual bool xUpdateAnnotRegion(
162  const CGff2Record&,
164  CSeq_annot&,
166 
167  virtual bool xUpdateAnnotGeneric(
168  const CGff2Record&,
170  CSeq_annot&,
172 
173  virtual bool xUpdateAnnotRna(
174  const CGff2Record&,
176  CSeq_annot&,
178 
179  virtual bool xFindFeatureUnderConstruction(
180  const CGff2Record&,
181  CRef<CSeq_feat>&);
182 
183  void xVerifyCdsParents(
184  const CGff2Record&);
185 
186  virtual void xValidateAnnot(
187  const CSeq_annot&) override;
188 
189  virtual bool xFeatureSetXrefGrandParent(
190  const string&,
192 
193  virtual bool xFeatureSetXrefParent(
194  const string&,
196 
197  bool xReadInit() override;
198 
199  static string xNextGenericId();
200 
201  string xMakeRecordId(
202  const CGff2Record& record);
203 
204  void xVerifyExonLocation(
205  const string&,
206  const CGff2Record&);
207 
209  const string&) override;
210 
211  virtual void xAddPendingExon(
212  const string&,
213  const CGff2Record&);
214 
215  virtual void xGetPendingExons(
216  const string&,
217  list<CGff2Record>&);
218 
219  void xPostProcessAnnot(
220  CSeq_annot&) override;
221 
222  void xProcessAlignmentData(
223  CSeq_annot& pAnnot);
224 
225  bool xParseFeature(
226  const string&,
227  CSeq_annot&,
228  ILineErrorListener*) override;
229 
230  virtual bool xParseAlignment(
231  const string& strLine);
232 
234  const string& pragma) override;
235 
236  // Data:
241 
244 
245  shared_ptr<CGff3LocationMerger> mpLocations;
246  static unsigned int msGenericIdCounter;
247 };
248 
251 
252 #endif // OBJTOOLS_READERS___GFF3_READER__HPP
void xProcessData(const TReaderData &, CSeq_annot &) override
virtual bool xAddFeatureToAnnot(CRef< CSeq_feat >, CSeq_annot &)
virtual bool xParseFeature(const string &, CSeq_annot &, ILineErrorListener *)
void xPostProcessAnnot(CSeq_annot &) override
virtual bool xIsIgnoredFeatureType(const string &)
virtual bool xUpdateAnnotFeature(const CGff2Record &, CSeq_annot &, ILineErrorListener *=0)
virtual void xProcessSequenceRegionPragma(const string &)
string x_NormalizedAttributeKey(const string &)
bool AssignFromGff(const string &) override
map< string, string > mIdToSeqIdMap
CGff3ReadRecord * x_CreateRecord() override
map< string, string > mCdsParentMap
SAlignmentData mAlignmentData
shared_ptr< CGff3LocationMerger > mpLocations
PENDING_EXONS mPendingExons
shared_ptr< CGff3LocationMerger > GetLocationMerger()
map< string, CRef< CSeq_interval > > mMrnaLocs
static unsigned int msGenericIdCounter
CRef< CSeq_id >(*)(const string &, TReaderFlags, bool) SeqIdResolver
static CRef< CSeq_id > AsSeqId(const string &rawId, long flags=0, bool localInts=true)
Convert a raw ID string to a Seq-id, based on the given customization flags.
Definition: read_util.cpp:89
virtual void xValidateAnnot(const CSeq_annot &)
virtual bool xReadInit()
virtual CRef< CSeq_annot > ReadSeqAnnot(CNcbiIstream &istr, ILineErrorListener *pErrors=nullptr)
Read an object from a given input stream, render it as a single Seq-annot.
Abstract base class for lightweight line-by-line reading.
Definition: line_reader.hpp:54
bool empty() const
Definition: map.hpp:149
void clear()
Definition: map.hpp:169
Definition: map.hpp:338
Include a standard set of the NCBI C++ Toolkit most basic headers.
#define bool
Definition: bool.h:34
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define NCBI_XOBJREAD_EXPORT
Definition: ncbi_export.h:1315
MAP_ID_TO_ALIGN mAlignments
Definition: gff3_reader.hpp:66
list< string > LIST_IDS
Definition: gff3_reader.hpp:67
Modified on Mon Jun 24 05:23:00 2024 by modify_doxy.py rev. 669887