NCBI C++ ToolKit
gff3_writer.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: gff3_writer.hpp 99488 2023-04-05 11:57:12Z stakhovv $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Frank Ludwig
27  *
28  * File Description: Write gff3 file
29  *
30  */
31 
32 #ifndef OBJTOOLS_READERS___GFF3_WRITER__HPP
33 #define OBJTOOLS_READERS___GFF3_WRITER__HPP
34 
35 #include <corelib/ncbistd.hpp>
37 #include <objmgr/scope.hpp>
47 
49 
51 BEGIN_objects_SCOPE
52 
53 
54 // ============================================================================
56  // ============================================================================
57  : public CGffSourceRecord
58 {
59 public:
61  : CGffSourceRecord("") {};
63  : CGffSourceRecord(rhs), mRecordId(rhs.mRecordId) {};
65  const string& recordId) { mRecordId = recordId; };
// Returns a copy of the record ID stored in mRecordId.
66  string Id() const { return mRecordId; };
67 
// Builds and returns this record's attribute string: an "ID=" entry (emitted
// only when mRecordId is non-empty) followed by whatever
// CGffBaseRecord::StrAttributes() returns, if that is non-empty.
// NOTE(review): this listing skips original lines 74 and 78 (the numbering
// jumps 73 -> 75 and 77 -> 79), so the statements on those lines are not
// visible here. Presumably they append the ID value and a separator between
// the ID entry and the base attributes -- confirm against the real header.
68  string StrAttributes() const {
69  string attributes;
// Reserve capacity up front; several appends follow.
70  attributes.reserve(256);
71 
// The ID entry is written only when a record ID has been set.
72  if (!mRecordId.empty()) {
73  attributes += "ID=";
75  }
// Attributes maintained by the base record are appended after the ID entry.
76  auto baseAttributes = CGffBaseRecord::StrAttributes();
77  if (!baseAttributes.empty()) {
79  attributes += baseAttributes;
80  }
81  return attributes;
82  }
83 protected:
84  string mRecordId;
85 };
86 
87 // ============================================================================
89 // ============================================================================
90  : public CGffFeatureRecord
91 {
92 public:
94  : CGffFeatureRecord("")
95  {};
96 
99  {};
100 
102  const string& recordId) { mRecordId = recordId; };
// Stores the given string in mParent, replacing any previous value.
103  void SetParent(
104  const string& parent) { mParent = parent; };
// Returns a copy of the record ID stored in mRecordId.
105  string Id() const { return mRecordId; };
// Returns a copy of the parent identifier stored in mParent.
106  string Parent() const { return mParent; };
107 
// Builds and returns this record's attribute string in the order:
// "ID=" entry (only when mRecordId is non-empty), "Parent=" entry (only when
// mParent is non-empty), then whatever CGffBaseRecord::StrAttributes()
// returns, if that is non-empty.
// NOTE(review): this listing skips original lines 114, 118, 121 and 125, so
// the statements on those lines are not visible here. Presumably they append
// the ID / parent values and the separators between entries -- confirm
// against the real header before relying on this description.
108  string StrAttributes() const {
109  string attributes;
// Reserve capacity up front; several appends follow.
110  attributes.reserve(256);
111 
// The ID entry is written only when a record ID has been set.
112  if (!mRecordId.empty()) {
113  attributes += "ID=";
115  }
// The Parent entry is written only when a parent has been set.
116  if (!mParent.empty()) {
// Something is done here only when an entry already precedes Parent
// (the statement itself, original line 118, is missing from this listing).
117  if (!attributes.empty()) {
119  }
120  attributes += "Parent=";
122  }
// Attributes maintained by the base record come last.
123  auto baseAttributes = CGffBaseRecord::StrAttributes();
124  if (!baseAttributes.empty()) {
126  attributes += baseAttributes;
127  }
128  return attributes;
129  }
130 
131 protected:
132  string mRecordId;
133  string mParent;
134 
135 };
136 
137 
138 // ============================================================================
140 // ============================================================================
141  : public CGff2Writer, public CAlignWriter
142 {
143 public:
// Flag values specific to this writer. Each one is fGff2WriterLast shifted
// left by a different amount (1..4), so the four values are distinct from one
// another and from fGff2WriterLast itself.
// NOTE(review): presumably fGff2WriterLast is the highest single-bit flag of
// the base GFF2 writer, making these the next bits in the same mask -- confirm
// in gff_writer.hpp.
144  typedef enum {
145  fExtraQuals = (fGff2WriterLast << 1),
146  fMicroIntrons = (fGff2WriterLast << 2),
147  fExcludeNucs = (fGff2WriterLast << 3), // for backward compatibility :-(
148  fIncludeProts = (fGff2WriterLast << 4),
// Alias for the last flag defined in this enum (fIncludeProts).
149  fGff3WriterLast = fIncludeProts,
150  } TFlags;
151 
152 public:
// Constructor taking a scope, an output stream, an unsigned flags word
// (defaults to fNormal) and a sortAlignments switch (defaults to false).
// Declaration only; the definition is not part of this header listing.
153  CGff3Writer(
154  CScope&,
155  CNcbiOstream&,
156  unsigned int = fNormal,
157  bool sortAlignments = false
158  );
// Same as above but without the CScope argument.
// NOTE(review): how the scope is obtained in this case is not visible here.
159  CGff3Writer(
160  CNcbiOstream&,
161  unsigned int = fNormal,
162  bool sortAlignments = false
163  );
// Virtual, compiler-generated destructor.
164  virtual ~CGff3Writer() = default;
165 
167  const string& defaultMethod) { m_sDefaultMethod = defaultMethod; };
168  void SetBioseqHandle(CBioseq_Handle bsh);
169 
170  bool WriteHeader() override;
172  const CSeq_annot& annot) override { return CGff2Writer::WriteHeader(annot); };
// Write a Seq-align object. Overrides a base-class virtual; the two string
// arguments (assembly name and assembly accession) both default to "".
// Declaration only; returns bool.
// NOTE(review): the meaning of the return value is not visible in this
// header -- presumably true on success; confirm in the .cpp.
173  bool WriteAlign(
174  const CSeq_align&,
175  const string& asmblyName="",
176  const string& asmblyAccession="" ) override;
177 
178 protected:
179  typedef list<pair<CConstRef<CSeq_align>, string>> TAlignCache;
180 
181 protected:
182  bool x_WriteBioseqHandle(
183  CBioseq_Handle) override;
185  CSeq_annot_Handle) override;
186  virtual bool x_WriteFeatureContext(
188 
189  virtual bool xPassesFilterByViewMode(
191 
192  virtual SAnnotSelector& xSetJunkFilteringAnnotSelector();
193 
// Alignment-writing hooks. All four take a CSeq_align plus an unnamed string
// that defaults to "" and return bool. xWriteAlign overrides a base-class
// virtual; the three that follow are declared virtual here.
// NOTE(review): judging by the names, the Denseg / Spliced / Disc variants
// handle one alignment segment type each, with xWriteAlign choosing among
// them -- the dispatch lives in the .cpp and is not visible here; confirm.
194  bool xWriteAlign(
195  const CSeq_align&,
196  const string& = "") override;
197  virtual bool xWriteAlignDenseg(
198  const CSeq_align&,
199  const string& = "");
200  virtual bool xWriteAlignSpliced(
201  const CSeq_align&,
202  const string& = "");
203  virtual bool xWriteAlignDisc(
204  const CSeq_align&,
205  const string& = "");
206 
207  virtual bool xWriteSequenceHeader(
208  CBioseq_Handle );
209  virtual bool xWriteSource(
211  bool xWriteFeature(
212  CFeat_CI feat_it) override;
213 
214  virtual bool xWriteSequence(
215  CBioseq_Handle );
216  virtual bool xWriteNucleotideSequence(
217  CBioseq_Handle );
218  virtual bool xWriteProteinSequence(
219  CBioseq_Handle );
220 
221  virtual bool xWriteNucleotideFeature(
223  const CMappedFeat&);
224  virtual bool xWriteNucleotideFeatureTransSpliced(
226  const CMappedFeat&);
227  virtual bool xWriteProteinFeature(
229  const CMappedFeat&);
230 
231  virtual bool xWriteFeatureGene(
233  const CMappedFeat& );
234  virtual bool xWriteFeatureRna(
236  const CMappedFeat& );
237  virtual bool xWriteFeatureCds(
239  const CMappedFeat& );
240  virtual bool xWriteFeatureGeneric(
242  const CMappedFeat& );
243  virtual bool xWriteFeatureProtein(
245  const CMappedFeat&,
246  const CMappedFeat& );
247  virtual bool xWriteFeatureTrna(
249  const CMappedFeat& );
250  virtual bool xWriteFeatureCDJVSegment(
252  const CMappedFeat& );
253  bool xWriteAllChildren(
255  const CMappedFeat&) override;
256 
257 
258  virtual bool xWriteRecord(
259  const CGffBaseRecord& );
260 
261  void xWriteAlignment(
262  const CGffAlignRecord& record );
263 
264  virtual bool xWriteFeatureRecords(
265  const CGffFeatureRecord&,
266  const CSeq_loc&,
267  unsigned int );
268 
269  //bool xCreateMicroIntrons(
270  // CBioseq_Handle);
271  //
272  bool xSplicedSegHasProteinProd(
273  const CSpliced_seg& spliced);
274 
277  virtual bool xAssignAlignmentScores(
279  const CSeq_align&);
280 
281  bool xAssignAlignmentDenseg(
283  const CAlnMap&,
284  unsigned int);
285  virtual bool xAssignAlignmentDensegSeqId(
287  const CAlnMap&,
288  unsigned int);
289  bool xAssignAlignmentDensegType(
291  const CAlnMap&,
292  unsigned int);
293  bool xAssignAlignmentDensegMethod(
295  const CAlnMap&,
296  unsigned int);
297  virtual bool xAssignAlignmentDensegScores(
299  const CAlnMap&,
300  unsigned int);
301  virtual bool xAssignAlignmentDensegTarget(
303  const CAlnMap&,
304  unsigned int);
305  bool xAssignAlignmentDensegGap(
307  const CAlnMap&,
308  unsigned int);
309  virtual bool xAssignAlignmentDensegLocation(
311  const CAlnMap&,
312  unsigned int);
313 
314  //Spliced-seg processing
315  bool xAssignAlignmentSpliced(
317  const CSpliced_seg&,
318  const CSpliced_exon&);
319  virtual bool xAssignAlignmentSplicedTarget(
321  const CSpliced_seg&,
322  const CSpliced_exon&);
323  bool xAssignAlignmentSplicedPhase(
325  const CSpliced_seg&,
326  const CSpliced_exon&);
327  bool xAssignAlignmentSplicedAttributes(
329  const CSpliced_seg&,
330  const CSpliced_exon&);
331  virtual bool xAssignAlignmentSplicedGap(
333  const CSpliced_seg&,
334  const CSpliced_exon&);
335  virtual bool xAssignAlignmentSplicedScores(
337  const CSpliced_seg&,
338  const CSpliced_exon&);
339  virtual bool xAssignAlignmentSplicedLocation(
341  const CSpliced_seg&,
342  const CSpliced_exon&);
343  bool xAssignAlignmentSplicedType(
345  const CSpliced_seg&,
346  const CSpliced_exon&);
347  bool xAssignAlignmentSplicedMethod(
349  const CSpliced_seg&,
350  const CSpliced_exon&);
351  virtual bool xAssignAlignmentSplicedSeqId(
353  const CSpliced_seg&,
354  const CSpliced_exon&);
355 
356  virtual void x_SortAlignments(TAlignCache& alignCache,
357  CScope& scope);
358  bool xAssignSource(
361  bool xAssignSourceType(
363  bool xAssignSourceSeqId(
366  bool xAssignSourceMethod(
369  bool xAssignSourceEndpoints(
372  bool xAssignSourceAttributes(
375  bool xAssignSourceAttributeGbKey(
377  bool xAssignSourceAttributeMolType(
380  bool xAssignSourceAttributeIsCircular(
383  bool xAssignSourceAttributesBioSource(
386 
387  bool xAssignSourceAttributeGenome(
389  const CBioSource&);
390  bool xAssignSourceAttributeName(
392  const CBioSource&);
393  bool xAssignSourceAttributeDbxref(
395  const CBioSource&);
396  bool xAssignSourceAttributesOrgMod(
398  const CBioSource&);
399  bool xAssignSourceAttributesSubSource(
401  const CBioSource&);
402 
403  //begin mss-234//
404  bool xAssignFeature(
407  const CMappedFeat&) override;
408  bool xAssignFeatureType(
411  const CMappedFeat&) override;
415  const CMappedFeat&) override;
417  CGffFeatureRecord& record,
419  const CMappedFeat& mapped_feat) override;
423  const CMappedFeat&) override;
424  bool xAssignFeaturePhase(
427  const CMappedFeat&) override;
428 
432  const CMappedFeat&) override;
433 
437  const CMappedFeat&) override;
438  bool xAssignFeatureAttributeParent(
441  const CMappedFeat&);
442  bool xAssignFeatureAttributeID(
445  const CMappedFeat&);
446  virtual bool xAssignFeatureAttributeParentMrna(
449  const CMappedFeat& );
450  virtual bool xAssignFeatureAttributeParentCds(
453  const CMappedFeat& );
454  virtual bool xAssignFeatureAttributeParentpreRNA(
457  const CMappedFeat&);
458  virtual bool xAssignFeatureAttributeParentVDJsegmentCregion(
461  const CMappedFeat&);
462  virtual bool xAssignFeatureAttributeParentGene(
465  const CMappedFeat& );
466  virtual bool xAssignFeatureAttributeParentRegion(
469  const CMappedFeat& );
470 
474  const CMappedFeat&) override;
475  bool xAssignFeatureAttributeName(
477  const CMappedFeat&);
481  const CMappedFeat&) override;
482  bool xAssignFeatureAttributeNcrnaClass(
484  const CMappedFeat&);
485  bool xAssignFeatureAttributeTranscriptId(
487  const CMappedFeat&);
491  const CMappedFeat&) override;
492 
493  string xNextAlignId();
494 
495 protected:
496  unsigned int m_uRecordId;
498 
500 
503 
506 
509 
511 
513 
516 
517 
// Six unsigned counters, one per record category named in the identifier
// (gene, mRNA, tRNA, CDS, generic, alignment).
// NOTE(review): presumably each holds the next numeric suffix to hand out
// when generating an ID for that category -- how they are initialised and
// advanced is not visible in this header; confirm in the .cpp.
519  unsigned int m_uPendingGeneId;
520  unsigned int m_uPendingMrnaId;
521  unsigned int m_uPendingTrnaId;
522  unsigned int m_uPendingCdsId;
523  unsigned int m_uPendingGenericId;
524  unsigned int m_uPendingAlignId;
525 
528 };
529 
530 END_objects_SCOPE
532 
533 #endif // OBJTOOLS_READERS___GFF3_WRITER__HPP
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
virtual bool xWriteAlign(const CSeq_align &, const string &="")=0
CBioseq_Handle –.
CFeat_CI –.
Definition: feat_ci.hpp:64
CWriterBase implementation that formats Genbank objects as plain GFF files.
Definition: gff_writer.hpp:60
bool WriteAlign(const CSeq_align &, const string &asmblyName="", const string &asmblyAccession="") override
Write a Seq-align object.
Definition: gff_writer.cpp:311
virtual bool xAssignFeatureAttributeNote(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &)
Definition: gff_writer.hpp:267
virtual bool xAssignFeatureType(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &)
Definition: gff_writer.cpp:463
virtual bool xAssignFeatureAttributeDbxref(CGffFeatureRecord &, CGffFeatureContext &, const string &label, const CMappedFeat &)
Definition: gff_writer.cpp:613
virtual bool xAssignFeature(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &)
Definition: gff_writer.cpp:369
virtual bool xAssignFeatureStrand(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &)
Definition: gff_writer.cpp:474
virtual bool xWriteAllChildren(CGffFeatureContext &, const CMappedFeat &)
Definition: gff_writer.cpp:226
virtual bool xAssignFeatureAttributesFormatIndependent(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &)
Definition: gff_writer.cpp:537
virtual bool xWriteFeature(CGffFeatureContext &, const CMappedFeat &)
Definition: gff_writer.cpp:290
virtual bool x_WriteSeqAnnotHandle(CSeq_annot_Handle)
Definition: gff_writer.cpp:262
virtual bool xAssignFeatureEndpoints(CGffFeatureRecord &record, CGffFeatureContext &, const CMappedFeat &mapped_feat)
Definition: gff_writer.cpp:498
virtual bool xAssignFeatureMethod(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &)
Definition: gff_writer.cpp:487
virtual bool xAssignFeatureAttributesQualifiers(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &)
Definition: gff_writer.hpp:247
bool WriteHeader() override
Write a file header identifying the file content as GFF version 2.
Definition: gff_writer.cpp:335
virtual bool xAssignFeatureAttributesFormatSpecific(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &)
Definition: gff_writer.cpp:572
virtual bool x_WriteBioseqHandle(CBioseq_Handle)
Definition: gff_writer.cpp:208
virtual bool xAssignFeaturePhase(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &)
Definition: gff_writer.cpp:453
string StrAttributes() const
string Id() const
string Parent() const
void SetRecordId(const string &recordId)
CGff3FeatureRecord(const CGff3FeatureRecord &rhs)
Definition: gff3_writer.hpp:97
void SetParent(const string &parent)
void SetRecordId(const string &recordId)
Definition: gff3_writer.hpp:64
CGff3SourceRecord(const CGff3SourceRecord &rhs)
Definition: gff3_writer.hpp:62
string Id() const
Definition: gff3_writer.hpp:66
string StrAttributes() const
Definition: gff3_writer.hpp:68
CBioseq_Handle m_BioseqHandle
TMrnaMapNew m_CdsMapNew
virtual ~CGff3Writer()=default
bool m_SortAlignments
unsigned int m_uPendingCdsId
unsigned int m_uPendingMrnaId
list< pair< CConstRef< CSeq_align >, string > > TAlignCache
TRegionMapNew m_RegionMapNew
TGeneMapNew m_GeneMapNew
TFeatureMap m_PrernaMapNew
bool WriteHeader(const CSeq_annot &annot) override
Write a file header, using annotation information.
TMrnaMapNew m_MrnaMapNew
void SetDefaultMethod(const string &defaultMethod)
string m_sDefaultMethod
TFeatureMap m_VDJsegmentCregionMapNew
unsigned int m_uPendingAlignId
unsigned int m_uPendingTrnaId
unsigned int m_uPendingGenericId
unsigned int m_uRecordId
unsigned int m_uPendingGeneId
bool xAssignAlignment(CGffFeatureRecord &)
CGffIdGenerator m_idGenerator
virtual string StrAttributes() const
const string & xEscapedString(const string &value) const
static const char * ATTR_SEPARATOR
CMappedFeat –.
Definition: mapped_feat.hpp:59
CScope –.
Definition: scope.hpp:92
CSeq_annot_Handle –.
Include a standard set of the NCBI C++ Toolkit most basic headers.
static const struct attribute attributes[]
Definition: attributes.c:165
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
#define NCBI_XOBJWRITE_EXPORT
Definition: ncbi_export.h:1347
The Object manager core.
SAnnotSelector –.
Modified on Sun Apr 14 05:29:10 2024 by modify_doxy.py rev. 669887