NCBI C++ ToolKit
gff_writer.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: gff_writer.hpp 101758 2024-02-07 15:03:49Z foleyjp $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Frank Ludwig
27  *
28  * File Description: Write gff file
29  *
30  */
31 
32 #ifndef OBJTOOLS_READERS___GFF_WRITER__HPP
33 #define OBJTOOLS_READERS___GFF_WRITER__HPP
34 
35 #include <corelib/ncbistd.hpp>
39 
41 BEGIN_objects_SCOPE
42 
43 // ============================================================================
44 /// CWriterBase implementation that formats Genbank objects as plain GFF files.
45 /// GFF (or GFF2) is a predecessor of GTF, GFF3, GVF and probably half a dozen
46 /// other dialects in use today. GFF files consist of feature records, with each
47 /// feature record consisting of nine columns.
48 /// There is some agreed upon meaning of the first eight columns and on the
49 /// general grammar of the column contents. Beyond that, there has never been
50 /// a universally agreed upon definition of the format (probably the prime reason
51 /// the format is deprecated now).
52 /// For the purpose of this implementation, GFF is taken to be the greatest
53 /// common denominator between GTF and GFF3 (and thus GVF). Those other writers
54 /// then derive from this one, adding their own bits and pieces to complete the
55 /// format renderer.
56 ///
58  public CWriterBase, CFeatWriter
59 // ============================================================================
60 {
61 public:
62  typedef enum {
63  fSoQuirks = (fWriterBaseLast << 1),
64  fGenerateMissingTranscripts = (fWriterBaseLast << 2),
65  fGff2WriterLast = fSoQuirks,
66  } TFlags;
67 
68 public:
69  /// Constructor.
70  /// @param scope
71  /// scope to be used for ID reference resolution (it's OK to create one
72  /// on the fly).
73  /// @param ostr
74  /// stream objects should be written to.
75  /// @param flags
76  /// any output customization flags.
77  ///
79  CScope& scope,
80  CNcbiOstream& ostr,
81  unsigned int flags=fNormal );
82 
83  /// Constructor.
84  /// Scopeless version. A scope will be allocated internally.
85  /// @param ostr
86  /// stream objects should be written to.
87  /// @param flags
88  /// any output customization flags.
89  ///
91  CNcbiOstream&,
92  unsigned int = fNormal );
93 
94  virtual ~CGff2Writer();
95 
96  /// Write a file header identifying the file content as GFF version 2.
97  ///
98  bool WriteHeader() override;
99 
101  const CSeq_annot& ) override { return WriteHeader(); };
102 
103  /// Write a trailer marking the end of a parsing context.
104  ///
105  bool WriteFooter() override;
106 
107  virtual bool WriteFooter(
108  const CSeq_annot& ) { return WriteFooter(); };
109 
110  /// Convenience function to render a "naked" Seq-annot. Makes use of the
111  /// internal scope.
112  /// @param annot
113  /// Seq-annot object to be rendered
114  /// @param asmblyName
115  /// optional assembly name to use for the file header
116  /// @param asmblyAccession
117  /// optional assembly accession to use for the file header
118  ///
119  bool WriteAnnot(
120  const CSeq_annot& annot,
121  const string& asmblyName="",
122  const string& asmblyAccession="" ) override;
123 
124  /// Write a Seq-align object.
125  /// Calling this function on a general GFF2 writer (as opposed to GFF3 or
126  /// another more specialized format) will fail because GFF2 at this general
127  /// level does not address alignments; you will need at least a GFF3 writer
128  /// for that.
129  /// @param align
130  /// Seq-align object to be rendered
131  /// @param asmblyName
132  /// optional assembly name to use for the file header
133  /// @param asmblyAccession
134  /// optional assembly accession to use for the file header
135  ///
136  bool WriteAlign(
137  const CSeq_align&,
138  const string& asmblyName="",
139  const string& asmblyAccession="" ) override;
140 
141  /// Write Seq-entry contained in a given handle.
142  /// Essentially, will iterate through all contained Bioseq objects and process
143  /// those, with some special processing for nuc-prot sets.
144  /// @param seh
145  /// Seq-entry handle to be processed
146  /// @param asmblyName
147  /// optional assembly name to use for the file header
148  /// @param asmblyAccession
149  /// optional assembly accession to use for the file header
150  ///
151  bool WriteSeqEntryHandle(
152  CSeq_entry_Handle seh,
153  const string& asmblyName="",
154  const string& asmblyAccession="" ) override;
155 
156  /// Write Bioseq contained in given handle
157  /// Essentially, will write all features that live on the given Bioseq.
158  /// @param bsh
159  /// Bioseq handle to be processed
160  /// @param asmblyName
161  /// optional assembly name to use for the file header
162  /// @param asmblyAccession
163  /// optional assembly accession to use for the file header
164  ///
165  bool WriteBioseqHandle(
166  CBioseq_Handle bsh,
167  const string& asmblyName="",
168  const string& asmblyAccession="" ) override;
169 
170  /// Write Seq-annot contained in given handle
171  /// Essentially, write out embedded feature table. Other annotation
172  /// types are not supported in the generic GFF2 writer (i.e. there will be
173  /// a header and nothing else).
174  /// @param sah
175  /// Seq-annot handle to be processed
176  /// @param asmblyName
177  /// optional assembly name to use for the file header
178  /// @param asmblyAccession
179  /// optional assembly accession to use for the file header
180  ///
181  bool WriteSeqAnnotHandle(
182  CSeq_annot_Handle sah,
183  const string& asmblyName="",
184  const string& asmblyAccession="" ) override;
185 
186 protected:
187  virtual bool xAssignFeature(
190  const CMappedFeat&);
191 
192  virtual bool xAssignFeatureBasic(
195  const CMappedFeat&);
196 
197  virtual bool xAssignFeatureType(
200  const CMappedFeat&);
201 
202  virtual bool xAssignFeatureSeqId(
205  const CMappedFeat&);
206 
207  virtual bool xAssignFeatureMethod(
210  const CMappedFeat&);
211 
212  virtual bool xAssignFeatureEndpoints(
213  CGffFeatureRecord& record,
215  const CMappedFeat& mapped_feat);
216 
217  virtual bool xAssignFeatureScore(
220  const CMappedFeat&);
221 
222  virtual bool xAssignFeatureStrand(
225  const CMappedFeat&);
226 
227  virtual bool xAssignFeaturePhase(
230  const CMappedFeat&);
231 
232  virtual bool xAssignFeatureAttributes(
235  const CMappedFeat&);
236 
237  virtual bool xAssignFeatureAttributesFormatIndependent(
240  const CMappedFeat&);
241 
242  virtual bool xAssignFeatureAttributesFormatSpecific(
245  const CMappedFeat&);
246 
250  const CMappedFeat&) { return false; };
251  virtual bool xAssignFeatureAttributesGene(
254  const CMappedFeat&);
255 
256  virtual bool xAssignFeatureAttributeDbxref(
259  const string& label,
260  const CMappedFeat&);
261 
265  const CMappedFeat&) { return false; };
266 
270  const CMappedFeat&) { return false; };
271  virtual bool xAssignFeatureAttributeProduct(
274  const CMappedFeat&);
275  virtual bool xAssignFeatureAttributeProteinId(
278  const CMappedFeat&);
279  virtual bool xAssignFeatureAttributeRibosomalSlippage(
282  const CMappedFeat&);
283  virtual bool xAssignFeatureAttributeTranslationTable(
286  const CMappedFeat&);
287  virtual bool xAssignFeatureAttributePartial(
290  const CMappedFeat&);
291  virtual bool xAssignFeatureAttributePseudo(
294  const CMappedFeat&);
295  bool xAssignFeatureAttributeCodeBreak(
298  const CMappedFeat&);
299  bool xAssignFeatureAttributeOldLocusTag(
302  const CMappedFeat&);
303  bool xAssignFeatureAttributeGeneBiotype(
306  const CMappedFeat&);
307  bool xAssignFeatureAttributeMapLoc(
310  const CMappedFeat&);
311  bool xAssignFeatureAttributeException(
314  const CMappedFeat&);
315  bool xAssignFeatureAttributeExperiment(
318  const CMappedFeat&);
319  bool xAssignFeatureAttributeModelEvidence(
322  const CMappedFeat&);
323  bool xAssignFeatureAttributeRptFamily(
326  const CMappedFeat&);
327  bool xAssignFeatureAttributePseudoGene(
330  const CMappedFeat&);
331  bool xAssignFeatureAttributeIsOrdered(
334  const CMappedFeat&);
335  bool xAssignFeatureAttributeFunction(
338  const CMappedFeat&);
339  bool xAssignFeatureAttributesGoMarkup(
342  const CMappedFeat&);
343  bool xAssignFeatureAttributeEcNumbers(
346  const CMappedFeat&);
347  bool xAssignFeatureAttributeExonNumber(
350  const CMappedFeat&);
351 
352  virtual bool x_WriteSequenceHeader(
353  CBioseq_Handle ) { return true; };
354 
355  virtual bool x_WriteSequenceHeader(
356  CSeq_id_Handle ) { return true; };
357 
358  virtual bool x_WriteAnnot(
359  const CSeq_annot& );
360 
361  virtual bool x_WriteAlign(
362  const CSeq_align&);
363 
364  virtual bool x_WriteSeqEntryHandle(
366 
367  virtual bool x_WriteSeqAnnotHandle(
369  virtual bool x_WriteBioseqHandle(
370  CBioseq_Handle );
371 
372  virtual bool xWriteFeature(
374  const CMappedFeat& );
375  bool xWriteFeature(
376  CFeat_CI feat_it) override;
377 
378  virtual bool xWriteAllChildren(
380  const CMappedFeat&);
381 
382  virtual bool x_WriteAssemblyInfo(
383  const string&,
384  const string& );
385 
386  virtual bool xGeneratingMissingTranscripts() const
387  {
388  return (m_uFlags & fGenerateMissingTranscripts);
389  }
390 
391  static bool IsTranscriptType(
392  const CMappedFeat&);
393  static bool HasAccaptableTranscriptParent(
395  const CMappedFeat&);
396  static bool xIntervalsNeedPartNumbers(
397  const list<CRef<CSeq_interval>>&);
398 
399  CMappedFeat xGenerateMissingTranscript(
401  const CMappedFeat&);
402 
403 
404  // data:
407 };
408 
409 END_objects_SCOPE
411 
412 #endif // OBJTOOLS_WRITERS___GFF_WRITER__HPP
CBioseq_Handle –.
virtual bool xWriteFeature(CFeat_CI)
Definition: writer.hpp:278
CFeat_CI –.
Definition: feat_ci.hpp:64
CWriterBase implementation that formats Genbank objects as plain GFF files.
Definition: gff_writer.hpp:60
virtual bool xAssignFeatureAttributeNote(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &)
Definition: gff_writer.hpp:267
bool WriteHeader(const CSeq_annot &) override
Write a file header, using annotation information.
Definition: gff_writer.hpp:100
bool m_bHeaderWritten
Definition: gff_writer.hpp:406
virtual bool xAssignFeatureAttributeDbxref(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &)
Definition: gff_writer.hpp:262
CRef< CScope > m_pScope
Definition: gff_writer.hpp:405
virtual bool xGeneratingMissingTranscripts() const
Definition: gff_writer.hpp:386
virtual bool WriteFooter(const CSeq_annot &)
Definition: gff_writer.hpp:107
virtual bool xAssignFeatureAttributesQualifiers(CGffFeatureRecord &, CGffFeatureContext &, const CMappedFeat &)
Definition: gff_writer.hpp:247
virtual bool x_WriteSequenceHeader(CSeq_id_Handle)
Definition: gff_writer.hpp:355
virtual bool x_WriteSequenceHeader(CBioseq_Handle)
Definition: gff_writer.hpp:352
CMappedFeat –.
Definition: mapped_feat.hpp:59
CScope –.
Definition: scope.hpp:92
CSeq_annot_Handle –.
CSeq_entry_Handle –.
Defines and provides stubs for a general interface to a variety of file formatters.
Definition: writer.hpp:81
virtual bool WriteAnnot(const CSeq_annot &, const string &="", const string &="")
Write a raw Seq-annot to the internal output stream.
Definition: writer.hpp:116
TFlags
Customization flags that are relevant to all CWriterBase derived writers.
Definition: writer.hpp:85
virtual bool WriteFooter()
Write a file trailer.
Definition: writer.hpp:221
virtual bool WriteAlign(const CSeq_align &, const string &="", const string &="")
Write a raw Seq-align to the internal output stream.
Definition: writer.hpp:135
virtual bool WriteBioseqHandle(CBioseq_Handle, const string &="", const string &="")
Write a Bioseq handle to the internal output stream.
Definition: writer.hpp:173
virtual bool WriteHeader()
Write a file header.
Definition: writer.hpp:206
virtual bool WriteSeqAnnotHandle(CSeq_annot_Handle, const string &="", const string &="")
Write a Seq-annot handle to the internal output stream.
Definition: writer.hpp:192
virtual bool WriteSeqEntryHandle(CSeq_entry_Handle, const string &="", const string &="")
Write a Seq-entry handle to the internal output stream.
Definition: writer.hpp:154
Include a standard set of the NCBI C++ Toolkit most basic headers.
static uch flags
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
#define NCBI_XOBJWRITE_EXPORT
Definition: ncbi_export.h:1347
static const char label[]
Modified on Fri Sep 20 14:57:52 2024 by modify_doxy.py rev. 669887