NCBI C++ ToolKit
gtf_reader.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: gtf_reader.cpp 93733 2021-05-17 14:36:08Z ludwigf $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Frank Ludwig
27  *
28  * File Description:
29  * GTF file reader
30  *
31  */
32 
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbistd.hpp>
35 
36 #include <util/line_reader.hpp>
37 
41 
46 
48 #include <objects/seq/Annot_id.hpp>
52 
59 
61 #include "gtf_location_merger.hpp"
63 
64 #include <algorithm>
65 
67 BEGIN_objects_SCOPE // namespace ncbi::objects::
68 
69 // ----------------------------------------------------------------------------
71  const string& strGtfType,
72  const string& strRawAttributes )
73 // ----------------------------------------------------------------------------
74 {
75  vector< string > attributes;
76  xSplitGffAttributes(strRawAttributes, attributes);
77 
78  for ( size_t u=0; u < attributes.size(); ++u ) {
79  string key, value;
80  string attribute(attributes[u]);
81  if (!NStr::SplitInTwo(attribute, "=", key, value)) {
82  if (!NStr::SplitInTwo(attribute, " ", key, value)) {
83  if (strGtfType == "gene") {
86  continue;
87  }
88  if (strGtfType == "transcript") {
89  string gid, tid;
90  if (!NStr::SplitInTwo(attribute, ".", gid, tid)) {
91  return false;
92  }
94  "gene_id", xNormalizedAttributeValue(gid));
96  "transcript_id", xNormalizedAttributeValue(attribute));
97  continue;
98  }
99  }
100  }
103  if ( key.empty() && value.empty() ) {
104  // Probably due to trailing "; ". Sequence Ontology generates such
105  // things.
106  continue;
107  }
108  if (NStr::StartsWith(value, "\"")) {
109  value = value.substr(1, string::npos);
110  }
111  if (NStr::EndsWith(value, "\"")) {
112  value = value.substr(0, value.length() - 1);
113  }
115  }
116  return true;
117 }
118 
119 // ----------------------------------------------------------------------------
121  unsigned int uFlags,
122  const string& strAnnotName,
123  const string& strAnnotTitle,
124  SeqIdResolver resolver,
125  CReaderListener* pRL):
126 // ----------------------------------------------------------------------------
127  CGff2Reader( uFlags, strAnnotName, strAnnotTitle, resolver, pRL)
128 {
129  mpLocations.reset(new CGtfLocationMerger(uFlags, resolver));
130 }
131 
132 // ----------------------------------------------------------------------------
134  unsigned int uFlags,
135  CReaderListener* pRL):
136 // ----------------------------------------------------------------------------
137  CGtfReader( uFlags, "", "", CReadUtil::AsSeqId, pRL)
138 {
139 }
140 
141 
142 // ----------------------------------------------------------------------------
144 // ----------------------------------------------------------------------------
145 {
146 }
147 
148 // ----------------------------------------------------------------------------
151  ILineReader& lineReader,
152  ILineErrorListener* pEC )
153 // ----------------------------------------------------------------------------
154 {
156  return CReaderBase::ReadSeqAnnot(lineReader, pEC);
157 }
158 
159 // ----------------------------------------------------------------------------
160 void
162  const TReaderData& readerData,
163  CSeq_annot& annot)
164 // ----------------------------------------------------------------------------
165 {
166  for (const auto& lineData: readerData) {
167  const auto& line = lineData.mData;
168  if (xIsTrackTerminator(line)) {
169  continue;
170  }
171  if (xParseStructuredComment(line)) {
172  continue;
173  }
174  if (xParseBrowserLine(line, annot)) {
175  continue;
176  }
177  if (xParseFeature(line, annot, nullptr)) {
178  continue;
179  }
180  }
181 }
182 
183 // ----------------------------------------------------------------------------
185  const CGff2Record& record,
186  CSeq_annot& annot,
187  ILineErrorListener* pEC)
188 // ----------------------------------------------------------------------------
189 {
190  const CGtfReadRecord& gff = dynamic_cast<const CGtfReadRecord&>(record);
191  auto recType = gff.NormalizedType();
192 
193  using TYPEHANDLER = bool (CGtfReader::*)(const CGtfReadRecord&, CSeq_annot&);
194  using HANDLERMAP = map<string, TYPEHANDLER>;
195 
196  HANDLERMAP typeHandlers = {
197  {"cds", &CGtfReader::xUpdateAnnotCds},
198  {"start_codon", &CGtfReader::xUpdateAnnotCds},
199  {"stop_codon", &CGtfReader::xUpdateAnnotCds},
204  {"internal", &CGtfReader::xUpdateAnnotTranscript},
205  {"terminal", &CGtfReader::xUpdateAnnotTranscript},
207  };
208 
209  //
210  // Handle officially recognized GTF types:
211  //
212  HANDLERMAP::iterator it = typeHandlers.find(recType);
213  if (it != typeHandlers.end()) {
214  TYPEHANDLER handler = it->second;
215  return (this->*handler)(gff, annot);
216  }
217 
218  //
219  // Every other type is not officially sanctioned GTF, and per spec we are
220  // supposed to ignore it. In the spirit of being lenient on input we may
221  // try to salvage some of it anyway.
222  //
223  if (recType == "gene") {
224  return xCreateParentGene(gff, annot);
225  }
226  if (recType == "mrna" || recType == "transcript") {
227  return xCreateParentMrna(gff, annot);
228  }
229  return true;
230 }
231 
232 // ----------------------------------------------------------------------------
234  const CGtfReadRecord& gff,
235  CSeq_annot& annot )
236 // ----------------------------------------------------------------------------
237 {
238  auto featId = mpLocations->GetFeatureIdFor(gff, "cds");
239  mpLocations->AddRecordForId(featId, gff) ;
240  return (xFindFeatById(featId) || xCreateParentCds(gff, annot));
241  }
242 
243 // ----------------------------------------------------------------------------
245  const CGtfReadRecord& gff,
246  CSeq_annot& annot )
247 // ----------------------------------------------------------------------------
248 {
249  //
250  // If there is no gene feature to go with this CDS then make one. Otherwise,
251  // make sure the existing gene feature includes the location of the CDS.
252  //
253  auto geneFeatId = mpLocations->GetFeatureIdFor(gff, "gene");
254  CRef< CSeq_feat > pGene = xFindFeatById(geneFeatId);
255  if (!pGene) {
256  if (!xCreateParentGene(gff, annot)) {
257  return false;
258  }
259  mpLocations->AddRecordForId(geneFeatId, gff);
260  }
261  else {
262  mpLocations->AddRecordForId(geneFeatId, gff);
263  if (!xFeatureTrimQualifiers(gff, *pGene)) {
264  return false;
265  }
266  }
267 
268  //
269  // If there is no mRNA feature with this gene_id|transcript_id then make one.
270  // Otherwise, fix up the location of the existing one.
271  //
272  auto transcriptFeatId = mpLocations->GetFeatureIdFor(gff, "transcript");
273  CRef<CSeq_feat> pMrna = xFindFeatById(transcriptFeatId);
274  if (!pMrna) {
275  //
276  // Create a brand new CDS feature:
277  //
278  if (!xCreateParentMrna(gff, annot)) {
279  return false;
280  }
281  mpLocations->AddRecordForId(transcriptFeatId, gff);
282  }
283  else {
284  //
285  // Update an already existing CDS features:
286  //
287  mpLocations->AddRecordForId(transcriptFeatId, gff);
288  if (!xFeatureTrimQualifiers(gff, *pMrna)) {
289  return false;
290  }
291  }
292  return true;
293 }
294 
295 // ----------------------------------------------------------------------------
297  const CGtfReadRecord& record,
298  const string& prefix,
299  CSeq_feat& feature )
300 // ----------------------------------------------------------------------------
301 {
302  static int seqNum(1);
303 
304  string strFeatureId = prefix;
305  if (strFeatureId.empty()) {
306  strFeatureId = "id";
307  }
308  strFeatureId += "_";
309  strFeatureId += NStr::IntToString(seqNum++);
310  feature.SetId().SetLocal().SetStr(strFeatureId);
311  return true;
312 }
313 
314 // -----------------------------------------------------------------------------
316  const CGtfReadRecord& gff,
317  CSeq_annot& annot )
318 // -----------------------------------------------------------------------------
319 {
320  auto featId = mpLocations->GetFeatureIdFor(gff, "gene");
321  if (m_MapIdToFeature.find(featId) != m_MapIdToFeature.end()) {
322  return true;
323  }
324 
325  CRef<CSeq_feat> pFeature( new CSeq_feat );
326 
327  if (!xFeatureSetDataGene(gff, *pFeature)) {
328  return false;
329  }
330  if (!xCreateFeatureId(gff, "gene", *pFeature)) {
331  return false;
332  }
333  if ( !xFeatureSetQualifiersGene(gff, *pFeature)) {
334  return false;
335  }
336 
337  (gff.Type() == "gene") ?
338  mpLocations->AddRecordForId(featId, gff) :
339  mpLocations->AddStubForId(featId);
340  m_MapIdToFeature[featId] = pFeature;
341  xAddFeatureToAnnot(pFeature, annot);
342  return true;
343 }
344 
345 // ----------------------------------------------------------------------------
347  const CGtfReadRecord& record,
348  CSeq_feat& feature )
349 // ----------------------------------------------------------------------------
350 {
351  list<string> ignoredAttrs = {
352  "locus_tag", "transcript_id"
353  };
354  //
355  // Create GB qualifiers for the record attributes:
356  //
357 
358  const auto& attrs = record.GtfAttributes().Get();
359  auto it = attrs.begin();
360  for (/*NOOP*/; it != attrs.end(); ++it) {
361  auto cit = std::find(ignoredAttrs.begin(), ignoredAttrs.end(), it->first);
362  if (cit != ignoredAttrs.end()) {
363  continue;
364  }
365  // special case some well-known attributes
366  if (xProcessQualifierSpecialCase(it->first, it->second, feature)) {
367  continue;
368  }
369 
370  // turn everything else into a qualifier
371  xFeatureAddQualifiers(it->first, it->second, feature);
372  }
373  return true;
374 }
375 
376 // ----------------------------------------------------------------------------
378  const CGtfReadRecord& record,
379  CSeq_feat& feature )
380 // ----------------------------------------------------------------------------
381 {
382  list<string> ignoredAttrs = {
383  "locus_tag"
384  };
385 
386  const auto& attrs = record.GtfAttributes().Get();
387  auto it = attrs.begin();
388  for (/*NOOP*/; it != attrs.end(); ++it) {
389  auto cit = std::find(ignoredAttrs.begin(), ignoredAttrs.end(), it->first);
390  if (cit != ignoredAttrs.end()) {
391  continue;
392  }
393  // special case some well-known attributes
394  if (xProcessQualifierSpecialCase(it->first, it->second, feature)) {
395  continue;
396  }
397 
398  // turn everything else into a qualifier
399  xFeatureAddQualifiers(it->first, it->second, feature);
400  }
401  return true;
402 }
403 
404 // ----------------------------------------------------------------------------
406  const CGtfReadRecord& record,
407  CSeq_feat& feature )
408 // ----------------------------------------------------------------------------
409 {
410  list<string> ignoredAttrs = {
411  "locus_tag"
412  };
413 
414  const auto& attrs = record.GtfAttributes().Get();
415  auto it = attrs.begin();
416  for (/*NOOP*/; it != attrs.end(); ++it) {
417  auto cit = std::find(ignoredAttrs.begin(), ignoredAttrs.end(), it->first);
418  if (cit != ignoredAttrs.end()) {
419  continue;
420  }
421  // special case some well-known attributes
422  if (xProcessQualifierSpecialCase(it->first, it->second, feature)) {
423  continue;
424  }
425 
426  // turn everything else into a qualifier
427  xFeatureAddQualifiers(it->first, it->second, feature);
428  }
429  return true;
430 }
431 
432 // -----------------------------------------------------------------------------
434  const CGtfReadRecord& gff,
435  CSeq_annot& annot )
436 // -----------------------------------------------------------------------------
437 {
438  auto featId = mpLocations->GetFeatureIdFor(gff, "cds");
439  if (m_MapIdToFeature.find(featId) != m_MapIdToFeature.end()) {
440  return true;
441  }
442 
443  CRef<CSeq_feat> pFeature(new CSeq_feat);
444 
445  if (!xFeatureSetDataCds(gff, *pFeature)) {
446  return false;
447  }
448  if (!xCreateFeatureId(gff, "cds", *pFeature)) {
449  return false;
450  }
451  if (!xFeatureSetQualifiersCds(gff, *pFeature)) {
452  return false;
453  }
454  m_MapIdToFeature[featId] = pFeature;
455  return xAddFeatureToAnnot(pFeature, annot);
456 }
457 
458 // -----------------------------------------------------------------------------
460  const CGtfReadRecord& gff,
461  CSeq_annot& annot )
462 // -----------------------------------------------------------------------------
463 {
464  auto featId = mpLocations->GetFeatureIdFor(gff, "transcript");
465  if (m_MapIdToFeature.find(featId) != m_MapIdToFeature.end()) {
466  return true;
467  }
468 
469  CRef< CSeq_feat > pFeature( new CSeq_feat );
470 
471  if (!xFeatureSetDataMrna(gff, *pFeature)) {
472  return false;
473  }
474  if (!xCreateFeatureId(gff, "mrna", *pFeature)) {
475  return false;
476  }
477  if ( ! xFeatureSetQualifiersRna( gff, *pFeature ) ) {
478  return false;
479  }
480 
481  mpLocations->AddStubForId(featId);
482  m_MapIdToFeature[featId] = pFeature;
483 
484  return xAddFeatureToAnnot( pFeature, annot );
485 }
486 
487 // ----------------------------------------------------------------------------
489  const string& featId)
490 // ----------------------------------------------------------------------------
491 {
492  auto featIt = m_MapIdToFeature.find(featId);
493  if (featIt == m_MapIdToFeature.end()) {
494  return CRef<CSeq_feat>();
495  }
496  return featIt->second;
497 }
498 
499 // ----------------------------------------------------------------------------
501  const CGtfReadRecord& record,
502  CSeq_feat& feature )
503 // ----------------------------------------------------------------------------
504 {
505  CGene_ref& gene = feature.SetData().SetGene();
506 
507  const auto& attributes = record.GtfAttributes();
508  string geneSynonym = attributes.ValueOf("gene_synonym");
509  if (!geneSynonym.empty()) {
510  gene.SetSyn().push_back(geneSynonym);
511  }
512  string locusTag = attributes.ValueOf("locus_tag");
513  if (!locusTag.empty()) {
514  gene.SetLocus_tag(locusTag);
515  }
516  return true;
517 }
518 
519 // ----------------------------------------------------------------------------
521  const CGtfReadRecord& record,
522  CSeq_feat& feature)
523 // ----------------------------------------------------------------------------
524 {
525  if (!xFeatureSetDataRna(record, feature, CSeqFeatData::eSubtype_mRNA)) {
526  return false;
527  }
528  CRNA_ref& rna = feature.SetData().SetRna();
529 
530  string product = record.GtfAttributes().ValueOf("product");
531  if (!product.empty()) {
532  rna.SetExt().SetName(product);
533  }
534  return true;
535 }
536 
537 // ----------------------------------------------------------------------------
539  const CGtfReadRecord& record,
540  CSeq_feat& feature,
541  CSeqFeatData::ESubtype subType)
542 // ----------------------------------------------------------------------------
543 {
544  CRNA_ref& rnaRef = feature.SetData().SetRna();
545  switch (subType){
546  default:
548  break;
551  break;
554  break;
555  }
556  return true;
557 }
558 
559 // ----------------------------------------------------------------------------
561  const CGtfReadRecord& record,
562  CSeq_feat& feature )
563 // ----------------------------------------------------------------------------
564 {
565  CCdregion& cdr = feature.SetData().SetCdregion();
566  const auto& attributes = record.GtfAttributes();
567 
568  string proteinId = attributes.ValueOf("protein_id");
569  if (!proteinId.empty()) {
570  CRef<CSeq_id> pId = mSeqIdResolve(proteinId, m_iFlags, true);
571  if (pId->IsGenbank()) {
572  feature.SetProduct().SetWhole(*pId);
573  }
574  }
575  string ribosomalSlippage = attributes.ValueOf("ribosomal_slippage");
576  if (!ribosomalSlippage.empty()) {
577  feature.SetExcept( true );
578  feature.SetExcept_text("ribosomal slippage");
579  }
580  string transTable = attributes.ValueOf("transl_table");
581  if (!transTable.empty()) {
583  pGc->SetId(NStr::StringToUInt(transTable));
584  cdr.SetCode().Set().push_back(pGc);
585  }
586  return true;
587 }
588 
589 // ----------------------------------------------------------------------------
591  const CGtfReadRecord& record,
592  CSeq_feat& feature )
593  // ----------------------------------------------------------------------------
594 {
595  typedef CSeq_feat::TQual TQual;
596  //task:
597  // for each attribute of the new piece check if we already got a feature
598  // qualifier
599  // if so, and with the same value, then the qualifier is allowed to live
600  // otherwise it is subfeature specific and hence removed from the feature
601  TQual& quals = feature.SetQual();
602  for (TQual::iterator it = quals.begin(); it != quals.end(); /**/) {
603  const string& qualKey = (*it)->GetQual();
604  if (NStr::StartsWith(qualKey, "gff_")) {
605  it++;
606  continue;
607  }
608  if (qualKey == "locus_tag") {
609  it++;
610  continue;
611  }
612  if (qualKey == "old_locus_tag") {
613  it++;
614  continue;
615  }
616  if (qualKey == "product") {
617  it++;
618  continue;
619  }
620  if (qualKey == "protein_id") {
621  it++;
622  continue;
623  }
624  const string& qualVal = (*it)->GetVal();
625  if (!record.GtfAttributes().HasValue(qualKey, qualVal)) {
626  //superfluous qualifier- squish
627  it = quals.erase(it);
628  continue;
629  }
630  it++;
631  }
632  return true;
633 }
634 
635 // ----------------------------------------------------------------------------
637  const string& key,
638  const CGtfAttributes::MultiValue& values,
639  CSeq_feat& feature )
640 // ----------------------------------------------------------------------------
641 {
642  CRef<CGb_qual> pQual(0);
643 
644  if (0 == NStr::CompareNocase(key, "exon_id")) {
645  return true;
646  }
647  if (0 == NStr::CompareNocase(key, "exon_number")) {
648  return true;
649  }
650  if ( 0 == NStr::CompareNocase(key, "note") ) {
651  feature.SetComment(NStr::Join(values, ";"));
652  return true;
653  }
654  if ( 0 == NStr::CompareNocase(key, "dbxref") ||
655  0 == NStr::CompareNocase(key, "db_xref"))
656  {
657  for (auto value: values) {
658  vector< string > tags;
659  NStr::Split(value, ";", tags );
660  for (auto it = tags.begin(); it != tags.end(); ++it ) {
661  feature.SetDbxref().push_back(x_ParseDbtag(*it));
662  }
663  }
664  return true;
665  }
666 
667  if ( 0 == NStr::CompareNocase(key, "pseudo")) {
668  feature.SetPseudo( true );
669  return true;
670  }
671  if ( 0 == NStr::CompareNocase(key, "partial")) {
672  // RW-1108 - ignore partial attribute in Genbank mode
674  return true;
675  }
676  }
677  return false;
678 }
679 
680 // ----------------------------------------------------------------------------
682  const string& key,
683  const CGtfAttributes::MultiValue& values,
684  CSeq_feat& feature)
685  // ----------------------------------------------------------------------------
686 {
687  for (auto value: values) {
688  feature.AddQualifier(key, value);
689  }
690 };
691 
692 // ============================================================================
694  CSeq_feat& descendent,
695  CSeq_feat& ancestor)
696 // ============================================================================
697 {
698  xSetXrefFromTo(descendent, ancestor);
700  xSetXrefFromTo(ancestor, descendent);
701  }
702 }
703 
704 // ----------------------------------------------------------------------------
706  CSeq_annot& annot)
707 // ----------------------------------------------------------------------------
708 {
709  //location fixup:
710  for (auto itLocation: mpLocations->LocationMap()) {
711  auto id = itLocation.first;
712  auto itFeature = m_MapIdToFeature.find(id);
713  if (itFeature == m_MapIdToFeature.end()) {
714  continue;
715  }
716  CRef<CSeq_feat> pFeature = itFeature->second;
717  auto featSubType = pFeature->GetData().GetSubtype();
718  CRef<CSeq_loc> pNewLoc = mpLocations->MergeLocation(
719  featSubType, itLocation.second);
720  pFeature->SetLocation(*pNewLoc);
721  }
722 
723  //generate xrefs:
724  for (auto itLocation: mpLocations->LocationMap()) {
725  auto id = itLocation.first;
726  auto itFeature = m_MapIdToFeature.find(id);
727  if (itFeature == m_MapIdToFeature.end()) {
728  continue;
729  }
730  CRef<CSeq_feat> pFeature = itFeature->second;
731  auto featSubType = pFeature->GetData().GetSubtype();
732  switch(featSubType) {
733  default: {
734  break;
735  }
737  auto parentGeneFeatId = string("gene:") + pFeature->GetNamedQual("gene_id");
738  CRef<CSeq_feat> pParentGene;
739  if (x_GetFeatureById(parentGeneFeatId, pParentGene)) {
740  xSetAncestorXrefs(*pFeature, *pParentGene);
741  }
742  break;
743  }
745  auto parentRnaFeatId = string("transcript:") + pFeature->GetNamedQual("gene_id") +
746  "_" + pFeature->GetNamedQual("transcript_id");
747  CRef<CSeq_feat> pParentRna;
748  if (x_GetFeatureById(parentRnaFeatId, pParentRna)) {
749  xSetAncestorXrefs(*pFeature, *pParentRna);
750  }
751  auto parentGeneFeatId = string("gene:") + pFeature->GetNamedQual("gene_id");
752  CRef<CSeq_feat> pParentGene;
753  if (x_GetFeatureById(parentGeneFeatId, pParentGene)) {
754  xSetAncestorXrefs(*pFeature, *pParentGene);
755  }
756  break;
757  }
758  }
759  }
760  return CGff2Reader::xPostProcessAnnot(annot);
761 }
762 
763 END_objects_SCOPE
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
static const struct attribute attributes[]
Definition: attributes.c:165
#define bool
Definition: bool.h:34
CCdregion –.
Definition: Cdregion.hpp:66
void xSetXrefFromTo(CSeq_feat &, CSeq_feat &)
virtual bool xAddFeatureToAnnot(CRef< CSeq_feat >, CSeq_annot &)
virtual bool xParseStructuredComment(const string &)
virtual bool xParseFeature(const string &, CSeq_annot &, ILineErrorListener *)
void xPostProcessAnnot(CSeq_annot &) override
bool x_GetFeatureById(const string &, CRef< CSeq_feat > &)
IdToFeatureMap m_MapIdToFeature
static CRef< CDbtag > x_ParseDbtag(const string &)
unsigned int mCurrentFeatureCount
static string xNormalizedAttributeValue(const CTempString &)
Definition: gff2_data.cpp:343
bool xSplitGffAttributes(const string &, vector< string > &) const
Definition: gff2_data.cpp:471
static string xNormalizedAttributeKey(const CTempString &)
Definition: gff2_data.cpp:335
const string & Type() const
const string & NormalizedType() const
void AddValue(const string &key, const string &value)
Definition: gtf_reader.hpp:104
string ValueOf(const string &key) const
Definition: gtf_reader.hpp:60
const MultiAttributes & Get() const
Definition: gtf_reader.hpp:54
vector< string > MultiValue
Definition: gtf_reader.hpp:50
bool HasValue(const string &key, const string &value="") const
Definition: gtf_reader.hpp:72
CGtfAttributes mAttributes
Definition: gtf_reader.hpp:168
bool xAssignAttributesFromGff(const string &, const string &)
Definition: gtf_reader.cpp:70
const CGtfAttributes & GtfAttributes() const
Definition: gtf_reader.hpp:133
bool xFeatureSetDataGene(const CGtfReadRecord &, CSeq_feat &)
Definition: gtf_reader.cpp:500
void xFeatureAddQualifiers(const string &key, const CGtfAttributes::MultiValue &, CSeq_feat &)
Definition: gtf_reader.cpp:681
@ fGenerateChildXrefs
Definition: gtf_reader.hpp:178
bool xFeatureSetQualifiersCds(const CGtfReadRecord &record, CSeq_feat &)
Definition: gtf_reader.cpp:405
bool xCreateParentCds(const CGtfReadRecord &, CSeq_annot &)
Definition: gtf_reader.cpp:433
bool xUpdateAnnotFeature(const CGff2Record &, CSeq_annot &, ILineErrorListener *=nullptr) override
Definition: gtf_reader.cpp:184
bool xCreateParentGene(const CGtfReadRecord &, CSeq_annot &)
Definition: gtf_reader.cpp:315
unique_ptr< CGtfLocationMerger > mpLocations
Definition: gtf_reader.hpp:289
bool xFeatureSetQualifiersGene(const CGtfReadRecord &record, CSeq_feat &)
Definition: gtf_reader.cpp:346
bool xCreateParentMrna(const CGtfReadRecord &, CSeq_annot &)
Definition: gtf_reader.cpp:459
virtual bool xUpdateAnnotCds(const CGtfReadRecord &, CSeq_annot &)
Definition: gtf_reader.cpp:233
void xPostProcessAnnot(CSeq_annot &) override
Definition: gtf_reader.cpp:705
bool xFeatureSetDataMrna(const CGtfReadRecord &, CSeq_feat &)
Definition: gtf_reader.cpp:520
CRef< CSeq_feat > xFindFeatById(const string &)
Definition: gtf_reader.cpp:488
bool xFeatureSetQualifiersRna(const CGtfReadRecord &record, CSeq_feat &)
Definition: gtf_reader.cpp:377
CRef< CSeq_annot > ReadSeqAnnot(ILineReader &lr, ILineErrorListener *pErrors=nullptr) override
Read an object from a given line reader, render it as a single Seq-annot, if possible.
Definition: gtf_reader.cpp:150
virtual bool xUpdateAnnotTranscript(const CGtfReadRecord &, CSeq_annot &)
Definition: gtf_reader.cpp:244
CGtfReader(unsigned int=0, const string &="", const string &="", SeqIdResolver=CReadUtil::AsSeqId, CReaderListener *=nullptr)
Definition: gtf_reader.cpp:120
bool xFeatureTrimQualifiers(const CGtfReadRecord &, CSeq_feat &)
Definition: gtf_reader.cpp:590
virtual ~CGtfReader()
Definition: gtf_reader.cpp:143
virtual bool xFeatureSetDataRna(const CGtfReadRecord &, CSeq_feat &, CSeqFeatData::ESubtype)
Definition: gtf_reader.cpp:538
bool xProcessQualifierSpecialCase(const string &, const CGtfAttributes::MultiValue &, CSeq_feat &)
Definition: gtf_reader.cpp:636
bool xCreateFeatureId(const CGtfReadRecord &, const string &, CSeq_feat &)
Definition: gtf_reader.cpp:296
void xSetAncestorXrefs(CSeq_feat &, CSeq_feat &) override
Definition: gtf_reader.cpp:693
void xProcessData(const TReaderData &, CSeq_annot &) override
Definition: gtf_reader.cpp:161
bool xFeatureSetDataCds(const CGtfReadRecord &, CSeq_feat &)
Definition: gtf_reader.cpp:560
@RNA_ref.hpp User-defined methods of the data storage class.
Definition: RNA_ref.hpp:54
Common file reader utility functions.
Definition: read_util.hpp:47
SeqIdResolver mSeqIdResolve
virtual bool xParseBrowserLine(const string &, CSeq_annot &)
vector< TReaderLine > TReaderData
Definition: reader_base.hpp:70
TReaderFlags m_iFlags
virtual CRef< CSeq_annot > ReadSeqAnnot(CNcbiIstream &istr, ILineErrorListener *pErrors=nullptr)
Read an object from a given input stream, render it as a single Seq-annot.
virtual bool xIsTrackTerminator(const CTempString &)
CRef –.
Definition: ncbiobj.hpp:618
ESubtype GetSubtype(void) const
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
const string & GetNamedQual(const CTempString &qual_name) const
Return a named qualifier.
Definition: Seq_feat.cpp:429
void AddQualifier(const string &qual_name, const string &qual_val)
Add a qualifier to this feature.
Definition: Seq_feat.cpp:291
Abstract base class for lightweight line-by-line reading.
Definition: line_reader.hpp:54
const_iterator begin() const
Definition: map.hpp:151
const_iterator end() const
Definition: map.hpp:152
const_iterator find(const key_type &key) const
Definition: map.hpp:153
Definition: map.hpp:338
void(*)(CSeq_entry_Handle seh, IWorkbench *wb, const CSerialObject &obj) handler
char value[7]
Definition: config.c:431
Include a standard set of the NCBI C++ Toolkit most basic headers.
string
Definition: cgiapp.hpp:687
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
Definition: ncbistr.cpp:219
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3457
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
Definition: ncbistr.hpp:5429
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5083
static string Join(const TContainer &arr, const CTempString &delim)
Join strings using the specified delimiter.
Definition: ncbistr.hpp:2697
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5411
static bool SplitInTwo(const CTempString str, const CTempString delim, string &str1, string &str2, TSplitFlags flags=0)
Split a string into two pieces using the specified delimiters.
Definition: ncbistr.cpp:3550
static unsigned int StringToUInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to unsigned int.
Definition: ncbistr.cpp:642
TSyn & SetSyn(void)
Assign a value to Syn data member.
Definition: Gene_ref_.hpp:774
void SetLocus_tag(const TLocus_tag &value)
Assign a value to Locus_tag data member.
Definition: Gene_ref_.hpp:802
void SetType(TType value)
Assign a value to Type data member.
Definition: RNA_ref_.hpp:538
TDbxref & SetDbxref(void)
Assign a value to Dbxref data member.
Definition: Seq_feat_.hpp:1339
void SetLocation(TLocation &value)
Assign a value to Location data member.
Definition: Seq_feat_.cpp:131
void SetComment(const TComment &value)
Assign a value to Comment data member.
Definition: Seq_feat_.hpp:1058
void SetProduct(TProduct &value)
Assign a value to Product data member.
Definition: Seq_feat_.cpp:110
void SetCode(TCode &value)
Assign a value to Code data member.
Definition: Cdregion_.cpp:68
void SetExcept(TExcept value)
Assign a value to Except data member.
Definition: Seq_feat_.hpp:1018
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
void SetId(TId &value)
Assign a value to Id data member.
Definition: Seq_feat_.cpp:73
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_feat_.cpp:94
void SetPseudo(TPseudo value)
Assign a value to Pseudo data member.
Definition: Seq_feat_.hpp:1374
void SetExcept_text(const TExcept_text &value)
Assign a value to Except_text data member.
Definition: Seq_feat_.hpp:1414
vector< CRef< CGb_qual > > TQual
Definition: Seq_feat_.hpp:117
TQual & SetQual(void)
Assign a value to Qual data member.
Definition: Seq_feat_.hpp:1153
bool IsGenbank(void) const
Check if variant Genbank is selected.
Definition: Seq_id_.hpp:841
Lightweight interface for getting lines of data with minimal memory copying.
const struct ncbi::grid::netcache::search::fields::KEY key
static const char * prefix[]
Definition: pcregrep.c:405
Modified on Mon Sep 25 00:52:28 2023 by modify_doxy.py rev. 669887