NCBI C++ ToolKit
gff_record.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: gff_record.cpp 93574 2021-04-30 16:19:19Z stakhovv $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Frank Ludwig
27  *
28  * File Description: Write gff file
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 
37 
44 
46 
49 
50 // ----------------------------------------------------------------------------
52 // ----------------------------------------------------------------------------
// Member-initializer list plus empty body of a constructor (presumably
// CGffRecord::CGffRecord -- the line naming it, listing line 51, is absent
// from this extract). Every one of the nine record strings starts out as the
// placeholder "<FIX ME>", so a field that is never assigned later is easy to
// spot in the output.
53  m_strSeqId( "<FIX ME>" ),
54  m_strSource( "<FIX ME>" ),
55  m_strType( "<FIX ME>" ),
56  m_strStart( "<FIX ME>" ),
57  m_strEnd( "<FIX ME>" ),
58  m_strScore( "<FIX ME>" ),
59  m_strStrand( "<FIX ME>" ),
60  m_strPhase( "<FIX ME>" ),
61  m_strAttributes( "<FIX ME>" )
62 {
63 }
64 
65 // ----------------------------------------------------------------------------
67  const CSeq_feat& feature )
68 // ----------------------------------------------------------------------------
// AssignSeqId: derives m_strSeqId from the Seq-id of the feature's location.
// The field defaults to "<unknown>" and keeps that value when the feature has
// no location or the id type is not handled below. Always returns true.
//
// NOTE(review): listing lines 66 (function name), 79 (integer local-id
// branch), 87 (GI branch) and 95 (start of the statement that appends the
// version) are absent from this extract.
69 {
70  m_strSeqId = "<unknown>";
71 
72  if ( feature.CanGetLocation() ) {
73  const CSeq_loc& location = feature.GetLocation();
74  const CSeq_id* pId = location.GetId();
 // NOTE(review): pId is dereferenced without a null check -- confirm that
 // CSeq_loc::GetId() cannot return a null pointer for the locations seen here.
75  switch ( pId->Which() ) {
76 
77  case CSeq_id::e_Local:
 // Local ids are either an integer or a string; the string form is copied
 // verbatim, the integer branch body is missing from this extract.
78  if ( pId->GetLocal().IsId() ) {
80  }
81  else {
82  m_strSeqId = pId->GetLocal().GetStr();
83  }
84  break;
85 
86  case CSeq_id::e_Gi:
88  break;
89 
90  case CSeq_id::e_Other:
 // Accession, followed by "." and the version when one is available.
91  if ( pId->GetOther().CanGetAccession() ) {
92  m_strSeqId = pId->GetOther().GetAccession();
93  if ( pId->GetOther().CanGetVersion() ) {
94  m_strSeqId += ".";
96  pId->GetOther().GetVersion() );
97  }
98  }
99  break;
100 
101  default:
102  break;
103  }
104  }
105 
106  return true;
107 }
108 
109 // ----------------------------------------------------------------------------
111  const CSeq_feat& feature )
112 // ----------------------------------------------------------------------------
// AssignType: chooses the value of m_strType. Order of precedence:
//   1. the value of the first qualifier named "standard_name", if any;
//   2. a fixed string selected by the feature data subtype;
//   3. feature.GetData().GetKey() for subtypes without a dedicated case;
//   4. "region" when the feature carries no data at all.
// Always returns true.
//
// NOTE(review): listing line 110 (function name) and the case labels at
// listing lines 139, 143, 147, 151 and 155 are absent from this extract, so
// which subtype selects which string cannot be read from here.
113 {
114  m_strType = "region";
115 
116  if ( feature.CanGetQual() ) {
117  const vector< CRef< CGb_qual > >& quals = feature.GetQual();
118  vector< CRef< CGb_qual > >::const_iterator it = quals.begin();
119  while ( it != quals.end() ) {
120  if ( (*it)->CanGetQual() && (*it)->CanGetVal() ) {
121  if ( (*it)->GetQual() == "standard_name" ) {
 // An explicit standard_name qualifier wins over anything derived below.
122  m_strType = (*it)->GetVal();
123  return true;
124  }
125  }
126  ++it;
127  }
128  }
129 
130  if ( ! feature.CanGetData() ) {
131  return true;
132  }
133 
134  switch ( feature.GetData().GetSubtype() ) {
135  default:
136  m_strType = feature.GetData().GetKey();
137  break;
138 
140  m_strType = "gene";
141  break;
142 
144  m_strType = "CDS";
145  break;
146 
148  m_strType = "mRNA";
149  break;
150 
152  m_strType = "scRNA";
153  break;
154 
156  m_strType = "exon";
157  break;
158  }
159  return true;
160 }
161 
162 // ----------------------------------------------------------------------------
164  const CSeq_feat& feature )
165 // ----------------------------------------------------------------------------
// AssignStart: when the feature has a location, stores its positional start
// plus one in m_strStart as a decimal string; otherwise m_strStart is left
// untouched. Always returns true.
// NOTE(review): the "+ 1" presumably converts a 0-based position into a
// 1-based coordinate -- confirm. Listing line 163 (function name) is absent
// from this extract.
166 {
167  if ( feature.CanGetLocation() ) {
168  const CSeq_loc& location = feature.GetLocation();
169  unsigned int uStart = location.GetStart( eExtreme_Positional ) + 1;
170  m_strStart = NStr::UIntToString( uStart );
171  }
172  return true;
173 }
174 
175 // ----------------------------------------------------------------------------
177  const CSeq_feat& feature )
178 // ----------------------------------------------------------------------------
// AssignStop: when the feature has a location, stores its positional stop
// plus one in m_strEnd as a decimal string; otherwise m_strEnd is left
// untouched. Always returns true.
// NOTE(review): the "+ 1" presumably converts a 0-based position into a
// 1-based coordinate -- confirm. Listing line 176 (function name) is absent
// from this extract.
179 {
180  if ( feature.CanGetLocation() ) {
181  const CSeq_loc& location = feature.GetLocation();
182  unsigned int uEnd = location.GetStop( eExtreme_Positional ) + 1;
183  m_strEnd = NStr::UIntToString( uEnd );
184  }
185  return true;
186 }
187 
188 // ----------------------------------------------------------------------------
190  const CSeq_feat& feature )
191 // ----------------------------------------------------------------------------
// AssignSource: sets m_strSource to the value of the first qualifier named
// "source" that has a value, or to "." when the feature has no such
// qualifier. Always returns true.
// NOTE(review): listing line 189 (function name) is absent from this extract.
192 {
193  m_strSource = ".";
194 
195  if ( feature.CanGetQual() ) {
196  const vector< CRef< CGb_qual > >& quals = feature.GetQual();
197  vector< CRef< CGb_qual > >::const_iterator it = quals.begin();
198  while ( it != quals.end() ) {
199  if ( (*it)->CanGetQual() && (*it)->CanGetVal() ) {
200  if ( (*it)->GetQual() == "source" ) {
 // First match wins; later "source" qualifiers are never inspected.
201  m_strSource = (*it)->GetVal();
202  return true;
203  }
204  }
205  ++it;
206  }
207  }
208  return true;
209 }
210 
211 // ----------------------------------------------------------------------------
213  const CSeq_feat& feature )
214 // ----------------------------------------------------------------------------
// AssignScore: sets m_strScore to the value of the first qualifier named
// "score" that has a value, or to "." when the feature has no such
// qualifier. The value is copied as text; it is not parsed or validated
// here. Always returns true.
// NOTE(review): listing line 212 (function name) is absent from this extract.
215 {
216  m_strScore = ".";
217 
218  if ( feature.CanGetQual() ) {
219  const vector< CRef< CGb_qual > >& quals = feature.GetQual();
220  vector< CRef< CGb_qual > >::const_iterator it = quals.begin();
221  while ( it != quals.end() ) {
222  if ( (*it)->CanGetQual() && (*it)->CanGetVal() ) {
223  if ( (*it)->GetQual() == "score" ) {
 // First match wins; later "score" qualifiers are never inspected.
224  m_strScore = (*it)->GetVal();
225  return true;
226  }
227  }
228  ++it;
229  }
230  }
231  return true;
232 }
233 
234 // ----------------------------------------------------------------------------
236  const CSeq_feat& feature )
237 // ----------------------------------------------------------------------------
// AssignStrand: sets m_strStrand to "+" for eNa_strand_plus, "-" for
// eNa_strand_minus, and "." for every other strand value or when the feature
// has no location. Always returns true.
// NOTE(review): listing line 235 (function name) is absent from this extract.
238 {
239  m_strStrand = ".";
240  if ( feature.CanGetLocation() ) {
241  const CSeq_loc& location = feature.GetLocation();
242  ENa_strand strand = location.GetStrand();
243  switch( strand ) {
244  default:
 // Any strand other than plus/minus keeps the "." set above.
245  break;
246  case eNa_strand_plus:
247  m_strStrand = "+";
248  break;
249  case eNa_strand_minus:
250  m_strStrand = "-";
251  break;
252  }
253  }
254  return true;
255 }
256 
257 // ----------------------------------------------------------------------------
259  const CSeq_feat& feature )
260 // ----------------------------------------------------------------------------
// AssignPhase: sets m_strPhase. The value stays "." unless the feature data
// has subtype eSubtype_cdregion, in which case the coding region's frame
// selects "0", "1" or "2"; a frame matching none of the cases keeps ".".
// Always returns true.
//
// NOTE(review): listing line 258 (function name) and the three case labels at
// listing lines 277, 280 and 283 are absent from this extract, so the exact
// frame-to-phase mapping cannot be read from here.
261 {
262  m_strPhase = ".";
263 
264  if ( ! feature.CanGetData() ) {
265  return true;
266  }
267  const CSeq_feat::TData& data = feature.GetData();
268  if ( data.GetSubtype() != CSeq_feat::TData::eSubtype_cdregion ) {
269  return true;
270  }
271 
272  const CCdregion& cdr = data.GetCdregion();
273  CCdregion::TFrame frame = cdr.GetFrame();
274  switch ( frame ) {
275  default:
276  break;
278  m_strPhase = "0";
279  break;
281  m_strPhase = "1";
282  break;
284  m_strPhase = "2";
285  break;
286  }
287 
288  return true;
289 }
290 
291 // ----------------------------------------------------------------------------
293  const CSeq_annot& annot,
294  const CSeq_feat& feature )
295 // ----------------------------------------------------------------------------
// AssignAttributesCore: rebuilds m_strAttributes from scratch with the
// attributes ID, Parent, Name and Var_type, in that order of processing:
//   - ID     : from the feature id if present, otherwise from an "ID"
//              qualifier;
//   - Parent : comma-separated list built from those xrefs whose referenced
//              feature (looked up in annot) passes IsParentOf() for this
//              feature's subtype;
//   - Name, Var_type : copied from the qualifiers of the same name.
// Always returns true.
//
// NOTE(review): listing line 292 (function name) and line 327 (presumably the
// statement appending the xref id to `value`) are absent from this extract.
296 {
297  m_strAttributes = "";
298 
299  // If feature ids are present then they are likely used to show parent/child
300  // relationships, via corresponding xrefs. Thus, any feature ids override
301  // gb ID tags (feature ids and ID tags should agree in the first place, but
302  // if not, feature ids must trump ID tags).
303  //
304  bool bIdAssigned = false;
305 
306  if ( feature.CanGetId() ) {
307  const CSeq_feat::TId& id = feature.GetId();
308  string value = CGffRecord::FeatIdString( id );
309  AddAttribute( "ID", value );
310  bIdAssigned = true;
311  }
312 
313  if ( feature.CanGetXref() ) {
314  const CSeq_feat::TXref& xref = feature.GetXref();
315  string value;
316  for ( size_t i=0; i < xref.size(); ++i ) {
317 // const CSeqFeatXref& ref = *xref[i];
318  if ( xref[i]->CanGetId() && xref[i]->CanGetData() ) {
319  const CSeqFeatXref::TId& id = xref[i]->GetId();
320  CSeq_feat::TData::ESubtype other_type = GetSubtypeOf( annot, id );
 // NOTE(review): feature.GetData() is called here without a preceding
 // feature.CanGetData() check -- the CanGetData() test two lines up is on
 // the xref, not on the feature. Confirm callers guarantee data is set.
321  if ( ! IsParentOf( other_type, feature.GetData().GetSubtype() ) ) {
322  continue;
323  }
324  if ( ! value.empty() ) {
325  value += ",";
326  }
328  }
329  }
 // Emit Parent only when at least one qualifying xref was collected.
330  if ( ! value.empty() ) {
331  AddAttribute( "Parent", value );
332  }
333  }
334 
335  if ( feature.CanGetQual() ) {
336  const vector< CRef< CGb_qual > >& quals = feature.GetQual();
337  vector< CRef< CGb_qual > >::const_iterator it = quals.begin();
338  while ( it != quals.end() ) {
339  if ( (*it)->CanGetQual() && (*it)->CanGetVal() ) {
340  if ( (*it)->GetQual() == "ID" ) {
 // The qualifier-based ID is used only when no feature id supplied one.
341  if ( ! bIdAssigned ) {
342  AddAttribute( "ID", (*it)->GetVal() );
343  }
344  }
345  if ( (*it)->GetQual() == "Name" ) {
346  AddAttribute( "Name", (*it)->GetVal() );
347  }
348  if ( (*it)->GetQual() == "Var_type" ) {
349  AddAttribute( "Var_type", (*it)->GetVal() );
350  }
351  }
352  ++it;
353  }
354  }
355 
356  return true;
357 }
358 
359 // ----------------------------------------------------------------------------
361  const CSeq_feat& feature )
362 // ----------------------------------------------------------------------------
// AssignAttributesExtended: appends two optional attributes to the record:
//   - Dbxref  : the labels of all db cross-references, joined with ",";
//               skipped when the list is empty;
//   - comment : the feature comment, when one is available.
// Existing attribute text is kept; AddAttribute() only appends.
// Always returns true.
// NOTE(review): listing line 360 (function name) is absent from this extract.
363 {
364  if ( feature.CanGetDbxref() ) {
365  const CSeq_feat::TDbxref& dbxrefs = feature.GetDbxref();
366  if ( dbxrefs.size() > 0 ) {
367  string value;
 // First label goes in directly so the separator is only written between
 // entries, never in front of the first one.
368  dbxrefs[0]->GetLabel( &value );
369  for ( size_t i=1; i < dbxrefs.size(); ++i ) {
370  string label;
371  dbxrefs[i]->GetLabel( &label );
372  value += ",";
373  value += label;
374  }
375  AddAttribute( "Dbxref", value );
376  }
377  }
378  if ( feature.CanGetComment() ) {
379  AddAttribute( "comment", feature.GetComment() );
380  }
381  return true;
382 }
383 
384 // ----------------------------------------------------------------------------
386  const CSeq_annot& annot,
387  const CSeq_feat& feature )
388 // ----------------------------------------------------------------------------
// SetRecord: fills every field of the record from `feature` by calling the
// Assign* helpers one after another, stopping and returning false at the
// first helper that reports failure. Returns true when all of them succeed.
// `annot` is only forwarded to AssignAttributesCore().
// AssignAttributesCore() must run before AssignAttributesExtended(): the
// former resets m_strAttributes, the latter only appends to it.
// NOTE(review): listing line 385 (function name) is absent from this extract.
389 {
390  if ( ! AssignType( feature ) ) {
391  return false;
392  }
393  if ( ! AssignSeqId( feature ) ) {
394  return false;
395  }
396  if ( ! AssignSource( feature ) ) {
397  return false;
398  }
399  if ( ! AssignStart( feature ) ) {
400  return false;
401  }
402  if ( ! AssignStop( feature ) ) {
403  return false;
404  }
405  if ( ! AssignScore( feature ) ) {
406  return false;
407  }
408  if ( ! AssignStrand( feature ) ) {
409  return false;
410  }
411  if ( ! AssignPhase( feature ) ) {
412  return false;
413  }
414  if ( ! AssignAttributesCore( annot, feature ) ) {
415  return false;
416  }
417  if ( ! AssignAttributesExtended( feature ) ) {
418  return false;
419  }
420 
421  return true;
422 }
423 
424 // ----------------------------------------------------------------------------
426  CNcbiOstream& out )
427 // ----------------------------------------------------------------------------
// DumpRecord: writes the record to `out` as a single line -- the nine fields
// in the order seq-id, source, type, start, end, score, strand, phase,
// attributes, separated by tab characters and terminated by '\n'.
// NOTE(review): listing line 425 (function name) is absent from this extract.
428 {
 // NOTE(review): this line concatenates into a temporary string
 // (m_strSeqId + '\t') while every other field streams the tab separately;
 // the bytes written are the same, but `out << m_strSeqId << '\t'` would be
 // consistent with the lines below.
429  out << m_strSeqId + '\t';
430  out << m_strSource << '\t';
431  out << m_strType << '\t';
432  out << m_strStart << '\t';
433  out << m_strEnd << '\t';
434  out << m_strScore << '\t';
435  out << m_strStrand << '\t';
436  out << m_strPhase << '\t';
437  out << m_strAttributes << '\n';
438 }
439 
440 // ----------------------------------------------------------------------------
442  const string& key,
443  const string& value )
444 // ----------------------------------------------------------------------------
// AddAttribute: appends one attribute to m_strAttributes. A ";" separator is
// written first when the string already holds earlier attributes, followed by
// the key, then `="`, and a closing `"` at the end.
// NOTE(review): listing line 441 (function name) and line 451 (presumably the
// statement appending `value` between the quotes) are absent from this
// extract. Nothing visible here escapes quote or separator characters inside
// key or value -- confirm whether callers rely on that.
445 {
446  if ( ! m_strAttributes.empty() ) {
447  m_strAttributes += ";";
448  }
449  m_strAttributes += key;
450  m_strAttributes += "=\"";
452  m_strAttributes += "\"";
453 }
454 
455 // ----------------------------------------------------------------------------
457  const CFeat_id& id )
458 // ----------------------------------------------------------------------------
// FeatIdString: renders a feature id as text. For a local id, returns the
// integer id converted with NStr::IntToString() or the string id as is.
// Every other case -- a non-local id, or a local id that is neither integer
// nor string -- yields the fixed fallback "FEATID".
// NOTE(review): listing line 456 (function name) is absent from this extract.
459 {
460  switch ( id.Which() ) {
461  default:
462  break;
463 
464  case CFeat_id::e_Local: {
465  const CFeat_id::TLocal& local = id.GetLocal();
466  if ( local.IsId() ) {
467  return NStr::IntToString( local.GetId() );
468  }
469  if ( local.IsStr() ) {
470  return local.GetStr();
471  }
472  break;
473  }
474  }
 // Reached for every id that could not be rendered above.
475  return "FEATID";
476 }
477 
478 // ----------------------------------------------------------------------------
480  const CSeq_annot& annot,
481  const CFeat_id& id )
482 // ----------------------------------------------------------------------------
// GetSubtypeOf: scans the feature table of `annot` front to back and returns
// the data subtype of the first feature that has both an id and data and
// whose id equals `id`.
// NOTE(review): listing line 479 (function name) and line 494 (the return
// used when no feature matches) are absent from this extract.
// NOTE(review): annot.GetData().GetFtable() is called without first checking
// that the annotation actually holds a feature table -- confirm callers
// guarantee this.
483 {
484  const list< CRef< CSeq_feat > >& table = annot.GetData().GetFtable();
485  list< CRef< CSeq_feat > >::const_iterator it = table.begin();
486  while ( it != table.end() ) {
487  if ( (*it)->CanGetId() && (*it)->CanGetData() ) {
488  if ( id.Equals( (*it)->GetId() ) ) {
489  return (*it)->GetData().GetSubtype();
490  }
491  }
492  ++it;
493  }
495 }
496 
497 // ----------------------------------------------------------------------------
499  CSeq_feat::TData::ESubtype maybe_parent,
500  CSeq_feat::TData::ESubtype maybe_child )
501 // ----------------------------------------------------------------------------
// IsParentOf: decides whether a feature of subtype `maybe_parent` may act as
// parent of a feature of subtype `maybe_child`. The outer switch selects on
// the parent subtype; each handled parent has an inner switch on the child
// subtype that returns true for its direct children and otherwise recurses
// with another subtype (gene, intron, exon are visible below) standing in as
// the parent. Unhandled parent subtypes return false.
//
// NOTE(review): listing line 498 (function name), every case label of the
// outer and inner switches (listing lines 507-510, 513, 516-517, 525,
// 528-529, 537, 540) and the second operands of the two `||` expressions
// (lines 522 and 534) are absent from this extract, so the concrete
// parent/child table cannot be read from here.
502 {
503  switch ( maybe_parent ) {
504  default:
505  return false;
506 
511  return false;
512 
514  switch ( maybe_child ) {
515 
518  return true;
519 
520  default:
521  return IsParentOf( CSeq_feat::TData::eSubtype_gene, maybe_child ) ||
523  }
524 
526  switch ( maybe_child ) {
527 
530  return true;
531 
532  default:
533  return IsParentOf( CSeq_feat::TData::eSubtype_intron, maybe_child ) ||
535  }
536 
538  switch ( maybe_child ) {
539 
541  return true;
542 
543  default:
544  return IsParentOf( CSeq_feat::TData::eSubtype_exon, maybe_child );
545  }
546  }
547 
548  return false;
549 }
550 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Positional
numerical value
Definition: Na_strand.hpp:63
User-defined methods of the data storage class.
CCdregion –.
Definition: Cdregion.hpp:66
CFeat_id –.
Definition: Feat_id.hpp:66
static CSeq_feat::TData::ESubtype GetSubtypeOf(const CSeq_annot &, const CFeat_id &)
Definition: gff_record.cpp:479
string m_strType
Definition: gff_record.hpp:96
bool AssignStart(const CSeq_feat &)
Definition: gff_record.cpp:163
bool AssignScore(const CSeq_feat &)
Definition: gff_record.cpp:212
void DumpRecord(CNcbiOstream &)
Definition: gff_record.cpp:425
bool AssignPhase(const CSeq_feat &)
Definition: gff_record.cpp:258
string m_strAttributes
Definition: gff_record.hpp:102
string m_strStart
Definition: gff_record.hpp:97
bool AssignStrand(const CSeq_feat &)
Definition: gff_record.cpp:235
string m_strStrand
Definition: gff_record.hpp:100
string m_strSeqId
Definition: gff_record.hpp:94
string m_strScore
Definition: gff_record.hpp:99
bool AssignStop(const CSeq_feat &)
Definition: gff_record.cpp:176
bool AssignType(const CSeq_feat &)
Definition: gff_record.cpp:110
bool SetRecord(const CSeq_annot &, const CSeq_feat &)
Definition: gff_record.cpp:385
string m_strSource
Definition: gff_record.hpp:95
bool AssignSeqId(const CSeq_feat &)
Definition: gff_record.cpp:66
bool AssignAttributesCore(const CSeq_annot &, const CSeq_feat &)
Definition: gff_record.cpp:292
bool AssignSource(const CSeq_feat &)
Definition: gff_record.cpp:189
void AddAttribute(const string &key, const string &value)
Definition: gff_record.cpp:441
string m_strEnd
Definition: gff_record.hpp:98
static string FeatIdString(const CFeat_id &id)
Definition: gff_record.cpp:456
string m_strPhase
Definition: gff_record.hpp:101
bool AssignAttributesExtended(const CSeq_feat &)
Definition: gff_record.cpp:360
static bool IsParentOf(CSeq_feat::TData::ESubtype, CSeq_feat::TData::ESubtype)
Definition: gff_record.cpp:498
ESubtype GetSubtype(void) const
string GetKey(EVocabulary vocab=eVocabulary_full) const
@ eSubtype_bad
These no longer need to match the FEATDEF values in the C toolkit's objfdef.h.
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
std::ofstream out("events_result.xml")
main entry point for tests
static const char location[]
Definition: config.c:97
char data[12]
Definition: iconv.c:80
USING_SCOPE(objects)
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5084
static string UIntToString(unsigned int value, TNumToStringFlags flags=0, int base=10)
Convert UInt to string.
Definition: ncbistr.hpp:5109
static const char label[]
bool IsId(void) const
Check if variant Id is selected.
Definition: Object_id_.hpp:264
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
TId GetId(void) const
Get the variant data.
Definition: Object_id_.hpp:270
bool CanGetDbxref(void) const
Check if it is safe to call GetDbxref method.
Definition: Seq_feat_.hpp:1327
vector< CRef< CDbtag > > TDbxref
Definition: Seq_feat_.hpp:123
bool CanGetXref(void) const
Check if it is safe to call GetXref method.
Definition: Seq_feat_.hpp:1302
bool CanGetQual(void) const
Check if it is safe to call GetQual method.
Definition: Seq_feat_.hpp:1141
const TQual & GetQual(void) const
Get the Qual member data.
Definition: Seq_feat_.hpp:1147
const TId & GetId(void) const
Get the Id member data.
Definition: Seq_feat_.hpp:904
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
TFrame GetFrame(void) const
Get the Frame member data.
Definition: Cdregion_.hpp:534
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
bool CanGetData(void) const
Check if it is safe to call GetData method.
Definition: Seq_feat_.hpp:919
const TDbxref & GetDbxref(void) const
Get the Dbxref member data.
Definition: Seq_feat_.hpp:1333
bool CanGetId(void) const
Check if it is safe to call GetId method.
Definition: Seq_feat_.hpp:898
bool CanGetLocation(void) const
Check if it is safe to call GetLocation method.
Definition: Seq_feat_.hpp:1111
const TComment & GetComment(void) const
Get the Comment member data.
Definition: Seq_feat_.hpp:1049
const TXref & GetXref(void) const
Get the Xref member data.
Definition: Seq_feat_.hpp:1308
vector< CRef< CSeqFeatXref > > TXref
Definition: Seq_feat_.hpp:122
bool CanGetComment(void) const
Check if it is safe to call GetComment method.
Definition: Seq_feat_.hpp:1043
@ e_Local
for local software use
Definition: Feat_id_.hpp:94
@ eFrame_three
reading frame
Definition: Cdregion_.hpp:98
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
E_Choice Which(void) const
Which variant is currently selected.
Definition: Seq_id_.hpp:746
TGi GetGi(void) const
Get the variant data.
Definition: Seq_id_.hpp:889
TVersion GetVersion(void) const
Get the Version member data.
bool CanGetVersion(void) const
Check if it is safe to call GetVersion method.
const TOther & GetOther(void) const
Get the variant data.
Definition: Seq_id_.cpp:347
const TLocal & GetLocal(void) const
Get the variant data.
Definition: Seq_id_.cpp:193
bool CanGetAccession(void) const
Check if it is safe to call GetAccession method.
const TAccession & GetAccession(void) const
Get the Accession member data.
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
@ e_Other
for historical reasons, 'other' = 'refseq'
Definition: Seq_id_.hpp:104
@ e_Gi
GenInfo Integrated Database.
Definition: Seq_id_.hpp:106
@ e_Local
local use
Definition: Seq_id_.hpp:95
const TFtable & GetFtable(void) const
Get the variant data.
Definition: Seq_annot_.hpp:621
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_annot_.hpp:873
<!DOCTYPE HTML >< html > n< header > n< title > PubSeq Gateway Help Page</title > n< style > n table
int i
const struct ncbi::grid::netcache::search::fields::KEY key
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
static bool Equals(const CVariation::TPlacements &p1, const CVariation::TPlacements &p2)
#define local
Definition: zutil.h:33
Modified on Mon May 20 05:00:14 2024 by modify_doxy.py rev. 669887