NCBI C++ ToolKit
gvf_write_data.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: gvf_write_data.cpp 94017 2021-06-15 15:20:40Z ludwigf $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Frank Ludwig
27  *
28  * File Description:
29  * GVF write record (fills in source, type and attributes for GVF output)
30  *
31  */
32 
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbistd.hpp>
48 
51 #include <objmgr/mapped_feat.hpp>
52 #include <objmgr/util/feature.hpp>
53 #include <objmgr/util/sequence.hpp>
54 
56 BEGIN_objects_SCOPE // namespace ncbi::objects::
57 
59 
60 
// Builds an identifier string of the form "id_<n>" from the current value of
// the s_unique counter, then post-increments the counter.
// NOTE(review): the signature line (listing line 62) is missing from this
// extract; call sites further down refer to this helper as s_UniqueId().
61 // ----------------------------------------------------------------------------
63 // ----------------------------------------------------------------------------
64 {
65  return string( "id_" ) + NStr::IntToString( s_unique++ );
66 }
67 
// Constructor: passes the feature context `fc` on to the CGffWriteRecord base
// class; the body itself is empty.
// NOTE(review): the line naming the constructor (listing line 69) is missing
// from this extract.
68 // ----------------------------------------------------------------------------
70  CGffFeatureContext& fc )
71 // ----------------------------------------------------------------------------
72  : CGffWriteRecord( fc )
73 {
74 };
75 
// Empty body taking no parameters.
// NOTE(review): the signature line (listing line 77) is missing from this
// extract; presumably this is the destructor ~CGvfWriteRecord() -- confirm.
76 // ----------------------------------------------------------------------------
78 // ----------------------------------------------------------------------------
79 {
80 };
81 
// Fills in mMethod for the given feature. Lookup order:
//   1. the "source" field of a "GvfAttributes" user-object extension;
//   2. for variation_ref features with an id set, the Db of that id;
//   3. otherwise mMethod keeps the default ".".
// Every path returns true.
// NOTE(review): the signature line (listing line 83) is missing from this
// extract; presumably CGvfWriteRecord::AssignSource -- confirm.
82 // ----------------------------------------------------------------------------
84  const CMappedFeat& mapped_feat )
85 // ----------------------------------------------------------------------------
86 {
87  mMethod = "."; // default when nothing better is found below
88  if ( mapped_feat.IsSetExt() ) {
89  const CSeq_feat::TExt& ext = mapped_feat.GetExt();
90  if ( ext.IsSetType() && ext.GetType().IsStr() &&
91  ext.GetType().GetStr() == "GvfAttributes" )
92  {
93  if ( ext.HasField( "source" ) ) {
94  mMethod = ext.GetField( "source" ).GetData().GetStr(); // assumes the field holds string data -- TODO confirm
95  return true;
96  }
97  }
98  }
99 
100  if ( CSeqFeatData::eSubtype_variation_ref != mapped_feat.GetData().GetSubtype() ) {
101  return true; // not a variation feature: keep "."
102  }
103  const CVariation_ref& variation = mapped_feat.GetData().GetVariation();
104  if ( variation.IsSetId() ) {
105  mMethod = variation.GetId().GetDb();
106  return true;
107  }
108  return true;
109 }
110 
// Fills in mType for the given feature. Lookup order (first match wins):
//   1. the "orig-var-type" field of a "GvfAttributes" user-object extension;
//   2. non-variation features keep the default ".";
//   3. IsComplex() -> "complex_structural_alteration";
//   4. IsGain()    -> "copy_number_gain";
//   5. IsLoss()    -> "loss_of_heterozygosity" if any consequence reports
//                     loss of heterozygosity, else "copy_number_loss";
//   6. IsCNV()     -> "copy_number_variation";
//   7. instance data -> decided by the switch on the instance type.
// The second (unnamed) unsigned int parameter is not used. Every path returns
// true.
// NOTE(review): the signature line (listing line 112) is missing from this
// extract; presumably CGvfWriteRecord::AssignType -- confirm.
111 // ----------------------------------------------------------------------------
113  const CMappedFeat& mapped_feat,
114  unsigned int )
115 // ----------------------------------------------------------------------------
116 {
117  mType = "."; // default when no type can be derived
118  if ( mapped_feat.IsSetExt() ) {
119  const CSeq_feat::TExt& ext = mapped_feat.GetExt();
120  if ( ext.IsSetType() && ext.GetType().IsStr() &&
121  ext.GetType().GetStr() == "GvfAttributes" )
122  {
123  if ( ext.HasField( "orig-var-type" ) ) {
124  mType = ext.GetField( "orig-var-type" ).GetData().GetStr();
125  return true;
126  }
127  }
128  }
129 
130  if ( CSeqFeatData::eSubtype_variation_ref != mapped_feat.GetData().GetSubtype() ) {
131  return true;
132  }
133 
134  const CVariation_ref& var_ref = mapped_feat.GetData().GetVariation();
135  if ( var_ref.IsComplex() ) {
136  mType = "complex_structural_alteration";
137  return true;
138  }
139  if ( var_ref.IsGain() ) {
140  mType = "copy_number_gain";
141  return true;
142  }
143  if ( var_ref.IsLoss() ) {
144  if ( var_ref.IsSetConsequence() ) {
145  const CVariation_ref::TConsequence& consequences = var_ref.GetConsequence();
146  for ( CVariation_ref::TConsequence::const_iterator cit = consequences.begin();
147  cit != consequences.end(); ++cit )
148  {
149  if ( (*cit)->IsLoss_of_heterozygosity() ) {
150  mType = "loss_of_heterozygosity";
151  return true;
152  }
153  }
154  }
155  mType = "copy_number_loss"; // loss without a loss-of-heterozygosity consequence
156  return true;
157  }
158  if ( var_ref.IsCNV() ) {
159  mType = "copy_number_variation";
160  return true;
161  }
162 
163  if ( ! var_ref.GetData().IsInstance() ) {
164  return true;
165  }
166  switch( var_ref.GetData().GetInstance().GetType() ) {
167  default:
168  return true;
// NOTE(review): listing line 169 is missing here; presumably the `case` label
// for the SNV instance type (the member index lists eType_snv) -- confirm
// against the real source. As shown, the two statements below are unreachable.
170  mType = "single_nucleotide_variation";
171  return true;
172  }
173 
174  return true;
175 }
176 
// Runs the individual attribute assigners in a fixed order (ID, Parent, Name,
// var_type, Start_range, End_range, custom attributes) and returns false as
// soon as one of them returns false; returns true if all succeed.
// The second (unnamed) unsigned int parameter is not used.
// NOTE(review): the signature line (listing line 178) is missing from this
// extract; presumably CGvfWriteRecord::AssignAttributes -- confirm.
177 // ----------------------------------------------------------------------------
179  const CMappedFeat& mapped_feat,
180  unsigned int )
181 // ----------------------------------------------------------------------------
182 {
183  if ( ! x_AssignAttributeID( mapped_feat ) ) {
184  return false;
185  }
186  if ( ! x_AssignAttributeParent( mapped_feat ) ) {
187  return false;
188  }
189  if ( ! x_AssignAttributeName( mapped_feat ) ) {
190  return false;
191  }
192  if ( ! x_AssignAttributeVarType( mapped_feat ) ) {
193  return false;
194  }
195  if ( ! x_AssignAttributeStartRange( mapped_feat ) ) {
196  return false;
197  }
198  if ( ! x_AssignAttributeEndRange( mapped_feat ) ) {
199  return false;
200  }
201  if ( ! x_AssignAttributesCustom( mapped_feat ) ) {
202  return false;
203  }
204  return true;
205 }
206 
// Walks the fields of a "GvfAttributes" user-object extension and processes
// those whose label starts with "custom-", after stripping that prefix from
// the key. Features without such an extension are left untouched.
// Every path returns true.
// NOTE(review): the signature line (listing line 208) is missing from this
// extract; presumably CGvfWriteRecord::x_AssignAttributesCustom -- confirm.
207 // ----------------------------------------------------------------------------
209  const CMappedFeat& mf )
210 // ----------------------------------------------------------------------------
211 {
212  if ( ! mf.IsSetExt() ) {
213  return true;
214  }
215  const CSeq_feat::TExt& ext = mf.GetExt();
216  if ( ! ext.IsSetType() || ! ext.GetType().IsStr() ||
217  ext.GetType().GetStr() != "GvfAttributes" )
218  {
219  return true;
220  }
221  const CUser_object::TData& data = ext.GetData();
222  for ( CUser_object::TData::const_iterator cit = data.begin();
223  cit != data.end(); ++cit )
224  {
225  string key, value;
226  try {
227  key = (*cit)->GetLabel().GetStr();
228  value = (*cit)->GetData().GetStr();
229  }
230  catch(...) {
231  continue; // any exception while reading label or data skips this field silently
232  }
233  if ( ! NStr::StartsWith( key, "custom-" ) ) {
234  continue;
235  }
236  key = key.substr( string("custom-").length() ); // drop the "custom-" prefix
// NOTE(review): listing line 237 is missing here; presumably it stores the
// pair via SetAttribute( key, value ) -- confirm against the real source.
238  }
239  return true;
240 }
241 
// Sets the "ID" attribute. Lookup order:
//   1. the "id" field of a "GvfAttributes" user-object extension;
//   2. the label of the variation's id, when the variation has one;
//   3. otherwise a generated identifier from s_UniqueId().
// Every path returns true.
// NOTE(review): the signature line (listing line 243) is missing from this
// extract; presumably CGvfWriteRecord::x_AssignAttributeID -- confirm.
242 // ----------------------------------------------------------------------------
244  const CMappedFeat& mf )
245 // ----------------------------------------------------------------------------
246 {
247  if ( mf.IsSetExt() ) {
248  const CSeq_feat::TExt& ext = mf.GetExt();
249  if ( ext.IsSetType() && ext.GetType().IsStr() &&
250  ext.GetType().GetStr() == "GvfAttributes" )
251  {
252  if ( ext.HasField( "id" ) ) {
253  SetAttribute(
254  "ID", ext.GetField("id").GetData().GetStr());
255  return true;
256  }
257  }
258  }
259 
// NOTE(review): listing line 260 is missing here; presumably the opening
// `if` that tests for a non-variation_ref subtype, as in the sibling
// functions -- confirm against the real source.
261  SetAttribute("ID", s_UniqueId());
262  return true;
263  }
264  const CVariation_ref& var_ref = mf.GetData().GetVariation();
265  if ( ! var_ref.IsSetId() ) {
266  SetAttribute("ID", s_UniqueId()); // no id on the variation: fall back to a generated one
267  return true;
268  }
269  const CVariation_ref::TId& id = var_ref.GetId();
270  string strId;
271  id.GetLabel( &strId );
272  SetAttribute("ID", strId);
273  return true;
274 }
275 
// Sets the "Parent" attribute. Lookup order:
//   1. the "parent" field of a "GvfAttributes" user-object extension;
//   2. the label of the variation's parent id, when one is set.
// If neither is available no "Parent" attribute is set. Every path returns
// true.
// NOTE(review): the signature line (listing line 277) is missing from this
// extract; presumably CGvfWriteRecord::x_AssignAttributeParent -- confirm.
276 // ----------------------------------------------------------------------------
278  const CMappedFeat& mf )
279 // ----------------------------------------------------------------------------
280 {
281  if ( mf.IsSetExt() ) {
282  const CSeq_feat::TExt& ext = mf.GetExt();
283  if ( ext.IsSetType() && ext.GetType().IsStr() &&
284  ext.GetType().GetStr() == "GvfAttributes" )
285  {
286  if ( ext.HasField( "parent" ) ) {
287  SetAttribute(
288  "Parent", ext.GetField("parent").GetData().GetStr());
289  return true;
290  }
291  }
292  }
293 
// NOTE(review): listing line 294 is missing here; presumably the opening
// `if` that tests for a non-variation_ref subtype -- confirm against the real
// source.
295  return true;
296  }
297  const CVariation_ref& var_ref = mf.GetData().GetVariation();
298  if ( ! var_ref.IsSetParent_id() ) {
299  return true;
300  }
301  const CVariation_ref::TId& id = var_ref.GetParent_id();
302  string strId;
303  id.GetLabel( &strId );
304  SetAttribute("Parent", strId);
305  return true;
306 }
307 
// Sets the "Name" attribute from the variation's name when the variation has
// one; otherwise sets nothing. Every path returns true.
// NOTE(review): the signature line (listing line 309) is missing from this
// extract; presumably CGvfWriteRecord::x_AssignAttributeName -- confirm.
308 // ----------------------------------------------------------------------------
310  const CMappedFeat& mf )
311 // ----------------------------------------------------------------------------
312 {
// NOTE(review): listing line 313 is missing here; presumably the opening
// `if` that tests for a non-variation_ref subtype -- confirm against the real
// source.
314  return true;
315  }
316  const CVariation_ref& var_ref = mf.GetData().GetVariation();
317  if ( ! var_ref.IsSetName() ) {
318  return true;
319  }
320  SetAttribute("Name", var_ref.GetName());
321  return true;
322 }
323 
// Sets the "var_type" attribute from the "custom-var_type" field of a
// "GvfAttributes" user-object extension, when that field exists; otherwise
// sets nothing. Every path returns true.
// NOTE(review): the signature line (listing line 325) is missing from this
// extract; presumably CGvfWriteRecord::x_AssignAttributeVarType -- confirm.
324 // ----------------------------------------------------------------------------
326  const CMappedFeat& mf )
327 // ----------------------------------------------------------------------------
328 {
329  if ( mf.IsSetExt() ) {
330  const CSeq_feat::TExt& ext = mf.GetExt();
331  if ( ext.IsSetType() && ext.GetType().IsStr() &&
332  ext.GetType().GetStr() == "GvfAttributes" )
333  {
334  if ( ext.HasField( "custom-var_type" ) ) {
335  SetAttribute("var_type",
336  ext.GetField( "custom-var_type" ).GetData().GetStr());
337  return true;
338  }
339  }
340  }
341  return true;
342 }
343 
// Sets the "Start_range" attribute from the fuzz on the `from` end of an
// interval location:
//   - range fuzz: "<min+1>,<...>" built from the fuzz range bounds;
//   - limit fuzz: "<from+1>,." for eLim_gt, ".,<from+1>" for eLim_lt; other
//     limit values set nothing.
// Locations that are not a single interval, or have no fuzz_from, set nothing.
// Every path returns true.
// NOTE(review): the signature line (listing line 345) is missing from this
// extract; presumably CGvfWriteRecord::x_AssignAttributeStartRange -- confirm.
344 // ----------------------------------------------------------------------------
346  const CMappedFeat& mf )
347 // ----------------------------------------------------------------------------
348 {
349  const CSeq_loc& loc = mf.GetLocation();
350  if ( ! loc.IsInt() ) {
351  return true;
352  }
353  const CSeq_interval& intv = loc.GetInt();
354  if ( ! intv.IsSetFuzz_from() ) {
355  return true;
356  }
357  const CSeq_interval::TFuzz_from& fuzz = intv.GetFuzz_from();
358 
359  switch( fuzz.Which() ) {
360 
361  default:
362  return true;
363 
364  case CInt_fuzz::e_Range: {
365  int min = fuzz.GetRange().GetMin() + 1; // +1: presumably 0-based to 1-based coordinates -- confirm
366  int max = fuzz.GetRange().GetMax() + 1;
367  SetAttribute(
368  "Start_range", NStr::IntToString(min) + "," +
// NOTE(review): listing line 369 is missing here; presumably it appends
// NStr::IntToString(max) and closes the call -- confirm against the real
// source.
370  return true;
371  }
372  case CInt_fuzz::e_Lim: {
373  string min = NStr::IntToString( intv.GetFrom() + 1 );
374  if ( fuzz.GetLim() == CInt_fuzz::eLim_gt ) {
375  SetAttribute(
376  "Start_range", min + string(",."));
377  }
378  else if ( fuzz.GetLim() == CInt_fuzz::eLim_lt ) {
379  SetAttribute(
380  "Start_range", string(".,") + min);
381  }
382  return true;
383  }
384  }
385  return true;
386 }
387 
// Sets the "End_range" attribute from the fuzz on the `to` end of an interval
// location:
//   - range fuzz: "<min+1>,<...>" built from the fuzz range bounds;
//   - limit fuzz: "<to+1>,." for eLim_gt, ".,<to+1>" for eLim_lt; other limit
//     values set nothing.
// Locations that are not a single interval, or have no fuzz_to, set nothing.
// Every path returns true.
// NOTE(review): the signature line (listing line 389) is missing from this
// extract; presumably CGvfWriteRecord::x_AssignAttributeEndRange -- confirm.
388 // ----------------------------------------------------------------------------
390  const CMappedFeat& mf )
391 // ----------------------------------------------------------------------------
392 {
393  const CSeq_loc& loc = mf.GetLocation();
394  if ( ! loc.IsInt() ) {
395  return true;
396  }
397  const CSeq_interval& intv = loc.GetInt();
398  if ( ! intv.IsSetFuzz_to() ) {
399  return true;
400  }
401  const CSeq_interval::TFuzz_to& fuzz = intv.GetFuzz_to();
402 
403  switch( fuzz.Which() ) {
404 
405  default:
406  return true;
407 
408  case CInt_fuzz::e_Range: {
409  int min = fuzz.GetRange().GetMin() + 1; // +1: presumably 0-based to 1-based coordinates -- confirm
410  int max = fuzz.GetRange().GetMax() + 1;
411  SetAttribute(
412  "End_range", NStr::IntToString( min ) + "," +
// NOTE(review): listing line 413 is missing here; presumably it appends
// NStr::IntToString( max ) and closes the call -- confirm against the real
// source.
414  return true;
415  }
416  case CInt_fuzz::e_Lim: {
417  string max = NStr::IntToString( intv.GetTo() + 1 );
418  if ( fuzz.GetLim() == CInt_fuzz::eLim_gt ) {
419  SetAttribute("End_range", max + string(",."));
420  }
421  else if ( fuzz.GetLim() == CInt_fuzz::eLim_lt ) {
422  SetAttribute("End_range", string(".,") + max);
423  }
424  return true;
425  }
426  }
427  return true;
428 }
429 
// Renders the record's attributes into a single string via
// x_AppendAttribute(). Works on a copy of mAttributes so the member is not
// modified: "ID", "Parent", "Name", "Start_range" and "End_range" are emitted
// first, in that order, when present, and are erased from the copy; whatever
// remains is then appended in the copy's iteration order.
// NOTE(review): the signature line (listing line 431) is missing from this
// extract; presumably CGvfWriteRecord::StrAttributes() const -- confirm.
430 // ----------------------------------------------------------------------------
432 // ----------------------------------------------------------------------------
433 {
434  TAttributes temp_attrs( mAttributes.begin(), mAttributes.end() ); // working copy; entries are erased once written
435  string strAttributes;
436 
437  TAttrIt priority = temp_attrs.find("ID");
438  if ( priority != temp_attrs.end() ) {
439  x_AppendAttribute( priority, strAttributes );
440  temp_attrs.erase( priority );
441  }
442  priority = temp_attrs.find("Parent");
443  if ( priority != temp_attrs.end() ) {
444  x_AppendAttribute( priority, strAttributes );
445  temp_attrs.erase( priority );
446  }
447  priority = temp_attrs.find("Name");
448  if ( priority != temp_attrs.end() ) {
449  x_AppendAttribute( priority, strAttributes );
450  temp_attrs.erase( priority );
451  }
452  priority = temp_attrs.find("Start_range");
453  if ( priority != temp_attrs.end() ) {
454  x_AppendAttribute( priority, strAttributes );
455  temp_attrs.erase( priority );
456  }
457  priority = temp_attrs.find("End_range");
458  if ( priority != temp_attrs.end() ) {
459  x_AppendAttribute( priority, strAttributes );
460  temp_attrs.erase( priority );
461  }
462  TAttrIt other = temp_attrs.begin(); // everything not handled above
463  while ( other != temp_attrs.end() ) {
464  x_AppendAttribute( other, strAttributes );
465  other++;
466  }
467  return strAttributes;
468 }
469 
// Appends one "key=value" pair to strAttributes, preceded by ";" when the
// string already has content. Only the first value stored under the key is
// written, and it is wrapped in double quotes when it contains a space.
// NOTE(review): the signature line (listing line 471) is missing from this
// extract; presumably CGvfWriteRecord::x_AppendAttribute -- confirm.
470 // ----------------------------------------------------------------------------
472  TAttrCit it,
473  string& strAttributes ) const
474 // ----------------------------------------------------------------------------
475 {
476  string key = it->first;
477  string value = it->second.front(); // assumes the value container is non-empty -- TODO confirm
478  bool needsQuotes = ( NStr::Find( value, " " ) != NPOS );
479 
480  if ( !strAttributes.empty() ) {
481  strAttributes += ";";
482  }
483  strAttributes += key;
484  strAttributes += "=";
485  if (needsQuotes) {
486  strAttributes += "\"";
487  }
488  strAttributes += value;
489  if (needsQuotes) {
490  strAttributes += "\"";
491  }
492 }
493 
494 END_objects_SCOPE
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
Definition: Dbtag.hpp:53
void GetLabel(string *label) const
Definition: Dbtag.cpp:187
bool SetAttribute(const string &, const string &)
TAttributes mAttributes
TAttributes::iterator TAttrIt
TAttributes::const_iterator TAttrCit
virtual bool x_AssignAttributeName(const CMappedFeat &)
virtual bool x_AssignAttributesCustom(const CMappedFeat &)
virtual bool x_AssignAttributeEndRange(const CMappedFeat &)
virtual ~CGvfWriteRecord()
static int s_unique
virtual bool x_AssignAttributeVarType(const CMappedFeat &)
string StrAttributes() const override
virtual bool x_AssignAttributeID(const CMappedFeat &)
virtual bool AssignSource(const CMappedFeat &)
virtual bool x_AssignAttributeStartRange(const CMappedFeat &)
CGvfWriteRecord(CGffFeatureContext &)
void x_AppendAttribute(TAttrCit, string &) const
virtual bool AssignAttributes(const CMappedFeat &, unsigned int=0)
virtual bool AssignType(const CMappedFeat &, unsigned int=0)
virtual bool x_AssignAttributeParent(const CMappedFeat &)
CMappedFeat –.
Definition: mapped_feat.hpp:59
ESubtype GetSubtype(void) const
bool HasField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Verify that a named field exists.
const CUser_field & GetField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Access a named field in this user object.
Definition: User_object.cpp:71
bool IsLoss() const
bool IsComplex() const
bool IsGain() const
bool IsCNV() const
void erase(iterator pos)
Definition: map.hpp:167
const_iterator begin() const
Definition: map.hpp:151
const_iterator end() const
Definition: map.hpp:152
const_iterator find(const key_type &key) const
Definition: map.hpp:153
Include a standard set of the NCBI C++ Toolkit most basic headers.
char data[12]
Definition: iconv.c:80
string
Definition: cgiapp.hpp:690
const CSeqFeatData & GetData(void) const
const CUser_object & GetExt(void) const
bool IsSetExt(void) const
const CSeq_loc & GetLocation(void) const
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define NPOS
Definition: ncbistr.hpp:133
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5078
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2882
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5406
const TStr & GetStr(void) const
Get the variant data.
bool IsStr(void) const
Check if variant Str is selected.
Definition: Object_id_.hpp:291
bool IsSetType(void) const
type of object within class Check if a value has been assigned to Type data member.
const TData & GetData(void) const
Get the Data member data.
TLim GetLim(void) const
Get the variant data.
Definition: Int_fuzz_.hpp:642
TMin GetMin(void) const
Get the Min member data.
Definition: Int_fuzz_.hpp:519
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
const TData & GetData(void) const
Get the Data member data.
E_Choice Which(void) const
Which variant is currently selected.
Definition: Int_fuzz_.hpp:547
const TType & GetType(void) const
Get the Type member data.
TMax GetMax(void) const
Get the Max member data.
Definition: Int_fuzz_.hpp:472
vector< CRef< CUser_field > > TData
const TRange & GetRange(void) const
Get the variant data.
Definition: Int_fuzz_.cpp:159
@ eLim_gt
greater than
Definition: Int_fuzz_.hpp:211
@ eLim_lt
less than
Definition: Int_fuzz_.hpp:212
const TVariation & GetVariation(void) const
Get the variant data.
const TFuzz_from & GetFuzz_from(void) const
Get the Fuzz_from member data.
const TFuzz_to & GetFuzz_to(void) const
Get the Fuzz_to member data.
TFrom GetFrom(void) const
Get the From member data.
bool IsSetFuzz_to(void) const
Check if a value has been assigned to Fuzz_to data member.
TTo GetTo(void) const
Get the To member data.
bool IsInt(void) const
Check if variant Int is selected.
Definition: Seq_loc_.hpp:528
const TInt & GetInt(void) const
Get the variant data.
Definition: Seq_loc_.cpp:194
bool IsSetFuzz_from(void) const
Check if a value has been assigned to Fuzz_from data member.
TType GetType(void) const
Get the Type member data.
const TInstance & GetInstance(void) const
Get the variant data.
const TId & GetId(void) const
Get the Id member data.
bool IsSetConsequence(void) const
Check if a value has been assigned to Consequence data member.
const TData & GetData(void) const
Get the Data member data.
bool IsInstance(void) const
Check if variant Instance is selected.
list< CRef< C_E_Consequence > > TConsequence
const TConsequence & GetConsequence(void) const
Get the Consequence member data.
bool IsSetParent_id(void) const
Check if a value has been assigned to Parent_id data member.
const TParent_id & GetParent_id(void) const
Get the Parent_id member data.
const TName & GetName(void) const
Get the Name member data.
bool IsSetId(void) const
ids (i.e., SNP rsid / ssid, dbVar nsv/nssv) expected values include 'dbSNP|rs12334',...
bool IsSetName(void) const
names and synonyms some variants have well-known canonical names and possible accepted synonyms Check...
@ eType_snv
delta=[morph of length 1] NOTE: this is snV not snP; the latter requires frequency-based validation t...
const struct ncbi::grid::netcache::search::fields::KEY key
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
T max(T x_, T y_)
T min(T x_, T y_)
#define const
Definition: zconf.h:232
Modified on Fri Sep 20 14:58:18 2024 by modify_doxy.py rev. 669887