NCBI C++ ToolKit
vcf_writer.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: vcf_writer.cpp 93688 2021-05-13 15:00:32Z stakhovv $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Frank Ludwig
27  *
28  * File Description: Write vcf file
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 #include <corelib/ncbistd.hpp>
34 
43 #include <objects/general/Date.hpp>
49 #include <objects/seq/Seq_data.hpp>
50 #include <objects/seq/IUPACaa.hpp>
51 
52 #include <objmgr/scope.hpp>
53 #include <objmgr/feat_ci.hpp>
54 #include <objmgr/mapped_feat.hpp>
55 #include <objmgr/util/feature.hpp>
56 #include <objmgr/seq_vector.hpp>
58 
62 
65 
66 // ----------------------------------------------------------------------------
68 // ----------------------------------------------------------------------------
69 {
70  if ( ! annot.IsSetDesc() || ! annot.GetDesc().IsSet() ) {
71  return CConstRef<CUser_object>();
72  }
73  const list< CRef< CAnnotdesc > > descrs = annot.GetDesc().Get();
74  list< CRef< CAnnotdesc > >::const_iterator cit = descrs.begin();
75  CConstRef<CAnnotdesc> pDescMeta;
76  while ( cit != descrs.end() ) {
77  CConstRef<CAnnotdesc> pDesc = *cit;
78  cit++;
79  if ( ! pDesc->IsUser() ) {
80  continue;
81  }
82  if ( ! pDesc->GetUser().IsSetType() ) {
83  continue;
84  }
85  if ( ! pDesc->GetUser().GetType().IsStr() ) {
86  continue;
87  }
88  if ( pDesc->GetUser().GetType().GetStr() == "vcf-meta-info" ) {
89  pDescMeta = pDesc;
90  break;
91  }
92  }
93  if ( ! pDescMeta ) {
94  return CConstRef<CUser_object>();
95  }
96  return CConstRef<CUser_object>( &pDescMeta->GetUser() );
97 }
98 
99 // ----------------------------------------------------------------------------
101  CScope& scope,
102  CNcbiOstream& ostr,
103  TFlags uFlags ) :
104 // ----------------------------------------------------------------------------
105  CWriterBase( ostr, uFlags ),
106  m_Scope( scope )
107 {
108 };
109 
110 // ----------------------------------------------------------------------------
112 // ----------------------------------------------------------------------------
113 {
114 };
115 
116 // ----------------------------------------------------------------------------
118  const CSeq_annot& orig_annot,
119  const string&,
120  const string& )
121 // ----------------------------------------------------------------------------
// Writes one Seq-annot as VCF: works on a private copy of the annotation that
// is added to m_Scope, verifies that every feature location resolves to a
// bioseq, then emits init / meta / header / data in that order.
// Returns false as soon as any step fails, true when everything was written.
//
// NOTE(review): this listing was extracted with gaps (the signature line, the
// definition of `sel`, and the body of the try block are not present); the
// surviving lines are kept verbatim.
122 {
123  CRef<CSeq_annot> annot(new CSeq_annot);
124  annot->Assign(orig_annot);
125  CSeq_annot_Handle sah = m_Scope.AddSeq_annot( *annot );
127
128  CFeat_CI mf(sah, sel);
// Pre-flight check: refuse the whole annotation if any feature sits on a
// Seq-id the scope cannot resolve.
129  for ( ; mf; ++mf )
130  {
131  const CSeq_id *seq_id = mf->GetLocation().GetId();
132  const CBioseq_Handle& bsh = m_Scope.GetBioseqHandle( *seq_id );
133  if (!bsh)
134  {
135  string label;
136  seq_id->GetLabel(&label);
137  ERR_POST(Error << "Cannot process Seq-id: " << label << Endm);
138  return false;
139  }
140  }
141  try
142  {
144  }
// Any exception from the (missing) try body is reported as plain failure.
145  catch(...)
146  {
147  return false;
148  }
149  if ( ! x_WriteInit( *annot ) ) {
150  return false;
151  }
152  if ( ! x_WriteMeta( *annot ) ) {
153  return false;
154  }
155  if ( ! x_WriteHeader( *annot ) ) {
156  return false;
157  }
158  if ( ! x_WriteData( *annot ) ) {
159  return false;
160  }
// Every stage succeeded. (Previously this returned false, so the function
// could never report success.)
161  return true;
162 }
163 
164 
165 // ----------------------------------------------------------------------------
167 // ----------------------------------------------------------------------------
// Returns the annotation selector held in m_Selector.
// NOTE(review): the statement inside the `if` (presumably the lazy creation of
// the selector when m_Selector is still empty) and the signature line are
// missing from this listing -- confirm against the real file.
168 {
169  if (!m_Selector.get()) {
171  }
172  return *m_Selector;
173 }
174 
175 
176 // ----------------------------------------------------------------------------
178  const CSeq_annot& annot )
179 // ----------------------------------------------------------------------------
180 {
181  CConstRef<CUser_object> pVcfMetaInfo = s_GetVcfMetaInfo( annot );
182  if ( !pVcfMetaInfo || !pVcfMetaInfo->HasField("genotype-headers") ) {
183  return true;
184  }
185  m_GenotypeHeaders.clear();
186  const CUser_field::C_Data::TStrs& strs =
187  pVcfMetaInfo->GetField("genotype-headers").GetData().GetStrs();
188  copy(strs.begin(), strs.end(), back_inserter(m_GenotypeHeaders));
189  return true;
190 }
191 
192 // ----------------------------------------------------------------------------
194  const CSeq_annot& annot )
195 // ----------------------------------------------------------------------------
196 {
197  CConstRef<CUser_object> pVcfMetaInfo = s_GetVcfMetaInfo( annot );
198  if ( !pVcfMetaInfo ) {
199  return x_WriteMetaCreateNew( annot );
200  }
201  const CAnnotdesc::TUser& meta = *pVcfMetaInfo;
202  const CUser_field::C_Data::TStrs& directives =
203  meta.GetFieldRef("meta-information")->GetData().GetStrs();
204  for (CUser_field::C_Data::TStrs::const_iterator cit = directives.begin();
205  cit != directives.end(); ++cit ) {
206  m_Os << "##" << *cit << '\n';
207  }
208  return true;
209 }
210 
211 // ----------------------------------------------------------------------------
213  const CSeq_annot& annot )
214 // ----------------------------------------------------------------------------
215 {
216  string datestr;
217  if ( annot.IsSetDesc() ) {
218  const CAnnot_descr& desc = annot.GetDesc();
219  for ( list< CRef< CAnnotdesc > >::const_iterator cit = desc.Get().begin();
220  cit != desc.Get().end(); ++cit )
221  {
222  if ( (*cit)->IsCreate_date() ) {
223  const CDate& date = (*cit)->GetCreate_date();
224  if ( date.IsStd() ) {
225  date.GetDate( &datestr, "%4Y%2M%2D" );
226  }
227  }
228  }
229  }
230 
231  m_Os << "##fileformat=VCFv4.1"
232  << '\n';
233  if ( ! datestr.empty() ) {
234  m_Os << "##filedate=" << datestr << '\n';
235  }
236  m_Os << "##INFO=<ID=DB,Number=0,Type=Flag,Description=\"dbSNP Membership\">"
237  << '\n';
238  m_Os << "##INFO=<ID=H2,Number=0,Type=Flag,Description=\"Hapmap2 Membership\">"
239  << '\n';
240  m_Os << "##INFO=<ID=H3,Number=0,Type=Flag,Description=\"Hapmap3 Membership\">"
241  << '\n';
242  m_Os << "##INFO=<ID=RL,Number=1,Type=String,Description=\"Resource Link\">"
243  << '\n';
244  m_Os << "##INFO=<ID=FBV,Number=1,Type=String,Description=\"Frequency Based Validation\">"
245  << '\n';
246  m_Os << "##INFO=<ID=GTP,Number=1,Type=String,Description=\"Genotype\">"
247  << '\n';
248  m_Os << "##INFO=<ID=QC,Number=1,Type=String,Description=\"Quality Check\">"
249  << '\n';
250  return true;
251 }
252 
253 // ----------------------------------------------------------------------------
255  const CSeq_annot& annot )
256 // ----------------------------------------------------------------------------
257 {
258  m_Os << "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO";
259 
260  CConstRef<CUser_object> pVcfMetaInfo = s_GetVcfMetaInfo( annot );
261  if (m_GenotypeHeaders.empty()) {
262  m_Os << '\n';
263  return true;
264  }
265  m_Os << "\tFORMAT";
266  for ( vector<string>::const_iterator cit = m_GenotypeHeaders.begin();
267  cit != m_GenotypeHeaders.end(); ++cit ) {
268  m_Os << '\t' << *cit;
269  }
270  m_Os << '\n';
271  return true;
272 }
273 
274 // ----------------------------------------------------------------------------
276  const CSeq_annot& annot )
277 // ----------------------------------------------------------------------------
// Writes one VCF record per feature found by the feature iterator.
// Returns false as soon as x_WriteFeature fails for any feature, else true.
// NOTE(review): the signature line and the lines that define `sah` and `sel`
// are missing from this listing.
278 {
282
283  CFeat_CI fi(sah, sel);
284
285  const auto& range = GetRange();
286
// No range restriction: write every feature as is.
287  if (range.IsWhole()) {
288  for ( ; fi; ++fi ) {
289  if ( ! x_WriteFeature(*fi) ) {
290  return false;
291  }
292  }
293  return true;
294  }
295
296
// Range restriction: features that do not intersect the range are skipped;
// the others are replaced (through an edit handle) by a copy trimmed to the
// range before being written.
297  for (; fi; ++fi) {
298  CMappedFeat mapped_feat = *fi;
299  if (mapped_feat.GetTotalRange().IntersectionWith(range).NotEmpty()) { // This code should be moved into the writer base class
300  CSeq_feat_Handle sfh = mapped_feat.GetSeq_feat_Handle();
301  CSeq_feat_EditHandle sfeh(sfh);
302  CRef<CSeq_feat> trimmed_feat = sequence::CFeatTrim::Apply(*mapped_feat.GetOriginalSeq_feat(), range);
303  sfeh.Replace(*trimmed_feat);
304  if (!x_WriteFeature(mapped_feat)) {
305  return false;
306  }
307  }
308  }
309  return true;
310 }
311 
312 
313 void CVcfWriter::x_GetTypeRefAlt(const CVariation_inst &inst, int &rtype, string &ref, list<int>& alt_types, vector<string> &alt)
314 {
315  int current_type = inst.GetType();
316  if (current_type != CVariation_inst::eType_identity)
317  rtype = current_type;
318  if (inst.IsSetDelta() && !inst.GetDelta().empty() && inst.GetDelta().front()->IsSetSeq()
319  && inst.GetDelta().front()->GetSeq().IsLiteral()
320  && inst.GetDelta().front()->GetSeq().GetLiteral().IsSetSeq_data()
321  && inst.GetDelta().front()->GetSeq().GetLiteral().GetSeq_data().IsIupacna())
322  {
323  string a = inst.GetDelta().front()->GetSeq().GetLiteral().GetSeq_data().GetIupacna().Get();
324 
325  if (!a.empty())
326  {
327  if (current_type == CVariation_inst::eType_identity ) {
328  ref = a;
329  return;
330  }
331 
332  if (current_type != CVariation_inst::eType_del) {
333  alt.push_back(a);
334  }
335  } else if (current_type == CVariation_inst::eType_delins) {
336  current_type = CVariation_inst::eType_del; // delins with an empty insertion string becomes a del
337  }
338 
339  alt_types.push_back(current_type);
340  } else if (current_type == CVariation_inst::eType_del ||
341  current_type == CVariation_inst::eType_delins) {
342  alt_types.push_back(CVariation_inst::eType_del);
343  }
344 }
345 
346 // ----------------------------------------------------------------------------
348  CFeat_CI feat_it)
349 // ----------------------------------------------------------------------------
350 {
351  if (!feat_it) {
352  return false;
353  }
354 
355  return x_WriteFeature(*feat_it);
356 }
357 
358 
359 // ----------------------------------------------------------------------------
361  const CMappedFeat& mf )
362 // ----------------------------------------------------------------------------
363 {
364  if (IsCanceled()) {
365  NCBI_THROW(
367  eInterrupted,
368  "Processing terminated by user");
369  }
370 
371  const CVariation_ref& vr = mf.GetData().GetVariation();
373  string ref;
374  vector<string> alt;
375  list<int> alt_types;
376  switch(vr.GetData().Which())
377  {
379  x_GetTypeRefAlt(vr.GetData().GetInstance(), type, ref, alt_types, alt);
380  break;
382  for (CVariation_ref::TData::TSet::TVariations::const_iterator inst = vr.GetData().GetSet().GetVariations().begin(); inst != vr.GetData().GetSet().GetVariations().end(); ++inst)
383  {
384  if ( (*inst)->IsSetData() && (*inst)->GetData().IsInstance() ) {
385  x_GetTypeRefAlt((*inst)->GetData().GetInstance(), type, ref, alt_types, alt);
386  }
387  }
388  break;
389  default:
390  break;
391  }
392 
396  {
397  ERR_POST(Warning << "Cannot process type: " << type << Endm);
398  return false;
399  }
400 
401  const CSeq_loc& loc = mf.GetLocation();
402  unsigned int start = loc.GetStart(eExtreme_Positional) + 1; // position in VCF is 1-based
403  string anchor;
405  {
407  if (loc.IsSetStrand())
408  strand = loc.GetStrand();
409  const CSeq_id *seq_id = loc.GetId();
410  int pos = start;
411  pos--; // going to 0-based position for GetSeqData
412  if (type == CVariation_inst::eType_ins) // assuming "insert before" semantics
413  {
414  if (pos > 0)
415  pos--;
416  }
417  else
418  {
419  if (pos > 0)
420  pos--;
421  else
422  pos = loc.GetStop(eExtreme_Positional) + 1 ;
423  }
424  const CBioseq_Handle& bsh = m_Scope.GetBioseqHandle( *seq_id );
425  if (bsh)
426  {
428  if (seqvec)
429  {
430  try
431  {
432  seqvec->GetSeqData(pos, pos+1, anchor);
433  }
434  catch(...) {}
435 
436  if (anchor.empty()) {
437  string msg = "Missing sequence data";
438  NCBI_THROW(CObjWriterException, eBadInput, msg);
439  }
440  }
441  }
442  else // if id == "local id 1"
443  {
444  string label;
445  seq_id->GetLabel(&label);
446  ERR_POST(Error << "Cannot process Seq-id: " << label << Endm);
447  return true;
448  }
449  }
450 
451  if (!x_WriteFeatureChrom(mf)) {
452  return false;
453  }
454  if (!x_WriteFeaturePos(mf, start, type)) {
455  return false;
456  }
457  if (!x_WriteFeatureId(mf)) {
458  return false;
459  }
460  if (!x_WriteFeatureRef(start, type, anchor, ref)) {
461  return false;
462  }
463  if (!x_WriteFeatureAlt(start, type, anchor, alt_types, alt)) {
464  return false;
465  }
466  if (!x_WriteFeatureQual(mf)) {
467  return false;
468  }
469  if (!x_WriteFeatureFilter(mf)) {
470  return false;
471  }
472  if (!x_WriteFeatureInfo(mf)) {
473  return false;
474  }
475  if (!x_WriteFeatureGenotypeData(mf)) {
476  return false;
477  }
478  m_Os << '\n';
479  return true;
480 }
481 
482 // ----------------------------------------------------------------------------
484  const CMappedFeat& mf )
485 // ----------------------------------------------------------------------------
// Writes the CHROM column (no leading tab: it is the first column).
// The value is taken from a feature extension of type "VCF_COLUMN_1_ID" whose
// first field holds a string; if several such extensions exist the last one
// wins. Always returns true.
// NOTE(review): the fallback executed when no such extension is found (body
// of the `if (id.empty())` block) and the signature line are missing from
// this listing.
486 {
487  string id;
488  if (mf.IsSetExts())
489  for (CSeq_feat::TExts::const_iterator uo = mf.GetExts().begin(); uo != mf.GetExts().end(); ++uo)
490  {
491  if ((*uo)->IsSetType() && (*uo)->GetType().IsStr() && (*uo)->GetType().GetStr() == "VCF_COLUMN_1_ID"
492  && (*uo)->IsSetData() && !(*uo)->GetData().empty() && (*uo)->GetData().front()->IsSetData() && (*uo)->GetData().front()->GetData().IsStr())
493  {
494  id = (*uo)->GetData().front()->GetData().GetStr();
495  }
496  }
497
498  if (id.empty())
499  {
501  }
502  m_Os << id;
503  return true;
504 }
505 
506 /*
507 enum EType {
508  eType_unknown = 0, ///< delta=[]
509  eType_identity = 1, ///< delta=[]
510  eType_inv = 2, ///< delta=[del, ins.seq= RevComp(variation-location)]
511  eType_snv = 3, ///< delta=[morph of length 1] NOTE: this is snV not snP; the latter requires frequency-based validation to be established in VariantProperties the strict definition of SNP is an SNV with an e
512  eType_mnp = 4, ///< delta=[morph of length >1]
513  eType_delins = 5, ///< delta=[del, ins]
514  eType_del = 6, ///< delta=[del]
515  eType_ins = 7, ///< delta=[ins]
516  eType_microsatellite = 8, ///< delta=[del, ins.seq= repeat-unit with fuzzy multiplier] variation-location is the microsat expansion on the sequence
517  eType_transposon = 9, ///< delta=[del, ins.seq= known donor or 'this'] variation-location is equiv of transposon locs.
518  eType_cnv = 10, ///< delta=[del, ins= 'this' with fuzzy multiplier]
519  eType_direct_copy = 11, ///< delta=[ins.seq= upstream location on the same strand]
520  eType_rev_direct_copy = 12, ///< delta=[ins.seq= downstream location on the same strand]
521  eType_inverted_copy = 13, ///< delta=[ins.seq= upstream location on the opposite strand]
522  eType_everted_copy = 14, ///< delta=[ins.seq= downstream location on the opposite strand]
523  eType_translocation = 15, ///< delta=like delins
524  eType_prot_missense = 16, ///< delta=[morph of length 1]
525  eType_prot_nonsense = 17, ///< delta=[del]; variation-location is the tail of the protein being truncated
526  eType_prot_neutral = 18, ///< delta=[morph of length 1]
527  eType_prot_silent = 19, ///< delta=[morph of length 1, same AA as at variation-location]
528  eType_prot_other = 20, ///< delta=any
529  eType_other = 255 ///< delta=any
530  };
531 */
532 
533 // ----------------------------------------------------------------------------
535  const CMappedFeat& mf,
536  unsigned int start,
537  const int type
538  )
539 // ----------------------------------------------------------------------------
// Writes the POS column: a tab followed by `start` in decimal.
// `start` is taken by value, so the adjustment below stays local.
// Always returns true.
// NOTE(review): the line in front of `start--` (presumably the condition
// under which the position is moved back by one) and the signature line are
// missing from this listing -- confirm against the real file.
540 {
541  m_Os << "\t";
542
544  start--;
545
546  m_Os << NStr::UIntToString(start);
547  return true;
548 }
549 
550 // ----------------------------------------------------------------------------
552  const CMappedFeat& mf )
553 // ----------------------------------------------------------------------------
554 {
555  m_Os << "\t";
556 
557  vector<string> ids;
558  const CVariation_ref& var = mf.GetData().GetVariation();
559  if ( var.IsSetId() ) {
560  if (var.GetId().GetTag().IsStr() ) {
561  ids.push_back( var.GetId().GetTag().GetStr() );
562  }
563  else {
564  ids.push_back( NStr::IntToString(var.GetId().GetTag().GetId()) );
565  }
566  }
567 
568  if ( ids.empty() ) {
569  m_Os << ".";
570  }
571  else {
572  m_Os << NStr::Join( ids, ";" );
573  }
574  return true;
575 }
576 
577 // ----------------------------------------------------------------------------
579  const unsigned int start,
580  const int type,
581  const string &anchor,
582  const string &ref
583  )
584 // ----------------------------------------------------------------------------
// Writes the REF column (a tab, then the reference allele).
//  - With an anchor base: the first branch writes anchor+ref (or ref+anchor
//    when start is not greater than 1); an insertion writes the anchor alone.
//    Any other type writes nothing after the tab.
//  - Without an anchor: writes `ref`, or "?" when `ref` is empty as well.
// Always returns true.
// NOTE(review): the condition of the first branch and the signature line are
// missing from this listing.
585 {
586  m_Os << "\t";
587
588  if (!anchor.empty())
589  {
591  {
// The anchor precedes the allele unless the variation starts at position 1,
// where the anchor is the base that follows it.
592  if (start > 1)
593  m_Os << anchor << ref;
594  else
595  m_Os << ref << anchor;
596  }
597  else if (type == CVariation_inst::eType_ins)
598  {
599  m_Os << anchor;
600  }
601  return true;
602  }
603  if (!ref.empty())
604  {
605  m_Os << ref;
606  return true;
607  }
608
609  m_Os << "?";
610  return true;
611 }
612 
613 // ----------------------------------------------------------------------------
615  const unsigned int start,
616  const int type,
617  const string &anchor,
618  const list<int>& alt_types,
619  const vector<string> &alt
620  )
621 // ----------------------------------------------------------------------------
622 {
623  m_Os << "\t";
624 
625  size_t count = 0;
626  int index=0;
627 
628  for (auto alt_type : alt_types) {
629  if (count) {
630  m_Os << ",";
631  }
632 
633  if (alt_type != CVariation_inst::eType_del) {
634  const string alt_string = alt[index++];
635  if (alt_type == CVariation_inst::eType_ins ||
636  alt_type == CVariation_inst::eType_delins) {
637 
638  if (start > 1) {
639  m_Os << anchor << alt_string;
640  } else {
641  m_Os << alt_string << anchor;
642  }
643  } else {
644  m_Os << alt_string;
645  }
646  ++count;
647  continue;
648  }
649 
650  // CVariation_inst::eType_del
651  if (!anchor.empty()) {
652  m_Os << anchor;
653  ++count;
654  }
655  }
656 
657  if (!count) {
658  m_Os << ".";
659  }
660  return true;
661 }
662 
663 // ----------------------------------------------------------------------------
665  const CMappedFeat& mf )
666 // ----------------------------------------------------------------------------
667 {
668  string score = ".";
669 
670  m_Os << "\t";
671 
672  if ( mf.IsSetExt() ) {
673  const CSeq_feat::TExt& ext = mf.GetExt();
674  if ( ext.IsSetType() && ext.GetType().IsStr() &&
675  ext.GetType().GetStr() == "VcfAttributes" )
676  {
677  if ( ext.HasField( "score" ) ) {
678  score = NStr::DoubleToString(
679  ext.GetField( "score" ).GetData().GetReal() );
680  }
681  }
682  }
683  m_Os << score;
684  return true;
685 }
686 
687 // ----------------------------------------------------------------------------
689  const CMappedFeat& mf )
690 // ----------------------------------------------------------------------------
691 {
692  m_Os << "\t";
693 
694  vector<string> filters;
695  if ( mf.IsSetExt() ) {
696  const CSeq_feat::TExt& ext = mf.GetExt();
697  if ( ext.IsSetType() && ext.GetType().IsStr() &&
698  ext.GetType().GetStr() == "VcfAttributes" )
699  {
700  if ( ext.HasField( "filter" ) ) {
701  filters.push_back( ext.GetField( "filter" ).GetData().GetStr() );
702  }
703  }
704  }
705  if ( ! filters.empty() ) {
706  m_Os << NStr::Join( filters, ":" );
707  }
708  else {
709  m_Os << ".";
710  }
711  return true;
712 }
713 
714 // ----------------------------------------------------------------------------
716  const CMappedFeat& mf )
717 // ----------------------------------------------------------------------------
718 {
719  typedef CVariantProperties VP;
720 
721  m_Os << "\t";
722 
723  vector<string> infos;
724  const CVariation_ref& var = mf.GetData().GetVariation();
725 
726  if (var.IsSetId()) {
727  string db = var.GetId().GetDb();
728  NStr::ToLower(db);
729  if (db == "dbsnp") {
730  infos.push_back("DB");
731  }
732  if (db == "hapmap2") {
733  infos.push_back("H2");
734  }
735  if (db == "hapmap3") {
736  infos.push_back("H3");
737  }
738  }
739 
740  if (mf.IsSetDbxref()) {
741  const vector<CRef<CDbtag> >& refs = mf.GetDbxref();
742  string pmids;
743  for ( vector<CRef<CDbtag> >::const_iterator cit = refs.begin();
744  cit != refs.end(); ++cit)
745  {
746  const CDbtag& ref = **cit;
747  if (ref.IsSetDb() && ref.IsSetTag() && ref.GetDb() == "PM") {
748  if (!pmids.empty()) {
749  pmids += ",";
750  }
751  pmids += "PM:";
752  pmids += NStr::IntToString(ref.GetTag().GetId());
753  }
754  }
755  if (!pmids.empty()) {
756  //infos.push_back("PMC");
757  infos.push_back(string("PMID=")+pmids);
758  }
759  }
760 
761  if (var.IsSetVariant_prop()) {
762  const CVariantProperties& props = var.GetVariant_prop();
763  if ( props.IsSetAllele_frequency()) {
764  infos.push_back( string("AF=") +
766  }
767  if (props.IsSetResource_link()) {
768  int rl = props.GetResource_link();
769  if (rl & VP::eResource_link_preserved) {
770  infos.push_back("PM");
771  }
772  if (rl & VP::eResource_link_provisional) {
773  infos.push_back("TPA");
774  }
775  if (rl & VP::eResource_link_has3D) {
776  infos.push_back("S3D");
777  }
778  if (rl & VP::eResource_link_submitterLinkout) {
779  infos.push_back("SLO");
780  }
781  if (rl & VP::eResource_link_clinical) {
782  infos.push_back("CLN");
783  }
784  if (rl & VP::eResource_link_genotypeKit) {
785  infos.push_back("HD");
786  }
787  }
788  if (props.IsSetGene_location()) {
789  int gl = props.GetGene_location();
790  if (gl & VP::eGene_location_near_gene_5) {
791  infos.push_back("R5");
792  }
793  if (gl & VP::eGene_location_near_gene_3) {
794  infos.push_back("R3");
795  }
796  if (gl & VP::eGene_location_intron) {
797  infos.push_back("INT");
798  }
799  if (gl & VP::eGene_location_donor) {
800  infos.push_back("DSS");
801  }
802  if (gl & VP::eGene_location_acceptor) {
803  infos.push_back("ASS");
804  }
805  if (gl & VP::eGene_location_utr_5) {
806  infos.push_back("U5");
807  }
808  if (gl & VP::eGene_location_utr_3) {
809  infos.push_back("U3");
810  }
811  }
812 
813  if (props.IsSetEffect()) {
814  int effect = props.GetEffect();
815  if (effect & VP::eEffect_synonymous) {
816  infos.push_back("SYN");
817  }
818  if (effect & VP::eEffect_stop_gain) {
819  infos.push_back("NSN");
820  }
821  if (effect & VP::eEffect_missense) {
822  infos.push_back("NSM");
823  }
824  if (effect & VP::eEffect_frameshift) {
825  infos.push_back("NSF");
826  }
827  }
828 
829  if (props.IsSetFrequency_based_validation()) {
830  int fbv = props.GetFrequency_based_validation();
831  if (fbv & VP::eFrequency_based_validation_is_mutation) {
832  infos.push_back("MUT");
833  }
834  if (fbv & VP::eFrequency_based_validation_above_5pct_all) {
835  infos.push_back("G3");
836  }
837  if (fbv & VP::eFrequency_based_validation_above_5pct_1plus) {
838  infos.push_back("G5");
839  }
840  if (fbv & VP::eFrequency_based_validation_validated) {
841  infos.push_back("VLD");
842  }
843  }
844 
845  if (props.IsSetAllele_frequency()) {
846  double alfrq = props.GetAllele_frequency();
847  infos.push_back(string("GMAF=") + NStr::DoubleToString(alfrq));
848  }
849 
850  if (props.IsSetGenotype()) {
851  int gt = props.GetGenotype();
852  if (gt & VP::eGenotype_has_genotypes) {
853  infos.push_back("GNO");
854  }
855  }
856 
857  if (props.IsSetQuality_check()) {
858  int qc = props.GetQuality_check();
859  if (qc & VP::eQuality_check_contig_allele_missing) {
860  infos.push_back("NOC");
861  }
862  if (qc & VP::eQuality_check_withdrawn_by_submitter) {
863  infos.push_back("WTD");
864  }
865  if (qc & VP::eQuality_check_non_overlapping_alleles) {
866  infos.push_back("NOV");
867  }
868  if (qc & VP::eQuality_check_genotype_conflict) {
869  infos.push_back("GCF");
870  }
871  }
872 
873  if (var.IsSetOther_ids()) {
874  const list<CRef<CDbtag> >& oids = var.GetOther_ids();
875  list<CRef<CDbtag> >::const_iterator cit;
876  for (cit = oids.begin(); cit != oids.end(); ++cit) {
877  const CDbtag& dbtag = **cit;
878  if (dbtag.GetType() != CDbtag::eDbtagType_BioProject) {
879  continue;
880  }
881  if (!dbtag.CanGetTag()) {
882  continue;
883  }
884  if (!dbtag.GetTag().IsId()) {
885  continue;
886  }
887  int id = dbtag.GetTag().GetId();
888  if (id == 60835) {
889  infos.push_back("PH3");
890  }
891  else if (id == 28889) {
892  infos.push_back("KGPhase1");
893  }
894  }
895  }
896  }
897 
898  if ( mf.IsSetExt() ) {
899  string info = ".";
900  const CSeq_feat::TExt& ext = mf.GetExt();
901  if ( ext.IsSetType() && ext.GetType().IsStr() &&
902  ext.GetType().GetStr() == "VcfAttributes" )
903  {
904  if ( ext.HasField( "info" ) ) {
905  vector<string> extraInfos;
906  info = ext.GetField( "info" ).GetData().GetStr();
908  for (vector<string>::const_iterator cit = extraInfos.begin();
909  cit != extraInfos.end();
910  ++cit) {
911  string value = *cit;
912  vector<string>::iterator fit =
913  std::find(infos.begin(), infos.end(), value);
914  if (fit == infos.end()) {
915  infos.push_back(value);
916  }
917  }
918  }
919  }
920  }
921 
922  if (infos.empty()) {
923  m_Os << ".";
924  }
925  else {
926  m_Os << NStr::Join(infos, ";");
927  }
928  return true;
929 }
930 
931 // ----------------------------------------------------------------------------
933  const CMappedFeat& mf )
934 // ----------------------------------------------------------------------------
935 {
936  if (m_GenotypeHeaders.empty()) {
937  return true;
938  }
939 
940  CConstRef<CUser_field> pFormat = mf.GetExt().GetFieldRef("format");
941  const CUser_field_Base::C_Data::TStrs& labels = pFormat->GetData().GetStrs();
942  m_Os << "\t" << NStr::Join(labels, ":");
943 
944  CConstRef<CUser_field> pGenotypeData = mf.GetExt().GetFieldRef("genotype-data");
945  const vector<CRef<CUser_field> > columns = pGenotypeData->GetData().GetFields();
946  for ( size_t hpos = 0; hpos < m_GenotypeHeaders.size(); ++hpos ) {
947 
948  _ASSERT(m_GenotypeHeaders[hpos] == columns[hpos]->GetLabel().GetStr());
949 
950  string values = NStr::Join( columns[hpos]->GetData().GetStrs(), ":" );
951  m_Os << "\t" << values;
952  }
953  return true;
954 }
955 
static CRef< CScope > m_Scope
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Positional
numerical value
Definition: Na_strand.hpp:63
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
CAnnot_descr –.
Definition: Annot_descr.hpp:66
CBioseq_Handle –.
Definition: Date.hpp:53
void GetDate(string *label, bool year_only=false) const
Append a standardized string representation of the date to the label.
Definition: Date.hpp:149
Definition: Dbtag.hpp:53
EDbtagType GetType(void) const
Definition: Dbtag.cpp:289
@ eDbtagType_BioProject
Definition: Dbtag.hpp:195
CFeat_CI –.
Definition: feat_ci.hpp:64
static CGenbankIdResolve & Get()
bool GetBestId(CSeq_id_Handle, CScope &, string &)
bool IsCanceled() const
Definition: writer.hpp:62
CMappedFeat –.
Definition: mapped_feat.hpp:59
CScope –.
Definition: scope.hpp:92
CSeqVector –.
Definition: seq_vector.hpp:65
CSeq_annot_Handle –.
CSeq_feat_EditHandle –.
CSeq_feat_Handle –.
CConstRef< CUser_field > GetFieldRef(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Definition: User_object.cpp:84
bool HasField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Verify that a named field exists.
const CUser_field & GetField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Access a named field in this user object.
Definition: User_object.cpp:71
static void NormalizeVariation(CVariation &var, ETargetContext target_ctxt, CScope &scope)
CVariation_inst –.
bool x_WriteFeaturePos(const CMappedFeat &, unsigned int start, const int type)
Definition: vcf_writer.cpp:534
bool x_WriteHeader(const CSeq_annot &)
Definition: vcf_writer.cpp:254
bool x_WriteData(const CSeq_annot &)
Definition: vcf_writer.cpp:275
bool x_WriteFeatureAlt(const unsigned int start, const int type, const string &anchor, const list< int > &alt_types, const vector< string > &alt)
Definition: vcf_writer.cpp:614
vector< string > m_GenotypeHeaders
Definition: vcf_writer.hpp:128
virtual SAnnotSelector & SetAnnotSelector(void) override
Definition: vcf_writer.cpp:166
bool x_WriteFeatureQual(const CMappedFeat &)
Definition: vcf_writer.cpp:664
bool x_WriteFeatureGenotypeData(const CMappedFeat &)
Definition: vcf_writer.cpp:932
bool x_WriteInit(const CSeq_annot &)
Definition: vcf_writer.cpp:177
virtual ~CVcfWriter()
Definition: vcf_writer.cpp:111
bool xWriteFeature(CFeat_CI feat_it) override
Definition: vcf_writer.cpp:347
CScope & m_Scope
Definition: vcf_writer.hpp:127
bool x_WriteFeatureRef(const unsigned int start, const int type, const string &anchor, const string &ref)
Definition: vcf_writer.cpp:578
bool WriteAnnot(const CSeq_annot &, const string &="", const string &="") override
Write a raw Seq-annot to the internal output stream.
Definition: vcf_writer.cpp:117
bool x_WriteFeature(const CMappedFeat &)
Definition: vcf_writer.cpp:360
bool x_WriteFeatureId(const CMappedFeat &)
Definition: vcf_writer.cpp:551
bool x_WriteFeatureFilter(const CMappedFeat &)
Definition: vcf_writer.cpp:688
bool x_WriteFeatureChrom(const CMappedFeat &)
Definition: vcf_writer.cpp:483
bool x_WriteFeatureInfo(const CMappedFeat &)
Definition: vcf_writer.cpp:715
void x_GetTypeRefAlt(const CVariation_inst &inst, int &rtype, string &ref, list< int > &alt_types, vector< string > &alt)
Definition: vcf_writer.cpp:313
bool x_WriteMeta(const CSeq_annot &)
Definition: vcf_writer.cpp:193
bool x_WriteMetaCreateNew(const CSeq_annot &)
Definition: vcf_writer.cpp:212
CVcfWriter(CScope &, CNcbiOstream &, TFlags=fNormal)
Definition: vcf_writer.cpp:100
Defines and provides stubs for a general interface to a variety of file formatters.
Definition: writer.hpp:81
virtual const CRange< TSeqPos > & GetRange(void) const
Definition: writer.hpp:262
unique_ptr< SAnnotSelector > m_Selector
Definition: writer.hpp:269
virtual SAnnotSelector & SetAnnotSelector(void)
Definition: writer.hpp:246
CNcbiOstream & m_Os
Definition: writer.hpp:267
Include a standard set of the NCBI C++ Toolkit most basic headers.
static const column_t columns[]
Definition: utf8_2.c:22
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
void Warning(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1191
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const
Append a label for this Seq-id to the supplied string.
Definition: Seq_id.cpp:2040
string GetLabel(const CSeq_id &id)
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
CSeq_annot_Handle AddSeq_annot(CSeq_annot &annot, TPriority pri=kPriority_Default, EExist action=eExist_Throw)
Add Seq-annot, return its CSeq_annot_Handle.
Definition: scope.cpp:538
CSeq_annot_Handle GetSeq_annotHandle(const CSeq_annot &annot, EMissing action=eMissing_Default)
Definition: scope.cpp:192
const CSeq_feat::TExts & GetExts(void) const
bool IsSetDbxref(void) const
const CSeqFeatData & GetData(void) const
const CUser_object & GetExt(void) const
bool IsSetExts(void) const
CConstRef< CSeq_feat > GetOriginalSeq_feat(void) const
const CSeq_feat::TDbxref & GetDbxref(void) const
void Replace(const CSeq_feat &new_feat) const
Replace the feature with new Seq-feat object.
bool IsSetExt(void) const
CSeqVector GetSeqVector(EVectorCoding coding, ENa_strand strand=eNa_strand_plus) const
Get sequence: Iupacna or Iupacaa if use_iupac_coding is true.
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
const CSeq_loc & GetLocation(void) const
TRange GetTotalRange(void) const
Definition: mapped_feat.hpp:93
const CSeq_feat_Handle & GetSeq_feat_Handle(void) const
Get original feature handle.
Definition: mapped_feat.hpp:71
SAnnotSelector & SetSortOrder(ESortOrder sort_order)
Set sort order of annotations.
@ eSortOrder_None
do not sort annotations for faster retrieval
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
Definition: seq_vector.cpp:304
bool NotEmpty(void) const
Definition: range.hpp:152
TThisType IntersectionWith(const TThisType &r) const
Definition: range.hpp:312
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
static string DoubleToString(double value, int precision=-1, TNumToStringFlags flags=0)
Convert double to string.
Definition: ncbistr.hpp:5187
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3461
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5084
static string Join(const TContainer &arr, const CTempString &delim)
Join strings using the specified delimiter.
Definition: ncbistr.hpp:2697
static string UIntToString(unsigned int value, TNumToStringFlags flags=0, int base=10)
Convert UInt to string.
Definition: ncbistr.hpp:5109
static string & ToLower(string &str)
Convert string to lower case – string& version.
Definition: ncbistr.cpp:405
@ fSplit_Truncate
Definition: ncbistr.hpp:2501
@ fSplit_MergeDelimiters
Merge adjacent delimiters.
Definition: ncbistr.hpp:2498
static const char label[]
const TStr & GetStr(void) const
Get the variant data.
bool IsStr(void) const
Check if variant Str is selected.
Definition: Object_id_.hpp:291
bool IsSetDb(void) const
Name of database or system. Check if a value has been assigned to Db data member.
Definition: Dbtag_.hpp:208
bool IsSetType(void) const
Type of object within class. Check if a value has been assigned to Type data member.
bool IsStd(void) const
Check if variant Std is selected.
Definition: Date_.hpp:320
const TTag & GetTag(void) const
Get the Tag member data.
Definition: Dbtag_.hpp:267
const TStrs & GetStrs(void) const
Get the variant data.
bool IsId(void) const
Check if variant Id is selected.
Definition: Object_id_.hpp:264
const TData & GetData(void) const
Get the Data member data.
bool CanGetTag(void) const
Check if it is safe to call GetTag method.
Definition: Dbtag_.hpp:261
bool IsSetTag(void) const
Appropriate tag. Check if a value has been assigned to Tag data member.
Definition: Dbtag_.hpp:255
const TFields & GetFields(void) const
Get the variant data.
const TDb & GetDb(void) const
Get the Db member data.
Definition: Dbtag_.hpp:220
const TStr & GetStr(void) const
Get the variant data.
Definition: Object_id_.hpp:297
TReal GetReal(void) const
Get the variant data.
const TType & GetType(void) const
Get the Type member data.
vector< CStringUTF8 > TStrs
TId GetId(void) const
Get the variant data.
Definition: Object_id_.hpp:270
const TVariation & GetVariation(void) const
Get the variant data.
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
@ eNa_strand_unknown
Definition: Na_strand_.hpp:65
const Tdata & Get(void) const
Get the member data.
const TDesc & GetDesc(void) const
Get the Desc member data.
Definition: Seq_annot_.hpp:852
bool IsSetDesc(void) const
Used only for stand-alone Seq-annots. Check if a value has been assigned to Desc data member.
Definition: Seq_annot_.hpp:840
bool IsSet(void) const
Check if a value has been assigned to data member.
TType GetType(void) const
Get the Type member data.
const TInstance & GetInstance(void) const
Get the variant data.
bool IsSetFrequency_based_validation(void) const
Check if a value has been assigned to Frequency_based_validation data member.
const TVariant_prop & GetVariant_prop(void) const
Get the Variant_prop member data.
bool IsSetResource_link(void) const
Check if a value has been assigned to Resource_link data member.
TFrequency_based_validation GetFrequency_based_validation(void) const
Get the Frequency_based_validation member data.
E_Choice Which(void) const
Which variant is currently selected.
TGenotype GetGenotype(void) const
Get the Genotype member data.
const TSet & GetSet(void) const
Get the variant data.
bool IsSetDelta(void) const
Sequence that replaces the location, in biological order.
bool IsSetVariant_prop(void) const
Variant properties bit fields. Check if a value has been assigned to Variant_prop data member.
bool IsSetOther_ids(void) const
Check if a value has been assigned to Other_ids data member.
TEffect GetEffect(void) const
Get the Effect member data.
const TId & GetId(void) const
Get the Id member data.
const TDelta & GetDelta(void) const
Get the Delta member data.
const TData & GetData(void) const
Get the Data member data.
TResource_link GetResource_link(void) const
Get the Resource_link member data.
bool IsSetId(void) const
ids (i.e., SNP rsid / ssid, dbVar nsv/nssv) expected values include 'dbSNP|rs12334',...
TAllele_frequency GetAllele_frequency(void) const
Get the Allele_frequency member data.
TQuality_check GetQuality_check(void) const
Get the Quality_check member data.
bool IsSetEffect(void) const
Check if a value has been assigned to Effect data member.
bool IsSetGenotype(void) const
Check if a value has been assigned to Genotype data member.
const TOther_ids & GetOther_ids(void) const
Get the Other_ids member data.
bool IsSetQuality_check(void) const
Check if a value has been assigned to Quality_check data member.
bool IsSetAllele_frequency(void) const
NOTE: 'allele-frequency' here refers to the minor allele frequency of the default population. Check if...
TGene_location GetGene_location(void) const
Get the Gene_location member data.
bool IsSetGene_location(void) const
Check if a value has been assigned to Gene_location data member.
const TVariations & GetVariations(void) const
Get the Variations member data.
@ eType_snv
delta=[morph of length 1] NOTE: this is snV not snP; the latter requires frequency-based validation t...
@ eType_mnp
delta=[morph of length >1]
@ eType_delins
delta=[del, ins]
@ e_Instance
actual sequence-edit at feat.location
sat & gt
static MDB_envinfo info
Definition: mdb_load.c:37
range(_Ty, _Ty) -> range< _Ty >
constexpr bool empty(list< Ts... >) noexcept
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
unsigned int a
Definition: ncbi_localip.c:102
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
Definition: njn_matrix.hpp:613
#define fi
SAnnotSelector –.
Definition: type.c:6
#define _ASSERT
USING_SCOPE(objects)
CConstRef< CUser_object > s_GetVcfMetaInfo(const CSeq_annot &annot)
Definition: vcf_writer.cpp:67
Modified on Wed Apr 17 13:08:26 2024 by modify_doxy.py rev. 669887