NCBI C++ ToolKit
hgvs_parser.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: hgvs_parser.cpp 77102 2017-03-23 20:52:42Z kazimird $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * File Description:
27  * Parser that converts HGVS variation expressions into Variation-ref objects
28  *
29  */
30 
31 #include <ncbi_pch.hpp>
34 
41 
42 
44 
50 
51 
53 
55 #include <objects/seq/Seq_data.hpp>
57 #include <objects/seq/Num_ref.hpp>
64 
65 #include <serial/iterator.hpp>
66 #include <objmgr/util/sequence.hpp>
67 #include <objmgr/seq_vector.hpp>
70 
71 
73 
74 namespace variation_ref {
75 
76 
// Throw a CHgvsParser::CHgvsParserException with the given error code and message.
77 #define HGVS_THROW(err_code, message) NCBI_THROW(CHgvsParser::CHgvsParserException, err_code, message)
78 
// Verify that parse-tree node `i` carries grammar rule `rule_id`; otherwise throw eGrammatic
// naming the rule that was actually found.
// NOTE(review): expands to a bare `if { ... }` (not wrapped in do/while), so an `else`
// written directly after a use of this macro would bind to the macro's `if`.
79 #define HGVS_ASSERT_RULE(i, rule_id) \
80  if((i->value.id()) != (SGrammar::rule_id)) \
81  {HGVS_THROW(eGrammatic, "Unexpected rule " + CHgvsParser::SGrammar::s_GetRuleName(i->value.id()) ); }
82 
83 
85 CHgvsParser::SGrammar CHgvsParser::s_grammar;
86 
87 
88 //attach asserted sequence to the variation-ref in a user-object. This is for
89 //internal representation only, as the variation-ref travels up through the
90 //parse-tree nodes. Before we return the final variation, this will be repackaged
91 //as set of variations (see RepackageAssertedSequence()). This is done so that
92 //we don't have to deal with possibility of a variation-ref being a package
93 //within the parser.
95 {
 // NOTE(review): the signature line and the line that creates `uo` are missing from this
 // listing; the body uses `vr` (the variation-ref) and `literal` (the asserted sequence).
 // The type string set here is the marker tested later when the ext is repackaged.
97  uo->SetType().SetStr("hgvs_asserted_seq");
98 
 // Store the literal's length, plus its residues under a field named after the encoding.
99  uo->SetField("length").SetData().SetInt(literal.GetLength());
100  if(literal.GetSeq_data().IsIupacna()) {
101  uo->AddField("iupacna", literal.GetSeq_data().GetIupacna());
102  } else if(literal.GetSeq_data().IsNcbieaa()) {
103  uo->AddField("ncbieaa", literal.GetSeq_data().GetNcbieaa());
104  } else {
 // NOTE(review): the accepted encodings are IUPAC-NA and NCBIeaa, yet the message names
 // "IUPAC-AA" - confirm the intended wording.
105  HGVS_THROW(eLogic, "Seq-data is neither IUPAC-AA or IUPAC-NA");
106  }
 // Attach the user-object as the variation-ref's ext.
107  vr.SetExt(*uo);
108 }
109 
110 //if a variation has an asserted sequence, repackage it as a set having
111 //the original variation and a synthetic one representing the asserted sequence
113 {
 // NOTE(review): the signature line, the loop body, and the declarations of `orig` and
 // `delta` are missing from this listing.
114  if(vr.GetData().IsSet()) {
 // Already a set: visit every member variation (the per-member statement is not visible here).
115  NON_CONST_ITERATE(CVariation_ref::TData::TSet::TVariations, it, vr.SetData().SetSet().SetVariations()) {
117  }
118  } else {
 // `orig` receives the content of `vr` and becomes the first member of the set built on `vr`.
120  orig->Assign(vr);
121  orig->ResetLocation(); //location will be set on the package, as it is the same for both members
122 
124  vr.SetData().SetSet().SetVariations().push_back(orig);
 // The ext is cleared on the package only; `orig` was assigned from `vr` before this
 // reset, and the check below reads orig's ext.
125  vr.ResetExt();
126 
127  if(orig->IsSetExt() && orig->GetExt().GetType().GetStr() == "hgvs_asserted_seq") {
 // Second member: an identity instance flagged as "asserted" that carries the sequence
 // recorded in the user-object.
128  CRef<CVariation_ref> asserted_vr(new CVariation_ref);
129  vr.SetData().SetSet().SetVariations().push_back(asserted_vr);
130 
131  asserted_vr->SetData().SetInstance().SetObservation(CVariation_inst::eObservation_asserted);
132  asserted_vr->SetData().SetInstance().SetType(CVariation_inst::eType_identity);
133 
 // Rebuild the literal from the "length" field and whichever residue field is present.
135  delta->SetSeq().SetLiteral().SetLength(orig->GetExt().GetField("length").GetData().GetInt());
136  if(orig->GetExt().HasField("iupacna")) {
137  delta->SetSeq().SetLiteral().SetSeq_data().SetIupacna().Set(orig->GetExt().GetField("iupacna").GetData().GetStr());
138  } else {
139  delta->SetSeq().SetLiteral().SetSeq_data().SetNcbieaa().Set(orig->GetExt().GetField("ncbieaa").GetData().GetStr());
140  }
141 
 // If the original's first delta item is an offset action, copy it ahead of the literal.
142  if( orig->GetData().GetInstance().GetDelta().size() > 0
143  && orig->GetData().GetInstance().GetDelta().front()->IsSetAction()
144  && orig->GetData().GetInstance().GetDelta().front()->GetAction() == CDelta_item::eAction_offset)
145  {
146  CRef<CDelta_item> offset_di(new CDelta_item);
147  offset_di->Assign(*orig->GetData().GetInstance().GetDelta().front());
148  asserted_vr->SetData().SetInstance().SetDelta().push_back(offset_di);
149  }
150 
151  asserted_vr->SetData().SetInstance().SetDelta().push_back(delta);
152 
 // Likewise copy a trailing offset item after the literal, but only when the last item is
 // not the same object as the first (so a single offset item is not copied twice).
153  if( orig->GetData().GetInstance().GetDelta().size() > 0
154  && orig->GetData().GetInstance().GetDelta().back() != orig->GetData().GetInstance().GetDelta().front()
155  && orig->GetData().GetInstance().GetDelta().back()->IsSetAction()
156  && orig->GetData().GetInstance().GetDelta().back()->GetAction() == CDelta_item::eAction_offset)
157  {
158  CRef<CDelta_item> offset_di(new CDelta_item);
159  offset_di->Assign(*orig->GetData().GetInstance().GetDelta().back());
160  asserted_vr->SetData().SetInstance().SetDelta().push_back(offset_di);
161  }
 // The user-object was internal bookkeeping only; drop it from the original member.
162  orig->ResetExt();
163  }
164  }
165 }
166 
167 
168 
169 
171 {
173 }
174 
175 const CSeq_loc& CHgvsParser::CContext::GetLoc() const
176 {
177  if(m_loc.loc.IsNull()) {
178  HGVS_THROW(eContext, "No seq-loc in context");
179  }
180  return *m_loc.loc;
181 }
182 
184 {
 // NOTE(review): signature line missing from this listing. Returns the m_loc member as-is;
 // unlike GetLoc(), there is no null check on m_loc.loc here.
185  return m_loc;
186 }
187 
189 {
 // NOTE(review): signature line missing from this listing.
 // Returns the context's seq-id; throws eContext when none has been set.
190  if(m_seq_id.IsNull()) {
191  HGVS_THROW(eContext, "No seq-id in context");
192  }
193  return *m_seq_id;
194 }
195 
197 {
 // NOTE(review): signature line missing from this listing.
 // Returns the context's CDS feature; throws eContext when none has been set.
198  if(m_cds.IsNull()) {
199  HGVS_THROW(eContext, "No CDS feature in context");
200  }
201  return *m_cds;
202 }
203 
205 {
 // NOTE(review): signature line missing from this listing; `check` is a parameter.
 // With `check` true, an unset molecule type is reported as an eContext error; with `check`
 // false the stored value is returned even when it is eMol_not_set.
206  if(check && m_mol_type == eMol_not_set) {
207  HGVS_THROW(eContext, "No sequence in context");
208  }
209  return m_mol_type;
210 }
211 
213 {
 // NOTE(review): signature line missing from this listing; `id` and `mol_type` are parameters.
 // Re-targets the context: clears it, stores the molecule type and the id, fetches the
 // bioseq handle, and for 'c.' coordinates locates the CDS feature.
214  Clear();
215 
216  m_mol_type = mol_type;
217  if(m_seq_id.IsNull()) {
218  m_seq_id.Reset(new CSeq_id);
219  }
220  m_seq_id->Assign(id);
221 
222  m_bsh = m_scope->GetBioseqHandle(*m_seq_id);
223 
224  if(!m_bsh) {
 // NOTE(review): "Cannnot" is misspelled in this runtime message.
225  HGVS_THROW(eContext, "Cannnot get bioseq for seq-id " + id.AsFastaString());
226  }
227 
228  if(mol_type == eMol_c) {
 // Exactly one Cdregion feature is required: a second one is an error, and so is none.
229  for(CFeat_CI ci(m_bsh); ci; ++ci) {
230  const CMappedFeat& mf = *ci;
231  if(mf.GetData().IsCdregion()) {
232  if(m_cds.IsNull()) {
233  m_cds.Reset(new CSeq_feat());
234  m_cds->Assign(mf.GetMappedFeature());
235  } else {
236  HGVS_THROW(eContext, "Multiple CDS features on the sequence");
237  }
238  }
239  }
240  if(m_cds.IsNull()) {
241  HGVS_THROW(eContext, "Could not find CDS feat");
242  }
243  }
244 }
245 
// Check a sequence literal against the sequence found at `loc`.
// Throws eSemantic on a length or residue mismatch and eLogic for an unsupported encoding.
// NOTE(review): the two declarations of `v` are missing from this listing.
246 void CHgvsParser::CContext::Validate(const CSeq_literal& literal, const CSeq_loc& loc) const
247 {
248  //if literal has no sequence data - simply validate lengths
249  if(!literal.IsSetSeq_data()) {
250  if(literal.GetLength() != loc.GetTotalRange().GetLength()) {
251  HGVS_THROW(eSemantic, "Literal length does not match location length");
252  }
 // NOTE(review): there is no return after this length-only check, so control continues to
 // the GetSeq_data() calls below even when no seq-data is set - confirm this is intended.
253  }
254 
 // seq1 = residues asserted by the literal; seq2 = residues read through `v`.
255  string seq1 = "";
256  string seq2 = "";
257 
258  if(literal.GetSeq_data().IsIupacna()) {
259  seq1 = literal.GetSeq_data().GetIupacna();
261  v.GetSeqData(v.begin(), v.end(), seq2);
262  } else if(literal.GetSeq_data().IsNcbieaa()) {
263  seq1 = literal.GetSeq_data().GetNcbieaa();
265  v.GetSeqData(v.begin(), v.end(), seq2);
266  } else {
267  HGVS_THROW(eLogic, "Seq-literal of unsupported type");
268  }
269 
 // A mismatch is reported only when seq2 is non-empty; an empty seq2 passes silently.
270  if(seq1 != seq2 && seq2 != "") {
271  HGVS_THROW(eSemantic, "Expected sequence '" + seq1 + "'; found '" + seq2 + "'");
272  }
273 }
274 
275 const string& CHgvsParser::SGrammar::s_GetRuleName(parser_id id)
276 {
277  TRuleNames::const_iterator it = s_GetRuleNames().find(id);
278  if(it == s_GetRuleNames().end()) {
279  HGVS_THROW(eLogic, "Rule name not hardcoded");
280  } else {
281  return it->second;
282  }
283 }
284 
285 
287 {
 // NOTE(review): the signature line and several statements (after each `value = 0;` and
 // inside the plain-number branches) are missing from this listing.
 // Converts an int_fuzz parse node into an SFuzzyInt: a numeric value plus an optional fuzz.
 // The shape of the node is recognised by its number of children.
288  HGVS_ASSERT_RULE(i, eID_int_fuzz);
289  TIterator it = i->children.begin();
290 
291  CRef<CInt_fuzz> fuzz(new CInt_fuzz);
292  int value = 0;
293 
294  if(i->children.size() == 1) { //e.g. '5' or '?'
295  string s(it->value.begin(), it->value.end());
296  if(s == "?") {
297  value = 0;
299  } else {
 // A plain token other than '?': the fuzz reference is cleared, so the result has a null fuzz.
301  fuzz.Reset();
302  }
303  } else if(i->children.size() == 3) { //e.g. '(5)' or '(?)'
 // Skip the first child to reach the middle token.
304  ++it;
305  string s(it->value.begin(), it->value.end());
306  if(s == "?") {
307  value = 0;
309  } else {
312  }
313  } else if(i->children.size() == 5) { //e.g. '(5_7)' or '(?_10)'
 // Children 1 and 3 are the two bounds; the child between them is stepped over.
314  ++it;
315  string s1(it->value.begin(), it->value.end());
316  ++it;
317  ++it;
318  string s2(it->value.begin(), it->value.end());
319 
320  if(s1 == "?" && s2 == "?") {
321  value = 0;
323  } else if(s1 != "?" && s2 != "?") {
 // Both bounds known: value is the lower bound and the fuzz is the [s1, s2] range.
324  value = NStr::StringToInt(s1);
325  fuzz->SetRange().SetMin(NStr::StringToInt(s1));
326  fuzz->SetRange().SetMax(NStr::StringToInt(s2));
327  } else if(s2 == "?") {
 // Only the lower bound known: "greater than s1".
328  value = NStr::StringToInt(s1);
329  fuzz->SetLim(CInt_fuzz::eLim_gt);
330  } else if(s1 == "?") {
 // Only the upper bound known: "less than s2".
331  value = NStr::StringToInt(s2);
332  fuzz->SetLim(CInt_fuzz::eLim_lt);
333  } else {
334  HGVS_THROW(eLogic, "Unreachable code");
335  }
336  }
 // NOTE(review): any other child count falls through with value 0 and the freshly
 // constructed fuzz object - confirm the grammar cannot produce such a node.
337 
338  CHgvsParser::SFuzzyInt fuzzy_int;
339  fuzzy_int.value = value;
340  fuzzy_int.fuzz = fuzz;
341  return fuzzy_int;
342 }
343 
345 {
 // NOTE(review): the signature line and the lines following SetId() (where `offset` is
 // presumably declared) are missing from this listing.
 // Builds a CSeq_point on the context's sequence from an abs_pos parse node.
346  HGVS_ASSERT_RULE(i, eID_abs_pos);
347  TIterator it = i->children.begin();
348 
349  CRef<CSeq_point> pnt(new CSeq_point);
350  pnt->SetId().Assign(context.GetId());
353 
354  bool is_relative_to_stop_codon = false;
355  if(i->children.size() == 2) {
 // Two children: a literal '*' followed by the number, i.e. a stop-codon-relative
 // coordinate, which is accepted only in 'c.' context.
356  is_relative_to_stop_codon = true;
357  string s(it->value.begin(), it->value.end());
358  if(s != "*") {
359  HGVS_THROW(eGrammatic, "Expected literal '*'");
360  }
361  if(context.GetMolType() != CContext::eMol_c) {
362  HGVS_THROW(eContext, "Expected 'c.' context for stop-codon-relative coordinate");
363  }
364 
 // The base for '*' coordinates is the biological stop of the CDS location.
365  offset = context.GetCDS().GetLocation().GetStop(eExtreme_Biological);
366  ++it;
367  } else {
368  if (context.GetMolType() == CContext::eMol_c) {
369  //Note: in RNA coordinates (r.) the coordinates are absolute, like in genomic sequences,
370  // "The RNA sequence type uses only GenBank mRNA records. The value 1 is assigned to the first
371  // base in the record and from there all bases are counted normally."
372  //so the cds-start offset applies only to "c." coordinates
373  offset = context.GetCDS().GetLocation().GetStart(eExtreme_Biological);
374  }
375  }
376 
377  SFuzzyInt int_fuzz = x_int_fuzz(it, context);
378  if(int_fuzz.value > 0 && !is_relative_to_stop_codon) {
379  /* In HGVS:
380  * the nucleotide 3' of the translation stop codon is *1, the next *2, etc.
381  * # there is no nucleotide 0
382  * # nucleotide 1 is the A of the ATG-translation initiation codon
383  * # the nucleotide 5' of the ATG-translation initiation codon is -1, the previous -2, etc.
384  * I.e. need to adjust if dealing with positive coordinates, except for *-relative ones.
385  */
386  offset--;
387  }
388 
 // Both branches set the same point; the fuzzy branch additionally copies the fuzz and
 // shifts a range fuzz by the same offset so it stays in sequence coordinates.
389  if(int_fuzz.fuzz.IsNull()) {
390  pnt->SetPoint(offset + int_fuzz.value);
391  } else {
392  pnt->SetPoint(offset + int_fuzz.value);
393  pnt->SetFuzz(*int_fuzz.fuzz);
394  if(pnt->GetFuzz().IsRange()) {
395  pnt->SetFuzz().SetRange().SetMin() += offset;
396  pnt->SetFuzz().SetRange().SetMax() += offset;
397  }
398  }
399 
400  return pnt;
401 }
402 
403 
404 /*
405  * general_pos is either simple abs-pos that is passed down to x_abs_pos,
406  * or an intronic location that is specified by a mapping point in the
407  * local coordinates and the -upstream / +downstream offset after remapping.
408  *
409  * The mapping point can either be an abs-pos in local coordinates, or
410  * specified as offset in intron-specific coordinate system where IVS# specifies
411  * the intron number
412  */
414 {
 // NOTE(review): the signature line, part of the fuzz-limit condition with its body, and
 // the line that presumably configures `sel` are missing from this listing.
415  HGVS_ASSERT_RULE(i, eID_general_pos);
416 
417  SOffsetPoint ofpnt;
418 
419 
420  if(i->children.size() == 1) {
421  //local coordinates
422  ofpnt.pnt = x_abs_pos(i->children.begin(), context);
423  } else {
424  //(str_p("IVS") >> int_p | abs_pos) >> sign_p >> int_fuzz
425 
 // The node is read right-to-left: last child is the offset, the one before it the sign.
426  TIterator it = i->children.end() - 1;
427  ofpnt.offset = x_int_fuzz(it, context);
428  --it;
429  string s_sign(it->value.begin(), it->value.end());
 // Anything other than "-" is treated as a positive sign.
430  int sign1 = s_sign == "-" ? -1 : 1;
431  ofpnt.offset.value *= sign1;
432  if(ofpnt.offset.fuzz &&
433  ofpnt.offset.fuzz->IsLim() &&
435  {
437  }
438 
439 
440  --it;
441  if(it->value.id() == SGrammar::eID_abs_pos) {
442  //base-loc is an abs-pos
443  ofpnt.pnt = x_abs_pos(i->children.begin(), context);
444  } else {
445  //base-loc is IVS-relative.
446  ofpnt.pnt.Reset(new CSeq_point);
447  ofpnt.pnt->SetId().Assign(context.GetId());
448  ofpnt.pnt->SetStrand(eNa_strand_plus);
449 
 // NOTE(review): this `it` shadows the outer iterator declared above; `s_ivs` is read
 // but never used afterwards.
450  TIterator it = i->children.begin();
451  string s_ivs(it->value.begin(), it->value.end());
452  ++it;
453  string s_ivs_num(it->value.begin(), it->value.end());
454  int ivs_num = NStr::StringToInt(s_ivs_num);
455 
456  //If IVS3+50, the mapping point is the last base of third exon
457  //if IVS3-50, the mapping point is the first base of the fourth exon
458  size_t target_exon_num = sign1 < 0 ? ivs_num + 1 : ivs_num;
459 
460  SAnnotSelector sel;
462  CBioseq_Handle bsh = context.GetScope().GetBioseqHandle(context.GetId());
463  size_t exon_num = 1;
464  //Note: IVS is cDNA-centric, so we'll have to use ordinals of the exons instead of /number qual
 // NOTE(review): if the iteration ends before exon_num reaches target_exon_num, the point
 // is never set and no error is raised - confirm that callers handle this.
465  for(CFeat_CI ci(bsh, sel); ci; ++ci) {
466  const CMappedFeat& mf = *ci;
467  if(exon_num == target_exon_num) {
468  ofpnt.pnt->SetPoint(sign1 > 0 ? mf.GetLocation().GetStop(eExtreme_Biological)
469  : mf.GetLocation().GetStart(eExtreme_Biological));
470  break;
471  }
472  exon_num++;
473  }
474  }
475  }
476 
477  return ofpnt;
478 }
479 
480 
482 {
 // NOTE(review): signature line missing from this listing.
 // Combines the two general_pos children of a fuzzy_pos node into one SOffsetPoint.
483  HGVS_ASSERT_RULE(i, eID_fuzzy_pos);
484 
485  SOffsetPoint pnt;
486  SOffsetPoint pnt1 = x_general_pos(i->children.begin(), context);
487  SOffsetPoint pnt2 = x_general_pos(i->children.begin() + 1, context);
488 
489  //Verify that on the same seq-id.
490  if(!pnt1.pnt->GetId().Equals(pnt2.pnt->GetId())) {
491  HGVS_THROW(eSemantic, "Points in a fuzzy pos are on different sequences");
492  }
493  if(pnt1.pnt->GetStrand() != pnt2.pnt->GetStrand()) {
494  HGVS_THROW(eSemantic, "Range-loc start/stop are on different strands.");
495  }
496 
497  //If One is empty, copy from the other and set TL for loc1 and TR for loc2
498  if(pnt1.pnt->GetPoint() == kInvalidSeqPos && pnt2.pnt->GetPoint() != kInvalidSeqPos) {
499  pnt1.pnt->Assign(*pnt2.pnt);
500  pnt1.pnt->SetFuzz().SetLim(CInt_fuzz::eLim_tl);
501  } else if(pnt1.pnt->GetPoint() != kInvalidSeqPos && pnt2.pnt->GetPoint() == kInvalidSeqPos) {
502  pnt2.pnt->Assign(*pnt1.pnt);
503  pnt2.pnt->SetFuzz().SetLim(CInt_fuzz::eLim_tr);
504  }
505 
 // When either side carries a non-zero offset, both base points must be identical.
506  if((pnt1.offset.value != 0 || pnt2.offset.value != 0) && !pnt1.pnt->Equals(*pnt2.pnt)) {
507  HGVS_THROW(eSemantic, "Base-points in an intronic fuzzy position must be equal");
508  }
509 
 // The result starts as the first point; differing offsets are expressed as a range fuzz
 // on the offset, which replaces any fuzz copied from pnt1.offset.
510  pnt.pnt = pnt1.pnt;
511  pnt.offset = pnt1.offset;
512 
513  if(pnt1.offset.value != pnt2.offset.value) {
514  pnt.offset.fuzz.Reset(new CInt_fuzz);
515  pnt.offset.fuzz->SetRange().SetMin(pnt1.offset.value);
516  pnt.offset.fuzz->SetRange().SetMax(pnt2.offset.value);
517  }
518 
519  return pnt;
520 
 // Everything below is excluded from compilation by `#if 0` and also follows the return.
521 #if 0
522  todo: reconcile
523  //If Both are Empty - the result is empty, otherwise reconciliate
524  if(pnt1.pnt->GetPoint() == kInvalidSeqPos && pnt2.pnt->GetPoint() == kInvalidSeqPos) {
525  pnt.pnt = pnt1.pnt;
526  pnt.offset = pnt1.offset;
527  } else {
528  pnt.pnt.Reset(new CSeq_point);
529  pnt.pnt.Assign(*pnt1.pnt);
530 
531  TSeqPos min_pos = min(pnt1.pnt->GetPoint(), pnt2.pnt->GetPoint());
532  TSeqPos max_pos = max(pnt1.pnt->GetPoint(), pnt2.pnt->GetPoint());
533 
534  if(!pnt1->IsSetFuzz() && !pnt2->IsSetFuzz()) {
535  //Both are non-fuzzy - create the min-max fuzz.
536  //(10+50_10+60)
537  pnt->SetFuzz().SetRange().SetMin(min_pos);
538  pnt->SetFuzz().SetRange().SetMax(max_pos);
539 
540  } else if(pnt1->IsSetFuzz() && pnt2->IsSetFuzz()) {
541  //Both are fuzzy - reconcile the fuzz.
542 
543  if(pnt1->GetFuzz().GetLim() == CInt_fuzz::eLim_tr
544  && pnt2->GetFuzz().GetLim() == CInt_fuzz::eLim_tl)
545  {
546  //fuzz points inwards - create min-max fuzz
547  //(10+?_11-?)
548  pnt->SetFuzz().SetRange().SetMin(min_pos);
549  pnt->SetFuzz().SetRange().SetMax(max_pos);
550 
551  } else if (pnt1->GetFuzz().GetLim() == CInt_fuzz::eLim_tl
552  && pnt2->GetFuzz().GetLim() == CInt_fuzz::eLim_tr)
553  {
554  //fuzz points outwards - set fuzz to unk
555  //(10-?_10+?)
556  //(?_10+?)
557  //(10-?_?)
558  pnt->SetFuzz().SetLim(CInt_fuzz::eLim_unk);
559 
560  } else if (pnt1->GetFuzz().GetLim() == CInt_fuzz::eLim_tl
561  && pnt2->GetFuzz().GetLim() == CInt_fuzz::eLim_tl)
562  {
563  //fuzz is to the left - use 5'-most
564  //(?_10-?)
565  //(10-?_11-?)
566  pnt->SetPoint(pnt->GetStrand() == eNa_strand_minus ? max_pos : min_pos);
567 
568  } else if (pnt1->GetFuzz().GetLim() == CInt_fuzz::eLim_tr
569  && pnt2->GetFuzz().GetLim() == CInt_fuzz::eLim_tr)
570  {
571  //fuzz is to the right - use 3'-most
572  //(10+?_?)
573  //(10+?_11+?)
574  pnt->SetPoint(pnt->GetStrand() == eNa_strand_minus ? min_pos : max_pos);
575 
576  } else {
577  pnt->SetFuzz().SetLim(CInt_fuzz::eLim_unk);
578  }
579  } else {
580  // One of the two is non-fuzzy:
581  // use it to specify position, and the fuzz of the other to specify the fuzz
582  // e.g. (10+5_10+?) -> loc1=100005; loc2=100000tr -> 100005tr
583 
584  pnt->Assign(pnt1->IsSetFuzz() ? *pnt2 : *pnt1);
585  pnt->SetFuzz().Assign(pnt1->IsSetFuzz() ? pnt1->GetFuzz()
586  : pnt2->GetFuzz());
587 
588  }
589  }
590 #endif
591 
592 
593 
594 }
595 
596 
598 {
 // NOTE(review): the signature line, the declaration of `ctx`, and the final alternative
 // of the molecule-type conditional are missing from this listing.
 // Reads a header node from its last child backwards: molecule type, then the sequence
 // id, then an optional tag when there are three children.
599  HGVS_ASSERT_RULE(i, eID_header);
600 
602 
603  TIterator it = i->children.rbegin()->children.begin();
604  string mol(it->value.begin(), it->value.end());
 // Both "m" and "mt" map to eMol_mt.
605  CContext::EMolType mol_type =
606  mol == "c" ? CContext::eMol_c
607  : mol == "g" ? CContext::eMol_g
608  : mol == "r" ? CContext::eMol_r
609  : mol == "p" ? CContext::eMol_p
610  : mol == "m" ? CContext::eMol_mt
611  : mol == "mt" ? CContext::eMol_mt
613 
614  it = (i->children.rbegin() + 1)->children.begin();
615  string id_str(it->value.begin(), it->value.end());
616 
617  CRef<CSeq_id> id(new CSeq_id(id_str));
618  ctx.SetId(*id, mol_type);
619 
620  if(i->children.size() == 3) {
 // NOTE(review): tag_str is extracted but not used.
621  it = (i->children.rbegin() + 2)->children.begin();
622  string tag_str(it->value.begin(), it->value.end());
623  //record tag in context, if it is necessary in the future
624  }
625 
626  return ctx;
627 }
628 
629 
631 {
 // NOTE(review): signature line missing from this listing.
 // Dispatches on the first child: a general_pos, a fuzzy_pos, or otherwise a nested
 // "header + pos_spec" (optionally preceded by an 'o' marker) handled recursively.
632  HGVS_ASSERT_RULE(i, eID_pos_spec);
633 
634  SOffsetPoint pnt;
635  TIterator it = i->children.begin();
636  if(it->value.id() == SGrammar::eID_general_pos) {
637  pnt = x_general_pos(it, context);
638  } else if(it->value.id() == SGrammar::eID_fuzzy_pos) {
639  pnt = x_fuzzy_pos(it, context);
640  } else {
641  bool flip_strand = false;
642  if(i->children.size() == 3) {
643  //first child is 'o' - opposite
644  flip_strand = true;
645  ++it;
646  }
647 
 // The nested position is parsed against the context produced by its own header.
648  CContext local_ctx = x_header(it, context);
649  ++it;
650  pnt = x_pos_spec(it, local_ctx);
651 
652  if(flip_strand) {
653  pnt.pnt->FlipStrand();
654  }
655  }
656 
657  return pnt;
658 }
659 
660 
662 {
 // NOTE(review): signature line missing from this listing.
 // Parses "<residue><position>": a single amino-acid literal followed by a pos_spec.
 // The residue is kept on the returned point as its asserted sequence.
663  HGVS_ASSERT_RULE(i, eID_prot_pos);
664  TIterator it = i->children.begin();
665 
666  CRef<CSeq_literal> prot_literal = x_raw_seq(it, context);
667 
 // The molecule-type check runs after the literal has already been parsed.
668  if(context.GetMolType() != CContext::eMol_p) {
669  HGVS_THROW(eSemantic, "Expected protein context");
670  }
671 
672  if(prot_literal->GetLength() != 1) {
673  HGVS_THROW(eSemantic, "Expected single aa literal in prot-pos");
674  }
675 
676  ++it;
677  SOffsetPoint pnt = x_pos_spec(it, context);
678 
679  pnt.asserted_sequence = prot_literal->GetSeq_data().GetNcbieaa();
680 
 // Validation of the asserted residue against the sequence is disabled below.
681 #if 0
682  if(!pnt.IsOffset()) {
683  //Create temporary loc and validate against it, since at this point context does not
684  //have this loc set, since we are in the process of constructing it.
685  CRef<CSeq_loc> tmp_loc(new CSeq_loc);
686  tmp_loc->SetPnt(*pnt.pnt);
687  context.Validate(*prot_literal, *tmp_loc);
688  }
689 #endif
690 
691  return pnt;
692 }
693 
694 
696 {
 // NOTE(review): signature line missing from this listing.
 // Builds an interval location (with per-end offsets) from a prot_range or nuc_range node
 // whose first two children are the start and stop positions.
697  SOffsetPoint pnt1, pnt2;
698 
699  SOffsetLoc ofloc;
700  ofloc.loc.Reset(new CSeq_loc(CSeq_loc::e_Int));
701 
702  if(i->value.id() == SGrammar::eID_prot_range) {
703  pnt1 = x_prot_pos(i->children.begin(), context);
704  pnt2 = x_prot_pos(i->children.begin() + 1, context);
705  } else if(i->value.id() == SGrammar::eID_nuc_range) {
706  pnt1 = x_pos_spec(i->children.begin(), context);
707  pnt2 = x_pos_spec(i->children.begin() + 1, context);
708  } else {
 // Neither range rule matched: asserting eID_NONE raises the "Unexpected rule" error
 // for any other rule id.
709  HGVS_ASSERT_RULE(i, eID_NONE);
710  }
711 
712  if(!pnt1.pnt->GetId().Equals(pnt2.pnt->GetId())) {
713  HGVS_THROW(eSemantic, "Range-loc start/stop are on different seq-ids.");
714  }
715  if(pnt1.pnt->GetStrand() != pnt2.pnt->GetStrand()) {
716  HGVS_THROW(eSemantic, "Range-loc start/stop are on different strands.");
717  }
718 
 // Id and strand are taken from the start point; each end carries over its own fuzz.
719  ofloc.loc->SetInt().SetId(pnt1.pnt->SetId());
720  ofloc.loc->SetInt().SetFrom(pnt1.pnt->GetPoint());
721  ofloc.loc->SetInt().SetTo(pnt2.pnt->GetPoint());
722  ofloc.loc->SetInt().SetStrand(pnt1.pnt->GetStrand());
723  if(pnt1.pnt->IsSetFuzz()) {
724  ofloc.loc->SetInt().SetFuzz_from(pnt1.pnt->SetFuzz());
725  }
726 
727  if(pnt2.pnt->IsSetFuzz()) {
728  ofloc.loc->SetInt().SetFuzz_to(pnt2.pnt->SetFuzz());
729  }
730  ofloc.start_offset = pnt1.offset;
731  ofloc.stop_offset = pnt2.offset;
732 
 // If either end asserts a residue, record both joined by ".." (one side may be empty).
733  if(pnt1.asserted_sequence != "" || pnt2.asserted_sequence != "") {
734  ofloc.asserted_sequence = pnt1.asserted_sequence + ".." + pnt2.asserted_sequence;
735  }
736 
737  return ofloc;
738 }
739 
741 {
 // NOTE(review): the signature line and one statement at the end of each point branch
 // are missing from this listing.
 // Turns a location node into an SOffsetLoc: a point for prot_pos / pos_spec children,
 // or an interval for range children.
742  HGVS_ASSERT_RULE(i, eID_location);
743 
744  SOffsetLoc ofloc;
745  ofloc.loc.Reset(new CSeq_loc);
746 
747  TIterator it = i->children.begin();
 // NOTE(review): `loc` is created here but not referenced in the visible code.
748  CRef<CSeq_loc> loc(new CSeq_loc);
749  if(it->value.id() == SGrammar::eID_prot_pos) {
750  SOffsetPoint pnt = x_prot_pos(it, context);
751  ofloc.loc->SetPnt(*pnt.pnt);
752  ofloc.start_offset = pnt.offset;
754  } else if(it->value.id() == SGrammar::eID_pos_spec) {
755  SOffsetPoint pnt = x_pos_spec(it, context);
756  ofloc.loc->SetPnt(*pnt.pnt);
757  ofloc.start_offset = pnt.offset;
759  } else if(it->value.id() == SGrammar::eID_nuc_range || it->value.id() == SGrammar::eID_prot_range) {
760  ofloc = x_range(it, context);
761  } else {
762  HGVS_ASSERT_RULE(it, eID_NONE);
763  }
764 
 // A point at kInvalidSeqPos is replaced by an Empty location on the context's id.
765  if(ofloc.loc->IsPnt() && ofloc.loc->GetPnt().GetPoint() == kInvalidSeqPos) {
766  ofloc.loc->SetEmpty().Assign(context.GetId());
767  }
768  return ofloc;
769 }
770 
772 {
 // NOTE(review): signature line missing from this listing.
 // Parses "[o] header location" into a plain seq-loc; locations carrying a non-zero
 // start or stop offset are rejected.
773  HGVS_ASSERT_RULE(i, eID_seq_loc);
774  TIterator it = i->children.begin();
775 
776  bool flip_strand = false;
777  if(i->children.size() == 3) {
778  //first child is 'o' - opposite
779  flip_strand = true;
780  ++it;
781  }
782 
 // The location is interpreted in the context produced by its own header.
783  CContext local_context = x_header(it, context);
784  ++it;
785  SOffsetLoc loc = x_location(it, local_context);
786 
787  if(flip_strand) {
788  loc.loc->FlipStrand();
789  }
790  if(loc.start_offset.value || loc.stop_offset.value) {
791  HGVS_THROW(eSemantic, "Intronic seq-locs are not supported in this context");
792  }
793 
794  return loc.loc;
795 }
796 
797 
799 {
 // NOTE(review): signature line missing from this listing.
 // Builds a delta item whose sequence is described by the first child: a seq-loc, a range,
 // a raw sequence literal, or just a (possibly fuzzy) length.
800  HGVS_ASSERT_RULE(i, eID_seq_ref);
801  CHgvsParser::TDelta delta(new TDelta::TObjectType);
802  TIterator it = i->children.begin();
803 
804  if(it->value.id() == SGrammar::eID_seq_loc) {
805  CRef<CSeq_loc> loc = x_seq_loc(it, context);
806  delta->SetSeq().SetLoc(*loc);
807  } else if(it->value.id() == SGrammar::eID_nuc_range || it->value.id() == SGrammar::eID_prot_range) {
808  SOffsetLoc ofloc = x_range(it, context);
809  if(ofloc.IsOffset()) {
810  HGVS_THROW(eSemantic, "Intronic loc is not supported in this context");
811  }
812  delta->SetSeq().SetLoc().Assign(*ofloc.loc);
813  } else if(it->value.id() == SGrammar::eID_raw_seq) {
814  CRef<CSeq_literal> raw_seq = x_raw_seq(it, context);
815  delta->SetSeq().SetLiteral(*raw_seq);
816  } else if(it->value.id() == SGrammar::eID_int_fuzz) {
817  //known sequence length; may be approximate
818  SFuzzyInt int_fuzz = x_int_fuzz(it, context);
819  delta->SetSeq().SetLiteral().SetLength(int_fuzz.value);
820  if(int_fuzz.fuzz.IsNull()) {
821  ;//no-fuzz;
822  } else if(int_fuzz.fuzz->IsLim() && int_fuzz.fuzz->GetLim() == CInt_fuzz::eLim_unk) {
823  //unknown length (no value) - will represent as length=0 with gt fuzz
824  delta->SetSeq().SetLiteral().SetFuzz().SetLim(CInt_fuzz::eLim_gt);
825  } else {
826  delta->SetSeq().SetLiteral().SetFuzz(*int_fuzz.fuzz);
827  }
828  } else {
 // No supported child kind: asserting eID_NONE raises the "Unexpected rule" error.
829  HGVS_ASSERT_RULE(it, eID_NONE);
830  }
831 
832  return delta;
833 }
834 
835 string CHgvsParser::s_hgvsaa2ncbieaa(const string& hgvsaa)
836 {
837  string ncbieaa = hgvsaa;
838  NStr::ReplaceInPlace(ncbieaa, "Gly", "G");
839  NStr::ReplaceInPlace(ncbieaa, "Pro", "P");
840  NStr::ReplaceInPlace(ncbieaa, "Ala", "A");
841  NStr::ReplaceInPlace(ncbieaa, "Val", "V");
842  NStr::ReplaceInPlace(ncbieaa, "Leu", "L");
843  NStr::ReplaceInPlace(ncbieaa, "Ile", "I");
844  NStr::ReplaceInPlace(ncbieaa, "Met", "M");
845  NStr::ReplaceInPlace(ncbieaa, "Cys", "C");
846  NStr::ReplaceInPlace(ncbieaa, "Phe", "F");
847  NStr::ReplaceInPlace(ncbieaa, "Tyr", "Y");
848  NStr::ReplaceInPlace(ncbieaa, "Trp", "W");
849  NStr::ReplaceInPlace(ncbieaa, "His", "H");
850  NStr::ReplaceInPlace(ncbieaa, "Lys", "K");
851  NStr::ReplaceInPlace(ncbieaa, "Arg", "R");
852  NStr::ReplaceInPlace(ncbieaa, "Gln", "Q");
853  NStr::ReplaceInPlace(ncbieaa, "Asn", "N");
854  NStr::ReplaceInPlace(ncbieaa, "Glu", "E");
855  NStr::ReplaceInPlace(ncbieaa, "Asp", "D");
856  NStr::ReplaceInPlace(ncbieaa, "Ser", "S");
857  NStr::ReplaceInPlace(ncbieaa, "Thr", "T");
858  NStr::ReplaceInPlace(ncbieaa, "X", "*");
859  NStr::ReplaceInPlace(ncbieaa, "?", "-");
860  return ncbieaa;
861 }
862 
863 
864 string CHgvsParser::s_hgvsUCaa2hgvsUL(const string& hgvsaa)
865 {
866  string s = hgvsaa;
867  NStr::ReplaceInPlace(s, "GLY", "Gly");
868  NStr::ReplaceInPlace(s, "PRO", "Pro");
869  NStr::ReplaceInPlace(s, "ALA", "Ala");
870  NStr::ReplaceInPlace(s, "VAL", "Val");
871  NStr::ReplaceInPlace(s, "LEU", "Leu");
872  NStr::ReplaceInPlace(s, "ILE", "Ile");
873  NStr::ReplaceInPlace(s, "MET", "Met");
874  NStr::ReplaceInPlace(s, "CYS", "Cys");
875  NStr::ReplaceInPlace(s, "PHE", "Phe");
876  NStr::ReplaceInPlace(s, "TYR", "Tyr");
877  NStr::ReplaceInPlace(s, "TRP", "Trp");
878  NStr::ReplaceInPlace(s, "HIS", "His");
879  NStr::ReplaceInPlace(s, "LYS", "Lys");
880  NStr::ReplaceInPlace(s, "ARG", "Arg");
881  NStr::ReplaceInPlace(s, "GLN", "Gln");
882  NStr::ReplaceInPlace(s, "ASN", "Asn");
883  NStr::ReplaceInPlace(s, "GLU", "Glu");
884  NStr::ReplaceInPlace(s, "ASP", "Asp");
885  NStr::ReplaceInPlace(s, "SER", "Ser");
886  NStr::ReplaceInPlace(s, "THR", "Thr");
887  return s;
888 }
889 
890 
892 {
 // NOTE(review): the signature line and the declaration of `literal` are missing from
 // this listing.
 // Converts the text of a raw_seq node into a sequence literal and validates its residues.
893  HGVS_ASSERT_RULE(i, eID_raw_seq);
894  TIterator it = i->children.begin();
895  string seq_str(it->value.begin(), it->value.end());
896 
898  if(context.GetMolType() == CContext::eMol_p) {
 // Protein context: translate HGVS residue codes and store as NCBIeaa.
899  seq_str = s_hgvsaa2ncbieaa(seq_str);
900  literal->SetSeq_data().SetNcbieaa().Set(seq_str);
901  } else {
902  if(context.GetMolType() == CContext::eMol_r) {
 // RNA context: uppercase the text and replace U with T before storing as IUPAC-NA.
903  seq_str = NStr::ToUpper(seq_str);
904  NStr::ReplaceInPlace(seq_str, "U", "T");
905  }
906  literal->SetSeq_data().SetIupacna().Set(seq_str);
907  }
908 
 // The length is taken after conversion, i.e. from the converted string.
909  literal->SetLength(seq_str.size());
910 
911  vector<TSeqPos> bad;
912  CSeqportUtil::Validate(literal->GetSeq_data(), &bad);
913 
 // Only the first offending position is reported.
914  if(bad.size() > 0) {
915  HGVS_THROW(eSemantic, "Invalid sequence at pos " + NStr::NumericToString(bad[0]) + " in " + seq_str);
916  }
917 
918  return literal;
919 }
920 
921 
923 {
 // NOTE(review): the signature line, the declaration of `vr`, and statements around the
 // optional deleted-sequence literal are missing from this listing.
 // Builds a variation at the context's location with two delta items: a deletion of
 // "this" location followed by the inserted sequence.
924  HGVS_ASSERT_RULE(i, eID_delins);
925  TIterator it = i->children.begin();
927  CVariation_inst& var_inst = vr->SetData().SetInstance();
929  vr->SetLocation().Assign(context.GetLoc());
930 
931  ++it; //skip "del"
932 
933 
 // An explicit deleted sequence may follow "del"; validation of it is commented out.
934  if(it->value.id() == SGrammar::eID_raw_seq) {
936  //context.Validate(*literal);
937  ++it;
939  }
940 
941  ++it; //skip "ins"
942 
943  CRef<CDelta_item> di_del(new CDelta_item);
944  di_del->SetAction(CDelta_item::eAction_del_at);
945  di_del->SetSeq().SetThis();
946  var_inst.SetDelta().push_back(di_del);
947 
948  TDelta di_ins = x_seq_ref(it, context);
949  var_inst.SetDelta().push_back(di_ins);
950 
951  return vr;
952 }
953 
955 {
 // NOTE(review): the signature line, the declarations of `vr` and `di`, and statements
 // around the optional deleted-sequence literal are missing from this listing.
 // Builds a variation at the context's location with a single "delete this" delta item.
956  HGVS_ASSERT_RULE(i, eID_deletion);
957  TIterator it = i->children.begin();
959  CVariation_inst& var_inst = vr->SetData().SetInstance();
960 
962  vr->SetLocation().Assign(context.GetLoc());
963 
965  di->SetAction(CDelta_item::eAction_del_at);
966  di->SetSeq().SetThis();
967  var_inst.SetDelta().push_back(di);
968 
969  ++it; //skip del
970 
 // NOTE(review): `it` is dereferenced without a check against children.end() - confirm
 // the grammar always yields a node after "del".
971  if(it->value.id() == SGrammar::eID_raw_seq) {
973  //context.Validate(*literal);
974  ++it;
976  }
977 
 // NOTE(review): the result of this call is discarded and the delta already holds an
 // item, so the call appears redundant.
978  var_inst.SetDelta();
979  return vr;
980 }
981 
982 
984 {
 // NOTE(review): the signature line, the declaration of `vr`, and one further statement
 // are missing from this listing.
 // Builds an insertion: the variation's location is a single point and the delta item is
 // the inserted sequence with action "insert before".
985  HGVS_ASSERT_RULE(i, eID_insertion);
986  TIterator it = i->children.begin();
987  ++it; //skip ins
989  CVariation_inst& var_inst = vr->SetData().SetInstance();
990 
992 
993  //point after which the insertion is.
994  CRef<CSeq_loc> pnt_loc(new CSeq_loc);
995 
996  //verify that the HGVS-location is of length two, as in HGVS coordinates insertion
997  //is denoted to be between the specified coordinates.
998  TSeqPos len = context.GetOffsetLoc().GetLength();
999  if(len != 2) {
1000  HGVS_THROW(eSemantic, "Encountered target location for an insertion with the length != 2");
1001  }
1002 
 // The point takes the id and strand of the context location and sits at its biological stop.
1003  pnt_loc->SetPnt().SetId().Assign(*context.GetLoc().GetId());
1004  pnt_loc->SetPnt().SetPoint(context.GetLoc().GetStop(eExtreme_Biological));
1005  pnt_loc->SetPnt().SetStrand(context.GetLoc().GetStrand());
1006 
1007  vr->SetLocation(*pnt_loc);
1008 
1009  //The delta consists of the self-location followed by the insertion sequence
1010  TDelta delta_ins = x_seq_ref(it, context);
1011  delta_ins->SetAction(CDelta_item::eAction_ins_before);
1012 
1013  var_inst.SetDelta().push_back(delta_ins);
1014 
1015  return vr;
1016 }
1017 
1018 
1020 {
 // NOTE(review): the signature line, the declaration of `vr`, and one further statement
 // are missing from this listing.
 // Builds a duplication: one delta item that refers to "this" location with multiplier 2.
1021  HGVS_ASSERT_RULE(i, eID_duplication);
1022  TIterator it = i->children.begin();
1024  CVariation_inst& var_inst = vr->SetData().SetInstance();
1026  vr->SetLocation().Assign(context.GetLoc());
1027 
1028  TDelta delta(new TDelta::TObjectType);
1029  delta->SetSeq().SetThis(); //delta->SetSeq().SetLoc(vr->SetLocation());
1030  delta->SetMultiplier(2);
1031  var_inst.SetDelta().push_back(delta);
1032 
1033  ++it; //skip dup
1034 
1035  //the next node is either expected length or expected sequence
1036  if(it != i->children.end() && it->value.id() == SGrammar::eID_seq_ref) {
 // The optional operand must be a literal whose length equals the location length; when
 // it also carries residues they are attached as the asserted sequence.
1037  TDelta dup_seq = x_seq_ref(it, context);
1038  if(!dup_seq->GetSeq().IsLiteral()) {
1039  HGVS_THROW(eSemantic, "Expected literal after 'dup'");
1040  } else if(dup_seq->GetSeq().GetLiteral().GetLength() != context.GetOffsetLoc().GetLength()) {
1041  HGVS_THROW(eSemantic, "The expected duplication length is not equal to the location length");
1042  } else if(dup_seq->GetSeq().GetLiteral().IsSetSeq_data()) {
1043  //context.Validate(dup_seq->GetSeq().GetLiteral());
1044  AttachAssertedSequence(*vr, dup_seq->GetSeq().GetLiteral());
1045  }
1046  }
1047 
1048  return vr;
1049 }
1050 
1051 
1053 {
 // NOTE(review): the signature line, the declaration of `vr`, and one further statement
 // are missing from this listing.
 // Parses "<from> > <to>" where both sides must be single-residue literals. The "from"
 // residue is attached as the asserted sequence; the "to" residue becomes the delta item.
1054  HGVS_ASSERT_RULE(i, eID_nuc_subst);
1055  TIterator it = i->children.begin();
1057  CVariation_inst& var_inst = vr->SetData().SetInstance();
1058 
1059  vr->SetLocation().Assign(context.GetLoc());
1061 
1062  CRef<CSeq_literal> seq_from = x_raw_seq(it, context);
1063  if(seq_from->GetLength() != 1) {
1064  HGVS_THROW(eSemantic, "Expected literal of length 1 left of '>'");
1065  }
1066 
1067  //context.Validate(*seq_from);
1068  AttachAssertedSequence(*vr, *seq_from);
1069 
1070  ++it;//skip to ">"
1071  ++it;//skip to next
1072  CRef<CSeq_literal> seq_to = x_raw_seq(it, context);
1073  if(seq_to->GetLength() != 1) {
1074  HGVS_THROW(eSemantic, "Expected literal of length 1 right of '>'");
1075  }
1076 
1077  TDelta delta(new TDelta::TObjectType);
1078  delta->SetSeq().SetLiteral(*seq_to);
1079  var_inst.SetDelta().push_back(delta);
1080 
1081  return vr;
1082 }
1083 
1084 
1086 {
      // x_nuc_inv (name taken from the dispatch in x_mut_inst; the signature line and
      // the declaration of `vr` are not present in this listing).
      // Builds an inversion: the variation location is a copy of context.GetLoc(), and
      // the single delta item is that same location with its strand flipped.
      // An optional second child is parsed as an unsigned length and must equal the
      // length of the location's total range, otherwise eSemantic is raised.
1087  HGVS_ASSERT_RULE(i, eID_nuc_inv);
1088 
1089  TIterator it = i->children.begin();
1091  CVariation_inst& var_inst = vr->SetData().SetInstance();
1093 
1094  CRef<CSeq_loc> loc(new CSeq_loc);
1095  loc->Assign(context.GetLoc());
1096  vr->SetLocation(*loc);
1097 
1098  TDelta delta(new TDelta::TObjectType);
1099  delta->SetSeq().SetLoc().Assign(*loc);
1100  delta->SetSeq().SetLoc().FlipStrand();
1101  var_inst.SetDelta().push_back(delta);
1102 
      // step past the first child; anything left is the stated inversion length
1103  ++it;
1104  if(it != i->children.end()) {
1105  string len_str(it->value.begin(), it->value.end());
1106  TSeqPos len = NStr::StringToUInt(len_str);
1107  if(len != loc->GetTotalRange().GetLength()) {
1108  HGVS_THROW(eSemantic, "Inversion length not equal to location length");
1109  }
1110  }
1111 
1112  return vr;
1113 }
1114 
1115 
1117 {
      // x_ssr (name taken from the dispatch in x_mut_inst; the signature line and the
      // declarations of `vr`, `literal` and `vr2` are not present in this listing).
      // Builds a microsatellite (short sequence repeat) variation. An optional leading
      // raw_seq child gives the repeat unit. The repeat counts that follow are either a
      // list of exact integers (one microsatellite variation per count, wrapped in a
      // genotype set) or a single, possibly fuzzy, integer.
1118  HGVS_ASSERT_RULE(i, eID_ssr);
1119  TIterator it = i->children.begin();
1121  vr->SetData().SetInstance().SetType(CVariation_inst::eType_microsatellite);
1122 
1123 
1125  if(it->value.id() == SGrammar::eID_raw_seq) {
1126  literal = x_raw_seq(it, context);
1127  ++it;
1128  }
1129 
1130 
1131  //The location may either specify a repeat unit, or point to the first base
1132  //of a repeat unit. In terms of Variation-inst convention, the location
1133  //and delta should be the whole repeat-unit.
1134  CRef<CSeq_loc> loc(new CSeq_loc);
1135  if(context.GetLoc().IsPnt()) {
1136  loc->SetInt().SetId().Assign(*context.GetLoc().GetId());
1137  loc->SetInt().SetStrand(context.GetLoc().GetStrand());
1138 
      // d = how far the interval extends beyond the given point (0 when no repeat unit was given)
1139  TSeqPos d = literal.IsNull() ? 0 : literal->GetLength() - 1;
1140  if(context.GetLoc().GetStrand() == eNa_strand_minus) {
      // on the minus strand the point becomes the interval's "to" and the unit extends toward lower coordinates
1141  loc->SetInt().SetFrom(context.GetLoc().GetPnt().GetPoint() - d);
1142  loc->SetInt().SetTo(context.GetLoc().GetPnt().GetPoint());
1143  } else {
1144  loc->SetInt().SetFrom(context.GetLoc().GetPnt().GetPoint());
1145  loc->SetInt().SetTo(context.GetLoc().GetPnt().GetPoint() + d);
1146  }
1147  } else {
1148  loc->Assign(context.GetLoc());
1149  if(!literal.IsNull()) {
1150  //context.Validate(*literal);
      // NOTE(review): the statement that followed here (listing line 1151) is missing from this listing
1152  }
1153  }
1154 
1155  vr->SetLocation().Assign(*loc);
1156 
1157  if(it->value.id() == SGrammar::eID_ssr) { // list('['>>int_p>>']', '+') with '[',']','+' nodes discarded;
1158  //Note: see ssr grammar in the header for reasons why we have to match all alleles here
1159  //rather than match them separately as mut_insts
1160 
1161  vr->SetData().SetSet().SetType(CVariation_ref::TData::TSet::eData_set_type_genotype);
1162  for(; it != i->children.end(); ++it) {
      // each remaining child is one exact repeat count
1163  string s1(it->value.begin(), it->value.end());
1165  vr2->SetData().SetInstance().SetType(CVariation_inst::eType_microsatellite);
1166 
1167  TDelta delta(new TDelta::TObjectType);
1168  delta->SetSeq().SetLoc().Assign(*loc);
1169  delta->SetMultiplier(NStr::StringToInt(s1));
1170 
1171  vr2->SetData().SetInstance().SetDelta().push_back(delta);
1172  vr2->SetLocation().Assign(*loc);
1173  vr->SetData().SetSet().SetVariations().push_back(vr2);
1174  }
      // a one-element set is replaced by its only member
1175  vr = x_unwrap_iff_singleton(*vr);
1176  } else {
1177  TDelta delta(new TDelta::TObjectType);
1178  delta->SetSeq().SetLoc().Assign(*loc);
1179 
1180  SFuzzyInt int_fuzz = x_int_fuzz(it, context);
1181  delta->SetMultiplier(int_fuzz.value);
1182  if(int_fuzz.fuzz.IsNull()) {
1183  ;
1184  } else {
1185  delta->SetMultiplier_fuzz(*int_fuzz.fuzz);
1186  }
1187  vr->SetData().SetInstance().SetDelta().push_back(delta);
1188  }
1189 
1190  return vr;
1191 }
1192 
1193 
1195 {
      // x_translocation (name taken from the calls in x_mut_inst and x_expr1; the
      // signature line and the declaration of `vr` are not present in this listing).
      // The variation location is parsed from the LAST child node with x_seq_loc; the
      // single delta item is a null Seq-loc.
1196  HGVS_ASSERT_RULE(i, eID_translocation);
1197  TIterator it = i->children.end() - 1; //note: seq-loc follows iscn expression, i.e. last child
1199  CVariation_inst& var_inst = vr->SetData().SetInstance();
1201 
1202  CRef<CSeq_loc> loc = x_seq_loc(it, context);
1203  vr->SetLocation(*loc);
1204  TDelta delta(new TDelta::TObjectType);
1205  delta->SetSeq().SetLoc().SetNull();
1206  var_inst.SetDelta().push_back(delta);
1207 
1208  return vr;
1209 }
1210 
1211 
1213 {
      // x_conversion (name taken from the dispatch in x_mut_inst; the signature line and
      // the declaration of `vr` are not present in this listing).
      // The variation is located at context.GetLoc(). Its single delta item is a
      // Seq-loc "equiv" holding two members, in this order: a copy of the context
      // location, then the location parsed from the second child with x_seq_loc.
1214  HGVS_ASSERT_RULE(i, eID_conversion);
1215  TIterator it = i->children.begin();
1217  CVariation_inst& var_inst = vr->SetData().SetInstance();
1219  vr->SetLocation().Assign(context.GetLoc());
1220 
1221  CRef<CSeq_loc> loc_this(new CSeq_loc);
1222  loc_this->Assign(context.GetLoc());
1223 
1224  ++it;
1225  CRef<CSeq_loc> loc_other = x_seq_loc(it, context);
1226 
1227  TDelta delta(new TDelta::TObjectType);
1228  delta->SetSeq().SetLoc().SetEquiv().Set().push_back(loc_this);
1229  delta->SetSeq().SetLoc().SetEquiv().Set().push_back(loc_other);
1230  var_inst.SetDelta().push_back(delta);
1231 
1232  return vr;
1233 }
1234 
1235 
1237 {
      // x_prot_fs (name taken from the dispatch in x_mut_inst; the signature line and
      // the declaration of `vr` are not present in this listing).
      // Frameshift: only accepted in protein context (eContext is raised otherwise).
      // The variation carries the note "Frameshift", the context location and a
      // frameshift consequence; for the "fsX#" form the integer is stored as X_length.
1238  HGVS_ASSERT_RULE(i, eID_prot_fs);
1239  TIterator it = i->children.begin();
1241 
1242  if(context.GetMolType() != context.eMol_p) {
1243  HGVS_THROW(eContext, "Frameshift can only be specified in protein context");
1244  }
1245 
1246  vr->SetData().SetNote("Frameshift");
1247  vr->SetLocation().Assign(context.GetLoc());
1248 
1249  typedef CVariation_ref::TConsequence::value_type::TObjectType TConsequence;
1250  CRef<TConsequence> cons(new TConsequence);
1251  cons->SetFrameshift();
1252  vr->SetConsequence().push_back(cons);
1253 
1254 
1255  ++it; //skip 'fs'
1256  if(it != i->children.end()) {
1257  //fsX# description: the remaining tokens are 'X' and integer
      // NOTE(review): `it` is dereferenced after the next increment without an end() check;
      // this relies on an integer token always following 'X' - confirm against the grammar
1258  ++it; //skip 'X'
1259  string s(it->value.begin(), it->value.end());
1260  int x_length = NStr::StringToInt(s);
1261  cons->SetFrameshift().SetX_length(x_length);
1262  }
1263 
1264  return vr;
1265 }
1266 
1267 
1269 {
      // x_prot_ext (name taken from the calls in x_mut_inst and x_expr2; the signature
      // line and the declaration of `vr` are not present in this listing).
      // Protein extension. The first child is the extension kind ("extMet" or "extX"),
      // the second is a signed integer length. Only valid in protein context.
      //  - extMet: location is point 0; deltas are [extension literal, this].
      //  - extX:   location is point (context length - 1); deltas are [this, extension literal].
      // The extension literal has length abs(ext_len) and no sequence data.
      // Raises eContext outside protein context, eSemantic on a wrong-signed length and
      // eGrammatic on an unrecognised extension kind.
1270  HGVS_ASSERT_RULE(i, eID_prot_ext);
1271  TIterator it = i->children.begin();
1272 
1273  if(context.GetMolType() != CContext::eMol_p) {
1274  HGVS_THROW(eContext, "Expected protein context");
1275  }
1276 
1278  CVariation_inst& var_inst = vr->SetData().SetInstance();
1280  string ext_type_str(it->value.begin(), it->value.end());
1281  ++it;
1282  string ext_len_str(it->value.begin(), it->value.end());
1283  int ext_len = NStr::StringToInt(ext_len_str);
1284 
1285  vr->SetLocation().SetPnt().SetId().Assign(context.GetId());
1286  vr->SetLocation().SetPnt().SetStrand(eNa_strand_plus);
1287 
1288  TDelta delta(new TDelta::TObjectType);
1289  delta->SetSeq().SetLiteral().SetLength(abs(ext_len));
1290 
1291  TDelta delta_this(new TDelta::TObjectType);
1292  delta_this->SetSeq().SetThis();
1293 
1294  if(ext_type_str == "extMet") {
1295  if(ext_len > 0) {
1296  HGVS_THROW(eSemantic, "extMet must be followed by a negative integer");
1297  }
1298  vr->SetLocation().SetPnt().SetPoint(0);
1299  //extension precedes first AA
1300  var_inst.SetDelta().push_back(delta);
1301  var_inst.SetDelta().push_back(delta_this);
1302  } else if(ext_type_str == "extX") {
1303  if(ext_len < 0) {
      // message previously read "exX", which did not match the token being validated
1304  HGVS_THROW(eSemantic, "extX must be followed by a non-negative integer");
1305  }
1306 
1307  vr->SetLocation().SetPnt().SetPoint(context.GetLength() - 1);
1308  //extension follows last AA
1309  var_inst.SetDelta().push_back(delta_this);
1310  var_inst.SetDelta().push_back(delta);
1311  } else {
1312  HGVS_THROW(eGrammatic, "Unexpected ext_type: " + ext_type_str);
1313  }
1314 
1315  return vr;
1316 }
1317 
1318 
1320 {
      // x_prot_missense (name taken from the dispatch in x_mut_inst; the signature line
      // and the declaration of `vr` are not present in this listing).
      // The only child must be an aminoacid node; the text of its first child is
      // converted with s_hgvsaa2ncbieaa and stored as an NCBIeaa literal of length 1 in
      // the single delta item. Location is context.GetLoc().
      // Raises eContext when the context is not a protein.
1321  HGVS_ASSERT_RULE(i, eID_prot_missense);
1322  TIterator it = i->children.begin();
1323 
1324  HGVS_ASSERT_RULE(it, eID_aminoacid);
1325  TIterator it2 = it->children.begin();
1326 
1327  string seq_str(it2->value.begin(), it2->value.end());
1328  seq_str = s_hgvsaa2ncbieaa(seq_str);
1329 
      // note: the amino-acid text is converted before the context check below
1330  if(context.GetMolType() != CContext::eMol_p) {
1331  HGVS_THROW(eContext, "Expected protein context");
1332  }
1333 
1335  CVariation_inst& var_inst = vr->SetData().SetInstance();
1337 
1338  vr->SetLocation().Assign(context.GetLoc());
1339 
1340  TDelta delta(new TDelta::TObjectType);
1341  delta->SetSeq().SetLiteral().SetSeq_data().SetNcbieaa().Set(seq_str);
1342  delta->SetSeq().SetLiteral().SetLength(1);
1343  var_inst.SetDelta().push_back(delta);
1344 
1345  return vr;
1346 }
1347 
1348 
1350 {
      // x_identity (name taken from the call x_identity(context) in x_expr2; the
      // signature line and the declaration of `vr` are not present in this listing).
      // Builds a variation whose only delta item is the "this" sequence.
      // The location is the context location when one is set; otherwise it is an
      // "empty" Seq-loc carrying just the context's sequence id.
1352  CVariation_inst& var_inst = vr->SetData().SetInstance();
1354 
1355  CRef<CSeq_loc> loc(new CSeq_loc);
1356  if(context.IsSetLoc()) {
1357  loc->Assign(context.GetLoc());
1358  } else {
1359  loc->SetEmpty().Assign(context.GetId());
1360  }
1361 
1362  vr->SetLocation(*loc);
1363 
1364  TDelta delta(new TDelta::TObjectType);
1365  delta->SetSeq().SetThis();
1366  var_inst.SetDelta().push_back(delta);
1367 
1368  return vr;
1369 }
1370 
1371 
1373 {
      // x_mut_inst (name taken from the call in x_expr3; the signature line and the
      // declaration of `variation_ref` are not present in this listing).
      // Dispatches a single mutation-instance node. A terminal child (same rule id as
      // mut_inst) is either "?" (unknown variation at the context location) or "=";
      // any other terminal raises eGrammatic. A non-terminal child is routed to the
      // handler that matches its rule id.
1374  HGVS_ASSERT_RULE(i, eID_mut_inst);
1375 
1376  TIterator it = i->children.begin();
1377 
1379  if(it->value.id() == SGrammar::eID_mut_inst) {
1380  string s(it->value.begin(), it->value.end());
1381  if(s == "?") {
1382  variation_ref->SetData().SetUnknown();
1383  variation_ref->SetLocation().Assign(context.GetLoc());
1384  } else if(s == "=") {
      // NOTE(review): the statement for "=" (listing line 1385) is missing from this listing
1386  } else {
1387  HGVS_THROW(eGrammatic, "Unexpected inst terminal: " + s);
1388  }
1389  } else {
1390  variation_ref =
1391  it->value.id() == SGrammar::eID_delins ? x_delins(it, context)
1392  : it->value.id() == SGrammar::eID_deletion ? x_deletion(it, context)
1393  : it->value.id() == SGrammar::eID_insertion ? x_insertion(it, context)
1394  : it->value.id() == SGrammar::eID_duplication ? x_duplication(it, context)
1395  : it->value.id() == SGrammar::eID_nuc_subst ? x_nuc_subst(it, context)
1396  : it->value.id() == SGrammar::eID_nuc_inv ? x_nuc_inv(it, context)
1397  : it->value.id() == SGrammar::eID_ssr ? x_ssr(it, context)
1398  : it->value.id() == SGrammar::eID_conversion ? x_conversion(it, context)
1399  : it->value.id() == SGrammar::eID_prot_ext ? x_prot_ext(it, context)
1400  : it->value.id() == SGrammar::eID_prot_fs ? x_prot_fs(it, context)
1401  : it->value.id() == SGrammar::eID_prot_missense ? x_prot_missense(it, context)
1402  : it->value.id() == SGrammar::eID_translocation ? x_translocation(it, context)
      // NOTE(review): the final fallback arm of this conditional chain (listing line 1403) is missing from this listing
1404 
      // asserting against eID_NONE is this file's way of reporting an unexpected rule id
1405  if(variation_ref.IsNull()) {
1406  HGVS_ASSERT_RULE(it, eID_NONE);
1407  }
1408  }
1409 
1410  return variation_ref;
1411 }
1412 
1414 {
      // x_expr1 (name taken from the recursive call below and from x_list; the signature
      // line and the declaration of `vr` are not present in this listing).
      // Top-level expression. Accepted forms of the first child:
      //  - "(" terminal: parse the nested expr1 and mark the result as not validated;
      //  - list1a: delegate to x_list;
      //  - header: build a local context with x_header, then parse the next child as expr2;
      //  - translocation: delegate to x_translocation.
      // Anything else is reported through HGVS_ASSERT_RULE(it, eID_NONE).
1415  HGVS_ASSERT_RULE(i, eID_expr1);
1416  TIterator it = i->children.begin();
1418 
1419  string s(it->value.begin(), it->value.end());
1420  if(it->value.id() == i->value.id() && s == "(") {
1421  ++it;
1422  vr = x_expr1(it, context);
1423  vr->SetValidated(false);
1424  } else if(it->value.id() == SGrammar::eID_list1a) {
1425  vr = x_list(it, context);
1426  } else if(it->value.id() == SGrammar::eID_header) {
1427  CContext local_ctx = x_header(it, context);
1428  ++it;
1429  vr = x_expr2(it, local_ctx);
1430  } else if(it->value.id() == SGrammar::eID_translocation) {
1431  vr = x_translocation(it, context);
1432  } else {
1433  HGVS_ASSERT_RULE(it, eID_NONE);
1434  }
1435 
1436  return vr;
1437 }
1438 
1440 {
      // x_expr2 (name taken from the recursive call below and from x_expr1/x_list; the
      // signature line and the declaration of `vr` are not present in this listing).
      // Expression inside a sequence header. Accepted forms of the first child:
      //  - "(" terminal: parse the nested expr2 and mark the result as not validated;
      //  - list2a: delegate to x_list;
      //  - location: parse it, store it in a copy of the context, parse the next child
      //    as expr3, then add intronic-offset delta items and any asserted sequence
      //    that came from the location itself;
      //  - prot_ext: delegate to x_prot_ext;
      //  - terminal "?", "0", "0?" or "=": whole-sequence statements (see below).
      // Anything else is reported through HGVS_THROW / HGVS_ASSERT_RULE(it, eID_NONE).
1441  HGVS_ASSERT_RULE(i, eID_expr2);
1442  TIterator it = i->children.begin();
1444 
1445  string s(it->value.begin(), it->value.end());
1446  if(it->value.id() == i->value.id() && s == "(") {
1447  ++it;
1448  vr = x_expr2(it, context);
1449  vr->SetValidated(false);
1450  } else if(it->value.id() == SGrammar::eID_list2a) {
1451  vr = x_list(it, context);
1452  } else if(it->value.id() == SGrammar::eID_location) {
1453  CContext local_context(context);
1454  SOffsetLoc ofloc = x_location(it, local_context);
1455  local_context.SetLoc(ofloc);
1456  ++it;
1457  vr = x_expr3(it, local_context);
1458 
1459  //if the location is intronic, create delta-items for intronic offsets
      // di1/di2 stay null when the corresponding offset is zero and has no fuzz
1460  CRef<CDelta_item> di1;
1461  if(ofloc.start_offset.value || ofloc.start_offset.fuzz) {
1462  di1.Reset(new CDelta_item);
1463  di1->SetAction(CDelta_item::eAction_offset);
1464  di1->SetSeq().SetLiteral().SetLength(abs(ofloc.start_offset.value));
      // the literal length holds the magnitude; a multiplier of -1 marks a negative offset
1465  if(ofloc.start_offset.value < 0) {
1466  di1->SetMultiplier(-1);
1467  }
1468 
1469  if(ofloc.start_offset.fuzz) {
1470  di1->SetSeq().SetLiteral().SetFuzz().Assign(*ofloc.start_offset.fuzz);
1471  }
1472  }
1473  CRef<CDelta_item> di2;
1474  if(ofloc.stop_offset.value || ofloc.stop_offset.fuzz) {
1475  di2.Reset(new CDelta_item);
1476  di2->SetAction(CDelta_item::eAction_offset);
1477  if(ofloc.stop_offset.value < 0) {
1478  di2->SetMultiplier(-1);
1479  }
1480 
1481  di2->SetSeq().SetLiteral().SetLength(abs(ofloc.stop_offset.value));
1482  if(ofloc.stop_offset.fuzz) {
1483  di2->SetSeq().SetLiteral().SetFuzz().Assign(*ofloc.stop_offset.fuzz);
1484  }
1485  }
1486 
1487  //attach intronic offsets to the variation delta-items
      // applies to every CVariation_inst reachable from vr: start offset goes first, stop offset last
1488  for(CTypeIterator<CVariation_inst> it2(Begin(*vr)); it2; ++it2) {
1489  CVariation_inst& inst = *it2;
1490  if(di1) {
1491  inst.SetDelta().push_front(di1);
1492  }
1493  if(di2) {
1494  inst.SetDelta().push_back(di2);
1495  }
1496  }
1497 
1498 
1499  //in some cases, e.g. protein variations, asserted sequence comes from the location-specification, rather than
1500  //variation-specification,
1501  if(ofloc.asserted_sequence != "") {
1502  CSeq_literal tmp_literal;
1503  tmp_literal.SetLength(ofloc.GetLength());
1504  if(context.GetMolType() == CContext::eMol_p) {
1505  tmp_literal.SetSeq_data().SetNcbieaa().Set(ofloc.asserted_sequence);
1506  } else {
1507  tmp_literal.SetSeq_data().SetIupacna().Set(ofloc.asserted_sequence);
1508  }
1509  AttachAssertedSequence(*vr, tmp_literal);
1510  }
1511 
1512  } else if(it->value.id() == SGrammar::eID_prot_ext) {
1513  vr = x_prot_ext(it, context);
1514  } else if(it->value.id() == i->value.id()) {
      // terminal token: a statement about the whole sequence rather than a location
1515  vr.Reset(new CVariation_ref);
1516  if(s == "?") {
1517  vr->SetData().SetUnknown();
1518  vr->SetLocation().SetEmpty().Assign(context.GetId());
1519  } else if(s == "0?" || s == "0") {
1520  vr->SetData().SetUnknown();
1521  typedef CVariation_ref::TConsequence::value_type::TObjectType TConsequence;
1522  CRef<TConsequence> cons(new TConsequence);
1523  cons->SetNote("loss of product");
1524  vr->SetConsequence().push_back(cons);
1525 
1526  vr->SetLocation().SetEmpty().Assign(context.GetId());
      // the trailing '?' marks the statement as not validated
1527  if(s == "0?") {
1528  vr->SetValidated(false);
1529  }
1530  } else if(s == "=") {
1531  vr = x_identity(context);
1532  } else {
1533  HGVS_THROW(eGrammatic, "Unexpected expr terminal: " + s);
1534  }
1535  } else {
1536  HGVS_ASSERT_RULE(it, eID_NONE);
1537  }
1538 
1539  return vr;
1540 }
1541 
1542 
1544 {
      // x_expr3 (name taken from the recursive call below and from x_expr2/x_list; the
      // signature line and the declaration of `vr` are not present in this listing).
      // Expression at a known location. Accepted forms of the first child:
      //  - "(" terminal: parse the nested expr3 and mark the result as not validated;
      //  - list3a: delegate to x_list;
      //  - mut_inst: parse every child with x_mut_inst into a "compound" set, which is
      //    then replaced by its only member if it has exactly one.
      // Anything else is reported through HGVS_ASSERT_RULE(it, eID_NONE).
1545  HGVS_ASSERT_RULE(i, eID_expr3);
1546  TIterator it = i->children.begin();
1548 
1549  string s(it->value.begin(), it->value.end());
1550  if(it->value.id() == i->value.id() && s == "(") {
1551  ++it;
1552  vr = x_expr3(it, context);
1553  vr->SetValidated(false);
1554  } else if(it->value.id() == SGrammar::eID_list3a) {
1555  vr = x_list(it, context);
1556  } else if(it->value.id() == SGrammar::eID_mut_inst) {
1557  vr.Reset(new CVariation_ref);
1558  vr->SetData().SetSet().SetType(CVariation_ref::TData::TSet::eData_set_type_compound);
1559  for(; it != i->children.end(); ++it) {
1560  CRef<CVariation_ref> inst_ref = x_mut_inst(it, context);
1561  vr->SetData().SetSet().SetVariations().push_back(inst_ref);
1562  }
1563  vr = x_unwrap_iff_singleton(*vr);
1564  } else {
1565  HGVS_ASSERT_RULE(it, eID_NONE);
1566  }
1567 
1568  return vr;
1569 }
1570 
1572 {
      // x_list (name taken from the recursive call below and from x_expr1/2/3; the
      // signature line, the declarations of the outer `vr` and the per-element `vr`,
      // and the statements inside the delimiter branches are not present in this listing).
      // Parses a delimited list of expressions into a variation set. Children alternate
      // element, delimiter, element, ...; all delimiters in one list must be identical.
      // The delimiter (";", "+", "(+)" or ",") selects what is done with the set after
      // the loop. A set with exactly one member is replaced by that member.
1573  if(!SGrammar::s_is_list(i->value.id())) {
1574  HGVS_ASSERT_RULE(i, eID_NONE);
1575  }
1576 
1578  TVariationSet& varset = vr->SetData().SetSet();
1580  string delimiter = "";
1581 
1582  for(TIterator it = i->children.begin(); it != i->children.end(); ++it) {
1583  //will process two elements from the children list: delimiter and following expression; the first one does not have the delimiter.
1584  if(it != i->children.begin()) {
1585  string delim(it->value.begin(), it->value.end());
      // a delimiter is a terminal, i.e. it carries the list's own rule id
1586  if(it->value.id() != i->value.id()) {
1587  HGVS_THROW(eGrammatic, "Expected terminal");
1588  } else if(delimiter == "") {
1589  //first time
1590  delimiter = delim;
1591  } else if(delimiter != delim) {
1592  HGVS_THROW(eSemantic, "Non-unique delimiters within a list");
1593  }
      // advance from the delimiter to the expression that follows it
1594  ++it;
1595  }
1596 
1598  if(it->value.id() == SGrammar::eID_expr1) {
1599  vr = x_expr1(it, context);
1600  } else if(it->value.id() == SGrammar::eID_expr2) {
1601  vr = x_expr2(it, context);
1602  } else if(it->value.id() == SGrammar::eID_expr3) {
1603  vr = x_expr3(it, context);
1604  } else if(SGrammar::s_is_list(it->value.id())) {
1605  vr = x_list(it, context);
1606  } else {
1607  HGVS_ASSERT_RULE(it, eID_NONE);
1608  }
1609 
1610  varset.SetVariations().push_back(vr);
1611  }
1612 
      // NOTE(review): the statement inside each delimiter branch below is missing from this
      // listing; presumably each one sets the set type - confirm against the full source
1613  if(delimiter == ";") {
1615  } else if(delimiter == "+") {
1617  } else if(delimiter == "(+)") {
1619  } else if(delimiter == ",") {
1620  //if the context is rna (r.) then this describes multiple products from the same precursor;
1621  //otherwise this describes mosaic cases
1622  if(context.GetMolType(false) == CContext::eMol_r) {
1623  //Note: GetMolType(check=false) because MolType may not eMol_not_set, as
1624  //there may not be a sequence in context, e.g.
1625  //[NM_004004.2:c.35delG,NM_006783.1:c.689_690insT]" - individual
1626  //elements have their own sequence context, but none at the set level.
1628  } else {
1630  }
1631  } else if(delimiter == "") {
1632  ;//single-element list
1633  } else {
1634  HGVS_THROW(eGrammatic, "Unexpected delimiter " + delimiter);
1635  }
1636 
1637  vr = x_unwrap_iff_singleton(*vr);
1638  return vr;
1639 }
1640 
1641 
1643 {
      // x_root (name taken from the call in the AsVariationFeat body; the signature line
      // and the lines that produce `vr` are not present in this listing).
      // Wraps the parsed variation in a Seq-feat: the variation's location is copied to
      // the feature, then reset on the variation itself, so the location is stored only
      // once - on the feature. The variation becomes the feature's data.
1644  HGVS_ASSERT_RULE(i, eID_root);
1645 
1647 
1650 
1651  CRef<CSeq_feat> feat(new CSeq_feat);
1652  feat->SetLocation().Assign(vr->GetLocation());
1653  vr->ResetLocation();
1654  feat->SetData().SetVariation(*vr);
1655 
1656  return feat;
1657 }
1658 
1660 {
      // x_unwrap_iff_singleton (name taken from its call sites; the signature line is not
      // present in this listing).
      // If `v` is a set holding exactly one variation, returns that member; otherwise
      // returns a CRef to `v` itself.
1661  if(v.GetData().IsSet() && v.GetData().GetSet().GetVariations().size() == 1) {
1662  return *v.SetData().SetSet().SetVariations().begin();
1663  } else {
1664  return CRef<CVariation_ref>(&v);
1665  }
1666 }
1667 
1668 
1670 {
      // AsVariationFeat (name taken from the recursive call below; the signature line and
      // the declaration of `context` are not present in this listing).
      // Parses `hgvs_expression` with the grammar and converts the parse tree into a
      // Seq-feat whose variation is named after the input expression.
      // An incomplete parse raises eGrammatic with the 1-based position reached.
      // If anything throws a CException while the fOpFlags_RelaxedAA bit is set and the
      // expression contains "p.", the expression is rewritten with s_hgvsUCaa2hgvsUL
      // and parsed once more with that bit cleared; otherwise the exception is rethrown.
1671  tree_parse_info<> info = pt_parse(hgvs_expression.c_str(), s_grammar, +space_p);
1672  CRef<CSeq_feat> feat;
1673 
1674  try {
1675  if(!info.full) {
1676 #if 0
1677  CNcbiOstrstream ostr;
1678  tree_to_xml(ostr, info.trees, hgvs_expression.c_str() , CHgvsParser::SGrammar::s_GetRuleNames());
1679  string tree_str = CNcbiOstrstreamToString(ostr);
1680 #endif
1681  HGVS_THROW(eGrammatic, "Syntax error at pos " + NStr::NumericToString(info.length + 1));
1682  } else {
1684  feat = x_root(info.trees.begin(), context);
1685  feat->SetData().SetVariation().SetName(hgvs_expression);
1686  }
1687  } catch (CException& e) {
      // Bit test, not logical AND: with "flags && fOpFlags_RelaxedAA" any other set bit kept
      // the retry enabled after the bit was cleared, allowing unbounded recursion.
      // NStr::Find returns NPOS when the pattern is absent and 0 when it is at the very
      // start, so the result must be compared to NPOS rather than used as a boolean.
1688  if((flags & fOpFlags_RelaxedAA) && NStr::Find(hgvs_expression, "p.") != NPOS) {
1689  //expression was protein, try non-hgvs-compliant representation of prots
1690  string hgvs_expr2 = s_hgvsUCaa2hgvsUL(hgvs_expression);
1691  TOpFlags flags2 = flags & ~fOpFlags_RelaxedAA; //unset the bit so we don't infinite-recurse
1692  feat = AsVariationFeat(hgvs_expr2, flags2);
1693  } else {
1694  NCBI_RETHROW_SAME(e, "");
1695  }
1696  }
1697 
1698  return feat;
1699 }
1700 
1701 
1702 
1703 
// Public entry point for converting a variation feature to an HGVS expression.
// Currently disabled: it unconditionally raises eLogic "Not implemented", so the
// call to x_AsHgvsExpression below is never reached.
1704 string CHgvsParser::AsHgvsExpression(const CSeq_feat& variation_feat)
1705 {
1706  HGVS_THROW(eLogic, "Not implemented");
1707 
      // unreachable while the throw above is in place
1708  string s = x_AsHgvsExpression(variation_feat.GetData().GetVariation(),
1709  variation_feat.GetLocation(),
1710  true);
1711  return s;
1712 }
1713 
1715  const CVariation_ref& variation,
1716  const CSeq_loc& parent_loc, //if variation has seq-loc set, it will be used instead.
1717  bool is_top_level)
1718 {
      // x_AsHgvsExpression (name taken from the recursive call below; the line carrying
      // the return type and function name is not present in this listing).
      // Renders `variation` as HGVS text. A set is rendered member by member, joined
      // with a delimiter chosen from the set type; an instance is rendered by
      // x_InstToString. The location string is prepended when the variation has its own
      // location or when this is the top-level call.
1719  string outs("");
1720 
1721  CRef<CSeq_loc> loc(new CSeq_loc);
1722  loc->Assign(variation.IsSetLocation() ? variation.GetLocation() : parent_loc);
1723 
1724  if(variation.GetData().IsSet()) {
1725  const CVariation_ref::TData::TSet& vset = variation.GetData().GetSet();
      // NOTE(review): most arms of the delimiter selection (listing lines 1727-1732) are
      // missing from this listing; only the final fallback "](+)[" is visible
1726  string delim_type =
1733  : "](+)[";
1734 
1736  string delim = "";
1737  ITERATE(CVariation_ref::TData::TSet::TVariations, it, variation.GetData().GetSet().GetVariations()) {
      // delim is empty for the first member, so the delimiter only appears between members
1738  outs += delim + x_AsHgvsExpression(**it, *loc, false);
1739  delim = delim_type;
1740  }
1742 
1743  } else {
1744  outs = x_InstToString(variation.GetData().GetInstance(), *loc);
1745  //note: loc is the in/out parameter here
1746  }
1747 
1748  if(variation.IsSetLocation() || is_top_level) {
1749  string loc_str = x_SeqLocToStr(*loc, true);
1750  outs = loc_str + outs;
1751  }
1752 
1753  return outs;
1754 }
1755 
1756 
1757 
1758 ///////////////////////////////////////////////////////////////////////////////
1759 //
1760 // Methods and functions pertaining to converting variation to HGVS
1761 //
1762 ///////////////////////////////////////////////////////////////////////////////
1763 
1764 string CHgvsParser::x_InstToString(const CVariation_inst& inst, CSeq_loc& loc)
1765 {
1766  string out = "";
1767 
1768  bool append_delta = false;
1769  bool flipped_strand = false;
1770 
1771  CRef<CSeq_loc> hgvs_loc(new CSeq_loc);
1772  hgvs_loc->Assign(loc);
1774  hgvs_loc->FlipStrand();
1775  flipped_strand = true;
1776  }
1777 
1778 
1780  out = "=";
1781  } else if(inst.GetType() == CVariation_inst::eType_inv) {
1782  out = "inv";
1783  } else if(inst.GetType() == CVariation_inst::eType_snv) {
1784  out = x_LocToSeqStr(*hgvs_loc) + ">"; //on the +strand
1785  append_delta = true;
1786  } else if(inst.GetType() == CVariation_inst::eType_mnp
1789  {
1790  append_delta = true;
1791  } else if(inst.GetType() == CVariation_inst::eType_del) {
1792  out = "del";
1793  size_t len = sequence::GetLength(loc, NULL);
1794  if(len == 1) {
1795  ; //...del
1796  } else if(len < 10) {
1797  //...delACGT
1798  out += x_LocToSeqStr(*hgvs_loc);
1799  } else {
1800  //...del25
1802  }
1803  } else if(inst.GetType() == CVariation_inst::eType_ins) {
1804  //If the insertion is this*2 then this is a dup
1805  bool is_dup = false;
1806  if(inst.GetDelta().size() == 1) {
1807  const CDelta_item& delta = **inst.GetDelta().begin();
1808  if(delta.GetSeq().IsThis() && delta.IsSetMultiplier() && delta.GetMultiplier() == 2) {
1809  is_dup = true;
1810  }
1811  }
1812 
1813  if(is_dup) {
1814  out = "dup";
1815  } else {
1816  out = "ins";
1817  append_delta = true;
1818 
1819  //Whether the second nucleotide will be upstream or downsream of the loc will depend
1820  //on whether the loc is on plus or minus strand, and where 'this' is in variation-ref.
1821 
1822  if(inst.GetDelta().size() != 2) {
1823  NCBI_THROW(CException, eUnknown, "Expected inst.delta to be of length 2");
1824  } else {
1825  bool ins_after = false;
1826  if(inst.GetDelta().begin()->GetObject().GetSeq().IsThis()) {
1827  ins_after = true;
1828  } else if(inst.GetDelta().rbegin()->GetObject().GetSeq().IsThis()) {
1829  ins_after = false;
1830  } else {
1831  NCBI_THROW(CException, eUnknown, "In insertion, expected expected first or last element of delta to be 'this'");
1832  }
1833 
1835  ins_after = !ins_after;
1836  }
1837 
1838  hgvs_loc->SetInt().SetId().Assign(sequence::GetId(loc, NULL));
1839  hgvs_loc->SetInt().SetStrand(eNa_strand_plus);
1840  if(ins_after) {
1841  hgvs_loc->SetInt().SetFrom(sequence::GetStop(loc, NULL, eExtreme_Positional));
1842  hgvs_loc->SetInt().SetTo(1 + sequence::GetStop(loc, NULL, eExtreme_Positional));
1843  } else {
1844  hgvs_loc->SetInt().SetFrom(-1 + sequence::GetStart(loc, NULL, eExtreme_Positional));
1845  hgvs_loc->SetInt().SetTo(sequence::GetStart(loc, NULL, eExtreme_Positional));
1846  }
1847  }
1848  }
1849 
1850  } else if(inst.GetType() == CVariation_inst::eType_microsatellite) {
1851  out = "";
1852 
1853  // In HGVS land the location is not the whole repeat, as in variation-ref,
1854  // but the first base of the repeat unit.
1855 
1856  hgvs_loc.Reset(new CSeq_loc);
1857  hgvs_loc->SetPnt().SetId().Assign(sequence::GetId(loc, NULL));
1858  hgvs_loc->SetPnt().SetStrand(eNa_strand_plus);
1859  hgvs_loc->SetPnt().SetPoint(sequence::GetStart(loc, NULL, eExtreme_Biological));
1860 
1861  append_delta = true;
1862  } else if(inst.GetType() == CVariation_inst::eType_prot_missense
1865  {
1866  append_delta = true;
1867  } else if(inst.GetType() == CVariation_inst::eType_prot_silent) {
1868  out = "=";
1869  } else {
1870  NCBI_THROW(CException, eUnknown, "Cannot process this type of variation-inst");
1871  }
1872 
1873  // According to the HGVS standard
1874  // http://varnomen.hgvs.org/recommendations/DNA/variant/duplication/
1875  // the 'dup' HGVS expressions are not to include the duplicated nucleotides.
1876  // Format: “prefix”“position(s)_duplicated”“dup”, e.g. g.123_345dup
1877  if (is_dup) {
1878  loc.Assign(*hgvs_loc);
1879  return out;
1880  }
1881 
1882  //append the deltas
1884  const CDelta_item& delta = **it;
1885  if(delta.GetSeq().IsThis()) {
1886 
1887  //"this" does not appear in HGVS nomenclature - we simply skip it as we
1888  //have adjusted the location according to HGVS convention for insertinos
1889  if(inst.GetType() == CVariation_inst::eType_ins && &delta == inst.GetDelta().begin()->GetPointer()) {
1890  ; //only expecting this this is an isertion and this is the first element of delta
1891  } else {
1892  NCBI_THROW(CException, eUnknown, "'this-loc' is not expected here");
1893  }
1894 
1895  } else if(delta.GetSeq().IsLiteral()) {
1896  out += x_SeqLiteralToStr(delta.GetSeq().GetLiteral(), flipped_strand);
1897  } else if(delta.GetSeq().IsLoc()) {
1898 
1899  CRef<CSeq_loc> delta_loc(new CSeq_loc());
1900  delta_loc->Assign(delta.GetSeq().GetLoc());
1901  if(flipped_strand) {
1902  delta_loc->FlipStrand();
1903  }
1904 
1905  string delta_loc_str;
1906  //the repeat-unit in microsattelite is always literal sequence:
1907  //NG_011572.1:g.5658NG_011572.1:g.5658_5660(15_24) - incorrect
1908  //NG_011572.1:g.5658CAG(15_24) - correct
1910  delta_loc_str = x_SeqLocToStr(*delta_loc, true);
1911  } else {
1912  delta_loc_str = x_LocToSeqStr(*delta_loc);
1913  }
1914 
1915  out += delta_loc_str;
1916 
1917  } else {
1918  NCBI_THROW(CException, eUnknown, "Encountered unhandled delta class");
1919  }
1920 
1921  //add multiplier, but make sure we're dealing with SSR.
1922  if(delta.IsSetMultiplier()) {
1923  if(inst.GetType() == CVariation_inst::eType_microsatellite && !delta.GetSeq().IsThis()) {
1924  string multiplier_str = x_IntWithFuzzToStr(
1925  delta.GetMultiplier(),
1926  delta.IsSetMultiplier_fuzz() ? &delta.GetMultiplier_fuzz() : NULL);
1927 
1928  if(!NStr::StartsWith(multiplier_str, "(")) {
1929  multiplier_str = "[" + multiplier_str + "]";
1930  //In HGVS-land the fuzzy multiplier value existis as is, but an exact value
1931  //is enclosed in brackets a-la allele-set.
1932  }
1933 
1934  out += multiplier_str;
1935  } else {
1936  NCBI_THROW(CException, eUnknown, "Multiplier value is set in unexpected context (only STR supported)");
1937  }
1938  }
1939  }
1940 
1941  loc.Assign(*hgvs_loc);
1942  return out;
1943 }
1944 
1945 
// Renders a Seq-literal as text.
// With sequence data: the data is copied, reverse-complemented when `flip_strand`
// is true, then converted to IUPAC nucleotide or IUPAC amino-acid letters depending
// on its encoding. Data in any other encoding contributes nothing to the result.
// Without sequence data: the literal's length is rendered (with fuzz, if set)
// through x_IntWithFuzzToStr.
1946 string CHgvsParser::x_SeqLiteralToStr(const CSeq_literal& literal, bool flip_strand)
1947 {
1948  string out("");
1949 
1950  if(literal.IsSetSeq_data()) {
      // work on a private copy so the caller's literal is never modified
1951  CRef<CSeq_data> sd(new CSeq_data);
1952 
1953  sd->Assign(literal.GetSeq_data());
1954  if(flip_strand) {
1955  CSeqportUtil::ReverseComplement(*sd, sd, 0, literal.GetLength());
1956  }
1957 
      // nucleotide encodings
1958  if( sd->IsIupacna()
1959  || sd->IsNcbi2na()
1960  || sd->IsNcbi4na()
1961  || sd->IsNcbi8na()
1962  || sd->IsNcbipna())
1963  {
1964  CSeqportUtil::Convert(*sd, sd, CSeq_data::e_Iupacna, 0, literal.GetLength() );
1965  out += sd->GetIupacna().Get();
      // amino-acid encodings
1966  } else if(sd->IsIupacaa()
1967  || sd->IsNcbi8aa()
1968  || sd->IsNcbieaa()
1969  || sd->IsNcbipaa()
1970  || sd->IsNcbistdaa())
1971  {
1972  CSeqportUtil::Convert(*sd, sd, CSeq_data::e_Iupacaa, 0, literal.GetLength() );
1973  out += sd->GetIupacaa().Get();
1974  }
1975 
1976  } else {
1977  string multiplier_str = x_IntWithFuzzToStr(
1978  literal.GetLength(),
1979  literal.IsSetFuzz() ? &literal.GetFuzz() : NULL);
1980  out += multiplier_str;
1981  }
1982  return out;
1983 }
1984 
1985 
// Returns the sequence text covered by `loc`, read from the sequence vector `v`.
// NOTE(review): the line that constructs `v` (listing line 1988) is missing from this
// listing; presumably a CSeqVector over `loc` - confirm against the full source.
1986 string CHgvsParser::x_LocToSeqStr(const CSeq_loc& loc)
1987 {
1989  string seq_str;
1990  v.GetSeqData(v.begin(), v.end(), seq_str);
1991  return seq_str;
1992 }
1993 
1994 
// Renders a location as HGVS position text, optionally prefixed with the sequence
// header produced by x_SeqIdToHgvsHeader.
// A point is rendered as a single position; anything else as "start_stop", with the
// interval's from/to fuzz carried over to the respective end when `loc` is an interval.
// For headers ending in ":c." the positions are offset by `first_pos`.
// NOTE(review): the lines that set `first_pos` and the start/stop points (listing
// lines 2008, 2018, 2026) are missing from this listing.
1995 string CHgvsParser::x_SeqLocToStr(const CSeq_loc& loc, bool with_header)
1996 {
1997  const CSeq_id& seq_id = sequence::GetId(loc, NULL);
1998 
1999  string header = x_SeqIdToHgvsHeader(seq_id);
2000 
2001  //for c.-based coordinates, calculate the first pos as start of CDS.
2002  TSeqPos first_pos = 0;
2003  if(NStr::EndsWith(header, ":c.")) {
2004  CBioseq_Handle bsh = m_scope->GetBioseqHandle(seq_id);
2005  for(CFeat_CI ci(bsh); ci; ++ci) {
2006  const CMappedFeat& mf = *ci;
2007  if(mf.GetData().IsCdregion()) {
2009  }
2010  }
2011  }
2012 
2013  string loc_str = "";
2014  if(loc.IsPnt()) {
2015  loc_str = x_SeqPntToStr(loc.GetPnt(), first_pos);
2016  } else {
      // render both ends through temporary Seq-points so they share x_SeqPntToStr's formatting
2017  CRef<CSeq_point> p_start(new CSeq_point);
2019  p_start->SetId().Assign(sequence::GetId(loc, NULL));
2020  if(loc.IsInt() && loc.GetInt().IsSetFuzz_from()) {
2021  p_start->SetFuzz().Assign(loc.GetInt().GetFuzz_from());
2022  }
2023  string s_start = x_SeqPntToStr(*p_start, first_pos);
2024 
2025  CRef<CSeq_point> p_stop(new CSeq_point);
2027  p_stop->SetId().Assign(sequence::GetId(loc, NULL));
2028  if(loc.IsInt() && loc.GetInt().IsSetFuzz_to()) {
2029  p_stop->SetFuzz().Assign(loc.GetInt().GetFuzz_to());
2030  }
2031  string s_stop = x_SeqPntToStr(*p_stop, first_pos);
2032 
2033  loc_str = s_start + "_" + s_stop;
2034  }
2035 
2036  string out = (with_header ? header : "") + loc_str;
2037 
2038  return out;
2039 }
2040 
2041 
// Renders a Seq-point as an HGVS position number.
// The 0-based point is converted to 1-based, shifted by `first_pos`, and then
// decremented once more when the result is zero or negative, because HGVS has no
// position 0.
// NOTE(review): listing lines 2045 and 2060-2061 are missing from this listing; the
// latter presumably post-process `outs` (the closing brace at 2062 belongs to them).
2042 string CHgvsParser::x_SeqPntToStr(const CSeq_point& pnt, TSeqPos first_pos)
2043 {
2044  CRef<CSeq_point> mapped_pnt;
2046 
2047  mapped_pnt.Reset(new CSeq_point);
2048  mapped_pnt->Assign(pnt);
2049 
2050 
2051  //convert to c. coordinates as necessary.
2052  //remember that there's no position-zero in HGVS.
2053  long point_pos = mapped_pnt->GetPoint() + 1; //hgvs absolute coordinates are 1-based.
2054  point_pos -= first_pos;
2055  if(point_pos <= 0) {
2056  point_pos--;
2057  }
2058 
2059  string outs = NStr::NumericToString(point_pos);
2062  }
2063 
2064  return outs;
2065 }
2066 
2068 {
      // x_SeqIdToHgvsHeader (name taken from the call in x_SeqLocToStr; the signature line
      // and the lines that define `bsh`, the descriptor loop and `accver` are not present
      // in this listing).
      // Builds the "<accver>:<moltype>" prefix of an HGVS expression. The molecule-type
      // tag is "p." for amino-acid sequences, "c." for RNA and "g." otherwise, with
      // "mt." substituted when a source descriptor matches the (missing) genome test.
2069  string moltype = "";
2071  if(bsh.GetInst_Mol() == CSeq_inst::eMol_aa) {
2072  moltype = "p.";
2073  } else if(bsh.GetInst_Mol() == CSeq_inst::eMol_rna) {
2074  moltype = "c.";
2075 
2076  } else {
2077  moltype = "g.";
2079  const CSeqdesc& desc = **it;
2080  if(desc.IsSource()
2081  && desc.GetSource().IsSetGenome()
2083  {
2084  moltype = "mt.";
2085  }
2086  }
2087  }
2088 
2090  return accver + ":" + moltype;
2091 }
2092 
2094 {
      // x_IntWithFuzzToStr (name taken from its call sites; the signature line is not
      // present in this listing).
      // Renders `value`, qualified by the optional `fuzz`, as HGVS text:
      //   no fuzz (null pointer)   -> "value"
      //   range fuzz               -> "(min_max)"   (value itself is not shown)
      //   limit gt / tr            -> "(value_?)"
      //   limit lt / tl            -> "(?_value)"
      //   any other limit          -> "(value)"
      // A fuzz that is neither a range nor a limit leaves the plain number unchanged.
2095  string out = NStr::NumericToString(value);
2096 
2097  if(fuzz) {
2098  if(fuzz->IsRange()) {
2099  string from = NStr::NumericToString(fuzz->GetRange().GetMin());
2100  string to = NStr::NumericToString(fuzz->GetRange().GetMax());
2101  out = "(" + from + "_" + to + ")";
2102  } else if(fuzz->IsLim()) {
2103  if(fuzz->GetLim() == CInt_fuzz::eLim_gt || fuzz->GetLim() == CInt_fuzz::eLim_tr) {
2104  out = "(" + out + "_?)";
2105  } else if(fuzz->GetLim() == CInt_fuzz::eLim_lt || fuzz->GetLim() == CInt_fuzz::eLim_tl) {
2106  out = "(?_" + out + ")";
2107  } else {
2108  out = "(" + out + ")";
2109  }
2110  }
2111  }
2112  return out;
2113 }
2114 
2115 
// Returns the total length described by the instance's delta items: for each item,
// its sequence length (literal length, length of `this_loc` for "this", or length
// of the referenced location) times its multiplier (1 when none is set).
// Throws CException (eUnknown) for any other kind of delta sequence.
// NOTE(review): the loop header over the delta items (listing line 2120) is missing
// from this listing.
2116 TSeqPos CHgvsParser::x_GetInstLength(const CVariation_inst& inst, const CSeq_loc& this_loc)
2117 {
2118  TSeqPos len(0);
2119 
2121  const CDelta_item& d = **it;
2122  int multiplier = d.IsSetMultiplier() ? d.GetMultiplier() : 1;
2123  TSeqPos d_len(0);
2124  if(d.GetSeq().IsLiteral()) {
2125  d_len = d.GetSeq().GetLiteral().GetLength();
2126  } else if(d.GetSeq().IsThis()) {
2127  d_len = sequence::GetLength(this_loc, m_scope);
2128  } else if(d.GetSeq().IsLoc()) {
2129  d_len = sequence::GetLength(d.GetSeq().GetLoc(), m_scope);
2130  } else {
2131  NCBI_THROW(CException, eUnknown, "Unhandled code");
2132  }
      // NOTE(review): `multiplier` is a signed int multiplied into an unsigned TSeqPos;
      // a negative multiplier would wrap - confirm callers never pass offset items here
2133  len += d_len * multiplier;
2134  }
2135  return len;
2136 }
2137 
// Returns the sequence text described by the instance's delta items, concatenated
// in order. Each item's text (literal via x_SeqLiteralToStr, "this" via `this_loc`,
// or a referenced location) is written `multiplier` times (once when none is set;
// zero or negative multipliers write nothing).
// Raises eLogic via HGVS_THROW for any other kind of delta sequence.
// NOTE(review): the loop header over the delta items (listing line 2141) is missing
// from this listing.
2138 string CHgvsParser::x_GetInstData(const CVariation_inst& inst, const CSeq_loc& this_loc)
2139 {
2140  CNcbiOstrstream ostr;
2142  const CDelta_item& d = **it;
2143  int multiplier = d.IsSetMultiplier() ? d.GetMultiplier() : 1;
2144 
2145  string d_seq;
2146  if(d.GetSeq().IsLiteral()) {
2147  d_seq = x_SeqLiteralToStr(d.GetSeq().GetLiteral(), false);
2148  } else if(d.GetSeq().IsThis()) {
2149  d_seq = x_LocToSeqStr(this_loc);
2150  } else if(d.GetSeq().IsLoc()) {
2151  d_seq = x_LocToSeqStr(d.GetSeq().GetLoc());
2152  } else {
2153  HGVS_THROW(eLogic, "Unhandled");
2154  }
2155  for(int i = 0; i < multiplier; i++) {
2156  ostr << d_seq;
2157  }
2158  }
2159 
2160  return CNcbiOstrstreamToString(ostr);
2161 }
2162 
2163 };
2164 
2166 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Positional
numerical value
Definition: Na_strand.hpp:63
@ eExtreme_Biological
5' and 3'
Definition: Na_strand.hpp:62
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
CBioseq_Handle –.
CFeat_CI –.
Definition: feat_ci.hpp:64
CMappedFeat –.
Definition: mapped_feat.hpp:59
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
Definition: ncbistre.hpp:802
CSeqVector –.
Definition: seq_vector.hpp:65
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
void FlipStrand(void)
Definition: Seq_point.cpp:208
static TSeqPos Convert(const CSeq_data &in_seq, CSeq_data *out_seq, CSeq_data::E_Choice to_code, TSeqPos uBeginIdx=0, TSeqPos uLength=0, bool bAmbig=false, Uint4 seed=17734276)
static TSeqPos ReverseComplement(CSeq_data *in_seq, TSeqPos uBeginIdx=0, TSeqPos uLength=0)
static void Validate(const CSeq_data &in_seq, vector< TSeqPos > *badIdx, TSeqPos uBeginIdx=0, TSeqPos uLength=0)
Template class for iteration on objects of class C.
Definition: iterator.hpp:673
CUser_object & AddField(const string &label, const string &value, EParseField parse=eParse_String)
add a data field to the user object that holds a given value
CUser_field & SetField(const string &str, const string &delim=".", const string &obj_subtype=kEmptyStr, NStr::ECase use_case=NStr::eCase)
Access a named field in this user object.
CVariation_inst –.
Set of related Variations.
void SetExt(TExt &value)
void ResetExt(void)
void Validate(const CSeq_literal &literal) const
void SetLoc(const SOffsetLoc &loc)
const CSeq_loc & GetLoc() const
const CSeq_feat & GetCDS() const
void SetId(const CSeq_id &id, EMolType mol_type)
const SOffsetLoc & GetOffsetLoc() const
EMolType GetMolType(bool check=true) const
CVariation_inst::TDelta::value_type TDelta
static SOffsetPoint x_general_pos(TIterator const &i, const CContext &context)
static CRef< CVariation_ref > x_ssr(TIterator const &i, const CContext &context)
static CRef< CVariation_ref > x_expr2(TIterator const &i, const CContext &context)
static string s_hgvsaa2ncbieaa(const string &hgvsaa)
Convert HGVS amino-acid code to ncbieaa.
CRef< CSeq_feat > AsVariationFeat(const string &hgvs_expression, TOpFlags=fOpFlags_Default)
static CRef< CVariation_ref > x_prot_missense(TIterator const &i, const CContext &context)
string x_SeqIdToHgvsHeader(const CSeq_id &id)
Convert seq-id to HGVS seq-id header, e.g. "NM_123456.7:c." or "NG_123456.7:p".
string x_GetInstData(const CVariation_inst &inst, const CSeq_loc &this_loc)
static CRef< CVariation_ref > x_nuc_subst(TIterator const &i, const CContext &context)
static SOffsetLoc x_range(TIterator const &i, const CContext &context)
TSeqPos x_GetInstLength(const CVariation_inst &inst, const CSeq_loc &this_loc)
static CRef< CVariation_ref > x_duplication(TIterator const &i, const CContext &context)
static CRef< CVariation_ref > x_prot_ext(TIterator const &i, const CContext &context)
string x_AsHgvsExpression(const CVariation_ref &variation, const CSeq_loc &parent_loc, bool is_top_level)
variation must have seq-loc specified
string AsHgvsExpression(const CSeq_feat &feat)
static CRef< CVariation_ref > x_mut_inst(TIterator const &i, const CContext &context)
string x_SeqPntToStr(const CSeq_point &pnt, TSeqPos first_pos)
static SFuzzyInt x_int_fuzz(TIterator const &i, const CContext &context)
static CRef< CSeq_loc > x_seq_loc(TIterator const &i, const CContext &context)
static CRef< CVariation_ref > x_list(TIterator const &i, const CContext &context)
string x_IntWithFuzzToStr(int value, const CInt_fuzz *fuzz=NULL)
static CContext x_header(TIterator const &i, const CContext &context)
static CRef< CVariation_ref > x_identity(const CContext &context)
static CRef< CVariation_ref > x_delins(TIterator const &i, const CContext &context)
static CRef< CSeq_literal > x_raw_seq(TIterator const &i, const CContext &context)
string x_InstToString(const CVariation_inst &inst, CSeq_loc &loc)
Only subset of insts can be expressed as HGVS, this will throw otherwise.
static CRef< CVariation_ref > x_insertion(TIterator const &i, const CContext &context)
static CRef< CVariation_ref > x_unwrap_iff_singleton(CVariation_ref &v)
static SOffsetPoint x_prot_pos(TIterator const &i, const CContext &context)
static SOffsetPoint x_pos_spec(TIterator const &i, const CContext &context)
static CRef< CVariation_ref > x_translocation(TIterator const &i, const CContext &context)
string x_LocToSeqStr(const CSeq_loc &loc)
static CRef< CVariation_ref > x_deletion(TIterator const &i, const CContext &context)
static string s_hgvsUCaa2hgvsUL(const string &hgvsaa)
Convert non-HGVS compliant all-uppercase AAs to UpLow, e.g. ILECYS ->IleCys.
static TDelta x_seq_ref(TIterator const &i, const CContext &context)
static SOffsetLoc x_location(TIterator const &i, const CContext &context)
string x_SeqLocToStr(const CSeq_loc &loc, bool with_header)
static CRef< CVariation_ref > x_expr1(TIterator const &i, const CContext &context)
TParseTreeMatch::const_tree_iterator TIterator
string x_SeqLiteralToStr(const CSeq_literal &literal, bool flip_strand)
static CRef< CSeq_point > x_abs_pos(TIterator const &i, const CContext &context)
static SOffsetPoint x_fuzzy_pos(TIterator const &i, const CContext &context)
static CRef< CVariation_ref > x_expr3(TIterator const &i, const CContext &context)
static CRef< CVariation_ref > x_nuc_inv(TIterator const &i, const CContext &context)
@ fOpFlags_RelaxedAA
try assuming all-uppercase three-letter AA representation
Definition: hgvs_parser.hpp:91
static CRef< CVariation_ref > x_conversion(TIterator const &i, const CContext &context)
static CRef< CVariation_ref > x_prot_fs(TIterator const &i, const CContext &context)
static CRef< CSeq_feat > x_root(TIterator const &i, const CContext &context)
static void s_FactorOutLocsInPlace(CVariation_ref &v)
static uch flags
std::ofstream out("events_result.xml")
main entry point for tests
CS_CONTEXT * ctx
Definition: t0006.c:12
#define check(s)
Definition: describecol2.c:21
int offset
Definition: replacements.h:160
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
int TSignedSeqPos
Type for signed sequence position.
Definition: ncbimisc.hpp:887
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
Definition: ncbimisc.hpp:878
#define NULL
Definition: ncbistd.hpp:225
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
#define NCBI_RETHROW_SAME(prev_exception, message)
Generic macro to re-throw the same exception.
Definition: ncbiexpt.hpp:749
@ eUnknown
Definition: app_popup.hpp:72
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
const TPrim & Get(void) const
Definition: serialbase.hpp:347
virtual bool Equals(const CSerialObject &object, ESerialRecursionMode how=eRecursive) const
Check if both objects contain the same values.
void FlipStrand(void)
Flip the strand (e.g. plus to minus)
Definition: Seq_loc.cpp:3969
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Override Assign() to incorporate cache invalidation.
Definition: Seq_loc.cpp:337
TRange GetTotalRange(void) const
Definition: Seq_loc.hpp:913
void SetPnt(TPnt &v)
Definition: Seq_loc.hpp:985
void SetEmpty(TEmpty &v)
Definition: Seq_loc.hpp:981
void SetInt(TInt &v)
Definition: Seq_loc.hpp:983
CBeginInfo Begin(C &obj)
Get starting point of object hierarchy.
Definition: iterator.hpp:1004
TSeqPos GetStop(const CSeq_loc &loc, CScope *scope, ESeqLocExtremes ext=eExtreme_Positional)
If only one CBioseq is represented by CSeq_loc, returns the position at the stop of the location.
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
ENa_strand GetStrand(const CSeq_loc &loc, CScope *scope=0)
Returns eNa_strand_unknown if multiple Bioseqs in loc Returns eNa_strand_other if multiple strands in...
TSeqPos GetStart(const CSeq_loc &loc, CScope *scope, ESeqLocExtremes ext=eExtreme_Positional)
If only one CBioseq is represented by CSeq_loc, returns the position at the start of the location.
string GetAccessionForId(const objects::CSeq_id &id, CScope &scope, EAccessionVersion use_version=eWithAccessionVersion, EGetIdType flags=0)
Retrieve the accession string for a Seq-id.
Definition: sequence.cpp:708
@ eWithAccessionVersion
accession.version (when possible)
Definition: sequence.hpp:91
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
const TDescr & GetDescr(void) const
const CSeqFeatData & GetData(void) const
TInst_Mol GetInst_Mol(void) const
CSeq_entry_Handle GetTopLevelEntry(void) const
Get top level Seq-entry handle.
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
SAnnotSelector & IncludeFeatSubtype(TFeatSubtype subtype)
Include feature subtype in the search.
const CSeq_loc & GetLocation(void) const
const CSeq_feat & GetMappedFeature(void) const
Feature mapped to the master sequence.
void Reset(void)
Definition: mapped_feat.cpp:77
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
Definition: seq_vector.cpp:304
const_iterator begin(void) const
Definition: seq_vector.hpp:298
const_iterator end(void) const
Definition: seq_vector.hpp:305
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
Definition: ncbiobj.hpp:735
position_type GetLength(void) const
Definition: range.hpp:158
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
CTempString literal(const char(&str)[Size])
Templatized initialization from a string literal.
Definition: tempstr.hpp:441
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
Definition: ncbistr.cpp:630
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
Definition: ncbistr.hpp:5430
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2891
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5412
static unsigned int StringToUInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to unsigned int.
Definition: ncbistr.cpp:642
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3405
static string & ToUpper(string &str)
Convert string to upper case – string& version.
Definition: ncbistr.cpp:424
@ fWithSign
Prefix the output value with a sign ('+'/'-')
Definition: ncbistr.hpp:253
TGenome GetGenome(void) const
Get the Genome member data.
Definition: BioSource_.hpp:422
bool IsSetGenome(void) const
Check if a value has been assigned to Genome data member.
Definition: BioSource_.hpp:397
void SetMin(TMin value)
Assign a value to Min data member.
Definition: Int_fuzz_.hpp:528
bool IsLim(void) const
Check if variant Lim is selected.
Definition: Int_fuzz_.hpp:636
TRange & SetRange(void)
Select the variant.
Definition: Int_fuzz_.cpp:165
TLim GetLim(void) const
Get the variant data.
Definition: Int_fuzz_.hpp:642
TMin GetMin(void) const
Get the Min member data.
Definition: Int_fuzz_.hpp:519
void SetMax(TMax value)
Assign a value to Max data member.
Definition: Int_fuzz_.hpp:481
TLim & SetLim(void)
Select the variant.
Definition: Int_fuzz_.hpp:649
void SetType(TType &value)
Assign a value to Type data member.
bool IsRange(void) const
Check if variant Range is selected.
Definition: Int_fuzz_.hpp:603
void SetData(TData &value)
Assign a value to Data data member.
TMax GetMax(void) const
Get the Max member data.
Definition: Int_fuzz_.hpp:472
const TRange & GetRange(void) const
Get the variant data.
Definition: Int_fuzz_.cpp:159
@ eLim_gt
greater than
Definition: Int_fuzz_.hpp:211
@ eLim_unk
unknown
Definition: Int_fuzz_.hpp:210
@ eLim_lt
less than
Definition: Int_fuzz_.hpp:212
@ eLim_tl
space to left of position
Definition: Int_fuzz_.hpp:214
@ eLim_tr
space to right of position
Definition: Int_fuzz_.hpp:213
void SetLocation(TLocation &value)
Assign a value to Location data member.
Definition: Seq_feat_.cpp:131
bool IsCdregion(void) const
Check if variant Cdregion is selected.
const TLocation & GetLocation(void) const
Get the Location member data.
Definition: Seq_feat_.hpp:1117
const TData & GetData(void) const
Get the Data member data.
Definition: Seq_feat_.hpp:925
void SetData(TData &value)
Assign a value to Data data member.
Definition: Seq_feat_.cpp:94
const TVariation & GetVariation(void) const
Get the variant data.
void SetPoint(TPoint value)
Assign a value to Point data member.
Definition: Seq_point_.hpp:312
void SetId(TId &value)
Assign a value to Id data member.
Definition: Seq_point_.cpp:61
const TPnt & GetPnt(void) const
Get the variant data.
Definition: Seq_loc_.cpp:238
TPoint GetPoint(void) const
Get the Point member data.
Definition: Seq_point_.hpp:303
void SetStrand(TStrand value)
Assign a value to Strand data member.
Definition: Seq_point_.hpp:359
void SetFuzz(TFuzz &value)
Assign a value to Fuzz data member.
Definition: Seq_point_.cpp:71
bool IsSetFuzz(void) const
Check if a value has been assigned to Fuzz data member.
Definition: Seq_point_.hpp:408
const TFuzz & GetFuzz(void) const
Get the Fuzz member data.
Definition: Seq_point_.hpp:420
const TId & GetId(void) const
Get the Id member data.
Definition: Seq_point_.hpp:390
TStrand GetStrand(void) const
Get the Strand member data.
Definition: Seq_point_.hpp:350
bool IsPnt(void) const
Check if variant Pnt is selected.
Definition: Seq_loc_.hpp:540
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
@ e_Int
from to
Definition: Seq_loc_.hpp:101
const TIupacaa & GetIupacaa(void) const
Get the variant data.
Definition: Seq_data_.hpp:530
void SetLength(TLength value)
Assign a value to Length data member.
list< CRef< CSeqdesc > > Tdata
Definition: Seq_descr_.hpp:91
bool IsNcbipna(void) const
Check if variant Ncbipna is selected.
Definition: Seq_data_.hpp:604
bool IsNcbieaa(void) const
Check if variant Ncbieaa is selected.
Definition: Seq_data_.hpp:644
bool IsNcbi8aa(void) const
Check if variant Ncbi8aa is selected.
Definition: Seq_data_.hpp:624
void SetSeq_data(TSeq_data &value)
Assign a value to Seq_data data member.
bool IsIupacaa(void) const
Check if variant Iupacaa is selected.
Definition: Seq_data_.hpp:524
const TIupacna & GetIupacna(void) const
Get the variant data.
Definition: Seq_data_.hpp:510
bool IsNcbistdaa(void) const
Check if variant Ncbistdaa is selected.
Definition: Seq_data_.hpp:684
const TSource & GetSource(void) const
Get the variant data.
Definition: Seqdesc_.cpp:566
bool IsSource(void) const
Check if variant Source is selected.
Definition: Seqdesc_.hpp:1190
bool IsNcbi4na(void) const
Check if variant Ncbi4na is selected.
Definition: Seq_data_.hpp:564
const Tdata & Get(void) const
Get the member data.
Definition: Seq_descr_.hpp:166
bool IsNcbi8na(void) const
Check if variant Ncbi8na is selected.
Definition: Seq_data_.hpp:584
TLength GetLength(void) const
Get the Length member data.
const TNcbieaa & GetNcbieaa(void) const
Get the variant data.
Definition: Seq_data_.hpp:650
bool IsNcbipaa(void) const
Check if variant Ncbipaa is selected.
Definition: Seq_data_.hpp:664
bool IsNcbi2na(void) const
Check if variant Ncbi2na is selected.
Definition: Seq_data_.hpp:544
bool IsIupacna(void) const
Check if variant Iupacna is selected.
Definition: Seq_data_.hpp:504
const TSeq_data & GetSeq_data(void) const
Get the Seq_data member data.
@ e_Iupacna
IUPAC 1 letter nuc acid code.
Definition: Seq_data_.hpp:104
@ e_Iupacaa
IUPAC 1 letter amino acid code.
Definition: Seq_data_.hpp:105
TType GetType(void) const
Get the Type member data.
list< CRef< CVariation_ref > > TVariations
const TSet & GetSet(void) const
Get the variant data.
TType GetType(void) const
Get the Type member data.
void SetType(TType value)
Assign a value to Type data member.
const TLoc & GetLoc(void) const
Get the variant data.
const TDelta & GetDelta(void) const
Get the Delta member data.
void SetData(TData &value)
Assign a value to Data data member.
const TData & GetData(void) const
Get the Data member data.
const TSeq & GetSeq(void) const
Get the Seq member data.
const TLiteral & GetLiteral(void) const
Get the variant data.
bool IsSet(void) const
Check if variant Set is selected.
void SetType(TType value)
Assign a value to Type data member.
TMultiplier GetMultiplier(void) const
Get the Multiplier member data.
list< CRef< CDelta_item > > TDelta
TVariations & SetVariations(void)
Assign a value to Variations data member.
bool IsLiteral(void) const
Check if variant Literal is selected.
bool IsSetMultiplier(void) const
Multiplier allows representing a tandem, e.g.
TDelta & SetDelta(void)
Assign a value to Delta data member.
bool IsThis(void) const
Check if variant This is selected.
bool IsLoc(void) const
Check if variant Loc is selected.
const TVariations & GetVariations(void) const
Get the Variations member data.
@ eType_snv
delta=[morph of length 1] NOTE: this is snV not snP; the latter requires frequency-based validation t...
@ eType_inv
delta=[del, ins.seq= RevComp(variation-location)]
@ eType_mnp
delta=[morph of length >1]
@ eType_microsatellite
delta=[del, ins.seq= repeat-unit with fuzzy multiplier] variation-location is the microsat expansion ...
@ eType_delins
delta=[del, ins]
@ eType_prot_nonsense
delta=[del]; variation-location is the tail of the protein being truncated
@ eType_transposon
delta=[del, ins.seq= known donor or 'this'] variation-location is equiv of transposon locs.
@ eType_prot_silent
delta=[morph of length 1, same AA as at variation-location]
@ eType_prot_missense
delta=[morph of length 1]
@ eType_translocation
delta=like delins
@ eType_prot_neutral
delta=[morph of length 1]
@ eAction_offset
go downstream by distance specified by multiplier (upstream if < 0), in genomic context.
@ eAction_del_at
excise sequence at location if multiplier is specified, delete len(location)*multiplier positions dow...
@ eAction_ins_before
insert seq before the location.start
@ eData_set_type_haplotype
changes on the same allele, e.g r.[13g>a;15u>c]
@ eData_set_type_compound
complex change at the same location on the same molecule
@ eData_set_type_genotype
changes on different alleles in the same genotype, e.g. g.[476C>T]+[476C>T]
@ eData_set_type_package
set represents a package of observations at a given location, generally containing asserted + referen...
@ eData_set_type_individual
same organism; allele relationship unknown, e.g. g.[476C>T(+)183G>C]
@ eData_set_type_mosaic
different genotypes in the same individual
@ eData_set_type_alleles
set represents a set of observed alleles
@ eData_set_type_products
different products arising from the same variation in a precursor, e.g. r.[13g>a, 13_88del]
@ eObservation_asserted
inst represents the asserted base at a position
where boath are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is orig
int i
int len
static MDB_envinfo info
Definition: mdb_load.c:37
#define HGVS_ASSERT_RULE(i, rule_id)
Definition: hgvs_parser.cpp:79
#define HGVS_THROW(err_code, message)
Definition: hgvs_parser.cpp:77
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
void AttachAssertedSequence(CVariation_ref &vr, const CSeq_literal &literal)
Definition: hgvs_parser.cpp:94
void RepackageAssertedSequence(CVariation_ref &vr)
#define abs(a)
Definition: ncbi_heapmgr.c:130
T max(T x_, T y_)
T min(T x_, T y_)
Int4 delta(size_t dimension_, const Int4 *score_)
static const char delimiter[]
SAnnotSelector –.
static const string & s_GetRuleName(parser_id id)
std::map< parser_id, std::string > TRuleNames
static TRuleNames & s_GetRuleNames()
static bool s_is_list(parser_id id)
void Assign(const SOffsetPoint &other)
static CS_CONTEXT * context
Definition: will_convert.c:21
Modified on Sun Jun 23 05:21:18 2024 by modify_doxy.py rev. 669887