NCBI C++ ToolKit
hgvs_parser2.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: hgvs_parser2.cpp 90719 2020-07-14 17:21:57Z villamar $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * File Description:
27  * Sample library
28  *
29  */
30 
31 #include <ncbi_pch.hpp>
32 
33 
34 #include <serial/iterator.hpp>
35 
41 
46 
53 
56 #include <objects/seq/Seq_data.hpp>
58 #include <objects/seq/Num_ref.hpp>
62 
66 
69 
70 #include <objmgr/util/sequence.hpp>
71 #include <objmgr/seq_vector.hpp>
72 #include <objmgr/align_ci.hpp>
73 #include <objmgr/feat_ci.hpp>
75 
78 
79 
81 
82 namespace variation {
83 
84 #define HGVS_THROW(err_code, message) NCBI_THROW(CHgvsParser::CHgvsParserException, err_code, message)
85 
86 #define HGVS_ASSERT_RULE(i, rule_id) \
87  if((i->value.id()) != (SGrammar::rule_id)) \
88  {HGVS_THROW(eGrammatic, "Unexpected rule " + CHgvsParser::SGrammar::s_GetRuleName(i->value.id()) ); }
89 
90 
92 
94 {
95  "NONE",
96  "root",
97  "list_delimiter",
98  "list1a",
99  "list2a",
100  "list3a",
101  "list1b",
102  "list2b",
103  "list3b",
104  "expr1",
105  "expr2",
106  "expr3",
107  "translocation",
108  "header",
109  "seq_id",
110  "mut_list",
111  "mut_ref",
112  "mol",
113  "int_fuzz",
114  "abs_pos",
115  "general_pos",
116  "fuzzy_pos",
117  "pos_spec",
118  "location",
119  "nuc_range",
120  "prot_range",
121  "mut_inst",
122  "raw_seq",
123  "raw_seq_or_len",
124  "aminoacid1",
125  "aminoacid2",
126  "aminoacid3",
127  "nuc_subst",
128  "deletion",
129  "insertion",
130  "delins",
131  "duplication",
132  "nuc_inv",
133  "ssr",
134  "conversion",
135  "seq_loc",
136  "seq_ref",
137  "prot_pos",
138  "prot_fs",
139  "prot_missense",
140  "prot_ext",
141  "no_change"
142 };
143 
144 
146 {
147  if(v.SetPlacements().size() == 0) {
149  v.SetPlacements().push_back(p);
150  }
151  return *v.SetPlacements().front();
152 }
153 
155 {
156  CVariationMethod& m = variation.SetMethod();
157  m.SetMethod();
158 
159  if(find(m.GetMethod().begin(),
160  m.GetMethod().end(),
162  {
164  }
165 }
166 
167 
168 bool SeqsMatch(const string& query, const char* text)
169 {
170  static const char* iupac_bases = ".TGKCYSBAWRDMHVN"; //position of the iupac literal = 4-bit mask for A|C|G|T
171  for(size_t i = 0; i < query.size(); i++) {
172  size_t a = CTempString(iupac_bases).find(query[i]);
173  size_t b = CTempString(iupac_bases).find(text[i]);
174  if(!(a & b)) {
175  return false;
176  }
177  }
178  return true;
179 }
180 
181 
182 CRef<CSeq_loc> FindSSRLoc(const CSeq_loc& loc, const string& seq, CScope& scope)
183 {
184  //Extend the loc 10kb up and down; Find all occurences of seq in the resulting
185  //interval, create locs for individual repeat units; then merge them, and keep the interval that
186  //overlaps the original.
187 
188  if (loc.GetStart(eExtreme_Positional) > loc.GetStop(eExtreme_Positional)) {
189  HGVS_THROW(eSemantic, "Invalid range: start position is bigger than stop position");
190  }
191  const TSeqPos ext_interval = 10000;
192 
195  TSeqPos seq_len = bsh.GetInst_Length();
196  loc1->SetInt().SetFrom() -= min(ext_interval, loc1->GetInt().GetFrom());
197  loc1->SetInt().SetTo() += min(ext_interval, seq_len - 1 - loc1->GetInt().GetTo());
198 
199  CSeqVector v(*loc1, scope, CBioseq_Handle::eCoding_Iupac);
200  string str1;
201  v.GetSeqData(v.begin(), v.end(), str1);
202 
203  CRef<CSeq_loc> container(new CSeq_loc(CSeq_loc::e_Mix));
204 
205  if (str1.size() >= seq.size()) {
206  for (size_t i = 0u; i < str1.size() - seq.size(); ++i) {
207  if (SeqsMatch(seq, &str1[i])) {
208  CRef<CSeq_loc> repeat_unit_loc(new CSeq_loc);
209  repeat_unit_loc->Assign(*loc1);
210 
212  repeat_unit_loc->SetInt().SetTo() -= i;
213  repeat_unit_loc->SetInt().SetFrom(repeat_unit_loc->GetInt().GetTo() - (seq.size() - 1));
214  } else {
215  repeat_unit_loc->SetInt().SetFrom() += i;
216  repeat_unit_loc->SetInt().SetTo(repeat_unit_loc->GetInt().GetFrom() + (seq.size() - 1));
217  }
218  container->SetMix().Set().push_back(repeat_unit_loc);
219  }
220  }
221  }
222 
224  merged_repeats->ChangeToMix();
225  CRef<CSeq_loc> result(new CSeq_loc(CSeq_loc::e_Null));
226  result->Assign(loc);
227 
228  for(CSeq_loc_CI ci(*merged_repeats); ci; ++ci) {
229  const CSeq_loc& loc2 = ci.GetEmbeddingSeq_loc();
231  result->Add(loc2);
232  }
233  }
234 
236 }
237 
238 
239 
240 
242 {
243  p.ResetStart_offset();
245  if(fint.value || fint.fuzz) {
246  p.SetStart_offset(fint.value);
247  }
248 
249  if(fint.fuzz) {
250  p.SetStart_offset_fuzz().Assign(*fint.fuzz);
251  }
252 
253 #if 0
254  if(!fint.value
255  && fint.fuzz
256  && fint.fuzz->IsLim()
257  && ( fint.fuzz->GetLim() == CInt_fuzz::eLim_lt
258  || fint.fuzz->GetLim() == CInt_fuzz::eLim_tl))
259  {
260  // VAR-832
261  // interpret c.x-? as c.x-(?_1), not as c.x+(?_0)
262  p.SetStart_offset(-1);
263  }
264 #endif
265 
266 }
267 
269 {
270  p.ResetStop_offset();
272  if(fint.value || fint.fuzz) {
273  p.SetStop_offset(fint.value);
274  }
275 
276  if(fint.fuzz) {
277  p.SetStop_offset_fuzz().Assign(*fint.fuzz);
278  }
279 
280 #if 0
281  if(!fint.value
282  && fint.fuzz
283  && fint.fuzz->IsLim()
284  && ( fint.fuzz->GetLim() == CInt_fuzz::eLim_gt
285  || fint.fuzz->GetLim() == CInt_fuzz::eLim_tr))
286  {
287  // VAR-832
288  // interpret c.x+? as c.x+(1_?), not as c.x+(0_?)
289  p.SetStop_offset(1);
290  }
291 #endif
292 }
293 
294 
295 
296 //if a variation has an asserted sequence, stored in placement.seq, repackage it as a set having
297 //the original variation and a synthetic one representing the asserted sequence. The placement.seq
298 //is cleared, as it is a placeholder for the actual reference sequence.
300 {
301  if(vr.IsSetPlacements() && SetFirstPlacement(vr).IsSetSeq()) {
302  CRef<CVariation> container(new CVariation);
303  container->SetPlacements() = vr.SetPlacements();
304 
306  orig->Assign(vr);
307  orig->ResetPlacements(); //location will be set on the package, as it is the same for both members
308 
309  container->SetData().SetSet().SetType(CVariation::TData::TSet::eData_set_type_package);
310  container->SetData().SetSet().SetVariations().push_back(orig);
311 
312  CRef<CVariation> asserted_vr(new CVariation);
313  asserted_vr->SetData().SetInstance().SetObservation(CVariation_inst::eObservation_asserted);
314  asserted_vr->SetData().SetInstance().SetType(CVariation_inst::eType_identity);
315 
317  delta->SetSeq().SetLiteral().Assign(SetFirstPlacement(vr).GetSeq());
318  asserted_vr->SetData().SetInstance().SetDelta().push_back(delta);
319 
320  SetFirstPlacement(*container).ResetSeq();
321  container->SetData().SetSet().SetVariations().push_back(asserted_vr);
322 
323  vr.Assign(*container);
324 
325  } else if(vr.GetData().IsSet()) {
326  NON_CONST_ITERATE(CVariation::TData::TSet::TVariations, it, vr.SetData().SetSet().SetVariations()) {
328  }
329  }
330 }
331 
332 
333 //HGVS distinguished between c. n. and r. molecules, while in
334 //VariantPlacement we have moltype cdna (which maps onto c.) and rna (which maps onto n.)
335 //
336 //If we have an HGVS expression like NM_123456.7:r. the VariantPlacement will have moltype rna
337 //that would map-back onto NM_123456.7:n., which is what we don't want, as "n.' reserved
338 //for non-coding RNAs, so we'll convert rna moltype to cdna based on accession here.
339 //Note that this is a post-processing step, as in the midst of parsing we want to treat
340 //NM_123456.7:r. as non-coding rna, since these have absolute coordinates rather than cds-relative.
341 void AdjustMoltype(CVariation& vr, CScope& scope)
342 {
343  CVariationUtil util(scope);
344 
345  for(CTypeIterator<CVariantPlacement> it(Begin(vr)); it; ++it) {
346  CVariantPlacement& p = *it;
347 
348  if(p.IsSetMol()
350  && p.GetLoc().GetId())
351  {
352  p.SetMol(util.GetMolType(*p.GetLoc().GetId()));
353  }
354  }
355 }
356 
357 
359  : m_hgvs(other.m_hgvs)
360 {
361  this->m_bsh = other.m_bsh;
362  this->m_cds = other.m_cds;
363  this->m_scope = other.m_scope;
365  this->m_placement.Reset();
366  if(other.m_placement) {
367  /*
368  * Note: need to make a copy of the placement, such that if preceding subvariation
369  * is location-specific and the other one isn't, first's placement is not
370  * given to the other one, e.g. "NM_004004.2:c.[35_36dup;40del]+[=]" - if we
371  * made a shallow copy, then the location context for "[=]" would be erroneously
372  * inherited from the last sibling: "NM_004004.2:c.40=" instead of whole "NM_004004.2:c.="
373  */
375  this->m_placement->Assign(*other.m_placement);
376  }
377 }
378 
380 {
381  if(m_cds.IsNull()) {
382  HGVS_THROW(eContext, "No CDS feature in context");
383  }
384  return *m_cds;
385 }
386 
388 {
389  return sequence::GetId(GetPlacement().GetLoc(), NULL);
390 }
391 
392 
394 {
395  Clear();
396 
397  SetPlacement().SetMol(mol);
398  SetPlacement().SetLoc().SetWhole().Assign(id);
399 
400  m_bsh = m_scope->GetBioseqHandle(id);
401 
402  if(!m_bsh) {
403  HGVS_THROW(eContext, "Cannnot get bioseq for seq-id " + id.AsFastaString());
404  }
405 
406  if(mol == CVariantPlacement::eMol_cdna) {
407  SAnnotSelector sel;
408  sel.SetResolveTSE();
410  for(CFeat_CI ci(m_bsh, sel); ci; ++ci) {
411  const CMappedFeat& mf = *ci;
412  if(mf.GetData().IsCdregion()) {
413  if(m_cds.IsNull()) {
414  m_cds.Reset(new CSeq_feat());
415  m_cds->Assign(mf.GetMappedFeature());
416  } else {
417  HGVS_THROW(eContext, "Multiple CDS features on the sequence");
418  }
419  }
420  }
421  if(m_cds.IsNull()) {
422  HGVS_THROW(eContext, "Could not find CDS feat");
423  }
424  }
425 }
426 
427 
428 const string CHgvsParser::SGrammar::s_GetRuleName(parser_id id)
429 {
430  if(id.to_long() >= CHgvsParser::SGrammar::eNodeIds_SIZE) {
431  HGVS_THROW(eLogic, "Rule name not hardcoded");
432  } else {
433  return s_rule_names[id.to_long()];
434  }
435 }
436 
437 
439 {
440  HGVS_ASSERT_RULE(i, eID_int_fuzz);
441  TIterator it = i->children.begin();
442 
444  fint.fuzz.Reset(new CInt_fuzz);
445 
446  if(i->children.size() == 1) { //e.g. '5' or '?'
447  string s(it->value.begin(), it->value.end());
448  if(s == "?") {
449  fint.SetPureFuzz();
450  } else {
451  fint.value = NStr::StringToInt(s);
452  fint.fuzz.Reset();
453  }
454  } else if(i->children.size() == 3) { //e.g. '(5)' or '(?)'
455  ++it;
456  string s(it->value.begin(), it->value.end());
457  if(s == "?") {
458  fint.SetPureFuzz();
459  } else {
460  fint.value = NStr::StringToInt(s);
462  }
463  } else if(i->children.size() == 5) { //e.g. '(5_7)' or '(?_10)'
464  ++it;
465  string s1(it->value.begin(), it->value.end());
466  ++it;
467  ++it;
468  string s2(it->value.begin(), it->value.end());
469 
470  if(s1 == "?" && s2 == "?") {
471  fint.SetPureFuzz();
472  } else if(s1 != "?" && s2 != "?") {
473  fint.value = NStr::StringToInt(s1);
474  fint.fuzz->SetRange().SetMin(NStr::StringToInt(s1));
475  fint.fuzz->SetRange().SetMax(NStr::StringToInt(s2));
476  } else if(s2 == "?") {
477  fint.value = NStr::StringToInt(s1);
479  } else if(s1 == "?") {
480  fint.value = NStr::StringToInt(s2);
482  } else {
483  HGVS_THROW(eLogic, "Unreachable code");
484  }
485  }
486 
487  return fint;
488 }
489 
490 
491 
492 /* In HGVS:
493  * the nucleotide 3' of the translation stop codon is *1, the next *2, etc.
494  * # there is no nucleotide 0
495  * # nucleotide 1 is the A of the ATG-translation initiation codon
496  * # the nucleotide 5' of the ATG-translation initiation codon is -1, the previous -2, etc.
497  *
498  * I.e. need to adjust if dealing with positive coordinates, except for *-relative ones.
499  */
500 template<typename T>
501 T AdjustHgvsCoord(T val, TSeqPos offset, bool adjust)
502 {
503  val += offset;
504  if(adjust && val > (T)offset) {
505  // note: val may be unsigned, so check for (val+offset > offset)
506  // instead of (val > 0)
507  val -= 1;
508  }
509  return val;
510 }
511 
513 {
514  HGVS_ASSERT_RULE(i, eID_abs_pos);
515  TIterator it = i->children.begin();
516 
517  TSeqPos offset(0);
518  bool adjust = true; //see AdjustHgvsCoord comments above
519  if(i->children.size() == 2) {
520  adjust = false;
521  string s(it->value.begin(), it->value.end());
522  if(s != "*") {
523  HGVS_THROW(eGrammatic, "Expected literal '*'");
524  }
525  if(context.GetPlacement().GetMol() != CVariantPlacement::eMol_cdna) {
526  HGVS_THROW(eContext, "Expected 'c.' context for stop-codon-relative coordinate");
527  }
528 
529  offset = context.GetCDS().GetLocation().GetStop(eExtreme_Biological);
530  ++it;
531  } else {
532  if (context.GetPlacement().GetMol() == CVariantPlacement::eMol_cdna) {
533  //Note: in RNA coordinates (r.) the coordinates are absolute, like in genomic sequences,
534  // "The RNA sequence type uses only GenBank mRNA records. The value 1 is assigned to the first
535  // base in the record and from there all bases are counted normally."
536  //so the cds-start offset applies only to "c." coordinates
537  offset = context.GetCDS().GetLocation().GetStart(eExtreme_Biological);
538  }
539  }
540 
541  CRef<CSeq_point> pnt(new CSeq_point);
542  {{
543  SFuzzyInt int_fuzz = x_int_fuzz(it, context);
544 
545  pnt->SetId().Assign(context.GetId());
547  if(int_fuzz.IsPureFuzz()) {
548  pnt->SetPoint(kInvalidSeqPos);
549  } else {
550  pnt->SetPoint(AdjustHgvsCoord(int_fuzz.value, offset, adjust));
551  }
552 
553  if(!int_fuzz.fuzz.IsNull()) {
554  pnt->SetFuzz(*int_fuzz.fuzz);
555  if(pnt->GetFuzz().IsRange()) {
556  CInt_fuzz_Base::TRange& r = pnt->SetFuzz().SetRange();
557  r.SetMin(AdjustHgvsCoord(r.GetMin(), offset, adjust));
558  r.SetMax(AdjustHgvsCoord(r.GetMax(), offset, adjust));
559  }
560  }
561  }}
562 
563  return pnt;
564 }
565 
567 {
568  return p.GetPoint() == kInvalidSeqPos
569  && p.IsSetFuzz()
570  && p.GetFuzz().IsLim()
572 }
573 
574 
575 /*
576  * general_pos is either simple abs-pos that is passed down to x_abs_pos,
577  * or an intronic location that is specified by a mapping point in the
578  * local coordinates and the -upstream / +downstream offset after remapping.
579  *
580  * The mapping point can either be an abs-pos in local coordinates, or
581  * specified as offset in intron-specific coordinate system where IVS# specifies
582  * the intron number
583  */
585 {
586  HGVS_ASSERT_RULE(i, eID_general_pos);
587 
588  SOffsetPoint ofpnt;
589 
590  if(i->children.size() == 1) {
591  //local coordinates
592  ofpnt.pnt = x_abs_pos(i->children.begin(), context);
593  } else {
594  //(str_p("IVS") >> int_p | abs_pos) >> sign_p >> int_fuzz
595 
596  TIterator it = i->children.end() - 1;
597  ofpnt.offset = x_int_fuzz(it, context);
598  --it;
599 
600  //adjust for sign; convert +? or -? offsets to
601  //0> or 0<
602  string s_sign(it->value.begin(), it->value.end());
603  int sign1 = s_sign == "-" ? -1 : 1;
604  ofpnt.offset.value *= sign1;
605  if(ofpnt.offset.fuzz && ofpnt.offset.fuzz->IsRange()) {
606  ofpnt.offset.fuzz->SetRange().SetMin() *= sign1;
607  ofpnt.offset.fuzz->SetRange().SetMax() *= sign1;
608  } else if(ofpnt.offset.IsPureFuzz()) {
610  }
611 
612  --it;
613  if(it->value.id() == SGrammar::eID_abs_pos) {
614  //base-loc is an abs-pos
615  ofpnt.pnt = x_abs_pos(i->children.begin(), context);
616  } else {
617  //base-loc is IVS-relative.
618  ofpnt.pnt.Reset(new CSeq_point);
619  ofpnt.pnt->SetId().Assign(context.GetId());
620  ofpnt.pnt->SetStrand(eNa_strand_plus);
621 
622  TIterator it = i->children.begin();
623  string s_ivs(it->value.begin(), it->value.end());
624  ++it;
625  string s_ivs_num(it->value.begin(), it->value.end());
626  int ivs_num = NStr::StringToInt(s_ivs_num);
627 
628  //If IVS3+50, the mapping point is the last base of third exon
629  //if IVS3-50, the mapping point is the first base of the fourth exon
630  size_t target_exon_num = sign1 < 0 ? ivs_num + 1 : ivs_num;
631 
632  SAnnotSelector sel;
634  CBioseq_Handle bsh = context.GetScope().GetBioseqHandle(context.GetId());
635  size_t exon_num = 1;
636  //Note: IVS is cDNA-centric, so we'll have to use ordinals of the exons instead of /number qual
637  for(CFeat_CI ci(bsh, sel); ci; ++ci) {
638  const CMappedFeat& mf = *ci;
639  if(exon_num == target_exon_num) {
640  ofpnt.pnt->SetPoint(sign1 > 0 ? mf.GetLocation().GetStop(eExtreme_Biological)
641  : mf.GetLocation().GetStart(eExtreme_Biological));
642  break;
643  }
644  exon_num++;
645  }
646  }
647  }
648 
649  //We could be dealing with improper HGVS expression (that we need to support anyway)
650  //where the coordinate extends beyond the range of sequence
651  //e.g. NM_000518:c.-78A>G, where codon-start is at 51. In this case the resulting
652  //coordinate will be negative; we'll convert it to offset-format (27 bases upstream of pos 0)
653 
654  if(!IsPureFuzzPoint(*ofpnt.pnt)
655  && ( context.GetPlacement().GetMol() == CVariantPlacement::eMol_cdna
656  || context.GetPlacement().GetMol() == CVariantPlacement::eMol_rna))
657  {
658  if(static_cast<TSignedSeqPos>(ofpnt.pnt->GetPoint()) < 0) {
659  ofpnt.offset.value += static_cast<TSignedSeqPos>(ofpnt.pnt->GetPoint());
660  ofpnt.pnt->SetPoint(0);
661  } else {
662  CVariationUtil vu(context.GetScope());
663  TSeqPos transcribed_len = vu.GetEffectiveTranscriptLength(context.GetBioseqHandle());
664  // Note: positions past the last exon are interpreted as specifying near-gene/intronic
665  // target rather than polyA. JIRA:SNP-7341
666 
667  if(ofpnt.pnt->GetPoint() >= transcribed_len) {
668  TSeqPos anchor_pos = transcribed_len - 1;
669  TSeqPos overrun = ofpnt.pnt->GetPoint() - anchor_pos;
670  ofpnt.offset.value += overrun;
671  ofpnt.pnt->SetPoint(anchor_pos);
672  }
673  }
674  }
675 
676  return ofpnt;
677 }
678 
679 
681 {
682  HGVS_ASSERT_RULE(i, eID_fuzzy_pos);
683 
684  SOffsetPoint pnt1 = x_general_pos(i->children.begin(), context);
685  SOffsetPoint pnt2 = x_general_pos(i->children.begin() + 1, context);
686 
687  //Verify that on the same seq-id.
688  if(!pnt1.pnt->GetId().Equals(pnt2.pnt->GetId())) {
689  HGVS_THROW(eSemantic, "Points in a fuzzy pos are on different sequences");
690  }
691  if(pnt1.pnt->GetStrand() != pnt2.pnt->GetStrand()) {
692  HGVS_THROW(eSemantic, "Range-loc start/stop are on different strands.");
693  }
694 
695  if(IsPureFuzzPoint(*pnt1.pnt) || IsPureFuzzPoint(*pnt2.pnt)) {
696  if(IsPureFuzzPoint(*pnt2.pnt)) {
697  pnt1.pnt->SetFuzz().SetLim(CInt_fuzz::eLim_tr);
698  return pnt1;
699  } else {
700  pnt2.pnt->SetFuzz().SetLim(CInt_fuzz::eLim_tl);
701  return pnt2;
702  }
703  }
704 
705  if((pnt1.offset.value != 0 || pnt2.offset.value != 0) && !pnt1.pnt->Equals(*pnt2.pnt)) {
706  HGVS_THROW(eSemantic, "Base-points in an intronic fuzzy position must be equal");
707  }
708 
709  SOffsetPoint pnt = pnt1;
710  if(pnt1.offset.value != pnt2.offset.value) {
711  pnt.offset.fuzz.Reset(new CInt_fuzz);
712  pnt.offset.fuzz->SetRange().SetMin(pnt1.offset.value);
713  pnt.offset.fuzz->SetRange().SetMax(pnt2.offset.value);
714  }
715 
716  return pnt;
717 
718 #if 0
719  todo: reconcile
720  //If Both are Empty - the result is empty, otherwise reconciliate
721  if(pnt1.pnt->GetPoint() == kInvalidSeqPos && pnt2.pnt->GetPoint() == kInvalidSeqPos) {
722  pnt.pnt = pnt1.pnt;
723  pnt.offset = pnt1.offset;
724  } else {
725  pnt.pnt.Reset(new CSeq_point);
726  pnt.pnt.Assign(*pnt1.pnt);
727 
728  TSeqPos min_pos = min(pnt1.pnt->GetPoint(), pnt2.pnt->GetPoint());
729  TSeqPos max_pos = max(pnt1.pnt->GetPoint(), pnt2.pnt->GetPoint());
730 
731  if(!pnt1->IsSetFuzz() && !pnt2->IsSetFuzz()) {
732  //Both are non-fuzzy - create the min-max fuzz.
733  //(10+50_10+60)
734  pnt->SetFuzz().SetRange().SetMin(min_pos);
735  pnt->SetFuzz().SetRange().SetMax(max_pos);
736 
737  } else if(pnt1->IsSetFuzz() && pnt2->IsSetFuzz()) {
738  //Both are fuzzy - reconcile the fuzz.
739 
740  if(pnt1->GetFuzz().GetLim() == CInt_fuzz::eLim_tr
741  && pnt2->GetFuzz().GetLim() == CInt_fuzz::eLim_tl)
742  {
743  //fuzz points inwards - create min-max fuzz
744  //(10+?_11-?)
745  pnt->SetFuzz().SetRange().SetMin(min_pos);
746  pnt->SetFuzz().SetRange().SetMax(max_pos);
747 
748  } else if (pnt1->GetFuzz().GetLim() == CInt_fuzz::eLim_tl
749  && pnt2->GetFuzz().GetLim() == CInt_fuzz::eLim_tr)
750  {
751  //fuzz points outwards - set fuzz to unk
752  //(10-?_10+?)
753  //(?_10+?)
754  //(10-?_?)
755  pnt->SetFuzz().SetLim(CInt_fuzz::eLim_unk);
756 
757  } else if (pnt1->GetFuzz().GetLim() == CInt_fuzz::eLim_tl
758  && pnt2->GetFuzz().GetLim() == CInt_fuzz::eLim_tl)
759  {
760  //fuzz is to the left - use 5'-most
761  //(?_10-?)
762  //(10-?_11-?)
763  pnt->SetPoint(pnt->GetStrand() == eNa_strand_minus ? max_pos : min_pos);
764 
765  } else if (pnt1->GetFuzz().GetLim() == CInt_fuzz::eLim_tr
766  && pnt2->GetFuzz().GetLim() == CInt_fuzz::eLim_tr)
767  {
768  //fuzz is to the right - use 3'-most
769  //(10+?_?)
770  //(10+?_11+?)
771  pnt->SetPoint(pnt->GetStrand() == eNa_strand_minus ? min_pos : max_pos);
772 
773  } else {
774  pnt->SetFuzz().SetLim(CInt_fuzz::eLim_unk);
775  }
776  } else {
777  // One of the two is non-fuzzy:
778  // use it to specify position, and the fuzz of the other to specify the fuzz
779  // e.g. (10+5_10+?) -> loc1=100005; loc2=100000tr -> 100005tr
780 
781  pnt->Assign(pnt1->IsSetFuzz() ? *pnt2 : *pnt1);
782  pnt->SetFuzz().Assign(pnt1->IsSetFuzz() ? pnt1->GetFuzz()
783  : pnt2->GetFuzz());
784 
785  }
786  }
787 #endif
788 
789 
790 
791 }
792 
794 {
795  set<CSeq_id_Handle> annotated_transcript_feats;
796  set<CSeq_id_Handle> annotated_transcript_aligns;
797 
798  SAnnotSelector sel;
799  sel.SetResolveTSE();
801  for(CFeat_CI ci(bsh, sel); ci; ++ci) {
802  const CMappedFeat& mf = *ci;
803  if(mf.IsSetProduct() && mf.GetProduct().GetId()) {
804  annotated_transcript_feats.insert(
805  CSeq_id_Handle::GetHandle(*mf.GetProduct().GetId()));
806  }
807  }
808 
809  for(CAlign_CI ci(bsh, sel); ci; ++ci) {
810  const CSeq_align& aln = *ci;
811  if(aln.GetSegs().IsSpliced()) {
812  annotated_transcript_aligns.insert(
814  }
815  }
816 
817  vector<CSeq_id_Handle> v;
818  set_intersection(annotated_transcript_feats.begin(),
819  annotated_transcript_feats.end(),
820  annotated_transcript_aligns.begin(),
821  annotated_transcript_aligns.end(),
822  back_inserter(v));
823 
824  return v.size() != 1 ? CSeq_id_Handle()
825  : sequence::GetId(v.front(),
826  bsh.GetScope(),
828 }
829 
831 {
832  ITERATE(CBioseq_Handle::TId, it, bsh.GetId()) {
833  const CSeq_id& id = *it->GetSeqId();
834  if( id.IsGeneral()
835  && id.GetGeneral().GetDb() == "LRG")
836  {
837  return true;
838  }
839  }
840  return false;
841 }
842 
843 
845 {
846  HGVS_ASSERT_RULE(i, eID_header);
847 
849 
850  TIterator it = i->children.rbegin()->children.begin();
851  string mol(it->value.begin(), it->value.end());
852  CVariantPlacement::TMol mol_type =
853  mol == "c" ? CVariantPlacement::eMol_cdna
854  : mol == "g" ? CVariantPlacement::eMol_genomic
855  : mol == "r" ? CVariantPlacement::eMol_rna
856  : mol == "n" ? CVariantPlacement::eMol_rna
857  : mol == "p" ? CVariantPlacement::eMol_protein
861 
862  it = (i->children.rbegin() + 1)->children.begin();
863  string id_str(it->value.begin(), it->value.end());
864 
865  CSeq_id_Handle idh = context.ResolevSeqId(id_str);
866  CBioseq_Handle bsh = context.GetScope().GetBioseqHandle(idh);
867  if(!bsh) {
868  HGVS_THROW(eSemantic, "Could not resolve seq-id-str='" + id_str + "'; idh=" + idh.AsString());
869  }
870 
871 
872  if(bsh.IsNucleotide()
873  && mol_type == CVariantPlacement::eMol_protein
874  && NStr::Find(id_str, "CCDS") == 0)
875  {
876  //If we have something like CCDS2.1:p., the CCDS2.1 will resolve
877  //to an NM, but we need to resolve it to corresponding NP.
878  //
879  //We could do this for all seq-ids, as long as there's unique CDS,
880  //but as per SNP-4536 the seq-id and moltype correspondence must be enforced,
881  //so we do it for CCDS only
882 
883  SAnnotSelector sel;
884  sel.SetResolveTSE();
886  bool already_found = false;
887  for(CFeat_CI ci(bsh, sel); ci; ++ci) {
888  const CMappedFeat& mf = *ci;
889  if(mf.IsSetProduct() && mf.GetProduct().GetId()) {
890  if(already_found) {
891  HGVS_THROW(eSemantic, "Can't resolve to prot - multiple CDSes on " + idh.AsString());
892  } else {
893  idh = sequence::GetId(
894  *mf.GetProduct().GetId(),
895  context.GetScope(),
897  already_found = true;
898  }
899  }
900  }
901  if(!already_found) {
902  HGVS_THROW(eSemantic, "Can't resolve to prot - can't find CDS on " + idh.AsString());
903  }
904  } else if( ( mol_type == CVariantPlacement::eMol_cdna
905  || mol_type == CVariantPlacement::eMol_rna)
906  && idh.IdentifyAccession() == CSeq_id::eAcc_refseq_genomic) //e.g. NG_009822.1:c.1437+1G>A
907  {
908  //VAR-861
909  if(!IsLRG(bsh)) {
910  HGVS_THROW(eSemantic, "Specifying c. expression in NG coordinates is only supported for LRG subset where NM/NG associations are stable");
911  }
912 
914  if(!idh2) {
915  HGVS_THROW(eSemantic, "Can't resolve to a unique transcript on NG: " + idh.AsString());
916  } else {
917  idh = idh2;
918  }
919  }
920 
921  ctx.SetId(*idh.GetSeqId(), mol_type);
922 
923  if(i->children.size() == 3) {
924  it = (i->children.rbegin() + 2)->children.begin();
925  string tag_str(it->value.begin(), it->value.end());
926  //record tag in context, if it is necessary in the future
927  }
928 
929  return ctx;
930 }
931 
932 
934 {
935  HGVS_ASSERT_RULE(i, eID_pos_spec);
936 
937  SOffsetPoint pnt;
938  TIterator it = i->children.begin();
939  if(it->value.id() == SGrammar::eID_general_pos) {
940  pnt = x_general_pos(it, context);
941  } else if(it->value.id() == SGrammar::eID_fuzzy_pos) {
942  pnt = x_fuzzy_pos(it, context);
943  } else {
944  bool flip_strand = false;
945  if(i->children.size() == 3) {
946  //first child is 'o' - opposite
947  flip_strand = true;
948  ++it;
949  }
950 
951  CContext local_ctx = x_header(it, context);
952  ++it;
953  pnt = x_pos_spec(it, local_ctx);
954 
955  if(flip_strand) {
956  pnt.pnt->FlipStrand();
957  }
958  }
959 
960  return pnt;
961 }
962 
963 
965 {
966  HGVS_ASSERT_RULE(i, eID_prot_pos);
967  TIterator it = i->children.begin();
968 
969  CRef<CSeq_literal> prot_literal = x_raw_seq(it, context);
970 
971  if(context.GetPlacement().GetMol() != CVariantPlacement::eMol_protein) {
972  HGVS_THROW(eSemantic, "Expected protein context");
973  }
974 
975  if(prot_literal->GetLength() != 1) {
976  HGVS_THROW(eSemantic, "Expected single aa literal in prot-pos");
977  }
978 
979  ++it;
980  SOffsetPoint pnt = x_pos_spec(it, context);
981 
982  pnt.asserted_sequence = prot_literal->GetSeq_data().GetNcbieaa();
983 
984  return pnt;
985 }
986 
987 
989 {
990  SOffsetPoint pnt1, pnt2;
991 
993  p->Assign(context.GetPlacement());
994 
995  if(i->value.id() == SGrammar::eID_prot_range) {
996  pnt1 = x_prot_pos(i->children.begin(), context);
997  pnt2 = x_prot_pos(i->children.begin() + 1, context);
998  } else if(i->value.id() == SGrammar::eID_nuc_range) {
999  pnt1 = x_pos_spec(i->children.begin(), context);
1000  pnt2 = x_pos_spec(i->children.begin() + 1, context);
1001  } else {
1002  HGVS_ASSERT_RULE(i, eID_NONE);
1003  }
1004 
1005  if(!pnt1.pnt->GetId().Equals(pnt2.pnt->GetId())) {
1006  HGVS_THROW(eSemantic, "Range-loc start/stop are on different seq-ids.");
1007  }
1008  if(pnt1.pnt->GetStrand() != pnt2.pnt->GetStrand()) {
1009  HGVS_THROW(eSemantic, "Range-loc start/stop are on different strands.");
1010  }
1011 
1012  p->SetLoc().SetInt().SetId(pnt1.pnt->SetId());
1013  p->SetLoc().SetInt().SetFrom(pnt1.pnt->GetPoint());
1014  p->SetLoc().SetInt().SetTo(pnt2.pnt->GetPoint());
1015  p->SetLoc().SetInt().SetStrand(pnt1.pnt->GetStrand());
1016  if(pnt1.pnt->IsSetFuzz()) {
1017  p->SetLoc().SetInt().SetFuzz_from(pnt1.pnt->SetFuzz());
1018  }
1019 
1020  if(pnt2.pnt->IsSetFuzz()) {
1021  p->SetLoc().SetInt().SetFuzz_to(pnt2.pnt->SetFuzz());
1022  }
1023 
1024  s_SetStartOffset(*p, pnt1.offset);
1025  s_SetStopOffset(*p, pnt2.offset);
1026 
1027  if(pnt1.asserted_sequence != "" || pnt2.asserted_sequence != "") {
1028  //for proteins, the asserted sequence is specified as part of location, rather than variation
1029  p->SetSeq().SetLength(CVariationUtil::s_GetLength(*p, NULL));
1030  string& seq_str = (context.GetPlacement().GetMol() == CVariantPlacement::eMol_protein)
1031  ? p->SetSeq().SetSeq_data().SetNcbieaa().Set()
1032  : p->SetSeq().SetSeq_data().SetIupacna().Set();
1033  seq_str = pnt1.asserted_sequence + ".." + pnt2.asserted_sequence;
1034  }
1035 
1036  return p;
1037 }
1038 
1040 {
1041  HGVS_ASSERT_RULE(i, eID_location);
1042 
1044  placement->Assign(context.GetPlacement());
1045 
1046  TIterator it = i->children.begin();
1047  CRef<CSeq_loc> loc(new CSeq_loc);
1048  if(it->value.id() == SGrammar::eID_prot_pos || it->value.id() == SGrammar::eID_pos_spec) {
1049  SOffsetPoint pnt = it->value.id() == SGrammar::eID_prot_pos
1050  ? x_prot_pos(it, context)
1051  : x_pos_spec(it, context);
1052  placement->SetLoc().SetPnt(*pnt.pnt);
1053  s_SetStartOffset(*placement, pnt.offset);
1054  if(pnt.asserted_sequence != "") {
1055  placement->SetSeq().SetLength(CVariationUtil::s_GetLength(*placement, NULL));
1056  string& seq_str = (context.GetPlacement().GetMol() == CVariantPlacement::eMol_protein)
1057  ? placement->SetSeq().SetSeq_data().SetNcbieaa().Set()
1058  : placement->SetSeq().SetSeq_data().SetIupacna().Set();
1059  seq_str = pnt.asserted_sequence;
1060  }
1061 
1062  //todo point with pos=0 and fuzz=unk -> unknown pos ->set loc to empty
1063 
1064  } else if(it->value.id() == SGrammar::eID_nuc_range || it->value.id() == SGrammar::eID_prot_range) {
1065  placement = x_range(it, context);
1066  } else {
1067  HGVS_ASSERT_RULE(it, eID_NONE);
1068  }
1069 
1070  if(placement->GetLoc().IsPnt() && placement->GetLoc().GetPnt().GetPoint() == kInvalidSeqPos) {
1071  placement->SetLoc().SetEmpty().Assign(context.GetId());
1072  }
1073 
1074  CVariationUtil util(context.GetScope());
1075  if(CVariationUtil::eFail == util.CheckExonBoundary(*placement)) {
1078  exception->SetMessage("HGVS exon-boundary position not represented in the transcript annotation");
1079  placement->SetExceptions().push_back(exception);
1080  }
1081  util.CheckPlacement(*placement);
1082 
1083  return placement;
1084 }
1085 
1086 
1088 {
1089  HGVS_ASSERT_RULE(i, eID_seq_loc);
1090  TIterator it = i->children.begin();
1091 
1092  bool flip_strand = false;
1093  if(i->children.size() == 3) {
1094  //first child is 'o' - opposite
1095  flip_strand = true;
1096  ++it;
1097  }
1098 
1099  CContext local_context = x_header(it, context);
1100  ++it;
1101  CRef<CVariantPlacement> p = x_location(it, local_context);
1102 
1103  if(flip_strand) {
1104  p->SetLoc().FlipStrand();
1105  }
1106 
1107  if(p->IsSetStop_offset() || p->IsSetStart_offset()) {
1108  HGVS_THROW(eSemantic, "Intronic seq-locs are not supported in this context");
1109  }
1110 
1111  CRef<CSeq_loc> loc(new CSeq_loc);
1112  loc->Assign(p->GetLoc());
1113  return loc;
1114 }
1115 
1117 {
1118  HGVS_ASSERT_RULE(i, eID_raw_seq_or_len);
1119 
1121  TIterator it = i->children.begin();
1122 
1123  if(it == i->children.end()) {
1124  HGVS_THROW(eLogic, "Unexpected parse-tree state when parsing " + context.GetHgvs());
1125  }
1126 
1127  if(it->value.id() == SGrammar::eID_raw_seq) {
1128  literal = x_raw_seq(it, context);
1129  } else if(it->value.id() == SGrammar::eID_int_fuzz) {
1130  SFuzzyInt int_fuzz = x_int_fuzz(it, context);
1131  literal.Reset(new CSeq_literal);
1132  literal->SetLength(int_fuzz.value);
1133  if(int_fuzz.fuzz.IsNull()) {
1134  ;//no-fuzz;
1135  } else if(int_fuzz.IsPureFuzz()) {
1136  //unknown length (no value) - will represent as length=0 with gt fuzz
1137  literal->SetFuzz().SetLim(CInt_fuzz::eLim_gt);
1138  } else {
1139  literal->SetFuzz(*int_fuzz.fuzz);
1140  }
1141  } else {
1142  HGVS_ASSERT_RULE(it, eID_NONE);
1143  }
1144  return literal;
1145 }
1146 
1148 {
1149  HGVS_ASSERT_RULE(i, eID_seq_ref);
1150  CHgvsParser::TDelta delta(new TDelta::TObjectType);
1151  TIterator it = i->children.begin();
1152 
1153  if(it->value.id() == SGrammar::eID_seq_loc) {
1154  CRef<CSeq_loc> loc = x_seq_loc(it, context);
1155  delta->SetSeq().SetLoc(*loc);
1156  } else if(it->value.id() == SGrammar::eID_nuc_range || it->value.id() == SGrammar::eID_prot_range) {
1158  if(p->IsSetStart_offset() || p->IsSetStop_offset()) {
1159  HGVS_THROW(eSemantic, "Intronic loc is not supported in this context");
1160  }
1161  delta->SetSeq().SetLoc().Assign(p->GetLoc());
1162  } else if(it->value.id() == SGrammar::eID_raw_seq_or_len) {
1164  delta->SetSeq().SetLiteral(*literal);
1165  } else {
1166  HGVS_ASSERT_RULE(it, eID_NONE);
1167  }
1168 
1169  return delta;
1170 }
1171 
1172 
1173 bool CHgvsParser::s_hgvsaa2ncbieaa(const string& hgvsaa, string& out)
1174 {
1175  //try to interpret sequence that was matched by either of aminoacid1, aminoacid, or aminoacid3
1176  string tmp_out(""); //so that the caller may pass same variable for in and out
1177  bool ret = s_hgvsaa2ncbieaa(hgvsaa, true, tmp_out);
1178  if(!ret) {
1179  ret = s_hgvsaa2ncbieaa(hgvsaa, false, tmp_out);
1180  }
1181  if(!ret) {
1182  ret = s_hgvs_iupacaa2ncbieaa(hgvsaa, tmp_out);
1183  }
1184 
1185  out = tmp_out;
1186  return ret;
1187 }
1188 
1189 bool CHgvsParser::s_hgvs_iupacaa2ncbieaa(const string& hgvsaa, string& out)
1190 {
1191  out = hgvsaa;
1192 
1193  //"X" used to mean "Ter" in HGVS; now it means "unknown aminoacid"
1194  //Still, we'll interpret it as Ter, simply beacuse it is more likely
1195  //that the submitter is using legacy representation.
1196  NStr::ReplaceInPlace(out, "X", "*");
1197  NStr::ReplaceInPlace(out, "?", "X");
1198  return true;
1199 }
1200 
1201 bool CHgvsParser::s_hgvsaa2ncbieaa(const string& hgvsaa, bool uplow, string& out)
1202 {
1203  string in = hgvsaa;
1204  out = "";
1205  while(in != "") {
1206  bool found = false;
1207  for(size_t i_ncbistdaa = 0; i_ncbistdaa < 28; i_ncbistdaa++) {
1208  string iupac3 = CSeqportUtil::GetIupacaa3(i_ncbistdaa);
1209  if(NStr::StartsWith(in, uplow ? iupac3 : NStr::ToUpper(iupac3))) {
1212  i_ncbistdaa);
1214  found = true;
1215  break;
1216  }
1217  }
1218  if(found) {
1219  in = in.substr(3);
1220  } else if(NStr::StartsWith(in, "*")) { out.push_back('*'); in = in.substr(1);
1221  } else if(NStr::StartsWith(in, "X")) { out.push_back('*'); in = in.substr(1);
1222  } else if(NStr::StartsWith(in, "?")) { out.push_back('X'); in = in.substr(1);
1223  //} else if(NStr::StartsWith(in, "STOP", NStr::eNocase)) { out.push_back('X'); in = in.substr(4); //VAR-283
1224  } else {
1225  out = hgvsaa;
1226  return false;
1227  }
1228  }
1229  return true;
1230 }
1231 
1232 
1233 
1235 {
1236  HGVS_ASSERT_RULE(i, eID_raw_seq);
1237  TIterator it = i->children.begin();
1238 
1239  string seq_str(it->value.begin(), it->value.end());
1240 
1242  if(context.GetPlacement().GetMol() == CVariantPlacement::eMol_protein) {
1243  s_hgvsaa2ncbieaa(seq_str, seq_str);
1244  literal->SetSeq_data().SetNcbieaa().Set(seq_str);
1245  } else {
1246  seq_str = NStr::ToUpper(seq_str);
1247  NStr::ReplaceInPlace(seq_str, "U", "T");
1248  literal->SetSeq_data().SetIupacna().Set(seq_str);
1249  }
1250 
1251  literal->SetLength(seq_str.size());
1252 
1253  vector<TSeqPos> bad;
1254  CSeqportUtil::Validate(literal->GetSeq_data(), &bad);
1255 
1256  if(bad.size() > 0) {
1257  HGVS_THROW(eSemantic, "Invalid sequence at pos " + NStr::IntToString(bad[0]) + " in " + seq_str);
1258  }
1259 
1260  return literal;
1261 }
1262 
1263 
1264 int GetDeltaLength(const CDelta_item& delta, int loc_len)
1265 {
1266  int len = !delta.IsSetSeq() ? 0
1267  : delta.GetSeq().IsLiteral() ? delta.GetSeq().GetLiteral().GetLength()
1268  : delta.GetSeq().IsLoc() ? sequence::GetLength(delta.GetSeq().GetLoc(), NULL)
1269  : delta.GetSeq().IsThis() ? loc_len
1270  : 0;
1271  if(delta.IsSetMultiplier()) {
1272  len *= delta.GetMultiplier();
1273  }
1274  return len;
1275 }
1276 
1277 CVariation_inst::EType GetDelInsSubtype(int del_len, int ins_len)
1278 {
1279  return del_len > 0 && ins_len == 0 ? CVariation_inst::eType_del
1280  : del_len == 0 && ins_len > 0 ? CVariation_inst::eType_ins
1281  : del_len == ins_len && del_len != 1 ? CVariation_inst::eType_mnp
1282  : del_len == ins_len && del_len == 1 ? CVariation_inst::eType_snv
1284 }
1285 
1287 {
1288  HGVS_ASSERT_RULE(i, eID_delins);
1289  TIterator it = i->children.begin();
1290  CRef<CVariation> del_vr = x_deletion(it, context);
1291  ++it;
1292  CRef<CVariation> ins_vr = x_insertion(it, context, false);
1293  //note: don't verify location, as it must be len=2 for pure-insertion only
1294 
1295  //The resulting delins variation has deletion's placement (with asserted seq, if any),
1296  //and insertion's inst, except action type is "replace" (default) rather than "ins-before",
1297  //so we reset action
1298 
1299  int placement_len = CVariationUtil::s_GetLength(SetFirstPlacement(*del_vr), NULL);
1300  int del_len = GetDeltaLength(*del_vr->GetData().GetInstance().GetDelta().front(), placement_len);
1301  int ins_len = GetDeltaLength(*ins_vr->GetData().GetInstance().GetDelta().front(), placement_len);
1302  del_vr->SetData().SetInstance().SetType(GetDelInsSubtype(del_len, ins_len));
1303 
1304  del_vr->SetData().SetInstance().SetDelta() = ins_vr->SetData().SetInstance().SetDelta();
1305  del_vr->SetData().SetInstance().SetDelta().front()->ResetAction();
1306 
1307  if(ins_len == 1 && del_len == 1) {
1310  ex->SetMessage("delins used for single-nt substitution");
1311  SetFirstPlacement(*del_vr).SetExceptions().push_back(ex);
1312  }
1313 
1314  return del_vr;
1315 }
1316 
1318 {
1319  HGVS_ASSERT_RULE(i, eID_deletion);
1320  TIterator it = i->children.begin();
1321  CRef<CVariation> vr(new CVariation);
1322  CVariation_inst& var_inst = vr->SetData().SetInstance();
1323 
1326  p.Assign(context.GetPlacement());
1327 
1329  di->SetAction(CDelta_item::eAction_del_at);
1330  di->SetSeq().SetThis();
1331  var_inst.SetDelta().push_back(di);
1332 
1333  ++it;
1334 
1335  if(it != i->children.end() && it->value.id() == SGrammar::eID_raw_seq_or_len) {
1337  ++it;
1339 
1340  if(literal->GetLength() != CVariationUtil::s_GetLength(p, NULL)) {
1343  ex->SetMessage("Sequence length is inconsistent with location length");
1344  p.SetExceptions().push_back(ex);
1345  }
1346  }
1347 
1348  var_inst.SetDelta();
1349  return vr;
1350 }
1351 
1352 
1354 {
1355  HGVS_ASSERT_RULE(i, eID_insertion);
1356  TIterator it = i->children.begin();
1357  ++it; //skip ins
1358  CRef<CVariation> vr(new CVariation);
1359  CVariation_inst& var_inst = vr->SetData().SetInstance();
1360 
1362 
1363  SetFirstPlacement(*vr).Assign(context.GetPlacement());
1364 
1365  if(check_loc && CVariationUtil::s_GetLength(*vr->GetPlacements().front(), NULL) != 2) {
1366  HGVS_THROW(eSemantic, "Location must be a dinucleotide");
1367  }
1368 
1369  TDelta delta_ins = x_seq_ref(it, context);
1370 
1371  //todo:
1372  //alternative representation: if delta is literal, might use action=morph and prefix/suffix the insertion with the flanking nucleotides.
1373  delta_ins->SetAction(CDelta_item::eAction_ins_before);
1374 
1375  var_inst.SetDelta().push_back(delta_ins);
1376 
1377  return vr;
1378 }
1379 
1380 
1382 {
1383  HGVS_ASSERT_RULE(i, eID_duplication);
1384  TIterator it = i->children.begin();
1385  CRef<CVariation> vr(new CVariation);
1386  CVariation_inst& var_inst = vr->SetData().SetInstance();
1387  var_inst.SetType(CVariation_inst::eType_ins); //replace seq @ location with this*2
1388 
1389  SetFirstPlacement(*vr).Assign(context.GetPlacement());
1390 
1391  TDelta delta(new TDelta::TObjectType);
1392  delta->SetSeq().SetThis(); //delta->SetSeq().SetLoc(vr->SetLocation());
1393  delta->SetMultiplier(2);
1394  var_inst.SetDelta().push_back(delta);
1395 
1396  ++it; //skip dup
1397 
1398  //the next node is either expected length or expected sequence
1399  if(it != i->children.end() && it->value.id() == SGrammar::eID_seq_ref) {
1400  TDelta dup_seq = x_seq_ref(it, context);
1401  if(dup_seq->GetSeq().IsLiteral()) {
1402  SetFirstPlacement(*vr).SetSeq(dup_seq->SetSeq().SetLiteral());
1403 
1404  if(CVariationUtil::s_GetLength(*vr->GetPlacements().front(), NULL) != dup_seq->GetSeq().GetLiteral().GetLength()) {
1405  HGVS_THROW(eSemantic, "Location length and asserted sequence length differ");
1406  }
1407  }
1408  }
1409 
1410  return vr;
1411 }
1412 
1413 
1415 {
1416  HGVS_ASSERT_RULE(i, eID_no_change);
1417  TIterator it = i->children.begin();
1418  CRef<CVariation> vr(new CVariation);
1419  CVariation_inst& var_inst = vr->SetData().SetInstance();
1420 
1422  p.Assign(context.GetPlacement());
1423 
1424  //VAR-574: The no-change variation is interpreted as X>X
1425 
1426  if(it->value.id() == SGrammar::eID_raw_seq) {
1427  CRef<CSeq_literal> seq_from = x_raw_seq(it, context);
1428  p.SetSeq(*seq_from);
1429  ++it;
1430  } else if(p.GetLoc().IsWhole()) {
1431  // will fall-back 'identity' inst-type instead of whole-seq mnp
1432  } else {
1433  CVariationUtil util(context.GetScope());
1434  util.AttachSeq(p);
1435  p.ResetExceptions();
1436  // if could not attach seq (e.g. too-large or intronic context)
1437  // will fall-back on 'identity' inst-type below, so ignoring
1438  // the exceptions.
1439  }
1440 
1441  var_inst.SetType(
1446 
1447  TDelta delta(new TDelta::TObjectType);
1448  if(p.IsSetSeq()) {
1449  delta->SetSeq().SetLiteral().Assign(p.GetSeq());
1450  } else {
1451  delta->SetSeq().SetThis();
1452  }
1453  var_inst.SetDelta().push_back(delta);
1454 
1455  return vr;
1456 }
1457 
1458 
1460 {
1461  HGVS_ASSERT_RULE(i, eID_nuc_subst);
1462  TIterator it = i->children.begin();
1463  CRef<CVariation> vr(new CVariation);
1464  CVariation_inst& var_inst = vr->SetData().SetInstance();
1465 
1466  SetFirstPlacement(*vr).Assign(context.GetPlacement());
1467 
1468  if(it->value.id() == SGrammar::eID_raw_seq) {
1469  CRef<CSeq_literal> seq_from = x_raw_seq(it, context);
1470  SetFirstPlacement(*vr).SetSeq(*seq_from);
1471  ++it;
1472  }
1473 
1474  ++it;//skip ">"
1475 
1476  CRef<CSeq_literal> seq_to = x_raw_seq(it, context);
1477  TDelta delta(new TDelta::TObjectType);
1478  delta->SetSeq().SetLiteral(*seq_to);
1479  var_inst.SetDelta().push_back(delta);
1480 
1481  var_inst.SetType(
1484  seq_to->GetLength()));
1485 
1486  return vr;
1487 }
1488 
1489 
1491 {
1492  HGVS_ASSERT_RULE(i, eID_nuc_inv);
1493 
1494  TIterator it = i->children.begin();
1495  CRef<CVariation> vr(new CVariation);
1496  CVariation_inst& var_inst = vr->SetData().SetInstance();
1498 
1499  SetFirstPlacement(*vr).Assign(context.GetPlacement());
1500 
1501 #if 0
1502  TDelta delta(new TDelta::TObjectType);
1503  delta->SetSeq().SetLoc().Assign(*loc);
1504  delta->SetSeq().SetLoc().FlipStrand();
1505  var_inst.SetDelta().push_back(delta);
1506 #else
1507  //don't put anything in the delta, as the inversion sequence is placement-specific, not variation-specific
1508  var_inst.SetDelta();
1509 #endif
1510 
1511  ++it;
1512 
1513  //capture asserted seq
1514  if(it != i->children.end() && it->value.id() == SGrammar::eID_seq_ref) {
1515  TDelta dup_seq = x_seq_ref(it, context);
1516  if(dup_seq->GetSeq().IsLiteral()) {
1517  SetFirstPlacement(*vr).SetSeq(dup_seq->SetSeq().SetLiteral());
1518  }
1519  }
1520 
1521  return vr;
1522 }
1523 
1524 
1526 {
1527  HGVS_ASSERT_RULE(i, eID_ssr);
1528  TIterator it = i->children.begin();
1529  CRef<CVariation> vr(new CVariation);
1530  vr->SetData().SetInstance().SetType(CVariation_inst::eType_microsatellite);
1531 
1532 
1534  if(it->value.id() == SGrammar::eID_raw_seq) {
1535  literal = x_raw_seq(it, context);
1536  ++it;
1537  }
1538 
1540  p.Assign(context.GetPlacement());
1541 
1542  // The location may either specify a repeat unit,
1543  // or point to the first base of a repeat unit.
1544  // We normalize it so it is alwas the repeat unit.
1545 #if 1
1546  if( !literal.IsNull()
1547  && literal->IsSetSeq_data()
1548  && literal->GetSeq_data().IsIupacna()
1549  && !p.IsSetStart_offset()
1550  && !p.IsSetStop_offset())
1551  {
1552  CRef<CSeq_loc> ssr_loc = FindSSRLoc(
1553  p.GetLoc(),
1554  literal->GetSeq_data().GetIupacna(),
1555  context.GetScope());
1556  p.SetLoc().Assign(*ssr_loc);
1557  } else if(p.IsSetStart_offset() && !p.IsSetStop_offset()) {
1559  + (literal.IsNull() ? 0 : literal->GetLength() - 1));
1560  }
1561 #else
1562  if(SetFirstPlacement(*vr).GetLoc().IsPnt() && !literal.IsNull()) {
1563  ExtendDownstream(SetFirstPlacement(*vr), literal->GetLength() - 1);
1564  }
1565 #endif
1566 
1567 
1568  if(it->value.id() == SGrammar::eID_ssr) { // list('['>>int_p>>']', '+') with '[',']','+' nodes discarded;
1569  //Note: see ssr grammar in the header for reasons why we have to match all alleles here
1570  //rather than match them separately as mut_insts
1571 
1572  vr->SetData().SetSet().SetType(CVariation::TData::TSet::eData_set_type_genotype);
1573  for(; it != i->children.end(); ++it) {
1574  string s1(it->value.begin(), it->value.end());
1575  CRef<CVariation> vr2(new CVariation);
1576  vr2->SetData().SetInstance().SetType(CVariation_inst::eType_microsatellite);
1577 
1578  TDelta delta(new TDelta::TObjectType);
1579  if(!literal.IsNull()) {
1580  delta->SetSeq().SetLiteral().Assign(*literal);
1581  } else {
1582  delta->SetSeq().SetThis();
1583  }
1584  delta->SetMultiplier(NStr::StringToInt(s1));
1585 
1586  vr2->SetData().SetInstance().SetDelta().push_back(delta);
1587  vr->SetData().SetSet().SetVariations().push_back(vr2);
1588  }
1589  vr = x_unwrap_iff_singleton(*vr);
1590  } else {
1591  TDelta delta(new TDelta::TObjectType);
1592  if(!literal.IsNull()) {
1593  delta->SetSeq().SetLiteral().Assign(*literal);
1594  } else {
1595  delta->SetSeq().SetThis();
1596  }
1597 
1598  SFuzzyInt int_fuzz = x_int_fuzz(it, context);
1599  delta->SetMultiplier(int_fuzz.value);
1600  if(int_fuzz.fuzz.IsNull()) {
1601  ;
1602  } else {
1603  delta->SetMultiplier_fuzz(*int_fuzz.fuzz);
1604  }
1605  vr->SetData().SetInstance().SetDelta().push_back(delta);
1606  }
1607 
1608  return vr;
1609 }
1610 
1611 
1613 {
1614  HGVS_ASSERT_RULE(i, eID_translocation);
1615  TIterator it = i->children.end() - 1; //note: seq-loc follows iscn expression, i.e. last child
1616  CRef<CVariation> vr(new CVariation);
1617  CVariation_inst& var_inst = vr->SetData().SetInstance();
1619 
1620  CRef<CSeq_loc> loc = x_seq_loc(it, context);
1621  SetFirstPlacement(*vr).SetLoc().Assign(*loc);
1622  CVariationUtil util(context.GetScope());
1623  SetFirstPlacement(*vr).SetMol(util.GetMolType(sequence::GetId(*loc, NULL)));
1624 
1625  it = i->children.begin();
1626  string iscn_expr(it->value.begin(), it->value.end());
1627  vr->SetSynonyms().push_back("ISCN:" + iscn_expr);
1628  var_inst.SetDelta(); //no delta contents
1629 
1630  return vr;
1631 }
1632 
1633 
1635 {
1636  HGVS_ASSERT_RULE(i, eID_conversion);
1637  TIterator it = i->children.begin();
1638  CRef<CVariation> vr(new CVariation);
1639  CVariation_inst& var_inst = vr->SetData().SetInstance();
1641 
1642  SetFirstPlacement(*vr).Assign(context.GetPlacement());
1643 
1644  ++it;
1645  CRef<CSeq_loc> loc_other = x_seq_loc(it, context);
1646 
1647  TDelta delta(new TDelta::TObjectType);
1648  delta->SetSeq().SetLoc().Assign(*loc_other);
1649  var_inst.SetDelta().push_back(delta);
1650 
1651  return vr;
1652 }
1653 
1654 
1656 {
1657  HGVS_ASSERT_RULE(i, eID_prot_fs);
1658  TIterator it = i->children.begin();
1659  CRef<CVariation> vr(new CVariation);
1660 
1661  if(context.GetPlacement().GetMol() != CVariantPlacement::eMol_protein) {
1662  HGVS_THROW(eContext, "Frameshift can only be specified in protein context");
1663  }
1664 
1665  vr->SetData().SetNote("Frameshift");
1666  vr->SetFrameshift();
1667 
1668  SetFirstPlacement(*vr).Assign(context.GetPlacement());
1669 
1670  ++it; //skip 'fs'
1671  if(it != i->children.end()) {
1672  //fsX# description: the remaining tokens are 'X' and integer
1673  ++it; //skip 'X'
1674  if(it != i->children.end()) {
1675  string s(it->value.begin(), it->value.end());
1676  int x_length = NStr::StringToInt(s);
1677  vr->SetFrameshift().SetX_length(x_length);
1678  }
1679  }
1680 
1681  return vr;
1682 }
1683 
1684 
1686 {
1687  HGVS_ASSERT_RULE(i, eID_prot_ext);
1688  TIterator it = i->children.begin();
1689 
1690  if(context.GetPlacement().GetMol() != CVariantPlacement::eMol_protein) {
1691  HGVS_THROW(eContext, "Expected protein context");
1692  }
1693 
1694  CRef<CVariation> vr(new CVariation);
1695  CVariation_inst& var_inst = vr->SetData().SetInstance();
1697  string ext_type_str(it->value.begin(), it->value.end());
1698  ++it;
1699  string ext_len_str(it->value.begin(), it->value.end());
1700  int ext_len = NStr::StringToInt(ext_len_str);
1701 
1702  SetFirstPlacement(*vr).Assign(context.GetPlacement());
1703  SetFirstPlacement(*vr).SetLoc().SetPnt().SetId().Assign(context.GetId());
1704  SetFirstPlacement(*vr).SetLoc().SetPnt().SetStrand(eNa_strand_plus);
1705 
1706  TDelta delta(new TDelta::TObjectType);
1707  delta->SetSeq().SetLiteral().SetLength(abs(ext_len) + 1);
1708  //extension of Met or X by N bases = replacing first or last AA with (N+1) AAs
1709 
1710  if(ext_type_str == "extMet") {
1711  if(ext_len > 0) {
1712  HGVS_THROW(eSemantic, "extMet must be followed by a negative integer");
1713  }
1714  SetFirstPlacement(*vr).SetLoc().SetPnt().SetPoint(0);
1715  //extension precedes first AA
1716  var_inst.SetDelta().push_back(delta);
1717  } else if(ext_type_str == "extX"
1718  || ext_type_str == "ext*"
1719  || ext_type_str == "extTer")
1720  {
1721  if(ext_len < 0) {
1722  HGVS_THROW(eSemantic, "exX must be followed by a non-negative integer");
1723  }
1724 
1725  SetFirstPlacement(*vr).SetLoc().SetPnt().SetPoint(context.GetBioseqHandle().GetInst_Length() - 1);
1726  //extension follows last AA
1727  var_inst.SetDelta().push_back(delta);
1728  } else {
1729  HGVS_THROW(eGrammatic, "Unexpected ext_type: " + ext_type_str);
1730  }
1731 
1732  return vr;
1733 }
1734 
1735 
1737 {
1738  HGVS_ASSERT_RULE(i, eID_prot_missense);
1739  TIterator it = i->children.begin();
1740 
1741  CRef<CSeq_literal> prot_literal = x_raw_seq(it, context);
1742 
1743  if(context.GetPlacement().GetMol() != CVariantPlacement::eMol_protein) {
1744  HGVS_THROW(eContext, "Expected protein context");
1745  }
1746 
1747  CRef<CVariation> vr(new CVariation);
1748  CVariation_inst& var_inst = vr->SetData().SetInstance();
1749  var_inst.SetType(prot_literal->GetLength() == 1 ?
1752 
1753  SetFirstPlacement(*vr).Assign(context.GetPlacement());
1754 
1755  TDelta delta(new TDelta::TObjectType);
1756  delta->SetSeq().SetLiteral(*prot_literal);
1757  var_inst.SetDelta().push_back(delta);
1758 
1759  return vr;
1760 }
1761 
1762 
1764 {
1765  CRef<CVariation> vr(new CVariation);
1766  string s(i->value.begin(), i->value.end());
1767  s = s.substr(1); //truncate the leading pipe
1768  SetFirstPlacement(*vr).Assign(context.GetPlacement());
1769  vr->SetData().SetNote(s);
1770  return vr;
1771 }
1772 
1773 
1775 {
1776  HGVS_ASSERT_RULE(i, eID_mut_inst);
1777 
1778  TIterator it = i->children.begin();
1779 
1780  CRef<CVariation> vr(new CVariation);
1781  if(it->value.id() == SGrammar::eID_mut_inst) {
1782  string s(it->value.begin(), it->value.end());
1783  if(s == "?") {
1784  vr->SetData().SetUnknown();
1785  SetFirstPlacement(*vr).Assign(context.GetPlacement());
1786  } else {
1787  vr = x_string_content(it, context);
1788  }
1789  } else {
1790  vr =
1791  it->value.id() == SGrammar::eID_no_change ? x_no_change(it, context)
1792  : it->value.id() == SGrammar::eID_delins ? x_delins(it, context)
1793  : it->value.id() == SGrammar::eID_deletion ? x_deletion(it, context)
1794  : it->value.id() == SGrammar::eID_insertion ? x_insertion(it, context, true)
1795  : it->value.id() == SGrammar::eID_duplication ? x_duplication(it, context)
1796  : it->value.id() == SGrammar::eID_nuc_subst ? x_nuc_subst(it, context)
1797  : it->value.id() == SGrammar::eID_nuc_inv ? x_nuc_inv(it, context)
1798  : it->value.id() == SGrammar::eID_ssr ? x_ssr(it, context)
1799  : it->value.id() == SGrammar::eID_conversion ? x_conversion(it, context)
1800  : it->value.id() == SGrammar::eID_prot_ext ? x_prot_ext(it, context)
1801  : it->value.id() == SGrammar::eID_prot_fs ? x_prot_fs(it, context)
1802  : it->value.id() == SGrammar::eID_prot_missense ? x_prot_missense(it, context)
1803  : it->value.id() == SGrammar::eID_translocation ? x_translocation(it, context)
1805 
1806  if(vr.IsNull()) {
1807  HGVS_ASSERT_RULE(it, eID_NONE);
1808  }
1809  }
1810 
1811  return vr;
1812 }
1813 
1815 {
1816  HGVS_ASSERT_RULE(i, eID_expr1);
1817  TIterator it = i->children.begin();
1818  CRef<CVariation> vr;
1819 
1820  string s(it->value.begin(), it->value.end());
1821  if(it->value.id() == i->value.id() && s == "(") {
1822  ++it;
1823  vr = x_expr1(it, context);
1824  SetComputational(*vr);
1825  } else if(it->value.id() == SGrammar::eID_list1a) {
1826  vr = x_list(it, context);
1827  } else if(it->value.id() == SGrammar::eID_header) {
1828  CContext local_ctx = x_header(it, context);
1829  ++it;
1830  vr = x_expr2(it, local_ctx);
1831  } else if(it->value.id() == SGrammar::eID_translocation) {
1832  vr = x_translocation(it, context);
1833  } else {
1834  HGVS_ASSERT_RULE(it, eID_NONE);
1835  }
1836 
1837  return vr;
1838 }
1839 
1841 {
1842  HGVS_ASSERT_RULE(i, eID_expr2);
1843  TIterator it = i->children.begin();
1844  CRef<CVariation> vr;
1845 
1846  string s(it->value.begin(), it->value.end());
1847  if(it->value.id() == i->value.id() && s == "(") {
1848  ++it;
1849  vr = x_expr2(it, context);
1850  SetComputational(*vr);
1851  } else if(it->value.id() == SGrammar::eID_list2a) {
1852  vr = x_list(it, context);
1853  } else if(it->value.id() == SGrammar::eID_location) {
1854  CContext local_context(context);
1855  CRef<CVariantPlacement> placement = x_location(it, local_context);
1856  local_context.SetPlacement().Assign(*placement);
1857  ++it;
1858  vr = x_expr3(it, local_context);
1859  } else if(it->value.id() == SGrammar::eID_prot_ext) {
1860  vr = x_prot_ext(it, context);
1861  } else if(it->value.id() == SGrammar::eID_no_change) {
1862  vr = x_no_change(it, context);
1863  } else if(it->value.id() == i->value.id()) {
1864  vr.Reset(new CVariation);
1865  SetFirstPlacement(*vr).Assign(context.GetPlacement());
1866 
1867  if(s == "?") {
1868  vr->SetData().SetUnknown();
1869  //SetFirstPlacement(*vr).SetLoc().SetEmpty().Assign(context.GetId());
1870  } else if(s == "0?" || s == "0") {
1871  //loss of product: represent as deletion of the whole product sequence.
1872  SetFirstPlacement(*vr).SetLoc().SetWhole().Assign(context.GetId());
1873  CVariation_inst& var_inst = vr->SetData().SetInstance();
1876  di->SetAction(CDelta_item::eAction_del_at);
1877  di->SetSeq().SetThis();
1878  var_inst.SetDelta().push_back(di);
1879 
1880  if(s == "0?") {
1881  SetComputational(*vr);
1882  }
1883  } else {
1884  HGVS_THROW(eGrammatic, "Unexpected expr terminal: " + s);
1885  }
1886  } else {
1887  HGVS_ASSERT_RULE(it, eID_NONE);
1888  }
1889 
1890  return vr;
1891 }
1892 
1893 
1895 {
1896  HGVS_ASSERT_RULE(i, eID_expr3);
1897  TIterator it = i->children.begin();
1898  CRef<CVariation> vr;
1899 
1900  string s(it->value.begin(), it->value.end());
1901  if(it->value.id() == i->value.id() && s == "(") {
1902  ++it;
1903  vr = x_expr3(it, context);
1904  SetComputational(*vr);
1905  } else if(it->value.id() == SGrammar::eID_list3a) {
1906  vr = x_list(it, context);
1907  } else if(it->value.id() == SGrammar::eID_mut_inst) {
1908  vr.Reset(new CVariation);
1909  vr->SetData().SetSet().SetType(CVariation::TData::TSet::eData_set_type_compound);
1910  for(; it != i->children.end(); ++it) {
1911  CRef<CVariation> inst_ref = x_mut_inst(it, context);
1912 
1913  if(inst_ref->GetData().IsNote()
1914  && inst_ref->GetData().GetNote() == "Frameshift")
1915  {
1916  if(vr->SetData().SetSet().SetVariations().size() > 0) {
1917  //if inst_ref is a frameshift subexpression, we need to attach it as attribute of the
1918  //previous variation in a compound inst-list, since frameshift is not a subtype of
1919  //Variation.data, and thus not represented as a separate subvariation.
1920  //e.g. p.Ile20Serfs - frameshift an attribute of Ile20Ser
1921  vr->SetData().SetSet().SetVariations().back()->SetFrameshift().Assign(inst_ref->GetFrameshift());
1922  } else {
1923  //in updated HGVS spec fs can be a standalone, e.g. p.Ile20fs.
1924  //In this case we'll represent it as p.Ile20Xaafs
1925  CVariation_inst& inst = inst_ref->SetData().SetInstance();
1927 
1929  delta->SetSeq().SetLiteral().SetLength(1);
1930  delta->SetSeq().SetLiteral().SetSeq_data().SetNcbieaa().Set("X");
1931  inst.SetDelta().push_back(delta);
1932  vr->SetData().SetSet().SetVariations().push_back(inst_ref);
1933  }
1934  } else {
1935  vr->SetData().SetSet().SetVariations().push_back(inst_ref);
1936  }
1937  }
1938  vr = x_unwrap_iff_singleton(*vr);
1939  } else {
1940  HGVS_ASSERT_RULE(it, eID_NONE);
1941  }
1942 
1943  return vr;
1944 }
1945 
1946 
1948 {
1949  HGVS_ASSERT_RULE(i, eID_list_delimiter);
1950  TIterator it = i->children.begin();
1951  string s(it->value.begin(), it->value.end());
1952 
1957  : s == ";" ? CVariation::TData::TSet::eData_set_type_haplotype //note that within context of list#a, ";" delimits genotype
1958  : s == "(+)" || s == "(;)" ? CVariation::TData::TSet::eData_set_type_individual
1960 }
1961 
1962 
1964 {
1965  if(!SGrammar::s_is_list(i->value.id())) {
1966  HGVS_ASSERT_RULE(i, eID_NONE);
1967  }
1968 
1969  CRef<CVariation> vr(new CVariation);
1970  TVariationSet& varset = vr->SetData().SetSet();
1972 
1973 
1974  for(TIterator it = i->children.begin(); it != i->children.end(); ++it) {
1975  //will process two elements from the children list: delimiter and following expression.
1976  //The first one does not have the delimiter. The delimiter determines the set-type.
1977  if(it != i->children.begin()) {
1978  if(SGrammar::s_is_list_a(i->value.id())) {
1979  /*
1980  * list#a is delimited by either ";"(HGVS-2.0) or "+"(HGVS_1.0);
1981  * Both represent alleles within genotype.
1982  * Note: the delimiter rule in the context is chset_p<>("+;"), i.e.
1983  * a terminal, not a rule like list_delimiter; so calling
1984  * x_list_delimiter(...) parser here would throw
1985  */
1987  } else {
1990  varset.SetType(set_type);
1991  } else if(set_type != varset.GetType()) {
1992  HGVS_THROW(eSemantic, "Non-unique delimiters within a list");
1993  }
1994  }
1995  ++it;
1996  }
1997 
1998  CRef<CVariation> vr;
1999  if(it->value.id() == SGrammar::eID_expr1) {
2000  vr = x_expr1(it, context);
2001  } else if(it->value.id() == SGrammar::eID_expr2) {
2002  vr = x_expr2(it, context);
2003  } else if(it->value.id() == SGrammar::eID_expr3) {
2004  vr = x_expr3(it, context);
2005  } else if(SGrammar::s_is_list(it->value.id())) {
2006  vr = x_list(it, context);
2007  } else {
2008  HGVS_ASSERT_RULE(it, eID_NONE);
2009  }
2010 
2011  varset.SetVariations().push_back(vr);
2012  }
2013 
2014  vr = x_unwrap_iff_singleton(*vr);
2015  return vr;
2016 }
2017 
2018 
2020 {
2021  HGVS_ASSERT_RULE(i, eID_root);
2022 
2024 
2026  AdjustMoltype(*vr, context.GetScope());
2028 
2029  vr->Index();
2030  return vr;
2031 }
2032 
2034 {
2035  if(v.GetData().IsSet() && v.GetData().GetSet().GetVariations().size() == 1) {
2036  CRef<CVariation> first = v.SetData().SetSet().SetVariations().front();
2037  if(!first->IsSetPlacements() && v.IsSetPlacements()) {
2038  first->SetPlacements() = v.SetPlacements();
2039  }
2040  return first;
2041  } else {
2042  return CRef<CVariation>(&v);
2043  }
2044 }
2045 
2046 
2048 {
2049  CVariationUtil util(scope);
2050  for(CTypeIterator<CVariantPlacement> it(Begin(v)); it; ++it) {
2051  CVariantPlacement& p = *it;
2052  CVariantPlacement::TMol mol = util.GetMolType(sequence::GetId(p.GetLoc(), NULL));
2053  if(p.GetMol() != mol) {
2055  p2->Assign(p);
2056  p2->SetMol(mol);
2057 
2058  string asserted_header = CHgvsParser::s_SeqIdToHgvsStr(p, &scope);
2059  string expected_header = CHgvsParser::s_SeqIdToHgvsStr(*p2, &scope);
2060 
2063  ex->SetMessage("Inconsistent mol-type. asserted:'" + asserted_header + "'; expected:'" + expected_header + "'");
2064  p.SetExceptions().push_back(ex);
2065  }
2066  }
2067 }
2068 
2070 {
2071  string hgvs2 = NStr::TruncateSpaces(hgvs);
2072  tree_parse_info<> info = pt_parse(hgvs2.c_str(), *s_grammar, +space_p);
2073 
2074  if(!info.full) {
2075 #if 0
2076  CNcbiOstrstream ostr;
2077  tree_to_xml(ostr, info.trees, hgvs2.c_str() , CHgvsParser::SGrammar::s_GetRuleNames());
2078  string tree_str = CNcbiOstrstreamToString(ostr);
2079 #endif
2080  HGVS_THROW(eGrammatic, "Syntax error at pos " + NStr::SizetToString(info.length + 1) + " in " + hgvs2 + "");
2081  }
2082 
2084  CRef<CVariation> vr = x_root(info.trees.begin(), context);
2085  vr->SetName(hgvs2);
2086  sx_AppendMoltypeExceptions(*vr, context.GetScope());
2087 
2088  CVariationUtil util(context.GetScope());
2089  util.CheckAmbiguitiesInLiterals(*vr);
2090 
2091  return vr;
2092 }
2093 
2094 
2095 
2097 {
2098  v.Index();
2099 
2100  //compute and attach placement-specific HGVS expressions
2101  for(CTypeIterator<CVariation> it(Begin(v)); it; ++it) {
2102  CVariation& v2 = *it;
2103  if(!v2.IsSetPlacements()) {
2104  continue;
2105  }
2106  NON_CONST_ITERATE(CVariation::TPlacements, it2, v2.SetPlacements()) {
2107  CVariantPlacement& p2 = **it2;
2108 
2109  if(!p2.GetLoc().GetId()) {
2110  continue;
2111  }
2112 
2113  if(p2.GetMol() != CVariantPlacement::eMol_protein && v2.GetConsequenceParent()) {
2114  //if this variation is in consequnece, only compute HGVS for protein variations
2115  //(as otherwise it will throw - can't have HGVS expression for protein with nuc placement)
2116  continue;
2117  }
2118 
2119  //compute hgvs-expression specific to the placement and the variation to which it is attached
2120  try {
2121  string hgvs_expression = AsHgvsExpression(v2, CConstRef<CSeq_id>(p2.GetLoc().GetId()));
2122  p2.SetHgvs_name(hgvs_expression);
2123  } catch (CException& e ) {
2124  CNcbiOstrstream ostr;
2125  ostr << MSerial_AsnText << p2;
2126  string s = CNcbiOstrstreamToString(ostr);
2127  NCBI_REPORT_EXCEPTION("Can't compute HGVS expression for " + s, e);
2128  }
2129  }
2130  }
2131 
2132  //If the root variation does not have placements (e.g. a container for placement-specific subvariations)
2133  //then compute the hgvs expression for the root placement and attach it to variation itself as a synonym.
2134  if(!v.IsSetPlacements()) {
2135  string root_output_hgvs = AsHgvsExpression(v);
2136  v.SetSynonyms().push_back(root_output_hgvs);
2137  }
2138 }
2139 
2140 
2141 
2142 };
2143 
2145 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Positional
numerical value
Definition: Na_strand.hpp:63
@ eExtreme_Biological
5' and 3'
Definition: Na_strand.hpp:62
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
CAlign_CI –.
Definition: align_ci.hpp:63
CBioseq_Handle –.
CFeat_CI –.
Definition: feat_ci.hpp:64
CMappedFeat –.
Definition: mapped_feat.hpp:59
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
Definition: ncbistre.hpp:802
CSafeStatic<>::
CScope –.
Definition: scope.hpp:92
CSeqVector –.
Definition: seq_vector.hpp:65
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
Definition: Seq_align.cpp:317
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
void FlipStrand(void)
Definition: Seq_point.cpp:208
static TIndex GetMapToIndex(CSeq_data::E_Choice from_type, CSeq_data::E_Choice to_type, TIndex from_idx)
static const string & GetCode(CSeq_data::E_Choice code_type, TIndex idx)
static void Validate(const CSeq_data &in_seq, vector< TSeqPos > *badIdx, TSeqPos uBeginIdx=0, TSeqPos uLength=0)
static const string & GetIupacaa3(TIndex ncbistdaa)
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
Template class for iteration on objects of class C.
Definition: iterator.hpp:673
CVariantPlacement –.
CVariationException –.
CVariationMethod –.
Set of related Variations.
Definition: Variation_.hpp:127
CVariation_inst –.
void Index()
Definition: Variation.cpp:55
Definition: set.hpp:45
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
const_iterator begin() const
Definition: set.hpp:135
const_iterator end() const
Definition: set.hpp:136
CContext(CRef< CScope > scope, CSeq_id_Resolver::TResolvers &id_resolvers, const string &hgvs)
const CSeq_id & GetId() const
void SetId(const CSeq_id &id, CVariantPlacement::TMol mol)
CVariantPlacement & SetPlacement()
const CSeq_feat & GetCDS() const
CSeq_id_Resolver::TResolvers m_seq_id_resolvers
CRef< CVariantPlacement > m_placement
static CRef< CVariation > x_string_content(TIterator const &i, const CContext &context)
static CRef< CVariation > x_root(TIterator const &i, const CContext &context)
static CVariation::TData::TSet::EData_set_type x_list_delimiter(TIterator const &i, const CContext &context)
static SOffsetPoint x_general_pos(TIterator const &i, const CContext &context)
static void sx_AppendMoltypeExceptions(CVariation &v, CScope &scope)
static CRef< CVariation > x_nuc_subst(TIterator const &i, const CContext &context)
static string s_SeqIdToHgvsStr(const CVariantPlacement &vp, CScope *scope=NULL)
Construct an hgvs "header" consisting of seq-id and mol-type, e.g. "NG_016831.1:g....
static CRef< CVariation > x_expr3(TIterator const &i, const CContext &context)
static CRef< CVariation > x_expr1(TIterator const &i, const CContext &context)
static CRef< CVariation > x_delins(TIterator const &i, const CContext &context)
static CRef< CVariation > x_unwrap_iff_singleton(CVariation &v)
static SFuzzyInt x_int_fuzz(TIterator const &i, const CContext &context)
static CRef< CSeq_loc > x_seq_loc(TIterator const &i, const CContext &context)
static CRef< CVariation > x_no_change(TIterator const &i, const CContext &context)
TParseTreeMatch::const_tree_iterator TIterator
static CRef< CVariation > x_insertion(TIterator const &i, const CContext &context, bool check_loc)
static void s_SetStopOffset(CVariantPlacement &p, const CHgvsParser::SFuzzyInt &fint)
static CContext x_header(TIterator const &i, const CContext &context)
static void s_SetStartOffset(CVariantPlacement &p, const CHgvsParser::SFuzzyInt &fint)
CSeq_id_Resolver::TResolvers m_seq_id_resolvers
string AsHgvsExpression(const CVariation &variation, CConstRef< CSeq_id > seq_id=CConstRef< CSeq_id >(NULL))
static CRef< CSeq_literal > x_raw_seq(TIterator const &i, const CContext &context)
CVariation_inst::TDelta::value_type TDelta
static CRef< CVariation > x_translocation(TIterator const &i, const CContext &context)
static CRef< CVariation > x_prot_ext(TIterator const &i, const CContext &context)
static CSafeStatic< SGrammar > s_grammar
void AttachHgvs(CVariation &v)
static CRef< CVariation > x_list(TIterator const &i, const CContext &context)
static SOffsetPoint x_prot_pos(TIterator const &i, const CContext &context)
CRef< CScope > m_scope
static bool s_hgvs_iupacaa2ncbieaa(const string &hgvsaa, string &out)
static SOffsetPoint x_pos_spec(TIterator const &i, const CContext &context)
static CRef< CVariation > x_duplication(TIterator const &i, const CContext &context)
static CRef< CVariation > x_mut_inst(TIterator const &i, const CContext &context)
static CRef< CVariation > x_nuc_inv(TIterator const &i, const CContext &context)
static CRef< CSeq_literal > x_raw_seq_or_len(TIterator const &i, const CContext &context)
static CRef< CVariation > x_prot_missense(TIterator const &i, const CContext &context)
static TDelta x_seq_ref(TIterator const &i, const CContext &context)
static CRef< CVariation > x_deletion(TIterator const &i, const CContext &context)
static CRef< CVariantPlacement > x_range(TIterator const &i, const CContext &context)
static CRef< CVariation > x_prot_fs(TIterator const &i, const CContext &context)
static CRef< CSeq_point > x_abs_pos(TIterator const &i, const CContext &context)
static SOffsetPoint x_fuzzy_pos(TIterator const &i, const CContext &context)
static CRef< CVariantPlacement > x_location(TIterator const &i, const CContext &context)
static bool s_hgvsaa2ncbieaa(const string &hgvsaa, string &out)
Convert HGVS amino-acid code to ncbi code.
static CRef< CVariation > x_expr2(TIterator const &i, const CContext &context)
CRef< CVariation > AsVariation(const string &hgvs_expression, TOpFlags=fOpFlags_Default)
static CRef< CVariation > x_ssr(TIterator const &i, const CContext &context)
static CRef< CVariation > x_conversion(TIterator const &i, const CContext &context)
static void s_FactorOutPlacements(CVariation &v)
If at any level in variation-set all variations have all same placements, move them to the parent lev...
static TSeqPos s_GetLength(const CVariantPlacement &p, CScope *scope)
TSeqPos GetEffectiveTranscriptLength(const CBioseq_Handle &bsh)
Length up to last position of the last exon (i.e.
static uch flags
#define T(s)
Definition: common.h:230
std::ofstream out("events_result.xml")
main entry point for tests
CS_CONTEXT * ctx
Definition: t0006.c:12
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46
int offset
Definition: replacements.h:160
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
int TSignedSeqPos
Type for signed sequence position.
Definition: ncbimisc.hpp:887
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
Definition: ncbimisc.hpp:878
#define NULL
Definition: ncbistd.hpp:225
#define NCBI_REPORT_EXCEPTION(title, ex)
Generate a report on the exception.
Definition: ncbiexpt.hpp:755
const CVect2< U > & v2
Definition: globals.hpp:440
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:696
virtual bool Equals(const CSerialObject &object, ESerialRecursionMode how=eRecursive) const
Check if both objects contain the same values.
CConstRef< CSeq_id > GetSeqId(void) const
CSeq_id::EAccessionInfo IdentifyAccession(void) const
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
string AsString(void) const
@ eAcc_refseq_genomic
Definition: Seq_id.hpp:430
void SetMix(TMix &v)
Definition: Seq_loc.hpp:987
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Override Assign() to incorporate cache invalidation.
Definition: Seq_loc.cpp:337
void SetInt(TInt &v)
Definition: Seq_loc.hpp:983
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
Definition: Seq_loc.hpp:941
void ChangeToMix(void)
Definition: Seq_loc.cpp:3633
@ fSortAndMerge_All
Definition: Seq_loc.hpp:334
@ fMerge_SingleRange
Definition: Seq_loc.hpp:332
CBeginInfo Begin(C &obj)
Get starting point of object hierarchy.
Definition: iterator.hpp:1004
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
ENa_strand GetStrand(const CSeq_loc &loc, CScope *scope=0)
Returns eNa_strand_unknown if multiple Bioseqs in loc Returns eNa_strand_other if multiple strands in...
sequence::ECompare Compare(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
Returns the sequence::ECompare containment relationship between CSeq_locs.
CRef< CSeq_loc > Seq_loc_Merge(const CSeq_loc &loc, CSeq_loc::TOpFlags flags, CScope *scope)
Merge ranges in the seq-loc.
@ fCompareOverlapping
Check if seq-locs are overlapping.
@ eNoOverlap
CSeq_locs do not overlap or abut.
@ eGetId_ForceAcc
return only an accession based seq-id
Definition: sequence.hpp:100
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
vector< CSeq_id_Handle > TId
bool IsNucleotide(void) const
const CSeqFeatData & GetData(void) const
bool IsSetProduct(void) const
TInst_Length GetInst_Length(void) const
CScope & GetScope(void) const
Get scope this handle belongs to.
const TId & GetId(void) const
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
SAnnotSelector & IncludeFeatSubtype(TFeatSubtype subtype)
Include feature subtype in the search.
SAnnotSelector & SetResolveTSE(void)
SetResolveTSE() is equivalent to SetResolveMethod(eResolve_TSE).
const CSeq_loc & GetLocation(void) const
const CSeq_feat & GetMappedFeature(void) const
Feature mapped to the master sequence.
SAnnotSelector & IncludeFeatType(TFeatType type)
Include feature type in the search.
const CSeq_loc & GetProduct(void) const
void Reset(void)
Definition: mapped_feat.cpp:77
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
Definition: seq_vector.cpp:304
const_iterator begin(void) const
Definition: seq_vector.hpp:298
const_iterator end(void) const
Definition: seq_vector.hpp:305
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
Definition: ncbiobj.hpp:735
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
static string SizetToString(size_t value, TNumToStringFlags flags=0, int base=10)
Convert size_t to string.
Definition: ncbistr.cpp:2742
CTempString literal(const char(&str)[Size])
Templatized initialization from a string literal.
Definition: tempstr.hpp:441
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
Definition: ncbistr.cpp:630
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5078
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2882
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5406
size_type find(const CTempString match, size_type pos=0) const
Find the first instance of the entire matching string within the current string, beginning at an opti...
Definition: tempstr.hpp:655
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3396
static string & ToUpper(string &str)
Convert string to upper case – string& version.
Definition: ncbistr.cpp:424
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string.
Definition: ncbistr.cpp:3177
void SetMin(TMin value)
Assign a value to Min data member.
Definition: Int_fuzz_.hpp:528
bool IsLim(void) const
Check if variant Lim is selected.
Definition: Int_fuzz_.hpp:636
TRange & SetRange(void)
Select the variant.
Definition: Int_fuzz_.cpp:165
TLim GetLim(void) const
Get the variant data.
Definition: Int_fuzz_.hpp:642
void SetMax(TMax value)
Assign a value to Max data member.
Definition: Int_fuzz_.hpp:481
TLim & SetLim(void)
Select the variant.
Definition: Int_fuzz_.hpp:649
bool IsRange(void) const
Check if variant Range is selected.
Definition: Int_fuzz_.hpp:603
@ eLim_gt
greater than
Definition: Int_fuzz_.hpp:211
@ eLim_unk
unknown
Definition: Int_fuzz_.hpp:210
@ eLim_other
something else
Definition: Int_fuzz_.hpp:216
@ eLim_lt
less than
Definition: Int_fuzz_.hpp:212
@ eLim_tl
space to left of position
Definition: Int_fuzz_.hpp:214
@ eLim_tr
space to right of position
Definition: Int_fuzz_.hpp:213
bool IsSpliced(void) const
Check if variant Spliced is selected.
Definition: Seq_align_.hpp:778
const TSegs & GetSegs(void) const
Get the Segs member data.
Definition: Seq_align_.hpp:921
bool IsCdregion(void) const
Check if variant Cdregion is selected.
void SetPoint(TPoint value)
Assign a value to Point data member.
Definition: Seq_point_.hpp:312
void SetId(TId &value)
Assign a value to Id data member.
Definition: Seq_point_.cpp:61
const TPnt & GetPnt(void) const
Get the variant data.
Definition: Seq_loc_.cpp:238
TPoint GetPoint(void) const
Get the Point member data.
Definition: Seq_point_.hpp:303
void SetStrand(TStrand value)
Assign a value to Strand data member.
Definition: Seq_point_.hpp:359
TFrom GetFrom(void) const
Get the From member data.
void SetFuzz(TFuzz &value)
Assign a value to Fuzz data member.
Definition: Seq_point_.cpp:71
bool IsSetFuzz(void) const
Check if a value has been assigned to Fuzz data member.
Definition: Seq_point_.hpp:408
const TFuzz & GetFuzz(void) const
Get the Fuzz member data.
Definition: Seq_point_.hpp:420
const TId & GetId(void) const
Get the Id member data.
Definition: Seq_point_.hpp:390
TStrand GetStrand(void) const
Get the Strand member data.
Definition: Seq_point_.hpp:350
TTo GetTo(void) const
Get the To member data.
bool IsWhole(void) const
Check if variant Whole is selected.
Definition: Seq_loc_.hpp:522
const TInt & GetInt(void) const
Get the variant data.
Definition: Seq_loc_.cpp:194
bool IsPnt(void) const
Check if variant Pnt is selected.
Definition: Seq_loc_.hpp:540
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
@ e_Null
not placed
Definition: Seq_loc_.hpp:98
TLength GetLength(void) const
Get the Length member data.
const TNcbieaa & GetNcbieaa(void) const
Get the variant data.
Definition: Seq_data_.hpp:650
const TSeq_data & GetSeq_data(void) const
Get the Seq_data member data.
@ e_Ncbieaa
extended ASCII 1 letter aa codes
Definition: Seq_data_.hpp:111
@ e_Ncbistdaa
consecutive codes for std aas
Definition: Seq_data_.hpp:113
void SetStop_offset_fuzz(TStop_offset_fuzz &value)
Assign a value to Stop_offset_fuzz data member.
TMol GetMol(void) const
Get the Mol member data.
TMethod & SetMethod(void)
Assign a value to Method data member.
list< CRef< CVariantPlacement > > TPlacements
void SetStop_offset(TStop_offset value)
Assign a value to Stop_offset data member.
bool IsSetMol(void) const
Check if a value has been assigned to Mol data member.
list< CRef< CVariation > > TVariations
Definition: Variation_.hpp:158
bool IsSetPlacements(void) const
where this beast is seen note that this is a set of locations, and there are no restrictions to the c...
void SetHgvs_name(const THgvs_name &value)
Assign a value to Hgvs_name data member.
void ResetExceptions(void)
Reset Exceptions data member.
void SetData(TData &value)
Assign a value to Data data member.
Definition: Variation_.cpp:773
void ResetStop_offset(void)
Reset Stop_offset data member.
void ResetStart_offset_fuzz(void)
Reset Start_offset_fuzz data member.
const TSet & GetSet(void) const
Get the variant data.
Definition: Variation_.cpp:220
bool IsSetSeq(void) const
for situations in which a raw location isn't sufficient Check if a value has been assigned to Seq dat...
TExceptions & SetExceptions(void)
Assign a value to Exceptions data member.
const TVariations & GetVariations(void) const
Get the Variations member data.
void SetSeq(TSeq &value)
Assign a value to Seq data member.
void ResetStop_offset_fuzz(void)
Reset Stop_offset_fuzz data member.
void ResetStart_offset(void)
Reset Start_offset data member.
void SetType(TType value)
Assign a value to Type data member.
const TSeq & GetSeq(void) const
Get the Seq member data.
TPlacements & SetPlacements(void)
Assign a value to Placements data member.
void SetStart_offset(TStart_offset value)
Assign a value to Start_offset data member.
void ResetSeq(void)
Reset Seq data member.
const TData & GetData(void) const
Get the Data member data.
void SetMol(TMol value)
Assign a value to Mol data member.
bool IsSetStop_offset(void) const
Check if a value has been assigned to Stop_offset data member.
TVariations & SetVariations(void)
Assign a value to Variations data member.
const TMethod & GetMethod(void) const
Get the Method member data.
bool IsSetStart_offset(void) const
location refinements, describing offsets into introns from product coordinates.
const TLoc & GetLoc(void) const
Get the Loc member data.
bool IsSet(void) const
Check if variant Set is selected.
void SetLoc(TLoc &value)
Assign a value to Loc data member.
TSynonyms & SetSynonyms(void)
Assign a value to Synonyms data member.
TType GetType(void) const
Get the Type member data.
void SetStart_offset_fuzz(TStart_offset_fuzz &value)
Assign a value to Start_offset_fuzz data member.
TStart_offset GetStart_offset(void) const
Get the Start_offset member data.
@ eData_set_type_genotype
changes on different alleles in the same genotype, e.g. g.[476C>T]+[476C>T]
Definition: Variation_.hpp:143
@ eData_set_type_chimeric
e.g. c.[1C>T//2G>T]
Definition: Variation_.hpp:149
@ eData_set_type_package
set represents a package of observations at a given location, generally containing asserted + referen...
Definition: Variation_.hpp:148
@ eData_set_type_compound
complex change at the same location on the same molecule
Definition: Variation_.hpp:140
@ eData_set_type_individual
same organism; allele relationship unknown, e.g. g.[476C>T(+)183G>C]
Definition: Variation_.hpp:145
@ eData_set_type_products
different products arising from the same variation in a precursor, e.g. r.[13g>a, 13_88del]
Definition: Variation_.hpp:141
@ eData_set_type_haplotype
changes on the same allele, e.g r.[13g>a;15u>c]
Definition: Variation_.hpp:142
@ eData_set_type_mosaic
different genotypes in the same individual
Definition: Variation_.hpp:144
@ eMol_cdna
"c." coordinates in HGVS
@ eMol_mitochondrion
"mt." coordinates in HGVS
@ eMol_rna
"n." coordinates in HGVS
@ eMol_protein
"p." coordinates in HGVS
@ eMol_genomic
"g." coordinates in HGVS
@ eCode_hgvs_exon_boundary
anchor position in an intronic HGVS expression is not at an exon boundary
@ eCode_hgvs_parsing
invalid hgvs expression
@ eCode_inconsistent_asserted_moltype
asserted mol-type is inconsistent with seq-id (e.g. NM_12345.6:g.)
void SetType(TType value)
Assign a value to Type data member.
TDelta & SetDelta(void)
Assign a value to Delta data member.
@ eType_snv
delta=[morph of length 1] NOTE: this is snV not snP; the latter requires frequency-based validation t...
@ eType_inv
delta=[del, ins.seq= RevComp(variation-location)]
@ eType_mnp
delta=[morph of length >1]
@ eType_microsatellite
delta=[del, ins.seq= repeat-unit with fuzzy multiplier] variation-location is the microsat expansion ...
@ eType_delins
delta=[del, ins]
@ eType_transposon
delta=[del, ins.seq= known donor or 'this'] variation-location is equiv of transposon locs.
@ eType_prot_silent
delta=[morph of length 1, same AA as at variation-location]
@ eType_prot_missense
delta=[morph of length 1]
@ eType_translocation
delta=like delins
@ eAction_del_at
excise sequence at location if multiplier is specified, delete len(location)*multiplier positions dow...
@ eAction_ins_before
insert seq before the location.start
@ eObservation_asserted
inst represents the asserted base at a position
#define HGVS_ASSERT_RULE(i, rule_id)
#define HGVS_THROW(err_code, message)
where boath are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is orig
int i
int len
static void text(MDB_val *v)
Definition: mdb_dump.c:62
static MDB_envinfo info
Definition: mdb_load.c:37
CVariation_inst::EType GetDelInsSubtype(int del_len, int ins_len)
void SetComputational(CVariation &variation)
bool IsPureFuzzPoint(const CSeq_point &p)
void RepackageAssertedSequence(CVariation &vr)
bool IsLRG(CBioseq_Handle &bsh)
T AdjustHgvsCoord(T val, TSeqPos offset, bool adjust)
CSeq_id_Handle GetUniquePrimaryTranscriptId(CBioseq_Handle &bsh)
bool SeqsMatch(const string &query, const char *text)
CVariantPlacement & SetFirstPlacement(CVariation &v)
CRef< CSeq_loc > FindSSRLoc(const CSeq_loc &loc, const string &seq, CScope &scope)
int GetDeltaLength(const CDelta_item &delta, int loc_len)
void AdjustMoltype(CVariation &vr, CScope &scope)
#define abs(a)
Definition: ncbi_heapmgr.c:130
unsigned int a
Definition: ncbi_localip.c:102
T max(T x_, T y_)
T min(T x_, T y_)
std::istream & in(std::istream &in_, double &x_)
Int4 delta(size_t dimension_, const Int4 *score_)
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
int GetLoc(const string &acc, const string &pat, CSeq_loc &loc, CScope &scope)
SAnnotSelector –.
static string query
static bool s_is_list(parser_id id)
static bool s_is_list_a(parser_id id)
static const char * s_rule_names[SGrammar::eNodeIds_SIZE]
static const string s_GetRuleName(parser_id id)
void Assign(const SOffsetPoint &other)
else result
Definition: token2.c:20
static CS_CONTEXT * context
Definition: will_convert.c:21
Modified on Fri Sep 20 14:57:52 2024 by modify_doxy.py rev. 669887