34 #ifndef HGVSPARSER_HPP_
35 #define HGVSPARSER_HPP_
40 #include <boost/version.hpp>
41 #if BOOST_VERSION >= 103800
42 #include <boost/spirit/include/classic.hpp>
43 #include <boost/spirit/include/classic_core.hpp>
44 #include <boost/spirit/include/classic_ast.hpp>
45 #include <boost/spirit/include/classic_parse_tree.hpp>
46 #include <boost/spirit/include/classic_tree_to_xml.hpp>
48 using namespace BOOST_SPIRIT_CLASSIC_NS;
51 #include <boost/spirit.hpp>
52 #include <boost/spirit/core.hpp>
53 #include <boost/spirit/tree/ast.hpp>
54 #include <boost/spirit/tree/parse_tree.hpp>
55 #include <boost/spirit/tree/tree_to_xml.hpp>
56 using namespace boost::spirit;
78 #define HGVS_THROW(err_code, message) NCBI_THROW(CHgvsParser::CHgvsParserException, err_code, message)
91 fOpFlags_RelaxedAA = 1 << 0,
92 fOpFlags_Default = fOpFlags_RelaxedAA
98 string AsHgvsExpression(
const CSeq_feat& feat);
120 switch(GetErrCode()) {
121 case eLogic:
return "eLogic";
122 case eGrammatic:
return "eGrammatic";
123 case eSemantic:
return "eSemantic";
124 case eContext:
return "eContext";
125 case eAlignment:
return "eAlignment";
126 case ePrecondition:
return "ePrecondition";
127 case eOther:
return "eOther";
152 fuzz->Assign(*other.
fuzz);
192 pnt->Assign(*other.
pnt);
211 start_offset.Reset();
223 loc.Reset(
new CSeq_loc);
225 loc->Assign(*other.
loc);
231 return start_offset.value || start_offset.value || stop_offset.fuzz || stop_offset.fuzz;
273 m_mol_type = eMol_not_set;
281 return m_bsh.GetBioseqLength();
289 void SetId(
const CSeq_id&
id, EMolType mol_type);
293 if(!m_loc.IsOffset()) {
311 return !m_loc.loc.IsNull();
319 const CSeq_loc&
GetLoc()
const;
327 EMolType GetMolType(
bool check=
true)
const;
419 if(s_rule_names->size() == 0) {
420 m_[eID_NONE] =
"NONE";
421 m_[eID_root] =
"root";
422 m_[eID_list1a] =
"list1a";
423 m_[eID_list2a] =
"list2a";
424 m_[eID_list3a] =
"list3a";
425 m_[eID_list1b] =
"list1b";
426 m_[eID_list2b] =
"list2b";
427 m_[eID_list3b] =
"list3b";
428 m_[eID_expr1] =
"expr1";
429 m_[eID_expr2] =
"expr2";
430 m_[eID_expr3] =
"expr3";
431 m_[eID_translocation] =
"translocation";
432 m_[eID_header] =
"header";
433 m_[eID_location] =
"location";
435 m_[eID_seq_id] =
"seq_id";
436 m_[eID_mut_list] =
"mut_list";
437 m_[eID_mut_ref] =
"mut_ref";
438 m_[eID_nuc_range] =
"nuc_range";
439 m_[eID_prot_range] =
"prot_range";
440 m_[eID_mut_inst] =
"mut_inst";
441 m_[eID_int_fuzz] =
"int_fuzz";
442 m_[eID_abs_pos] =
"abs_pos";
443 m_[eID_general_pos] =
"general_pos";
444 m_[eID_fuzzy_pos] =
"fuzzy_pos";
445 m_[eID_pos_spec] =
"pos_spec";
446 m_[eID_raw_seq] =
"raw_seq";
447 m_[eID_aminoacid] =
"aminoacid";
448 m_[eID_nuc_subst] =
"nuc_subst";
449 m_[eID_deletion] =
"deletion";
450 m_[eID_insertion] =
"insertion";
451 m_[eID_delins] =
"delins";
452 m_[eID_duplication] =
"duplication";
453 m_[eID_nuc_inv] =
"nuc_inv";
455 m_[eID_conversion] =
"conversion";
456 m_[eID_seq_loc] =
"seq_loc";
457 m_[eID_seq_ref] =
"seq_ref";
458 m_[eID_prot_pos] =
"prot_pos";
459 m_[eID_prot_fs] =
"prot_fs";
460 m_[eID_prot_missense] =
"prot_missense";
461 m_[eID_prot_ext] =
"prot_ext";
463 return s_rule_names.
Get();
466 static const string& s_GetRuleName(parser_id
id);
468 template <
typename ScannerT>
471 rule<ScannerT, parser_context<>, parser_tag<eID_root> >
root;
472 rule<ScannerT, parser_context<>, parser_tag<eID_list1a> >
list1a;
473 rule<ScannerT, parser_context<>, parser_tag<eID_list2a> >
list2a;
474 rule<ScannerT, parser_context<>, parser_tag<eID_list3a> >
list3a;
475 rule<ScannerT, parser_context<>, parser_tag<eID_list1b> >
list1b;
476 rule<ScannerT, parser_context<>, parser_tag<eID_list2b> >
list2b;
477 rule<ScannerT, parser_context<>, parser_tag<eID_list3b> >
list3b;
478 rule<ScannerT, parser_context<>, parser_tag<eID_expr1> >
expr1;
479 rule<ScannerT, parser_context<>, parser_tag<eID_expr2> >
expr2;
480 rule<ScannerT, parser_context<>, parser_tag<eID_expr3> >
expr3;
481 rule<ScannerT, parser_context<>, parser_tag<eID_translocation> >
translocation;
482 rule<ScannerT, parser_context<>, parser_tag<eID_header> >
header;
483 rule<ScannerT, parser_context<>, parser_tag<eID_seq_id> >
seq_id;
484 rule<ScannerT, parser_context<>, parser_tag<eID_mol> >
mol;
485 rule<ScannerT, parser_context<>, parser_tag<eID_mut_list > >
mut_list;
486 rule<ScannerT, parser_context<>, parser_tag<eID_mut_ref> >
mut_ref;
487 rule<ScannerT, parser_context<>, parser_tag<eID_mut_inst> >
mut_inst;
488 rule<ScannerT, parser_context<>, parser_tag<eID_int_fuzz> >
int_fuzz;
489 rule<ScannerT, parser_context<>, parser_tag<eID_abs_pos> >
abs_pos;
490 rule<ScannerT, parser_context<>, parser_tag<eID_general_pos> >
general_pos;
491 rule<ScannerT, parser_context<>, parser_tag<eID_fuzzy_pos> >
fuzzy_pos;
492 rule<ScannerT, parser_context<>, parser_tag<eID_pos_spec> >
pos_spec;
493 rule<ScannerT, parser_context<>, parser_tag<eID_location> >
location;
494 rule<ScannerT, parser_context<>, parser_tag<eID_nuc_range> >
nuc_range;
495 rule<ScannerT, parser_context<>, parser_tag<eID_prot_range> >
prot_range;
496 rule<ScannerT, parser_context<>, parser_tag<eID_raw_seq> >
raw_seq;
497 rule<ScannerT, parser_context<>, parser_tag<eID_aminoacid> >
aminoacid;
498 rule<ScannerT, parser_context<>, parser_tag<eID_nuc_subst> >
nuc_subst;
499 rule<ScannerT, parser_context<>, parser_tag<eID_deletion> >
deletion;
500 rule<ScannerT, parser_context<>, parser_tag<eID_insertion> >
insertion;
501 rule<ScannerT, parser_context<>, parser_tag<eID_delins> >
delins;
502 rule<ScannerT, parser_context<>, parser_tag<eID_duplication> >
duplication;
503 rule<ScannerT, parser_context<>, parser_tag<eID_nuc_inv> >
nuc_inv;
504 rule<ScannerT, parser_context<>, parser_tag<eID_ssr> >
ssr;
505 rule<ScannerT, parser_context<>, parser_tag<eID_conversion> >
conversion;
506 rule<ScannerT, parser_context<>, parser_tag<eID_seq_loc> >
seq_loc;
507 rule<ScannerT, parser_context<>, parser_tag<eID_seq_ref> >
seq_ref;
508 rule<ScannerT, parser_context<>, parser_tag<eID_prot_pos> >
prot_pos;
509 rule<ScannerT, parser_context<>, parser_tag<eID_prot_missense> >
prot_missense;
510 rule<ScannerT, parser_context<>, parser_tag<eID_prot_ext> >
prot_ext;
511 rule<ScannerT, parser_context<>, parser_tag<eID_prot_fs> >
prot_fs;
515 aminoacid = str_p(
"Ala")
537 | chset<>(
"XARNDCEQGHILKMFPSTWYV")
540 raw_seq = leaf_node_d[+aminoacid | +chset<>(
"ACGTN") | +chset<>(
"acgun")];
556 int_fuzz = ch_p(
'(') >> (ch_p(
'?')|int_p) >> ch_p(
'_') >> (ch_p(
'?')|int_p) >> ch_p(
')')
557 | ch_p(
'(') >> int_p >> ch_p(
')')
560 abs_pos = !ch_p(
'*') >> int_fuzz;
564 general_pos = (str_p(
"IVS") >> int_p | abs_pos) >> sign_p >> int_fuzz
570 fuzzy_pos = discard_node_d[ch_p(
'(')]
572 >> discard_node_d[ch_p(
'_')]
574 >> discard_node_d[ch_p(
')')];
576 pos_spec = general_pos
578 | !ch_p(
'o') >> header >> pos_spec;
581 prot_pos = raw_seq >> pos_spec;
583 prot_range = prot_pos >> discard_node_d[ch_p(
'_')] >> prot_pos;
585 nuc_range = pos_spec >> discard_node_d[ch_p(
'_')] >> pos_spec;
587 location = nuc_range | pos_spec | prot_range | prot_pos;
593 seq_id = leaf_node_d[alpha_p >> +(alnum_p | chset<>(
"._-|"))];
595 mol = str_p(
"mt") | chset<>(
"gcrpm");
598 >> !(discard_node_d[ch_p(
'{')]
600 >> discard_node_d[ch_p(
'}')])
601 >> discard_node_d[ch_p(
':')]
603 >> discard_node_d[ch_p(
'.')];
612 seq_loc = !ch_p(
'o') >> header >>
location;
615 | (nuc_range|prot_range)
623 nuc_subst = raw_seq >> ch_p(
'>') >> raw_seq;
625 deletion = str_p(
"del") >> !(raw_seq | int_p);
627 duplication = str_p(
"dup") >> !seq_ref;
629 insertion = str_p(
"ins") >> seq_ref;
631 conversion = str_p(
"con") >> seq_loc;
633 delins = str_p(
"del") >> !raw_seq >> str_p(
"ins") >> seq_ref;
635 nuc_inv = str_p(
"inv") >> !int_p;
637 ssr = !raw_seq >> ( int_fuzz - (ch_p(
'?')|int_p)
638 | list_p(discard_node_d[ch_p(
'[')]
640 >> discard_node_d[ch_p(
']')],
641 discard_node_d[ch_p(
'+')]));
655 prot_fs = str_p(
"fs") >> !(ch_p(
'X') >> int_p);
657 prot_ext = (str_p(
"extMet") | str_p(
"extX")) >> int_p;
659 prot_missense = aminoacid;
662 translocation = str_p(
"t(")
663 >> leaf_node_d[*(print_p - ch_p(
'(') - ch_p(
')'))]
665 >> leaf_node_d[*(print_p - ch_p(
'(') - ch_p(
')'))]
688 root = list_p(expr1, ch_p(
'+'));
694 expr1 = ch_p(
'(') >> expr1 >> ch_p(
')')
698 list1a = list_p(discard_node_d[ch_p(
'[')] >> list1b >> discard_node_d[ch_p(
']')], ch_p(
'+'));
699 list1b = list_p(expr1, chset<>(
",;") | str_p(
"(+)"));
702 expr2 = ch_p(
'(') >> expr2 >> ch_p(
')')
710 list2a = list_p(discard_node_d[ch_p(
'[')] >> list2b >> discard_node_d[ch_p(
']')], ch_p(
'+'));
711 list2b = list_p(expr2, chset<>(
",;") | str_p(
"(+)"));
714 expr3 = ch_p(
'(') >> expr3 >> ch_p(
')')
724 list3a = list_p(discard_node_d[ch_p(
'[')] >> list3b >> discard_node_d[ch_p(
']')], ch_p(
'+'));
725 list3b = list_p(expr3, chset<>(
",;") | str_p(
"(+)"));
730 rule<ScannerT, parser_context<>, parser_tag<eID_root> >
const&
start()
const
738 return id == SGrammar::eID_list1a
739 ||
id == SGrammar::eID_list2a
740 ||
id == SGrammar::eID_list3a
741 ||
id == SGrammar::eID_list1b
742 ||
id == SGrammar::eID_list2b
743 ||
id == SGrammar::eID_list3b
744 ||
id == SGrammar::eID_root;
796 static string s_hgvsaa2ncbieaa(
const string& hgvsaa);
799 static string s_hgvsUCaa2hgvsUL(
const string& hgvsaa);
806 const CSeq_loc& parent_loc,
813 string x_GetInstData(
const CVariation_inst& inst,
const CSeq_loc& this_loc);
824 string x_LocToSeqStr(
const CSeq_loc& loc);
833 string x_SeqLocToStr(
const CSeq_loc& loc,
bool with_header);
839 string x_SeqIdToHgvsHeader(
const CSeq_id&
id);
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
T & Get(void)
Create the variable if not created yet, return the reference.
namespace ncbi::objects::
Set of related Variations.
void Validate(const CSeq_literal &literal) const
void SetLoc(const SOffsetLoc &loc)
CContext(CRef< CScope > scope)
TSeqPos GetLength() const
CContext(const CContext &other)
CScope & GetScope() const
NCBI_EXCEPTION_DEFAULT(CHgvsParserException, CException)
@ eGrammatic
Expression is not a valid language.
@ eAlignment
Some problem with getting alignment.
@ ePrecondition
Precondition is not met.
@ eLogic
Problem with the code.
@ eSemantic
Expression is invalid in some way.
@ eContext
Some problem with context.
virtual const char * GetErrCodeString(void) const
Get error code interpreted as text.
tree_match< char const * > TParseTreeMatch
CVariation_inst::TDelta::value_type TDelta
static CSafeStatic< SGrammar > s_grammar
CVariation_ref::TData::TSet TVariationSet
static CRef< CVariation_ref > s_ProtToCdna(const CVariation_ref &vr, CScope &scope)
TParseTreeMatch::const_tree_iterator TIterator
CHgvsParser(CScope &scope)
Include a standard set of the NCBI C++ Toolkit most basic headers.
static const char location[]
unsigned int TSeqPos
Type for sequence locations and lengths.
EErrCode
Error types that an application can generate.
virtual const char * GetErrCodeString(void) const
Get error code interpreted as text.
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
CTempString literal(const char(&str)[Size])
Templatized initialization from a string literal.
double value_type
The numeric datatype used by the parser.
const GenericPointer< typename T::ValueType > T2 value
Portable reference counted smart and weak pointers using CWeakRef, CRef, CObject and CObjectEx.
int GetLoc(const string &acc, const string &pat, CSeq_loc &loc, CScope &scope)
void Assign(const SFuzzyInt &other)
rule< ScannerT, parser_context<>, parser_tag< eID_insertion > > insertion
rule< ScannerT, parser_context<>, parser_tag< eID_seq_loc > > seq_loc
rule< ScannerT, parser_context<>, parser_tag< eID_seq_ref > > seq_ref
rule< ScannerT, parser_context<>, parser_tag< eID_prot_fs > > prot_fs
rule< ScannerT, parser_context<>, parser_tag< eID_prot_pos > > prot_pos
rule< ScannerT, parser_context<>, parser_tag< eID_prot_range > > prot_range
rule< ScannerT, parser_context<>, parser_tag< eID_list3b > > list3b
rule< ScannerT, parser_context<>, parser_tag< eID_expr1 > > expr1
rule< ScannerT, parser_context<>, parser_tag< eID_nuc_subst > > nuc_subst
rule< ScannerT, parser_context<>, parser_tag< eID_location > > location
rule< ScannerT, parser_context<>, parser_tag< eID_list3a > > list3a
rule< ScannerT, parser_context<>, parser_tag< eID_delins > > delins
rule< ScannerT, parser_context<>, parser_tag< eID_abs_pos > > abs_pos
rule< ScannerT, parser_context<>, parser_tag< eID_prot_ext > > prot_ext
rule< ScannerT, parser_context<>, parser_tag< eID_nuc_inv > > nuc_inv
rule< ScannerT, parser_context<>, parser_tag< eID_ssr > > ssr
rule< ScannerT, parser_context<>, parser_tag< eID_general_pos > > general_pos
rule< ScannerT, parser_context<>, parser_tag< eID_pos_spec > > pos_spec
rule< ScannerT, parser_context<>, parser_tag< eID_header > > header
rule< ScannerT, parser_context<>, parser_tag< eID_mut_ref > > mut_ref
rule< ScannerT, parser_context<>, parser_tag< eID_list2b > > list2b
rule< ScannerT, parser_context<>, parser_tag< eID_root > > const & start() const
rule< ScannerT, parser_context<>, parser_tag< eID_list1b > > list1b
rule< ScannerT, parser_context<>, parser_tag< eID_int_fuzz > > int_fuzz
rule< ScannerT, parser_context<>, parser_tag< eID_expr2 > > expr2
rule< ScannerT, parser_context<>, parser_tag< eID_translocation > > translocation
rule< ScannerT, parser_context<>, parser_tag< eID_fuzzy_pos > > fuzzy_pos
rule< ScannerT, parser_context<>, parser_tag< eID_list1a > > list1a
rule< ScannerT, parser_context<>, parser_tag< eID_prot_missense > > prot_missense
rule< ScannerT, parser_context<>, parser_tag< eID_duplication > > duplication
definition(SGrammar const &)
rule< ScannerT, parser_context<>, parser_tag< eID_mut_list > > mut_list
rule< ScannerT, parser_context<>, parser_tag< eID_deletion > > deletion
rule< ScannerT, parser_context<>, parser_tag< eID_aminoacid > > aminoacid
rule< ScannerT, parser_context<>, parser_tag< eID_seq_id > > seq_id
rule< ScannerT, parser_context<>, parser_tag< eID_list2a > > list2a
rule< ScannerT, parser_context<>, parser_tag< eID_root > > root
rule< ScannerT, parser_context<>, parser_tag< eID_mut_inst > > mut_inst
rule< ScannerT, parser_context<>, parser_tag< eID_expr3 > > expr3
rule< ScannerT, parser_context<>, parser_tag< eID_nuc_range > > nuc_range
rule< ScannerT, parser_context<>, parser_tag< eID_conversion > > conversion
rule< ScannerT, parser_context<>, parser_tag< eID_mol > > mol
rule< ScannerT, parser_context<>, parser_tag< eID_raw_seq > > raw_seq
static CSafeStatic< TRuleNames > s_rule_names
std::map< parser_id, std::string > TRuleNames
static TRuleNames & s_GetRuleNames()
static bool s_is_list(parser_id id)
void Assign(const SOffsetLoc &other)
void Assign(const SOffsetPoint &other)
static CS_CONTEXT * context