NCBI C++ ToolKit
hgvs_protein_parser.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #include <ncbi_pch.hpp>
2 #include <boost/spirit/include/phoenix.hpp>
5 
6 using boost::spirit::qi::_1;
7 using boost::spirit::qi::_2;
8 using boost::spirit::qi::_3;
9 using boost::spirit::qi::_val;
10 using boost::spirit::qi::_a;
11 using boost::phoenix::bind;
12 
15 
16 void IsMet1(const CRef<CAaSite>& aa_site, boost::spirit::unused_type context, bool& match)
17 {
18  match = false;
19  if (aa_site->IsSetAa() &&
20  aa_site->IsSetIndex() &&
21  (aa_site->GetIndex() == 1 &&
22  (NStr::Equal(aa_site->GetAa(), "Met")
23  || NStr::Equal(aa_site->GetAa(), "M"))))
24  {
25  match = true;
26  }
27 }
28 
29 
31  SHgvsProteinGrammar::base_type(simple_protein_variant)
32 {
34 
36 
38  frameshift |
40  missense |
41  nonsense |
42  unknown_sub |
43  silent |
44  aa_dup |
45  aa_delins |
46  aa_del |
47  aa_ins |
48  aa_ssr;
49 
52 
54 
56 
58 
64 
65  aa1_stop_seq = (aa1_seq >> tok.stop) [_val = _1 + "*"];
66 
67  aa3_stop_seq = (aa3_seq >> tok.stop) [_val = _1 + "*"];
68 
69  aa_dup = (aa_loc >> tok.dup) ACTION1(AssignAaDup);
70 
71  aa_del = (aa_loc >> tok.del) ACTION1(AssignAaDel);
72 
76  // Need to be able to insert a "remote" sequence
77 
79 
81 
82  aa_repeat_precise = ("[" >> tok.pos_int >> "]") ACTION1(AssignCount);
83 
85 
86  aa_repeat_range = ("(" >> nn_int >> "_" >> tok.pos_int >> ")") ACTION2(AssignCountRange) |
87  ("(" >> nn_int >> "_" >> tok.unknown_val >> ")") ACTION2(AssignCountRange) |
88  ("(" >> tok.unknown_val >> "_" >> tok.pos_int >> ")") ACTION2(AssignCountRange);
89 
93 
94  frameshift_long_form = (aa1_site >> aa1 >> tok.fs >> tok.stop >> (tok.pos_int | tok.unknown_val )) ACTION1(AssignFrameshift) |
95  (aa3_site >> tok.aa3 >> tok.fs >> tok.stop >> (tok.pos_int | tok.unknown_val)) ACTION1(AssignFrameshift);
96 
98  (aa3_site >> tok.aa3 >> tok.fs) ACTION1(AssignFrameshift);
99 
101 
103 
104  cterm_extension = (tok.stop >> tok.pos_int >> (aa1 | tok.aa3) >> tok.ext >> tok.stop >> end_codon_shift)
106 
107  nterm_extension = (aa_site [IsMet1] >> tok.ext >> "-" >> end_codon_shift)
109  (aa3_site [IsMet1] >> tok.aa3 >> tok.ext >> "-" >> end_codon_shift)
111  (aa1_site [IsMet1] >> aa1 >> tok.ext >> "-" >> end_codon_shift)
113 
116 
118 
119  aa_loc = aa3_loc | aa1_loc;
120 
122 
124 
127 
129 
130  aa3_site = (tok.aa3 >> tok.pos_int) ACTION2(AssignAaSite);
131 
132  aa3_seq = +(tok.aa3) [_val += _1];
133 
136 
138 
140 
141  aa1_seq = +(aa1) [_val += _1];
142 
143  aa1 = tok.ACGT | tok.definite_aa1;
144 
145  nn_int = tok.zero [_val = "0"] |
146  tok.pos_int [_val = _1];
147 }
148 
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
Definition: ncbistr.hpp:5384
bool IsSetIndex(void) const
Check if a value has been assigned to Index data member.
Definition: AaSite_.hpp:199
TIndex GetIndex(void) const
Get the Index member data.
Definition: AaSite_.hpp:218
const TAa & GetAa(void) const
Get the Aa member data.
Definition: AaSite_.hpp:258
bool IsSetAa(void) const
Check if a value has been assigned to Aa data member.
Definition: AaSite_.hpp:246
#define ACTION1(func)
#define ACTION3(func)
#define ACTION2(func)
USING_SCOPE(objects)
void IsMet1(const CRef< CAaSite > &aa_site, boost::spirit::unused_type context, bool &match)
static int match(register const pcre_uchar *eptr, register const pcre_uchar *ecode, const pcre_uchar *mstart, int offset_top, match_data *md, eptrblock *eptrb, unsigned int rdepth)
Definition: pcre_exec.c:513
void AssignAaIntervalLocation(CRef< CAaInterval > aa_interval, CRef< CAaLocation > &result)
void AssignFuzzyCount(const string &count, CRef< CCount > &result)
void AssignUnknownSub(CRef< CAaSite > initial, CRef< CSimpleVariant > &result)
void AssignFrameshift(CRef< CAaSite > aa_site, CRef< CSimpleVariant > &result)
void AssignMissense(CRef< CAaSite > initial, const CProteinSub::TFinal &final, CRef< CSimpleVariant > &result)
void AssignAaInsertion(CRef< CAaInterval > aa_interval, const CInsertion::TSeqinfo::TRaw_seq &raw_seq, CRef< CSimpleVariant > &result)
void AssignAaSiteLocation(CRef< CAaSite > aa_site, CRef< CAaLocation > &result)
void AssignNtermExtension(CRef< CAaSite > initial_start_site, CRef< CCount > new_start_site, CRef< CSimpleVariant > &result)
void AssignCountRange(const string &start, const string &stop, CRef< CCount > &result)
void AssignCount(const string &count, CRef< CCount > &result)
void AssignAaDup(CRef< CAaLocation > aa_loc, CRef< CSimpleVariant > &result)
void AssignAaDelins(CRef< CAaLocation > aa_loc, const string &raw_seq, CRef< CSimpleVariant > &result)
void AssignAaInterval(CRef< CAaSite > start, CRef< CAaSite > &stop, CRef< CAaInterval > &result)
void AssignSilent(CRef< CAaLocation > loc, CRef< CSimpleVariant > &result)
void AssignAaDel(CRef< CAaLocation > aa_loc, CRef< CSimpleVariant > &result)
void AssignAaInsertionSize(CRef< CAaInterval > aa_interval, CRef< CCount > seq_size, CRef< CSimpleVariant > &result)
void AssignCtermExtension(const string &initial_stop_site, const string &aa, CRef< CCount > new_stop_site, CRef< CSimpleVariant > &result)
void AssignAaDelinsSize(CRef< CAaLocation > aa_loc, CRef< CCount > seq_size, CRef< CSimpleVariant > &result)
void AssignAaSite(const string &aa, const string &pos, CRef< CAaSite > &result)
void AssignAaSSR(CRef< CAaLocation > aa_loc, CRef< CCount > count, CRef< CSimpleVariant > &result)
void AssignNonsense(CRef< CAaSite > initial, CRef< CSimpleVariant > &result)
void AssignFuzzyLocalVariation(CRef< CSimpleVariant > input, CRef< CSimpleVariant > &result)
TSimpleToken dup
Definition: hgvs_lexer.hpp:21
TSimpleToken ext
Definition: hgvs_lexer.hpp:27
TAttributedToken ACGT
Definition: hgvs_lexer.hpp:29
TSimpleToken nochange
Definition: hgvs_lexer.hpp:37
TSimpleToken fs
Definition: hgvs_lexer.hpp:28
TSimpleToken zero
Definition: hgvs_lexer.hpp:38
TSimpleToken ins
Definition: hgvs_lexer.hpp:24
TSimpleToken del
Definition: hgvs_lexer.hpp:22
TSimpleToken delins
Definition: hgvs_lexer.hpp:23
TAttributedToken unknown_val
Definition: hgvs_lexer.hpp:36
TAttributedToken pos_int
Definition: hgvs_lexer.hpp:34
TSimpleToken stop
Definition: hgvs_lexer.hpp:33
TAttributedToken definite_aa1
Definition: hgvs_lexer.hpp:31
TAttributedToken fuzzy_pos_int
Definition: hgvs_lexer.hpp:35
TAttributedToken aa3
Definition: hgvs_lexer.hpp:32
TRule< CSimpleVariant > frameshift_nonstandard
TRule< CSimpleVariant > aa_ins
TRule< CSimpleVariant > nterm_extension
TRule< CSimpleVariant > silent
TRule< CCount > aa_repeat_precise
TRule< CSimpleVariant > simple_protein_variant
TRule< CSimpleVariant > aa_ssr
TRule< CAaInterval > aa3_interval
TRule< CAaLocation > aa_loc
TRule< CAaLocation > aa3_loc
TRule< CCount > aa_repeat_range
TRule< CSimpleVariant > missense
TRule< CAaSite > aa3_site
TRule< CSimpleVariant > protein_extension
TRule< CSimpleVariant > cterm_extension
TRule< CSimpleVariant > aa_delins
TRule< CSimpleVariant > unknown_sub
TRule< CSimpleVariant > frameshift
TRule< CCount > aa_repeat_fuzzy
TRule< CCount > end_codon_shift
SHgvsProteinGrammar(const SHgvsLexer &tok)
TRule< CSimpleVariant > frameshift_short_form
TRule< CAaSite > aa_site
TRule< CSimpleVariant > nonsense
TRule< CAaInterval > aa1_interval
TRule< CSimpleVariant > aa_del
TRule< CSimpleVariant > frameshift_long_form
TRule< CSimpleVariant > protein_confirmed_simple_variation
TRule< CAaSite > aa1_site
TRule< CSimpleVariant > aa_dup
TRule< CAaInterval > aa_interval
TRule< CSimpleVariant > protein_fuzzy_simple_variation
TRule< CAaLocation > aa1_loc
static CS_CONTEXT * context
Definition: will_convert.c:21
Modified on Mon Apr 22 04:01:39 2024 by modify_doxy.py rev. 669887