// The ten ASCII decimal digit characters. Used in this file as the character
// set argument to find_first_of() / find_first_not_of() to locate where a run
// of digits begins and ends inside a string.
44 static const char* digit_str =
"0123456789";
// The 52 ASCII letters (lowercase a-z followed by uppercase A-Z). Used in this
// file as the character set argument to find_first_not_of() to detect whether
// a string contains any non-letter character.
45 static const char* alpha_str =
"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
59 default:
return false;
67 size_t len = word.size();
68 if (last_letter ==
's') {
69 if (
len >= 5 && word.substr(
len-5) ==
"trans") {
73 if (second_to_last_letter !=
's'
74 && second_to_last_letter !=
'i'
75 && second_to_last_letter !=
'u'
76 && next_letter ==
',') {
88 char last_letter, second_to_last_letter, next_letter;
89 bool may_contain_plural =
false;
90 string word_skip =
" ,";
98 if (
arr.size() == 1) {
103 last_letter =
arr[0][
len-1];
104 second_to_last_letter =
arr[0][
len-2];
111 vector <string>::const_iterator jt;
120 last_letter = (*it)[
len-1];
121 second_to_last_letter = (*it)[
len-2];
124 if (may_contain_plural) {
128 if (++jt !=
arr.end()) {
133 return may_contain_plural;
139 if (bp ==
'(')
return ')';
140 else if (bp ==
'[')
return ']';
141 else if (bp ==
'{')
return '}';
156 "[acyl-carrier protein]",
157 "[acyl-carrier-protein]",
158 "(acyl carrier protein)"
167 if (idx > 2 && start.substr(idx-3, 6) ==
"NAD(P)") {
169 start = start.substr(idx + 3);
176 start = start.substr(idx +
len);
182 ns = start.find(start[idx], idx+1);
183 ep = start.find(
x_GetClose(start[idx]), idx+1);
184 if (ep != string::npos && (ns == string::npos || ns > ep)) {
187 start = start.substr(ep+1);
189 else if (ep - idx > 3 && start.substr(ep - 3, 3) ==
"ing") {
191 start = start.substr(ep + 1);
204 string open_bp(
"(["), sch_src(
str);
206 if (sch_src.empty()) {
210 idx = sch_src.find_first_of(open_bp);
211 while (idx != string::npos && num_found <
n) {
212 end = sch_src.find(
x_GetClose(sch_src[idx]), idx);
214 idx = sch_src.find_first_of(open_bp);
216 else if (end == string::npos) {
217 idx = sch_src.find_first_of(open_bp, idx+1);
220 idx = sch_src.find_first_of(open_bp, end);
225 if (num_found >=
n) {
247 size_t len_str = start_str.size();
251 if (len_str >= len_i && (start_str.substr(len_str-len_i) ==
ok_num_prefix[
i])) {
261 if (start_str.empty()) {
264 size_t pos = start_str.find_last_of(
' ');
265 if (pos != string::npos) {
266 string strtmp = start_str.substr(0, pos);
267 pos =
strtmp.find_last_not_of(
' ');
268 if (pos != string::npos) {
281 size_t pos = after_str.find_first_of(
' ');
282 if (pos != string::npos) {
283 after_str = after_str.substr(pos+1);
285 if (after_str.size() >= 7) {
286 after_str = after_str.substr(7);
301 unsigned num_digits = 0;
304 while (!sch_str.empty()) {
305 p = sch_str.find_first_of(digit_str);
306 if (p == string::npos) {
309 strtmp = sch_str.substr(0, p);
311 p2 = sch_str.find_first_not_of(digit_str, p+1);
312 if (p2 != string::npos) {
313 sch_str = sch_str.substr(p2);
320 if (num_digits == 3) {
321 sch_str = sch_str.substr(p+1);
327 if (p < sch_str.size() - 1) {
328 if (!sch_str.empty()) {
329 sch_str = sch_str.substr(p+1);
331 if (sch_str.empty() || !
isdigit(sch_str.front())) {
344 if (
str.find(
'_') == string::npos) {
351 for (
unsigned i=0;
i<
arr.size() - 1;
i++) {
375 if (
str.empty())
return false;
377 size_t pattern_len = prefix.size();
382 size_t digit_len =
str.find_first_not_of(digit_str, pattern_len);
383 if (digit_len != string::npos && digit_len ==
str.size()) {
392 if (
str.empty())
return false;
393 else if (
str[
str.size()-1] != open_p)
return false;
403 while (!sch_src.empty()) {
404 pos = sch_src.find_first_of(
"()[]");
405 if (pos == string::npos) {
412 ch_src = sch_src[pos];
413 if (ch_src ==
'(' || ch_src ==
'[') {
416 else if (sch_src[pos] ==
')') {
424 else if (sch_src[pos] ==
']') {
433 sch_src = (pos < sch_src.size()-1) ? sch_src.substr(pos+1) :
kEmptyStr;
453 if (pos != string::npos && (!pos || !
isalpha (
str[pos-1]))) {
481 if (
orig.find_first_not_of(alpha_str) != string::npos) {
484 if (
orig ==
str.original().uppercase()) {
517 if (words->IsSetSynonyms()) {
518 for (
auto syn : words->GetSynonyms()) {
532 return "/[A-Za-hj-rtv-z]s\\b/";
535 return "/\\d\\d\\d/";
543 return "/[\\(\\)\\[\\]]/";
static const char * ok_num_prefix[]
static const char * skip_bracket_paren[]
User-defined methods of the data storage class.
static string QuoteString(const string &str)
Quote special characters to insert string into regular expression.
bool x_ProductContainsTerm(const string &str, const string &pattern) const
char x_GetClose(char bp) const
bool x_IsPrefixPlusNumbers(const string &str, const string &prefix) const
bool Match(const CMatchString &str) const
bool x_StringMayContainPlural(const string &str) const
bool x_DoesStrContainPlural(const string &word, char last_letter, char second_to_last_letter, char next_letter) const
bool x_ContainsNorMoreSetsOfBracketsOrParentheses(const string &str, const int &n) const
bool x_SkipBracketOrParen(size_t idx, string &start) const
bool x_ContainsThreeOrMoreNumbersTogether(const string &str) const
bool x_StringContainsUnbalancedParentheses(const string &str) const
bool x_InWordBeforeCytochromeOrCoenzyme(const string &start_str) const
bool x_IsPropClose(const string &str, char open_p) const
bool x_StringContainsUnderscore(const string &str) const
bool x_PrecededByOkPrefix(const string &start_str) const
bool x_FollowedByFamily(string &after_str) const
bool Match(const CMatchString &str) const
static vector< string > arr
static const char * str(char *buf, int n)
constexpr size_t ArraySize(const Element(&)[Size])
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static bool EqualCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-sensitive equality of a substring with another string.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string.
@ fSplit_Tokenize
All delimiters are merged and trimmed, to get non-empty tokens only.
@ eTrunc_Begin
Truncate leading whitespace only.
@ eNocase
Case insensitive compare.
TToo_long GetToo_long(void) const
Get the variant data.
const TIgnore_words & GetIgnore_words(void) const
Get the Ignore_words member data.
const TPrefix_and_numbers & GetPrefix_and_numbers(void) const
Get the variant data.
TN_or_more_brackets_or_parentheses GetN_or_more_brackets_or_parentheses(void) const
Get the variant data.
const TMatch_text & GetMatch_text(void) const
Get the Match_text member data.
TNot_present GetNot_present(void) const
Get the Not_present member data.
const THas_term & GetHas_term(void) const
Get the variant data.
const Tdata & Get(void) const
Get the member data.
const TString_constraint & GetString_constraint(void) const
Get the variant data.
bool IsSet(void) const
Check if a value has been assigned to data member.
E_Choice Which(void) const
Which variant is currently selected.
bool IsSetMatch_text(void) const
Check if a value has been assigned to Match_text data member.
bool IsSetIgnore_words(void) const
Check if a value has been assigned to Ignore_words data member.
@ e_N_or_more_brackets_or_parentheses
where both are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is orig
Simultaneous search of multiple RegEx patterns in the input string.
constexpr bool empty(list< Ts... >) noexcept
const struct ncbi::grid::netcache::search::fields::SIZE size