63 static constexpr std::array<string_view, 10> weasels = {
82 if (
arr.size() == 1) {
88 for (
i=0;
i< (
int)(
arr.size() - 1);
i++) {
91 for(
auto& it: weasels) {
105 for ( ;
i< (
int)(
arr.size()-1);
i++) {
106 ret_str +=
arr[
i] +
' ';
108 ret_str +=
arr[
arr.size()-1];
118 if (up_str ==
str)
return true;
125 string low_str =
str;
128 if (low_str ==
str)
return true;
135 for (
unsigned i=0;
i<
str.size();
i++) {
147 string comp_str1, comp_str2;
148 comp_str1 = str1.substr(0, len1);
149 comp_str2 = str2.substr(0, len1);
150 if (case_sensitive) {
151 return (comp_str1 == comp_str2);
165 size_t pos_match = 0, pos_str = 0;
166 bool wd_case, whole_wd, word_start_m, word_start_s;
167 bool match =
true, recursive_match =
false;
168 unsigned len_m = str_match.size(), len_s =
str.size(), target_match_len=0;
176 vector <string> word_word;
181 word_word.push_back(
strtmp);
185 while (
match && pos_match < len_m && pos_str < len_s && !recursive_match) {
186 cp_m = str_match.substr(pos_match);
187 cp_s =
str.substr(pos_str);
193 wd_case = (*it)->GetCase_sensitive();
194 whole_wd = (*it)->GetWhole_word();
195 len1 = word_word[
i].size();
198 word_start_m = (!pos_match && is_start) || !
isalpha(str_match[pos_match - 1]);
199 ch1 = (cp_m.size() <= len1) ?
' ' : cp_m[len1];
202 if (!whole_wd || (!
isalpha(
ch1) && word_start_m)) {
203 if ( !(*it)->CanGetSynonyms() || (*it)->GetSynonyms().empty()) {
205 recursive_match =
true;
210 ITERATE (list <string>, sit, (*it)->GetSynonyms()) {
211 len2 = (*sit).size();
215 word_start_s = (!pos_str && is_start) || !
isalpha(
str[pos_str - 1]);
216 ch2 = (cp_s.size() <= len2) ?
' ' : cp_s[len2];
218 if (!whole_wd || (!
isalpha(
ch2) && word_start_s)) {
219 if (
AdvancedStringCompare(cp_s.substr(len2), cp_m.substr(len1), str_cons, word_start_m & word_start_s, &target_match_len)) {
220 recursive_match =
true;
232 if (!recursive_match) {
262 if (
match && !recursive_match) {
267 while (pos_match < str_match.size() && ((ig_space &&
isspace(str_match[pos_match])) || (ig_punct &&
ispunct(str_match[pos_match])))) {
271 if (pos_match < str_match.size()) {
281 if (
match && ini_target_match_len) {
282 *ini_target_match_len += target_match_len;
304 unsigned len =
str.size();
305 while (!rval && pos <
len) {
324 else if (disallow_slash && ch ==
'/') {
338 string::const_iterator it =
str.begin();
340 if ((strip_space &&
isspace(*it)) || (strip_punct &&
ispunct(*it))) {
345 }
while (++it !=
str.end());
351 static bool IsWholeWordMatch(
const string& start,
const size_t& found,
const unsigned& match_len,
bool disallow_slash =
false)
359 else if (start.empty() || found == string::npos) {
368 after_idx = found + match_len;
369 if (after_idx < start.size() &&
DisallowCharacter(start[after_idx], disallow_slash)) {
384 size_t cp =
str.substr(0, hyphen-1).find_last_not_of(
' ');
385 if (cp != string::npos) {
386 cp =
str.substr(0, cp).find_last_not_of(
" ,;");
388 if (cp == string::npos) {
392 unsigned len = hyphen - cp;
397 cp =
str.find_first_not_of(
' ', hyphen+1);
398 if (cp != string::npos) {
399 cp =
str.find_first_not_of(
" ,;");
401 if (cp == string::npos) {
409 second =
str.substr(hyphen+1,
len);
413 if (
first.empty() || second.empty()) {
429 if (
str.find_first_not_of(digit_str) != string::npos) {
438 string new_first, new_second, new_str;
445 else if (
first.empty() || second.empty()) {
449 int str_num, first_num, second_num;
450 str_num = first_num = second_num = 0;
453 string comp_str1, comp_str2;
459 if ((str_num > first_num && str_num < second_num) || (str_num > second_num && str_num < first_num)) {
465 prefix_len =
first.find_first_of(digit_str) + 1;
467 new_str =
str.substr(prefix_len - 1);
468 new_first =
first.substr(prefix_len - 1);
469 comp_str1 =
str.substr(0, prefix_len);
470 comp_str2 =
first.substr(0, prefix_len);
475 if ((str_num > first_num && str_num < second_num) || (str_num > second_num && str_num < first_num)) {
483 while (prefix_len <
first.size() && prefix_len < second.size() &&
first[prefix_len] == second[prefix_len]) {
488 comp_str1 =
str.substr(0, prefix_len);
489 comp_str2 =
first.substr(0, prefix_len);
490 if (prefix_len <=
first.size() && prefix_len <= second.size() &&
isdigit (
first[prefix_len-1]) &&
isdigit (second[prefix_len-1]) && comp_str1 == comp_str2) {
491 new_first =
first.substr(prefix_len);
492 new_second = second.substr(prefix_len);
493 new_str =
str.substr(prefix_len);
498 if ((str_num > first_num && str_num < second_num) || (str_num > second_num && str_num < first_num)) {
504 size_t idx1, idx2, idx_str;
505 string suf1, suf2, sub_str;
506 idx1 =
first.find_first_not_of(digit_str);
507 suf1 =
first.substr(prefix_len + idx1);
508 idx2 = second.find_first_not_of(digit_str);
509 suf2 = second.substr(prefix_len + idx2);
510 idx_str =
str.find_first_not_of(digit_str);
511 sub_str =
str.substr(prefix_len + idx_str);
512 if (suf1 == suf2 && suf1 == sub_str) {
517 if ((str_num > first_num && str_num < second_num) || (str_num > second_num && str_num < first_num)) {
530 if (list.empty() ||
str.empty()) {
534 size_t idx =
str.find_first_not_of(alpha_str);
535 if (idx == string::npos) {
539 idx =
str.substr(idx).find_first_not_of(digit_str);
542 size_t hyphen = list.find(
'-');
544 string range_start, range_end;
545 while (hyphen != string::npos && !rval) {
547 hyphen = list.substr(1).find(
'-');
555 hyphen = list.find(
'-', hyphen + 1);
568 string this_str(
str);
595 tmp_cons.
Assign(*str_cons);
616 if (string::npos == pFound) {
621 while (!rval && pFound != string::npos) {
623 search.find(pattern, pFound+1):
625 rval = (pFound != string::npos)?
640 while (pFound != string::npos && !rval) {
641 if ((pFound + pattern.size()) == search.size()) {
644 pFound = string::npos;
647 if (pattern.empty()) {
667 if (pFound == string::npos) {
672 while (!rval && pFound != string::npos) {
674 if (pFound != string::npos) {
708 for (
auto& it: conset.
Get()) {
710 cerr <<
"Bad suspect rule constraint!\n";
749 string str =
"Unknown replacement function";
759 str =
"replace '" + func.
GetHaem_replace() +
"' with 'heme' if whole word, 'hem' otherwise";
772 loc_word = cons.
GetNot_present() ?
"does not contain" :
"contains";
778 loc_word = cons.
GetNot_present() ?
"does not start with" :
"starts with";
781 loc_word = cons.
GetNot_present() ?
"does not end with" :
"ends with";
791 if ((*it)->CanGetSynonyms() && !(*it)->GetSynonyms().empty()) {
795 CWord_substitution::TSynonyms::const_iterator z = sn;
796 syns += (++z == synonyms.end()) ?
" and " :
", ";
798 syns +=
"\'" + *sn +
"\'";
800 sub_words += sub_words.empty() ?
"" :
", ";
801 sub_words +=
"allow '" + ((*it)->CanGetWord() ? (*it)->GetWord() :
"") +
"' to be replaced by " + syns;
802 if ((*it)->GetCase_sensitive()) sub_words +=
", case-sensitive";
803 if ((*it)->GetWhole_word()) sub_words +=
", whole word";
812 params += cons.
GetIgnore_weasel() ? params.empty() ?
"ignore \'putative\' synonyms" :
", ignore \'putative\' synonyms" :
kEmptyStr;
815 str += params.empty() ?
kEmptyStr :
" (" + params +
")";
828 switch (func.
Which()) {
832 return "may contain plural";
836 return "Three or more numbers together";
838 return "contains underscore";
842 return "is all capital letters";
844 return "contains unbalanced brackets or parentheses";
849 return "contains \'" + func.
GetHas_term() +
"\' at start or separated from other letters by numbers, spaces, or punctuation, but does not also contain 'domain'";
853 return "Unknown search function";
859 switch (pos.
Which()) {
885 partial =
" that are partial on both ends";
888 partial =
" that are complete on both ends";
891 partial =
" that are 5' complete and 3' partial";
894 partial =
" that are 5' partial and 3' complete";
896 string location_type;
898 location_type =
" with single interval";
901 location_type =
" with joined intervals";
904 location_type =
" with ordered intervals";
909 dist5 = dist5.empty() ? dist5 :
" with 5\' end " + dist5;
914 dist3 = dist3.empty() ? dist3 :
" with 3\' end " + dist3;
918 seq_word =
"nucleotide sequences";
921 seq_word =
"protein sequences";
925 strand =
" on plus strands";
928 strand =
" on minus strands";
930 if (partial.empty() && location_type.empty() && dist5.empty() && dist3.empty() && seq_word.empty() && strand.empty()) {
933 string str =
"only objects";
935 if (strand.empty() && !seq_word.empty()) {
936 str +=
" on " + seq_word;
938 else if (!strand.empty()) {
940 str += seq_word.empty() ?
kEmptyStr :
" of " + seq_word;
943 str += location_type;
952 string str =
"Invalid field type";
953 switch (vnp.
Which()) {
956 return "e_Source_qual";
962 return "missing field";
971 return label.empty() ?
"Unknown feature" :
label;
976 return "e_Cds_gene_prot";
980 return "e_Molinfo_field";
988 return "e_Rna_field";
991 return "e_Struc_comment_field";
1020 switch (choice.
Which()) {
1031 return "[[CDS Gene Prot QUAL CONSTRAINT]]";
1033 return "[[CDS Gene Prot PSEUDO CONSTRAINT]]";
1036 return "[[SEQUENCE CONSTRAINT]]";
1039 return "[[PUB CONSTRAINT]]";
1044 return "[[MOLINFO CONSTRAINT]]";
1047 return "[[FIELD MISSING CONSTRAINT]]";
1050 return "[[TRANSLATION CONSTRAINT]]";
1074 static const char* rule_type[] = {
1079 "Organelles not appropriate in prokaryote",
1080 "Suspicious phrase; should this be nonfunctional?",
1081 "May contain database identifier more appropriate in note; remove from product name",
1082 "Remove organism from product name",
1083 "Possible parsing error or incorrect formatting; remove inappropriate symbols",
1084 "Implies evolutionary relationship; change to -like protein",
1085 "Consider adding 'protein' to the end of the product name",
1086 "Correct the name or use 'hypothetical protein'",
1087 "Use American spelling",
1088 "Use short product name instead of descriptive phrase",
1089 "use protein instead of gene as appropriate"
1105 if (!except.empty())
out +=
" but not " + except;
1106 if (!feat_constraint.empty())
out +=
", " + feat_constraint;
1107 if (!replace.empty())
out +=
", " + replace;
1109 if (!descr.empty())
out +=
" Description: " + descr;
bool IsAllPunctuation(const string &str)
static bool CaseNCompareEqual(string str1, string str2, unsigned len1, bool case_sensitive)
static bool DoesSingleStringMatchConstraint(const string &str, const CString_constraint *str_cons)
bool IsAllCaps(const string &str)
static bool IsWholeWordMatch(const string &start, const size_t &found, const unsigned &match_len, bool disallow_slash=false)
static bool DisallowCharacter(const char ch, bool disallow_slash)
static const string SkipWeasel(const string &str)
static bool GetSpanFromHyphenInString(const string &str, const size_t &hyphen, string &first, string &second)
static bool IsStringInSpanInList(const string &str, const string &list)
static bool IsStringConstraintEmpty(const CString_constraint *constraint)
static bool AdvancedStringCompare(const string &str, const string &str_match, const CString_constraint *str_cons, bool is_start, unsigned *ini_target_match_len=0)
static bool StringIsPositiveAllDigits(const string &str)
bool IsAllLowerCase(const string &str)
static string StripUnimportantCharacters(const string &str, bool strip_space, bool strip_punct)
static bool IsStringInSpan(const string &str, const string &first, const string &second)
static bool AdvancedStringMatch(const string &str, const CString_constraint *str_cons)
User-defined methods of the data storage class.
bool ApplyToString(string &result, const CMatchString &str, CConstRef< CString_constraint > find) const
bool Match(const CMatchString &str) const
void SetMatch_text(const TMatch_text &value)
string SummarizeLocationConstraint(const CLocation_constraint &) const
string SummarizeEndDistance(const CLocation_pos_constraint &) const
string GetRuleTypeName(void) const
string SummarizeSourceConstraint(const CSource_constraint &) const
bool ApplyToString(string &result, const CMatchString &str) const
bool StringMatchesSuspectProductRule(const CMatchString &str) const
string SummarizeRule(void) const
string SummarizeConstraintSet(const CConstraint_choice_set &) const
string SummarizeConstraint(const CConstraint_choice &) const
string SummarizeSearchFunc(const CSearch_func &) const
string SummarizeFieldType(const CField_type &) const
string SummarizeStringConstraint(const CString_constraint &) const
string SummarizeReplaceRule(const CReplace_rule &) const
string SummarizeFieldConstraint(const CField_constraint &) const
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
static vector< string > arr
std::ofstream out("events_result.xml")
main entry point for tests
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static const char * str(char *buf, int n)
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
#define ENUM_METHOD_NAME(EnumName)
void Reset(void)
Reset reference object.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string (in-place)
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static unsigned int StringToUInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to unsigned int.
CTempString substr(size_type pos) const
Obtain a substring from this string, beginning at a given offset.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
size_type find(const CTempString match, size_type pos=0) const
Find the first instance of the entire matching string within the current string, beginning at an opti...
static string & ToUpper(string &str)
Convert string to upper case – string& version.
static string & ToLower(string &str)
Convert string to lower case – string& version.
static const char label[]
TCase_sensitive GetCase_sensitive(void) const
Get the Case_sensitive member data.
TToo_long GetToo_long(void) const
Get the variant data.
const TIgnore_words & GetIgnore_words(void) const
Get the Ignore_words member data.
TStrand GetStrand(void) const
Get the Strand member data.
TIgnore_space GetIgnore_space(void) const
Get the Ignore_space member data.
E_Choice Which(void) const
Which variant is currently selected.
const TLocation & GetLocation(void) const
Get the variant data.
const TField & GetField(void) const
Get the variant data.
const TSimple_replace & GetSimple_replace(void) const
Get the variant data.
TMatch_location GetMatch_location(void) const
Get the Match_location member data.
const TPrefix_and_numbers & GetPrefix_and_numbers(void) const
Get the variant data.
const TReplace & GetReplace(void) const
Get the Replace member data.
E_Choice Which(void) const
Which variant is currently selected.
const TString & GetString(void) const
Get the variant data.
const THaem_replace & GetHaem_replace(void) const
Get the variant data.
bool CanGetReplace(void) const
Check if it is safe to call GetReplace method.
TIs_all_caps GetIs_all_caps(void) const
Get the Is_all_caps member data.
bool CanGetFeat_constraint(void) const
Check if it is safe to call GetFeat_constraint method.
bool IsSetDescription(void) const
Check if a value has been assigned to Description data member.
TWhole_string GetWhole_string(void) const
Get the Whole_string member data.
TPartial3 GetPartial3(void) const
Get the Partial3 member data.
TWhole_word GetWhole_word(void) const
Get the Whole_word member data.
TRule_type GetRule_type(void) const
Get the Rule_type member data.
const TDescription & GetDescription(void) const
Get the Description member data.
TIgnore_weasel GetIgnore_weasel(void) const
Get the Ignore_weasel member data.
const TField & GetField(void) const
Get the Field member data.
TMove_to_note GetMove_to_note(void) const
Get the Move_to_note member data.
const TExcept & GetExcept(void) const
Get the Except member data.
TDist_from_end GetDist_from_end(void) const
Get the variant data.
const TFeature_field & GetFeature_field(void) const
Get the variant data.
EMacro_feature_type
feature values
TIgnore_punct GetIgnore_punct(void) const
Get the Ignore_punct member data.
const TEnd5 & GetEnd5(void) const
Get the End5 member data.
TLocation_type GetLocation_type(void) const
Get the Location_type member data.
const TFind & GetFind(void) const
Get the Find member data.
E_Choice Which(void) const
Which variant is currently selected.
EPartial_constraint
Access to EPartial_constraint's attributes (values, names) as defined in spec.
const TFeat_constraint & GetFeat_constraint(void) const
Get the Feat_constraint member data.
EString_location
simple constraints
E_Choice Which(void) const
Which variant is currently selected.
TN_or_more_brackets_or_parentheses GetN_or_more_brackets_or_parentheses(void) const
Get the variant data.
TSeq_type GetSeq_type(void) const
Get the Seq_type member data.
const TMatch_text & GetMatch_text(void) const
Get the Match_text member data.
const TString_constraint & GetString_constraint(void) const
Get the String_constraint member data.
bool CanGetEnd5(void) const
Check if it is safe to call GetEnd5 method.
TPartial5 GetPartial5(void) const
Get the Partial5 member data.
TNot_present GetNot_present(void) const
Get the Not_present member data.
TMax_dist_from_end GetMax_dist_from_end(void) const
Get the variant data.
bool IsSetFind(void) const
Check if a value has been assigned to Find data member.
const THas_term & GetHas_term(void) const
Get the variant data.
TIs_all_punct GetIs_all_punct(void) const
Get the Is_all_punct member data.
E_Choice Which(void) const
Which variant is currently selected.
bool CanGetEnd3(void) const
Check if it is safe to call GetEnd3 method.
bool CanGetExcept(void) const
Check if it is safe to call GetExcept method.
TIs_all_lower GetIs_all_lower(void) const
Get the Is_all_lower member data.
TWeasel_to_putative GetWeasel_to_putative(void) const
Get the Weasel_to_putative member data.
const Tdata & Get(void) const
Get the member data.
const TReplace & GetReplace(void) const
Get the Replace member data.
const TReplace_func & GetReplace_func(void) const
Get the Replace_func member data.
const TSource & GetSource(void) const
Get the variant data.
bool IsSetReplace(void) const
Check if a value has been assigned to Replace data member.
bool CanGetReplace(void) const
Check if it is safe to call GetReplace method.
const TString_constraint & GetString_constraint(void) const
Get the variant data.
TType GetType(void) const
Get the Type member data.
E_Choice Which(void) const
Which variant is currently selected.
TMin_dist_from_end GetMin_dist_from_end(void) const
Get the variant data.
const Tdata & Get(void) const
Get the member data.
bool CanGetMatch_text(void) const
Check if it is safe to call GetMatch_text method.
bool CanGetIgnore_words(void) const
Check if it is safe to call GetIgnore_words method.
const TEnd3 & GetEnd3(void) const
Get the End3 member data.
const TField & GetField(void) const
Get the Field member data.
@ e_N_or_more_brackets_or_parentheses
@ eStrand_constraint_plus
@ eStrand_constraint_minus
@ e_not_set
No variant selected.
@ e_not_set
No variant selected.
@ eSeqtype_constraint_prot
@ eSeqtype_constraint_nuc
@ ePartial_constraint_complete
@ ePartial_constraint_partial
@ ePartial_constraint_either
@ eString_location_inlist
@ eString_location_equals
@ eString_location_contains
@ eString_location_starts
@ eLocation_type_constraint_ordered
@ eLocation_type_constraint_joined
@ eLocation_type_constraint_single_interval
unsigned int
A callback function used to compare two keys in a database.
static const BitmapCharRec ch1
static const BitmapCharRec ch2
static int match(register const pcre_uchar *eptr, register const pcre_uchar *ecode, const pcre_uchar *mstart, int offset_top, match_data *md, eptrblock *eptrb, unsigned int rdepth)