// Name of this translation unit as a string literal.
// NOTE(review): presumably consumed by the project's error-reporting macros — confirm.
22 #define THIS_FILE "xgbfeat.cpp"
// Expands to the identifier `this_module` (listed in this file's symbol index as a
// `static const Char*`).
32 #define THIS_MODULE this_module
// Error identifiers. Each macro expands to TWO comma-separated integer
// literals, so it supplies two arguments wherever it is used.
// NOTE(review): presumably these are (code, subcode) pairs handed to the
// error-posting call — the call sites are not visible here; confirm.
//
// Feature-level errors: first value is always 1, second value runs 0..6.
36 #define ERR_FEATURE 1, 0
37 #define ERR_FEATURE_UnknownFeatureKey 1, 1
38 #define ERR_FEATURE_MissManQual 1, 2
39 #define ERR_FEATURE_QualWrongThisFeat 1, 3
40 #define ERR_FEATURE_FeatureKeyReplaced 1, 4
41 #define ERR_FEATURE_LocationParsing 1, 5
42 #define ERR_FEATURE_IllegalFormat 1, 6
// Qualifier-level errors: first value is always 2, second value runs 0..14.
43 #define ERR_QUALIFIER 2, 0
44 #define ERR_QUALIFIER_InvalidDataFormat 2, 1
45 #define ERR_QUALIFIER_Too_many_tokens 2, 2
46 #define ERR_QUALIFIER_MultiValue 2, 3
47 #define ERR_QUALIFIER_UnknownSpelling 2, 4
48 #define ERR_QUALIFIER_Xtratext 2, 5
49 #define ERR_QUALIFIER_SeqPosComma 2, 6
50 #define ERR_QUALIFIER_Pos 2, 7
51 #define ERR_QUALIFIER_EmptyNote 2, 8
52 #define ERR_QUALIFIER_NoteEmbeddedQual 2, 9
53 #define ERR_QUALIFIER_EmbeddedQual 2, 10
54 #define ERR_QUALIFIER_AA 2, 11
55 #define ERR_QUALIFIER_Seq 2, 12
56 #define ERR_QUALIFIER_BadECnum 2, 13
57 #define ERR_QUALIFIER_Cons_splice 2, 14
// Small integer selectors (1..4) naming the kinds of single-token qualifier value.
// NOTE(review): presumably passed as the `Uint1 type` argument of
// CkQualTokenType() to choose which format check to apply — the dispatch
// code is not visible here; confirm against the call sites.
59 #define ParFlat_Stoken_type 1
60 #define ParFlat_BracketInt_type 2
61 #define ParFlat_Integer_type 3
62 #define ParFlat_Number_type 4
69 bool perform_corrections);
74 bool perform_corrections);
81 const Char* array_string[],
86 bool perform_corrections);
93 #define ParFlat_SPLIT_IGNORE 4
95 "citation",
"EC_number",
"rpt_type",
"usedin"
146 for (TQualVector::iterator cur = quals.begin(); cur != quals.end();) {
147 const string& qual_str = (*cur)->GetQual();
148 if (qual_str ==
"gsdb_id") {
153 string qs = qual_str;
154 if (qs ==
"geo_loc_name") {
163 if (qual_type == cur_type) {
176 if (perform_corrections) {
177 cur = quals.erase(cur);
189 for (
const auto& cur : quals) {
204 if (perform_corrections)
226 for (
auto& cur : quals) {
227 const string& qual_str = cur->GetQual();
233 if (! cur->IsSetVal()) {
237 string val_str = cur->GetVal();
238 if (*val_str.begin() !=
'(') {
241 if (*val_str.rbegin() !=
')') {
245 val_str = val_str.substr(1, val_str.size() - 2);
246 size_t sep_pos = val_str.find(
',');
247 if (sep_pos == string::npos) {
248 cur->SetVal(val_str);
254 cur->SetVal(val_str.substr(0, sep_pos));
256 size_t offset = sep_pos + 1;
257 sep_pos = val_str.find(
',',
offset);
258 while (sep_pos != string::npos) {
264 quals.push_back(qual_new);
434 auto val_class = QUAL_TYPE_TO_VAL_CLASS_MAP.
find(qual_type);
435 if (val_class == QUAL_TYPE_TO_VAL_CLASS_MAP.
end()) {
439 return val_class->second;
447 "EXPERIMENTAL",
"NOT_EXPERIMENTAL"
457 "long_terminal_repeat",
458 "non_LTR_retrotransposon_polymeric_tract",
459 "X_element_combinatorial_repeat",
462 "centromeric_repeat",
463 "engineered_foreign_repetitive_element",
483 for (TQualVector::iterator cur = quals.begin(); cur != quals.end();) {
484 const string& qual_str = (*cur)->GetQual();
485 if (qual_str ==
"gsdb_id") {
490 string qs = qual_str;
491 if (qs ==
"geo_loc_name") {
502 if (perform_corrections) {
503 cur = quals.erase(cur);
515 ret =
CkQualNote(*(*cur), error_msgs, perform_corrections);
521 ret =
CkQualText(*(*cur),
nullptr,
false, error_msgs, perform_corrections);
557 ret =
CkQualEcnum(*(*cur), error_msgs, perform_corrections);
605 if ((*cur)->IsSetVal() && ! (*cur)->GetVal().empty()) {
610 if (perform_corrections) {
619 cur = quals.erase(cur);
648 size_t comma = caa.find(
',');
649 if (comma != string::npos) {
650 caa = caa.substr(0, comma);
656 while (*
str !=
'\0' && (*
str ==
' ' || *
str ==
')'))
707 while (*
str !=
'\0' && (*
str ==
',' || *
str ==
' '))
717 string aa(
str, eptr);
771 bool has_embedded =
false;
774 retval =
CkQualText(cur, &has_embedded,
true, error_msgs, perform_corrections);
778 string val_str = cur.
GetVal();
779 std::replace(val_str.begin(), val_str.end(),
'\"',
'\'');
801 for (bptr =
value; *bptr !=
'\0';) {
802 for (; *bptr !=
'/' && *bptr !=
'\0'; bptr++)
806 for (++bptr, ptr = bptr; *bptr !=
'=' && *bptr !=
' ' && *bptr !=
'\0'; bptr++)
809 string qual(ptr, bptr);
833 bool perform_corrections)
841 *has_embedded =
false;
851 if (perform_corrections) {
860 while (*
str !=
'\0' && (*
str ==
' ' || *
str ==
'\"')) {
863 if (*(
str - 1) ==
'\"') {
872 while (*
str !=
'\0' && (*
str ==
' ' || *
str ==
'\"'))
881 string value(bptr, eptr);
910 const Char* eptr =
nullptr;
922 while (*
str !=
'\0' && (*
str ==
',' || *
str ==
' '))
932 string aa(
str, eptr);
987 while (*
str !=
'\0' && *
str ==
' ')
991 string msg(bptr, eptr);
993 for (
Int2 i = 0;
i < totalstr; ++
i) {
1030 retval =
CkQualText(cur,
nullptr,
false, error_msgs, perform_corrections);
1035 while (*
str !=
'\0' && (*
str ==
' ' || *
str ==
'\"'))
1038 for (; *
str !=
'\0' && *
str !=
'\"';
str++)
1066 const Char* yes_or_no =
"not \'YES\', \'NO\' or \'ABSENT\'";
1083 for (; *
str ==
' ';
str++)
1085 for (; *
str ==
',';
str++)
1087 for (; *
str ==
' ';
str++)
1106 while (*
str !=
'\0' && (*
str ==
' ' || *
str ==
')'))
1112 bptr =
"extra characters";
1162 bool token_there =
false;
1171 if (! token_there) {
1182 while (*
str !=
'\0' && *
str ==
' ')
1187 string token(bptr, eptr);
1216 bptr =
"Invalid [integer] format";
1219 bptr =
"Not an integer number";
1222 bptr =
"Invalid format";
1226 bptr =
"Bad qualifier value";
1260 return "NULL value";
1314 if (*
str ==
'.' && *(
str + 1) ==
'.') {
static char ValidAminoAcid(string_view abbrev)
@Gb_qual.hpp User-defined methods of the data storage class.
bool IsLegalQualifier(EQualifier qual) const
Test whether a certain qualifier is legal for the feature.
EQualifier
List of available qualifiers for feature keys.
@ eQual_environmental_sample
@ eQual_recombination_class
@ eQual_UniProtKB_evidence
@ eQual_culture_collection
@ eQual_ribosomal_slippage
@ eQual_calculated_mol_wt
@ eQual_metagenome_source
@ eQual_mobile_element_type
@ eQual_artificial_location
const TQualifiers & GetMandatoryQualifiers(void) const
Get the list of all mandatory qualifiers for the feature.
static EQualifier GetQualifierType(CTempString qual)
convert qual string to enumerated value
static CTempString GetQualifierAsString(EQualifier qual)
Convert a qualifier from an enumerated value to a string representation or empty if not found.
const_iterator end() const
const_iterator find(const key_type &key) const
bool StringEquNI(const char *s1, const char *s2, size_t n)
static const char * str(char *buf, int n)
uint8_t Uint1
1-byte (8-bit) unsigned integer
int16_t Int2
2-byte (16-bit) signed integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string (in-place)
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
@ eTrunc_End
Truncate trailing whitespace only.
static const char label[]
const TVal & GetVal(void) const
Get the Val member data.
void SetQual(const TQual &value)
Assign a value to Qual data member.
bool IsSetQual(void) const
Check if a value has been assigned to Qual data member.
void SetVal(const TVal &value)
Assign a value to Val data member.
const TQual & GetQual(void) const
Get the Qual member data.
bool IsSetVal(void) const
Check if a value has been assigned to Val data member.
range(_Ty, _Ty) -> range< _Ty >
constexpr bool empty(list< Ts... >) noexcept
const GenericPointer< typename T::ValueType > T2 value
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
#define ParFlat_Stoken_type
#define ERR_QUALIFIER_Pos
static int CkQualMatchToken(CGb_qual &cur, bool error_msgs, const Char *array_string[], Int2 totalstr)
int XGBFeatKeyQualValid(CSeqFeatData::ESubtype subtype, TQualVector &quals, bool error_msgs, bool perform_corrections)
static int CkQualPosaa(CGb_qual &cur, bool error_msgs)
#define ERR_QUALIFIER_MultiValue
static const Char * CkNumberType(const Char *str)
static int CkQualPosSeqaa(CGb_qual &cur, bool error_msgs, string &aa, const Char *eptr)
#define ERR_FEATURE_QualWrongThisFeat
#define ERR_QUALIFIER_UnknownSpelling
static int CkQualText(CGb_qual &cur, bool *has_embedded, bool from_note, bool error_msgs, bool perform_corrections)
const Char * ParFlat_IntOrString[]
#define ParFlat_Number_type
const Char * ParFlat_LRBString[]
#define ERR_QUALIFIER_SeqPosComma
#define ParFlat_SPLIT_IGNORE
static const Char * CkLabelType(const Char *str)
#define ERR_QUALIFIER_Too_many_tokens
#define ERR_QUALIFIER_BadECnum
static int CkQualSeqaa(CGb_qual &cur, bool error_msgs)
#define ERR_FEATURE_MissManQual
static ETokenClass GetQualifierClass(CSeqFeatData::EQualifier qual_type)
static Int2 GBQualSplit(const Char *qual)
static int CkQualNote(CGb_qual &cur, bool error_msgs, bool perform_corrections)
#define ParFlat_Integer_type
#define ERR_QUALIFIER_EmptyNote
static int CkQualEcnum(CGb_qual &cur, bool error_msgs, bool perform_corrections)
const Char * GBQual_names_split_ignore[4]
static const Char * CkBracketType(const Char *str)
static int CkQualTokenType(CGb_qual &cur, bool error_msgs, Uint1 type)
static bool ScanEmbedQual(const Char *value)
#define ParFlat_BracketInt_type
static const Char * this_module
#define ERR_QUALIFIER_InvalidDataFormat
const Char * ParFlat_RptString[]
#define ERR_QUALIFIER_Seq
static int GBQualSemanticValid(TQualVector &quals, bool error_msgs, bool perform_corrections)
static int SplitMultiValQual(TQualVector &quals)
static int CkQualSite(CGb_qual &cur, bool error_msgs)
#define ERR_QUALIFIER_Xtratext
#define ERR_QUALIFIER_Cons_splice
const Char * ParFlat_ExpString[]
#define GB_FEAT_ERR_SILENT
std::vector< CRef< objects::CGb_qual > > TQualVector
#define GB_FEAT_ERR_REPAIRABLE