47 #define THIS_FILE "xgbparint.cpp"
95 #define ERR_FEATURE_LocationParsing_validatr 1, 5
127 auto end_it =
next(current);
128 for (
auto it =
head; it != end_it; ++it) {
129 switch (it->choice) {
134 temp +=
"complement";
248 if (tokens.size() == 1) {
251 auto current_it = begin(tokens);
253 for (
auto scanner_it =
next(current_it);
254 scanner_it != end(tokens);
258 if (scanner_it != end(tokens) &&
267 for (
auto current_it = begin(tokens);
268 current_it != end(tokens);
272 auto scanner_it =
next(current_it);
273 if (scanner_it != end(tokens)) {
276 while (scanner_it != end(tokens)) {
279 if (scanner_it != end(tokens) &&
297 unsigned int current_col)
299 string temp_string = line.substr(0, current_col + 1) +
" ";
304 static unsigned advance_to(
const char c,
unsigned current_pos,
const string& line)
306 int pos = current_pos;
307 while (pos < line.size()) {
308 if (line[pos] == c) {
319 if (accession.empty()) {
323 auto IsAlpha = [](
char c) {
return isalpha(c); };
325 auto it = find_if_not(begin(accession),
330 if (it == end(accession)) {
334 auto prefix_length = distance(begin(accession), it);
336 if (prefix_length != 2) {
340 it = find_if_not(it, end(accession), IsAlpha);
341 if (it == end(accession)) {
344 prefix_length = distance(begin(accession), it);
345 if (prefix_length == 3 || prefix_length == 7) {
346 return prefix_length;
349 }
else if (accession.size() >= 3 &&
352 accession[2] ==
'S') {
356 if (prefix_length == 1 ||
357 prefix_length == 2 ||
358 prefix_length == 4 ||
359 prefix_length == 6) {
360 return prefix_length;
367 static int sGetAccession(
string& accession,
unsigned int& current_col,
const string& line,
bool accver)
369 const auto length = line.size();
370 string_view tempString(line.c_str() + current_col, length - current_col);
372 size_t accessionLength = prefixLength;
374 tempString = tempString.substr(prefixLength);
375 auto notDigitPos = tempString.find_first_not_of(
"0123456789");
376 if (notDigitPos != string_view::npos) {
377 accessionLength += notDigitPos;
378 if (accver && tempString[notDigitPos] ==
'.') {
380 if (tempString.size() > notDigitPos) {
381 tempString = tempString.substr(notDigitPos + 1);
382 notDigitPos = tempString.find_first_not_of(
"0123456789");
383 if (notDigitPos != string_view::npos) {
384 accessionLength += notDigitPos;
389 accessionLength = length - current_col;
393 if (notDigitPos == string_view::npos || tempString[notDigitPos] !=
':') {
399 accession =
string(line.c_str() + current_col, accessionLength);
400 current_col += accessionLength;
411 if (! linein.empty()) {
412 string line{ linein };
414 auto length = line.size();
415 unsigned current_col = 0;
417 while (current_col < length) {
419 if (
isspace(line[current_col]) || line[current_col] ==
'~') {
425 if (
isdigit(line[current_col])) {
427 CTempString tempString(line.c_str() + current_col,
size_t(length - current_col));
429 auto num_digits = (not_digit_pos == string_view::npos) ?
size_t(length - current_col) : not_digit_pos;
430 current_token.
data =
string(line.c_str() + current_col, num_digits);
431 tokens.push_back(current_token);
432 current_col += num_digits;
436 bool skip_new_token =
false;
437 switch (line[current_col]) {
440 if (
auto closing_quote_pos = line.find(
'\"', current_col + 1);
441 closing_quote_pos == string::npos) {
445 size_t len = closing_quote_pos - current_col + 1;
446 current_token.
data =
string(line.c_str(), +current_col);
458 current_col =
advance_to(
'(', current_col, line);
474 current_col =
advance_to(
'(', current_col, line);
493 current_col =
advance_to(
'(', current_col, line);
504 (current_col < length - 3) &&
505 (line[current_col + 3] ==
'(' ||
506 line[current_col + 3] ==
' ' ||
507 line[current_col + 3] ==
'\t' ||
508 line[current_col + 3] ==
'\0')) {
510 current_token.
data =
"gap";
513 tokens.push_back(current_token);
524 for (;
isdigit(line[current_col]); current_col++)
532 current_col =
advance_to(
'(', current_col, line);
546 current_col =
advance_to(
'(', current_col, line);
560 skip_new_token =
true;
572 if (current_col < length - 1 && line[current_col + 1] ==
's') {
575 tokens.push_back(current_token);
579 if (current_col < length - 1) {
580 if (line[current_col + 1] ==
')') {
585 tokens.push_back(current_token);
587 tokens.push_back(current_token);
589 if (current_col < length - 1) {
590 if (line[current_col + 1] ==
';') {
615 if (current_col == length - 1 || line[current_col + 1] !=
'.') {
653 if (current_col < length - 1 && line[current_col + 1] ==
's') {
656 if (current_col < length - 1) {
657 if (line[current_col + 1] ==
';') {
674 if (! skip_new_token) {
675 tokens.push_back(current_token);
687 if (current_token != end(tokens)) {
691 if (current_token == end(tokens)) {
693 const string par_msg =
"mismatched parentheses (" + to_string(paren_count) +
")";
711 if (current_token != end(tokens)) {
748 id.SetGeneral().SetTag().SetId(0);
756 auto it =
next(current_it);
774 auto gapsize_it = it++;
778 auto pLoc =
XGapToSeqLocEx(atoi(gapsize_it->data.c_str()), unknown);
784 current_it =
next(it);
809 bool strange_sin_dot =
false;
810 auto end_it = end(tokens);
827 strange_sin_dot =
true;
867 if (! strange_sin_dot) {
868 if (currentPt == end_it) {
876 numPt = atoi(currentPt->data.c_str()) - 1;
889 if (num_found != 1) {
897 bool one_of_ok =
true;
898 bool at_end_one_of =
false;
908 numPt = atoi(currentPt->data.c_str()) - 1;
914 while (one_of_ok && ! at_end_one_of && currentPt != end_it) {
915 switch (currentPt->choice) {
925 at_end_one_of =
true;
930 if (! one_of_ok && ! at_end_one_of) {
931 while (! at_end_one_of && currentPt != end_it) {
933 at_end_one_of =
true;
968 auto end_it = end(tokens);
971 if (accver && currentPt->data.find(
'.') >= currentPt->data.size() - 1) {
980 if (currentPt == end_it) {
990 }
else if (! seq_ids.empty()) {
992 new_id->
Assign(*(*seq_ids.begin()));
1000 if (currentPt == end_it) {
1011 switch (currentPt->choice) {
1045 ret->SetInt().SetFuzz_from(*new_fuzz);
1047 ret->SetInt().SetId(*new_id);
1049 xgbload_number(ret->SetInt().SetFrom(), ret->SetInt().SetFuzz_from(), keep_rawPt, currentPt, tokens, numErrors,
TAKE_FIRST);
1052 ret->SetInt().ResetFuzz_from();
1054 xgbcheck_range(ret->GetInt().GetFrom(), *new_id, keep_rawPt, numErrors, tokens, currentPt);
1060 if (currentPt != end_it) {
1061 bool in_caret =
false;
1062 switch (currentPt->choice) {
1091 if (ret->GetInt().IsSetFuzz_from()) {
1107 if (currentPt == end_it) {
1122 if (ret->GetInt().IsSetFuzz_from()) {
1132 xgbload_number(ret->SetInt().SetTo(), ret->SetInt().SetFuzz_to(), keep_rawPt, currentPt, tokens, numErrors,
TAKE_SECOND);
1135 ret->SetInt().ResetFuzz_to();
1137 xgbcheck_range(ret->GetInt().GetTo(), *new_id, keep_rawPt, numErrors, tokens, currentPt);
1144 TSeqPos to = ret->GetInt().GetTo();
1151 point.
SetFuzz().SetRange().SetMax(to);
1157 ret->GetInt().GetFrom() == ret->GetInt().GetTo() &&
1158 ! ret->GetInt().IsSetFuzz_from() &&
1159 ! ret->GetInt().IsSetFuzz_to()) {
1190 bool add_nulls =
false;
1191 auto current_token = currentPt;
1192 bool did_complement =
false;
1194 auto end_it = end(tokens);
1199 switch (current_token->choice) {
1202 if (currentPt == end_it) {
1217 if (currentPt == end_it) {
1230 retval =
xgbloc_ver(keep_rawPt, parenPt, currentPt, tokens, numErrors, seq_ids, accver);
1235 did_complement =
true;
1236 if (currentPt != end_it) {
1292 xgbgap(currentPt, end_it, retval,
false);
1295 xgbgap(currentPt, end_it, retval,
true);
1305 retval =
xgbint_ver(keep_rawPt, currentPt, tokens, numErrors, seq_ids, accver);
1319 }
while (in_sites && currentPt != end_it);
1321 if (! numErrors && ! did_complement && retval &&
1327 if (currentPt == end_it) {
1343 if (currentPt == end_it) {
1357 while (! numErrors && currentPt != end_it) {
1359 while (currentPt != end_it &&
1366 if (currentPt == end_it)
1371 if (retval->
IsMix())
1372 retval->
SetMix().AddSeqLoc(*next_loc);
1386 if (retval->
IsMix())
1387 retval->
SetMix().AddSeqLoc(*null_loc);
1399 if (currentPt == end_it) {
1420 retval->
SetWhole().Assign(*(*seq_ids.begin()));
1437 ret =
xgbloc_ver(keep_rawPt, parenPt, currentPt, tokens, numErrors, seq_ids, accver);
1439 if (currentPt == end(tokens)) {
1472 if (tokens.empty()) {
1484 auto head_token = tokens.begin();
1485 auto current_token = head_token;
1486 auto end_it = tokens.end();
1488 int paren_count = 0;
1492 if (current_token != end_it) {
1493 switch (current_token->choice) {
1499 ret =
xgbloc_ver(keep_rawPt, paren_count, current_token, tokens, numErrors, seq_ids, accver);
1532 ret =
xgbint_ver(keep_rawPt, current_token, tokens, numErrors, seq_ids, accver);
1538 ret =
xgbreplace_ver(keep_rawPt, paren_count, current_token, tokens, numErrors, seq_ids, accver);
1548 }
while (in_sites && current_token != end_it);
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
std::list< CRef< objects::CSeq_id > > TSeqIdList
void Nlm_ErrSetContext(const char *module, const char *fname, int line)
void Nlm_ErrPostStr(ErrSev sev, int lev1, int lev2, string_view str)
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
unsigned int TSeqPos
Type for sequence locations and lengths.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
void SetNull(void)
Override all setters to incorporate cache invalidation.
CSeq_loc * SeqLocRevCmpl(const CSeq_loc &loc, CScope *scope)
Get reverse complement of the seq-loc (?)
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string (in-place)
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
size_type find_first_not_of(const CTempString match, size_type pos=0) const
Find the first occurrence of any character not in the matching string within the current string,...
@ eNocase
Case insensitive compare.
void SetMin(TMin value)
Assign a value to Min data member.
TRange & SetRange(void)
Select the variant.
TMin GetMin(void) const
Get the Min member data.
void SetMax(TMax value)
Assign a value to Max data member.
TLim & SetLim(void)
Select the variant.
TMax GetMax(void) const
Get the Max member data.
const TRange & GetRange(void) const
Get the variant data.
@ eLim_tl
space to left of position
@ e_not_set
No variant selected.
void SetTo(TTo value)
Assign a value to To data member.
void SetPoint(TPoint value)
Assign a value to Point data member.
void SetId(TId &value)
Assign a value to Id data member.
bool IsMix(void) const
Check if variant Mix is selected.
TPoint GetPoint(void) const
Get the Point member data.
void SetId(TId &value)
Assign a value to Id data member.
TFrom GetFrom(void) const
Get the From member data.
void SetFuzz(TFuzz &value)
Assign a value to Fuzz data member.
void SetFrom(TFrom value)
Assign a value to From data member.
virtual void Reset(void)
Reset the whole object.
bool IsSetId(void) const
WARNING: this used to be optional. Check if a value has been assigned to Id data member.
bool IsInt(void) const
Check if variant Int is selected.
const TInt & GetInt(void) const
Get the variant data.
bool IsNull(void) const
Check if variant Null is selected.
bool IsSetFuzz_from(void) const
Check if a value has been assigned to Fuzz_from data member.
bool IsPnt(void) const
Check if variant Pnt is selected.
range(_Ty, _Ty) -> range< _Ty >
constexpr auto front(list< Head, As... >, T=T()) noexcept -> Head
Miscellaneous common-use basic types and functionality.
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
static void do_xgbparse_error(string_view msg, string_view details)
static CRef< CSeq_loc > xgbloc_ver(bool &keep_rawPt, int &parenPt, TTokenIt &currentPt, const TTokens &tokens, int &numErrors, const TSeqIdList &seq_ids, bool accver)
static void sConvertIntToPoint(CSeq_loc &loc)
static void xlex_error_func(string_view msg, const string &line, unsigned int current_col)
static CRef< CSeq_loc > xgbint_ver(bool &keep_rawPt, TTokenIt &currentPt, const TTokens &tokens, int &numErrors, const TSeqIdList &seq_ids, bool accver)
static string xgbparse_point(TTokenConstIt head, TTokenConstIt current)
static int xgbparselex_ver(string_view linein, TTokens &tokens, bool accver)
static void xgbgap(TTokenIt &current_it, TTokenConstIt end_it, CRef< CSeq_loc > &loc, bool unknown)
static void xfind_one_of_num(list< STokenInfo > &tokens)
list< STokenInfo > TTokens
static void * xgbparse_range_data
static CRef< CSeq_loc > XGapToSeqLocEx(Int4 range, bool unknown)
static void xgbparse_error(string_view front, TTokenConstIt head, TTokenConstIt current)
static void xgbparse_better_be_done(int &numErrors, TTokenIt current_token, const TTokens &tokens, bool &keep_rawPt, int paren_count)
static int sGetAccession(string &accession, unsigned int &current_col, const string &line, bool accver)
TTokens::const_iterator TTokenConstIt
static CRef< CSeq_loc > xgbreplace_ver(bool &keep_rawPt, int &parenPt, TTokenIt &currentPt, const TTokens &tokens, int &numErrors, const TSeqIdList &seq_ids, bool accver)
static void xgbload_number(TSeqPos &numPt, CInt_fuzz &fuzz, bool &keep_rawPt, TTokenIt &currentPt, const TTokens &tokens, int &numErrors, int take_which)
void xinstall_gbparse_error_handler(X_gbparse_errfunc new_func)
const char * unkseqlitdbtag
void xinstall_gbparse_range_func(void *data, X_gbparse_rangefunc new_func)
static void xgbcheck_range(TSeqPos num, const CSeq_id &id, bool &keep_rawPt, int &numErrors, const TTokens &tokens, TTokenConstIt current)
TTokens::iterator TTokenIt
static unsigned advance_to(const char c, unsigned current_pos, const string &line)
static X_gbparse_errfunc Err_func
CRef< CSeq_loc > xgbparseint_ver(string_view raw_intervals, bool &keep_rawPt, int &numErrors, const TSeqIdList &seq_ids, bool accver)
static X_gbparse_rangefunc Range_func
#define ERR_FEATURE_LocationParsing_validatr
static size_t sParseAccessionPrefix(string_view accession)
Int4(* X_gbparse_rangefunc)(void *, const objects::CSeq_id &id)
void(* X_gbparse_errfunc)(string_view, string_view)