47 #define THIS_FILE "xgbparint.cpp"
95 #define ERR_FEATURE_LocationParsing_validatr 1, 5
127 auto end_it =
next(current);
128 for (
auto it =
head; it != end_it; ++it) {
129 switch (it->choice) {
134 temp +=
"complement";
248 if (tokens.size() == 1) {
251 auto current_it = begin(tokens);
253 for (
auto scanner_it =
next(current_it);
254 scanner_it != end(tokens);
258 if (scanner_it != end(tokens) &&
267 for (
auto current_it = begin(tokens);
268 current_it != end(tokens);
272 auto scanner_it =
next(current_it);
273 if (scanner_it != end(tokens)) {
276 while (scanner_it != end(tokens)) {
279 if (scanner_it != end(tokens) &&
296 const int current_col)
298 string temp_string = line.substr(0, current_col + 1) +
" ";
303 static unsigned advance_to(
const char c,
unsigned current_pos,
const string& line)
305 int pos = current_pos;
306 while (pos < line.size()) {
307 if (line[pos] == c) {
318 if (accession.
empty()) {
322 auto IsAlpha = [](
char c) {
return isalpha(c); };
324 auto it = find_if_not(begin(accession),
329 if (it == end(accession)) {
333 auto prefix_length = distance(begin(accession), it);
335 if (prefix_length != 2) {
339 it = find_if_not(it, end(accession), IsAlpha);
340 if (it == end(accession)) {
343 prefix_length = distance(begin(accession), it);
344 if (prefix_length == 3 || prefix_length == 7) {
345 return prefix_length;
348 }
else if (accession.
size() >= 3 &&
351 accession[2] ==
'S') {
355 if (prefix_length == 1 ||
356 prefix_length == 2 ||
357 prefix_length == 4 ||
358 prefix_length == 6) {
359 return prefix_length;
366 static int sGetAccession(
string& accession,
unsigned int& current_col,
const string& line,
bool accver)
368 const auto length = line.size();
369 CTempString tempString(line.c_str() + current_col, length - current_col);
371 size_t accessionLength = prefixLength;
373 tempString = tempString.
substr(prefixLength);
375 if (notDigitPos !=
NPOS) {
376 accessionLength += notDigitPos;
377 if (accver && tempString[notDigitPos] ==
'.') {
379 if (tempString.
size() > notDigitPos) {
380 tempString = tempString.
substr(notDigitPos + 1);
382 if (notDigitPos !=
NPOS) {
383 accessionLength += notDigitPos;
388 accessionLength = length - current_col;
392 if (notDigitPos ==
NPOS || tempString[notDigitPos] !=
':') {
398 accession =
string(line.c_str() + current_col, accessionLength);
399 current_col += accessionLength;
411 string line{ linein };
413 auto length = line.size();
414 unsigned current_col = 0;
416 while (current_col < length) {
418 if (
isspace(line[current_col]) || line[current_col] ==
'~') {
424 if (
isdigit(line[current_col])) {
426 CTempString tempString(line.c_str() + current_col,
size_t(length - current_col));
428 auto num_digits = (not_digit_pos ==
NPOS) ?
size_t(length - current_col) : not_digit_pos;
429 current_token.
data =
string(line.c_str() + current_col, num_digits);
430 tokens.push_back(current_token);
431 current_col += num_digits;
435 bool skip_new_token =
false;
436 switch (line[current_col]) {
439 if (
auto closing_quote_pos = line.find(
'\"', current_col + 1);
440 closing_quote_pos == string::npos) {
444 size_t len = closing_quote_pos - current_col + 1;
445 current_token.
data =
string(line.c_str(), +current_col);
457 current_col =
advance_to(
'(', current_col, line);
473 current_col =
advance_to(
'(', current_col, line);
492 current_col =
advance_to(
'(', current_col, line);
503 (current_col < length - 3) &&
504 (line[current_col + 3] ==
'(' ||
505 line[current_col + 3] ==
' ' ||
506 line[current_col + 3] ==
'\t' ||
507 line[current_col + 3] ==
'\0')) {
509 current_token.
data =
"gap";
512 tokens.push_back(current_token);
523 for (;
isdigit(line[current_col]); current_col++)
531 current_col =
advance_to(
'(', current_col, line);
545 current_col =
advance_to(
'(', current_col, line);
559 skip_new_token =
true;
571 if (current_col < length - 1 && line[current_col + 1] ==
's') {
574 tokens.push_back(current_token);
578 if (current_col < length - 1) {
579 if (line[current_col + 1] ==
')') {
584 tokens.push_back(current_token);
586 tokens.push_back(current_token);
588 if (current_col < length - 1) {
589 if (line[current_col + 1] ==
';') {
614 if (current_col == length - 1 || line[current_col + 1] !=
'.') {
652 if (current_col < length - 1 && line[current_col + 1] ==
's') {
655 if (current_col < length - 1) {
656 if (line[current_col + 1] ==
';') {
673 if (! skip_new_token) {
674 tokens.push_back(current_token);
686 if (current_token != end(tokens)) {
690 if (current_token == end(tokens)) {
692 const string par_msg =
"mismatched parentheses (" + to_string(paren_count) +
")";
712 if (current_token != end(tokens)) {
749 id.SetGeneral().SetTag().SetId(0);
757 auto it =
next(current_it);
775 auto gapsize_it = it++;
779 auto pLoc =
XGapToSeqLocEx(atoi(gapsize_it->data.c_str()), unknown);
785 current_it =
next(it);
810 bool strange_sin_dot =
false;
811 auto end_it = end(tokens);
828 strange_sin_dot =
true;
868 if (! strange_sin_dot) {
869 if (currentPt == end_it) {
877 numPt = atoi(currentPt->data.c_str()) - 1;
890 if (num_found != 1) {
898 bool one_of_ok =
true;
899 bool at_end_one_of =
false;
909 numPt = atoi(currentPt->data.c_str()) - 1;
915 while (one_of_ok && ! at_end_one_of && currentPt != end_it) {
916 switch (currentPt->choice) {
926 at_end_one_of =
true;
931 if (! one_of_ok && ! at_end_one_of) {
932 while (! at_end_one_of && currentPt != end_it) {
934 at_end_one_of =
true;
969 auto end_it = end(tokens);
972 if (accver && currentPt->data.find(
'.') >= currentPt->data.size() - 1) {
981 if (currentPt == end_it) {
991 }
else if (! seq_ids.empty()) {
993 new_id->
Assign(*(*seq_ids.begin()));
1001 if (currentPt == end_it) {
1012 switch (currentPt->choice) {
1046 ret->SetInt().SetFuzz_from(*new_fuzz);
1048 ret->SetInt().SetId(*new_id);
1050 xgbload_number(ret->SetInt().SetFrom(), ret->SetInt().SetFuzz_from(), keep_rawPt, currentPt, tokens, numErrors,
TAKE_FIRST);
1053 ret->SetInt().ResetFuzz_from();
1055 xgbcheck_range(ret->GetInt().GetFrom(), *new_id, keep_rawPt, numErrors, tokens, currentPt);
1061 if (currentPt != end_it) {
1062 bool in_caret =
false;
1063 switch (currentPt->choice) {
1092 if (ret->GetInt().IsSetFuzz_from()) {
1108 if (currentPt == end_it) {
1123 if (ret->GetInt().IsSetFuzz_from()) {
1133 xgbload_number(ret->SetInt().SetTo(), ret->SetInt().SetFuzz_to(), keep_rawPt, currentPt, tokens, numErrors,
TAKE_SECOND);
1136 ret->SetInt().ResetFuzz_to();
1138 xgbcheck_range(ret->GetInt().GetTo(), *new_id, keep_rawPt, numErrors, tokens, currentPt);
1145 TSeqPos to = ret->GetInt().GetTo();
1152 point.
SetFuzz().SetRange().SetMax(to);
1158 ret->GetInt().GetFrom() == ret->GetInt().GetTo() &&
1159 ! ret->GetInt().IsSetFuzz_from() &&
1160 ! ret->GetInt().IsSetFuzz_to()) {
1191 bool add_nulls =
false;
1192 auto current_token = currentPt;
1193 bool did_complement =
false;
1195 auto end_it = end(tokens);
1200 switch (current_token->choice) {
1203 if (currentPt == end_it) {
1218 if (currentPt == end_it) {
1231 retval =
xgbloc_ver(keep_rawPt, parenPt, currentPt, tokens, numErrors, seq_ids, accver);
1236 did_complement =
true;
1237 if (currentPt != end_it) {
1293 xgbgap(currentPt, end_it, retval,
false);
1296 xgbgap(currentPt, end_it, retval,
true);
1306 retval =
xgbint_ver(keep_rawPt, currentPt, tokens, numErrors, seq_ids, accver);
1320 }
while (in_sites && currentPt != end_it);
1322 if (! numErrors && ! did_complement && retval &&
1328 if (currentPt == end_it) {
1344 if (currentPt == end_it) {
1358 while (! numErrors && currentPt != end_it) {
1360 while (currentPt != end_it &&
1367 if (currentPt == end_it)
1372 if (retval->
IsMix())
1373 retval->
SetMix().AddSeqLoc(*next_loc);
1387 if (retval->
IsMix())
1388 retval->
SetMix().AddSeqLoc(*null_loc);
1400 if (currentPt == end_it) {
1421 retval->
SetWhole().Assign(*(*seq_ids.begin()));
1438 ret =
xgbloc_ver(keep_rawPt, parenPt, currentPt, tokens, numErrors, seq_ids, accver);
1440 if (currentPt == end(tokens)) {
1473 if (tokens.empty()) {
1485 auto head_token = tokens.begin();
1486 auto current_token = head_token;
1487 auto end_it = tokens.end();
1489 int paren_count = 0;
1493 if (current_token != end_it) {
1494 switch (current_token->choice) {
1500 ret =
xgbloc_ver(keep_rawPt, paren_count, current_token, tokens, numErrors, seq_ids, accver);
1533 ret =
xgbint_ver(keep_rawPt, current_token, tokens, numErrors, seq_ids, accver);
1539 ret =
xgbreplace_ver(keep_rawPt, paren_count, current_token, tokens, numErrors, seq_ids, accver);
1549 }
while (in_sites && current_token != end_it);
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
std::list< CRef< objects::CSeq_id > > TSeqIdList
void Nlm_ErrSetContext(const char *module, const char *fname, int line)
void Nlm_ErrPostEx(ErrSev sev, int lev1, int lev2, const char *fmt,...)
unsigned int TSeqPos
Type for sequence locations and lengths.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
void SetNull(void)
Override all setters to incorporate cache invalidation.
CSeq_loc * SeqLocRevCmpl(const CSeq_loc &loc, CScope *scope)
Get reverse complement of the seq-loc (?)
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string (in-place)
bool empty(void) const
Return true if the represented string is empty (i.e., the length is zero)
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
CTempString substr(size_type pos) const
Obtain a substring from this string, beginning at a given offset.
size_type find_first_not_of(const CTempString match, size_type pos=0) const
Find the first occurrence of any character not in the matching string within the current string,...
size_type size(void) const
Return the length of the represented array.
@ eNocase
Case insensitive compare.
void SetMin(TMin value)
Assign a value to Min data member.
TRange & SetRange(void)
Select the variant.
TMin GetMin(void) const
Get the Min member data.
void SetMax(TMax value)
Assign a value to Max data member.
TLim & SetLim(void)
Select the variant.
TMax GetMax(void) const
Get the Max member data.
const TRange & GetRange(void) const
Get the variant data.
@ eLim_tl
space to left of position
@ e_not_set
No variant selected.
void SetTo(TTo value)
Assign a value to To data member.
void SetPoint(TPoint value)
Assign a value to Point data member.
void SetId(TId &value)
Assign a value to Id data member.
bool IsMix(void) const
Check if variant Mix is selected.
TPoint GetPoint(void) const
Get the Point member data.
void SetId(TId &value)
Assign a value to Id data member.
TFrom GetFrom(void) const
Get the From member data.
void SetFuzz(TFuzz &value)
Assign a value to Fuzz data member.
void SetFrom(TFrom value)
Assign a value to From data member.
virtual void Reset(void)
Reset the whole object.
bool IsSetId(void) const
WARNING: this used to be optional Check if a value has been assigned to Id data member.
bool IsInt(void) const
Check if variant Int is selected.
const TInt & GetInt(void) const
Get the variant data.
bool IsNull(void) const
Check if variant Null is selected.
bool IsSetFuzz_from(void) const
Check if a value has been assigned to Fuzz_from data member.
bool IsPnt(void) const
Check if variant Pnt is selected.
range(_Ty, _Ty) -> range< _Ty >
constexpr auto front(list< Head, As... >, T=T()) noexcept -> Head
Miscellaneous common-use basic types and functionality.
static CRef< CSeq_loc > xgbloc_ver(bool &keep_rawPt, int &parenPt, TTokenIt &currentPt, const TTokens &tokens, int &numErrors, const TSeqIdList &seq_ids, bool accver)
static void sConvertIntToPoint(CSeq_loc &loc)
static CRef< CSeq_loc > xgbint_ver(bool &keep_rawPt, TTokenIt &currentPt, const TTokens &tokens, int &numErrors, const TSeqIdList &seq_ids, bool accver)
static string xgbparse_point(TTokenConstIt head, TTokenConstIt current)
CRef< CSeq_loc > xgbparseint_ver(const char *raw_intervals, bool &keep_rawPt, int &numErrors, const TSeqIdList &seq_ids, bool accver)
static void xgbgap(TTokenIt &current_it, TTokenConstIt end_it, CRef< CSeq_loc > &loc, bool unknown)
static void xfind_one_of_num(list< STokenInfo > &tokens)
list< STokenInfo > TTokens
static void xgbparse_error(const char *front, TTokenConstIt head, TTokenConstIt current)
static void do_xgbparse_error(const char *msg, const char *details)
static void * xgbparse_range_data
static size_t sParseAccessionPrefix(const CTempString &accession)
static CRef< CSeq_loc > XGapToSeqLocEx(Int4 range, bool unknown)
static void xgbparse_better_be_done(int &numErrors, TTokenIt current_token, const TTokens &tokens, bool &keep_rawPt, int paren_count)
static int sGetAccession(string &accession, unsigned int &current_col, const string &line, bool accver)
TTokens::const_iterator TTokenConstIt
static CRef< CSeq_loc > xgbreplace_ver(bool &keep_rawPt, int &parenPt, TTokenIt &currentPt, const TTokens &tokens, int &numErrors, const TSeqIdList &seq_ids, bool accver)
static void xgbload_number(TSeqPos &numPt, CInt_fuzz &fuzz, bool &keep_rawPt, TTokenIt &currentPt, const TTokens &tokens, int &numErrors, int take_which)
static void xlex_error_func(const char *msg, const string &line, const int current_col)
void xinstall_gbparse_error_handler(X_gbparse_errfunc new_func)
const char * unkseqlitdbtag
void xinstall_gbparse_range_func(void *data, X_gbparse_rangefunc new_func)
static void xgbcheck_range(TSeqPos num, const CSeq_id &id, bool &keep_rawPt, int &numErrors, const TTokens &tokens, TTokenConstIt current)
TTokens::iterator TTokenIt
static unsigned advance_to(const char c, unsigned current_pos, const string &line)
static X_gbparse_errfunc Err_func
static int xgbparselex_ver(const char *linein, TTokens &tokens, bool accver)
static X_gbparse_rangefunc Range_func
#define ERR_FEATURE_LocationParsing_validatr
Int4(* X_gbparse_rangefunc)(void *, const objects::CSeq_id &id)
void(* X_gbparse_errfunc)(const Char *, const Char *)