51 #define THIS_FILE "indx_blk.cpp"
59 " ",
"single",
"double",
"mixed",
nullptr
63 " ",
"Linear",
"Circular",
"Tandem",
nullptr
75 " ",
"NA",
"DNA",
"genomic DNA",
"other DNA",
"unassigned DNA",
"RNA",
76 "mRNA",
"rRNA",
"tRNA",
"uRNA",
"scRNA",
"snRNA",
"snoRNA",
"pre-RNA",
77 "pre-mRNA",
"genomic RNA",
"other RNA",
"unassigned RNA",
"cRNA",
82 " ",
"PRI",
"ROD",
"MAM",
"VRT",
"INV",
"PLN",
"BCT",
"RNA",
83 "VRL",
"PHG",
"SYN",
"UNA",
"EST",
"PAT",
"STS",
"ORG",
"GSS",
84 "HUM",
"HTG",
"CON",
"HTC",
"ENV",
"TSA",
nullptr
88 "AJ",
"AL",
"AM",
"AN",
"AX",
"BN",
"BX",
"CQ",
"CR",
"CS",
"CT",
"CU",
89 "FB",
"FM",
"FN",
"FO",
"FP",
"FQ",
"FR",
"GM",
"GN",
"HA",
"HB",
"HC",
90 "HD",
"HE",
"HF",
"HG",
"HH",
"HI",
"JA",
"JB",
"JC",
"JD",
"JE",
"LK",
91 "LL",
"LM",
"LN",
"LO",
"LP",
"LQ",
"LR",
"LS",
"LT",
"MP",
"MQ",
"MR",
92 "MS",
"OA",
"OB",
"OC",
"OD",
"OE",
"OU",
"OV",
"OW",
"OX",
"OY",
"OZ",
105 "AB",
"AG",
"AK",
"AP",
"AT",
"AU",
"AV",
"BA",
"BB",
"BD",
"BJ",
"BP",
106 "BR",
"BS",
"BW",
"BY",
"CI",
"CJ",
"DA",
"DB",
"DC",
"DD",
"DE",
"DF",
107 "DG",
"DH",
"DI",
"DJ",
"DK",
"DL",
"DM",
"FS",
"FT",
"FU",
"FV",
"FW",
108 "FX",
"FY",
"FZ",
"GA",
"GB",
"HT",
"HU",
"HV",
"HW",
"HX",
"HY",
"HZ",
109 "LA",
"LB",
"LC",
"LD",
"LE",
"LF",
"LG",
"LH",
"LI",
"LJ",
"LU",
"LV",
110 "LX",
"LY",
"LZ",
"MA",
"MB",
"MC",
"MD",
"ME",
"OF",
"OG",
"OH",
"OI",
111 "OJ",
"PA",
"PE",
"PF",
"PG",
"PH",
"PI",
"PJ",
"PK",
nullptr
115 "AA",
"AC",
"AD",
"AE",
"AF",
"AH",
"AI",
"AQ",
"AR",
"AS",
"AW",
"AY",
116 "AZ",
"BC",
"BE",
"BF",
"BG",
"BH",
"BI",
"BK",
"BL",
"BM",
"BQ",
"BT",
117 "BU",
"BV",
"BZ",
"CA",
"CB",
"CC",
"CD",
"CE",
"CF",
"CG",
"CH",
"CK",
118 "CL",
"CM",
"CN",
"CO",
"CP",
"CV",
"CW",
"CX",
"CY",
"CZ",
"DN",
"DP",
119 "DQ",
"DR",
"DS",
"DT",
"DU",
"DV",
"DW",
"DX",
"DY",
"DZ",
"EA",
"EB",
120 "EC",
"ED",
"EE",
"EF",
"EG",
"EH",
"EI",
"EJ",
"EK",
"EL",
"EM",
"EN",
121 "EP",
"EQ",
"ER",
"ES",
"ET",
"EU",
"EV",
"EW",
"EX",
"EY",
"EZ",
"FA",
122 "FC",
"FD",
"FE",
"FF",
"FG",
"FH",
"FI",
"FJ",
"FK",
"FL",
"GC",
"GD",
123 "GE",
"GF",
"GG",
"GH",
"GJ",
"GK",
"GL",
"GO",
"GP",
"GQ",
"GR",
"GS",
124 "GT",
"GU",
"GV",
"GW",
"GX",
"GY",
"GZ",
"HJ",
"HK",
"HL",
"HM",
"HN",
125 "HO",
"HP",
"HQ",
"HR",
"HS",
"JF",
"JG",
"JH",
"JI",
"JJ",
"JK",
"JL",
126 "JM",
"JN",
"JO",
"JP",
"JQ",
"JR",
"JS",
"JT",
"JU",
"JV",
"JW",
"JX",
127 "JY",
"JZ",
"KA",
"KB",
"KC",
"KD",
"KE",
"KF",
"KG",
"KH",
"KI",
"KJ",
128 "KK",
"KL",
"KM",
"KN",
"KO",
"KP",
"KQ",
"KR",
"KS",
"KT",
"KU",
"KV",
129 "KX",
"KY",
"KZ",
"MF",
"MG",
"MH",
"MI",
"MJ",
"MK",
"ML",
"MM",
"MN",
130 "MO",
"MT",
"MU",
"MV",
"MW",
"MX",
"MY",
"MZ",
"OK",
"OL",
"OM",
"ON",
131 "OO",
"OP",
"OQ",
"OR",
"OS",
"OT",
"PP",
"PQ",
"PR",
"PS",
"PT",
"PU",
136 "NC_",
"NG_",
"NM_",
"NP_",
"NR_",
"NT_",
"NW_",
"XM_",
"XP_",
"XR_",
147 "AF",
"AY",
"DQ",
"EF",
"EU",
"FJ",
"GQ",
"HQ",
"JF",
"JN",
"JQ",
"JX",
148 "KC",
"KF",
"KJ",
"KM",
"KP",
"KR",
"KT",
"KU",
"KX",
"KY",
"MF",
"MG",
149 "MH",
"MK",
"MN",
"MT",
nullptr
153 "BK",
"BL",
"GJ",
"GK",
nullptr
157 "BR",
"HT",
"HU",
nullptr
169 "CH",
"CT",
"CU",
"DF",
"DG",
"DS",
170 "EM",
"EN",
"EP",
"EQ",
"FA",
"FM",
171 "GG",
"GJ",
"GK",
"GL",
"HT",
"HU",
172 "JH",
"KB",
"KD",
"KE",
"KI",
"KK",
173 "KL",
"KN",
"KQ",
"KV",
"KZ",
"LD",
189 "Ill",
"JAN",
"FEB",
"MAR",
"APR",
"MAY",
"JUN",
"JUL",
"AUG",
"SEP",
"OCT",
"NOV",
"DEC",
nullptr
193 "bp",
"bp.",
"bp,",
"AA",
"AA.",
"AA,",
nullptr
240 auto ptr = stoken->
list.begin();
241 auto sptr =
next(ptr);
242 for (
i = 1;
i < stoken->
num;
i++, ptr = sptr, sptr =
next(ptr)) {
264 bool end_of_file =
false;
268 finfo.
pos = (size_t)ftell(
fp);
269 if (! fgets(finfo.
str,
sizeof(finfo.
str) - 1,
fp))
275 auto n = strlen(finfo.
str);
278 if (finfo.
str[
n] !=
'\n' && finfo.
str[
n] !=
'\r') {
284 return (end_of_file);
290 const char* p =
nullptr;
299 for (p = fbuf.
current, q = res,
i = 0;
i <
l;
i++, p++) {
301 if (*p ==
'\n' || *p ==
'\r') {
315 bool end_of_file =
false;
326 return (end_of_file);
347 return (end_of_file);
360 const char* p = keyword.
data();
380 const char* p = locus;
384 for (; *p !=
'\0'; p++) {
385 if ((*p >=
'0' && *p <=
'9') || (*p >=
'A' && *p <=
'Z') ||
388 if (((*p >=
'a' && *p <=
'z') || *p ==
'_' || *p ==
'-' || *p ==
'(' ||
389 *p ==
')' || *p ==
'/') &&
422 bool underscore =
false;
426 for (p = locus, x = y = 0; *p !=
'\0'; p++) {
427 if ((*p >=
'0' && *p <=
'9') || (*p >=
'A' && *p <=
'Z')) {
432 }
else if (*p ==
'_')
438 if (*p !=
'\0' || x == 0 || y == 0) {
455 if (date[2] ==
'-' && date[6] ==
'-' &&
468 static const vector<string> strandSpecs = {
469 " ",
"ss-",
"ds-",
"ms-"
471 static const auto strandSpecCount = strandSpecs.size();
475 for (
int i = 0;
i < strandSpecCount; ++
i) {
503 static const vector<string> topologies = {
504 " ",
"linear ",
"circular ",
"tandem "
506 static const auto topologyCount = topologies.size();
510 for (
int i = 0;
i < topologyCount; ++
i) {
617 if (ptr[11] !=
'\0' && ptr[11] !=
'\n' && ptr[11] !=
' ' &&
642 for (p++; *p ==
' ';)
683 return find_if(current_it, tempString.
end(),
isSpace);
691 return find_if_not(current_it, tempString.
end(),
isSpace);
705 if (locusLine.
substr(0, 5) !=
"LOCUS") {
711 if (it == locusLine.
end()) {
716 if (it == locusLine.
end()) {
722 if (it == locusLine.
end()) {
741 if ((space_it - it) == 3) {
742 auto currentSubstr = locusLine.
substr(it - begin(locusLine), 3);
743 if (currentSubstr ==
"ss-" ||
744 currentSubstr ==
"ds-" ||
745 currentSubstr ==
"ms-") {
757 if (it != locusLine.
end()) {
764 if (it != locusLine.
end()) {
771 if (it != locusLine.
end()) {
806 bool badlocus =
false;
807 if (stoken->num > 2) {
812 if (
StringLen(p) > 78 && p[28] ==
' ' && p[63] ==
' ' && p[67] ==
' ') {
831 auto ptr = stoken->list.begin();
834 next(ptr) != stoken->list.end() && *
next(ptr) ==
"SV"s) {
835 for (
i = 0, p = finfo.
str; *p !=
'\0'; p++)
836 if (*p ==
';' && p[1] ==
' ')
840 if (! ptr->empty() && ptr->back() ==
';')
846 if (
i != 6 || (stoken->num != 10 && stoken->num != 11)) {
864 if (it == stoken->list.end() || it->empty() ||
891 entry->
bases = (size_t)atoi(bases);
897 auto it = stoken->list.begin();
898 for (
i = 1;
i < stoken->num; ++
i)
905 auto it = stoken->list.begin();
906 for (
i = 1;
i < j; ++
i)
948 if (!
str || *
str ==
'\0')
952 if (*
str >=
'0' && *
str <=
'9')
963 auto pos =
str.find_last_of(
"0123456789");
964 if (pos != string::npos) {
989 return (c >=
'A' && c <=
'Z');
994 const Char* p = acnum;
996 if (! p || *p ==
'\0')
1020 if (
isdigit(p[7]) && p[8] ==
'S' &&
1028 if (
isdigit(p[5]) && p[6] ==
'S' &&
1047 if (
i == 0 || ! accpref)
1050 if (2 <
i &&
i < 10)
1053 const char**
b = accpref;
1069 else if (accformat == 8)
1071 else if (accformat == 4)
1076 if (p[0] >=
'0' && p[0] <=
'9' && p[1] >=
'0' && p[1] <=
'9') {
1077 for (p += 2; *p ==
'0';)
1090 if (accession.length() < 13 ||
1091 accession.length() > 15 ||
1092 accession[6] !=
'S') {
1097 if (any_of(begin(accession),
1098 begin(accession) + 4,
1099 [](
const char c) {
return !
isalpha(c); })) {
1104 if (!
isdigit(accession[4]) ||
1113 if (any_of(begin(accession) + 7,
1115 [](
const char c) {
return !
isdigit(c); })) {
1120 if (all_of(begin(accession) + 7,
1122 [](
const char c) {
return c ==
'0'; })) {
1131 if (initialType == -1) {
1135 if (accession[0] ==
'G')
1137 switch (initialType) {
1149 if (accession[0] ==
'K' || accession[1] ==
'T') {
1150 switch (initialType) {
1162 if (initialType == 1) {
1163 if (accession[0] ==
'I') {
1166 if (accession[0] ==
'H') {
1196 auto length = accession.length();
1200 all_of(begin(accession) + 2, end(accession), [](
const char c) {
return isdigit(c); })) {
1204 if (length > 12 && length < 16 && accession[6] ==
'S') {
1211 if (accession.substr(0, 3) ==
"NZ_"sv) {
1212 accession = accession.substr(3);
1214 length = accession.length();
1215 if (length < 12 || length > 17) {
1220 if (all_of(begin(accession), begin(accession) + 4, [](
const char c) {
return isalpha(c); }) &&
1221 all_of(begin(accession) + 4, end(accession), [](
const char c) {
return isdigit(c); })) {
1224 if (any_of(begin(accession) + 6, end(accession), [](
const char c) {
return c !=
'0'; })) {
1226 }
else if (accession[4] ==
'0' && accession[5] ==
'0') {
1237 if (all_of(begin(accession), begin(accession) + 6, [](
const char c) {
return isalpha(c); }) &&
1238 all_of(begin(accession) + 6, end(accession), [](
const char c) {
return isdigit(c); })) {
1240 if (any_of(begin(accession) + 8, end(accession), [](
const char c) {
return c !=
'0'; })) {
1244 if (accession[6] ==
'0' && accession[7] ==
'0') {
1258 if (! acc || acc[0] ==
'\0')
1269 return (*
b !=
nullptr);
1272 if (acc[0] <
'A' || acc[0] >
'Z' || acc[1] <
'0' || acc[1] >
'9' ||
1273 ((acc[3] <
'0' || acc[3] >
'9') && (acc[3] <
'A' || acc[3] >
'Z')) ||
1274 ((acc[4] <
'0' || acc[4] >
'9') && (acc[4] <
'A' || acc[4] >
'Z')) ||
1275 acc[5] <
'0' || acc[5] >
'9')
1278 if (acc[0] >=
'O' && acc[0] <=
'Q') {
1279 if ((acc[2] <
'0' || acc[2] >
'9') && (acc[2] <
'A' || acc[2] >
'Z'))
1281 }
else if (acc[2] <
'A' || acc[2] >
'Z')
1287 if (acc[0] >=
'O' && acc[0] <=
'Q')
1290 if (acc[6] <
'A' || acc[6] >
'Z' || acc[9] <
'0' || acc[9] >
'9' ||
1291 ((acc[7] <
'A' || acc[7] >
'Z') && (acc[7] <
'0' || acc[7] >
'9')) ||
1292 ((acc[8] <
'A' || acc[8] >
'Z') && (acc[8] <
'0' || acc[8] >
'9')))
1299 static bool sCheckAccession(
const list<string>& tokens,
1302 const char* priacc,
int skip)
1316 auto it = tokens.begin();
1322 if((priformat == 3 || priformat == 4 || priformat == 8) &&
1329 for(; it != tokens.end(); ++it)
1332 if(acnum[0] ==
'-' && acnum[1] ==
'\0')
1335 if(skip == 2 &&
count == 0)
1336 accformat = priformat;
1341 if(acnum[
len-1] ==
';')
1349 if(
len != 8 &&
len != 10)
1353 for(
i = 2;
i < 8 && badac ==
false;
i++)
1354 if(acnum[
i] <
'0' || acnum[
i] >
'9')
1358 else if(accformat == 2)
1360 if(
len != 9 &&
len != 12)
1364 for(
i = 3;
i <
len && badac ==
false;
i++)
1365 if(acnum[
i] <
'0' || acnum[
i] >
'9')
1369 else if(accformat == 3)
1371 if(len < 12 || len > 14)
1375 for(
i = 4;
i <
len && badac ==
false;
i++)
1376 if(acnum[
i] <
'0' || acnum[
i] >
'9')
1380 else if(accformat == 8)
1382 if(len < 15 || len > 17)
1386 for(
i = 6;
i <
len && !badac;
i++)
1387 if(acnum[
i] <
'0' || acnum[
i] >
'9')
1391 else if(accformat == 4)
1393 if(len < 15 || len > 17)
1397 for(
i = 7;
i <
len && badac ==
false;
i++)
1398 if(acnum[
i] <
'0' || acnum[
i] >
'9')
1402 else if(accformat == 5)
1408 for(
i = 5;
i <
len && badac ==
false;
i++)
1409 if(acnum[
i] <
'0' || acnum[
i] >
'9')
1413 else if(accformat == 6)
1415 if(
len != 11 || acnum[0] !=
'N' || acnum[1] !=
'Z' ||
1416 acnum[2] !=
'_' || acnum[3] <
'A' || acnum[3] >
'Z' ||
1417 acnum[4] <
'A' || acnum[4] >
'Z')
1421 for(
i = 5;
i <
len && badac ==
false;
i++)
1422 if(acnum[
i] <
'0' || acnum[
i] >
'9')
1426 else if(accformat == 7)
1428 if(len < 13 || len > 15)
1432 for(
i = 7;
i <
len && badac ==
false;
i++)
1433 if(acnum[
i] <
'0' || acnum[
i] >
'9')
1437 else if(accformat == 0)
1439 if(
len != 6 &&
len != 10)
1441 else if(acnum[0] >=
'A' && acnum[0] <=
'Z')
1454 for(
i = 1;
i < 6 && badac ==
false;
i++)
1455 if(acnum[
i] <
'0' || acnum[
i] >
'9')
1468 "Bad accession #, %s for this entry", acnum);
1474 if(skip == 2 &&
count == 0 && !iswgs &&
1475 (accformat == 3 || accformat == 4 || accformat == 8))
1478 "This record has a WGS 'project' accession as its primary accession number. WGS project-accessions are only expected to be used as secondary accession numbers.");
1524 auto tbp = stoken->
list.begin();
1528 if ((priformat == 3 || priformat == 4 || priformat == 8) &&
1535 for (; tbp != stoken->
list.end(); ++tbp) {
1537 if (acnum[0] ==
'-' && acnum[1] ==
'\0')
1540 if (skip == 2 &&
count == 0)
1541 accformat = priformat;
1546 if (acnum[
len - 1] ==
';') {
1551 if (accformat == 1) {
1553 }
else if (accformat == 2) {
1555 }
else if (accformat == 3) {
1557 }
else if (accformat == 8) {
1559 }
else if (accformat == 4) {
1561 }
else if (accformat == 5) {
1563 }
else if (accformat == 6) {
1564 badac = (
len != 11 || acnum[0] !=
'N' || acnum[1] !=
'Z' ||
1565 acnum[2] !=
'_' || acnum[3] <
'A' || acnum[3] >
'Z' ||
1566 acnum[4] <
'A' || acnum[4] >
'Z') ||
1568 }
else if (accformat == 7) {
1570 }
else if (accformat == 9) {
1572 }
else if (accformat == 0) {
1573 if (
len != 6 &&
len != 10)
1594 if (skip == 2 &&
count == 0 && ! iswgs &&
1595 (accformat == 3 || accformat == 4 || accformat == 8)) {
1608 if (acc[2] ==
'\0') {
1663 if (acc[1] ==
'\0' && (*acc ==
'I' || *acc ==
'A' || *acc ==
'E')) {
1664 if (parseInfo.
all ==
true ||
1680 if (
i != 2 &&
i != 4)
1684 if (acc[0] ==
'D' &&
1687 if ((acc[0] ==
'E' || acc[0] ==
'Y') &&
1720 if (! acc || *acc ==
'\0')
1729 if (acc[0] ==
'U' && acc[1] ==
'\0' &&
1775 if (! acc || *acc ==
'\0' ||
StringLen(acc) != 4)
1959 if (skip == 1 && ! stoken->list.empty()) {
1960 stoken->list.pop_front();
1963 if (skip == 0 && ! stoken->list.empty()) {
1964 auto tail = entry->
secaccs.before_begin();
1967 entry->
secaccs.splice_after(tail, stoken->list);
1975 if (stoken->num < 2) {
1995 temp += to_string(entry->
vernum);
2019 if (acc[0] >=
'A' && acc[0] <=
'Z' && acc[1] >=
'A' && acc[1] <=
'Z') {
2022 if (acc[2] >=
'A' && acc[2] <=
'Z' && acc[3] >=
'A' && acc[3] <=
'Z') {
2023 if (acc[4] >=
'A' && acc[4] <=
'Z') {
2028 }
else if (acc[2] ==
'_') {
2043 if (stoken->num > 2)
2053 stoken->list.pop_front();
2054 stoken->list.pop_front();
2055 entry->
secaccs = std::move(stoken->list);
2066 if (
i == 3 ||
i == 8) {
2069 }
else if (
i == 5) {
2070 const char* p = entry->
acnum;
2164 const char* p = keyword.
data();
2166 bool done = end_of_file;
2200 fprintf(stderr,
"Unknown flatfile format.\n");
void FreeIndexblk(IndexblkPtr ibp)
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
#define ERR_ENTRY_Skipped
bool StringEquNI(const char *s1, const char *s2, size_t n)
bool StringEquN(const char *s1, const char *s2, size_t n)
bool StringEqu(const char *s1, const char *s2)
void StringCpy(char *d, const char *s)
size_t StringLen(const char *s)
void MemCpy(void *p, const void *q, size_t sz)
void FtaInstallPrefix(int prefix, const char *name, const char *location)
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static const char * str(char *buf, int n)
static EAccessionInfo IdentifyAccession(const CTempString &accession, TParseFlags flags=fParse_AnyRaw)
Deduces information from a bare accession a la WHICH_db_accession; may report false negatives on prop...
static E_Choice GetAccType(EAccessionInfo info)
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
const_iterator end() const
Return an iterator to the string's ending position (one past the end of the represented sequence)
static int StringToNonNegativeInt(const CTempString str, TStringToNumFlags flags=0)
Convert string to non-negative integer value.
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
const char * const_iterator
const char * data(void) const
Return a pointer to the array represented.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
CTempString substr(size_type pos) const
Obtain a substring from this string, beginning at a given offset.
size_type size(void) const
Return the length of the represented array.
static string & ToLower(string &str)
Convert string to lower case – string& version.
const_iterator begin() const
Return an iterator to the string's starting position.
@ eCurrent
Use current time. See also CCurrentTime.
@ e_Other
for historical reasons, 'other' = 'refseq'
@ e_Tpe
Third Party Annot/Seq EMBL.
@ e_Tpd
Third Party Annot/Seq DDBJ.
@ e_not_set
No variant selected.
@ e_Tpg
Third Party Annot/Seq Genbank.
@ ParFlat_COL_MOLECULE_NEW
@ ParFlat_COL_TOPOLOGY_NEW
static int s_RefineWGSType(string_view accession, int initialType)
static const char * ParFlat_NA_array_DDBJ[]
bool sIsUpperAlpha(char c)
static const char * ddbj_accpref[]
static const char * ddbj_wgs_accpref[]
static const char * ncbi_tpa_accpref[]
NCBI_UNUSED bool SkipTitle(FILE *fp, FinfoBlk &finfo, const char *str, size_t len)
static bool CheckAccession(TokenStatBlkPtr stoken, Parser::ESource source, Parser::EMode mode, const char *priacc, unsigned skip)
CRef< CDate_std > GetUpdateDate(const char *ptr, Parser::ESource source)
static CTempString::const_iterator sFindNextSpace(const CTempString &tempString, CTempString::const_iterator current_it)
static bool s_IsVDBWGSScaffold(string_view accession)
CSeq_id::E_Choice GetNucAccOwner(const CTempString &acc)
static const set< string_view > k_WgsScaffoldPrefix
static const char * GetResidue(TokenStatBlkPtr stoken)
int fta_if_wgs_acc(string_view accession)
static const char * XML_STRAND_array[]
static bool IsWGSAccPrefix(const Parser &parseInfo, const char *acc)
static const char * ParFlat_NA_array[]
const char ** GetAccArray(Parser::ESource source)
static const char * refseq_accpref[]
bool isSupportedAccession(CSeq_id::E_Choice type)
static const char * acc_tsa_allowed[]
int CheckTPG(const string &str)
static void sSetLocusLineOffsets(const CTempString &locusLine, LocusCont &offsets)
void ResetParserStruct(ParserPtr pp)
Int2 CheckNA(const char *str)
bool FindNextEntryBuf(bool end_of_file, FileBuf &fbuf, FinfoBlk &finfo, const CTempString &keyword)
static CTempString::const_iterator sFindNextNonSpace(const CTempString &tempString, CTempString::const_iterator current_it)
bool sNotAllDigits(const char *first, const char *last)
static const char * sprot_accpref[]
CSeq_id::E_Choice GetProtAccOwner(const CTempString &acc)
static const char * XML_TPG_array[]
int CheckSTRAND(const string &str)
bool XMLIndex(ParserPtr pp)
static const char * ParFlat_RESIDUE_STR[]
static const char * ParFlat_DIV_array[]
static Int2 FileGetsBuf(char *res, Int4 size, FileBuf &fbuf)
IndexblkPtr InitialEntry(ParserPtr pp, FinfoBlk &finfo)
bool SprotIndex(ParserPtr pp, void(*fun)(IndexblkPtr entry, char *offset, Int4 len))
static const char * lanl_accpref[]
void DelNonDigitTail(string &str)
static const char * month_name[]
bool GenBankIndex(ParserPtr pp)
static const char * ddbj_tpa_accpref[]
bool SkipTitleBuf(FileBuf &fbuf, FinfoBlk &finfo, const CTempString &keyword)
Int4 IsNewAccessFormat(const Char *acnum)
bool EmblIndex(ParserPtr pp, void(*fun)(IndexblkPtr entry, char *offset, Int4 len))
static const map< Parser::ESource, string > sourceNames
static bool CheckLocusSP(const char *locus)
Int2 XMLCheckTPG(string_view str)
Int2 CheckDIV(const char *str)
bool FlatFileIndex(ParserPtr pp, void(*fun)(IndexblkPtr entry, char *offset, Int4 len))
bool GetAccession(const Parser *pp, string_view str, IndexblkPtr entry, unsigned skip)
static const char * ValidMolTypes[]
void CloseFiles(ParserPtr pp)
static void IsTLSAccPrefix(const Parser &parseInfo, const char *acc, IndexblkPtr ibp)
bool IsSPROTAccession(const char *acc)
static bool isSpace(char c)
static void IsTSAAccPrefix(const Parser &parseInfo, const char *acc, IndexblkPtr ibp)
Int2 XMLCheckSTRAND(string_view str)
static bool fta_check_embl_moltype(char *str)
void DelNoneDigitTail(char *str)
bool XReadFileBuf(FileBuf &fbuf, FinfoBlk &finfo)
bool CkLocusLinePos(char *offset, Parser::ESource source, LocusContPtr lcp, bool is_mga)
static bool XReadFile(FILE *fp, FinfoBlk &finfo)
static bool IsValidAccessPrefix(const char *acc, const char **accpref)
void MsgSkipTitleFail(const char *flatfile, FinfoBlk &finfo)
static bool IsTPAAccPrefix(const Parser &parseInfo, const char *acc)
static bool CheckLocus(const char *locus, Parser::ESource source)
static const char * embl_accpref[]
static const char * ncbi_accpref[]
static const char * ncbi_wgs_accpref[]
static bool CkDateFormat(const char *date)
static bool IsPatentedAccPrefix(const Parser &parseInfo, const char *acc)
static bool fta_if_master_wgs_accession(const char *acnum, Int4 accformat)
Int2 CheckNADDBJ(const char *str)
static const char * ParFlat_AA_array_DDBJ[]
#define ERR_FORMAT_BadlyFormattedIDLine
#define ERR_ACCESSION_WGSProjectAccIsPri
#define ERR_ACCESSION_NoAccessNum
#define ERR_FORMAT_LocusLinePosition
#define ERR_LOCUS_BadLocusName
#define ERR_ACCESSION_BadAccessNum
#define ERR_FORMAT_IllegalCAGEMoltype
#define ERR_LOCUS_NoLocusName
#define ERR_FORMAT_InvalidIDlineMolType
const struct ncbi::grid::netcache::search::fields::SIZE size
const CharType(& source)[N]
static PCRE2_SIZE * offsets
static SLJIT_INLINE sljit_ins l(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
CRef< objects::CDate_std > date
vector< IndexblkPtr > entrylist
Int2 StringMatchIcase(const Char **array, string_view text)
Int2 MatchArraySubString(const Char **array, string_view text)
unique_ptr< TokenStatBlk > TokenString(const char *str, Char delimiter)
CRef< CDate_std > get_full_date(const char *s, bool is_ref, Parser::ESource source)
bool ParseAccessionRange(list< string > &tokens, unsigned skip)
Int2 fta_StringMatch(const Char **array, string_view text)