49 #define THIS_FILE "xm_index.cpp"
51 #define XML_FAKE_ACC_TAG "AC "
174 for (p =
buf, q =
buf; *p !=
'\0';) {
199 if (! entry || ! xip || xip->
start == 0 || xip->
end == 0 ||
215 for (; xip; xip = xip->
next)
237 for (; tlen > 0 &&
str[
i] >=
'0' &&
str[
i] <=
'9';
i--)
243 if (len2 > len1 &&
str[
i] ==
'0') {
246 for (tlen = len2 - len1; tlen > 0 &&
str[
i] ==
'0';
i--)
255 for (q = &
str[
i + 1], p = q; *p ==
'0';)
259 if (atoi(p) != (
int)
i) {
275 const char* segtotal;
277 if (! entry || ! ibp || ! ibp->
xip)
280 for (xip = ibp->
xip; xip; xip = xip->
next)
292 if (stoken->
num > 2) {
313 return (
config.ffbuf.start !=
nullptr);
319 if (*
config.ffbuf.current ==
'\0') {
322 return *(
config.ffbuf.current++);
360 for (count = 0, line = 1;;) {
373 for (
i = 1;
i < 50;
i++) {
382 if (ch ==
'<' || ch ==
'>')
396 ibp->
offset = count - start_len;
420 p = s + ((s[1] ==
'/') ? 2 : 1);
421 for (xkbp =
xmkwl; xkbp->
str; xkbp++)
426 if (! ibp->
xip || xip->
tag != xkbp->
tag) {
455 if (xip->
start != 0) {
467 for (tibnp = ibnp,
i = 0; tibnp;
i++, tibnp = ibnp) {
486 for (p = line; *p !=
'\0' && *p !=
' ' && *p !=
'\t';)
499 for (p = q + 1; *p >=
'0' && *p <=
'9';)
529 if (! ibp || ! ibp->
xip || ! entry)
536 ibp->
acnum[0] =
'\0';
537 for (xip = ibp->
xip; xip; xip = xip->
next) {
545 size_t imax = xip->
end - xip->
start;
546 if (imax > (
int)
sizeof(ibp->
locusname) - 1)
556 size_t imax = xip->
end - xip->
start;
557 if (imax > (
int)
sizeof(ibp->
acnum) - 1)
558 imax =
sizeof(ibp->
acnum) - 1;
560 ibp->
acnum[imax] =
'\0';
567 if (ibp->
acnum[0] ==
'\0')
573 for (xip = ibp->
xip; xip; xip = xip->
next) {
589 for (xip = ibp->
xip; xip; xip = xip->
next) {
630 for (; xkbp->
str; xkbp++)
643 for (txip = xip; txip; txip = txip->
next) {
644 if (txip->
start == 0) {
648 if (txip->
end == 0) {
666 if (txip->start == 0) {
670 if (txip->end == 0) {
691 if (! entry || ! xip)
702 c = entry + xip->
start;
703 for (count = xip->
start + 1;;) {
707 if (*c ==
'\0' || count > xip->
end)
715 for (s[0] =
'<',
i = 1;
i < 50;
i++) {
718 if (*c ==
'\0' || count > xip->
end)
723 if (*c ==
'<' || *c ==
'>')
726 if (*c ==
'\0' || count > xip->
end)
731 p = s + ((s[1] ==
'/') ? 2 : 1);
738 }
else if ((s[1] !=
'/' && txipsub->
start != 0) ||
739 (s[1] ==
'/' && txipsub->
end != 0)) {
741 txipsub = txipsub->
next;
744 txipsub->
end = count -
i;
747 txipsub->
start = count;
771 for (xip = ibp->
xip; xip; xip = xip->
next)
791 for (xipsec = xip->
subtags; xipsec; xipsec = xipsec->
next)
797 for (xipsec = xip->
subtags; xipsec; xipsec = xipsec->
next) {
819 if (! entry || ! ibp || ! ibp->
xip)
822 for (xip = ibp->
xip; xip; xip = xip->
next)
837 for (xipkwd = xip->
subtags; xipkwd; xipkwd = xipkwd->
next)
842 for (xipkwd = xip->
subtags; xipkwd; xipkwd = xipkwd->
next) {
869 bool got_locus =
false;
870 bool got_length =
false;
871 bool got_moltype =
false;
872 bool got_division =
false;
873 bool got_update_date =
false;
874 bool got_definition =
false;
875 bool got_accession =
false;
876 bool got_version =
false;
877 bool got_source =
false;
878 bool got_organism =
false;
879 bool got_reference =
false;
880 bool got_primary =
false;
881 bool got_features =
false;
886 for (xip = ibp->
xip; xip; xip = xip->
next) {
896 got_update_date =
true;
898 got_definition =
true;
900 got_accession =
true;
908 got_reference =
true;
921 if (got_length ==
false)
923 if (got_moltype ==
false)
925 if (got_division ==
false)
929 if (got_definition ==
false)
931 if (got_accession ==
false) {
935 if (got_version ==
false) {
942 if (got_source ==
false)
944 if (got_organism ==
false)
954 if (got_features ==
false)
980 if (! ibp || ibp->
len == 0)
987 for (p = entry,
i = 0;
i < ibp->
len;
i++) {
994 if (c > 126 || (c < 32 && c != 10)) {
1001 if (
i != ibp->
len) {
1023 if (! entry || ! xip)
1029 c = entry + xip->
start;
1030 for (count = xip->
start + 1;;) {
1034 if (*c ==
'\0' || count > xip->
end)
1042 for (s[0] =
'<',
i = 1;
i < 50;
i++) {
1045 if (*c ==
'\0' || count > xip->
end)
1050 if (*c ==
'<' || *c ==
'>')
1053 if (*c ==
'\0' || count > xip->
end)
1058 p = s + ((s[1] ==
'/') ? 2 : 1);
1059 for (txkbp = xkbp; txkbp->
str; txkbp++)
1064 if (! xipsub || xipsub->
tag != txkbp->
tag) {
1070 xipsub = xipsub->
next;
1072 xipsub->
tag = txkbp->
tag;
1075 xipsub->
end = count -
i;
1078 xipsub->
start = count;
1084 if (xipsub->
end != 0) {
1086 xipsub = xipsub->
next;
1087 xipsub->
tag = txkbp->
tag;
1090 xipsub->
end = count -
i;
1093 if (xipsub->
start != 0) {
1095 xipsub = xipsub->
next;
1096 xipsub->
tag = txkbp->
tag;
1099 xipsub->
start = count;
1113 bool got_key =
false;
1114 bool got_location =
false;
1117 for (; xip; xip = xip->
next) {
1121 got_location =
true;
1129 if (! got_location) {
1139 bool got_from =
false;
1140 bool got_to =
false;
1141 bool got_point =
false;
1142 bool got_accession =
false;
1145 for (; xip; xip = xip->
next) {
1153 got_accession =
true;
1156 if (! got_accession) {
1162 if (got_from || got_to) {
1166 }
else if (got_from ==
false || got_to ==
false) {
1167 ErrPostEx(
SEV_ERROR,
ERR_FORMAT_XMLInvalidINSDInterval,
"%s must contain either both of %s and %s, or %s.",
XMLStringByTag(
xmsubkwl,
INSDINTERVAL),
XMLStringByTag(
xmintkwl,
INSDINTERVAL_FROM),
XMLStringByTag(
xmintkwl,
INSDINTERVAL_TO),
XMLStringByTag(
xmintkwl,
INSDINTERVAL_POINT));
1177 for (; xip; xip = xip->
next) {
1196 if (! xip || ! entry)
1199 for (; xip; xip = xip->
next) {
1213 for (xipfeat = xip->
subtags; xipfeat; xipfeat = xipfeat->
next) {
1217 for (txip = xipfeat->
subtags; txip; txip = txip->
next) {
1223 for (; xipsub; xipsub = xipsub->
next)
1232 for (; xipsub; xipsub = xipsub->
next)
1252 bool got_reference =
false;
1253 bool got_journal =
false;
1256 for (; xip; xip = xip->
next) {
1258 got_reference =
true;
1263 if (! got_reference) {
1268 if (! got_journal) {
1278 if (! reftag || *reftag ==
'\0')
1281 const string str =
"1.." + to_string(bases);
1297 for (p = reftag; *p !=
'\0' && *p !=
'(';)
1302 const string str =
"(bases 1 to " + to_string(bases) +
")";
1303 const string str1 =
"(bases 1 to " + to_string(bases) +
";";
1315 bool got_dbname =
false;
1316 bool got_id =
false;
1319 for (; xip; xip = xip->
next) {
1347 if (! xip || ! entry)
1350 for (; xip; xip = xip->
next) {
1363 for (xipref = xip->
subtags; xipref; xipref = xipref->
next) {
1368 reftagref =
nullptr;
1369 reftagpos =
nullptr;
1370 for (txip = xipref->
subtags; txip; txip = txip->
next) {
1392 for (; xipsub; xipsub = xipsub->
next)
1432 if (ibp->
len == 0) {
1495 if (! entry || ! xip)
1503 for (dbp =
nullptr, txip = xip->
subtags; txip; txip = txip->
next) {
1528 if (! entry || ! xip)
1531 for (; xip; xip = xip->
next)
1537 for (xipkwd = xip->
subtags; xipkwd; xipkwd = xipkwd->
next) {
1542 keywords.push_back(p);
1556 if (! entry || ! xip)
1559 while (xip && xip->
tag !=
tag)
1565 for (
i = 0, txip = xip->
subtags; txip; txip = txip->
next)
1573 if (
buf[0] !=
'\0') {
1577 for (
i = txip->
start, p = entry + txip->
start; i < txip->end;
i++)
void XMLIndexFree(XmlIndexPtr xip)
#define ERR_FORMAT_LineTypeOrder
#define ERR_FORMAT_MissingEnd
#define INSDSEQ_ACCESSION_VERSION
#define INSDSEQ_OTHER_SEQIDS
#define INSDREFERENCE_REMARK
#define INSDSEQ_PRIMARY_ACCESSION
#define INSDSEQ_UPDATE_RELEASE
#define INSDINTERVAL_POINT
#define INSDSEQ_DEFINITION
#define INSDINTERVAL_FROM
#define INSDQUALIFIER_NAME
#define INSDSEQ_CREATE_RELEASE
#define INSDSEQ_STRANDEDNESS
#define INSDREFERENCE_PUBMED
#define INSDSEQ_FEATURE_TABLE
#define INSDFEATURE_LOCATION
#define INSDREFERENCE_AUTHORS
#define INSDREFERENCE_XREF
#define INSDQUALIFIER_VALUE
#define INSDSEQ_REFERENCES
#define INSDSEQ_SECONDARY_ACCESSIONS
#define INSDREFERENCE_POSITION
#define INSDSEQ_SOURCE_DB
#define INSDREFERENCE_TITLE
#define INSDSECONDARY_ACCN
#define INSDFEATURE_QUALS
#define INSDINTERVAL_ACCESSION
#define INSDSEQ_DATABASE_REFERENCE
#define INSDSEQ_ENTRY_VERSION
#define INSDSEQ_CREATE_DATE
#define INSDFEATURE_INTERVALS
#define INSDREFERENCE_JOURNAL
#define INSDSEQ_UPDATE_DATE
#define INSDREFERENCE_MEDLINE
#define INSDREFERENCE_CONSORTIUM
#define INSDREFERENCE_REFERENCE
std::list< std::string > TKeywordList
bool StringEquN(const char *s1, const char *s2, size_t n)
bool StringEqu(const char *s1, const char *s2)
void StringCpy(char *d, const char *s)
void StringNCpy(char *d, const char *s, size_t n)
size_t StringLen(const char *s)
char * StringRChr(char *s, const char c)
void FtaInstallPrefix(int prefix, const char *name, const char *location)
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
uint16_t Uint2
2-byte (16-bit) unsigned integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
CRef< CDate_std > GetUpdateDate(const char *ptr, Parser::ESource source)
#define ERR_FORMAT_XMLMissingStartTag
#define ERR_VERSION_NonDigitVerNum
#define ERR_FORMAT_XMLInvalidINSDInterval
#define ERR_VERSION_MissingVerNum
#define ERR_ACCESSION_NoAccessNum
#define ERR_SEGMENT_BadLocusName
#define ERR_FORMAT_ContigInSegset
#define ERR_FORMAT_XMLFormatError
#define ERR_FORMAT_UnexpectedEnd
#define ERR_INPUT_CannotReadEntry
#define ERR_SEGMENT_IncompSeg
#define ERR_FORMAT_XMLMissingEndTag
#define ERR_VERSION_BadVersionLine
#define ERR_VERSION_InvalidVersion
#define ERR_FORMAT_MissingField
#define ERR_FORMAT_NonAsciiChar
#define ERR_ENTRY_InvalidLineType
#define ERR_VERSION_AccessionsDontMatch
const CharType(& source)[N]
const CConstRef< CSeq_id > GetAccession(const CSeq_id_Handle &id_handle)
static const char * str(char *buf, int n)
CRef< objects::CDate_std > date
vector< IndexblkPtr > entrylist
void check_est_sts_gss_tpa_kwds(ValNodePtr kwds, size_t len, IndexblkPtr entry, bool tpa_check, bool &specialist_db, bool &inferential, bool &experimental, bool &assembly)
TokenStatBlkPtr TokenString(char *str, Char delimiter)
void FreeTokenstatblk(TokenStatBlkPtr tsbp)
ValNodePtr ConstructValNode(CSeq_id::E_Choice choice, const char *data)
static bool XMLKeywordsCheck(const char *entry, IndexblkPtr ibp, Parser::ESource source)
static bool XMLTagCheck(XmlIndexPtr xip, XmlKwordBlkPtr xkbp)
static bool XMLIndexReferences(const char *entry, XmlIndexPtr xip, size_t bases)
static void XMLRestoreSpecialCharacters(char *buf)
DataBlkPtr XMLBuildRefDataBlk(char *entry, const XmlIndex *xip, int type)
char * XMLFindTagValue(const char *entry, const XmlIndex *xip, Int4 tag)
static Int2 XMLGetRefTypePos(char *reftag, size_t bases)
char * XMLGetTagValue(const char *entry, const XmlIndex *xip)
static bool XMLCheckRequiredFeatTags(XmlIndexPtr xip)
static bool XMLCheckRequiredRefTags(XmlIndexPtr xip)
static bool XMLCheckRequiredIntTags(XmlIndexPtr xip)
void XMLGetKeywords(const char *entry, const XmlIndex *xip, TKeywordList &keywords)
static void XMLGetSegment(const char *entry, IndexblkPtr ibp)
static const char * XMLStringByTag(XmlKwordBlkPtr xkbp, Int4 tag)
bool XMLIndex(ParserPtr pp)
static void XMLParseVersion(IndexblkPtr ibp, char *line)
static XmlIndexPtr XMLIndexSameSubTags(const char *entry, XmlIndexPtr xip, Int4 tag)
static int s_GetCharAndAdvance(Parser &config)
static Int2 XMLGetRefType(char *reftag, size_t bases)
static bool XMLCheckRequiredQualTags(XmlIndexPtr xip)
static bool XMLCheckRequiredTags(ParserPtr pp, IndexblkPtr ibp)
static bool XMLAccessionsCheck(ParserPtr pp, IndexblkPtr ibp, const char *entry)
static void XMLPerformIndex(ParserPtr pp)
char * XMLLoadEntry(ParserPtr pp, bool err)
static bool XMLSameTagsCheck(XmlIndexPtr xip, const char *name)
static XmlIndexPtr XMLIndexNew(void)
static bool XMLDelSegnum(IndexblkPtr ibp, const char *segnum, size_t len2)
static bool XMLIndexSubTags(const char *entry, XmlIndexPtr xip, XmlKwordBlkPtr xkbp)
static bool XMLCheckRequiredXrefTags(XmlIndexPtr xip)
static bool XMLIndexFeatures(const char *entry, XmlIndexPtr xip)
void s_SetPointer(Parser &config, size_t offset)
static void XMLInitialEntry(IndexblkPtr ibp, const char *entry, bool accver, Parser::ESource source)
char * XMLConcatSubTags(const char *entry, const XmlIndex *xip, Int4 tag, Char sep)
static bool s_HasInput(const Parser &config)
static bool XMLErrField(Int4 tag)