CPP_DOC/doxyhtml/indx__blk_8cpp_source.html

 /* $Id: indx_blk.cpp 103112 2024-09-10 13:39:01Z stakhovv $

  * ===========================================================================

  *

  *                            PUBLIC DOMAIN NOTICE

  *               National Center for Biotechnology Information

  *

  *  This software/database is a "United States Government Work" under the

  *  terms of the United States Copyright Act.  It was written as part of

  *  the author's official duties as a United States Government employee and

  *  thus cannot be copyrighted.  This software/database is freely available

  *  to the public for use. The National Library of Medicine and the U.S.

  *  Government have not placed any restriction on its use or reproduction.

  *

  *  Although all reasonable efforts have been taken to ensure the accuracy

  *  and reliability of the software and data, the NLM and the U.S.

  *  Government do not and cannot warrant the performance or results that

  *  may be obtained by using this software or data. The NLM and the U.S.

  *  Government disclaim all warranties, express or implied, including

  *  warranties of performance, merchantability or fitness for any particular

  *  purpose.

  *

  *  Please cite the author in any work or product based on this material.

  *

  * ===========================================================================

  *

  * File Name: indx_blk.cpp

  *

  * Author: Karl Sirotkin, Hsiu-Chuan Chen

  *

  * File Description:

  *      Common for all format functions.

  *

  */


 #include <ncbi_pch.hpp>


 #include "ftacpp.hpp"


 #include "index.h"

 #include <objtools/flatfile/flatfile_parse_info.hpp>


 #include "ftaerr.hpp"

 #include "indx_blk.h"

 #include "indx_def.h"

 #include "utilfun.h"

 #include <map>


 #ifdef THIS_FILE

 #  undef THIS_FILE

 #endif

 #define THIS_FILE "indx_blk.cpp"


 BEGIN_NCBI_SCOPE

 USING_SCOPE(objects);


 // clang-format off

 // All tables below are nullptr-terminated arrays of C strings.

 // Strand keywords; looked up by XMLCheckSTRAND() through StringMatchIcase().
 static const char* XML_STRAND_array[] = {

     "   ", "single", "double", "mixed", nullptr

 };


 // Topology keywords; looked up by XMLCheckTPG() through StringMatchIcase().
 static const char* XML_TPG_array[] = {

     "   ", "Linear", "Circular", "Tandem", nullptr

 };


 // Extra nucleic-acid molecule keyword; CkLocusLinePos() consults it only
 // when the source is DDBJ, and CheckNADDBJ() matches against it directly.
 static const char* ParFlat_NA_array_DDBJ[] = {

     "cDNA", nullptr

 };


 // Molecule keyword consulted by CkLocusLinePos() after ParFlat_NA_array
 // did not match (the name suggests an amino-acid/protein marker).
 static const char* ParFlat_AA_array_DDBJ[] = {

     "PRT", nullptr

 };


 // Molecule keywords matched by CheckNA() and CkLocusLinePos().
 static const char* ParFlat_NA_array[] = {

     "    ", "NA", "DNA", "genomic DNA", "other DNA", "unassigned DNA", "RNA",

     "mRNA", "rRNA", "tRNA", "uRNA", "scRNA", "snRNA", "snoRNA", "pre-RNA",

     "pre-mRNA", "genomic RNA", "other RNA", "unassigned RNA", "cRNA",

     "viral cRNA", nullptr

 };


 // Three-letter division codes matched by CheckDIV().
 static const char* ParFlat_DIV_array[] = {

     "   ", "PRI", "ROD", "MAM", "VRT", "INV", "PLN", "BCT", "RNA",

     "VRL", "PHG", "SYN", "UNA", "EST", "PAT", "STS", "ORG", "GSS",

     "HUM", "HTG", "CON", "HTC", "ENV", "TSA", nullptr

 };


 // Accession prefix tables (nullptr-terminated).  Their consumers are not
 // part of this section of the file; judging by the names, each table lists
 // the accession prefixes that belong to one source database or record class.
 // NOTE(review): confirm the exact use against the callers further down.

 // Two-letter prefixes, presumably those assigned to EMBL.
 static const char* embl_accpref[] = {

     "AJ", "AL", "AM", "AN", "AX", "BN", "BX", "CQ", "CR", "CS", "CT", "CU",

     "FB", "FM", "FN", "FO", "FP", "FQ", "FR", "GM", "GN", "HA", "HB", "HC",

     "HD", "HE", "HF", "HG", "HH", "HI", "JA", "JB", "JC", "JD", "JE", "LK",

     "LL", "LM", "LN", "LO", "LP", "LQ", "LR", "LS", "LT", "MP", "MQ", "MR",

     "MS", "OA", "OB", "OC", "OD", "OE", "OU", "OV", "OW", "OX", "OY", "OZ",

     nullptr

 };


 // Presumably the prefix used by LANL records ("AD" is also listed in ncbi_accpref).
 static const char* lanl_accpref[] = {

     "AD", nullptr

 };


 // Presumably the prefix used by Swiss-Prot records ("DD" is also listed in ddbj_accpref).
 static const char* sprot_accpref[] = {

     "DD", nullptr

 };


 // Two-letter prefixes, presumably those assigned to DDBJ.
 static const char* ddbj_accpref[] = {

     "AB", "AG", "AK", "AP", "AT", "AU", "AV", "BA", "BB", "BD", "BJ", "BP",

     "BR", "BS", "BW", "BY", "CI", "CJ", "DA", "DB", "DC", "DD", "DE", "DF",

     "DG", "DH", "DI", "DJ", "DK", "DL", "DM", "FS", "FT", "FU", "FV", "FW",

     "FX", "FY", "FZ", "GA", "GB", "HT", "HU", "HV", "HW", "HX", "HY", "HZ",

     "LA", "LB", "LC", "LD", "LE", "LF", "LG", "LH", "LI", "LJ", "LU", "LV",

     "LX", "LY", "LZ", "MA", "MB", "MC", "MD", "ME", "OF", "OG", "OH", "OI",

     "OJ", "PA", "PE", "PF", "PG", "PH", "PI", "PJ", "PK", nullptr

 };


 // Two-letter prefixes, presumably those assigned to NCBI/GenBank.
 static const char* ncbi_accpref[] = {

     "AA", "AC", "AD", "AE", "AF", "AH", "AI", "AQ", "AR", "AS", "AW", "AY",

     "AZ", "BC", "BE", "BF", "BG", "BH", "BI", "BK", "BL", "BM", "BQ", "BT",

     "BU", "BV", "BZ", "CA", "CB", "CC", "CD", "CE", "CF", "CG", "CH", "CK",

     "CL", "CM", "CN", "CO", "CP", "CV", "CW", "CX", "CY", "CZ", "DN", "DP",

     "DQ", "DR", "DS", "DT", "DU", "DV", "DW", "DX", "DY", "DZ", "EA", "EB",

     "EC", "ED", "EE", "EF", "EG", "EH", "EI", "EJ", "EK", "EL", "EM", "EN",

     "EP", "EQ", "ER", "ES", "ET", "EU", "EV", "EW", "EX", "EY", "EZ", "FA",

     "FC", "FD", "FE", "FF", "FG", "FH", "FI", "FJ", "FK", "FL", "GC", "GD",

     "GE", "GF", "GG", "GH", "GJ", "GK", "GL", "GO", "GP", "GQ", "GR", "GS",

     "GT", "GU", "GV", "GW", "GX", "GY", "GZ", "HJ", "HK", "HL", "HM", "HN",

     "HO", "HP", "HQ", "HR", "HS", "JF", "JG", "JH", "JI", "JJ", "JK", "JL",

     "JM", "JN", "JO", "JP", "JQ", "JR", "JS", "JT", "JU", "JV", "JW", "JX",

     "JY", "JZ", "KA", "KB", "KC", "KD", "KE", "KF", "KG", "KH", "KI", "KJ",

     "KK", "KL", "KM", "KN", "KO", "KP", "KQ", "KR", "KS", "KT", "KU", "KV",

     "KX", "KY", "KZ", "MF", "MG", "MH", "MI", "MJ", "MK", "ML", "MM", "MN",

     "MO", "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "OK", "OL", "OM", "ON",

     "OO", "OP", "OQ", "OR", "OS", "OT", "PP", "PQ", "PR", "PS", "PT", "PU",

     nullptr

 };


 // Three-character prefixes (two letters plus underscore), presumably RefSeq accessions.
 static const char* refseq_accpref[] = {

     "NC_", "NG_", "NM_", "NP_", "NR_", "NT_", "NW_", "XM_", "XP_", "XR_",

     "NZ_", nullptr

 };


 // Disabled table, kept in the source for reference only.
 /*

 static const char* refseq_prot_accpref[] = {

     "AP_", "NP_", "WP_", "XP_", "YP_", "ZP_", nullptr

 };

 */


 // Judging by the name, prefixes tolerated for TSA records - TODO confirm with caller.
 static const char* acc_tsa_allowed[] = {

     "AF", "AY", "DQ", "EF", "EU", "FJ", "GQ", "HQ", "JF", "JN", "JQ", "JX",

     "KC", "KF", "KJ", "KM", "KP", "KR", "KT", "KU", "KX", "KY", "MF", "MG",

     "MH", "MK", "MN", "MT", nullptr

 };


 // Judging by the name, NCBI prefixes for TPA records - TODO confirm with caller.
 static const char* ncbi_tpa_accpref[] = {

     "BK", "BL", "GJ", "GK", nullptr

 };


 // Judging by the name, DDBJ prefixes for TPA records - TODO confirm with caller.
 static const char* ddbj_tpa_accpref[] = {

     "BR", "HT", "HU", nullptr

 };


 // Judging by the name, NCBI prefixes for WGS records - TODO confirm with caller.
 static const char* ncbi_wgs_accpref[] = {

     "GJ", "GK", nullptr

 };


 // Judging by the name, DDBJ prefixes for WGS records - TODO confirm with caller.
 static const char* ddbj_wgs_accpref[] = {

     "HT", "HU", nullptr

 };

 // Set of two-letter accession prefixes; judging by the name, those used for
 // WGS scaffold records.  The lookup site is outside this section of the file.
 static const set<string_view> k_WgsScaffoldPrefix = {

     "CH", "CT", "CU", "DF", "DG", "DS",

     "EM", "EN", "EP", "EQ", "FA", "FM",

     "GG", "GJ", "GK", "GL", "HT", "HU",

     "JH", "KB", "KD", "KE", "KI", "KK",

     "KL", "KN", "KQ", "KV", "KZ", "LD",

     "ML", "MU", "PS"

 };


 // Printable name for each Parser::ESource value listed here.
 // NOTE(review): Parser::ESource::DDBJ is used elsewhere in this file but has
 // no entry in this map - confirm that lookups handle a missing key.
 static const map<Parser::ESource, string> sourceNames = {

     { Parser::ESource::unknown, "unknown" },

     { Parser::ESource::EMBL, "EMBL" },

     { Parser::ESource::GenBank, "GENBANK" },

     { Parser::ESource::SPROT, "Swiss-Prot" },

     { Parser::ESource::NCBI, "NCBI" },

     { Parser::ESource::LANL, "GSDB" },

     { Parser::ESource::Flybase, "FlyBase" },

     { Parser::ESource::Refseq, "RefSeq" }

 };


 // Upper-case three-letter month abbreviations, nullptr-terminated.  Entry 0
 // ("Ill") is a placeholder so that JAN..DEC sit at indexes 1..12.
 // CkDateFormat() passes this table to MatchArraySubString().
 static const char* month_name[] = {

     "Ill", "JAN", "FEB", "MAR", "APR", "MAY", "JUN", "JUL", "AUG", "SEP", "OCT", "NOV", "DEC", nullptr

 };


 // Length-unit tokens ("bp"/"AA" with optional trailing punctuation);
 // GetResidue() compares tokens against these ignoring case.
 static const char* ParFlat_RESIDUE_STR[] = {

     "bp", "bp.", "bp,", "AA", "AA.", "AA,", nullptr

 };


 // Molecule type values accepted by fta_check_embl_moltype() (exact,
 // case-sensitive comparison via StringEqu), nullptr-terminated.
 static const char* ValidMolTypes[] = {

     "genomic DNA",

     "genomic RNA",

     "mRNA",

     "tRNA",

     "rRNA",

     "snoRNA",

     "snRNA",

     "scRNA",

     "pre-RNA",

     "pre-mRNA",

     "other RNA",

     "other DNA",

     "transcribed RNA",

     "unassigned RNA",

     "unassigned DNA",

     "viral cRNA",

     nullptr

 };

 // clang-format on


 // functions below are implemented in different source files
 // (one indexing entry point per input format).  The EMBL and Swiss-Prot
 // variants additionally take a callback that receives an index entry plus
 // a text pointer and length.

 bool EmblIndex(ParserPtr pp, void (*fun)(IndexblkPtr entry, char* offset, Int4 len));

 bool GenBankIndex(ParserPtr pp);

 bool SprotIndex(ParserPtr pp, void (*fun)(IndexblkPtr entry, char* offset, Int4 len));

 bool XMLIndex(ParserPtr pp);


 /**********************************************************
  *
  *   static const char* GetResidue(stoken):
  *
  *      Walk the token list looking for a token that equals,
  *   ignoring case, one of the unit strings in
  *   ParFlat_RESIDUE_STR ("bp", "AA", ...).  Return the C
  *   string of the token that immediately PRECEDES the unit
  *   (InitialEntry() converts it to the sequence length).
  *      Return nullptr if no such pair exists, including the
  *   case of a list with fewer than two tokens.
  *
  *                                              3-25-93
  *
  **********************************************************/
 static const char* GetResidue(TokenStatBlkPtr stoken)
 {
     auto       value = stoken->list.begin();
     const auto last  = stoken->list.end();

     // An empty list has no "next" element: stepping past end() is undefined.
     if (value == last)
         return nullptr;

     // "value" trails "unit" by exactly one token.  The walk is bounded both
     // by the recorded token count and by the real end of the list.
     auto unit = next(value);
     for (Int2 i = 1; i < stoken->num && unit != last; ++i, value = unit, ++unit) {
         for (const char** b = ParFlat_RESIDUE_STR; *b; ++b) {
             if (NStr::CompareNocase(*b, *unit) == 0)
                 return value->c_str();
         }
     }

     return nullptr;
 }


 /**********************************************************
  *
  *   bool XReadFile(fp, finfo):
  *
  *      Read lines from "fp" into finfo.str until a line that
  *   does not start with '\n' is found.  Before every read the
  *   current file position is stored in finfo.pos; every line
  *   read (blank ones included) increments finfo.line.
  *      Trailing '\n' and '\r' characters are stripped from
  *   the line that is kept.
  *      Return TRUE if END_OF_FILE.
  *
  *                                              2-26-93
  *
  **********************************************************/
 static bool XReadFile(FILE* fp, FinfoBlk& finfo)
 {
     bool eof = false;

     // Seed the buffer with a blank line; the first pass always reads.
     StringCpy(finfo.str, "\n");
     do {
         finfo.pos = static_cast<size_t>(ftell(fp));
         if (fgets(finfo.str, sizeof(finfo.str) - 1, fp))
             ++finfo.line;
         else
             eof = true;
     } while (! eof && StringEquN(finfo.str, "\n", 1));

     // Chop the line terminator(s) off the tail.
     for (auto len = strlen(finfo.str); len > 0; --len) {
         auto& tail = finfo.str[len - 1];
         if (tail != '\n' && tail != '\r')
             break;
         tail = 0;
     }

     return eof;
 }


 /**********************************************************
  *
  *   static Int2 FileGetsBuf(res, size, fbuf):
  *
  *      fgets()-like reader for an in-memory buffer.  Copy
  *   characters from fbuf.current into "res" until a '\n' or
  *   '\r' has been copied, the end of the buffer (NUL) is
  *   reached, or size - 1 characters were copied.  "res" is
  *   always NUL-terminated and fbuf.current is advanced past
  *   the consumed characters.
  *      Return 0 if nothing is left to read, 1 otherwise.
  *
  **********************************************************/
 static Int2 FileGetsBuf(char* res, Int4 size, FileBuf& fbuf)
 {
     if (fbuf.current == nullptr || *fbuf.current == '\0')
         return 0;

     const char* src   = fbuf.current;
     char*       dst   = res;
     const Int4  limit = size - 1; // one byte stays reserved for the terminator

     // Stop at the buffer's NUL as well: a last line without a line
     // terminator must not make the copy run past the end of the data.
     for (Int4 copied = 0; copied < limit && *src != '\0'; ++copied) {
         const char c = *src++;
         *dst++       = c;
         if (c == '\n' || c == '\r')
             break;
     }

     *dst         = '\0';
     fbuf.current = src;
     return 1;
 }


 /**********************************************************
  *
  *   bool XReadFileBuf(fbuf, finfo):
  *
  *      In-memory counterpart of XReadFile(): read lines from
  *   "fbuf" into finfo.str until one does not start with '\n'.
  *   finfo.pos receives the buffer offset taken before each
  *   read and finfo.line counts every line read.
  *      Return TRUE when the buffer is exhausted.
  *
  **********************************************************/
 bool XReadFileBuf(FileBuf& fbuf, FinfoBlk& finfo)
 {
     bool eof = false;

     // Seed the buffer with a blank line; the first pass always reads.
     StringCpy(finfo.str, "\n");
     do {
         finfo.pos = fbuf.get_offs();
         if (FileGetsBuf(finfo.str, sizeof(finfo.str) - 1, fbuf) != 0)
             ++finfo.line;
         else
             eof = true;
     } while (! eof && StringEquN(finfo.str, "\n", 1));

     return eof;
 }


 /**********************************************************
  *
  *   bool SkipTitle(fp, finfo, str, len):
  *
  *      Read lines with XReadFile() until one starts with the
  *   first "len" characters of the keyword "str"; that line is
  *   left in "finfo".
  *      Return TRUE if the end of file was reached before any
  *   such line was found, FALSE otherwise.
  *
  *                                              3-5-93
  *
  **********************************************************/
 NCBI_UNUSED
 bool SkipTitle(FILE* fp, FinfoBlk& finfo, const char* str, size_t len)
 {
     for (;;) {
         if (XReadFile(fp, finfo))
             return true; // ran out of input without seeing the keyword
         if (StringEquN(finfo.str, str, len))
             return false; // finfo now holds the keyword line
     }
 }


 // CTempString convenience overload: forwards the keyword's data pointer and
 // size to the (const char*, size_t) version of SkipTitle().
 NCBI_UNUSED
 bool SkipTitle(FILE* fp, FinfoBlk& finfo, const CTempString& keyword)
 {
     const char*  text   = keyword.data();
     const size_t length = keyword.size();
     return SkipTitle(fp, finfo, text, length);
 }


 //  ----------------------------------------------------------------------------
 //  Read lines from the in-memory buffer until one starts with "keyword".
 //  Returns true if the buffer was exhausted first, false once the keyword
 //  line has been loaded into "finfo".
 //  ----------------------------------------------------------------------------
 bool SkipTitleBuf(FileBuf& fbuf, FinfoBlk& finfo, const CTempString& keyword)
 {
     const char*  text   = keyword.data();
     const size_t length = keyword.size();

     for (;;) {
         if (XReadFileBuf(fbuf, finfo))
             return true;
         if (StringEquN(finfo.str, text, length))
             return false;
     }
 }


 /**********************************************************
  *
  *   static bool CheckLocus(locus, source):
  *
  *      Validate a locus name.  Allowed characters are A-Z and
  *   0-9; '.' is also allowed for Flybase; a-z, '_', '-', '(',
  *   ')' and '/' are also allowed for Refseq.  A leading
  *   "SEG_" is skipped for NCBI and DDBJ sources.
  *      Return TRUE (and post an error) if the name is BAD,
  *   FALSE if it is acceptable.
  *
  **********************************************************/
 static bool CheckLocus(const char* locus, Parser::ESource source)
 {
     const bool isFlybase = (source == Parser::ESource::Flybase);
     const bool isRefseq  = (source == Parser::ESource::Refseq);

     const char* cur = locus;
     if (StringEquN(locus, "SEG_", 4) &&
         (source == Parser::ESource::NCBI || source == Parser::ESource::DDBJ))
         cur += 4;

     while (*cur != '\0') {
         const char c = *cur;

         const bool basic = (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') ||
                            (c == '.' && isFlybase);
         const bool refseqExtra =
             isRefseq && ((c >= 'a' && c <= 'z') || c == '_' || c == '-' ||
                          c == '(' || c == ')' || c == '/');

         if (! basic && ! refseqExtra) {
             ErrPostEx(SEV_ERROR, ERR_LOCUS_BadLocusName, "Bad locusname, <%s> for this entry", locus);
             return true;
         }
         ++cur;
     }

     return false;
 }


 /**********************************************************

  *

  *   static bool CheckLocusSP(locus):

  *

  *      Locus name consists of up tp 10 uppercase

  *   alphanumeric characters.

  *      Rule: X_Y format (SWISS-PROT), reject if not

  *      - X is a mnemonic code, up to 4 alphanumeric

  *        characters to represent the protein name.

  *      - Y is a mnemonic species identification code of

  *        at most 5 alphanumeric characters to representing

  *        the biological source of the protein.

  *      Checking the defined species identification code

  *   has not been implemented.

  *

  *      Example:  RL1_ECOLI   FER_HALHA

  *

  **********************************************************/

 static bool CheckLocusSP(const char* locus)

 {

     const char* p;


     bool underscore = false;

     Int2 x;

     Int2 y;


     for (p = locus, x = y = 0; *p != '\0'; p++) {

         if ((*p >= '0' && *p <= '9') || (*p >= 'A' && *p <= 'Z')) {

             if (! underscore)

                 x++;

             else

                 y++;

         } else if (*p == '_')

             underscore = true;

         else

             break;

     }


     if (*p != '\0' || x == 0 || y == 0) {

         ErrPostEx(SEV_ERROR, ERR_LOCUS_BadLocusName, "Bad locusname, <%s> for this entry", locus);

         return true;

     }


     return false;

 }


 /**********************************************************
  *
  *   static bool CkDateFormat(date):
  *
  *      Return FALSE if date != dd-mmm-yyyy format.
  *
  *      Checks: '-' at positions 2 and 6, digits at positions
  *   0-1 and 7-10, and a month_name entry accepted by
  *   MatchArraySubString().
  *      The caller must supply at least 11 readable
  *   characters; the checks short-circuit from left to right.
  *
  **********************************************************/
 static bool CkDateFormat(const char* date)
 {
     // isdigit() is undefined for negative arguments other than EOF, so each
     // character is routed through unsigned char first.
     const auto digitAt = [date](int idx) {
         return isdigit(static_cast<unsigned char>(date[idx])) != 0;
     };

     return date[2] == '-' && date[6] == '-' &&
            digitAt(0) && digitAt(1) &&
            digitAt(7) && digitAt(8) &&
            digitAt(9) && digitAt(10) &&
            MatchArraySubString(month_name, date) >= 0;
 }


 /**********************************************************/

 int CheckSTRAND(const string& str)

 {

     static const vector<string> strandSpecs = {

         "   ", "ss-", "ds-", "ms-"

     };

     static const auto strandSpecCount = strandSpecs.size();


     string compare(str);

     NStr::ToLower(compare);

     for (int i = 0; i < strandSpecCount; ++i) {

         if (NStr::StartsWith(compare, strandSpecs[i])) {

             return i;

         }

     }

     return -1;

 }


 /**********************************************************
  *
  *   Int2 XMLCheckSTRAND(str):
  *
  *      Look "str" up in XML_STRAND_array with
  *   StringMatchIcase() and return its result unchanged.
  *
  **********************************************************/
 Int2 XMLCheckSTRAND(string_view str)
 {
     const Int2 match = StringMatchIcase(XML_STRAND_array, str);
     return match;
 }


 /**********************************************************
  *
  *   Int2 XMLCheckTPG(str):
  *
  *      Look "str" up in XML_TPG_array with
  *   StringMatchIcase().  A result of 0 (the blank entry) is
  *   reported as 1, the slot of "Linear"; every other result
  *   is returned unchanged.
  *
  **********************************************************/
 Int2 XMLCheckTPG(string_view str)
 {
     const Int2 match = StringMatchIcase(XML_TPG_array, str);
     if (match != 0)
         return match;
     return 1;
 }


 /**********************************************************/

 int CheckTPG(const string& str)

 {

     static const vector<string> topologies = {

         "         ", "linear   ", "circular ", "tandem   "

     };

     static const auto topologyCount = topologies.size();


     string compare(str);

     NStr::ToLower(compare);

     for (int i = 0; i < topologyCount; ++i) {

         if (NStr::StartsWith(compare, topologies[i])) {

             return i;

         }

     }

     return -1;

 }


 /**********************************************************
  *
  *   Int2 CheckNADDBJ(str):
  *
  *      Match "str" against the DDBJ-only molecule table
  *   ParFlat_NA_array_DDBJ and return what fta_StringMatch()
  *   reports.
  *
  **********************************************************/
 Int2 CheckNADDBJ(const char* str)
 {
     const Int2 match = fta_StringMatch(ParFlat_NA_array_DDBJ, str);
     return match;
 }


 /**********************************************************
  *
  *   Int2 CheckNA(str):
  *
  *      Match "str" against the molecule table
  *   ParFlat_NA_array and return what fta_StringMatch()
  *   reports.
  *
  **********************************************************/
 Int2 CheckNA(const char* str)
 {
     const Int2 match = fta_StringMatch(ParFlat_NA_array, str);
     return match;
 }


 /**********************************************************
  *
  *   Int2 CheckDIV(str):
  *
  *      Match "str" against the division code table
  *   ParFlat_DIV_array and return what fta_StringMatch()
  *   reports (CkLocusLinePos() treats -1 as "unrecognized").
  *
  **********************************************************/
 Int2 CheckDIV(const char* str)
 {
     const Int2 match = fta_StringMatch(ParFlat_DIV_array, str);
     return match;
 }


 /**********************************************************/

 bool CkLocusLinePos(char* offset, Parser::ESource source, LocusContPtr lcp, bool is_mga)

 {

     Char  date[12];

     bool  ret = true;

     char* p;

     Int4  i;


     p = StringChr(offset, '\n');

     if (p)

         *p = '\0';


     if (is_mga == false && ! StringEquN(offset + lcp->bp, "bp", 2) &&

         ! StringEquN(offset + lcp->bp, "rc", 2) &&

         ! StringEquN(offset + lcp->bp, "aa", 2)) {

         i = lcp->bp + 1;

         ErrPostEx(SEV_WARNING, ERR_FORMAT_LocusLinePosition, "bp/rc string unrecognized in column %d-%d: %s", i, i + 1, offset + lcp->bp);

         ret = false;

     }

     if (CheckSTRAND(offset + lcp->strand) == -1) {

         i = lcp->strand + 1;

         ErrPostEx(SEV_WARNING, ERR_FORMAT_LocusLinePosition, "Strand unrecognized in column %d-%d : %s", i, i + 2, offset + lcp->strand);

     }


     p = offset + lcp->molecule;

     if (is_mga) {

         if (! StringEquNI(p, "mRNA", 4) && ! StringEquN(p, "RNA", 3)) {

             ErrPostEx(SEV_REJECT, ERR_FORMAT_IllegalCAGEMoltype, "Illegal molecule type provided in CAGE record in LOCUS line: \"%s\". Must be \"mRNA\"or \"RNA\". Entry dropped.", p);

             ret = false;

         }

     } else if (StringMatchIcase(ParFlat_NA_array, p) < 0) {

         if (StringMatchIcase(ParFlat_AA_array_DDBJ, p) < 0) {

             i = lcp->molecule + 1;

             if (source != Parser::ESource::DDBJ ||

                 StringMatchIcase(ParFlat_NA_array_DDBJ, p) < 0) {

                 ErrPostEx(SEV_WARNING, ERR_FORMAT_LocusLinePosition, "Molecule unrecognized in column %d-%d: %s", i, i + 5, p);

                 ret = false;

             }

         }

     }


     if (CheckTPG(offset + lcp->topology) == -1) {

         i = lcp->topology + 1;

         ErrPostEx(SEV_WARNING, ERR_FORMAT_LocusLinePosition, "Topology unrecognized in column %d-%d: %s", i, i + 7, offset + lcp->topology);

         ret = false;

     }

     if (CheckDIV(offset + lcp->div) == -1) {

         i = lcp->div + 1;

         ErrPostEx(SEV_WARNING, ERR_FORMAT_LocusLinePosition, "Division code unrecognized in column %d-%d: %s", i, i + 2, offset + lcp->div);

         ret = (source == Parser::ESource::LANL);

     }

     MemCpy(date, offset + lcp->date, 11);

     date[11] = '\0';

     if (StringEquN(date, "NODATE", 6)) {

         ErrPostStr(SEV_WARNING, ERR_FORMAT_LocusLinePosition, "NODATE in LOCUS line will be replaced by current system date");

     } else if (! CkDateFormat(date)) {

         i = lcp->date + 1;

         ErrPostEx(SEV_WARNING, ERR_FORMAT_LocusLinePosition, "Date should be in column %d-%d, and format dd-mmm-yyyy: %s", i, i + 10, date);

         ret = false;

     }


     if (p)

         *p = '\n';

     return (ret);

 }


 /**********************************************************
  *
  *   CRef<CDate_std> GetUpdateDate(ptr, source):
  *
  *      If "ptr" starts with "NODATE", return the current
  *   date.  Otherwise "ptr" must start with an 11-character
  *   dd-mmm-yyyy date followed by end of string, '\n', ' '
  *   or - for the SPROT source only - ','.  Return an empty
  *   CRef if it does not; otherwise return the Date-std
  *   built by get_full_date().
  *
  **********************************************************/
 CRef<CDate_std> GetUpdateDate(const char* ptr, Parser::ESource source)
 {
     if (StringEquN(ptr, "NODATE", 6))
         return CRef<CDate_std>(new CDate_std(CTime(CTime::eCurrent)));

     // A date needs 11 characters.  Make sure they exist before touching
     // ptr[11] or copying 11 bytes: the token may be shorter than that.
     size_t len = 0;
     while (len < 11 && ptr[len] != '\0')
         ++len;
     if (len < 11)
         return CRef<CDate_std>();

     if (ptr[11] != '\0' && ptr[11] != '\n' && ptr[11] != ' ' &&
         (source != Parser::ESource::SPROT || ptr[11] != ','))
         return CRef<CDate_std>();

     Char date[12];
     MemCpy(date, ptr, 11);
     date[11] = '\0';

     if (! CkDateFormat(date))
         return CRef<CDate_std>();

     return get_full_date(ptr, false, source);
 }


 /**********************************************************
  *
  *   static bool fta_check_embl_moltype(str):
  *
  *      Take the fourth ';'-separated field of an EMBL ID
  *   line (leading blanks skipped) and compare it, exactly,
  *   against the entries of ValidMolTypes.  Post a REJECT
  *   error and return FALSE if it is not one of them.
  *      The line is modified only temporarily: the ';' that
  *   ends the field is restored before returning.
  *      The caller guarantees that the line has enough ';'
  *   separators; no check is made here.
  *
  **********************************************************/
 static bool fta_check_embl_moltype(char* str)
 {
     // Step over the first three ';' separators.
     char* field = str;
     for (int skipped = 0; skipped < 3; ++skipped)
         field = StringChr(field, ';') + 1;

     while (*field == ' ')
         ++field;

     // Cut the field out of the line for the comparison.
     char* const fieldEnd = StringChr(field, ';');
     *fieldEnd            = '\0';

     bool known = false;
     for (const char** b = ValidMolTypes; *b; ++b) {
         if (StringEqu(field, *b)) {
             known = true;
             break;
         }
     }

     if (! known)
         ErrPostEx(SEV_REJECT, ERR_FORMAT_InvalidIDlineMolType, "Invalid moltype value \"%s\" provided in ID line of EMBL record.", field);

     *fieldEnd = ';';
     return known;
 }


 /*********************************************************
 Indexblk constructor

 Makes every character buffer of the block an empty string by
 writing a terminator into its first element.
 **********************************************************/
 Indexblk::Indexblk()
 {
     acnum[0]      = '\0';
     locusname[0]  = '\0';
     blocusname[0] = '\0';
     division[0]   = '\0';
     wgssec[0]     = '\0';
 }


 // Predicate for the standard algorithms: true if "c" is a whitespace
 // character according to isspace().  The argument is converted to unsigned
 // char first because isspace() is undefined for negative values other than
 // EOF, which a plain char holding a byte >= 0x80 may produce.
 static bool isSpace(char c)
 {
     return isspace(static_cast<unsigned char>(c)) != 0;
 }


 // Return the position of the first whitespace character (per isSpace) at or
 // after "current_it", or tempString.end() if there is none.
 static CTempString::const_iterator
 sFindNextSpace(const CTempString&          tempString,
                CTempString::const_iterator current_it)
 {
     const auto last = tempString.end();
     while (current_it != last && ! isSpace(*current_it))
         ++current_it;
     return current_it;
 }


 // Return the position of the first non-whitespace character (per isSpace)
 // at or after "current_it", or tempString.end() if there is none.
 static CTempString::const_iterator
 sFindNextNonSpace(const CTempString&          tempString,
                   CTempString::const_iterator current_it)
 {
     const auto last = tempString.end();
     while (current_it != last && isSpace(*current_it))
         ++current_it;
     return current_it;
 }


 static void sSetLocusLineOffsets(const CTempString& locusLine, LocusCont& offsets)

 {

     offsets.bases    = -1;

     offsets.bp       = -1;

     offsets.strand   = -1;

     offsets.molecule = -1;

     offsets.topology = -1;

     offsets.div      = -1;

     offsets.date     = -1;


     if (locusLine.substr(0, 5) != "LOCUS") {

         // throw an exception - invalid locus line

     }


     auto it = sFindNextNonSpace(locusLine, locusLine.begin() + 5);

     if (it == locusLine.end()) {

         // throw an exception - no locus name

     }


     it = sFindNextSpace(locusLine, it);

     if (it == locusLine.end()) {

         return;

     }


     // find the number of bases

     it = sFindNextNonSpace(locusLine, it);

     if (it == locusLine.end()) {

         return;

     }

     auto space_it = sFindNextSpace(locusLine, it);

     if (NStr::StringToNonNegativeInt(locusLine.substr(it - begin(locusLine), space_it - it)) == -1) {

         return;

     }


     offsets.bases = Int4(it - begin(locusLine));

     it            = sFindNextNonSpace(locusLine, space_it);

     offsets.bp    = Int4(it - begin(locusLine));


     it = sFindNextSpace(locusLine, it);

     it = sFindNextNonSpace(locusLine, it);


     // the next one might be a strand

     // or might be a molecule

     space_it       = sFindNextSpace(locusLine, it);

     offsets.strand = -1;

     if ((space_it - it) == 3) {

         auto currentSubstr = locusLine.substr(it - begin(locusLine), 3);

         if (currentSubstr == "ss-" ||

             currentSubstr == "ds-" ||

             currentSubstr == "ms-") {

             offsets.strand = Int4(it - begin(locusLine));

             it             = sFindNextNonSpace(locusLine, space_it);

         }

         offsets.molecule = Int4(it - begin(locusLine));

     } else {

         offsets.molecule = Int4(it - begin(locusLine));

     }


     // topology

     it = sFindNextSpace(locusLine, it);

     it = sFindNextNonSpace(locusLine, it);

     if (it != locusLine.end()) {

         offsets.topology = Int4(it - begin(locusLine));

     }


     // find division

     it = sFindNextSpace(locusLine, it);

     it = sFindNextNonSpace(locusLine, it);

     if (it != locusLine.end()) {

         offsets.div = Int4(it - begin(locusLine));

     }


     // find date - date is optional

     it = sFindNextSpace(locusLine, it);

     it = sFindNextNonSpace(locusLine, it);

     if (it != locusLine.end()) {

         offsets.date = Int4(it - begin(locusLine));

     }

 }


 /**********************************************************

  *

  *   IndexblkPtr InitialEntry(pp, finfo):

  *

  *      Allocate a new Indexblk for the entry whose first

  *   line (LOCUS / ID) is in finfo.str and fill in offset,

  *   linenum, locusname, blocusname, bases, the LOCUS column

  *   layout (lc), the update date (GenBank) and the

  *   EST/STS/GSS/HTC/TSA/PAT flags (DDBJ and EMBL sources).

  *      Swiss-prot locusname checking is different from

  *   others.

  *      Outside Relaxed mode the locus name is validated; on

  *   failure an error is posted, the block is deleted and

  *   nullptr is returned.

  *      The caller owns the returned block.

  *

  **********************************************************/

 IndexblkPtr InitialEntry(ParserPtr pp, FinfoBlk& finfo)

 {

     Int2        i;

     Int2        j;

     const char* bases;

     IndexblkPtr entry;

     char*       p;


     entry = new Indexblk;


     // Where the entry starts in the input, and on which line.
     entry->offset  = finfo.pos;

     entry->linenum = finfo.line;

     entry->ppp     = pp;

     entry->is_tsa  = false;

     entry->is_tls  = false;

     entry->is_pat  = false;


     // Token list built from the line, with ' ' given as the separator.
     auto stoken = TokenString(finfo.str, ' ');


     bool badlocus = false;

     if (stoken->num > 2) {

         p = finfo.str;

         if (pp->mode == Parser::EMode::Relaxed) {

             // Relaxed mode: field positions are discovered by scanning.
             sSetLocusLineOffsets(p, entry->lc);

         } else {

             // Strict mode: fixed columns.  The "new" layout is assumed for
             // lines longer than 78 characters that have blanks at
             // positions 28, 63 and 67; the old layout otherwise.
             if (StringLen(p) > 78 && p[28] == ' ' && p[63] == ' ' && p[67] == ' ') {

                 entry->lc.bases    = ParFlat_COL_BASES_NEW;

                 entry->lc.bp       = ParFlat_COL_BP_NEW;

                 entry->lc.strand   = ParFlat_COL_STRAND_NEW;

                 entry->lc.molecule = ParFlat_COL_MOLECULE_NEW;

                 entry->lc.topology = ParFlat_COL_TOPOLOGY_NEW;

                 entry->lc.div      = ParFlat_COL_DIV_NEW;

                 entry->lc.date     = ParFlat_COL_DATE_NEW;

             } else {

                 entry->lc.bases    = ParFlat_COL_BASES;

                 entry->lc.bp       = ParFlat_COL_BP;

                 entry->lc.strand   = ParFlat_COL_STRAND;

                 entry->lc.molecule = ParFlat_COL_MOLECULE;

                 entry->lc.topology = ParFlat_COL_TOPOLOGY;

                 entry->lc.div      = ParFlat_COL_DIV;

                 entry->lc.date     = ParFlat_COL_DATE;

             }

         }


         // ptr -> second token, i.e. the locus name / primary identifier.
         auto ptr = stoken->list.begin();

         ++ptr;

         // EMBL "new style" ID line: the third token is "SV".
         if (pp->format == Parser::EFormat::EMBL &&

             next(ptr) != stoken->list.end() && *next(ptr) == "SV"s) {

             // Count the "; " field separators in the raw line.
             for (i = 0, p = finfo.str; *p != '\0'; p++)

                 if (*p == ';' && p[1] == ' ')

                     i++;


             entry->embl_new_ID = true;

             // Drop the ';' glued to the end of the identifier token.
             if (! ptr->empty() && ptr->back() == ';')

                 ptr->pop_back();


             FtaInstallPrefix(PREFIX_LOCUS, ptr->c_str());

             FtaInstallPrefix(PREFIX_ACCESSION, ptr->c_str());


             // A well-formed new ID line has 6 separators and 10 or 11 tokens.
             if (i != 6 || (stoken->num != 10 && stoken->num != 11)) {

                 ErrPostStr(SEV_REJECT, ERR_FORMAT_BadlyFormattedIDLine, "The number of fields in this EMBL record's new ID line does not fit requirements.");

                 badlocus = true;

             } else if (fta_check_embl_moltype(finfo.str) == false)

                 badlocus = true;

         }


         // NOTE(review): unbounded copies - the sizes of locusname and
         // blocusname are not visible here; confirm they can hold any token.
         StringCpy(entry->locusname, ptr->c_str());

         StringCpy(entry->blocusname, entry->locusname);


         if (entry->embl_new_ID == false) {

             FtaInstallPrefix(PREFIX_LOCUS, entry->locusname);

             FtaInstallPrefix(PREFIX_ACCESSION, entry->locusname);

         }


         // Locus name validation is skipped in Relaxed mode.
         if (pp->mode != Parser::EMode::Relaxed && ! badlocus) {

             if (pp->format == Parser::EFormat::SPROT) {

                 // Swiss-Prot: the X_Y check is applied unless the next token
                 // starts with "preliminary" or "unreviewed" (any case).
                 auto it = next(ptr);

                 if (it == stoken->list.end() || it->empty() ||

                     (! StringEquNI(it->c_str(), "preliminary", 11) &&

                      ! StringEquNI(it->c_str(), "unreviewed", 10)))

                     badlocus = CheckLocusSP(entry->locusname);

                 else

                     badlocus = false;

             } else

                 badlocus = CheckLocus(entry->locusname, pp->source);

         }

     } else if (pp->mode != Parser::EMode::Relaxed) {

         // Two tokens or fewer: there is no locus name to work with.
         badlocus = true;

         ErrPostStr(SEV_ERROR, ERR_LOCUS_NoLocusName, "No locus name for this entry");

     }


     if (badlocus) {

         // Cut the line at its newline for the message, then restore it.
         p = StringChr(finfo.str, '\n');

         if (p)

             *p = '\0';

         ErrPostEx(SEV_ERROR, ERR_ENTRY_Skipped, "Entry skipped. LOCUS line = \"%s\".", finfo.str);

         if (p)

             *p = '\n';

         delete entry;

         return nullptr;

     }


     // Token that precedes "bp"/"AA", if any, is the sequence length.
     bases = GetResidue(stoken.get());

     if (bases)

         entry->bases = (size_t)atoi(bases);


     if (pp->format == Parser::EFormat::GenBank &&

         entry->lc.date > -1) {

         /* last token in the LOCUS line is date of the update's data

          */

         auto it = stoken->list.begin();

         for (i = 1; i < stoken->num; ++i)

             ++it;

         entry->date = GetUpdateDate(it->c_str(), pp->source);

     }


     if (pp->source == Parser::ESource::DDBJ || pp->source == Parser::ESource::EMBL) {

         // Walk to the token at position num - 2 (GenBank format) or
         // num - 3 (other formats), counted from 1.
         // NOTE(review): in Relaxed mode this point can be reached with two
         // tokens or fewer; the iterator is then advanced and dereferenced
         // without a bounds check - confirm that this cannot happen.
         j = stoken->num - ((pp->format == Parser::EFormat::GenBank) ? 2 : 3);

         auto it = stoken->list.begin();

         for (i = 1; i < j; ++i)

             ++it;


         if (pp->format == Parser::EFormat::EMBL) {

             if (StringEquNI(it->c_str(), "TSA", 3))

                 entry->is_tsa = true;

             else if (StringEquNI(it->c_str(), "PAT", 3))

                 entry->is_pat = true;

         }


         // The following token is tested against the division-like codes.
         ++it;


         if (StringEquNI(it->c_str(), "EST", 3))

             entry->EST = true;

         else if (StringEquNI(it->c_str(), "STS", 3))

             entry->STS = true;

         else if (StringEquNI(it->c_str(), "GSS", 3))

             entry->GSS = true;

         else if (StringEquNI(it->c_str(), "HTC", 3))

             entry->HTC = true;

         else if (StringEquNI(it->c_str(), "PAT", 3) &&

                  pp->source == Parser::ESource::EMBL)

             entry->is_pat = true;

     }


     return (entry);

 }


 /**********************************************************
  *
  *   void DelNoneDigitTail(str):
  *
  *      Truncate "str" in place right after its last decimal
  *   digit, i.e. delete the trailing run of non-digit
  *   characters.  A string without any digit becomes empty.
  *      A null pointer or an empty string is left untouched.
  *
  *                                              3-25-93
  *
  **********************************************************/
 void DelNoneDigitTail(char* str)
 {
     if (str == nullptr || str[0] == '\0')
         return;

     // "cut" always points one past the last digit seen so far.
     char* cut = str;
     for (char* cur = str; *cur != '\0'; ++cur) {
         if ('0' <= *cur && *cur <= '9')
             cut = cur + 1;
     }

     *cut = '\0';
 }


 // std::string counterpart of DelNoneDigitTail(): truncate "str" right after
 // its last decimal digit.  A string that contains no digit at all becomes
 // empty, exactly as the char* version behaves (it used to be left unchanged
 // here, so the two functions disagreed on digit-free input).
 void DelNonDigitTail(string& str)
 {
     const auto lastDigit = str.find_last_of("0123456789");
     str.erase(lastDigit == string::npos ? 0 : lastDigit + 1);
 }


 /**********************************************************

  *

  * Here X is an alpha character, N - numeric one.

  * Return values:

  *

  * 1 - XXN        (AB123456)

  * 2 - XX_N       (NZ_123456)

  * 3 - XXXXN      (AAAA01000001)

  * 4 - XX_XXXXN   (NZ_AAAA01000001)

  * 5 - XXXXXN     (AAAAA1234512)

  * 6 - XX_XXN     (NZ_AB123456)

  * 7 - XXXXNNSN   (AAAA01S000001 - scaffolds)

  * 8 - XXXXXXN    (AAAAAA010000001)

  * 9 - XXXXXXNNSN (AAAAAA01S0000001 - scaffolds)

  * 0 - all others

  *

  */


 /// Tells whether "c" lies in the range 'A'..'Z'. The comparison is done on
 /// the raw character value, so no locale is consulted.
 inline bool sIsUpperAlpha(char c)
 {
     return ! (c < 'A' || c > 'Z');
 }


 /// Classifies the leading characters of accession "acnum" into one of the
 /// layout codes 1..9 (X = upper-case letter, N = digit):
 ///   1 XXN, 2 XX_N, 3 XXXXN, 4 XX_XXXXN, 5 XXXXXN, 6 XX_XXN,
 ///   7 XXXXNNSN, 8 XXXXXXN, 9 XXXXXXNNSN.
 /// Returns 0 for a null/empty string and for every other layout.
 ///
 /// Only the prefix is inspected; the total length and the remaining
 /// characters are not validated here. Characters are examined strictly left
 /// to right with short-circuiting, so the terminating NUL is never passed.
 Int4 IsNewAccessFormat(const Char* acnum)
 {
     const Char* p = acnum;

     if (! p || *p == '\0')
         return 0;

     // The <cctype> classifiers are only defined for values representable as
     // unsigned char (or EOF); a plain char with the high bit set must be
     // converted first.
     const auto isDigit = [](Char c) {
         return isdigit(static_cast<unsigned char>(c)) != 0;
     };

     // Every recognised layout starts with two upper-case letters.
     if (! sIsUpperAlpha(p[0]) || ! sIsUpperAlpha(p[1]))
         return 0;

     if (isDigit(p[2]))
         return 1; // XXN

     if (p[2] == '_') {
         if (isDigit(p[3]))
             return 2; // XX_N

         if (sIsUpperAlpha(p[3]) && sIsUpperAlpha(p[4])) {
             if (sIsUpperAlpha(p[5]) && sIsUpperAlpha(p[6]) && isDigit(p[7]))
                 return 4; // XX_XXXXN
             if (isDigit(p[5]))
                 return 6; // XX_XXN
         }
         return 0;
     }

     if (sIsUpperAlpha(p[2]) && sIsUpperAlpha(p[3])) {
         if (sIsUpperAlpha(p[4]) && sIsUpperAlpha(p[5]) && isDigit(p[6])) {
             // XXXXXXNNSN (scaffold) versus plain XXXXXXN
             if (isDigit(p[7]) && p[8] == 'S' && isDigit(p[9]))
                 return 9;
             return 8;
         }

         if (isDigit(p[4])) {
             // XXXXNNSN (scaffold) versus plain XXXXN
             if (isDigit(p[5]) && p[6] == 'S' && isDigit(p[7]))
                 return 7;
             return 3;
         }

         if (sIsUpperAlpha(p[4]) && isDigit(p[5]))
             return 5; // XXXXXN
     }

     return 0;
 }


 /**********************************************************/

 static bool IsValidAccessPrefix(const char* acc, const char** accpref)

 {

     Int4 i = IsNewAccessFormat(acc);

     if (i == 0 || ! accpref)

         return false;


     if (2 < i && i < 10)

         return true;


     const char** b = accpref;

     for (; *b; b++) {

         if (StringEquN(acc, *b, StringLen(*b)))

             return true;

     }


     return false;

 }


 /**********************************************************/

 /// Tells whether "acnum", already classified by IsNewAccessFormat() as
 /// "accformat", consists of its letter prefix, exactly two digits and then
 /// nothing but '0' characters up to the end of the string.
 ///
 /// Only layout codes 3 (XXXXN), 8 (XXXXXXN) and 4 (XX_XXXXN) are considered;
 /// any other code yields false. The caller must pass the code that really
 /// belongs to "acnum", since it determines where the digits are expected.
 static bool fta_if_master_wgs_accession(const char* acnum, Int4 accformat)
 {
     // Number of characters preceding the two-digit field.
     int prefixLen = 0;
     switch (accformat) {
     case 3:
         prefixLen = 4;
         break;
     case 8:
         prefixLen = 6;
         break;
     case 4:
         prefixLen = 7;
         break;
     default:
         return false;
     }

     const char* tail = acnum + prefixLen;

     const bool twoDigits = tail[0] >= '0' && tail[0] <= '9' &&
                            tail[1] >= '0' && tail[1] <= '9';
     if (! twoDigits)
         return false;

     // Everything after the two digits has to be zeros.
     tail += 2;
     while (*tail == '0')
         ++tail;

     return *tail == '\0';
 }


 /// Tells whether "accession" has the VDB WGS scaffold layout
 /// 4 letters + 2 digits + 'S' + 6..8 digits, where the digits after the 'S'
 /// are not all zeros.
 static bool s_IsVDBWGSScaffold(string_view accession)
 {
     // The <cctype> classifiers are undefined for negative plain-char values,
     // so go through unsigned char.
     const auto isAlpha = [](const char c) {
         return isalpha(static_cast<unsigned char>(c)) != 0;
     };
     const auto isDigit = [](const char c) {
         return isdigit(static_cast<unsigned char>(c)) != 0;
     };

     // 4+2+S+[6,7,8]
     const auto length = accession.length();
     if (length < 13 || length > 15 || accession[6] != 'S') {
         return false;
     }

     // the first 4 chars must be letters
     if (! all_of(accession.begin(), accession.begin() + 4, isAlpha)) {
         return false;
     }

     // the next 2 chars must be digits
     if (! isDigit(accession[4]) || ! isDigit(accession[5])) {
         return false;
     }

     // The characters after 'S' must all be digits,
     // with at least one of them different from '0'.
     const string_view serial = accession.substr(7);
     return all_of(serial.begin(), serial.end(), isDigit) &&
            serial.find_first_not_of('0') != string_view::npos;
 }


 /// Refines a basic WGS classification (0 project, 1 contig, 3 master) into
 /// its TSA or TLS counterpart according to the first letter of "accession":
 ///   'G'        -> TSA-WGS: 0 -> 4, 1 -> 5, 3 -> 6
 ///   'K' or 'T' -> TLS-WGS: 0 -> 10, 1 -> 11, 3 -> 12
 ///   'I'        -> 8 (contigs only)
 ///   'H'        -> 9 (contigs only)
 /// Any other combination, including initialType == -1 and an empty
 /// accession, returns "initialType" unchanged.
 static int s_RefineWGSType(string_view accession, int initialType)
 {
     if (initialType == -1 || accession.empty()) {
         return initialType;
     }

     // The project kind is encoded in the FIRST letter of the prefix only;
     // the following letters carry no type information.
     const char first = accession[0];

     if (first == 'G') { // TSA-WGS
         switch (initialType) {
         case 0:
             return 4;
         case 1:
             return 5;
         case 3:
             return 6;
         default:
             return initialType;
         }
     }

     if (first == 'K' || first == 'T') { // TLS
         switch (initialType) {
         case 0:
             return 10;
         case 1:
             return 11;
         case 3:
             return 12;
         default:
             return initialType;
         }
     }

     if (initialType == 1) { // TSA again
         if (first == 'I') {
             return 8;
         }
         if (first == 'H') {
             return 9;
         }
     }

     return initialType;
 }


 /**********************************************************/
 /* Classifies "accession" (an optional "NZ_" prefix is ignored for the
  * project/contig/master checks).
  *
  * Returns:  0 - WGS project accession;
  *           1 - WGS contig accession;
  *           2 - WGS scaffold accession (2+6);
  *           3 - WGS master accession (XXXX00000000);
  *           4 - TSA-WGS project accession;
  *           5 - TSA-WGS contig accession;
  *           6 - TSA-WGS master accession;
  *           7 - VDB WGS scaffold accession (4+2+S+[6,7,8]);
  *           8 - TSA-WGS contig DDBJ accession;
  *           9 - TSA-WGS contig EMBL accession;
  *          10 - TLS-WGS project accession;
  *          11 - TLS-WGS contig accession;
  *          12 - TLS-WGS master accession;
  *          -1 - something else.
  */
 int fta_if_wgs_acc(string_view accession)
 {
     // The <cctype> classifiers are undefined for negative plain-char values,
     // so always go through unsigned char.
     const auto isDigit = [](const char c) {
         return isdigit(static_cast<unsigned char>(c)) != 0;
     };
     const auto isAlpha = [](const char c) {
         return isalpha(static_cast<unsigned char>(c)) != 0;
     };
     const auto isNotZero = [](const char c) { return c != '0'; };

     if (accession.empty() || NStr::IsBlank(accession)) {
         return -1;
     }

     auto length = accession.length();

     // 2-letter scaffold prefix followed by 6 digits
     if (length == 8 &&
         k_WgsScaffoldPrefix.find(accession.substr(0, 2)) != k_WgsScaffoldPrefix.end() &&
         all_of(accession.begin() + 2, accession.end(), isDigit)) {
         return 2;
     }

     // Anything that merely looks like a VDB scaffold is decided right here.
     if (length > 12 && length < 16 && accession[6] == 'S') {
         return s_IsVDBWGSScaffold(accession) ? 7 : -1;
     }

     if (accession.substr(0, 3) == "NZ_"sv) {
         accession.remove_prefix(3);
     }
     length = accession.length();
     if (length < 12 || length > 17) {
         return -1;
     }

     // A digit in the fifth position selects the 4-letter layout; otherwise
     // the 6-letter layout is tried. Either way the accession must be
     // <letters><2-digit version><serial digits>.
     const size_t prefixLen = isDigit(accession[4]) ? 4 : 6;
     const auto   digits    = accession.begin() + prefixLen;

     if (! all_of(accession.begin(), digits, isAlpha) ||
         ! all_of(digits, accession.end(), isDigit)) {
         return -1; // unknown
     }

     int type;
     if (any_of(digits + 2, accession.end(), isNotZero)) {
         type = 1; // WGS contig
     } else if (digits[0] == '0' && digits[1] == '0') {
         type = 3; // WGS master
     } else {
         type = 0; // WGS project
     }

     // NOTE(review): only 4-letter prefixes are refined into TSA/TLS codes;
     // 6-letter prefixes keep the plain WGS code, as before. Confirm that
     // this asymmetry is intended.
     return prefixLen == 4 ? s_RefineWGSType(accession, type) : type;
 }


 /**********************************************************/

 /// Tells whether "acc" looks like a Swiss-Prot accession.
 ///
 /// Accepted lengths are 6, 8 and 10:
 ///   - 8 characters: valid when the first two characters match the first
 ///     two characters of an entry of sprot_accpref;
 ///   - 6 characters: [A-Z][0-9]c[A-Z0-9][A-Z0-9][0-9], where the third
 ///     character c may be a letter or digit for O, P and Q accessions and
 ///     must be a letter for the others;
 ///   - 10 characters: a 6-character accession not starting with O, P or Q,
 ///     followed by [A-Z][A-Z0-9][A-Z0-9][0-9].
 bool IsSPROTAccession(const char* acc)
 {
     if (acc == nullptr || *acc == '\0')
         return false;

     const auto isUpper        = [](char c) { return 'A' <= c && c <= 'Z'; };
     const auto isDigit        = [](char c) { return '0' <= c && c <= '9'; };
     const auto isUpperOrDigit = [&](char c) { return isDigit(c) || isUpper(c); };

     const size_t len = StringLen(acc);
     switch (len) {
     case 6:
     case 10:
         break;
     case 8: {
         // 8-character accessions are judged by their two-letter prefix only
         const char** pref = sprot_accpref;
         while (*pref != nullptr && ! StringEquN(*pref, acc, 2))
             ++pref;
         return (*pref != nullptr);
     }
     default:
         return false;
     }

     // Part shared by the 6- and 10-character forms (third character aside)
     const bool coreOk = isUpper(acc[0]) && isDigit(acc[1]) &&
                         isUpperOrDigit(acc[3]) && isUpperOrDigit(acc[4]) &&
                         isDigit(acc[5]);
     if (! coreOk)
         return false;

     const bool startsOPQ = (acc[0] >= 'O' && acc[0] <= 'Q');

     const bool thirdOk = startsOPQ ? isUpperOrDigit(acc[2]) : isUpper(acc[2]);
     if (! thirdOk)
         return false;

     if (len == 6)
         return true;

     // 10-character accessions never start with O, P or Q
     if (startsOPQ)
         return false;

     return isUpper(acc[6]) && isUpperOrDigit(acc[7]) &&
            isUpperOrDigit(acc[8]) && isDigit(acc[9]);
 }


 #if 0

 static bool sCheckAccession(const list<string>& tokens,

                             Parser::ESource source,

                             Parser::EMode mode,

                             const char* priacc, int skip)

 {

     bool        badac;

     bool        res = true;

     bool        iswgs;

     Char        acnum[200];

     Int4        accformat;

     Int4        priformat;

     Int4        count;

     size_t        i;


     if (! priacc || mode == Parser::EMode::Relaxed)

         return true;


     auto it = tokens.begin();

     if (skip) {

         advance(it, skip);

     }


     priformat = IsNewAccessFormat(priacc);

     if((priformat == 3 || priformat == 4 || priformat == 8) &&

        fta_if_master_wgs_accession(priacc, priformat) == false)

         iswgs = true;

     else

         iswgs = false;


     count = 0;

     for(; it != tokens.end(); ++it)

     {

         StringCpy(acnum, it->c_str());

         if(acnum[0] == '-' && acnum[1] == '\0')

             continue;


         if(skip == 2 && count == 0)

             accformat = priformat;

         else

             accformat = IsNewAccessFormat(acnum);


         size_t len = StringLen(acnum);

         if(acnum[len-1] == ';')

         {

             len--;

             acnum[len] = '\0';

         }

         badac = false;

         if(accformat == 1)

         {

             if(len != 8 && len != 10)

                 badac = true;

             else

             {

                 for(i = 2; i < 8 && badac == false; i++)

                     if(acnum[i] < '0' || acnum[i] > '9')

                         badac = true;

             }

         }

         else if(accformat == 2)

         {

             if(len != 9 && len != 12)

                 badac = true;

             else

             {

                 for(i = 3; i < len && badac == false; i++)

                     if(acnum[i] < '0' || acnum[i] > '9')

                         badac = true;

             }

         }

         else if(accformat == 3)

         {

             if(len < 12 || len > 14)

                 badac = true;

             else

             {

                 for(i = 4; i < len && badac == false; i++)

                     if(acnum[i] < '0' || acnum[i] > '9')

                         badac = true;

             }

         }

         else if(accformat == 8)

         {

             if(len < 15 || len > 17)

                 badac = true;

             else

             {

                 for(i = 6; i < len && !badac; i++)

                     if(acnum[i] < '0' || acnum[i] > '9')

                         badac = true;

             }

         }

         else if(accformat == 4)

         {

             if(len < 15 || len > 17)

                 badac = true;

             else

             {

                 for(i = 7; i < len && badac == false; i++)

                     if(acnum[i] < '0' || acnum[i] > '9')

                         badac = true;

             }

         }

         else if(accformat == 5)

         {

             if(len != 12)

                 badac = true;

             else

             {

                 for(i = 5; i < len && badac == false; i++)

                     if(acnum[i] < '0' || acnum[i] > '9')

                         badac = true;

             }

         }

         else if(accformat == 6)

         {

             if(len != 11 || acnum[0] != 'N' || acnum[1] != 'Z' ||

                acnum[2] != '_' || acnum[3] < 'A' || acnum[3] > 'Z' ||

                acnum[4] < 'A' || acnum[4] > 'Z')

                 badac = true;

             else

             {

                 for(i = 5; i < len && badac == false; i++)

                     if(acnum[i] < '0' || acnum[i] > '9')

                         badac = true;

             }

         }

         else if(accformat == 7)

         {

             if(len < 13 || len > 15)

                 badac = true;

             else

             {

                 for(i = 7; i < len && badac == false; i++)

                     if(acnum[i] < '0' || acnum[i] > '9')

                         badac = true;

             }

         }

         else if(accformat == 0)

         {

             if(len != 6 && len != 10)

                 badac = true;

             else if(acnum[0] >= 'A' && acnum[0] <= 'Z')

             {

                 if(source == Parser::ESource::SPROT)

                 {

                     if(!IsSPROTAccession(acnum))

                         badac = true;

                 }

                 else if(len == 10)

                 {

                     badac = true;

                 }

                 else

                 {

                     for(i = 1; i < 6 && badac == false; i++)

                         if(acnum[i] < '0' || acnum[i] > '9')

                             badac = true;

                 }

             }

             else

                 badac = true;

         }

         else

             badac = true;


         if(badac)

         {

             ErrPostEx(SEV_ERROR, ERR_ACCESSION_BadAccessNum,

                       "Bad accession #, %s for this entry", acnum);

             res = false;

             count++;

             continue;

         }


         if(skip == 2 && count == 0 && !iswgs &&

            (accformat == 3 || accformat == 4 || accformat == 8))

         {

             ErrPostStr(SEV_REJECT, ERR_ACCESSION_WGSProjectAccIsPri,

                       "This record has a WGS 'project' accession as its primary accession number. WGS project-accessions are only expected to be used as secondary accession numbers.");

             res = false;

         }

         count++;

     }


     return(res);

 }

 #endif


 /// Returns true when the range [first, last) contains at least one character
 /// that is not a decimal digit; an empty range yields false.
 inline bool sNotAllDigits(const char* first, const char* last)
 {
     // isdigit() is undefined for negative plain-char values, so convert
     // through unsigned char before classifying.
     return ! all_of(first, last, [](char c) {
         return isdigit(static_cast<unsigned char>(c)) != 0;
     });
 }


 /**********************************************************
  *
  *   static bool CheckAccession(stoken, source, mode,
  *                              priacc, skip):
  *
  *      Validates the accession tokens of "stoken" (the
  *   first token is skipped when skip > 0). Each token is
  *   classified with IsNewAccessFormat() and its length
  *   and digit positions are checked against that layout;
  *   a token consisting of a single '-' is ignored and one
  *   trailing ';' is stripped before the check.
  *      Every malformed token is reported with
  *   ERR_ACCESSION_BadAccessNum. With skip == 2 the first
  *   checked token is treated as the primary accession
  *   "priacc"; ERR_ACCESSION_WGSProjectAccIsPri is posted
  *   when it is a WGS project accession.
  *      Returns false if anything was reported, true
  *   otherwise. Nothing is checked (true is returned) when
  *   "priacc" is null or the mode is Relaxed.
  *
  *                                              7-6-93
  *
  **********************************************************/
 static bool CheckAccession(
     TokenStatBlkPtr stoken,
     Parser::ESource source,
     Parser::EMode   mode,
     const char*     priacc,
     unsigned        skip)
 {
     if (! priacc || mode == Parser::EMode::Relaxed)
         return true;

     auto tbp = stoken->list.begin();
     // Do not step past the end of an empty token list.
     if (skip > 0 && tbp != stoken->list.end())
         ++tbp;

     const Int4 priformat = IsNewAccessFormat(priacc);
     const bool iswgs     = (priformat == 3 || priformat == 4 || priformat == 8) &&
                            ! fta_if_master_wgs_accession(priacc, priformat);

     bool res   = true;
     Int4 count = 0;

     for (; tbp != stoken->list.end(); ++tbp) {
         // Work on a std::string copy: tokens come from the input file and
         // are not guaranteed to fit into any fixed-size buffer.
         string acnum(tbp->c_str());
         if (acnum == "-")
             continue;

         // The first token checked with skip == 2 is the primary accession,
         // whose layout has already been determined.
         const bool isPrimary = (skip == 2 && count == 0);
         const Int4 accformat = isPrimary ? priformat : IsNewAccessFormat(acnum.c_str());

         if (! acnum.empty() && acnum.back() == ';')
             acnum.pop_back();

         const size_t len = acnum.size();
         const char*  s   = acnum.c_str();

         // In every case the length test comes first, so the digit scan never
         // leaves the string.
         bool badac;
         switch (accformat) {
         case 1: // XXN
             badac = (len != 8 && len != 10) || sNotAllDigits(s + 2, s + 8);
             break;
         case 2: // XX_N
             badac = (len != 9 && len != 12) || sNotAllDigits(s + 3, s + len);
             break;
         case 3: // XXXXN
             badac = (len < 12 || len > 14) || sNotAllDigits(s + 4, s + len);
             break;
         case 8: // XXXXXXN
             badac = (len < 15 || len > 17) || sNotAllDigits(s + 6, s + len);
             break;
         case 4: // XX_XXXXN
             badac = (len < 15 || len > 17) || sNotAllDigits(s + 7, s + len);
             break;
         case 5: // XXXXXN
             badac = (len != 12) || sNotAllDigits(s + 5, s + len);
             break;
         case 6: // XX_XXN, only "NZ_" is accepted
             badac = (len != 11 || s[0] != 'N' || s[1] != 'Z' || s[2] != '_' ||
                      ! sIsUpperAlpha(s[3]) || ! sIsUpperAlpha(s[4])) ||
                     sNotAllDigits(s + 5, s + len);
             break;
         case 7: // XXXXNNSN
             badac = (len < 13 || len > 15) || sNotAllDigits(s + 7, s + len);
             break;
         case 9: // XXXXXXNNSN
             badac = (len < 16 || len > 17) || sNotAllDigits(s + 9, s + len);
             break;
         case 0: // anything else: classic 1+5 or Swiss-Prot accession
             if (len != 6 && len != 10)
                 badac = true;
             else if (! sIsUpperAlpha(s[0]))
                 badac = true;
             else if (source == Parser::ESource::SPROT)
                 badac = ! IsSPROTAccession(s);
             else
                 badac = (len == 10) || sNotAllDigits(s + 1, s + 6);
             break;
         default:
             badac = true;
             break;
         }

         if (badac) {
             ErrPostEx(SEV_ERROR, ERR_ACCESSION_BadAccessNum, "Bad accession #, %s for this entry", acnum.c_str());
             res = false;
             count++;
             continue;
         }

         if (isPrimary && ! iswgs &&
             (accformat == 3 || accformat == 4 || accformat == 8)) {
             ErrPostStr(SEV_REJECT, ERR_ACCESSION_WGSProjectAccIsPri, "This record has a WGS 'project' accession as its primary accession number. WGS project-accessions are only expected to be used as secondary accession numbers.");
             res = false;
         }
         count++;
     }

     return (res);
 }


 /**********************************************************/

 static bool IsPatentedAccPrefix(const Parser& parseInfo, const char* acc)

 {

     if (acc[2] == '\0') {

         if ((StringEqu(acc, "AR") || StringEqu(acc, "DZ") ||

              StringEqu(acc, "EA") || StringEqu(acc, "GC") ||

              StringEqu(acc, "GP") || StringEqu(acc, "GV") ||

              StringEqu(acc, "GX") || StringEqu(acc, "GY") ||

              StringEqu(acc, "GZ") || StringEqu(acc, "HJ") ||

              StringEqu(acc, "HK") || StringEqu(acc, "HL") ||

              StringEqu(acc, "KH") || StringEqu(acc, "MI") ||

              StringEqu(acc, "MM") || StringEqu(acc, "MO") ||

              StringEqu(acc, "MV") || StringEqu(acc, "MX") ||

              StringEqu(acc, "MY") || StringEqu(acc, "OO") ||

              StringEqu(acc, "OS") || StringEqu(acc, "OT") ||

              StringEqu(acc, "PR") || StringEqu(acc, "PT") ||

              StringEqu(acc, "PU")) &&

             (parseInfo.all == true || parseInfo.source == Parser::ESource::NCBI))

             return true;

         if ((StringEquN(acc, "AX", 2) || StringEquN(acc, "CQ", 2) ||

              StringEquN(acc, "CS", 2) || StringEquN(acc, "FB", 2) ||

              StringEquN(acc, "HA", 2) || StringEquN(acc, "HB", 2) ||

              StringEquN(acc, "HC", 2) || StringEquN(acc, "HD", 2) ||

              StringEquN(acc, "HH", 2) || StringEquN(acc, "GM", 2) ||

              StringEquN(acc, "GN", 2) || StringEquN(acc, "JA", 2) ||

              StringEquN(acc, "JB", 2) || StringEquN(acc, "JC", 2) ||

              StringEquN(acc, "JD", 2) || StringEquN(acc, "JE", 2) ||

              StringEquN(acc, "HI", 2) || StringEquN(acc, "LP", 2) ||

              StringEquN(acc, "LQ", 2) || StringEquN(acc, "MP", 2) ||

              StringEquN(acc, "MQ", 2) || StringEquN(acc, "MR", 2) ||

              StringEquN(acc, "MS", 2)) &&

             (parseInfo.all == true || parseInfo.source == Parser::ESource::EMBL))

             return true;

         if ((StringEquN(acc, "BD", 2) || StringEquN(acc, "DD", 2) ||

              StringEquN(acc, "DI", 2) || StringEquN(acc, "DJ", 2) ||

              StringEquN(acc, "DL", 2) || StringEquN(acc, "DM", 2) ||

              StringEquN(acc, "FU", 2) || StringEquN(acc, "FV", 2) ||

              StringEquN(acc, "FW", 2) || StringEquN(acc, "FZ", 2) ||

              StringEquN(acc, "GB", 2) || StringEquN(acc, "HV", 2) ||

              StringEquN(acc, "HW", 2) || StringEquN(acc, "HZ", 2) ||

              StringEquN(acc, "LF", 2) || StringEquN(acc, "LG", 2) ||

              StringEquN(acc, "LV", 2) || StringEquN(acc, "LX", 2) ||

              StringEquN(acc, "LY", 2) || StringEquN(acc, "LZ", 2) ||

              StringEquN(acc, "MA", 2) || StringEquN(acc, "MB", 2) ||

              StringEquN(acc, "MC", 2) || StringEquN(acc, "MD", 2) ||

              StringEquN(acc, "ME", 2) || StringEquN(acc, "OF", 2) ||

              StringEquN(acc, "OG", 2) || StringEquN(acc, "OI", 2) ||

              StringEquN(acc, "OJ", 2) || StringEquN(acc, "PA", 2) ||

              StringEquN(acc, "PE", 2) || StringEquN(acc, "PF", 2) ||

              StringEquN(acc, "PG", 2) || StringEquN(acc, "PH", 2) ||

              StringEquN(acc, "PI", 2) || StringEquN(acc, "PJ", 2) ||

              StringEquN(acc, "PK", 2)) &&

             (parseInfo.all == true || parseInfo.source == Parser::ESource::DDBJ))

             return true;


         return false;

     }


     if (acc[1] == '\0' && (*acc == 'I' || *acc == 'A' || *acc == 'E')) {

         if (parseInfo.all == true ||

             (*acc == 'I' && parseInfo.source == Parser::ESource::NCBI) ||

             (*acc == 'A' && parseInfo.source == Parser::ESource::EMBL) ||

             (*acc == 'E' && parseInfo.source == Parser::ESource::DDBJ))

             return true;

     }

     return false;

 }


 /**********************************************************/

 static bool IsTPAAccPrefix(const Parser& parseInfo, const char* acc)

 {

     if (! acc)

         return (false);


     size_t i = StringLen(acc);

     if (i != 2 && i != 4)

         return (false);


     if (i == 4) {

         if (acc[0] == 'D' &&

             (parseInfo.all == true || parseInfo.source == Parser::ESource::NCBI))

             return (true);

         if ((acc[0] == 'E' || acc[0] == 'Y') &&

             (parseInfo.all == true || parseInfo.source == Parser::ESource::DDBJ))

             return (true);

         return (false);

     }


     if (fta_StringMatch(ncbi_tpa_accpref, acc) >= 0 &&

         (parseInfo.all == true || parseInfo.source == Parser::ESource::NCBI))

         return (true);

     if (fta_StringMatch(ddbj_tpa_accpref, acc) >= 0 &&

         (parseInfo.all == true || parseInfo.source == Parser::ESource::DDBJ))

         return (true);

     return (false);

 }


 /**********************************************************/

 /// Tells whether the two-letter accession prefix "acc" is listed in
 /// ncbi_wgs_accpref or ddbj_wgs_accpref for the source being parsed (either
 /// table counts when parseInfo.all is set). A null pointer and prefixes whose
 /// length is not 2 yield false.
 static bool IsWGSAccPrefix(const Parser& parseInfo, const char* acc)
 {
     if (acc == nullptr || StringLen(acc) != 2)
         return false;

     const bool anySource = (parseInfo.all == true);

     const bool ncbiHit = fta_StringMatch(ncbi_wgs_accpref, acc) >= 0 &&
                          (anySource || parseInfo.source == Parser::ESource::NCBI);
     if (ncbiHit)
         return true;

     return fta_StringMatch(ddbj_wgs_accpref, acc) >= 0 &&
            (anySource || parseInfo.source == Parser::ESource::DDBJ);
 }


 /**********************************************************/

 /// Sets ibp->is_tsa and/or ibp->tsa_allowed according to the accession
 /// prefix "acc", the full accession in ibp->acnum and the parsing source.
 ///
 ///   - null or empty prefix: nothing is changed;
 ///   - EMBL or DDBJ source: tsa_allowed is set and nothing else is examined;
 ///   - prefix "U" (NCBI or all sources): tsa_allowed is set;
 ///   - otherwise only 2- and 4-letter prefixes are considered: known TSA
 ///     prefixes, or a TSA-WGS contig classification of ibp->acnum by
 ///     fta_if_wgs_acc() (codes 5, 8, 9), set both flags; an entry of
 ///     acc_tsa_allowed sets tsa_allowed only.
 /// The flags are only ever raised here, never cleared.
 static void IsTSAAccPrefix(const Parser& parseInfo, const char* acc, IndexblkPtr ibp)
 {
     static const char* const ncbiTsaPrefixes[] = {
         "EZ", "HP", "JI", "JL", "JO", "JP", "JR", "JT", "JU", "JV", "JW", "KA"
     };
     static const char* const ddbjTsaPrefixes[] = {
         "FX", "LA", "LE", "LH", "LI", "LJ"
     };

     if (acc == nullptr || acc[0] == '\0')
         return;

     if (parseInfo.source == Parser::ESource::EMBL ||
         parseInfo.source == Parser::ESource::DDBJ) {
         ibp->tsa_allowed = true;
         return;
     }

     const bool anySource = (parseInfo.all == true);
     const bool forNCBI   = anySource || parseInfo.source == Parser::ESource::NCBI;

     if (acc[0] == 'U' && acc[1] == '\0' && forNCBI) {
         ibp->tsa_allowed = true;
         return;
     }

     const size_t len = StringLen(acc);
     if (len != 2 && len != 4)
         return;

     if (forNCBI) {
         // Exact match against the two-letter NCBI TSA prefixes
         bool known = false;
         if (len == 2) {
             for (const char* pref : ncbiTsaPrefixes) {
                 if (StringEqu(acc, pref)) {
                     known = true;
                     break;
                 }
             }
         }
         if (known || fta_if_wgs_acc(ibp->acnum) == 5) {
             ibp->is_tsa      = true;
             ibp->tsa_allowed = true;
         }
         if (fta_StringMatch(acc_tsa_allowed, acc) >= 0)
             ibp->tsa_allowed = true;
     }

     if (anySource || parseInfo.source == Parser::ESource::DDBJ) {
         // Only the first two characters are compared for the DDBJ prefixes
         bool known = false;
         for (const char* pref : ddbjTsaPrefixes) {
             if (StringEquN(acc, pref, 2)) {
                 known = true;
                 break;
             }
         }
         if (known || fta_if_wgs_acc(ibp->acnum) == 8) {
             ibp->is_tsa      = true;
             ibp->tsa_allowed = true;
         }
     }

     if (anySource || parseInfo.source == Parser::ESource::EMBL) {
         if (fta_if_wgs_acc(ibp->acnum) == 9) {
             ibp->is_tsa      = true;
             ibp->tsa_allowed = true;
         }
     }
 }


 /**********************************************************/

 /// Sets ibp->is_tls when the prefix "acc" is four characters long, the
 /// source is NCBI or DDBJ (or parseInfo.all is set), and fta_if_wgs_acc()
 /// classifies ibp->acnum as a TLS-WGS contig (code 11). The flag is never
 /// cleared here.
 static void IsTLSAccPrefix(const Parser& parseInfo, const char* acc, IndexblkPtr ibp)
 {
     if (acc == nullptr || acc[0] == '\0' || StringLen(acc) != 4)
         return;

     const bool sourceOk = parseInfo.all == true ||
                           parseInfo.source == Parser::ESource::NCBI ||
                           parseInfo.source == Parser::ESource::DDBJ;
     if (! sourceOk)
         return;

     if (fta_if_wgs_acc(ibp->acnum) == 11)
         ibp->is_tls = true;
 }

 /*

 static bool sIsAccPrefixChar(char c)  {

     return (c >= 'A'  && c <= 'Z');

 }

 */

 /**********************************************************

  *

  *   bool GetAccession(pp, str, entry, skip):

  *

  *      Only record the first line of the first accession

  *   number.

  *      PIR format, accession number does not follow

  *   the rule.

  *

  *                                              3-4-93

  *

  **********************************************************/

 /*

 bool GetAccession(const Parser& parseInfo, const CTempString& str, IndexblkPtr entry, unsigned skip)

 {

     string       accession;

     list<string> tokens;

     bool         get = true;


     if ((skip != 2 && parseInfo.source == Parser::ESource::Flybase) ||

         parserInfo.source == Parser::ESource::USPTO)

         return true;


     NStr::Split(str, " ;", tokens, NStr::fSplit_Tokenize);


     if (skip != 2) {

         get = ParseAccessionRange(tokens, skip);

         if (get)

             get = sCheckAccession(tokens, parseInfo.source, parseInfo.mode, entry->acnum, skip);

         if (! get)

             entry->drop = true;


         if (tokens.size() > skip && skip < 2) { // Not sure about the logic

             auto it = tokens.begin();

             if (skip > 0)

                 it = next(it, skip);

             move(it, tokens.end(), entry->secondary_accessions.end());

         }

         return get;

     }


     // skip == 2

     entry->is_tpa = false;

     if (tokens.size() < 2) {

         if (parseInfo.mode != Parser::EMode::Relaxed) {

             ErrPostEx(SEV_ERROR, ERR_ACCESSION_NoAccessNum, "No accession # for this entry, about line %ld", (long int)entry->linenum);

             entry->drop = true;

         }

         return false;

     }


     accession = *next(tokens.begin());

     sDelNonDigitTail(accession);


     StringCpy(entry->acnum, accession.c_str());


     if (parseInfo.format != Parser::EFormat::XML) {

         string temp = accession;

         if (parseInfo.accver && entry->vernum > 0) {

             temp += "." + NStr::NumericToString(entry->vernum);

         }

         if (temp.empty()) {

             if (entry->locusname[0] != '\0') {

                 temp = entry->locusname;

             } else {

                 temp = "???";

             }

         }

         FtaInstallPrefix(PREFIX_ACCESSION, temp.c_str());

     }


     if (parseInfo.source == Parser::ESource::Flybase) {

         return true;

     }


     if (accession.size() < 2) {

         ErrPostEx(SEV_ERROR, ERR_ACCESSION_BadAccessNum, "Wrong accession [%s] for this entry.", accession.c_str());

         entry->drop = true;

         return false;

     }


     if (sIsAccPrefixChar(accession[0]) && sIsAccPrefixChar(accession[1])) {

         if (parseInfo.accpref && ! IsValidAccessPrefix(accession.c_str(), parseInfo.accpref)) {

             get = false;

         }


         if (sIsAccPrefixChar(accession[2]) && sIsAccPrefixChar(accession[3])) {

             if (sIsAccPrefixChar(accession[4])) {

                 accession = accession.substr(0, 5);

             } else {

                 accession = accession.substr(0, 4);

             }

         } else if (accession[2] == '_') {

             accession = accession.substr(0, 3);

         } else {

             accession = accession.substr(0, 2);

         }

     } else {

         if (parseInfo.acprefix && ! StringChr(parseInfo.acprefix, accession[0])) {

             get = false;

         }

         accession = accession.substr(0, 1);

     }


     if (get) {

         if (tokens.size() > 2) {

             get = ParseAccessionRange(tokens, 2);

             if (get) {

                 get = sCheckAccession(tokens, parseInfo.source, parseInfo.mode, entry->acnum, 2);

             }

         }

     } else {

         string sourceName = sourceNames.at(parseInfo.source);

         ErrPostEx(SEV_ERROR, ERR_ACCESSION_BadAccessNum, "Wrong accession # prefix [%s] for this source: %s", accession.c_str(), sourceName.c_str());

     }


     entry->secondary_accessions.clear(); // Is this necessary?

     move(next(tokens.begin(), 2), tokens.end(), entry->secondary_accessions.begin());


     if (! entry->is_pat) {

         entry->is_pat = IsPatentedAccPrefix(parseInfo, accession.c_str());

     }

     entry->is_tpa = IsTPAAccPrefix(parseInfo, accession.c_str());

     entry->is_wgs = IsWGSAccPrefix(parseInfo, accession.c_str());

     IsTSAAccPrefix(parseInfo, accession.c_str(), entry);

     IsTLSAccPrefix(parseInfo, accession.c_str(), entry);


     auto i = IsNewAccessFormat(entry->acnum);

     if (i == 3 || i == 8) {

         entry->is_wgs = true;

         entry->wgs_and_gi |= 02;

     } else if (i == 5) {

         char* p = entry->acnum;

         if (parseInfo.source != Parser::ESource::DDBJ || *p != 'A' || StringLen(p) != 12 ||

             ! StringEqu(p + 5, "0000000")) {

             string sourceName = sourceNames.at(parseInfo.source);

             ErrPostEx(SEV_ERROR, ERR_ACCESSION_BadAccessNum, "Wrong accession \"%s\" for this source: %s", p, sourceName.c_str());

             get = false;

         }

         entry->is_mga = true;

     }


     if (! get)

         entry->drop = true;


     return get;

 }

 */


 bool GetAccession(const Parser* pp, string_view str, IndexblkPtr entry, unsigned skip)

 {

     Char   acc[200];

     bool   get = true;


     if ((skip != 2 && pp->source == Parser::ESource::Flybase) ||

         pp->source == Parser::ESource::USPTO)

         return true;


     string line(str);

     auto stoken = TokenString(line.c_str(), ';');


     if (skip != 2) {

         get = ParseAccessionRange(stoken.get(), skip);

         if (get)

             get = CheckAccession(stoken.get(), pp->source, pp->mode, entry->acnum, skip);

         if (! get)

             entry->drop = true;


         if (skip == 1 && ! stoken->list.empty()) {

             stoken->list.pop_front();

             skip = 0;

         }

         if (skip == 0 && ! stoken->list.empty()) {

             auto tail = entry->secaccs.before_begin();

             for (; next(tail) != entry->secaccs.end();)

                 ++tail;

             entry->secaccs.splice_after(tail, stoken->list);

         }


         return (get);

     }


     entry->is_tpa = false;

     acc[0]        = '\0';

     if (stoken->num < 2) {

         if (pp->mode != Parser::EMode::Relaxed) {

             ErrPostEx(SEV_ERROR, ERR_ACCESSION_NoAccessNum, "No accession # for this entry, about line %ld", (long int)entry->linenum);

             entry->drop = true;

         }

         return false;

     }


     StringCpy(acc, next(stoken->list.begin())->c_str()); /* get first accession */


     if (pp->mode != Parser::EMode::Relaxed) {

         DelNoneDigitTail(acc);

     }


     StringCpy(entry->acnum, acc);


     if (pp->format != Parser::EFormat::XML) {

         string temp = acc;

         if (pp->accver && entry->vernum > 0) {

             temp += '.';

             temp += to_string(entry->vernum);

         }


         if (temp.empty()) {

             if (entry->locusname[0] != '\0')

                 temp = entry->locusname;

             else

                 temp = "???";

         }

         FtaInstallPrefix(PREFIX_ACCESSION, temp.c_str());

     }


     if (pp->source == Parser::ESource::Flybase) {

         return true;

     }


     if ((StringLen(acc) < 2) &&

         pp->mode != Parser::EMode::Relaxed) {

         ErrPostEx(SEV_ERROR, ERR_ACCESSION_BadAccessNum, "Wrong accession [%s] for this entry.", acc);

         entry->drop = true;

         return false;

     }


     if (pp->mode != Parser::EMode::Relaxed) {

         if (acc[0] >= 'A' && acc[0] <= 'Z' && acc[1] >= 'A' && acc[1] <= 'Z') {

             if (pp->accpref && ! IsValidAccessPrefix(acc, pp->accpref))

                 get = false;

             if (acc[2] >= 'A' && acc[2] <= 'Z' && acc[3] >= 'A' && acc[3] <= 'Z') {

                 if (acc[4] >= 'A' && acc[4] <= 'Z') {

                     acc[5] = '\0';

                 } else {

                     acc[4] = '\0';

                 }

             } else if (acc[2] == '_') {

                 acc[3] = '\0';

             } else {

                 acc[2] = '\0';

             }

         } else {

             /* Processing of accession numbers in old format */

             /* check valid prefix accession number */

             if (pp->acprefix && ! StringChr(pp->acprefix, *acc))

                 get = false;

             acc[1] = '\0';

         }

     }


     if (get) {

         if (stoken->num > 2)

             get = ParseAccessionRange(stoken.get(), 2);

         if (get) {

             get = CheckAccession(stoken.get(), pp->source, pp->mode, entry->acnum, 2);

         }

     } else {

         string sourceName = sourceNames.at(pp->source);

         ErrPostEx(SEV_ERROR, ERR_ACCESSION_BadAccessNum, "Wrong accession # prefix [%s] for this source: %s", acc, sourceName.c_str());

     }


     stoken->list.pop_front();

     stoken->list.pop_front();

     entry->secaccs = std::move(stoken->list);

     stoken.reset();


     if (! entry->is_pat)

         entry->is_pat = IsPatentedAccPrefix(*pp, acc);

     entry->is_tpa = IsTPAAccPrefix(*pp, acc);

     entry->is_wgs = IsWGSAccPrefix(*pp, acc);

     IsTSAAccPrefix(*pp, acc, entry);

     IsTLSAccPrefix(*pp, acc, entry);


     auto i = IsNewAccessFormat(entry->acnum);

     if (i == 3 || i == 8) {

         entry->is_wgs = true;

         entry->wgs_and_gi |= 02;

     } else if (i == 5) {

         const char* p = entry->acnum;

         if (pp->source != Parser::ESource::DDBJ || *p != 'A' || StringLen(p) != 12 ||

             ! StringEqu(p + 5, "0000000")) {

             string sourceName = sourceNames.at(pp->source);

             ErrPostEx(SEV_ERROR, ERR_ACCESSION_BadAccessNum, "Wrong accession \"%s\" for this source: %s", p, sourceName.c_str());

             get = false;

         }

         entry->is_mga = true;

     }


     if (! get)

         entry->drop = true;


     return (get);

 }


 /**********************************************************/

 // Releases the per-run state held by the parser: frees every non-null
 // index block in pp->entrylist and empties the list, zeroes indx/curindx,
 // and destroys pp->pbp together with its ibp member.
 // A null `pp` is accepted and ignored.
 void ResetParserStruct(ParserPtr pp)
 {
     if (pp == nullptr)
         return;

     for (auto* block : pp->entrylist) {
         if (block != nullptr)
             FreeIndexblk(block);
     }
     pp->entrylist.clear();

     pp->curindx = 0;
     pp->indx    = 0;

     if (auto* prot = pp->pbp; prot != nullptr) {
         delete prot->ibp; // deleting a null pointer is a no-op
         delete prot;
         pp->pbp = nullptr;
     }
 }


 /**********************************************************

  *

  *   void FreeParser(pp):

  *

  *                                              3-5-93

  *

  **********************************************************/

 /*

 void FreeParser(ParserPtr pp)

 {

     if (! pp)

         return;


     ResetParserStruct(pp);


     if (pp->fpo)

         MemFree(pp->fpo);

     delete pp;

 }

 */


 /**********************************************************

  *

  *   void CloseFiles(pp):

  *

  *                                              3-4-93

  *

  **********************************************************/

 // Closes pp->qsfd if it is open and resets it to null so that a repeated
 // call does nothing.
 void CloseFiles(ParserPtr pp)
 {
     FILE* const handle = pp->qsfd;
     if (handle == nullptr)
         return;

     fclose(handle);
     pp->qsfd = nullptr;
 }


 /**********************************************************

  *

  *   void MsgSkipTitleFail(flatfile, finfo):

  *

  *                                              7-2-93

  *

  **********************************************************/

 // Posts an error stating that no valid entry start was found in the file
 // named by `flatfile`.  `finfo` is accepted but not used by this function.
 void MsgSkipTitleFail(const char* flatfile, FinfoBlk& finfo)
 {
     ErrPostEx(SEV_ERROR, ERR_ENTRY_Begin, "No valid beginning of entry found in %s file", flatfile);
 }


 // Reads on through `fbuf` until the current line in finfo.str matches
 // `keyword` (compared with StringEquN over keyword.size() characters) or
 // XReadFileBuf() returns true.
 // Returns false when a matching line was found; returns true when
 // `end_of_file` was already set or XReadFileBuf() returned true first.
 bool FindNextEntryBuf(bool end_of_file, FileBuf& fbuf, FinfoBlk& finfo, const CTempString& keyword)
 {
     if (end_of_file)
         return true;

     const char*  wanted    = keyword.data();
     const size_t wantedLen = keyword.size();

     for (;;) {
         if (StringEquN(finfo.str, wanted, wantedLen))
             return false;
         if (XReadFileBuf(fbuf, finfo))
             return true;
     }
 }


 /**********************************************************

  *

  *   bool FlatFileIndex(pp, (*fun)()):

  *

  *                                              10-6-93

  *

  **********************************************************/

 // Dispatches to the indexing routine that matches pp->format and returns
 // its result.  `fun` is handed on to the EMBL and SPROT indexers only.
 // For any other format a message is written to stderr and false is returned.
 bool FlatFileIndex(ParserPtr pp, void (*fun)(IndexblkPtr entry, char* offset, Int4 len))
 {
     switch (pp->format) {
     case Parser::EFormat::GenBank:
         return GenBankIndex(pp);
     case Parser::EFormat::EMBL:
         return EmblIndex(pp, fun);
     case Parser::EFormat::SPROT:
         return SprotIndex(pp, fun);
     case Parser::EFormat::XML:
         return XMLIndex(pp);
     default:
         break;
     }

     fprintf(stderr, "Unknown flatfile format.\n");
     return false;
 }


 /**********************************************************/

 // Maps a source to its table of accession prefixes.
 // Returns nullptr for sources that have no table here.
 const char** GetAccArray(Parser::ESource source)
 {
     switch (source) {
     case Parser::ESource::EMBL:
         return embl_accpref;
     case Parser::ESource::SPROT:
         return sprot_accpref;
     case Parser::ESource::LANL:
         return lanl_accpref;
     case Parser::ESource::DDBJ:
         return ddbj_accpref;
     case Parser::ESource::NCBI:
         return ncbi_accpref;
     case Parser::ESource::Refseq:
         return refseq_accpref;
     default:
         return nullptr;
     }
 }


 // Tells whether `type` is one of the Seq-id choices accepted here:
 // Genbank, Ddbj, Embl, Other, Tpg, Tpd or Tpe.
 bool isSupportedAccession(CSeq_id::E_Choice type)
 {
     return type == CSeq_id::e_Genbank ||
            type == CSeq_id::e_Ddbj ||
            type == CSeq_id::e_Embl ||
            type == CSeq_id::e_Other ||
            type == CSeq_id::e_Tpg ||
            type == CSeq_id::e_Tpd ||
            type == CSeq_id::e_Tpe;
 }


 /**********************************************************/

 // Returns the Seq-id choice that CSeq_id::IdentifyAccession() deduces for
 // `acc`, provided the accession is not flagged as protein and the choice is
 // accepted by isSupportedAccession(); otherwise returns CSeq_id::e_not_set.
 CSeq_id::E_Choice GetNucAccOwner(const CTempString& acc)
 {
     const auto info = CSeq_id::IdentifyAccession(acc);
     if (CSeq_id::fAcc_prot & info)
         return CSeq_id::e_not_set;

     const auto type = CSeq_id::GetAccType(info);
     return isSupportedAccession(type) ? type : CSeq_id::e_not_set;
 }


 /**********************************************************/

 // Returns the Seq-id choice that CSeq_id::IdentifyAccession() deduces for
 // `acc`, provided the accession is flagged as protein and the choice is
 // accepted by isSupportedAccession(); otherwise returns CSeq_id::e_not_set.
 CSeq_id::E_Choice GetProtAccOwner(const CTempString& acc)
 {
     const auto info = CSeq_id::IdentifyAccession(acc);
     if (! (CSeq_id::fAcc_prot & info))
         return CSeq_id::e_not_set;

     const auto type = CSeq_id::GetAccType(info);
     return isSupportedAccession(type) ? type : CSeq_id::e_not_set;
 }


 END_NCBI_SCOPE

FreeIndexblk
void FreeIndexblk(IndexblkPtr ibp)
Definition: block.cpp:143

CDate_std
Definition: Date_std.hpp:53

CRef< CDate_std >

CTempString
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65

CTime
CTime –.
Definition: ncbitime.hpp:296

map
Definition: map.hpp:338

set
Definition: set.hpp:45

fp
static const char fp[]
Definition: des.c:87

ERR_ENTRY_Skipped
#define ERR_ENTRY_Skipped
Definition: flat2err.h:80

flatfile_parse_info.hpp

ftacpp.hpp

StringEquNI
bool StringEquNI(const char *s1, const char *s2, size_t n)
Definition: ftacpp.hpp:131

StringEquN
bool StringEquN(const char *s1, const char *s2, size_t n)
Definition: ftacpp.hpp:121

StringEqu
bool StringEqu(const char *s1, const char *s2)
Definition: ftacpp.hpp:111

StringCpy
void StringCpy(char *d, const char *s)
Definition: ftacpp.hpp:89

StringLen
size_t StringLen(const char *s)
Definition: ftacpp.hpp:60

MemCpy
void MemCpy(void *p, const void *q, size_t sz)
Definition: ftacpp.hpp:50

FtaInstallPrefix
void FtaInstallPrefix(int prefix, const char *name, const char *location)
Definition: ftaerr.cpp:321

ftaerr.hpp

PREFIX_LOCUS
#define PREFIX_LOCUS
Definition: ftaerr.hpp:15

PREFIX_ACCESSION
#define PREFIX_ACCESSION
Definition: ftaerr.hpp:14

first
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46

last
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:51

next
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:56

type
static int type
Definition: getdata.c:31

str
static const char * str(char *buf, int n)
Definition: stats.c:84

offset
int offset
Definition: replacements.h:160

SEV_WARNING
#define SEV_WARNING
Definition: gicache.c:90

SEV_ERROR
#define SEV_ERROR
Definition: gicache.c:91

SEV_REJECT
#define SEV_REJECT
Definition: gicache.c:92

ErrPostStr
#define ErrPostStr
Definition: ncbierr.hpp:68

StringChr
#define StringChr
Definition: ncbistr.hpp:317

ErrPostEx
#define ErrPostEx(sev, err_code,...)
Definition: ncbierr.hpp:78

CSeq_id::IdentifyAccession
static EAccessionInfo IdentifyAccession(const CTempString &accession, TParseFlags flags=fParse_AnyRaw)
Deduces information from a bare accession a la WHICH_db_accession; may report false negatives on prop...
Definition: Seq_id.cpp:1634

CSeq_id::GetAccType
static E_Choice GetAccType(EAccessionInfo info)
Definition: Seq_id.hpp:562

CSeq_id::fAcc_prot
@ fAcc_prot
Definition: Seq_id.hpp:252

NCBI_UNUSED
#define NCBI_UNUSED
Definition: ncbiconf_impl.h:289

Int2
int16_t Int2
2-byte (16-bit) signed integer
Definition: ncbitype.h:100

Int4
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102

Char
char Char
Alias for char.
Definition: ncbitype.h:93

END_NCBI_SCOPE
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103

BEGIN_NCBI_SCOPE
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100

CTempString::end
const_iterator end() const
Return an iterator to the string's ending position (one past the end of the represented sequence)
Definition: tempstr.hpp:306

NStr::StringToNonNegativeInt
static int StringToNonNegativeInt(const CTempString str, TStringToNumFlags flags=0)
Convert string to non-negative integer value.
Definition: ncbistr.cpp:457

NStr::CompareNocase
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
Definition: ncbistr.cpp:219

NStr::IsBlank
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
Definition: ncbistr.cpp:106

CTempString::const_iterator
const char * const_iterator
Definition: tempstr.hpp:71

CTempString::data
const char * data(void) const
Return a pointer to the array represented.
Definition: tempstr.hpp:313

NStr::StartsWith
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5406

CTempString::substr
CTempString substr(size_type pos) const
Obtain a substring from this string, beginning at a given offset.
Definition: tempstr.hpp:776

CTempString::size
size_type size(void) const
Return the length of the represented array.
Definition: tempstr.hpp:327

NStr::ToLower
static string & ToLower(string &str)
Convert string to lower case – string& version.
Definition: ncbistr.cpp:405

CTempString::begin
const_iterator begin() const
Return an iterator to the string's starting position.
Definition: tempstr.hpp:299

CTime::eCurrent
@ eCurrent
Use current time. See also CCurrentTime.
Definition: ncbitime.hpp:300

CSeq_id_Base::E_Choice
E_Choice
Choice variants.
Definition: Seq_id_.hpp:93

CSeq_id_Base::e_Embl
@ e_Embl
Definition: Seq_id_.hpp:100

CSeq_id_Base::e_Other
@ e_Other
for historical reasons, 'other' = 'refseq'
Definition: Seq_id_.hpp:104

CSeq_id_Base::e_Tpe
@ e_Tpe
Third Party Annot/Seq EMBL.
Definition: Seq_id_.hpp:111

CSeq_id_Base::e_Tpd
@ e_Tpd
Third Party Annot/Seq DDBJ.
Definition: Seq_id_.hpp:112

CSeq_id_Base::e_Ddbj
@ e_Ddbj
DDBJ.
Definition: Seq_id_.hpp:107

CSeq_id_Base::e_Genbank
@ e_Genbank
Definition: Seq_id_.hpp:99

CSeq_id_Base::e_not_set
@ e_not_set
No variant selected.
Definition: Seq_id_.hpp:94

CSeq_id_Base::e_Tpg
@ e_Tpg
Third Party Annot/Seq Genbank.
Definition: Seq_id_.hpp:110

index.h

ParFlat_COL_DATE
@ ParFlat_COL_DATE
Definition: index.h:49

ParFlat_COL_STRAND_NEW
@ ParFlat_COL_STRAND_NEW
Definition: index.h:54

ParFlat_COL_MOLECULE_NEW
@ ParFlat_COL_MOLECULE_NEW
Definition: index.h:55

ParFlat_COL_BP_NEW
@ ParFlat_COL_BP_NEW
Definition: index.h:53

ParFlat_COL_TOPOLOGY_NEW
@ ParFlat_COL_TOPOLOGY_NEW
Definition: index.h:56

ParFlat_COL_BP
@ ParFlat_COL_BP
Definition: index.h:44

ParFlat_COL_TOPOLOGY
@ ParFlat_COL_TOPOLOGY
Definition: index.h:47

ParFlat_COL_DATE_NEW
@ ParFlat_COL_DATE_NEW
Definition: index.h:58

ParFlat_COL_DIV
@ ParFlat_COL_DIV
Definition: index.h:48

ParFlat_COL_BASES
@ ParFlat_COL_BASES
Definition: index.h:43

ParFlat_COL_DIV_NEW
@ ParFlat_COL_DIV_NEW
Definition: index.h:57

ParFlat_COL_BASES_NEW
@ ParFlat_COL_BASES_NEW
Definition: index.h:52

ParFlat_COL_MOLECULE
@ ParFlat_COL_MOLECULE
Definition: index.h:46

ParFlat_COL_STRAND
@ ParFlat_COL_STRAND
Definition: index.h:45

s_RefineWGSType
static int s_RefineWGSType(string_view accession, int initialType)
Definition: indx_blk.cpp:1129

ParFlat_NA_array_DDBJ
static const char * ParFlat_NA_array_DDBJ[]
Definition: indx_blk.cpp:66

USING_SCOPE
USING_SCOPE(objects)

sIsUpperAlpha
bool sIsUpperAlpha(char c)
Definition: indx_blk.cpp:987

ddbj_accpref
static const char * ddbj_accpref[]
Definition: indx_blk.cpp:104

ddbj_wgs_accpref
static const char * ddbj_wgs_accpref[]
Definition: indx_blk.cpp:164

ncbi_tpa_accpref
static const char * ncbi_tpa_accpref[]
Definition: indx_blk.cpp:152

SkipTitle
NCBI_UNUSED bool SkipTitle(FILE *fp, FinfoBlk &finfo, const char *str, size_t len)
Definition: indx_blk.cpp:341

CheckAccession
static bool CheckAccession(TokenStatBlkPtr stoken, Parser::ESource source, Parser::EMode mode, const char *priacc, unsigned skip)
Definition: indx_blk.cpp:1506

GetUpdateDate
CRef< CDate_std > GetUpdateDate(const char *ptr, Parser::ESource source)
Definition: indx_blk.cpp:610

sFindNextSpace
static CTempString::const_iterator sFindNextSpace(const CTempString &tempString, CTempString::const_iterator current_it)
Definition: indx_blk.cpp:680

s_IsVDBWGSScaffold
static bool s_IsVDBWGSScaffold(string_view accession)
Definition: indx_blk.cpp:1087

GetNucAccOwner
CSeq_id::E_Choice GetNucAccOwner(const CTempString &acc)
Definition: indx_blk.cpp:2244

k_WgsScaffoldPrefix
static const set< string_view > k_WgsScaffoldPrefix
Definition: indx_blk.cpp:168

GetResidue
static const char * GetResidue(TokenStatBlkPtr stoken)
Definition: indx_blk.cpp:235

fta_if_wgs_acc
int fta_if_wgs_acc(string_view accession)
Definition: indx_blk.cpp:1190

XML_STRAND_array
static const char * XML_STRAND_array[]
Definition: indx_blk.cpp:58

IsWGSAccPrefix
static bool IsWGSAccPrefix(const Parser &parseInfo, const char *acc)
Definition: indx_blk.cpp:1703

ParFlat_NA_array
static const char * ParFlat_NA_array[]
Definition: indx_blk.cpp:74

GetAccArray
const char ** GetAccArray(Parser::ESource source)
Definition: indx_blk.cpp:2207

refseq_accpref
static const char * refseq_accpref[]
Definition: indx_blk.cpp:135

isSupportedAccession
bool isSupportedAccession(CSeq_id::E_Choice type)
Definition: indx_blk.cpp:2224

acc_tsa_allowed
static const char * acc_tsa_allowed[]
Definition: indx_blk.cpp:146

CheckTPG
int CheckTPG(const string &str)
Definition: indx_blk.cpp:501

sSetLocusLineOffsets
static void sSetLocusLineOffsets(const CTempString &locusLine, LocusCont &offsets)
Definition: indx_blk.cpp:695

ResetParserStruct
void ResetParserStruct(ParserPtr pp)
Definition: indx_blk.cpp:2087

CheckNA
Int2 CheckNA(const char *str)
Definition: indx_blk.cpp:525

FindNextEntryBuf
bool FindNextEntryBuf(bool end_of_file, FileBuf &fbuf, FinfoBlk &finfo, const CTempString &keyword)
Definition: indx_blk.cpp:2162

sFindNextNonSpace
static CTempString::const_iterator sFindNextNonSpace(const CTempString &tempString, CTempString::const_iterator current_it)
Definition: indx_blk.cpp:688

sNotAllDigits
bool sNotAllDigits(const char *first, const char *last)
Definition: indx_blk.cpp:1489

sprot_accpref
static const char * sprot_accpref[]
Definition: indx_blk.cpp:100

GetProtAccOwner
CSeq_id::E_Choice GetProtAccOwner(const CTempString &acc)
Definition: indx_blk.cpp:2261

XML_TPG_array
static const char * XML_TPG_array[]
Definition: indx_blk.cpp:62

CheckSTRAND
int CheckSTRAND(const string &str)
Definition: indx_blk.cpp:466

XMLIndex
bool XMLIndex(ParserPtr pp)
Definition: xm_index.cpp:1401

ParFlat_RESIDUE_STR
static const char * ParFlat_RESIDUE_STR[]
Definition: indx_blk.cpp:192

ParFlat_DIV_array
static const char * ParFlat_DIV_array[]
Definition: indx_blk.cpp:81

FileGetsBuf
static Int2 FileGetsBuf(char *res, Int4 size, FileBuf &fbuf)
Definition: indx_blk.cpp:288

InitialEntry
IndexblkPtr InitialEntry(ParserPtr pp, FinfoBlk &finfo)
Definition: indx_blk.cpp:787

SprotIndex
bool SprotIndex(ParserPtr pp, void(*fun)(IndexblkPtr entry, char *offset, Int4 len))
Definition: sp_index.cpp:109

lanl_accpref
static const char * lanl_accpref[]
Definition: indx_blk.cpp:96

DelNonDigitTail
void DelNonDigitTail(string &str)
Definition: indx_blk.cpp:958

month_name
static const char * month_name[]
Definition: indx_blk.cpp:188

GenBankIndex
bool GenBankIndex(ParserPtr pp)
Definition: gb_index.cpp:337

ddbj_tpa_accpref
static const char * ddbj_tpa_accpref[]
Definition: indx_blk.cpp:156

SkipTitleBuf
bool SkipTitleBuf(FileBuf &fbuf, FinfoBlk &finfo, const CTempString &keyword)
Definition: indx_blk.cpp:357

IsNewAccessFormat
Int4 IsNewAccessFormat(const Char *acnum)
Definition: indx_blk.cpp:992

EmblIndex
bool EmblIndex(ParserPtr pp, void(*fun)(IndexblkPtr entry, char *offset, Int4 len))
Definition: em_index.cpp:192

sourceNames
static const map< Parser::ESource, string > sourceNames
Definition: indx_blk.cpp:177

CheckLocusSP
static bool CheckLocusSP(const char *locus)
Definition: indx_blk.cpp:418

XMLCheckTPG
Int2 XMLCheckTPG(string_view str)
Definition: indx_blk.cpp:490

CheckDIV
Int2 CheckDIV(const char *str)
Definition: indx_blk.cpp:531

FlatFileIndex
bool FlatFileIndex(ParserPtr pp, void(*fun)(IndexblkPtr entry, char *offset, Int4 len))
Definition: indx_blk.cpp:2181

GetAccession
bool GetAccession(const Parser *pp, string_view str, IndexblkPtr entry, unsigned skip)
Definition: indx_blk.cpp:1940

ValidMolTypes
static const char * ValidMolTypes[]
Definition: indx_blk.cpp:196

CloseFiles
void CloseFiles(ParserPtr pp)
Definition: indx_blk.cpp:2139

IsTLSAccPrefix
static void IsTLSAccPrefix(const Parser &parseInfo, const char *acc, IndexblkPtr ibp)
Definition: indx_blk.cpp:1773

IsSPROTAccession
bool IsSPROTAccession(const char *acc)
Definition: indx_blk.cpp:1254

isSpace
static bool isSpace(char c)
Definition: indx_blk.cpp:674

IsTSAAccPrefix
static void IsTSAAccPrefix(const Parser &parseInfo, const char *acc, IndexblkPtr ibp)
Definition: indx_blk.cpp:1718

XMLCheckSTRAND
Int2 XMLCheckSTRAND(string_view str)
Definition: indx_blk.cpp:484

fta_check_embl_moltype
static bool fta_check_embl_moltype(char *str)
Definition: indx_blk.cpp:632

DelNoneDigitTail
void DelNoneDigitTail(char *str)
Definition: indx_blk.cpp:944

XReadFileBuf
bool XReadFileBuf(FileBuf &fbuf, FinfoBlk &finfo)
Definition: indx_blk.cpp:313

CkLocusLinePos
bool CkLocusLinePos(char *offset, Parser::ESource source, LocusContPtr lcp, bool is_mga)
Definition: indx_blk.cpp:537

XReadFile
static bool XReadFile(FILE *fp, FinfoBlk &finfo)
Definition: indx_blk.cpp:262

IsValidAccessPrefix
static bool IsValidAccessPrefix(const char *acc, const char **accpref)
Definition: indx_blk.cpp:1044

MsgSkipTitleFail
void MsgSkipTitleFail(const char *flatfile, FinfoBlk &finfo)
Definition: indx_blk.cpp:2154

IsTPAAccPrefix
static bool IsTPAAccPrefix(const Parser &parseInfo, const char *acc)
Definition: indx_blk.cpp:1674

CheckLocus
static bool CheckLocus(const char *locus, Parser::ESource source)
Definition: indx_blk.cpp:378

embl_accpref
static const char * embl_accpref[]
Definition: indx_blk.cpp:87

ncbi_accpref
static const char * ncbi_accpref[]
Definition: indx_blk.cpp:114

ncbi_wgs_accpref
static const char * ncbi_wgs_accpref[]
Definition: indx_blk.cpp:160

CkDateFormat
static bool CkDateFormat(const char *date)
Definition: indx_blk.cpp:453

IsPatentedAccPrefix
static bool IsPatentedAccPrefix(const Parser &parseInfo, const char *acc)
Definition: indx_blk.cpp:1606

fta_if_master_wgs_accession
static bool fta_if_master_wgs_accession(const char *acnum, Int4 accformat)
Definition: indx_blk.cpp:1063

CheckNADDBJ
Int2 CheckNADDBJ(const char *str)
Definition: indx_blk.cpp:519

ParFlat_AA_array_DDBJ
static const char * ParFlat_AA_array_DDBJ[]
Definition: indx_blk.cpp:70

indx_blk.h

indx_def.h

ERR_FORMAT_BadlyFormattedIDLine
#define ERR_FORMAT_BadlyFormattedIDLine
Definition: indx_err.h:58

ERR_ENTRY_Begin
#define ERR_ENTRY_Begin
Definition: indx_err.h:63

ERR_ACCESSION_WGSProjectAccIsPri
#define ERR_ACCESSION_WGSProjectAccIsPri
Definition: indx_err.h:69

ERR_ACCESSION_NoAccessNum
#define ERR_ACCESSION_NoAccessNum
Definition: indx_err.h:68

ERR_FORMAT_LocusLinePosition
#define ERR_FORMAT_LocusLinePosition
Definition: indx_err.h:43

ERR_LOCUS_BadLocusName
#define ERR_LOCUS_BadLocusName
Definition: indx_err.h:74

ERR_ACCESSION_BadAccessNum
#define ERR_ACCESSION_BadAccessNum
Definition: indx_err.h:67

ERR_FORMAT_IllegalCAGEMoltype
#define ERR_FORMAT_IllegalCAGEMoltype
Definition: indx_err.h:57

ERR_LOCUS_NoLocusName
#define ERR_LOCUS_NoLocusName
Definition: indx_err.h:75

ERR_FORMAT_InvalidIDlineMolType
#define ERR_FORMAT_InvalidIDlineMolType
Definition: indx_err.h:59

b
b
Definition: lex.newick.cpp:1285

i
int i
Definition: lex.newick.cpp:1456

n
yy_size_t n
Definition: lex.newick.cpp:1455

len
int len
Definition: lex.newick.cpp:1450

info
static MDB_envinfo info
Definition: mdb_load.c:37

lmdb::mode
mdb_mode_t mode
Definition: lmdb++.h:38

ncbi::grid::netcache::search::fields::size
const struct ncbi::grid::netcache::search::fields::SIZE size

objects
Definition: wiggle_export_job.hpp:44

rapidjson::source
const CharType(& source)[N]
Definition: pointer.h:1149

ncbi_pch.hpp

isalpha
int isalpha(Uchar c)
Definition: ncbictype.hpp:61

isspace
int isspace(Uchar c)
Definition: ncbictype.hpp:69

isdigit
int isdigit(Uchar c)
Definition: ncbictype.hpp:64

offsets
static PCRE2_SIZE * offsets
Definition: pcre2grep.c:266

count
#define count
Definition: pcre2posix_test.c:54

l
static SLJIT_INLINE sljit_ins l(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
Definition: sljitNativeS390X.c:601

FileBuf
Definition: flatfile_parse_info.hpp:52

FileBuf::current
const char * current
Definition: flatfile_parse_info.hpp:54

FileBuf::get_offs
size_t get_offs() const
Definition: flatfile_parse_info.hpp:62

FinfoBlk
Definition: indx_blk.h:41

FinfoBlk::str
Char str[256]
Definition: indx_blk.h:42

FinfoBlk::pos
size_t pos
Definition: indx_blk.h:44

FinfoBlk::line
Int4 line
Definition: indx_blk.h:43

Indexblk
Definition: ftablock.h:165

Indexblk::acnum
Char acnum[200]
Definition: ftablock.h:166

Indexblk::division
Char division[4]
Definition: ftablock.h:171

Indexblk::ppp
Parser * ppp
Definition: ftablock.h:250

Indexblk::is_mga
bool is_mga
Definition: ftablock.h:199

Indexblk::secaccs
TokenBlkList secaccs
Definition: ftablock.h:216

Indexblk::tsa_allowed
bool tsa_allowed
Definition: ftablock.h:211

Indexblk::wgs_and_gi
Int4 wgs_and_gi
Definition: ftablock.h:231

Indexblk::is_tls
bool is_tls
Definition: ftablock.h:208

Indexblk::blocusname
Char blocusname[200]
Definition: ftablock.h:178

Indexblk::date
CRef< objects::CDate_std > date
Definition: ftablock.h:187

Indexblk::vernum
Int2 vernum
Definition: ftablock.h:167

Indexblk::is_tpa
bool is_tpa
Definition: ftablock.h:206

Indexblk::embl_new_ID
bool embl_new_ID
Definition: ftablock.h:218

Indexblk::is_wgs
bool is_wgs
Definition: ftablock.h:205

Indexblk::STS
bool STS
Definition: ftablock.h:193

Indexblk::is_pat
bool is_pat
Definition: ftablock.h:202

Indexblk::HTC
bool HTC
Definition: ftablock.h:195

Indexblk::drop
bool drop
Definition: ftablock.h:182

Indexblk::bases
size_t bases
Definition: ftablock.h:172

Indexblk::is_tsa
bool is_tsa
Definition: ftablock.h:207

Indexblk::EST
bool EST
Definition: ftablock.h:192

Indexblk::linenum
size_t linenum
Definition: ftablock.h:180

Indexblk::wgssec
string wgssec
Definition: ftablock.h:236

Indexblk::offset
size_t offset
Definition: ftablock.h:168

Indexblk::locusname
Char locusname[200]
Definition: ftablock.h:170

Indexblk::Indexblk
Indexblk()
Definition: indx_blk.cpp:665

Indexblk::lc
LocusCont lc
Definition: ftablock.h:212

Indexblk::GSS
bool GSS
Definition: ftablock.h:194

LocusCont
Definition: ftablock.h:105

LocusCont::bases
Int4 bases
Definition: ftablock.h:106

LocusCont::molecule
Int4 molecule
Definition: ftablock.h:109

LocusCont::strand
Int4 strand
Definition: ftablock.h:108

LocusCont::topology
Int4 topology
Definition: ftablock.h:110

LocusCont::date
Int4 date
Definition: ftablock.h:112

LocusCont::bp
Int4 bp
Definition: ftablock.h:107

LocusCont::div
Int4 div
Definition: ftablock.h:111

Parser
Definition: flatfile_parse_info.hpp:66

Parser::EFormat::SPROT
@ SPROT

Parser::EFormat::XML
@ XML

Parser::EFormat::EMBL
@ EMBL

Parser::EFormat::GenBank
@ GenBank

Parser::EMode
EMode
Definition: flatfile_parse_info.hpp:73

Parser::EMode::Relaxed
@ Relaxed

Parser::acprefix
const char * acprefix
Definition: flatfile_parse_info.hpp:135

Parser::entrylist
vector< IndexblkPtr > entrylist
Definition: flatfile_parse_info.hpp:107

Parser::ESource
ESource
Definition: flatfile_parse_info.hpp:80

Parser::ESource::SPROT
@ SPROT

Parser::ESource::Flybase
@ Flybase

Parser::ESource::DDBJ
@ DDBJ

Parser::ESource::EMBL
@ EMBL

Parser::ESource::LANL
@ LANL

Parser::ESource::unknown
@ unknown

Parser::ESource::Refseq
@ Refseq

Parser::ESource::USPTO
@ USPTO

Parser::ESource::NCBI
@ NCBI

Parser::ESource::GenBank
@ GenBank

Parser::accpref
const char ** accpref
Definition: flatfile_parse_info.hpp:180

Parser::qsfd
FILE * qsfd
Definition: flatfile_parse_info.hpp:215

Parser::source
ESource source
Definition: flatfile_parse_info.hpp:129

Parser::all
bool all
Definition: flatfile_parse_info.hpp:130

Parser::curindx
Int4 curindx
Definition: flatfile_parse_info.hpp:108

Parser::indx
Int4 indx
Definition: flatfile_parse_info.hpp:105

Parser::pbp
ProtBlkPtr pbp
Definition: flatfile_parse_info.hpp:146

Parser::accver
bool accver
Definition: flatfile_parse_info.hpp:183

Parser::mode
EMode mode
Definition: flatfile_parse_info.hpp:189

Parser::format
EFormat format
Definition: flatfile_parse_info.hpp:128

ProtBlk::ibp
InfoBioseq * ibp
Definition: ftablock.h:99

TokenStatBlk
Definition: ftablock.h:136

TokenStatBlk::list
TokenBlkList list
Definition: ftablock.h:137

TokenStatBlk::num
Int2 num
Definition: ftablock.h:138

type
Definition: type.c:6

done
done
Definition: token1.c:1

StringMatchIcase
Int2 StringMatchIcase(const Char **array, string_view text)
Definition: utilfun.cpp:507

MatchArraySubString
Int2 MatchArraySubString(const Char **array, string_view text)
Definition: utilfun.cpp:578

TokenString
unique_ptr< TokenStatBlk > TokenString(const char *str, Char delimiter)
Definition: utilfun.cpp:445

get_full_date
CRef< CDate_std > get_full_date(const char *s, bool is_ref, Parser::ESource source)
Definition: utilfun.cpp:827

ParseAccessionRange
bool ParseAccessionRange(list< string > &tokens, unsigned skip)
Definition: utilfun.cpp:265

fta_StringMatch
Int2 fta_StringMatch(const Char **array, string_view text)
Definition: utilfun.cpp:486

utilfun.h