85 #define THIS_FILE "sp_ascii.cpp"
92 "ALTERNATIVE PRODUCTS:",
93 "BIOPHYSICOCHEMICAL PROPERTIES:",
95 "CATALYTIC ACTIVITY:",
99 "DEVELOPMENTAL STAGE:",
101 "DISRUPTION PHENOTYPE:",
103 "ENZYME REGULATION:",
107 "MASS SPECTROMETRY:",
116 "SUBCELLULAR LOCATION:",
118 "TISSUE SPECIFICITY:",
170 {
"MOD_RES",
ParFlatSPSites, 10,
"5-glutamyl glycerylphosphorylethanolamine" },
227 {
"MOD_RES",
ParFlatSPSites, 13,
"Glutamate methyl ester (Gln)" },
228 {
"MOD_RES",
ParFlatSPSites, 13,
"Glutamate methyl ester (Glu)" },
246 {
"MOD_RES",
ParFlatSPSites, 13,
"N6,N6,N6-trimethyl-5-hydroxylysine" },
251 {
"MOD_RES",
ParFlatSPSites, 13,
"N6-poly(methylaminopropyl)lysine" },
273 {
"MOD_RES",
ParFlatSPSites, 14,
"(3R,4R)-3,4-dihydroxyproline" },
274 {
"MOD_RES",
ParFlatSPSites, 14,
"(3R,4R)-4,5-dihydroxyisoleucine" },
275 {
"MOD_RES",
ParFlatSPSites, 14,
"(3R,4S)-3,4-dihydroxyproline" },
279 {
"MOD_RES",
ParFlatSPSites, 14,
"(3S,4R)-3,4-dihydroxyisoleucine" },
283 {
"MOD_RES",
ParFlatSPSites, 14,
"3',4'-dihydroxyphenylalanine" },
296 {
"MOD_RES",
ParFlatSPSites, 14,
"5-hydroxy-3-methylproline (Ile)" },
304 {
"MOD_RES",
ParFlatSPSites, 14,
"N6-(3,6-diaminohexanoyl)-5-hydroxylysine" },
311 {
"MOD_RES",
ParFlatSPSites, 17,
"Pyrrolidone carboxylic acid (Glu)" },
359 {
"MOTIF",
ParFlatSPRegions, -1,
"Short sequence motif of biological interest" },
367 {
nullptr, 0, 0,
nullptr }
372 #define ParFlatSPSitesModB 9
373 #define ParFlatSPSitesModE 174
375 #define COPYRIGHT "This Swiss-Prot entry is copyright."
376 #define COPYRIGHT1 "Copyrighted by the UniProt Consortium,"
378 #define SPDE_RECNAME 000001
379 #define SPDE_ALTNAME 000002
380 #define SPDE_SUBNAME 000004
381 #define SPDE_FLAGS 000010
382 #define SPDE_INCLUDES 000020
383 #define SPDE_CONTAINS 000040
384 #define SPDE_FULL 000100
385 #define SPDE_SHORT 000200
386 #define SPDE_EC 000400
387 #define SPDE_ALLERGEN 001000
388 #define SPDE_BIOTECH 002000
389 #define SPDE_CD_ANTIGEN 004000
390 #define SPDE_INN 010000
471 "STRAIN",
"SUBSTRAIN",
"TYPE",
"SUBTYPE",
"VAR.",
"SEROTYPE",
472 "SEROGROUP",
"SEROVAR",
"CULTIVAR",
"PATHOVAR",
"CHEMOVAR",
"BIOVAR",
473 "BIOTYPE",
"GROUP",
"SUBGROUP",
"ISOLATE",
"ACRONYM",
"DOSAGE",
474 "NAT_HOST",
"SUBSP.",
nullptr
478 "2DBASE-ECOLI",
"AARHUS/GHENT-2DPAGE",
"AGD",
479 "ANU-2DPAGE",
"BURULIST",
"CARBBANK",
480 "CMR",
"CORNEA-2DPAGE",
"DICTYDB",
481 "DOMO",
"ECO2DBASE",
"GCRDB",
482 "GENEVESTIGATOR",
"GENEW",
"GENOMEREVIEWS",
483 "GERMONLINE",
"HIV",
"HSC-2DPAGE",
484 "HSSP",
"IPI",
"LINKHUB",
485 "LISTILIST",
"MAIZE-2DPAGE",
"MENDEL",
486 "MGD",
"MYPULIST",
"NMPDR",
487 "PATHWAY_INTERACTION_DB",
"PHCI-2DPAGE",
"PHOSSITE",
488 "PPTASEDB",
"PROTCLUSTDB",
"PHOTOLIST",
489 "PMMA-2DPAGE",
"RAT-HEART-2DPAGE",
"RZPD-PROTEXP",
490 "SAGALIST",
"SIENA-2DPAGE",
"STYGENE",
491 "SUBTILIST",
"TIGR",
"TRANSFAC",
492 "WORMPEP",
"YEPD",
"YPD",
497 "ALLERGOME",
"ARACHNOSERVER",
"ARAPORT",
498 "ARRAYEXPRESS",
"BEEBASE",
"BGD",
499 "BGEE",
"BINDINGDB",
"BIOCYC",
500 "BIOGRID",
"BIOMUTA",
"BRENDA",
501 "CAZY",
"CCDS",
"CDD",
502 "CGD",
"CHEMBL",
"CHITARS",
503 "CLEANEX",
"COLLECTF",
"COMPLUYEAST-2DPAGE",
504 "CONOSERVER",
"CTD",
"CYGD",
505 "DBSNP",
"DEPOD",
"DICTYBASE",
506 "DIP",
"DISGENET",
"DISPROT",
507 "DMDM",
"DNASU",
"DOSAC-COBS-2DPAGE",
508 "DRUGBANK",
"ECHOBASE",
"ECOGENE",
509 "EGGNOG",
"EMBL",
"ENSEMBL",
510 "ENSEMBLBACTERIA",
"ENSEMBLFUNGI",
"ENSEMBLMETAZOA",
511 "ENSEMBLPLANTS",
"ENSEMBLPROTISTS",
"EPD",
512 "ESTHER",
"EUHCVDB",
"EUPATHDB",
513 "EUROPEPMC",
"EVOLUTIONARYTRACE",
"EXPRESSIONATLAS",
514 "FLYBASE",
"GENE3D",
"GENECARDS",
515 "GENEDB",
"GENEDB_SPOMBE",
"GENEFARM",
516 "GENEID",
"GENEREVIEWS",
"GENETREE",
517 "GENEVISIBLE",
"GENEWIKI",
"GENOLIST",
518 "GENOMERNAI",
"GK",
"GLYCOSUITEDB",
519 "GRAINGENES",
"GO",
"GRAMENE",
520 "GUIDETOPHARMACOLOGY",
"H-INVDB",
"HAMAP",
521 "HGNC",
"HOGENOM",
"HOVERGEN",
522 "HPA",
"IMGT/GENE-DB",
"IMGT/HLA",
523 "IMGT/LIGM",
"IMGT_GENE-DB",
"INPARANOID",
524 "INTACT",
"INTERPRO",
"IPD-KIR",
525 "IPTMNET",
"KEGG",
"KO",
526 "LEGIOLIST",
"LEPROMA",
"MAIZEDB",
527 "MAIZEGDB",
"MALACARDS",
"MAXQB",
528 "MEROPS",
"MGI",
"MIM",
529 "MINT",
"MIRBASE",
"MOONPROT",
530 "MYCOCLAP",
"NEXTBIO",
"NEXTPROT",
531 "OGP",
"OMA",
"OPENTARGETS",
532 "ORPHANET",
"ORTHODB",
"PANTHER",
533 "PATRIC",
"PAXDB",
"PDB",
534 "PDBSUM",
"PEPTIDEATLAS",
"PEROXIBASE",
535 "PFAM",
"PHARMGKB",
"PHOSPHOSITE",
536 "PHOSPHOSITEPLUS",
"PHYLOMEDB",
"PIR",
537 "PIRSF",
"PMAP-CUTDB",
"POMBASE",
538 "PR",
"PR2",
"PRIDE",
539 "PRINTS",
"PRO",
"PRODOM",
540 "PROMEX",
"PROSITE",
"PROTEINMODELPORTAL",
541 "PROTEOMES",
"PSEUDOCAP",
"REACTOME",
542 "REBASE",
"REFSEQ",
"REPRODUCTION-2DPAGE",
543 "RGD",
"RZPD",
"SABIO-RK",
544 "SFLD",
"SGD",
"SIGNALINK",
545 "SIGNALLINK",
"SIGNOR",
"SMART",
546 "SMR",
"STRING",
"SUPFAM",
547 "SWISS-2DPAGE",
"SWISSLIPIDS",
"SWISSPALM",
548 "TAIR",
"TCDB",
"TIGRFAMS",
549 "TOPDOWNPROTEOMICS",
"TREEFAM",
"TUBERCULIST",
550 "UCD-2DPAGE",
"UCSC",
"UNICARBKB",
551 "UNIGENE",
"UNILIB",
"UNIPATHWAY",
552 "UNITE",
"VBASE2",
"VECTORBASE",
553 "VEGA-TR",
"VEGA-GN",
"VGNC",
554 "WBPARASITE",
"WORLD-2DPAGE",
"WORMBASE",
555 "XENBASE",
"ZFIN",
nullptr
559 "CHLOROPLAST",
"CYANELLE",
"MITOCHONDRION",
"PLASMID",
"NUCLEOMORPH",
560 "HYDROGENOSOME",
"APICOPLAST",
"CHROMATOPHORE",
561 "ORGANELLAR CHROMATOPHORE",
nullptr
565 "Evidence at protein level",
566 "Evidence at transcript level",
567 "Inferred from homology",
588 if (delim && *delim !=
'\0' && ! dest.empty())
608 tag->SetTag().SetStr(
str);
659 pdb_seq_id->SetChain(chain);
678 if (! mol || ! chain)
682 for (bad =
false, got =
false, q = chain; *q !=
'\0'; q = p) {
683 while (*q ==
' ' || *q ==
',')
685 for (p = q; *p !=
'\0' && *p !=
' ' && *p !=
',';)
695 for (
r = q; *
r !=
'\0';
r++) {
698 if (
r[1] !=
'/' &&
r[1] !=
'\0') {
699 while (*
r !=
'/' && *
r !=
'\0')
737 pdb_seq_id->SetRel(*date);
750 if (choice < 1 || choice > 4)
757 }
else if (choice == 4) {
769 for (gmod = -1; dbp; dbp = dbp->
mpNext)
772 for (; subdbp; subdbp = subdbp->
mpNext)
776 for (p += 8; *p ==
' ';)
797 if (gmod == 7 || gmod == 8)
817 while (*eptr ==
' ' && eptr >
offset)
821 pIndex->
bases = atoi(eptr + 1);
822 while (*eptr ==
' ' && eptr >
offset)
854 for (; dbp; dbp = dbp->
mpNext) {
921 for (p = ptr + shift; *p ==
' ';)
924 if (*p == symb || *p ==
'\0') {
929 while (*p ==
'.' || *p ==
'-' || *p ==
'n' ||
isdigit(*p) != 0)
932 while (*p ==
' ' || *p ==
')')
943 if (ptr[8] ==
'\0') {
952 for (q = ptr + 8;;) {
955 if (! q || (p && q > p))
961 else if (q[9] ==
'\0')
963 else if (q[9] ==
's' || q[9] ==
'S') {
966 else if (q[10] ==
'\0')
979 if (ptr[8] ==
'\0') {
1008 for (p = ptr + 8; *p !=
'\0' && *p !=
')';)
1010 while (*p ==
' ' || *p ==
')')
1020 while (p > ptr && *(p - 1) ==
' ')
1039 for (
size_t i = 0;
i < dbp->
len;
i++)
1044 for (q = dbp->
mOffset; *q !=
'\0';) {
1049 for (q += 5; *q !=
'\n' && *q !=
'\0'; q++)
1057 while (*p ==
'.' || *p ==
' ' || *p ==
'\t') {
1077 if (! line || line[0] ==
'\0')
1079 for (p = line; *p ==
' ' || *p ==
'\t' || *p ==
'.' || *p ==
',';)
1095 for (
r = p - 1; *
r ==
' ' || *
r ==
'\t';
r--) {
1106 for (p++; *p ==
' ' || *p ==
'\t';)
1109 for (
i = 1; *p !=
'\0'; p++) {
1125 for (
r = p - 1; *
r ==
' ' || *
r ==
'\t';
r--) {
1156 size_t len = taxname.size();
1160 const Char* p = taxname.c_str() +
len - 3;
1161 if ((p[0] ==
' ' || p[0] ==
'\t') && (p[1] ==
's' || p[1] ==
'S') &&
1162 (p[2] ==
'p' || p[2] ==
'P') && p[3] ==
'\0') {
1189 if (sosp->
name && sosp->
name[0] !=
'\0')
1192 for (synsp = sosp->
syn; synsp; synsp = synsp->
next) {
1194 if (! p || *p ==
'\0')
1201 i = (*q ==
'C' || *q ==
'c') ? 5 : 7;
1206 if ((q == p || q[0] ==
' ' || q[0] ==
'\t') &&
1207 (q[
i] ==
' ' || q[
i] ==
'\t' || q[
i] ==
'\0')) {
1220 if ((
StringEquNI(p,
"PV.", 3) && (p[3] ==
' ' || p[3] ==
'\t' || p[3] ==
'\0')) ||
1233 for (q = p; *p !=
'\0' && *p !=
' ' && *p !=
'\t';)
1236 org_ref->
SetSyn().push_back(q);
1241 for (q = p + 1; *q ==
' ' || *q ==
'\t';)
1248 org_ref->
SetSyn().push_back(q);
1268 if (*p ==
' ' && (p[
i] ==
' ' || p[
i] ==
'\t' || p[
i] ==
'\0')) {
1283 if (! taxname.empty())
1308 for (ssp = sosp->
syn; ssp; ssp = tssp) {
1328 for (; dbp; dbp = dbp->
mpNext)
1351 if (!
StringEquNI(line,
"\nOH NCBI_TaxID=", 17)) {
1371 for (p += 17, q = p; *q ==
' ';)
1375 if ((!
r ||
r > p) && p) {
1378 for (p--; *p ==
';' || *p ==
' ';)
1381 for (
r = q; *
r >=
'0' && *
r <=
'9';)
1389 for (p++; *p ==
' ' || *p ==
';';)
1396 while ((*
r ==
' ' || *
r ==
'.' || *
r ==
'\0') &&
r > p)
1398 if (*
r !=
'\0' && *
r !=
'.' && *
r !=
' ')
1442 for (; subdbp; subdbp = subdbp->
mpNext) {
1464 for (q = p; *q ==
' ';)
1470 for (p = line + 16; *p ==
' ';)
1476 for (q = p; *q >=
'0' && *q <=
'9';)
1478 if (*q ==
' ' || *q ==
'\0')
1480 if (taxid <=
ZERO_TAX_ID || (*q !=
' ' && *q !=
'\0')) {
1508 for (dbp = entry; dbp; dbp = dbp->
mpNext) {
1521 if (line_OS && line_OS[0] !=
'\0') {
1523 if (sosp && sosp->
name && sosp->
name[0] !=
'\0') {
1531 if (org_ref.
NotEmpty() && line_OC && line_OC[0] !=
'\0') {
1545 char* eptr =
nullptr;
1551 for (; dbp; dbp = dbp->
mpNext) {
1556 for (; subdbp; subdbp = subdbp->
mpNext) {
1576 for (ptr =
str; *ptr !=
'\n' && *ptr !=
' ';)
1580 plasms.push_back(
string(
str, ptr));
1600 if (! p || *p ==
'\0')
1604 if (*p ==
'\0' || *p ==
'\n')
1606 if ((*p ==
';' || *p ==
'.') && (p[1] ==
' ' || p[1] ==
'\n'))
1610 if (*p ==
'\0' || *p ==
'\n')
1617 while (*p ==
' ' || *p ==
';' || *p ==
'.')
1630 char* end =
nullptr;
1634 if (!
str || *
str ==
'\0')
1637 if (
str[0] ==
'-') {
1642 lID = strtoll(
str + 1, &end, 10);
1643 if ((lID == 0 &&
str + 1 == end) || (lID == LLONG_MAX && errno == ERANGE)) {
1653 }
else if (*
str ==
'E' || *
str ==
'D') {
1656 tag->SetTag().SetStr(
str);
1677 if (
str[0] ==
'-' &&
str[1] ==
'\0')
1681 for (vnp = *
head; vnp; vnp = vnp->
next)
1689 for (vnp = *
head; vnp; vnp = vnp->
next) {
1711 for (CSP_block::TSeqref::iterator cur_ref = refs.begin(); cur_ref != refs.end(); ++cur_ref) {
1712 if ((*cur_ref)->Which() !=
CSeq_id::e_Pdb || (*cur_ref)->GetPdb().IsSetRel())
1718 CSP_block::TSeqref::iterator next_ref = cur_ref;
1720 for (++next_ref; next_ref != refs.end();) {
1722 (*next_ref)->GetPdb().IsSetRel())
1725 const CPDB_seq_id& next_id = (*next_ref)->GetPdb();
1733 if (! got && cur_id.
GetChain() == 32) {
1747 next_ref = refs.erase(next_ref);
1766 if (! embl_acc_list || ! embl_acc_list->
next->
next)
1769 for (vnp = embl_acc_list; vnp; vnp = vnp->
next->
next) {
1773 for (p = q + 1; *p >=
'0' && *p <=
'9';)
1885 bool check_embl_prot;
1908 embl_vnp = embl_acc_list;
1909 check_embl_prot =
false;
1925 if (! token1 || ! token2 || ! token3 ||
1967 }
else if (
AddToList(&acc_list, token2)) {
1969 p ? (
Int2) atoi(p + 1) : 0));
1977 if (token3[0] >=
'A' && token3[0] <=
'Z' &&
1978 token3[1] >=
'A' && token3[1] <=
'Z') {
1984 for (q = p + 1; *q >=
'0' && *q <=
'9';)
1986 if (q == p + 1 || *q !=
'\0')
1998 embl_vnp = embl_vnp->
next;
2000 embl_vnp = embl_vnp->
next;
2004 check_embl_prot =
true;
2025 if (
AddToList(&ens_tran_list, token2)) {
2031 if (!
AddToList(&ens_prot_list, token3)) {
2039 if (token4 &&
AddToList(&ens_gene_list, token4)) {
2046 if (token2[0] >=
'A' && token2[0] <=
'Z' &&
2047 token2[1] >=
'A' && token2[1] <=
'Z') {
2053 for (q = p + 1; *q >=
'0' && *q <=
'9';)
2055 if (q == p + 1 || *q !=
'\0')
2077 token1 =
"Reactome";
2087 if (
tag.NotEmpty()) {
2090 for (
const auto& cur_tag : spb.
SetDbref()) {
2091 if (
tag->Match(*cur_tag)) {
2102 if (embl_acc_list->
next) {
2103 if (check_embl_prot)
2107 delete embl_acc_list;
2121 if (pdbold && pdbnew) {
2126 if (pdbnew && spb.
SetSeqref().size() > 1)
2170 for (q =
offset, tvnp = vnp;;) {
2186 vnp->
next =
nullptr;
2195 for (tvnp = vnp; tvnp; tvnp = tvnp->
next) {
2210 for (p += 16; *p ==
' ';)
2212 for (q = p; *p >=
'0' && *p <=
'9';)
2214 if (*p ==
'.' && p[1] ==
'\0') {
2225 for (tvnp = vnp; tvnp; tvnp = tvnp->
next) {
2247 }
else if (
first > 1) {
2250 }
else if (second == 0) {
2253 }
else if (second > 1) {
2256 }
else if (third == 0) {
2259 }
else if (third > 1) {
2262 }
else if (std_crdate.
Empty()) {
2265 }
else if (std_sequpd.
Empty()) {
2268 }
else if (std_annotupd.
Empty()) {
2271 }
else if (ver_num && *ver_num < 1) {
2277 crdate.
SetStd(*std_crdate);
2278 sequpd.SetStd(*std_sequpd);
2279 annotupd.
SetStd(*std_annotupd);
2309 if (reviewed ||
StringEquNI(bptr,
"standard", 8)) {
2311 }
else if (
StringEquNI(bptr,
"preliminary", 11) ||
2325 if (spb->SetExtra_acc().empty())
2326 spb->ResetExtra_acc();
2332 i =
GetSPDate(pp, entry, spb->SetCreated(), spb->SetSequpd(), spb->SetAnnotupd(), &ver_num);
2334 i =
GetSPDate(pp, entry, spb->SetCreated(), spb->SetSequpd(), spb->SetAnnotupd(),
nullptr);
2337 if (spb->SetPlasnm().empty())
2346 for (
auto& cur_id : bioseq.
SetId()) {
2347 if (! cur_id->IsSwissprot())
2355 id.SetRelease(
"reviewed");
2357 id.SetRelease(
"reviewed");
2365 bioseq.
SetDescr().Set().push_back(descr);
2384 for (p = line; *p ==
' ';)
2393 while (*p !=
'\0') {
2399 if (p > line && *(p - 1) !=
'-')
2401 for (++p; *p ==
' ';)
2404 for (p += 3; *p ==
' ';)
2411 for (--q; q > com && *q ==
' ';)
2424 descrs.push_back(descr);
2458 for (count = 0, p =
offset;;) {
2462 for (q = p; q >
offset && *q !=
'\n';)
2490 if (count == 0 && cla != 2)
2503 for (p =
tmp; p > bptr && *p !=
'\n';)
2524 for (
const auto& cur_ref : spb.
GetSeqref()) {
2525 if (! cur_ref->IsPir())
2529 text_id->Assign(cur_ref->GetPir());
2532 rep_id->
SetPir(*text_id);
2534 rep_ids.push_back(rep_id);
2537 if (rep_ids.empty())
2552 if (! orpname && ! ohname)
2554 if (! orpname || ! ohname)
2557 for (p = orpname, q = ohname; *p !=
'\0' && *q !=
'\0'; p++, q++) {
2559 if (chp >=
'a' && chp <=
'z')
2562 if (chq >=
'a' && chq <=
'z')
2575 if (*q ==
'(' || *q ==
'\0')
2586 bool fragment =
false;
2603 descr.
Set().push_back(desc_new);
2616 for (
const string& cur_acc : spb->GetExtra_acc()) {
2621 text_id->SetAccession(cur_acc);
2625 rep_ids.push_back(rep_id);
2628 if (! rep_ids.empty()) {
2634 if (spb->CanGetCreated()) {
2638 descr.
Set().push_back(create_date_descr);
2641 bool has_update_date = spb->CanGetAnnotupd() || spb->CanGetSequpd();
2644 if (has_update_date) {
2645 if (spb->CanGetAnnotupd() && spb->CanGetSequpd()) {
2647 }
else if (spb->CanGetAnnotupd())
2648 upd_date.
Assign(spb->GetAnnotupd());
2650 upd_date.
Assign(spb->GetSequpd());
2655 descr.
Set().push_back(upd_date_descr);
2658 if (spb->CanGetCreated() && has_update_date &&
2660 string upd_date_str, create_date_str;
2662 upd_date.
GetDate(&upd_date_str);
2663 spb->GetCreated().GetDate(&create_date_str);
2682 if (org_ref.
Empty())
2689 bio_src->
SetOrg(*org_ref);
2695 if (bio_src.
Empty()) {
2701 bio_src->
SetOrg(*org_ref);
2709 if (bio_src.
Empty())
2716 for (tvhp = vhp; tvhp; tvhp = vhp) {
2731 if (org_ref_cur.
Empty()) {
2737 vector<Char> org_taxname;
2739 const string& cur_taxname = org_ref_cur->
GetTaxname();
2740 org_taxname.assign(cur_taxname.begin(), cur_taxname.end());
2743 org_taxname.push_back(0);
2748 "OH-line HostName \"%s\" does not match NCBI organism name \"%s\" obtained by lookup of NCBI TaxID \"%d\".",
2761 descr.
Set().push_back(bio_src_desc);
2773 descr.
Set().push_back(mol_info_descr);
2778 for (; dbp; dbp = dbp->
mpNext) {
2785 pub_desc_descr->
SetPub(*pub_desc);
2787 descr.
Set().push_back(pub_desc_descr);
2833 for (; spfip; spfip =
next) {
2842 if (! fip1 && ! fip2)
2845 if (! fip1 || ! fip2 ||
2846 fip1->
key != fip2->
key ||
2848 fip1->
to != fip2->
to ||
2862 if (! spfip || ! spfip->
next)
2865 for (; spfip && spfip->
next; spfip = spfip->
next) {
2867 for (fip = spfip->
next; fip; fip = fipnext) {
2868 fipnext = fip->
next;
2893 if (! p || p == temp ||
2894 (*(p - 1) !=
' ' && *(p - 1) !=
'\n') || (p[2] !=
' ' && p[2] !=
'\n')) {
2900 for (p--; p > temp && (*p ==
' ' || *p ==
'\n');)
2902 if (*p < 'A' || *p >
'Z') {
2909 while (p > temp && (*p ==
'\n' || (*p >=
'A' && *p <=
'Z')))
2916 while (*p >=
'A' && *p <=
'Z' && p < end)
2920 for (q = p; *p ==
'\n'; p++)
2925 while (*p ==
' ' || *p ==
'\n')
2927 for (p += 2; *p ==
' ' || *p ==
'\n';)
2930 if (*p < 'A' || *p >
'Z') {
2936 for (q = p; *q ==
'\n' || (*q >=
'A' && *q <=
'Z');)
2938 if (q > p && *(q - 1) ==
'\n') {
2939 for (q--; *q ==
'\n' && q > p;)
2947 while (*p >=
'A' && *p <=
'Z' && p < end)
2951 for (q = p; *p ==
'\n'; p++)
2956 for (p = temp; *p !=
'\0'; p++)
2979 const char* defdelim;
3010 while (bptr < eptr && (endline =
SrchTheChar(bptr, eptr,
'\n'))) {
3013 for (p = bptr,
i = 0; *p !=
' ' && *p !=
'\n' &&
i < 8;
i++)
3015 temp->
key.assign(bptr, p);
3018 if (temp->
key ==
"VAR_SEQ")
3023 for (bptr += 8; *bptr ==
' ' && bptr <= endline;)
3028 if (((*bptr >=
'a' && *bptr <=
'z') || (*bptr >=
'A' && *bptr <=
'Z')) &&
3030 for (bptr += 7; *bptr >=
'0' && *bptr <=
'9' && bptr <= endline;)
3032 for (; *bptr ==
':' && bptr <= endline;)
3036 for (ptr1 = bptr; *ptr1 ==
'?' || *ptr1 ==
'>' || *ptr1 ==
'<' ||
3037 (*ptr1 >=
'0' && *ptr1 <=
'9');)
3040 if (bptr < ptr1 && ptr1 <= endline) {
3041 temp->
from.assign(bptr, ptr1);
3048 if (! p || (q && q < p))
3060 temp->
from.assign(
"-1");
3061 fromstart =
nullptr;
3067 for (; (*bptr ==
' ' || *bptr ==
'.') && bptr <= endline; bptr++)
3070 for (ptr1 = bptr; *ptr1 ==
'?' || *ptr1 ==
'>' || *ptr1 ==
'<' ||
3071 (*ptr1 >=
'0' && *ptr1 <=
'9');)
3074 p = (
char*)temp->
from.c_str();
3075 if (*p ==
'<' || *p ==
'>')
3078 for (q = ptr1; *q ==
' ';)
3081 if (bptr < ptr1 && ptr1 <= endline) {
3082 if (*q !=
'\n' && new_format && (*p ==
'?' || atoi(p) != -1))
3084 temp->
to.assign(bptr, ptr1);
3085 }
else if (fromstart) {
3086 if (*q !=
'\n' && (*p ==
'?' || atoi(p) != -1))
3088 temp->
to.assign(fromstart, fromend);
3090 if (*q !=
'\n' && (*p ==
'?' || atoi(p) != -1))
3092 temp->
to.assign(
"-1");
3095 q = (
char*)temp->
to.c_str();
3096 if (*q ==
'<' || *q ==
'>')
3098 if (extra_text || (*p !=
'?' && *q !=
'?' && (atoi(p) > atoi(q)))) {
3102 if (! p || (q && q < p))
3111 temp->
from.assign(
"-1");
3114 for (bptr = ptr1; *bptr ==
' ' && bptr <= endline;)
3120 if (*--
str ==
'-' &&
str > bptr)
3123 if (bptr <= endline)
3124 temp->
descrip.assign(bptr, endline);
3126 for (bptr = endline; *bptr ==
' ' || *bptr ==
'\n';)
3131 while (bptr < eptr && (*bptr ==
' '))
3133 while (*bptr ==
' ')
3139 }
else if (
StringEquN(bptr,
"/evidence=\"", 11)) {
3153 for (p = bptr + 1; (*p >=
'a' && *p <=
'z') || (*p >=
'A' && *p <=
'Z') || (*p >=
'0' && *p <=
'9') || *p ==
'_';)
3155 if (*p ==
'=' && p[1] ==
'\"') {
3167 if (p >= bptr && *p ==
'\"')
3174 if (p && p - 1 >= bptr && *(p - 1) ==
'.')
3179 if (p && p - 1 >= bptr && *(p - 1) ==
'.')
3191 if (*--
str ==
'-' &&
str > bptr)
3194 for (bptr = endline; *bptr ==
' ' || *bptr ==
'\n';)
3206 if (*defdelim ==
'\n')
3209 p = (
char*)temp->
from.c_str();
3210 if (*p ==
'<' || *p ==
'>')
3212 if (*p !=
'?' && atoi(p) < 0) {
3217 q = (
char*)temp->
to.c_str();
3218 if (*q ==
'<' || *q ==
'>')
3220 if ((*p !=
'?' && atoi(p) > (
Int4)seqlen) || (*q !=
'?' && atoi(q) > (
Int4)seqlen)) {
3230 current->
next = temp;
3260 bool fuzzfrom =
false;
3261 bool fuzzto =
false;
3262 bool nofrom =
false;
3264 bool pntfuzz =
false;
3268 if (! spfip || spfip->
from.empty() || spfip->
to.empty())
3275 ptr = spfip->
from.c_str();
3279 while (*ptr !=
'\0' &&
isdigit(*ptr) == 0)
3281 from = (
Int4)atoi(ptr);
3286 from = (
Int4)atoi(ptr);
3288 if ((initmet ==
false && from != 0) ||
3289 (initmet && signal && from == 1))
3292 ptr = spfip->
to.c_str();
3295 while (*ptr !=
'\0' &&
isdigit(*ptr) == 0)
3297 to = (
Int4)atoi(ptr);
3302 to = (
Int4)atoi(ptr);
3304 if (initmet ==
false && to != 0)
3327 }
else if (from != to && ! pntfuzz) {
3351 }
else if (fuzzfrom) {
3438 if ((pos != 0 && retstr[pos - 1] !=
' ' && retstr[pos - 1] !=
'.') ||
3439 (retstr[pos +
len] !=
'\0' && retstr[pos +
len] !=
' ' &&
3440 retstr[pos +
len] !=
'.' && retstr[pos +
len] !=
';'))
3484 fbp->
key = (
char*)
"VAR_SEQ";
3489 fbp->
key = (
char*)
"SE_CYS";
3492 fbp->
key = (
char*)
"MOD_RES";
3509 feat->
SetData().SetImp().SetDescr(
"uncertain amino acids");
3521 for (p = loc; *p; p++)
3539 if (! descrip.empty())
3550 if (! descrip.empty())
3575 for (temp = spfip; temp; temp = temp->
next) {
3580 temp->
key =
"VAR_SEQ";
3585 temp->
key =
"SE_CYS";
3588 temp->
key =
"MOD_RES";
3622 feat->
SetData().SetImp().SetDescr(
"uncertain amino acids");
3650 feats.push_back(feat);
3668 for (p =
str; *p ==
' ' || *p ==
'\t';)
3670 for (q = p; *q !=
'\0';)
3673 for (q--; (*q ==
' ' || *q ==
'\t') && q > p;)
3675 if (q == p && (*q ==
' ' || *q ==
'\t'))
3677 for (pp = p; *pp ==
'(';)
3679 for (qq = q; *qq ==
')' && qq >= pp;)
3681 for (count = 0, left = 0, right = 0,
r = pp;
r <= qq;
r++) {
3684 else if (*
r ==
')') {
3686 count = left - right;
3690 for (; count < 0 && pp > p; pp--)
3692 for (count = 0,
r = qq;
r >= pp;
r--) {
3699 for (; count < 0 && qq < q; qq++)
3722 for (p = gname; *p !=
'\0'; p++)
3723 if (! (
isalnum(*p) || *p ==
'_' || *p ==
'-' || *p ==
'.' ||
3724 *p ==
'\'' || *p ==
'`' || *p ==
'/' || *p ==
'(' || *p ==
')'))
3747 for (p =
str; *p !=
'\0';) {
3750 for (q = p; *p !=
'\0' && *p !=
' ';)
3762 gene.
SetSyn().push_back(gname);
3813 for (ptr =
str; *ptr !=
'\0'; ptr++)
3823 feats.push_back(feat);
3848 for (p =
str; p && *p !=
'\0'; p = q) {
3849 while (*p ==
' ' || *p ==
',')
3861 gene.
SetSyn().push_back(p);
3871 (! name && ! syns && ! ltags && ! orfs))
3891 feats.push_back(feat);
3908 if (! pp || pp->
entrylist.empty() || ! bptr)
3921 for (p =
str; p && *p !=
'\0'; p = q) {
3922 while (*p ==
' ' || *p ==
';')
3924 for (
r = p;;
r = q + 1) {
3926 if (! q || q[1] ==
' ' || q[1] ==
'\n' || q[1] ==
'\0')
3949 }
else if (
StringEquNI(p,
"OrderedLocusNames=", 18)) {
3972 if (! name && ! syns && ! ltags && ! orfs)
3975 if (! name && syns) {
3994 if (! name && ! syns && ! ltags && ! orfs)
4083 for (count = 0, q =
buf;; q = p) {
4095 if (*p ==
'.' || *p ==
'\0') {
4100 while (*p >=
'0' && *p <=
'9')
4102 if (*q ==
'n' && (*p ==
'.' || *p ==
'\0')) {
4115 if (count != 4 || *p !=
'\0') {
4127 for (; sfp; sfp = sfp->
next) {
4150 for (rcount = 0, scount = 0, tsfp = sfp; tsfp; tsfp = tsfp->
next) {
4157 for (fcount = 0, tsfp = sfp; tsfp; tsfp = tsfp->
next) {
4160 for (tsfp = tsfp->
next; tsfp; tsfp = tsfp->
next) {
4174 }
else if (rcount == 0 && ! is_trembl) {
4179 if (scount > 0 && ! is_trembl) {
4184 if (fcount == 0 && rcount > 0) {
4211 for (
const auto&
id : ids) {
4212 if (! id->IsSwissprot())
4215 if (id->GetSwissprot().IsSetRelease() &&
4222 sfp->
next =
nullptr;
4224 for (tsfp = sfp, p =
str, count = 0; *p !=
'\0';) {
4227 for (q = p; *p !=
'\0' && *p !=
' ';)
4239 if (tsfp->
tag != 0) {
4240 if (q == tsfp->
start)
4243 for (
r = q - 1; *
r ==
' ' || *
r ==
';';)
4256 for (
r = q + cilp->
len; *
r ==
' ';)
4259 tsfp->
next =
nullptr;
4267 for (tsfp = sfp->
next; tsfp; tsfp = tsfp->
next)
4270 for (tsfp = sfp->
next; tsfp; tsfp = tsfp->
next)
4274 for (tsfp = sfp->
next; tsfp; tsfp = tsfp->
next)
4277 for (tsfp = sfp->
next; tsfp; tsfp = tsfp->
next)
4281 for (tsfp = sfp->
next; tsfp; tsfp = tsfp->
next)
4284 for (tsfp = sfp->
next; tsfp; tsfp = tsfp->
next)
4308 for (q =
buf + 2; *q ==
' ';)
4312 for (p++; *p ==
' ';)
4327 qual->
SetQual(
"UniProtKB_evidence");
4329 feat.
SetQual().push_back(qual);
4388 while (s >=
str && (*s ==
'.' || *s ==
';' || *s ==
','))
4416 str1.assign(
str, ptr);
4422 for (bptr = ptr; *ptr !=
'\0' && *ptr !=
' ' && *ptr != symb;)
4425 string ecnum(bptr, ptr);
4428 if (! ecnum.empty())
4429 prot.SetEc().push_back(ecnum);
4435 while (*ptr !=
'\0' && (*ptr ==
' ' || *ptr == symb))
4450 str1.assign(
str, ptr);
4458 if (!
prot.IsSetName())
4459 prot.SetName().push_back(
str);
4476 feats.push_back(feat);
4514 for (; spfip; spfip = spfip->
next) {
4515 if (spfip->
key !=
"NON_CONS")
4520 p = spfip->
from.c_str();
4521 if (*p ==
'<' || *p ==
'>' || *p ==
'?')
4524 spslp->
len = atoi(p);
4528 p = spfip->
from.c_str();
4529 if (*p ==
'<' || *p ==
'>' || *p ==
'?')
4531 curspslp->
len = atoi(p) - curspslp->
from;
4535 p = spfip->
from.c_str();
4536 if (*p ==
'<' || *p ==
'>' || *p ==
'?')
4538 spslp->
from = atoi(p);
4539 curspslp->
next = spslp;
4543 for (
auto& descr : bioseq.
SetDescr().Set()) {
4544 if (! descr->IsMolinfo())
4576 for (count = 0; spfip; spfip = spfip->
next) {
4577 if (spfip->
key !=
"INIT_MET")
4584 p = spfip->
from.c_str();
4585 if (*p ==
'<' || *p ==
'>' || *p ==
'?')
4588 p = spfip->
to.c_str();
4589 if (*p ==
'<' || *p ==
'>' || *p ==
'?')
4593 if ((from != 0 || to != 0) && (from != 1 || to != 1))
4606 if (! temp->
descrip.empty()) {
4621 sequence.insert(sequence.begin(),
'M');
4622 bioseq.
SetInst().SetLength(
static_cast<TSeqPos>(sequence.size()));
4623 }
else if (sequence.empty() || sequence[0] !=
'M')
4652 for (
auto& descr : bioseq.
SetDescr().Set()) {
4653 if (! descr->IsMolinfo())
4656 mol_info = &(descr->SetMolinfo());
4661 for (temp = spfip; temp; temp = temp->
next) {
4662 if (temp->
key ==
"NON_CONS") {
4667 if (temp->
key !=
"NON_TER")
4703 for (string::const_iterator
value = sequence.begin();
value != sequence.end(); ++
value) {
4704 if (*
value !=
'X') {
4728 for (; spslp; spslp = spslp->
next) {
4730 if (! deltas.
Set().empty()) {
4731 delta->SetLiteral().SetLength(0);
4732 delta->SetLiteral().SetFuzz().SetLim();
4738 delta->SetLiteral().SetLength(spslp->
len);
4741 string data_str = bioseq_data.substr(spslp->
from, spslp->
len);
4743 delta->SetLiteral().SetSeq_data().SetIupacaa().Set(data_str);
4747 if (deltas.
Set().size() > 1) {
4749 bioseq.
SetInst().ResetSeq_data();
4751 bioseq.
SetInst().SetExt().Reset();
4793 if (! feats.empty()) {
4795 annot->
SetData().SetFtable().swap(feats);
4796 bioseq.
SetAnnot().push_back(annot);
4799 for (; spslp; spslp =
next) {
4818 eptr = ptr + entry->
len;
4867 for (total = 0,
i = 0, imax = pp->
indx;
i < imax;
i++) {
4884 pp->
entries.push_back(cur_entry);
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
char * tata_save(char *str)
void err_install(const Indexblk *ibp, bool accver)
void BuildSubBlock(DataBlkPtr dbp, Int2 subtype, const char *subkw)
void StripSerialNumbers(TEntryList &seq_entries)
unique_ptr< unsigned char[]> GetProteinConv(void)
void GetSequenceOfKeywords(const DataBlk &entry, int type, int col_data, TKeywordList &keywords)
char * GetEmblBlock(DataBlkPtr *chain, char *ptr, short *retkw, Parser::EFormat format, char *eptr)
CRef< CSeq_id > MakeAccSeqId(const char *acc, Uint1 seqtype, bool accver, Int2 vernum)
bool GetSeqData(ParserPtr pp, const DataBlk &entry, CBioseq &bioseq, Int4 nodetype, unsigned char *seqconv, Uint1 seq_data_type)
void GetLenSubNode(DataBlkPtr dbp)
CRef< CSeq_id > MakeLocusSeqId(const char *locus, CSeq_id::E_Choice seqtype)
void GetExtraAccession(IndexblkPtr ibp, bool allow_uwsec, Parser::ESource source, TAccessionList &accessions)
void ShrinkSpaces(char *line)
CRef< CBioseq > CreateEntryBioseq(ParserPtr pp)
void fta_sort_biosource(objects::CBioSource &bio)
TSeqPos GetLength(void) const
void GetDate(string *label, bool year_only=false) const
Append a standardized string representation of the date to the label.
@ eCompare_after
*this comes second.
@Gb_qual.hpp User-defined methods of the data storage class.
@OrgMod.hpp User-defined methods of the data storage class.
@Seq_descr.hpp User-defined methods of the data storage class.
namespace ncbi::objects::
static const char location[]
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
DataBlkPtr LoadEntry(ParserPtr pp, size_t offset, size_t len)
#define ERR_GENENAME_IllegalGeneName
#define ERR_SOURCE_HostNameVsTaxIDMissMatch
#define ERR_FEATURE_PartialNoNonTerNonCons
#define ERR_FORMAT_UnexpectedData
#define ERR_FEATURE_ExpectEmptyComment
#define ERR_FORMAT_NoProteinNameCategory
#define ERR_FORMAT_MultipleRecName
#define ERR_SOURCE_InvalidNcbiTaxID
#define ERR_SOURCE_UnknownOHType
#define ERR_GENENAME_DELineGeneName
#define ERR_SOURCE_NcbiTaxIDLookupFailure
#define ERR_FORMAT_InvalidPDBCrossRef
#define ERR_FORMAT_ECNumberNotPresent
#define ERR_FORMAT_MixedPDBXrefs
#define ERR_ENTRY_Skipped
#define ERR_FEATURE_UnEqualEndPoint
#define ERR_SOURCE_OrgNameVsTaxIDMissMatch
#define ERR_FORMAT_MissingCopyright
#define ERR_SOURCE_MissingPlasmidName
#define ERR_FEATURE_Invalid_INIT_MET
#define ERR_FEATURE_InvalidQualifier
#define ERR_FEATURE_BadLocation
#define ERR_REFERENCE_IllegalDate
#define ERR_FORMAT_MissingFullRecName
#define ERR_FORMAT_SwissProtHasSubName
#define ERR_FEATURE_UnknownFeatKey
#define ERR_SOURCE_UnknownOXType
#define ERR_DRXREF_UnknownDBname
#define ERR_SOURCE_NoNcbiTaxIDLookup
#define ERR_FEATURE_ObsoleteFeature
#define ERR_FEATURE_Dropped
#define ERR_ENTRY_ParsingComplete
#define ERR_FEATURE_MissingInitMet
#define ERR_SOURCE_IncorrectOHLine
#define ERR_FORMAT_MissingGeneName
#define ERR_LOCATION_FailedCheck
#define ERR_FORMAT_InvalidECNumber
#define ERR_QUALIFIER_InvalidEvidence
#define ERR_DATE_IllegalDate
#define ERR_FORMAT_UnknownGeneField
#define ERR_FEATURE_NotSeqEndPoint
#define ERR_FEATURE_NoFragment
#define ERR_SPROT_DRLineCrossDBProtein
#define ERR_DATACLASS_UnKnownClass
#define ERR_FORMAT_ExcessGeneFields
#define ERR_FORMAT_MissingRecName
#define ERR_FEATURE_DuplicateRemoved
list< CRef< objects::CSeq_entry > > TEntryList
std::list< CRef< objects::CSeq_id > > TSeqIdList
char * StringSave(const char *s)
bool StringEquNI(const char *s1, const char *s2, size_t n)
bool StringEquN(const char *s1, const char *s2, size_t n)
bool StringEqu(const char *s1, const char *s2)
void StringCpy(char *d, const char *s)
void StringNCpy(char *d, const char *s, size_t n)
size_t StringLen(const char *s)
void StringCat(char *d, const char *s)
char * StringRChr(char *s, const char c)
void FtaDeletePrefix(int prefix)
void FtaInstallPrefix(int prefix, const char *name, const char *location)
void fta_find_pub_explore(ParserPtr pp, TEntryList &seq_entries)
CRef< COrg_ref > fta_fix_orgref_byid(ParserPtr pp, TTaxId taxid, bool *drop, bool isoh)
void fta_fix_orgref(ParserPtr pp, COrg_ref &org_ref, bool *drop, char *organelle)
#define GI_FROM(T, value)
unsigned int TSeqPos
Type for sequence locations and lengths.
#define TAX_ID_TO(T, tax_id)
SStrictId_Tax::TId TTaxId
Taxon id type.
#define TAX_ID_FROM(T, value)
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
const TPrim & Get(void) const
CBioseq_Handle AddBioseq(CBioseq &bioseq, TPriority pri=kPriority_Default, EExist action=eExist_Throw)
Add bioseq, return bioseq handle.
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
uint8_t Uint1
1-byte (8-bit) unsigned integer
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string (in-place)
static string Sanitize(CTempString str, TSS_Flags flags=fSS_print)
Sanitize a string, allowing only specified classes of characters.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
@ eTrunc_End
Truncate trailing spaces only.
bool IsSetOrg(void) const
Check if a value has been assigned to Org data member.
const TOrg & GetOrg(void) const
Get the Org member data.
void SetGenome(TGenome value)
Assign a value to Genome data member.
void SetOrg(TOrg &value)
Assign a value to Org data member.
EGenome
biological context