// Name of this source file as a string literal.
// NOTE(review): presumably consumed by diagnostic/error-posting helpers that
// report the originating file — confirm against the users of THIS_FILE.
96 #define THIS_FILE "loadfeat.cpp"
// Aliases that map the Seq_descr_GIBB_mol_* identifiers onto the
// corresponding CMolInfo::eBiomol_* enumerators, so code written against the
// GIBB-mol names resolves to CMolInfo biomol values.
// NOTE(review): the GIBB-mol names look like legacy identifiers kept for
// source compatibility — confirm before removing any of them.
101 #define Seq_descr_GIBB_mol_unknown CMolInfo::eBiomol_unknown
102 #define Seq_descr_GIBB_mol_genomic CMolInfo::eBiomol_genomic
103 #define Seq_descr_GIBB_mol_preRNA CMolInfo::eBiomol_pre_RNA
104 #define Seq_descr_GIBB_mol_mRNA CMolInfo::eBiomol_mRNA
105 #define Seq_descr_GIBB_mol_rRNA CMolInfo::eBiomol_rRNA
106 #define Seq_descr_GIBB_mol_tRNA CMolInfo::eBiomol_tRNA
// uRNA has no enumerator of its own here: it expands to eBiomol_snRNA, the
// same value as Seq_descr_GIBB_mol_snRNA below.
107 #define Seq_descr_GIBB_mol_uRNA CMolInfo::eBiomol_snRNA
108 #define Seq_descr_GIBB_mol_snRNA CMolInfo::eBiomol_snRNA
109 #define Seq_descr_GIBB_mol_scRNA CMolInfo::eBiomol_scRNA
110 #define Seq_descr_GIBB_mol_other_genetic CMolInfo::eBiomol_other_genetic
111 #define Seq_descr_GIBB_mol_cRNA CMolInfo::eBiomol_cRNA
112 #define Seq_descr_GIBB_mol_snoRNA CMolInfo::eBiomol_snoRNA
// trRNA is the short alias for eBiomol_transcribed_RNA.
113 #define Seq_descr_GIBB_mol_trRNA CMolInfo::eBiomol_transcribed_RNA
114 #define Seq_descr_GIBB_mol_other CMolInfo::eBiomol_other
129 {
"asparagine",
'N' },
130 {
"aspartic acid",
'D' },
131 {
"aspartate",
'D' },
133 {
"glutamine",
'Q' },
134 {
"glutamic acid",
'E' },
135 {
"glutamate",
'E' },
137 {
"histidine",
'H' },
138 {
"isoleucine",
'I' },
141 {
"methionine",
'M' },
142 {
"phenylalanine",
'F' },
144 {
"selenocysteine",
'U' },
146 {
"threonine",
'T' },
147 {
"tryptophan",
'W' },
161 {
"Ala",
'A', 0, { 52, 53, 54, 55, -1, -1, -1, -1 } },
162 {
"Arg",
'R', 2, { 28, 29, 30, 31, -1, -1, -1, -1 } },
163 {
"Arg",
'R', 5, { 28, 29, 30, 31, -1, -1, -1, -1 } },
164 {
"Arg",
'R', 9, { 28, 29, 30, 31, -1, -1, -1, -1 } },
165 {
"Arg",
'R', 13, { 28, 29, 30, 31, -1, -1, -1, -1 } },
166 {
"Arg",
'R', 14, { 28, 29, 30, 31, -1, -1, -1, -1 } },
167 {
"Arg",
'R', 0, { 28, 29, 30, 31, 46, 47, -1, -1 } },
168 {
"Asn",
'N', 9, { 40, 41, 42, -1, -1, -1, -1, -1 } },
169 {
"Asn",
'N', 14, { 40, 41, 42, -1, -1, -1, -1, -1 } },
170 {
"Asn",
'N', 0, { 40, 41, -1, -1, -1, -1, -1, -1 } },
171 {
"Asp",
'D', 0, { 56, 57, -1, -1, -1, -1, -1, -1 } },
172 {
"Asx",
'B', 9, { 40, 41, 42, 56, 57, -1, -1, -1 } },
173 {
"Asx",
'B', 14, { 40, 41, 42, 56, 57, -1, -1, -1 } },
174 {
"Asx",
'B', 0, { 40, 41, 56, 57, -1, -1, -1, -1 } },
175 {
"Cys",
'C', 10, { 12, 13, 14, -1, -1, -1, -1, -1 } },
176 {
"Cys",
'C', 0, { 12, 13, -1, -1, -1, -1, -1, -1 } },
177 {
"Gln",
'Q', 6, { 10, 11, 26, 27, -1, -1, -1, -1 } },
178 {
"Gln",
'Q', 15, { 11, 26, 27, -1, -1, -1, -1, -1 } },
179 {
"Gln",
'Q', 0, { 26, 27, -1, -1, -1, -1, -1, -1 } },
180 {
"Glu",
'E', 0, { 58, 59, -1, -1, -1, -1, -1, -1 } },
181 {
"Glx",
'Z', 6, { 10, 11, 26, 27, 58, 59, -1, -1 } },
182 {
"Glx",
'Z', 0, { 11, 26, 27, 58, 59, -1, -1, -1 } },
183 {
"Glx",
'Z', 0, { 26, 27, 58, 59, -1, -1, -1, -1 } },
184 {
"Gly",
'G', 13, { 46, 47, 60, 61, 62, 63, -1, -1 } },
185 {
"Gly",
'G', 0, { 60, 61, 62, 63, -1, -1, -1, -1 } },
186 {
"His",
'H', 0, { 24, 25, -1, -1, -1, -1, -1, -1 } },
187 {
"Ile",
'I', 2, { 32, 33, -1, -1, -1, -1, -1, -1 } },
188 {
"Ile",
'I', 3, { 32, 33, -1, -1, -1, -1, -1, -1 } },
189 {
"Ile",
'I', 5, { 32, 33, -1, -1, -1, -1, -1, -1 } },
190 {
"Ile",
'I', 13, { 32, 33, -1, -1, -1, -1, -1, -1 } },
191 {
"Ile",
'I', 0, { 32, 33, 34, -1, -1, -1, -1, -1 } },
192 {
"Leu",
'L', 3, { 2, 3, -1, -1, -1, -1, -1, -1 } },
193 {
"Leu",
'L', 12, { 2, 3, 16, 17, 18, -1, -1, -1 } },
194 {
"Leu",
'L', 0, { 2, 3, 16, 17, 18, 19, -1, -1 } },
195 {
"Lys",
'K', 9, { 43, -1, -1, -1, -1, -1, -1, -1 } },
196 {
"Lys",
'K', 14, { 43, -1, -1, -1, -1, -1, -1, -1 } },
197 {
"Lys",
'K', 0, { 42, 43, -1, -1, -1, -1, -1, -1 } },
198 {
"Met",
'M', 2, { 34, 35, -1, -1, -1, -1, -1, -1 } },
199 {
"Met",
'M', 3, { 34, 35, -1, -1, -1, -1, -1, -1 } },
200 {
"Met",
'M', 5, { 34, 35, -1, -1, -1, -1, -1, -1 } },
201 {
"Met",
'M', 13, { 34, 35, -1, -1, -1, -1, -1, -1 } },
202 {
"Met",
'M', 0, { 35, -1, -1, -1, -1, -1, -1, -1 } },
203 {
"fMet",
'M', 2, { 34, 35, -1, -1, -1, -1, -1, -1 } },
204 {
"fMet",
'M', 3, { 34, 35, -1, -1, -1, -1, -1, -1 } },
205 {
"fMet",
'M', 5, { 34, 35, -1, -1, -1, -1, -1, -1 } },
206 {
"fMet",
'M', 13, { 34, 35, -1, -1, -1, -1, -1, -1 } },
207 {
"fMet",
'M', 0, { 35, -1, -1, -1, -1, -1, -1, -1 } },
208 {
"Phe",
'F', 0, { 0, 1, -1, -1, -1, -1, -1, -1 } },
209 {
"Pro",
'P', 0, { 20, 21, 22, 23, -1, -1, -1, -1 } },
210 {
"Sec",
'U', 0, { -1, -1, -1, -1, -1, -1, -1, -1 } },
211 {
"Ser",
'S', 5, { 4, 5, 6, 7, 44, 45, 46, 47 } },
212 {
"Ser",
'S', 9, { 4, 5, 6, 7, 44, 45, 46, 47 } },
213 {
"Ser",
'S', 12, { 4, 5, 6, 7, 19, 44, 45, -1 } },
214 {
"Ser",
'S', 14, { 4, 5, 6, 7, 44, 45, 46, 47 } },
215 {
"Ser",
'S', 0, { 4, 5, 6, 7, 44, 45, -1, -1 } },
216 {
"Thr",
'T', 3, { 16, 17, 18, 19, 36, 37, 38, 39 } },
217 {
"Thr",
'T', 0, { 36, 37, 38, 39, -1, -1, -1, -1 } },
218 {
"Trp",
'W', 1, { 15, -1, -1, -1, -1, -1, -1, -1 } },
219 {
"Trp",
'W', 6, { 15, -1, -1, -1, -1, -1, -1, -1 } },
220 {
"Trp",
'W', 10, { 15, -1, -1, -1, -1, -1, -1, -1 } },
221 {
"Trp",
'W', 11, { 15, -1, -1, -1, -1, -1, -1, -1 } },
222 {
"Trp",
'W', 12, { 15, -1, -1, -1, -1, -1, -1, -1 } },
223 {
"Trp",
'W', 15, { 15, -1, -1, -1, -1, -1, -1, -1 } },
224 {
"Trp",
'W', 0, { 14, 15, -1, -1, -1, -1, -1, -1 } },
225 {
"Tyr",
'Y', 14, { 8, 9, 10, -1, -1, -1, -1, -1 } },
226 {
"Tyr",
'Y', 0, { 8, 9, -1, -1, -1, -1, -1, -1 } },
227 {
"Val",
'V', 0, { 48, 49, 50, 51, -1, -1, -1, -1 } },
228 {
"TERM",
'*', 1, { 10, 11, 14, -1, -1, -1, -1, -1 } },
229 {
"TERM",
'*', 2, { 10, 11, 46, 47, -1, -1, -1, -1 } },
230 {
"TERM",
'*', 6, { 14, -1, -1, -1, -1, -1, -1, -1 } },
231 {
"TERM",
'*', 11, { 10, 11, 14, -1, -1, -1, -1, -1 } },
232 {
"TERM",
'*', 12, { 10, 11, 14, -1, -1, -1, -1, -1 } },
233 {
"TERM",
'*', 14, { 11, -1, -1, -1, -1, -1, -1, -1 } },
234 {
"TERM",
'*', 15, { 10, 14, -1, -1, -1, -1, -1, -1 } },
235 {
"TERM",
'*', 0, { 10, 11, -1, -1, -1, -1, -1, -1 } },
236 {
"OTHER",
'X', 0, { -1, -1, -1, -1, -1, -1, -1, -1 } },
237 {
nullptr,
'\0', 0, { -1, -1, -1, -1, -1, -1, -1, -1 } }
256 "expressed sequence tag",
257 "partial cDNA sequence",
258 "transcribed sequence fragment",
260 "putatively transcribed partial sequence",
387 "UNIPROT/SWISS-PROT",
389 "UNIPROTKB/SWISS-PROT",
430 "artificial_location",
435 "environmental_sample",
442 "mobile_element_type",
448 "ribosomal_slippage",
469 "autocatalytically_spliced_intron",
470 "hammerhead_ribozyme",
511 "DNase_I_hypersensitive_site",
513 "enhancer_blocking_element",
515 "imprinting_control_region",
517 "locus_control_region",
518 "matrix_attachment_region",
522 "polyA_signal_sequence",
524 "recoding_stimulatory_region",
525 "replication_regulatory_region",
526 "ribosome_binding_site",
531 "transcriptional_cis_regulatory_region",
587 for (; dbp; dbp = dbpnext) {
610 for (p =
value; *p !=
'\0'; p++)
636 #ifdef BIOSEQ_FIND_METHOD
638 bsp = BioseqFind(sip);
640 return (bsp->length);
649 if (
id.IsGenbank() ||
id.IsEmbl() ||
id.IsDdbj() ||
id.IsTpg() ||
650 id.IsTpe() ||
id.IsTpd())
651 text_id =
id.GetTextseq_Id();
656 for (use_indx = 0; use_indx < pp->
indx; use_indx++) {
658 vernum = pp->
entrylist[use_indx]->vernum;
659 if (text_id_acc == acnum &&
660 (pp->
accver ==
false || vernum == text_id_ver))
664 if (use_indx >= pp->
indx) {
669 if (
len !=
static_cast<size_t>(-1))
676 if (pp->
accver ==
false || text_id_ver < 0) {
686 if (*pp->
buf ==
'\0')
737 line = line.substr(4);
739 size_t colon = line.find(
':');
740 if (colon == string::npos) {
745 string tail = line.substr(colon + 1);
746 line = line.substr(0, colon);
761 buf =
"UniProt/Swiss-Prot";
763 buf =
"UniProt/TrEMBL";
770 string buf(
"UniProtKB");
771 buf += line.substr(7);
776 const Char* strid =
nullptr;
779 const Char* p = tail.c_str();
781 for (strid = p; *p >=
'0' && *p <=
'9';)
783 if (*p ==
'\0' && *strid !=
'0') {
790 for (strid = p; *p >=
'0' && *p <=
'9';)
809 for (; *
r >=
'0' && *
r <=
'9';)
815 if (*
r !=
'\0' || q != p)
822 if (*p !=
'e' && *p !=
'g' && *p !=
'd') {
827 const Char* q = p + 1;
832 for (
r = q; *
r >=
'0' && *
r <=
'9';)
834 if (*q ==
'\0' || *
r !=
'\0') {
850 tag->SetTag().SetStr(strid);
852 tag->SetTag().SetId(intid);
883 for (CSeq_feat::TQual::iterator qual = feat.
SetQual().begin(); qual != feat.
SetQual().end();) {
884 if (! (*qual)->IsSetQual() || (*qual)->GetQual() !=
"db_xref") {
895 db_refs.push_back(dbtag);
900 qual = feat.
SetQual().erase(qual);
939 if (cur_loc->
IsInt())
971 for (ptr =
str; *ptr !=
' ' && *ptr !=
'\0';)
980 while (*eptr ==
' ' || *eptr ==
')')
1056 const CSeq_id* cur_id =
nullptr;
1058 switch (cur_loc->
Which()) {
1062 cur_id = cur_loc->
GetId();
1068 cur_id = cur_loc->
GetId();
1074 cur_id = cur_loc->
GetId();
1082 cur_id = cur_loc->
GetId();
1087 cur_id = cur_loc->
GetId();
1097 if (! accession || ! cur_id)
1114 else if (strand != cur_loc->
GetStrand())
1128 if (!
str || *
str ==
'\0')
1167 for (; dbp; dbp = dbp->
mpNext) {
1172 if (pubdesc.
Empty())
1176 feat->
SetData().SetPub(*pubdesc);
1184 for (p = q; *p !=
'\0' && *p !=
'(';)
1201 for (p = dbp->
mOffset + col_data; *p !=
'\0' && *p !=
'(';)
1206 for (; subdbp; subdbp = subdbp->
mpNext) {
1250 feats.push_back(feat);
1256 feats.push_back(feat);
1298 if (pubdesc.
Empty() || ! pubdesc->IsSetPub())
1305 imp_feat.
SetKey(
"Site-ref");
1306 imp_feat.
SetLoc(
"sites");
1315 feat->
SetCit().SetPub().push_back(pub);
1317 if (pubdesc->IsSetComment())
1324 feats.push_back(feat);
1339 return "unknown location";
1362 for (q = p + 4; *q ==
' ';)
1365 for (pars = 0, p = q; *p !=
'\0'; p++) {
1366 if (*p ==
',' && pars == 0)
1370 else if (*p ==
')') {
1400 ErrPostEx(
SEV_WARNING,
ERR_FEATURE_FourBaseAntiCodon,
"tRNA feature at \"%s\" has anticodon with location spanning four bases: \"%s\". Cannot generate corresponding codon value from the DNA sequence.", loc.empty() ?
"unknown" : loc.c_str(), loc_str);
1410 if (xrange != anticodon_range) {
1444 len = comment.size();
1446 if (
len > 15 &&
len < 20) {
1447 if (
StringEquNI(comment.c_str() +
len - 15,
"S ribosomal RNA", 15)) {
1451 }
else if (
len > 6 &&
len < 20) {
1459 if (qval_str.empty())
1463 for (p = qval; p; p += 13) {
1470 for (p = qval; p; p = qval +
len) {
1478 len = p - qval + 13;
1488 s.append(
" ribosomal RNA");
1495 for (p = qval, q = p; q; q = p + 13) {
1508 if (p && p > qval && p[15] ==
'\0') {
1510 if (*p >=
'0' && *p <=
'9')
1518 if (p == qval || (p[9] !=
' ' && p[9] !=
'\0')) {
1526 len = p - qval + 14;
1557 rna_ref.
SetExt().SetName(qval);
1567 if (acp->
intaa == ch)
1570 return (acp->
intaa);
1581 for (tap =
taa; tap->
name; tap++)
1591 return (acp->
intaa);
1627 if (product.length() < 7)
1630 bool digits =
false;
1632 for (p = prod; *p !=
'\0'; p++) {
1633 if (*p >=
'a' && *p <=
'z')
1635 else if ((*p < 'A' || *p >
'Z') && *p !=
'(' && *p !=
')') {
1636 if (*p >=
'0' && *p <=
'9')
1654 for (p = end; *p !=
'\0'; p++)
1655 if (*p ==
'(' || *p ==
')')
1659 if (start == prod && *end ==
'\0') {
1670 for (p = end; *p ==
' ' || *p ==
')' || *p ==
'(';)
1677 while (*p >=
'A' && *p <=
'Z')
1684 while (*p ==
' ' || *p ==
')' || *p ==
'(')
1686 for (q = p; *p >=
'A' && *p <=
'Z';)
1692 while (*p ==
' ' || *p ==
')' || *p ==
'(')
1694 for (q = p; *p >=
'A' && *p <=
'Z';)
1704 while (*p ==
' ' || *p ==
'(' || *p ==
')')
1710 for (p = start - 1; *p ==
' ' || *p ==
')' || *p ==
'('; p--)
1714 if (p > prod && p[1] ==
')') {
1715 for (p--; *p !=
'('; p--)
1719 for (p--; *p ==
' ' || *p ==
'(' || *p ==
'('; p--)
1725 for (q = p++; *q >=
'A' && *q <=
'Z'; q--)
1728 if (*q < 'A' || *q >
'Z')
1757 if (!
first && ! second && ! third && ! fourth &&
remove && ! digits)
1768 comment +=
"; fMet";
1784 if (comment.empty())
1788 for (p = comm; *p !=
'\0'; p++) {
1789 if (*p >=
'a' && *p <=
'z')
1791 else if (*p < 'A' || *p >
'Z')
1796 if (
StringEquN(comm,
"CODON RECOGNIZED ", 17)) {
1808 if (
StringEquN(comm,
"PUTATIVE ", 9) && comm[10] ==
' ' &&
1809 comm[14] ==
' ' &&
StringEquN(&comm[15],
"TRNA", 4)) {
1817 for (q = comm, p = q; p;) {
1845 optional<string> qval;
1866 feat.
SetData().SetRna(*rna_ref);
1880 rna_gen->SetClass(*p);
1886 rna_qual->
SetQual(
"tag_peptide");
1890 rna_quals->
Set().push_back(rna_qual);
1893 rna_gen->SetQuals(*rna_quals);
1902 if (p && ! p->empty()) {
1919 const Char* c_q =
nullptr;
1920 for (;; c_p += 5, c_q = c_p) {
1926 const Char* c_r =
nullptr;
1927 for (c_p = feat.
GetComment().c_str();; c_p += 4, c_r = c_p) {
1934 c_p = (c_q > c_r) ? c_q : c_r;
1941 while (*c_p ==
' ' || *c_p ==
'\t' || *c_p ==
',' || *c_p ==
';')
1952 if (qval->length() > 511) {
1958 if (rna_gen.
Empty())
1961 rna_gen->SetProduct(*qval);
1963 rna_ref->
SetExt().SetName(*qval);
1973 rna_ref->
SetExt().SetGen(*rna_gen);
1994 trnaa->SetAnticodon(*anticodon);
1995 rna_ref->
SetExt().SetTRNA(*trnaa);
2003 if (! qval2.empty()) {
2026 if (trnaa.
Empty()) {
2027 if (trnap.
Empty()) {
2029 rna_ref->
SetExt().SetTRNA(*trnac);
2035 rna_ref->
SetExt().SetTRNA(*trnap);
2039 rna_ref->
SetExt().SetTRNA(*trnac);
2043 trnap->SetCodon().assign(trnac->GetCodon().begin(), trnac->GetCodon().end());
2062 trnac->SetAnticodon(trnaa->SetAnticodon());
2063 trnaa->ResetAnticodon();
2066 trnac->SetCodon().assign(trnaa->GetCodon().begin(), trnaa->GetCodon().end());
2069 rna_ref->
SetExt().SetTRNA(*trnac);
2106 feat.
SetData().SetImp(*imp_feat);
2113 for (COrg_ref::TDb::iterator db = bio.
SetOrg().SetDb().begin(); db != bio.
SetOrg().SetDb().end(); ++db) {
2114 if (! (*db)->CanGetDb())
2117 COrg_ref::TDb::iterator tdb = db;
2118 for (++tdb; tdb != bio.
SetOrg().SetDb().end(); ++tdb) {
2119 if (! (*tdb)->IsSetDb())
2122 if ((*db)->GetDb() < (*tdb)->GetDb())
2125 if ((*db)->GetDb() == (*tdb)->GetDb()) {
2147 for (COrgName::TMod::iterator
mod = rmod.begin();
mod != rmod.end(); ++
mod) {
2148 COrgName::TMod::iterator tmod =
mod;
2149 for (++tmod; tmod != rmod.end(); ++tmod) {
2150 if ((*mod)->GetSubtype() < (*tmod)->GetSubtype())
2153 if ((*mod)->GetSubtype() == (*tmod)->GetSubtype() &&
2154 (*mod)->GetSubname() <= (*tmod)->GetSubname())
2167 for (CBioSource::TSubtype::iterator sub = rsub.begin(); sub != rsub.end(); ++sub) {
2168 CBioSource::TSubtype::iterator tsub = sub;
2169 for (++tsub; tsub != rsub.end(); ++tsub) {
2170 if ((*sub)->GetSubtype() < (*tsub)->GetSubtype())
2173 if ((*sub)->GetSubtype() == (*tsub)->GetSubtype() &&
2174 (*sub)->GetName() <= (*tsub)->GetName())
2186 bool has_comma =
val.find(
',') != string::npos;
2189 std::replace(
val.begin(),
val.end(),
',',
';');
2200 if (! fbp || fbp->
quals.empty())
2203 TQualVector::iterator
first = fbp->
quals.end();
2204 size_t len = 0, count = 0;
2206 for (TQualVector::iterator qual = fbp->
quals.begin(); qual != fbp->
quals.end();) {
2207 if ((*qual)->GetQual() !=
"rpt_unit") {
2214 if ((*qual)->GetVal().empty()) {
2215 qual = fbp->
quals.erase(qual);
2220 len += (*qual)->GetVal().size();
2240 const string&
val = (*first)->GetVal();
2241 if (*
val.begin() ==
'(' && *
val.rbegin() ==
')') {
2248 p.reserve(
len + count + 1);
2250 p.append((*first)->GetVal());
2252 for (TQualVector::iterator qual =
first; qual != fbp->
quals.end();) {
2253 if ((*qual)->GetQual() !=
"rpt_unit") {
2259 p.append((*qual)->GetVal());
2260 qual = fbp->
quals.erase(qual);
2263 (*first)->SetVal(p);
2277 if (! fbp || fbp->
quals.empty())
2287 for (TQualVector::iterator qual = fbp->
quals.begin(); qual != fbp->
quals.end();) {
2288 const string& qual_str = (*qual)->IsSetQual() ? (*qual)->GetQual() :
"";
2289 const string& val_str = (*qual)->IsSetVal() ? (*qual)->GetVal() :
"";
2290 if (qual_str ==
"experiment") {
2291 if (val_str ==
"experimental evidence, no additional details recorded") {
2293 qual = fbp->
quals.erase(qual);
2301 if (qual_str ==
"inference") {
2302 if (val_str ==
"non-experimental evidence, no additional details recorded") {
2304 qual = fbp->
quals.erase(qual);
2312 if (qual_str !=
"evidence") {
2332 qual = fbp->
quals.erase(qual);
2335 if (evi_exp + evi_not > 0 && exp_good + exp_bad + inf_good + inf_bad > 0) {
2347 if (evi_exp + exp_good > 0 && evi_not + inf_good > 0) {
2359 if ((exp_good > 0 && exp_bad > 0) || (inf_good > 0 && inf_bad > 0)) {
2365 ErrPostEx(
SEV_REJECT,
ERR_QUALIFIER_Conflict,
"The special \"no additional details recorded\" value for /experiment or /inference exists in conjunction with other /experiment or /inference qualifiers on the \"%s\" feature at \"%s\". This is currently unsupported.", fbp->
key ? fbp->
key :
"Unknown", fbp->
location ? fbp->
location :
"unknown location");
2371 if (exp_good + evi_exp > 0)
2373 else if (inf_good + evi_not > 0)
2396 char* loc =
nullptr;
2398 bool locmap =
false;
2421 if (pp->
debug ==
false) {
2429 if (! fbp->
quals.empty()) {
2437 if (! fbp->
quals.empty()) {
2442 if (! fbp->
quals.empty())
2445 if (! fbp->
quals.empty())
2448 if (! fbp->
quals.empty()) {
2458 exc_text +=
", trans-splicing";
2473 if (! fbp->
quals.empty()) {
2476 if (! comment->empty()) {
2487 for (
const auto& cur : fbp->
quals) {
2488 const string& qual_str = cur->GetQual();
2489 if (qual_str ==
"pseudogene")
2493 if (qual_str ==
"translation" && (! cur->IsSetVal() || cur->GetVal().empty()))
2496 if (! qual_str.empty())
2497 feat->
SetQual().push_back(cur);
2519 for (TQualVector::iterator q = fbp->
quals.begin(); q != fbp->
quals.end(); ++q) {
2520 if ((*q)->GetQual() ==
"gene" ||
2521 (! qamode && (*q)->GetQual() ==
"product"))
2524 TQualVector::iterator tq = q;
2525 for (++tq; tq != fbp->
quals.end(); ++tq) {
2526 const string& q_qual = (*q)->GetQual();
2527 const string& tq_qual = (*tq)->GetQual();
2529 if (! tq_qual.empty()) {
2530 if (q_qual ==
"gene")
2539 const string q_val = (*q)->GetVal();
2540 const string tq_val = (*tq)->GetVal();
2545 if (! tq_val.empty()) {
2546 if (q_val[0] >=
'0' && q_val[0] <=
'9' &&
2547 tq_val[0] >=
'0' && tq_val[0] <=
'9') {
2548 if (atoi(q_val.c_str()) <= atoi(tq_val.c_str()))
2550 }
else if (q_val <= tq_val)
2566 for (
const auto& gbqp1 : qual1) {
2568 for (
const auto& gbqp2 : qual2) {
2569 const Char* qual_a = gbqp1->IsSetQual() ? gbqp1->GetQual().c_str() :
nullptr;
2570 const Char* qual_b = gbqp2->IsSetQual() ? gbqp2->GetQual().c_str() :
nullptr;
2572 const Char* val_a = gbqp1->IsSetVal() ? gbqp1->GetVal().c_str() :
nullptr;
2573 const Char* val_b = gbqp2->IsSetVal() ? gbqp2->GetVal().c_str() :
nullptr;
2593 if (! fbp1 && ! fbp2)
2595 if (! fbp1 || ! fbp2 ||
2614 if (!
val || *
val ==
'\0')
2617 for (p =
val; *p >=
'0' && *p <=
'9';)
2620 if (p ==
val || p[0] !=
'.' || p[1] !=
'.')
2624 for (p += 2, q = p; *q >=
'0' && *q <=
'9';)
2626 if (q == p || *q !=
'\0')
2630 if (i1 == 0 || i1 > i2 || i2 > (
Int4)length)
2640 if (! fbp || fbp->
quals.empty())
2643 for (TQualVector::iterator cur = fbp->
quals.begin(); cur != fbp->
quals.end();) {
2644 if (! (*cur)->IsSetQual() || ! (*cur)->IsSetVal()) {
2649 const string& qual_str = (*cur)->GetQual();
2650 const string& val_str = (*cur)->GetVal();
2666 cur = fbp->
quals.erase(cur);
2680 if (! dbp || ! dbp->
mpNext)
2683 for (; dbp; dbp = dbp->
mpNext) {
2689 for (tdbp = dbp->
mpNext; tdbp; tdbp = tdbpnext) {
2692 tdbpprev->
mpNext = tdbpnext;
2716 tdbpprev->
mpNext = tdbpnext;
2743 for (; dbp; dbp = dbp->
mpNext) {
2769 isLocusTag(
"locus_tag");
2771 for (; dbp; dbp = dbp->
mpNext) {
2775 size_t olt = std::count_if(fbp->
quals.begin(), fbp->
quals.end(), isOldLocusTag);
2776 size_t lt = std::count_if(fbp->
quals.begin(), fbp->
quals.end(), isLocusTag);
2785 for (
const auto& gbqp1 : fbp->
quals) {
2786 if (! gbqp1->IsSetQual() || ! gbqp1->IsSetVal() || ! isLocusTag(gbqp1))
2789 const string& gbqp1_val = gbqp1->GetVal();
2790 if (gbqp1_val.empty())
2793 for (
const auto& gbqp2 : fbp->
quals) {
2794 if (! gbqp2->IsSetQual() || ! gbqp2->IsSetVal())
2797 const string& gbqp2_val = gbqp2->GetVal();
2811 for (TQualVector::const_iterator gbqp1 = fbp->
quals.begin(); gbqp1 != fbp->
quals.end(); ++gbqp1) {
2812 const string& gbqp1_val = (*gbqp1)->GetVal();
2813 if (isOldLocusTag(*gbqp1) || gbqp1_val.empty())
2816 TQualVector::const_iterator gbqp2 = gbqp1;
2817 for (++gbqp2; gbqp2 != fbp->
quals.end(); ++gbqp2) {
2818 const string& gbqp2_val = (*gbqp2)->GetVal();
2819 if (isOldLocusTag(*gbqp2) || gbqp2_val.empty())
2828 if (gbqp2 != fbp->
quals.end())
2838 bool got_pseudogene;
2841 for (; dbp; dbp = dbp->
mpNext) {
2847 got_pseudogene =
false;
2849 for (TQualVector::iterator cur = fbp->
quals.begin(); cur != fbp->
quals.end();) {
2850 const string& qual_str = (*cur)->GetQual();
2851 const string& val_str = (*cur)->IsSetVal() ? (*cur)->GetVal() :
"";
2853 if (qual_str !=
"pseudogene") {
2854 if (! got_pseudo && qual_str ==
"pseudo")
2860 if (got_pseudogene) {
2863 cur = fbp->
quals.erase(cur);
2867 got_pseudogene =
true;
2869 if (val_str.empty()) {
2872 cur = fbp->
quals.erase(cur);
2883 cur = fbp->
quals.erase(cur);
2886 if (! got_pseudogene || ! got_pseudo)
2902 for (; dbp; dbp = dbp->
mpNext) {
2910 for (TQualVector::iterator cur = fbp->
quals.begin(); cur != fbp->
quals.end();) {
2911 const string& qual_str = (*cur)->GetQual();
2913 const string& val_str = (*cur)->IsSetVal() ? (*cur)->SetVal() :
dummy;
2915 if (qual_str ==
"compare") {
2917 if (! val_str.empty()) {
2918 const char* q =
StringChr(val_str.c_str(),
'.');
2919 if (q && q[1] !=
'\0') {
2921 for (p = q + 1; *p >=
'0' && *p <=
'9';)
2933 cur = fbp->
quals.erase(cur);
2937 }
else if (qual_str ==
"citation")
2943 if (com_count > 0 || cit_count > 0 ||
2972 for (; dbp; dbp = dbp->
mpNext) {
2977 for (p =
location, q = p; *p !=
'\0'; p++)
2978 if (*p !=
' ' && *p !=
'\t' && *p !=
'\n')
2987 for (p =
location + 1; *p !=
'\0'; p++) {
2990 for (
r =
nullptr, q = p - 1;; q--) {
2992 if (*q !=
'_' && (*q < '0' || *q >
'9') &&
2993 (*q < 'a' || *q >
'z') && (*q < 'A' || *q >
'Z'))
3005 if (*q !=
'_' && (*q < '0' || *q >
'9') &&
3006 (*q < 'a' || *q >
'z') && (*q < 'A' || *q >
'Z')) {
3015 (q[1] ==
'z' || q[1] ==
'Z') && ibp->
is_tpa ==
false)
3050 using FTAOperonList = list<FTAOperon*>;
3051 FTAOperonList operonList;
3052 FTAOperonList residentList;
3053 bool success =
true;
3055 if (feats.empty()) {
3059 for (
const auto& pFeat : feats) {
3060 if (! pFeat->GetData().IsImp())
3063 const auto& featLocation = pFeat->GetLocation();
3064 const CImp_feat& featImp = pFeat->GetData().GetImp();
3068 for (
const auto& pQual : pFeat->GetQual()) {
3069 const auto& qual = *pQual;
3070 if (! qual.IsSetQual() || qual.GetQual() !=
"operon" ||
3071 ! qual.IsSetVal() || qual.GetVal().empty()) {
3081 operonList.push_back(pLatest);
3083 residentList.push_back(pLatest);
3086 for (
const auto& operon : operonList) {
3087 if (pLatest == operon) {
3090 if (pLatest->
mOperon != operon->mOperon) {
3098 if (opQualCount > 1) {
3103 if (opQualCount == 0 && featImp.
IsSetKey() && featImp.
GetKey() ==
"operon") {
3109 for (
const auto& resident : residentList) {
3110 bool matched =
false;
3111 for (
const auto& operon : operonList) {
3112 if (resident->mOperon != operon->mOperon) {
3119 ErrPostEx(
SEV_REJECT,
ERR_FEATURE_OperonLocationMisMatch,
"Feature \"%s\" at \"%s\" with /operon qualifier \"%s\" does not fall within the span of the operon feature at \"%s\".", resident->mFeatname.c_str(), resident->LocationStr().c_str(), resident->mOperon.c_str(), operon->LocationStr().c_str());
3124 ErrPostEx(
SEV_REJECT,
ERR_FEATURE_InvalidOperonQual,
"/operon qualifier \"%s\" on feature \"%s\" at \"%s\" has a value that does not match any of the /operon qualifiers on operon features.", resident->mOperon.c_str(), resident->mFeatname.c_str(), resident->LocationStr().c_str());
3128 for (
auto& resident : residentList) {
3131 for (
auto& operon : operonList) {
3142 if (! fbp || fbp->
quals.empty())
3145 for (TQualVector::iterator cur = fbp->
quals.begin(); cur != fbp->
quals.end(); ++cur) {
3146 const char* cur_qual = (*cur)->IsSetQual() ? (*cur)->GetQual().c_str() :
nullptr;
3147 const char* cur_val = (*cur)->IsSetVal() ? (*cur)->GetVal().c_str() :
nullptr;
3149 TQualVector::iterator
next = cur;
3151 const char* next_qual = (*next)->IsSetQual() ? (*next)->GetQual().c_str() :
nullptr;
3152 const char* next_val = (*next)->IsSetVal() ? (*next)->GetVal().c_str() :
nullptr;
3185 list<string> linkage_evidence_names;
3190 const char* gap_type;
3193 Int4 estimated_length;
3216 if (is_htg >= 0 && is_htg <= 2)
3220 else if (
key ==
"HTGS_PHASE0")
3222 else if (
key ==
"HTGS_PHASE1")
3224 else if (
key ==
"HTGS_PHASE2")
3226 else if (
key ==
"HTGS_PHASE3")
3232 finished_gap =
false;
3233 for (ibp->
gaps =
nullptr; dbp; dbp = dbp->
mpNext) {
3239 linkage_evidence_names.clear();
3240 asn_linkage_evidence.clear();
3246 if (! fbp || ! fbp->
key)
3249 prev_gap = curr_gap;
3252 prev_gap = curr_gap;
3260 linkage_evidence_names.clear();
3262 asn_linkage_evidence.clear();
3263 estimated_length = -1;
3265 for (
const auto& cur : fbp->
quals) {
3266 if (! cur->IsSetQual() || ! cur->IsSetVal())
3269 const string& cur_qual = cur->GetQual();
3270 const string& cur_val = cur->GetVal();
3272 if (cur_qual.empty() || cur_val.empty())
3275 if (cur_qual ==
"estimated_length") {
3276 if (cur_val ==
"unknown")
3277 estimated_length = -100;
3279 const char* cp = cur_val.c_str();
3280 for (; *cp >=
'0' && *cp <=
'9';)
3283 estimated_length = atoi(cur_val.c_str());
3285 }
else if (cur_qual ==
"gap_type")
3286 gap_type = cur_val.c_str();
3287 else if (cur_qual ==
"linkage_evidence") {
3288 linkage_evidence_names.push_back(cur_val);
3296 for (q = p; *p >=
'0' && *p <=
'9';)
3301 }
else if (*p ==
'.') {
3305 if (*fbp->
location ==
'<' && from != 1)
3307 else if (*p ==
'.') {
3310 for (q = p; *p >=
'0' && *p <=
'9';)
3314 if (*(q - 1) ==
'>' && to != (
int)ibp->
bases)
3320 if (from == 0 || to == 0 || from > to) {
3331 if (gap_type && is_htg > -1 &&
3332 !
StringEqu(gap_type,
"within scaffold") &&
3333 !
StringEqu(gap_type,
"repeat within scaffold"))
3334 ErrPostEx(
SEV_ERROR,
ERR_QUALIFIER_UnexpectedGapTypeForHTG,
"assembly_gap has /gap_type of \"%s\", but clone-based HTG records are only expected to have \"within scaffold\" or \"repeat within scaffold\" gaps. assembly_gap feature located at \"%d..%d\".", gap_type, from, to);
3336 if (is_htg == 0 || is_htg == 1) {
3337 for (
const string& evidence : linkage_evidence_names) {
3339 ErrPostEx(
SEV_ERROR,
ERR_QUALIFIER_LinkageShouldBeUnspecified,
"assembly gap has /linkage_evidence of \"%s\", but unoriented and unordered Phase0/Phase1 HTG records are expected to have \"unspecified\" evidence. assembly_gap feature located at \"%d..%d\".", evidence.c_str(), from, to);
3342 }
else if (is_htg == 2 || is_htg == 3) {
3343 for (
const string& evidence : linkage_evidence_names) {
3351 if (is_htg == 3 && ! finished_gap) {
3353 finished_gap =
true;
3370 asn_gap_type = snp->
num;
3372 if (linkage_evidence_names.empty() &&
3373 (
StringEqu(gap_type,
"within scaffold") ||
3374 StringEqu(gap_type,
"repeat within scaffold"))) {
3379 if (! linkage_evidence_names.empty()) {
3381 !
StringEqu(gap_type,
"within scaffold") &&
3382 !
StringEqu(gap_type,
"repeat within scaffold")) {
3385 "The /linkage_evidence qualifier is not legal for the assembly_gap feature at \"%d..%d\" with /gap_type \"%s\".",
3393 for (
const string& evidence : linkage_evidence_names) {
3395 if (evidence == snp->
str)
3400 "assembly_gap feature at \"%d..%d\" has an invalid linkage evidence : \"%s\".",
3409 new_evidence->SetType(snp->
num);
3410 asn_linkage_evidence.push_back(new_evidence);
3415 if (prev_gap + curr_gap == 3) {
3424 if (estimated_length == -1)
3428 }
else if (estimated_length == 0) {
3435 }
else if (estimated_length == -100) {
3436 if (is_htg >= 0 && to - from != 99) {
3439 }
else if (estimated_length != to - from + 1) {
3450 for (gfp = ibp->
gaps; gfp; gfp = gfp->
next) {
3451 if ((gfp->
from >= from && gfp->
from <= to) ||
3452 (gfp->
to >= from && gfp->
to <= to) ||
3453 (gfp->
from <= from && gfp->
to >= to)) {
3456 }
else if (to + 1 == gfp->
from || from - 1 == gfp->
to) {
3464 ErrPostEx(sev,
ERR_FEATURE_ContiguousGaps,
"Gap features at \"%d..%d\" and \"%d..%d\" are contiguous, and should probably be represented by a single gap that spans both.", from, to, gfp->
from, gfp->
to);
3480 if (! asn_linkage_evidence.empty()) {
3482 asn_linkage_evidence.clear();
3484 gfp->
next =
nullptr;
3502 for (tgfp = ibp->
gaps; tgfp; tgfp = tgfp->
next) {
3511 linkage_evidence_names.clear();
3512 asn_linkage_evidence.clear();
3521 ibp->
gaps =
nullptr;
3530 if (! entry || ! xip)
3533 for (; xip; xip = xip->
next) {
3538 for (xipqual = xip->
subtags; xipqual; xipqual = xipqual->
next) {
3550 quals.push_back(qual);
3563 if (! entry || ! xip)
3566 for (; xip; xip = xip->
next)
3579 for (xipfeat = xip->
subtags; xipfeat; xipfeat = xipfeat->
next) {
3620 for (
auto& cur : fbp->
quals) {
3621 if (! cur->IsSetQual() || ! cur->IsSetVal())
3624 const string& cur_qual = cur->GetQual();
3625 const string& cur_val = cur->GetVal();
3627 if (cur_qual !=
"note" || cur_val.empty())
3631 vector<Char>
buf(cur_val.size() + 1);
3633 const char* cp = cur_val.c_str();
3634 for (q = &
buf[0]; *cp !=
'\0'; ++cp) {
3636 if (*cp ==
';' && (cp[1] ==
' ' || cp[1] ==
';')) {
3637 for (++cp; *cp ==
' ' || *cp ==
';';)
3646 cur->SetVal(&
buf[0]);
3648 size += cur->GetVal().size();
3649 for (cp = cur->GetVal().c_str(); *cp !=
'\0'; ++cp)
3660 for (TQualVector::iterator cur = fbp->
quals.begin(); cur != fbp->
quals.end();) {
3661 if (! (*cur)->IsSetQual() || ! (*cur)->IsSetVal()) {
3666 const string& cur_qual = (*cur)->GetQual();
3667 const string& cur_val = (*cur)->GetVal();
3669 if (cur_qual !=
"note") {
3674 if (! cur_val.empty()) {
3682 for (
const char* cq = cur_val.c_str(); *cq !=
'\0'; *p++ = *cq++)
3687 cur = fbp->
quals.erase(cur);
3696 fbp->
quals.push_back(qual_new);
3723 if (!
str || *
str ==
'\0')
3726 for (p =
str; *p !=
'\0'; p++)
3727 if (*p >=
'A' && *p <=
'Z')
3735 char* p = &val_buf[0];
3738 for (; *p !=
'\0'; p++)
3739 if (*p ==
',' && p[1] !=
' ' && p[1] !=
'\0')
3744 vector<char>
buf(
i + val_buf.size());
3747 for (p = &val_buf[0]; *p !=
'\0'; p++) {
3749 if (*p ==
',' && p[1] !=
' ' && p[1] !=
'\0')
3760 vector<string>& lines)
3784 string bstr(bptr, eptr);
3787 vector<string> qualLines;
3790 string qualKey, qualVal;
3791 string featKey(fbp->
key);
3792 string featLocation(fbp->
location);
3794 while (! qualParser.
Done()) {
3801 fbp->
quals.push_back(pQual);
3813 if (!
str || *
str ==
'\0')
3826 }
else if (p && p[1] ==
'\0') {
3875 loc =
"1.." + to_string(ibp->
bases);
3876 for (num = 0; dbp; dbp = dbp->
mpNext, num++) {
3883 eptr = bptr + dbp->
len;
3885 for (p = bptr; *p !=
'\n';)
3894 for (ptr1 = bptr; *ptr1 ==
' ';)
3897 for (ptr2 = ptr1; *ptr2 !=
' ' && *ptr2 !=
'\n';)
3906 for (ptr1 = ptr2; *ptr1 ==
' ';)
3908 if (*ptr1 ==
'\n') {
3909 if (ibp->
is_mga ==
false) {
3923 for (ptr2 = ptr1; *ptr2 !=
'/' && ptr2 < eptr;)
3928 for (p = fbp->
location, q = p; *p !=
'\0'; p++)
3929 if (*p !=
' ' && *p !=
'\n')
3960 for (
const auto& cur : fbp->
quals) {
3961 const string& cur_qual = cur->GetQual();
3962 if (cur_qual ==
"gap_type" ||
3963 cur_qual ==
"assembly_evidence") {
3971 for (
const auto& cur : fbp->
quals) {
3972 const string& cur_qual = cur->GetQual();
3973 if (cur_qual ==
"submitter_seqid") {
4001 const char*
str = str1.c_str();
4021 for (
auto& cur : fbp->
quals) {
4022 if (! cur->IsSetQual() || ! cur->IsSetVal())
4025 const string& qual_str = cur->GetQual();
4026 const string& val_str = cur->GetVal();
4028 vector<Char> val_buf(val_str.begin(), val_str.end());
4029 val_buf.push_back(0);
4033 if (*p ==
'\0' && qual_str !=
"replace") {
4037 if (qual_str ==
"replace")
4042 if (qual_str ==
"satellite")
4057 if (! fbp || fbp->
quals.empty())
4060 for (TQualVector::iterator cur = fbp->
quals.begin(); cur != fbp->
quals.end();) {
4061 const string& qual_str = (*cur)->GetQual();
4063 if ((*cur)->IsSetVal()) {
4064 const string& val_str = (*cur)->GetVal();
4065 vector<Char> val_buf(val_str.begin(), val_str.end());
4066 val_buf.push_back(0);
4068 if (qual_str ==
"translation") {
4070 }
else if (qual_str ==
"rpt_unit") {
4072 }
else if (qual_str ==
"cons_splice") {
4074 }
else if (qual_str ==
"note") {
4075 for (p = &val_buf[0];;) {
4083 if (val_buf.size() > 30) {
4094 for (p = &val_buf[0]; *p ==
'\"' || *p ==
' ' || *p ==
'\t';)
4098 if (qual_str ==
"replace") {
4103 (*cur)->SetVal(&val_buf[0]);
4111 if (! (*cur)->IsSetVal()) {
4112 if (qual_str ==
"old_locus_tag")
4117 cur = fbp->
quals.erase(cur);
4120 }
else if ((*cur)->IsSetVal()) {
4126 if ((*cur)->IsSetVal() && qual_str ==
"note") {
4127 string val = (*cur)->GetVal();
4128 std::replace(
val.begin(),
val.end(),
'\"',
'\'');
4129 (*cur)->SetVal(
val);
4146 for (num = 0; dbp; dbp = dbp->
mpNext, num++) {
4153 if (fbp->
key[0] ==
'-' && fbp->
key[1] ==
'\0') {
4182 if (keyindx < 0 && ! deb) {
4191 if (! fbp->
quals.empty()) {
4201 }
else if (fbp->
spindex < 0) {
4216 const char*
str = str1.c_str();
4236 for (
auto& cur : fbp->
quals) {
4237 if (! cur->IsSetQual() || ! cur->IsSetVal())
4240 const string& qual_str = cur->GetQual();
4241 const string& val_str = cur->GetVal();
4243 vector<Char> val_buf(val_str.begin(), val_str.end());
4244 val_buf.push_back(0);
4248 if (*p ==
'\0' && qual_str !=
"replace") {
4252 if (qual_str ==
"replace")
4268 for (
const auto& qual : feat.
GetQual()) {
4269 if (! qual->IsSetQual() || qual->GetQual().empty() ||
4270 qual->GetQual() !=
"ncRNA_class")
4275 if (! qual->IsSetVal() || qual->GetVal().empty()) {
4300 ErrPostEx(
SEV_REJECT,
ERR_FEATURE_ncRNA_class,
"Feature \"ncRNA\" at location \"%s\" %s /ncRNA_class qualifier.", loc.empty() ?
"unknown" : loc.c_str(), (count == 0) ?
"lacks the mandatory" :
"has more than one");
4308 for (
auto qual = feat.
SetQual().begin(); qual != feat.
SetQual().end(); ++qual) {
4309 if (! (*qual)->IsSetQual() || (*qual)->GetQual() !=
"artificial_location")
4312 if ((*qual)->IsSetVal()) {
4313 const Char* p_val = (*qual)->GetVal().c_str();
4314 for (; *p_val ==
'\"';)
4318 (*qual)->ResetVal();
4321 string val = (*qual)->IsSetVal() ? (*qual)->GetVal() :
"";
4323 if (
val ==
"heterogenous population sequenced" ||
4324 val ==
"low-quality sequence region") {
4331 except_text +=
", ";
4352 for (
const auto& qual : feat.
GetQual()) {
4353 if (qual->IsSetQual() && qual->GetQual() ==
"mobile_element_type" &&
4354 qual->IsSetVal() && ! qual->GetVal().empty()) {
4355 const Char* p_val = qual->GetVal().c_str();
4356 for (; *p_val ==
'\"';)
4359 if (*p_val !=
'\0') {
4396 if (! fbp1->
key && fbp2->
key)
4398 if (fbp1->
key && ! fbp2->
key)
4400 if (fbp1->
key && fbp2->
key) {
4418 return fbp1->
num < fbp2->
num;
4428 vector<DataBlk*> temp;
4429 temp.reserve(total);
4431 temp.push_back(tdbp);
4436 for (
size_t i = 0;
i < total - 1; tdbp = tdbp->
mpNext,
i++)
4439 temp[total - 1]->
mpNext =
nullptr;
4446 if (! fbp || ! fbp->
key || ! rclass)
4454 qual->
SetQual(
"regulatory_class");
4456 fbp->
quals.push_back(qual);
4470 for (; dbp; dbp = dbp->
mpNext) {
4472 if (! fbp || ! fbp->
key)
4501 other_class =
false;
4504 for (
const auto& cur : fbp->
quals) {
4505 if (! cur->IsSetQual() || ! cur->IsSetVal())
4508 const string& qual_str = cur->GetQual();
4510 if (qual_str !=
"regulatory_class") {
4511 if (qual_str ==
"note")
4517 if (! cur->IsSetVal() || cur->GetVal().empty()) {
4520 p = (
char*)
"(empty)";
4535 const string& val_str = cur->GetVal();
4542 if (val_str ==
"other")
4549 p = (
char*)
"(empty)";
4566 p = (
char*)
"(empty)";
4578 }
else if (count > 1) {
4581 p = (
char*)
"(empty)";
4595 if (other_class && ! got_note) {
4598 p = (
char*)
"(empty)";
4616 const string& submitter_seqid,
4621 if (seqtype == 0 || seqtype == 1 || seqtype == 7)
4623 else if (seqtype == 4 || seqtype == 5 || seqtype == 8 || seqtype == 9)
4631 tag.SetTag().SetStr(submitter_seqid);
4633 bioseq.
SetId().push_back(gen_id);
4653 if (seqtype == 0 || seqtype == 3 || seqtype == 4 || seqtype == 6 ||
4654 seqtype == 10 || seqtype == 12) {
4660 if (seqtype == 1 || seqtype == 5 || seqtype == 7 || seqtype == 8 ||
4661 seqtype == 9 || seqtype == 11) {
4673 if (tbp->
str[0] ==
'-')
4687 if (seqtype == 0 || seqtype == 1 || seqtype == 4 || seqtype == 5 ||
4688 seqtype == 7 || seqtype == 8 || seqtype == 9 || seqtype == 10 ||
4707 CDelta_ext::Tdata::iterator
delta;
4712 if (! (*delta)->IsLoc())
4715 const CSeq_loc& locs = (*delta)->GetLoc();
4750 if (seqtype == 0 || seqtype == 1 || seqtype == 4 || seqtype == 5 ||
4751 seqtype == 7 || seqtype == 8 || seqtype == 9 || seqtype == 10 ||
4823 ids.push_back(seq_id);
4846 for (dbp = dab; dbp; dbp = dbp->
mpNext) {
4863 for (
i = 0; tdbp;
i++, tdbp = tdbp->
mpNext)
4887 if (seq_feats.empty()) {
4889 for (; dab; dab = dabnext) {
4903 for (CSeq_descr::Tdata::iterator descr = descr_list.begin(); descr != descr_list.end();) {
4904 if (! (*descr)->IsSource()) {
4909 descr = descr_list.erase(descr);
4913 descr_src->
SetSource(seq_feats.front()->SetData().SetBiosrc());
4915 descr_list.push_back(descr_src);
4916 seq_feats.pop_front();
4920 for (; dab; dab = dabnext) {
4929 if (dbp->
mDrop ==
true)
4967 if (imp_feat.
GetKey() ==
"intron" ||
4968 imp_feat.
GetKey() ==
"exon") {
4984 seq_feats.push_back(feat);
4994 "Mixed strands in SeqLoc of /trans_splicing feature: %s",
5000 seq_feats.push_back(feat);
5016 for (
auto& feat : seq_feats) {
5017 if (! feat->GetData().IsImp())
5020 const CImp_feat& imp_feat = feat->GetData().GetImp();
5040 SeqFeatPub(pp, entry, seq_feats, ids, col_data, ibp);
5041 if (seq_feats.empty() && ibp->
drop) {
5048 ImpFeatPub(pp, entry, seq_feats, *seq_id, col_data, ibp);
5051 if (seq_feats.empty())
5055 annot->
SetData().SetFtable().swap(seq_feats);
5057 bioseq.
SetAnnot().push_back(annot);
5067 if (! p || (tRNA && tRNA < p))
5069 if (! p || (rRNA && rRNA < p))
5071 if (! p || (snRNA && snRNA < p))
5073 if (! p || (scRNA && scRNA < p))
5075 if (! p || (uRNA && uRNA < p))
5077 if (! p || (snoRNA && snoRNA < p))
5086 if (p == snRNA || p == uRNA)
5109 char* mRNA =
nullptr;
5110 char* tRNA =
nullptr;
5111 char* rRNA =
nullptr;
5112 char* snRNA =
nullptr;
5113 char* scRNA =
nullptr;
5114 char* uRNA =
nullptr;
5115 char* snoRNA =
nullptr;
5152 while (*
r !=
';' && *
r !=
'\n' && *
r !=
'\0')
5155 while (*
r !=
';' && *
r !=
' ' && *
r !=
'\t' && *
r !=
'\n' &&
5158 if (
r - molstr > 10)
5169 if (ibp->
moltype ==
"genomic DNA") {
5179 }
else if (ibp->
moltype ==
"genomic RNA") {
5188 }
else if (ibp->
moltype ==
"mRNA") {
5197 }
else if (ibp->
moltype ==
"tRNA") {
5206 }
else if (ibp->
moltype ==
"rRNA") {
5215 }
else if (ibp->
moltype ==
"snoRNA") {
5224 }
else if (ibp->
moltype ==
"snRNA") {
5233 }
else if (ibp->
moltype ==
"scRNA") {
5242 }
else if (ibp->
moltype ==
"pre-RNA") {
5251 }
else if (ibp->
moltype ==
"pre-mRNA") {
5260 }
else if (ibp->
moltype ==
"other RNA") {
5272 }
else if (ibp->
moltype ==
"other DNA") {
5284 }
else if (ibp->
moltype ==
"unassigned RNA") {
5296 }
else if (ibp->
moltype ==
"unassigned DNA") {
5308 }
else if (ibp->
moltype ==
"viral cRNA") {
5319 }
else if (ibp->
moltype ==
"transcribed RNA") {
5355 ibp->
moltype !=
"genomic DNA")
5414 if (genomic < 0 || genomic > 20) {
5424 while (*q !=
';' && *q !=
'\n' && *q !=
'\0')
5427 while (*q !=
';' && *q !=
' ' && *q !=
'\t' && *q !=
'\n' &&
5430 if (q - molstr > 10)
5454 else if (genomic > 1 && genomic < 6)
5463 else if (genomic == 2)
5465 else if (genomic == 3)
5467 else if (genomic == 4)
5473 }
else if (genomic == 5)
5479 }
else if (genomic == 7)
5481 else if (genomic == 8)
5483 else if (genomic == 9)
5485 else if (genomic == 10 || genomic == 12)
5487 else if (genomic == 11)
5489 else if (genomic == 13)
5491 else if (genomic == 14)
5493 else if (genomic == 15)
5495 else if (genomic == 16)
5497 else if (genomic == 17)
5503 }
else if (genomic == 18)
5509 }
else if (genomic == 19 || genomic == 20)
5516 const Char* div =
nullptr;
5566 for (p = tRNA + 4; *p ==
' ' || *p ==
'\t';)
5594 for (
i = 0; dbp &&
i < 2; dbp = dbp->
mpNext) {
5612 ErrPostEx(
SEV_ERROR,
ERR_SOURCE_UnclassifiedViralRna,
"Cannot determine viral molecule type (genomic vs a specific type of RNA) based on definition line, CDS content, or taxonomic lineage. So this sequence has been classified as genomic by default (perhaps in error).");
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
bool fta_strings_same(const char *s1, const char *s2)
CRef< CSeq_loc > fta_get_seqloc_int_whole(CSeq_id &seq_id, size_t len)
bool fta_number_is_huge(const Char *s)
Int4 fta_fix_seq_loc_id(TSeqLocList &locs, ParserPtr pp, char *location, const char *name, bool iscon)
CRef< CPatent_seq_id > MakeUsptoPatSeqId(const char *acc)
void GetSequenceOfKeywords(const DataBlk &entry, int type, int col_data, TKeywordList &keywords)
CRef< CSeq_id > MakeAccSeqId(const char *acc, Uint1 seqtype, bool accver, Int2 vernum)
void ShrinkSpaces(char *line)
void GapFeatsFree(GapFeatsPtr gfp)
size_t CheckOutsideEntry(ParserPtr pp, const char *acc, Int2 vernum)
@Gb_qual.hpp User-defined methods of the data storage class.
@Imp_feat.hpp User-defined methods of the data storage class.
list< CRef< CLinkage_evidence > > TLinkage_evidence
virtual bool GetNextQualifier(string &qualKey, string &qualVal)
@RNA_ref.hpp User-defined methods of the data storage class.
EQualifier
List of available qualifiers for feature keys.
const TQualifiers & GetMandatoryQualifiers(void) const
Get the list of all mandatory qualifiers for the feature.
@ eSubtype_bad
These no longer need to match the FEATDEF values in the C toolkit's objfdef.h.
static EQualifier GetQualifierType(CTempString qual)
convert qual string to enumerated value
static CTempString GetQualifierAsString(EQualifier qual)
Convert a qualifier from an enumerated value to a string representation or empty if not found.
static ESubtype SubtypeNameToValue(CTempString sName)
Turn a string into its ESubtype which is NOT necessarily related to the identifier of the enum.
namespace ncbi::objects::
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
const CSeq_loc * GetFirstLoc(ENullSegType null_seg=eNullSegAllow) const
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
bool operator()(const CRef< CGb_qual > &qual)
PredIsGivenQual(const string &qual)
constexpr const_iterator begin() const
#define ParFlat_COL_DATA_EMBL
#define ERR_FEATURE_RedundantOldLocusTag
#define ERR_FEATURE_BadAnticodonLoc
#define ERR_QUALIFIER_InvalidLinkageEvidence
#define ERR_QUALIFIER_MissingGapType
#define ERR_FEATURE_MissingOperonQual
#define ERR_LOCUS_NonViralRNAMoltype
#define ERR_FEATURE_MoreThanOneCAGEFeat
#define ERR_FEATURE_MatchingOldNewLocusTag
#define ERR_FEATURE_OldLocusTagWithoutNew
#define ERR_LOCATION_RefersToExternalRecord
#define ERR_FEATURE_LocationParsing
#define ERR_QUALIFIER_InvalidArtificialLoc
#define ERR_FEATURE_GapSizeEstLengthMissMatch
#define ERR_QUALIFIER_InvalidPseudoGeneValue
#define ERR_QUALIFIER_MultRptUnitComma
#define ERR_FEATURE_RequiredQualifierMissing
#define ERR_QUALIFIER_IllegalCompareQualifier
#define ERR_QUALIFIER_MissingRegulatoryClass
#define ERR_FEATURE_MultipleOperonQuals
#define ERR_QUALIFIER_DbxrefUnknownDBName
#define ERR_SOURCE_SubmitterSeqidIgnored
#define ERR_QUALIFIER_ObsoleteRptUnit
#define ERR_SOURCE_InvalidMolType
#define ERR_QUALIFIER_LinkageShouldNotBeUnspecified
#define ERR_FEATURE_OverlappingGaps
#define ERR_QUALIFIER_MultiplePseudoGeneQuals
#define ERR_QUALIFIER_MultipleRegulatoryClass
#define ERR_FEATURE_UnknownGapNot100
#define ERR_QUALIFIER_LinkageShouldBeUnspecified
#define ERR_FEATURE_ContiguousGaps
#define ERR_FEATURE_EmptyOldLocusTag
#define ERR_QUALIFIER_InvalidRegulatoryClass
#define ERR_QUALIFIER_InvalidGapTypeForLinkageEvidence
#define ERR_FEATURE_InvalidGapLocation
#define ERR_REFERENCE_UnparsableLocation
#define ERR_FEATURE_AssemblyGapAndLegacyGap
#define ERR_FEATURE_InvalidAnticodonPos
#define ERR_QUALIFIER_ShouldNotHaveValue
#define ERR_QUALIFIER_Conflict
#define ERR_QUALIFIER_InvalidRptUnitRange
#define ERR_FEATURE_InvalidSatelliteType
#define ERR_QUALIFIER_InvalidGapType
#define ERR_QUALIFIER_NoNoteForOtherRegulatory
#define ERR_SOURCE_GenomicViralRnaAssumed
#define ERR_FEATURE_InvalidQualifier
#define ERR_LOCATION_AccessionNotTLS
#define ERR_FEATURE_FourBaseAntiCodon
#define ERR_SOURCE_MolTypeSeqTypeConflict
#define ERR_FEATURE_OperonLocationMisMatch
#define ERR_FEATURE_MultipleLocusTags
#define ERR_SOURCE_LineageImpliesGenomicViralRna
#define ERR_LOCATION_NCBIRefersToExternalRecord
#define ERR_LOCATION_TransSpliceMixedStrand
#define ERR_FEATURE_UnknownFeatKey
#define ERR_QUALIFIER_DuplicateRemoved
#define ERR_FEATURE_ncRNA_class
#define ERR_LOCATION_AccessionNotTPA
#define ERR_FEATURE_IllegalEstimatedLength
#define ERR_QUALIFIER_DbxrefShouldBeNumeric
#define ERR_FEATURE_InvalidOperonQual
#define ERR_FEATURE_FinishedHTGHasAssemblyGap
#define ERR_FEATURE_ObsoleteFeature
#define ERR_FEATURE_OperonQualsNotUnique
#define ERR_FEATURE_Dropped
#define ERR_DEFINITION_DifferingRnaTokens
#define ERR_FORMAT_InvalidMolType
#define ERR_QUALIFIER_MissingLinkageEvidence
#define ERR_QUALIFIER_DbxrefIncorrect
#define ERR_SOURCE_SubmitterSeqidNotAllowed
#define ERR_QUALIFIER_OldPseudoWithPseudoGene
#define ERR_FEATURE_StrangeAntiCodonSize
#define ERR_QUALIFIER_EmbeddedQual
#define ERR_FEATURE_InvalidAssemblyGapLocation
#define ERR_LOCATION_AccessionNotTSA
#define ERR_LOCATION_FailedCheck
#define ERR_FEATURE_NoSatelliteClassOrIdentifier
#define ERR_QUALIFIER_InvalidEvidence
#define ERR_SOURCE_UnclassifiedViralRna
#define ERR_QUALIFIER_UnexpectedGapTypeForHTG
#define ERR_SOURCE_SubmitterSeqidDropped
#define ERR_SOURCE_MolTypesDisagree
#define ERR_QUALIFIER_DbxrefWrongType
#define ERR_FEATURE_FeatureKeyReplaced
#define ERR_QUALIFIER_EmptyQual
#define ERR_FEATURE_ObsoleteDbXref
#define ERR_FEATURE_DuplicateRemoved
#define ERR_LOCATION_MixedStrand
void ParseSourceFeat(ParserPtr pp, DataBlkPtr dbp, TSeqIdList &seqids, Int2 type, CBioseq &bioseq, TSeqFeatList &seq_feats)
DataBlkPtr XMLBuildRefDataBlk(char *entry, const XmlIndex *xip, int type)
char * XMLFindTagValue(const char *entry, const XmlIndex *xip, Int4 tag)
#define INSDQUALIFIER_NAME
#define INSDSEQ_FEATURE_TABLE
char * XMLGetTagValue(const char *entry, const XmlIndex *xip)
#define INSDFEATURE_LOCATION
void XMLGetKeywords(const char *entry, const XmlIndex *xip, TKeywordList &keywords)
#define INSDQUALIFIER_VALUE
#define INSDREFERENCE_POSITION
#define INSDFEATURE_QUALS
#define INSDREFERENCE_REFERENCE
std::list< CRef< objects::CSeq_id > > TSeqIdList
std::list< CRef< objects::CSeq_feat > > TSeqFeatList
int StringCmp(const char *s1, const char *s2)
bool StringEquNI(const char *s1, const char *s2, size_t n)
bool StringEquN(const char *s1, const char *s2, size_t n)
bool StringEqu(const char *s1, const char *s2)
void StringCpy(char *d, const char *s)
size_t StringLen(const char *s)
void StringCat(char *d, const char *s)
char * StringNew(size_t sz)
void FtaDeletePrefix(int prefix)
void Nlm_ErrSetContext(const char *module, const char *fname, int line)
void Nlm_ErrPostEx(ErrSev sev, int lev1, int lev2, const char *fmt,...)
void FtaInstallPrefix(int prefix, const char *name, const char *location)
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static void DLIST_NAME() remove(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static const char * str(char *buf, int n)
static const char location[]
unsigned int TSeqPos
Type for sequence locations and lengths.
const CTextseq_id * GetTextseq_Id(void) const
Return embedded CTextseq_id, if any.
E_SIC Compare(const CSeq_id &sid2) const
Compare() - more general.
@ e_YES
SeqIds compared, but are different.
ENa_strand GetStrand(void) const
Get the location's strand.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Override Assign() to incorporate cache invalidation.
TRange GetTotalRange(void) const
CConstRef< CSeq_loc > GetRangeAsSeq_loc(void) const
Get seq-loc for the current iterator position.
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
const CSeq_id & GetSeq_id(void) const
Get seq_id of the current location.
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
sequence::ECompare Compare(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
Returns the sequence::ECompare containment relationship between CSeq_locs.
@ fCompareOverlapping
Check if seq-locs are overlapping.
@ eSame
CSeq_locs contain each other.
@ eContained
First CSeq_loc contained by second.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
bool CanGetInst(void) const
TSeqPos GetBioseqLength(void) const
bool CanGetInst_Length(void) const
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
uint8_t Uint1
1-byte (8-bit) unsigned integer
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
TThisType IntersectionWith(const TThisType &r) const
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string (in-place)
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
bool IsSetOrg(void) const
Check if a value has been assigned to Org data member.
bool CanGetOrg(void) const
Check if it is safe to call GetOrg method.
list< CRef< CSubSource > > TSubtype
bool IsSetSubtype(void) const
Check if a value has been assigned to Subtype data member.
const TOrg & GetOrg(void) const
Get the Org member data.
void SetOrg(TOrg &value)
Assign a value to Org data member.
TSubtype & SetSubtype(void)
Assign a value to Subtype data member.
bool IsStr(void) const
Check if variant Str is selected.
const TStr & GetStr(void) const
Get the variant data.
TId GetId(void) const
Get the variant data.
const TLineage & GetLineage(void) const
Get the Lineage member data.
const TDiv & GetDiv(void) const
Get the Div member data.
TMgcode GetMgcode(void) const
Get the Mgcode member data.
TGcode GetGcode(void) const
Get the Gcode member data.
bool IsSetLineage(void) const
lineage with semicolon separators Check if a value has been assigned to Lineage data member.
bool IsSetMgcode(void) const
mitochondrial genetic code Check if a value has been assigned to Mgcode data member.
const TDb & GetDb(void) const
Get the Db member data.
bool IsSetDiv(void) const
GenBank division code Check if a value has been assigned to Div data member.
bool IsSetMod(void) const
Check if a value has been assigned to Mod data member.
list< CRef< COrgMod > > TMod
bool IsSetOrgname(void) const
Check if a value has been assigned to Orgname data member.
bool IsSetGcode(void) const
genetic code (see CdRegion) Check if a value has been assigned to Gcode data member.
const TOrgname & GetOrgname(void) const
Get the Orgname member data.
TEquiv & SetEquiv(void)
Select the variant.
void SetQual(const TQual &value)
Assign a value to Qual data member.
void SetVal(const TVal &value)
Assign a value to Val data member.
TType GetType(void) const
Get the Type member data.
const TAa & GetAa(void) const
Get the Aa member data.
const TCodon & GetCodon(void) const
Get the Codon member data.
bool IsSetAa(void) const
Check if a value has been assigned to Aa data member.
bool IsTRNA(void) const
Check if variant TRNA is selected.
bool IsSetAnticodon(void) const
location of anticodon Check if a value has been assigned to Anticodon data member.
bool IsNcbieaa(void) const
Check if variant Ncbieaa is selected.
void SetExt(TExt &value)
Assign a value to Ext data member.
bool IsSetExt(void) const
generic fields for ncRNA, tmRNA, miscRNA Check if a value has been assigned to Ext data member.
TNcbieaa GetNcbieaa(void) const
Get the variant data.
bool IsSetCodon(void) const
codon(s) as in Genetic-code Check if a value has been assigned to Codon data member.
Tdata & Set(void)
Assign a value to data member.
void SetType(TType value)
Assign a value to Type data member.
void ResetExt(void)
Reset Ext data member.
const TExt & GetExt(void) const
Get the Ext member data.
const TTRNA & GetTRNA(void) const
Get the variant data.
@ eType_snoRNA
will become ncRNA, with RNA-gen.class = snoRNA
@ eType_ncRNA
non-coding RNA; subsumes snRNA, scRNA, snoRNA
const TVal & GetVal(void) const
Get the Val member data.
void SetQual(const TQual &value)
Assign a value to Qual data member.
const TKey & GetKey(void) const
Get the Key member data.
bool IsSetComment(void) const
Check if a value has been assigned to Comment data member.
vector< CRef< CDbtag > > TDbxref
TDbxref & SetDbxref(void)
Assign a value to Dbxref data member.
void SetLoc(const TLoc &value)
Assign a value to Loc data member.
bool IsSetQual(void) const
qualifiers Check if a value has been assigned to Qual data member.
void SetLocation(TLocation &value)
Assign a value to Location data member.
bool IsImp(void) const
Check if variant Imp is selected.
void SetComment(const TComment &value)
Assign a value to Comment data member.
void SetCit(TCit &value)
Assign a value to Cit data member.
void SetPartial(TPartial value)
Assign a value to Partial data member.
const TQual & GetQual(void) const
Get the Qual member data.
bool IsSetPartial(void) const
incomplete in some way? Check if a value has been assigned to Partial data member.
bool IsSetKey(void) const
Check if a value has been assigned to Key data member.
const TLocation & GetLocation(void) const
Get the Location member data.
void SetExcept(TExcept value)
Assign a value to Except data member.
const TData & GetData(void) const
Get the Data member data.
const TExcept_text & GetExcept_text(void) const
Get the Except_text member data.
bool IsSetExcept_text(void) const
explain if except=TRUE Check if a value has been assigned to Except_text data member.
void SetData(TData &value)
Assign a value to Data data member.
void ResetComment(void)
Reset Comment data member.
void SetExp_ev(TExp_ev value)
Assign a value to Exp_ev data member.
bool IsSetQual(void) const
Check if a value has been assigned to Qual data member.
const TComment & GetComment(void) const
Get the Comment member data.
void SetVal(const TVal &value)
Assign a value to Val data member.
void SetPseudo(TPseudo value)
Assign a value to Pseudo data member.
TPartial GetPartial(void) const
Get the Partial member data.
void SetExcept_text(const TExcept_text &value)
Assign a value to Except_text data member.
void ResetLocation(void)
Reset Location data member.
const TQual & GetQual(void) const
Get the Qual member data.
void ResetDbxref(void)
Reset Dbxref data member.
TQual & SetQual(void)
Assign a value to Qual data member.
bool IsSetVal(void) const
Check if a value has been assigned to Val data member.
void ResetQual(void)
Reset Qual data member.
const TImp & GetImp(void) const
Get the variant data.
void SetKey(const TKey &value)
Assign a value to Key data member.
@ eExp_ev_experimental
any reasonable experimental check
@ eExp_ev_not_experimental
similarity, pattern, etc
bool IsGenbank(void) const
Check if variant Genbank is selected.
TGeneral & SetGeneral(void)
Select the variant.
TPatent & SetPatent(void)
Select the variant.
bool IsSetAccession(void) const
Check if a value has been assigned to Accession data member.
bool IsMix(void) const
Check if variant Mix is selected.
bool IsTpg(void) const
Check if variant Tpg is selected.
bool IsEmpty(void) const
Check if variant Empty is selected.
const TPnt & GetPnt(void) const
Get the variant data.
bool IsTpd(void) const
Check if variant Tpd is selected.
TPoint GetPoint(void) const
Get the Point member data.
bool IsOther(void) const
Check if variant Other is selected.
TFrom GetFrom(void) const
Get the From member data.
bool IsEquiv(void) const
Check if variant Equiv is selected.
bool IsPrf(void) const
Check if variant Prf is selected.
bool CanGetA(void) const
Check if it is safe to call GetA method.
bool IsEmbl(void) const
Check if variant Embl is selected.
E_Choice Which(void) const
Which variant is currently selected.
bool IsSwissprot(void) const
Check if variant Swissprot is selected.
const Tdata & Get(void) const
Get the member data.
const TPacked_pnt & GetPacked_pnt(void) const
Get the variant data.
TVersion GetVersion(void) const
Get the Version member data.
bool CanGetAccession(void) const
Check if it is safe to call GetAccession method.
const TPoints & GetPoints(void) const
Get the Points member data.
bool IsGpipe(void) const
Check if variant Gpipe is selected.
TTo GetTo(void) const
Get the To member data.
bool IsInt(void) const
Check if variant Int is selected.
const TInt & GetInt(void) const
Get the variant data.
bool IsSetVersion(void) const
Check if a value has been assigned to Version data member.
bool IsTpe(void) const
Check if variant Tpe is selected.
const TMix & GetMix(void) const
Get the variant data.
bool IsPir(void) const
Check if variant Pir is selected.
const TAccession & GetAccession(void) const
Get the Accession member data.
const TBond & GetBond(void) const
Get the variant data.
bool IsDdbj(void) const
Check if variant Ddbj is selected.
@ e_Tpe
Third Party Annot/Seq EMBL.
@ e_Tpd
Third Party Annot/Seq DDBJ.
@ e_not_set
No variant selected.
@ e_Tpg
Third Party Annot/Seq Genbank.
@ e_Empty
to NULL one Seq-id in a collection
void SetData(TData &value)
Assign a value to Data data member.
list< CRef< CSeqdesc > > Tdata
TId & SetId(void)
Assign a value to Id data member.
const TInst & GetInst(void) const
Get the Inst member data.
const TSource & GetSource(void) const
Get the variant data.
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
const TId & GetId(void) const
Get the Id member data.
bool IsSetExt(void) const
extensions for special types Check if a value has been assigned to Ext data member.
bool IsDelta(void) const
Check if variant Delta is selected.
void SetInst(TInst &value)
Assign a value to Inst data member.
TSource & SetSource(void)
Select the variant.
const TExt & GetExt(void) const
Get the Ext member data.
void SetDescr(TDescr &value)
Assign a value to Descr data member.
const TDelta & GetDelta(void) const
Get the variant data.
list< CRef< CDelta_seq > > Tdata
@ eTech_htgs_2
ordered High Throughput sequence contig
@ eTech_htc
high throughput cDNA
@ eTech_targeted
targeted locus sets/studies
@ eTech_sts
Sequence Tagged Site.
@ eTech_htgs_3
finished High Throughput sequence
@ eTech_htgs_1
unordered High Throughput sequence contig
@ eTech_tsa
transcriptome shotgun assembly
@ eTech_wgs
whole genome shotgun sequencing
@ eTech_survey
one-pass genomic sequence
@ eTech_htgs_0
single genomic reads for coordination
@ eTech_fli_cdna
full length insert cDNA
@ eTech_est
Expressed Sequence Tag.
@ eMol_na
just a nucleic acid
@ eType_proximity_ligation
char * dbname(DBPROCESS *dbproc)
Get name of current database.
CSeq_id::E_Choice GetNucAccOwner(const CTempString &acc)
Int2 CheckNA(const char *str)
int fta_if_wgs_acc(const CTempString &accession)
Int2 CheckNADDBJ(const char *str)
static void fta_parse_rrna_feat(CSeq_feat &feat, CRNA_ref &rna_ref)
static DataBlkPtr fta_sort_features(DataBlkPtr dbp, bool order)
#define Seq_descr_GIBB_mol_tRNA
static void fta_convert_to_lower_case(char *str)
static void fta_check_replace_regulatory(DataBlkPtr dbp, bool *drop)
static bool fta_qual_a_in_b(const TQualVector &qual1, const TQualVector &qual2)
static const char * DbxrefTagStr[]
static bool fta_check_ncrna(const CSeq_feat &feat)
void GetFlatBiomol(CMolInfo::TBiomol &biomol, CMolInfo::TTech tech, char *molstr, ParserPtr pp, const DataBlk &entry, const COrg_ref *org_ref)
static const char * EmptyQuals[]
static void fta_check_pseudogene_qual(DataBlkPtr dbp)
static bool fta_feats_same(const FeatBlk *fbp1, const FeatBlk *fbp2)
static bool fta_check_evidence(CSeq_feat &feat, FeatBlkPtr fbp)
static void fta_convert_to_regulatory(FeatBlkPtr fbp, const char *rclass)
void fta_sort_biosource(CBioSource &bio)
const char * ParFlat_ESTmod[]
static void fta_remove_dup_feats(DataBlkPtr dbp)
#define Seq_descr_GIBB_mol_rRNA
#define Seq_descr_GIBB_mol_trRNA
static bool fta_check_mobile_element(const CSeq_feat &feat)
static void CollectGapFeats(const DataBlk &entry, DataBlkPtr dbp, ParserPtr pp, Int2 type)
const AaCodons aacodons[]
static void ParseQualifiers(FeatBlkPtr fbp, const char *bptr, const char *eptr, Parser::EFormat format)
static CRef< CTrna_ext > fta_get_trna_from_product(CSeq_feat &feat, const string &product, unsigned char *remove)
string location_to_string_or_unknown(const CSeq_loc &loc)
static bool PackSeqPntCheckCpp(const CSeq_loc &loc)
#define Seq_descr_GIBB_mol_other_genetic
StrNum LinkageEvidenceValues[]
static const char * EMBLDbxrefTagStr[]
static void fta_sort_quals(FeatBlkPtr fbp, bool qamode)
void xSplitLines(const string &str, vector< string > &lines)
static bool fta_check_rpt_unit_span(const char *val, size_t length)
static void XMLGetQuals(char *entry, XmlIndexPtr xip, TQualVector &quals)
static FeatBlkPtr MergeNoteQual(FeatBlkPtr fbp)
static char * CheckLocStr(const Char *str)
static void GetRnaRef(CSeq_feat &feat, CBioseq &bioseq, Parser::ESource source, bool accver)
static void SeqFeatPub(ParserPtr pp, const DataBlk &entry, TSeqFeatList &feats, TSeqIdList &seqids, Int4 col_data, IndexblkPtr ibp)
static void fta_check_satellite(char *str, bool *drop)
static bool SortFeaturesByOrder(const DataBlkPtr &sp1, const DataBlkPtr &sp2)
static Uint1 fta_get_aa_from_string(char *str)
#define Seq_descr_GIBB_mol_snRNA
static void fta_process_con_slice(vector< char > &val_buf)
#define Seq_descr_GIBB_mol_other
static void fta_get_gcode_from_biosource(const CBioSource &bio_src, IndexblkPtr ibp)
#define Seq_descr_GIBB_mol_genomic
static void FilterDb_xref(CSeq_feat &feat, Parser::ESource source)
int ParseFeatureBlock(IndexblkPtr ibp, bool deb, DataBlkPtr dbp, Parser::ESource source, Parser::EFormat format)
static void fta_check_compare_qual(DataBlkPtr dbp, bool is_tpa)
static void fta_parse_rpt_units(FeatBlkPtr fbp)
static DataBlkPtr XMLLoadFeatBlk(char *entry, XmlIndexPtr xip)
Int2 SpFeatKeyNameValid(const Char *keystr)
static int get_aa_from_trna(const CTrna_ext &trna)
static bool SeqIntCheckCpp(const CSeq_loc &loc)
static const char * DbxrefObsolete[]
const char * ncRNA_class_values[]
static bool SortFeaturesByLoc(const DataBlkPtr &sp1, const DataBlkPtr &sp2)
static Int4 flat2asn_range_func(void *pp_ptr, const CSeq_id &id)
static void fta_check_artificial_location(CSeq_feat &feat, char *key)
static bool fta_perform_operon_checks(TSeqFeatList &feats, IndexblkPtr ibp)
static bool SeqPntCheckCpp(const CSeq_loc &loc)
CRef< CSeq_feat > SpProcFeatBlk(ParserPtr pp, FeatBlkPtr fbp, TSeqIdList &seqids)
static void fta_create_wgs_seqid(CBioseq &bioseq, IndexblkPtr ibp, Parser::ESource source)
static void ConvertQualifierValue(CRef< CGb_qual > &qual)
static void fta_check_old_locus_tags(DataBlkPtr dbp, bool *drop)
static CRef< CSeq_feat > ProcFeatBlk(ParserPtr pp, FeatBlkPtr fbp, TSeqIdList &seqids)
void LoadFeat(ParserPtr pp, const DataBlk &entry, CBioseq &bioseq)
const char * RegulatoryClassValues[]
static bool CheckLegalQual(const Char *val, Char ch, string *qual)
static void ImpFeatPub(ParserPtr pp, const DataBlk &entry, TSeqFeatList &feats, CSeq_id &seq_id, Int4 col_data, IndexblkPtr ibp)
#define Seq_descr_GIBB_mol_unknown
static CRef< CDbtag > DbxrefQualToDbtag(const CGb_qual &qual, Parser::ESource source)
#define Seq_descr_GIBB_mol_snoRNA
static const char * DbxrefTagAny[]
static bool CheckForeignLoc(const CSeq_loc &loc, const CSeq_id &sid)
static int get_first_codon_from_trna(const CTrna_ext &trna)
#define Seq_descr_GIBB_mol_scRNA
static void XMLCheckQualifiers(FeatBlkPtr fbp)
static void fta_fake_gbparse_err_handler(const Char *, const Char *)
static void GetImpFeat(CSeq_feat &feat, FeatBlkPtr fbp, bool locmap)
const char * PseudoGeneValues[]
static void fta_create_wgs_dbtag(CBioseq &bioseq, const string &submitter_seqid, char *prefix, Int4 seqtype)
static void FreeFeatBlk(DataBlkPtr dbp, Parser::EFormat format)
static void fta_check_non_tpa_tsa_tls_locations(DataBlkPtr dbp, IndexblkPtr ibp)
#define Seq_descr_GIBB_mol_preRNA
static void DelCharBtwData(char *value)
static CRef< CTrna_ext > fta_get_trna_from_comment(const string &comment, unsigned char *remove)
static const char * trna_tags[]
static int XMLParseFeatureBlock(bool deb, DataBlkPtr dbp, Parser::ESource source)
bool GetSeqLocation(CSeq_feat &feat, char *location, TSeqIdList &ids, bool *hard_err, ParserPtr pp, const char *name)
const char * SatelliteValues[]
static Uint1 FTASeqLocCheck(const CSeq_loc &locs, char *accession)
static void fta_remove_dup_quals(FeatBlkPtr fbp)
static CRef< CSeq_loc > GetTrnaAnticodon(const CSeq_feat &feat, char *qval, const TSeqIdList &seqids, bool accver)
static const char * DbxrefTagInt[]
static CMolInfo::EBiomol GetBiomolFromToks(char *mRNA, char *tRNA, char *rRNA, char *snRNA, char *scRNA, char *uRNA, char *snoRNA)
const char * TransSplicingFeats[]
static void fta_strip_aa(char *str)
static const char * ParFlat_RNA_array[]
#define Seq_descr_GIBB_mol_cRNA
static void fta_check_multiple_locus_tag(DataBlkPtr dbp, bool *drop)
static Uint1 fta_get_aa_from_symbol(Char ch)
static void fta_check_rpt_unit_range(FeatBlkPtr fbp, size_t length)
#define Seq_descr_GIBB_mol_mRNA
range(_Ty, _Ty) -> range< _Ty >
constexpr auto sort(_Init &&init)
constexpr bool empty(list< Ts... >) noexcept
const struct ncbi::grid::netcache::search::fields::SIZE size
const struct ncbi::grid::netcache::search::fields::KEY key
const GenericPointer< typename T::ValueType > T2 value
const CharType(& source)[N]
std::list< SeqLoc > TSeqLocList
Int4 delta(size_t dimension_, const Int4 *score_)
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
static const char * prefix[]
CRef< CPubdesc > DescrRefs(ParserPtr pp, DataBlkPtr dbp, Int4 col_data)
CRef< objects::CSeq_entry > seq_entry
string LocationStr() const
objects::CLinkage_evidence::TLinkage_evidence asn_linkage_evidence
objects::CSeq_gap::TType asn_gap_type
vector< IndexblkPtr > entrylist
bool DeleteQual(TQualVector &qlist, const Char *qual)
Uint1 GetQualValueAa(const char *qval, bool checkseq)
string location_to_string(const CSeq_loc &loc)
string CpTheQualValue(const TQualVector &qlist, const Char *qual)
bool SeqLocHaveFuzz(const CSeq_loc &loc)
optional< string > GetTheQualValue(TQualVector &qlist, const Char *qual)
Int2 MatchArrayIString(const Char **array, const Char *text)
char * xSrchNodeType(const DataBlk &entry, Int4 type, size_t *len)
void fta_StringCpy(char *dst, const char *src)
DataBlkPtr TrackNodeType(const DataBlk &entry, Int2 type)
Int2 MatchArrayString(const char **array, const char *text)
Char * StringIStr(const Char *where, const Char *what)
int XGBFeatKeyQualValid(CSeqFeatData::ESubtype subtype, TQualVector &quals, bool error_msgs, bool perform_corrections)
std::vector< CRef< objects::CGb_qual > > TQualVector
#define GB_FEAT_ERR_REPAIRABLE
CRef< CSeq_loc > xgbparseint_ver(const char *raw_intervals, bool &keep_rawPt, int &numErrors, const TSeqIdList &seq_ids, bool accver)
void xinstall_gbparse_error_handler(X_gbparse_errfunc new_func)
void xinstall_gbparse_range_func(void *data, X_gbparse_rangefunc new_func)