66 #define THIS_FILE "genref.cpp"
275 vector<CConstRef<CSeq_loc>> to_remove;
282 }
else if (cur_loc->
IsInt()) {
285 }
else if (cur_loc->
IsPnt()) {
288 }
else if (cur_loc->
IsBond()) {
297 to_remove.push_back(cur_loc);
300 for (vector<
CConstRef<CSeq_loc>>::const_iterator it = to_remove.begin(); it != to_remove.end(); ++it)
304 if (bond->IsSetB() && !
fta_seqid_same(bond->GetB().GetId(), acnum,
id))
316 if (acnum && *acnum !=
'\0' && *acnum !=
' ')
327 TSynSet::const_iterator it1 = syn1.begin(),
330 for (; it1 != syn1.end() && it2 != syn2.end(); ++it1, ++it2) {
336 if (it1 == syn1.end() && it2 == syn2.end())
338 if (it1 == syn1.end())
340 if (it2 == syn2.end())
355 syn1.insert(grp1.
GetSyn().begin(), grp1.
GetSyn().end());
357 syn2.insert(grp2.
GetSyn().begin(), grp2.
GetSyn().end());
389 if (! glp1 && ! glp2)
396 if (glp1->
locus.empty() && glp2->
locus.empty()) {
402 if (glp1->
locus.empty())
404 if (glp2->
locus.empty())
446 status = slip1->
to - slip2->
to;
504 for (glp = gnp->
glp; glp; glp = glp->
next)
507 vector<GeneListPtr> temp(total);
509 for (index = 0, glp = gnp->
glp; glp; glp = glp->
next)
514 gnp->
glp = glp = temp[0];
515 for (index = 0; index < total - 1; glp = glp->
next, index++)
516 glp->
next = temp[index + 1];
518 glp = temp[total - 1];
530 for (; mlp; mlp =
next) {
541 for (; glp; glp = glpnext) {
578 for (; clp; clp = clpnext) {
590 for (; gelop; gelop = gelopnext) {
591 gelopnext = gelop->
next;
623 if (cur_loc->
IsInt()) {
625 id = &interval.
GetId();
628 to = interval.
GetTo();
637 }
else if (cur_loc->
IsPnt()) {
668 slibp->
ids.push_back(sid);
672 if (slibp->
from > from) {
676 if (slibp->
to < to) {
698 for (glp = gnp->
glp; glp; glp = glp->
next) {
717 return glp !=
nullptr;
728 for (; clp; clp = clp->
next)
732 return clp !=
nullptr;
741 if (! glp->
locus.empty()) {
747 if (! maploc.empty()) {
751 if (! glp->
syn.empty()) {
752 gene_ref.
SetSyn().assign(glp->
syn.begin(), glp->
syn.end());
770 feat->
SetQual().push_back(qual);
778 for (TWormbaseSet::const_iterator it = glp->
wormbase.begin(); it != glp->
wormbase.end(); ++it) {
784 tag->SetDb(
"WormBase");
785 tag->SetTag().SetStr(*it);
791 if (! glp->
olt.empty()) {
792 if (glp->
olt.size() > 1)
795 for (TLocusTagSet::const_iterator it = glp->
olt.begin(); it != glp->
olt.end(); ++it) {
800 qual->
SetQual(
"old_locus_tag");
803 feat->
SetQual().push_back(qual);
808 feats.push_back(feat);
828 if (! pId1 && ! pId2) {
832 if (! pId1 || ! pId2) {
850 if (!
first && ! second)
856 for (; mlp; mlp = mlp->
next) {
860 if (
first && second) {
861 for (mlp = second; mlp; mlp = mlp->
next) {
863 for (res = tres->
next; res; res = res->
next) {
878 for (res = mlp->
next; res; res = res->
next) {
897 for (got = 1; got == 1;) {
899 for (tres = res; tres; tres = tres->
next) {
902 for (mlp = tres->
next; mlp; mlp = mlp->
next) {
910 if (tres->
noleft ==
false)
919 (tres->
min <= mlp->
max + 1 && tres->
max + 1 >= mlp->
min)) {
920 if (tres->
min == mlp->
min) {
921 if (tres->
noleft ==
false)
923 }
else if (tres->
min > mlp->
min) {
928 if (tres->
max == mlp->
max) {
931 }
else if (tres->
max < mlp->
max) {
941 for (mlp =
nullptr, tres = res; tres; tres =
next) {
951 tres->
next =
nullptr;
960 if (!
first && ! second)
989 toglp->
olt.insert(fromglp->
olt.begin(), fromglp->
olt.end());
990 fromglp->
olt.clear();
1044 for (glp = gnp->
glp; glp && glp->
segnum == 1; glp = glp->
next) {
1045 if (glp->
loc || ! glp->
mlp)
1049 for (tglp = gnp->
glp; tglp; tglp = tglp->
next) {
1050 if (tglp->
loc || ! glp->
mlp ||
1068 for (tglp = gnp->
glp; tglp; tglp = tglp->
next) {
1069 if (tglp->
loc || tglp->
segnum - segnum != 1 ||
1082 tglp->
mlp =
nullptr;
1097 for (tglp = gnp->
glp; tglp; tglp =
next) {
1107 tglp->
next =
nullptr;
1118 if (! gelop || ! c || ! mlp)
1121 min = (mlp->
min > from) ? from : mlp->
min;
1124 for (; gelop; gelop = gelop->
next) {
1136 auto it = gelop->
ammp.begin();
1137 for (; it != gelop->
ammp.end(); ++it) {
1141 ammp.pId->GetLabel(&label1, &ver1);
1145 if (max < ammp.min || min > ammp.max || ver1 != ver2)
1147 if (label1 == label2)
1150 if (it != gelop->
ammp.end()) {
1155 return gelop !=
nullptr;
1163 if (! mlp || ! mlp->
next)
1168 for (; tmlp->
next; tmlp = tmlp->
next)
1172 for (; tmlp->
next; tmlp = tmlp->
next)
1180 for (tmlp = mlp; tmlp; tmlp = tmlp->
next) {
1226 if (c->
leave == 1) {
1240 if (cur_loc->
IsInt()) {
1256 }
else if (cur_loc->
IsPnt()) {
1276 if (! pId || from < 0 || to < 0) {
1293 for (tmlp = mlp;; tmlp = tmlp->
next) {
1296 if (tempcirc ==
false && ((tmlp->
min <= to && tmlp->
max >= from) ||
1298 if (tmlp->
min > from) {
1302 if (tmlp->
max < to) {
1341 if (mlp->
noleft || noleft) {
1345 if (mlp->
noright || noright) {
1371 if (mlp->
min == mlp->
max)
1381 for (; mlp; mlp = mlp->
next) {
1384 if (mlp->
min == mlp->
max) {
1404 for (mlp = second; mlp; mlp = mlp->
next) {
1407 for (tmlp = second; tmlp < mlp; tmlp = tmlp->
next)
1414 for (mlp =
first; mlp; mlp = mlp->
next) {
1417 for (tmlp =
first; tmlp < mlp; tmlp = tmlp->
next)
1438 for (
bool got =
true; got ==
true;) {
1440 for (mlp = c->
mlp; mlp; mlp = mlp->
next) {
1443 for (tmlp = mlp->
next; tmlp; tmlp = tmlp->
next) {
1448 if ((tmlp->
min >= mlp->
min && tmlp->
min <= mlp->
max) ||
1451 if (mlp->
min == tmlp->
min) {
1454 }
else if (mlp->
min > tmlp->
min) {
1458 if (mlp->
max == tmlp->
max) {
1461 }
else if (mlp->
max < tmlp->
max) {
1479 }
else if (mlp->
numint == 0) {
1503 for (mlpprev =
nullptr, mlp = c->
mlp; mlp; mlp = mlpnext) {
1504 mlpnext = mlp->
next;
1513 mlpprev->
next = mlpnext;
1514 mlp->
next =
nullptr;
1519 for (mlp = c->
mlp; mlp; mlpprev = mlp, mlp = mlp->
next)
1523 if (mlp && mlp != c->
mlp) {
1524 mlpprev->
next =
nullptr;
1525 for (tmlp = mlp; tmlp->
next;)
1548 for (mlp = c->
mlp; mlp; mlp = mlp->
next) {
1549 for (tmlp = mlp->
next; tmlp; tmlp = tmlp->
next) {
1554 if (tmlp->
min < mlp->
min)
1556 if (tmlp->
min == mlp->
min) {
1558 if (tmlp->
max < mlp->
max)
1560 if (tmlp->
max == mlp->
max) {
1568 if (tmlp->
min > mlp->
min)
1570 if (tmlp->
min == mlp->
min) {
1572 if (tmlp->
max > mlp->
max)
1574 if (tmlp->
max == mlp->
max) {
1615 for (cn = c->
next; cn; cn = cn->
next) {
1626 bool circular =
false;
1627 for (j = 1, c = gnp->
glp; c; c = c->
next, j++) {
1637 for (c = gnp->
glp; c->
next;) {
1663 for (cp = gnp->
glp; cp; cp = cp->
next) {
1673 join = (cp !=
nullptr);
1704 for (c = gnp->
glp; c; c = c->
next) {
1713 for (c = gnp->
glp; c; c = c->
next)
1717 for (c = gnp->
glp; c; c = c->
next) {
1722 for (cn = gnp->
glp; cn; cn = cn->
next) {
1750 for (cp =
nullptr, c = gnp->
glp; c; c = cn) {
1763 for (c = gnp->
glp; c; c = cn) {
1771 for (cn = c; cn; cn = cn->
next) {
1777 if (maploc.empty()) {
1783 for (cn = c; cn; cn = cn->
next) {
1798 const CTextseq_id* pTextId = new_id->GetTextseq_Id();
1800 const_cast<CTextseq_id*
>(pTextId)->ResetVersion();
1830 }
else if (data.
IsImp()) {
1850 for (
const auto& qual : quals) {
1851 if (! qual->IsSetQual() || ! qual->IsSetVal() ||
1852 qual->GetQual() !=
"gene" ||
1856 syns.insert(qual->GetVal());
1859 for (
const auto& qual : quals) {
1860 if (! qual->IsSetQual() || ! qual->IsSetVal() ||
1861 qual->GetQual() !=
"gene_synonym" ||
1865 syns.insert(qual->GetVal());
1878 if (
key ==
"CDS" ||
key ==
"rRNA" ||
1879 key ==
"tRNA" ||
key ==
"mRNA")
1935 p =
"precursor_RNA";
1986 if (loc_str.empty())
1989 if (loc_str.size() > 55) {
1990 loc_str = loc_str.substr(0, 50);
2003 list<AccMinMax> ammps;
2013 if (cur_loc->
IsInt()) {
2020 to = interval.
GetTo();
2025 else if (gelop->
strand != strand)
2027 }
else if (cur_loc->
IsPnt()) {
2039 else if (gelop->
strand != strand)
2052 bool found_id =
false;
2053 auto it = ammps.begin();
2054 while (! found_id && it != ammps.end()) {
2057 if (from < ammp.min) {
2060 if (to > ammp.max) {
2073 ammps.push_back(ammp);
2090 gelop->
gene.clear();
2093 gelop->
locus = locus_tag;
2095 gelop->
locus.clear();
2124 for (glp = gelop; glp; glp = glp->
next)
2127 vector<GeneLocsPtr> temp(total);
2129 for (index = 0, glp = gelop; glp; glp = glp->
next)
2130 temp[index++] = glp;
2134 gelop = glp = temp[0];
2135 for (index = 0; index < total - 1; glp = glp->
next, index++)
2136 glp->
next = temp[index + 1];
2138 glp = temp[total - 1];
2139 glp->
next =
nullptr;
2151 for (CSeq_feat::TDbxref::iterator dbxref = feat.
SetDbxref().begin(); dbxref != feat.
SetDbxref().end(); ++dbxref) {
2152 if (! (*dbxref)->IsSetTag() || ! (*dbxref)->IsSetDb() ||
2153 (*dbxref)->GetDb() !=
"WormBase" ||
2154 !
StringEquN((*dbxref)->GetTag().GetStr().c_str(),
"WBGene", 6)) {
2155 dbxrefs.push_back(*dbxref);
2159 glp->
wormbase.insert((*dbxref)->GetTag().GetStr());
2162 if (dbxrefs.empty())
2175 for (TQualVector::iterator qual = feat.
SetQual().begin(); qual != feat.
SetQual().end(); ++qual) {
2176 if (! (*qual)->IsSetQual() || ! (*qual)->IsSetVal() ||
2177 (*qual)->GetQual() !=
"old_locus_tag") {
2178 quals.push_back(*qual);
2182 glp->
olt.insert((*qual)->GetVal());
2206 for (
auto& feat : feats) {
2215 const CSeq_loc* cur_loc = feat->IsSetLocation() ? &feat->GetLocation() :
nullptr;
2216 if (gene.empty() && ! locus_tag) {
2224 if (! gene.empty()) {
2225 newglp->
locus = gene;
2246 if (! newglp->
slibp) {
2259 if (feat->IsSetQual()) {
2275 newglp->
todel =
false;
2279 newglp->
pseudo = feat->IsSetPseudo() ? feat->GetPseudo() :
false;
2281 newglp->
allpseudo = feat->IsSetPseudo() ? feat->GetPseudo() :
false;
2305 for (
const auto& feat : feats) {
2309 const CSeq_loc* cur_loc = feat->IsSetLocation() ? &feat->GetLocation() :
nullptr;
2318 newclp->
to = slip->
to;
2338 const CSeq_id* first_id =
nullptr;
2339 if (! bioseq.
GetId().empty())
2340 first_id = *bioseq.
GetId().begin();
2355 for (
auto& annot : bioseq.
SetAnnot()) {
2356 if (! annot->IsFtable())
2369 if (gene_node->
glp && gene_node->
flag ==
false) {
2372 gene_node->
bioseq = &bioseq;
2373 gene_node->
flag =
true;
2393 for (tglp = glp->
next; tglp; tglp = tglp->
next) {
2420 if (! glp || ! glp->
next)
2424 for (ret =
true; glp; glp = glpstop->
next) {
2430 for (tglp = glp->
next; tglp; tglp = tglp->
next) {
2437 for (tglp = glpstop->
next; tglp; tglp = tglp->
next) {
2444 if ((same_gn ==
false && same_lt ==
false) || (same_gn && same_lt) ||
2448 for (glp = glpstart;; glp = glp->
next) {
2449 ErrPostEx(
SEV_REJECT,
ERR_FEATURE_InconsistentLocusTagAndGene,
"Inconsistent pairs /gene+/locus_tag are encountered: \"%s\"+\"%s\" : %s feature at %s : \"%s\"+\"%s\" : %s feature at %s. Entry dropped.", (glp->
locus.empty()) ?
"(NULL)" : glp->
locus.c_str(), (glp->
locus_tag.empty()) ?
"(NULL)" : glp->
locus_tag.c_str(), glp->
fname.c_str(), glp->
location.c_str(), (tglp->
locus.empty()) ?
"(NULL)" : tglp->
locus.c_str(), (tglp->
locus_tag.empty()) ?
"(NULL)" : tglp->
locus_tag.c_str(), tglp->
fname.c_str(), tglp->
location.c_str());
2456 if (! glpstart->
locus.empty() && ! glpstart->
locus_tag.empty() &&
2458 for (glp = glpstart;; glp = glp->
next) {
2476 if (! gnp || ! gnp->
glp)
2479 for (glp = gnp->
glp; glp; glp = glp->
next) {
2481 (glp->
fname !=
"misc_feature"))
2484 for (tglp = gnp->
glp; tglp; tglp = tglp->
next) {
2485 if (tglp->
fname.empty() ||
2486 (tglp->
fname ==
"misc_feature")) {
2489 if (tglp->
locus.empty() || tglp->
locus[0] ==
'\0' ||
2506 if (! gnp || ! gnp->
glp)
2509 for (glp = gnp->
glp; glp; glp = glp->
next) {
2510 if (glp->
todel || ! glp->
syn.empty() || (glp->
fname !=
"misc_feature"))
2514 for (tglp = gnp->
glp; tglp; tglp = tglp->
next) {
2515 if (tglp->
todel || (tglp->
fname ==
"misc_feature"))
2520 if (tglp->
syn.empty()) {
2531 if (glp->
todel && got)
2535 for (glpprev =
nullptr, glp = gnp->
glp; glp; glp = glpnext) {
2536 glpnext = glp->
next;
2546 glpprev->
next = glpnext;
2548 glp->
next =
nullptr;
2558 for (TSeqFeatList::const_iterator feat = feats.begin(); feat != feats.end(); ++feat) {
2559 const CGene_ref& gene_ref1 = (*feat)->GetData().GetGene();
2563 TSeqFeatList::const_iterator feat_next = feat,
2565 for (++feat_next; feat_next != feats.end(); ++feat_next, ++feat_cur) {
2566 const CGene_ref& gene_ref2 = (*feat_next)->GetData().GetGene();
2579 string loc1_str, loc2_str;
2584 if (diff_lt ==
false) {
2587 "Multiple instances of the \"%s\" gene encountered: \"%s\"+\"%s\" : gene feature at \"%s\" : \"%s\"+\"%s\" : gene feature at \"%s\". Entry dropped.",
2599 "Multiple instances of the \"%s\" gene encountered: \"%s\"+\"%s\" : gene feature at \"%s\" : \"%s\"+\"%s\" : gene feature at \"%s\".",
2643 for (
auto& entry : seq_entries) {
2663 for (glp = gnp->
glp; glp; glp = glp->
next) {
2701 if (! gnp->
feats.empty()) {
2709 for (
auto& cur_annot : *annots) {
2710 if (! cur_annot->IsFtable())
2713 size_t advance = cur_annot->GetData().GetFtable().size();
2714 cur_annot->SetData().SetFtable().splice(cur_annot->SetData().SetFtable().end(), gnp->
feats);
2716 gene_refs.
first = cur_annot->SetData().SetFtable().begin();
2717 std::advance(gene_refs.
first, advance);
2718 gene_refs.
last = cur_annot->SetData().SetFtable().end();
2719 gene_refs.
valid =
true;
2723 if (annots->empty()) {
2733 gene_refs.
first = annot->
SetData().SetFtable().begin();
2734 gene_refs.
last = annot->
SetData().SetFtable().end();
2735 gene_refs.
valid =
true;
2756 if (strand1 != strand2) {
2760 const auto& intv1 = loc1.
GetInt();
2761 const auto& intv2 = loc2.
GetInt();
2762 return (intv1.GetFrom() >= intv2.GetFrom() && intv1.GetTo() <= intv2.GetTo());
2794 TSeqLocInfoList::const_iterator cur_loc = llocs.begin();
2797 bool stopped =
false;
2798 if (gene_refs.
valid) {
2799 for (
auto cur_feat = gene_refs.
first; cur_feat != gene_refs.
last; ++cur_feat) {
2806 if (gene_ref.
Empty()) {
2807 gene_ref.
Reset(&(*cur_feat)->GetData().GetGene());
2823 xref->SetData().SetGene(gerep);
2830 for (CBioseq::TAnnot::iterator annot = annots.begin(); annot != annots.end();) {
2831 if (! (*annot)->IsSetData() || ! (*annot)->GetData().IsFtable()) {
2837 for (TSeqFeatList::iterator feat = feat_table.begin(); feat != feat_table.end();) {
2838 if ((*feat)->IsSetData() && (*feat)->GetData().IsImp()) {
2839 const CImp_feat& imp = (*feat)->GetData().GetImp();
2840 if (imp.
GetKey() ==
"gene") {
2841 feat = feat_table.erase(feat);
2846 char* gene = (*feat)->IsSetQual() ?
GetTheQualValue((*feat)->SetQual(),
"gene") :
nullptr;
2847 char* locus_tag = (*feat)->IsSetQual() ?
GetTheQualValue((*feat)->SetQual(),
"locus_tag") :
nullptr;
2848 if (! gene && ! locus_tag) {
2862 gene_ref->
SetSyn().assign(syns.begin(), syns.end());
2866 (*feat)->SetXref().push_back(xref);
2870 DeleteQual((*feat)->SetQual(),
"gene_synonym");
2872 if ((*feat)->GetQual().empty())
2873 (*feat)->ResetQual();
2881 if (feat_table.empty())
2882 annot = annots.erase(annot);
2901 if (gene_refs.
valid) {
2902 for (TSeqFeatList::iterator feat = gene_refs.
first; feat != gene_refs.
last; ++feat) {
2904 info.strand = (*feat)->GetLocation().IsSetStrand() ? (*feat)->GetLocation().GetStrand() :
eNa_strand_unknown;
2910 llocs.push_back(
info);
2914 for (
auto& entry : seq_entries) {
2916 if (bio_set->IsSetAnnot())
2917 FixAnnot(bio_set->SetAnnot(), acnum, gene_refs, llocs);
2921 if (bioseq->IsSetAnnot())
2922 FixAnnot(bioseq->SetAnnot(), acnum, gene_refs, llocs);
2930 for (
const auto& annot : bioseq.
GetAnnot()) {
2931 if (! annot->IsFtable())
2934 for (
const auto& feat : annot->GetData().GetFtable()) {
2935 for (
const auto& qual : feat->GetQual()) {
2936 if (! qual->IsSetQual() || qual->GetQual() !=
"gene" ||
2937 ! qual->IsSetVal() || qual->GetVal().empty())
2940 genes.insert(qual->GetVal());
2952 for (
auto& annot : bioseq.
SetAnnot()) {
2953 if (! annot->IsFtable())
2956 for (
auto& feat : annot->SetData().SetFtable()) {
2958 if (! feat->IsSetQual())
2961 for (CSeq_feat::TQual::iterator qual = feat->SetQual().begin(); qual != feat->SetQual().end(); ++qual) {
2962 if (! (*qual)->IsSetQual() || (*qual)->GetQual() !=
"label" ||
2963 ! (*qual)->IsSetVal() || (*qual)->GetVal().empty())
2966 const string& cur_val = (*qual)->GetVal();
2967 std::set<string>::const_iterator ci = genes.lower_bound(cur_val);
2968 if (*ci == cur_val) {
2971 new_qual->
SetVal(cur_val);
2973 feat->SetQual().insert(qual, new_qual);
2984 std::set<string> genes;
2985 for (
const auto& entry : seq_entries) {
2991 if (! genes.empty()) {
2992 for (
auto& entry : seq_entries) {
3003 gene_refs.
valid =
false;
3006 if (gene_refs.
valid) {
3007 for (TSeqFeatList::iterator feat = gene_refs.
first; feat != gene_refs.
last; ++feat) {
3008 if ((*feat)->IsSetLocation()) {
3012 (*feat)->SetPartial(
true);
3014 if (! pp->
genenull || ! (*feat)->GetLocation().IsMix())
3017 CSeq_loc_mix& mix_loc = (*feat)->SetLocation().SetMix();
3022 CSeq_loc_mix::Tdata::iterator it_loc = mix_loc.
Set().begin();
3024 for (; it_loc != mix_loc.
Set().end(); ++it_loc) {
3025 it_loc = mix_loc.
Set().insert(it_loc, null_loc);
3036 if (! seq_entries.empty())
User-defined methods of the data storage class.
User-defined methods of the data storage class.
bool fta_strings_same(const char *s1, const char *s2)
bool IsSegBioseq(const CSeq_id &id)
TSeqPos GetLength(void) const
@Gb_qual.hpp User-defined methods of the data storage class.
@Imp_feat.hpp User-defined methods of the data storage class.
@RNA_ref.hpp User-defined methods of the data storage class.
namespace ncbi::objects::
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
void AddSeqLoc(const CSeq_loc &other)
Template class for iteration on objects of class C.
static const char location[]
static char * join(int argc, char *argv[], const char sep[])
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
#define ERR_FEATURE_InconsistentPseudogene
#define ERR_FEATURE_InconsistentLocusTagAndGene
#define ERR_FEATURE_MultipleOldLocusTags
#define ERR_FEATURE_MultipleWBGeneXrefs
#define ERR_GENEREF_BothStrands
#define ERR_FEATURE_MultipleGenesDifferentLocusTags
#define ERR_GENEREF_NoUniqMaploc
list< CRef< objects::CSeq_entry > > TEntryList
std::list< CRef< objects::CSeq_id > > TSeqIdList
std::list< CRef< objects::CSeq_feat > > TSeqFeatList
bool StringEquN(const char *s1, const char *s2, size_t n)
bool StringEqu(const char *s1, const char *s2)
const char * leave_imp_feat[]
static GeneLocsPtr fta_sort_feat_list(GeneLocsPtr gelop)
static bool WeDontNeedToJoinThis(const CSeqFeatData &data)
static void GetGeneSyns(const TQualVector &quals, const char *name, TSynSet &syns)
static void fta_add_olt(GeneListPtr fromglp, GeneListPtr toglp)
static void ScannGeneName(GeneNodePtr gnp, Int4 seqlen)
static Int4 fta_cmp_gene_syns(const TSynSet &syn1, const TSynSet &syn2)
static bool fta_rnas_cds_feat(const CSeq_feat &feat)
const char * feat_no_gene[]
static CRef< CSeq_loc > MakeCLoc(MixLocPtr mlp, bool noleft, bool noright)
void DealWithGenes(TEntryList &seq_entries, ParserPtr pp)
bool GenelocContained(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
static void MessWithSegGenes(GeneNodePtr gnp)
static void GeneLocsFree(GeneLocsPtr gelop)
static void SrchGene(CSeq_annot::C_Data::TFtable &feats, GeneNodePtr gnp, Int4 length, const CSeq_id &id)
static MixLocPtr MixLocCopy(MixLocPtr mlp)
static void fta_collect_genes(const CBioseq &bioseq, std::set< string > &genes)
static void CheckGene(TEntryList &seq_entries, ParserPtr pp, GeneRefFeats &gene_refs)
static void GeneCheckForStrands(const GeneListPtr _glp)
static void GeneListFree(GeneListPtr glp)
static void GetLocationStr(const CSeq_loc &loc, string &str)
static bool GetFeatNameAndLoc(GeneListPtr glp, const CSeq_feat &feat, GeneNodePtr gnp)
static void fta_append_feat_list(GeneNodePtr gnp, const CSeq_loc *location, const char *gene, const char *locus_tag)
static Int4 fta_cmp_locusyn(GeneListPtr glp1, GeneListPtr glp2)
static bool CompareGeneListName(const GeneListPtr &sp1, const GeneListPtr &sp2)
static void fta_make_seq_int(MixLocPtr mlp, bool noleft, bool noright, CSeq_interval &interval)
static CdssListPtr SrchCdss(CSeq_annot::C_Data::TFtable &feats, CdssListPtr clp, Int4 segnum, const CSeq_id &id)
static bool fta_check_feat_overlap(GeneLocsPtr gelop, GeneListPtr c, MixLocPtr mlp, Int4 from, Int4 to)
static void SortMixLoc(GeneListPtr c)
static void fta_fix_labels(CBioseq &bioseq, const std::set< string > &genes)
static void fta_add_wormbase(GeneListPtr fromglp, GeneListPtr toglp)
static bool DoWeHaveCdssInBetween(GeneListPtr c, Int4 to, CdssListPtr clp)
static void RemoveUnneededMiscFeats(GeneNodePtr gnp)
std::set< string > TWormbaseSet
static void fta_collect_wormbases(GeneListPtr glp, CSeq_feat &feat)
static void FixMixLoc(GeneListPtr c, GeneLocsPtr gelop)
static GeneNodePtr sort_gnp_list(GeneNodePtr gnp)
static bool CompareGeneLocsMinMax(const GeneLocsPtr &sp1, const GeneLocsPtr &sp2)
static void fta_seqloc_del_far(CSeq_loc &locs, const Char *acnum, const CSeq_id *id)
static void CircularSeqLocFormat(GeneListPtr c)
static Int4 fta_cmp_gene_refs(const CGene_ref &grp1, const CGene_ref &grp2)
static SeqlocInfoblkPtr GetLowHighFromSeqLoc(const CSeq_loc *origslp, Int4 length, const CSeq_id &orig_id)
static bool fta_seqid_same(const CSeq_id &sid, const Char *acnum, const CSeq_id *id)
std::set< string > TSynSet
static bool LocusTagCheck(GeneListPtr glp, bool &resort)
static void GeneQuals(TEntryList &seq_entries, const char *acnum, GeneRefFeats &gene_refs)
static void fta_check_pseudogene(GeneListPtr tglp, GeneListPtr glp)
static void fta_make_seq_pnt(MixLocPtr mlp, bool noleft, bool noright, CSeq_point &point)
static void MixLocFree(MixLocPtr mlp)
static bool ConfirmCircular(MixLocPtr mlp)
static bool s_IdsMatch(const CRef< CSeq_id > &pId1, const CRef< CSeq_id > &pId2)
static MixLocPtr EasySeqLocMerge(MixLocPtr first, MixLocPtr second, bool join)
static Int2 GetMergeOrder(MixLocPtr first, MixLocPtr second)
static CRef< CSeq_loc > fta_seqloc_local(const CSeq_loc &orig, const Char *acnum)
static void fta_collect_olts(GeneListPtr glp, CSeq_feat &feat)
static bool GeneLocusCheck(const TSeqFeatList &feats, bool diff_lt)
std::set< string > TLocusTagSet
static void CdssListFree(CdssListPtr clp)
static void FixAnnot(CBioseq::TAnnot &annots, const char *acnum, GeneRefFeats &gene_refs, TSeqLocInfoList &llocs)
static MixLocPtr CircularSeqLocCollect(MixLocPtr first, MixLocPtr second)
static CRef< CSeq_id > CpSeqIdAcOnly(const CSeq_id &id, bool accver)
static void FindGene(CBioseq &bioseq, GeneNodePtr gene_node)
static list< AccMinMax > fta_get_acc_minmax_strand(const CSeq_loc *location, GeneLocsPtr gelop)
static void AddGeneFeat(GeneListPtr glp, const string &maploc, TSeqFeatList &feats)
static bool DoWeHaveGeneInBetween(GeneListPtr c, SeqlocInfoblkPtr second, GeneNodePtr gnp)
static bool IfCDSGeneFeat(const CSeq_feat &feat, Uint1 choice, const char *key)
static void MiscFeatsWithoutGene(GeneNodePtr gnp)
static CRef< CSeqFeatXref > GetXrpForOverlap(const char *acnum, GeneRefFeats &gene_refs, const TSeqLocInfoList &llocs, const CSeq_feat &feat, CGene_ref &gerep)
list< SeqLocInfo > TSeqLocInfoList
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
string GetSeqIdString(bool with_version=false) const
Return seqid string with optional version for text seqid type.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const
Append a label for this Seq-id to the supplied string.
E_SIC Compare(const CSeq_id &sid2) const
Compare() - more general.
@ e_NO
different SeqId types-can't compare
@ e_YES
SeqIds compared, but are different.
ENa_strand GetStrand(void) const
Get the location's strand.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Override Assign() to incorporate cache invalidation.
const_iterator end(void) const
const_iterator begin(void) const
CRef< CSeq_loc > Subtract(const CSeq_loc &other, TOpFlags flags, ISynonymMapper *syn_mapper, ILengthGetter *len_getter) const
Subtract seq-loc from this, merge/sort resulting ranges depending on flags.
bool IsSetStrand(EIsSetStrand flag=eIsSetStrand_Any) const
Check if strand is set for any/all part(s) of the seq-loc depending on the flag.
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
void GetLabel(string *label) const
Appends a label suitable for display (e.g., error messages) label must point to an existing string ob...
void SetNull(void)
Override all setters to incorporate cache invalidation.
@ eEmpty_Allow
ignore empty locations
CBeginInfo Begin(C &obj)
Get starting point of object hierarchy.
int SeqLocPartialCheck(const CSeq_loc &loc, CScope *scope)
sequence::ECompare Compare(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
Returns the sequence::ECompare containment relationship between CSeq_locs.
@ fCompareOverlapping
Check if seq-locs are overlapping.
@ eContains
First CSeq_loc contains second.
@ eSame
CSeq_locs contain each other.
@ eContained
First CSeq_loc contained by second.
bool Empty(void) const THROWS_NONE
Check if CConstRef is empty – not pointing to any object which means having a null value.
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
void Reset(void)
Reset reference object.
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
bool NotEmpty(void) const THROWS_NONE
Check if CConstRef is not empty – pointing to an object and has a non-null value.
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
uint8_t Uint1
1-byte (8-bit) unsigned integer
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
static bool EqualCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-sensitive equality of a substring with another string.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
TSyn & SetSyn(void)
Assign a value to Syn data member.
bool IsSetSyn(void) const
synonyms for locus Check if a value has been assigned to Syn data member.
const TSyn & GetSyn(void) const
Get the Syn member data.
bool IsSetLocus_tag(void) const
systematic gene name (e.g., MI0001, ORF0069) Check if a value has been assigned to Locus_tag data mem...
bool IsSetLocus(void) const
Official gene symbol Check if a value has been assigned to Locus data member.
void SetLocus(const TLocus &value)
Assign a value to Locus data member.
void SetLocus_tag(const TLocus_tag &value)
Assign a value to Locus_tag data member.
void SetMaploc(const TMaploc &value)
Assign a value to Maploc data member.
const TLocus_tag & GetLocus_tag(void) const
Get the Locus_tag member data.
const TLocus & GetLocus(void) const
Get the Locus member data.
bool IsLim(void) const
Check if variant Lim is selected.
TLim GetLim(void) const
Get the variant data.
TType GetType(void) const
Get the Type member data.
bool IsSetExt(void) const
generic fields for ncRNA, tmRNA, miscRNA Check if a value has been assigned to Ext data member.
bool IsSetType(void) const
Check if a value has been assigned to Type data member.
const TName & GetName(void) const
Get the variant data.
const TExt & GetExt(void) const
Get the Ext member data.
bool IsName(void) const
Check if variant Name is selected.
void SetQual(const TQual &value)
Assign a value to Qual data member.
const TKey & GetKey(void) const
Get the Key member data.
vector< CRef< CDbtag > > TDbxref
TDbxref & SetDbxref(void)
Assign a value to Dbxref data member.
bool IsSetData(void) const
the specific data Check if a value has been assigned to Data data member.
bool IsSetQual(void) const
qualifiers Check if a value has been assigned to Qual data member.
E_Choice Which(void) const
Which variant is currently selected.
void SetLocation(TLocation &value)
Assign a value to Location data member.
bool IsCdregion(void) const
Check if variant Cdregion is selected.
bool IsImp(void) const
Check if variant Imp is selected.
bool IsSetKey(void) const
Check if a value has been assigned to Key data member.
const TLocation & GetLocation(void) const
Get the Location member data.
bool IsGene(void) const
Check if variant Gene is selected.
const TData & GetData(void) const
Get the Data member data.
void SetData(TData &value)
Assign a value to Data data member.
void SetVal(const TVal &value)
Assign a value to Val data member.
bool IsBiosrc(void) const
Check if variant Biosrc is selected.
void SetPseudo(TPseudo value)
Assign a value to Pseudo data member.
const TRna & GetRna(void) const
Get the variant data.
void ResetDbxref(void)
Reset Dbxref data member.
bool IsSetDbxref(void) const
support for xref to other databases Check if a value has been assigned to Dbxref data member.
TQual & SetQual(void)
Assign a value to Qual data member.
bool IsRna(void) const
Check if variant Rna is selected.
void ResetQual(void)
Reset Qual data member.
const TImp & GetImp(void) const
Get the variant data.
bool IsSetLocation(void) const
feature made from Check if a value has been assigned to Location data member.
void SetTo(TTo value)
Assign a value to To data member.
const TFuzz_from & GetFuzz_from(void) const
Get the Fuzz_from member data.
void SetPoint(TPoint value)
Assign a value to Point data member.
void SetId(TId &value)
Assign a value to Id data member.
bool IsSetId(void) const
WARNING: this used to be optional Check if a value has been assigned to Id data member.
bool IsMix(void) const
Check if variant Mix is selected.
ENa_strand
strand of nucleic acid
const TId & GetId(void) const
Get the Id member data.
bool IsSetPoint(void) const
Check if a value has been assigned to Point data member.
bool IsPacked_pnt(void) const
Check if variant Packed_pnt is selected.
const TPnt & GetPnt(void) const
Get the variant data.
TPoint GetPoint(void) const
Get the Point member data.
const TFuzz_to & GetFuzz_to(void) const
Get the Fuzz_to member data.
bool IsSetA(void) const
connection to a least one residue Check if a value has been assigned to A data member.
void SetId(TId &value)
Assign a value to Id data member.
void SetStrand(TStrand value)
Assign a value to Strand data member.
TFrom GetFrom(void) const
Get the From member data.
void SetFuzz(TFuzz &value)
Assign a value to Fuzz data member.
bool IsSetFuzz(void) const
Check if a value has been assigned to Fuzz data member.
const TFuzz & GetFuzz(void) const
Get the Fuzz member data.
const TId & GetId(void) const
Get the Id member data.
const TId & GetId(void) const
Get the Id member data.
void SetFrom(TFrom value)
Assign a value to From data member.
TStrand GetStrand(void) const
Get the Strand member data.
bool IsBond(void) const
Check if variant Bond is selected.
const TPacked_pnt & GetPacked_pnt(void) const
Get the variant data.
void SetFuzz_to(TFuzz_to &value)
Assign a value to Fuzz_to data member.
void SetFuzz_from(TFuzz_from &value)
Assign a value to Fuzz_from data member.
const TA & GetA(void) const
Get the A member data.
bool IsSetTo(void) const
Check if a value has been assigned to To data member.
bool IsSetStrand(void) const
Check if a value has been assigned to Strand data member.
Tdata & Set(void)
Assign a value to data member.
bool IsSetFuzz_to(void) const
Check if a value has been assigned to Fuzz_to data member.
bool IsSetStrand(void) const
Check if a value has been assigned to Strand data member.
TStrand GetStrand(void) const
Get the Strand member data.
bool IsSetId(void) const
WARNING: this used to be optional Check if a value has been assigned to Id data member.
TTo GetTo(void) const
Get the To member data.
bool IsWhole(void) const
Check if variant Whole is selected.
bool IsSetFrom(void) const
Check if a value has been assigned to From data member.
bool IsInt(void) const
Check if variant Int is selected.
const TInt & GetInt(void) const
Get the variant data.
bool IsSetVersion(void) const
Check if a value has been assigned to Version data member.
bool IsSetFuzz_from(void) const
Check if a value has been assigned to Fuzz_from data member.
void SetStrand(TStrand value)
Assign a value to Strand data member.
bool IsPnt(void) const
Check if variant Pnt is selected.
const TBond & GetBond(void) const
Get the variant data.
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
@ eClass_parts
parts for 2 or 3
void SetData(TData &value)
Assign a value to Data data member.
const TInst & GetInst(void) const
Get the Inst member data.
TTopology GetTopology(void) const
Get the Topology member data.
bool IsSetAnnot(void) const
Check if a value has been assigned to Annot data member.
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
const TAnnot & GetAnnot(void) const
Get the Annot member data.
const TId & GetId(void) const
Get the Id member data.
list< CRef< CSeq_feat > > TFtable
list< CRef< CSeq_annot > > TAnnot
where boath are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is orig
if(yy_accept[yy_current_state])
constexpr auto sort(_Init &&init)
const struct ncbi::grid::netcache::search::fields::KEY key
static const char * str(char *buf, int n)
TSeqFeatList::iterator first
TSeqFeatList::iterator last
vector< IndexblkPtr > entrylist
bool DeleteQual(TQualVector &qlist, const Char *qual)
char * GetTheQualValue(TQualVector &qlist, const Char *qual)
char * CpTheQualValue(const TQualVector &qlist, const Char *qual)
void MakeLocStrCompatible(string &str)
Int2 MatchArrayString(const char **array, const char *text)
std::vector< CRef< objects::CGb_qual > > TQualVector