127 #define CLEANUP_SETUP \
128 auto changes = makeCleanupChange(options); \
129 CNewCleanup_imp clean_i(changes, options); \
130 clean_i.SetScope(*m_Scope);
135 clean_i.BasicCleanupSeqEntry(se);
143 clean_i.BasicCleanupSeqSubmit(ss);
151 clean_i.BasicCleanupSubmitblock(block);
159 clean_i.BasicCleanupBioseqSet(bss);
167 clean_i.BasicCleanupSeqAnnot(sa);
175 clean_i.BasicCleanupSeqFeat(sf);
183 clean_i.BasicCleanupBioSource(src);
241 clean_i.BasicCleanup(desc);
251 for (
auto& it : desc.
Set()) {
252 clean_i.BasicCleanup(*it);
262 clean_i.ExtendedCleanupSeqEntry(se);
271 clean_i.ExtendedCleanupSeqSubmit(ss);
279 clean_i.ExtendedCleanupSeqAnnot(sa);
313 vector<string_view>
result;
323 static constexpr std::array<string_view, CCleanupChangeCore::eNumberofChangeTypes>
sm_ChangeDesc = {
324 "Invalid Change Code",
327 "Clean Double Quotes",
330 "Clean Qualifiers List",
331 "Clean Dbxrefs List",
332 "Clean CitonFeat List",
333 "Clean Keywords List",
334 "Clean Subsource List",
338 "Change Feature Key",
340 "Change Publication",
354 "Change Genetic Code",
360 "Change WholeLocation",
362 "Change MolInfo Descriptor",
368 "Add Protein Feature",
382 "Convert Feature to Descriptor",
384 "Change Feature Location",
388 "Add BioSource OrgMod",
389 "Add BioSource SubSource",
390 "Change BioSource Genome",
391 "Change BioSource Origin",
392 "Change BioSource Other",
394 "Remove Empty Publication",
402 "Change Prot Activities",
404 "Change PCR Primers",
411 "Create Gene Nomenclature",
412 "Clean Seq-feat xref",
413 "Clean User-Object Or -Field",
414 "Letter Case Change",
415 "Change Bioseq-set Class",
416 "Unique Without Sort",
424 "Add NcbiCleanupObject",
426 "Trim Flanking Quotes",
427 "Clean Bioseq Title",
429 "Remove Dup BioSource",
431 "Trim Internal Semicolons",
433 "Convert Unstructured Org-ref Modifier",
435 "Move GO term to GeneOntology object",
470 return "mat_peptide";
476 return "sig_peptide";
479 return "transit_peptide";
503 new_feat->
SetQual().push_back(q);
541 CSeq_feat::TQual::iterator it = feat.
SetQual().begin();
542 while (it != feat.
SetQual().end()) {
543 if ((*it)->IsSetQual() &&
546 feat.
SetData().SetProt().SetName().push_back((*it)->GetVal());
562 const bool feat_by_product =
true;
582 if (cdsScores.empty()) {
587 for (
auto cdsScore : cdsScores) {
590 return cdsScore.second;
595 return cdsScores.front().second;
621 if (parent_bsh.
IsAa()) {
627 bool matched_by_product =
false;
635 matched_by_product =
true;
638 if (!matched_by_product) {
647 bool require_frame =
false;
648 if (!require_frame) {
650 if ((*id_it)->IsEmbl() || (*id_it)->IsDdbj()) {
651 require_frame =
true;
665 new_feat->
Assign(*orig_feat);
667 new_feat->
SetData().SetProt().SetProcessed(processed);
676 new_feat->
SetData().SetProt().SetName().push_back(
"unnamed");
685 if (matched_by_product) {
709 if ((*annot_it)->IsFtable()) {
737 bool any_change =
false;
743 for (
CFeat_CI prot_it(*bi, sel); prot_it; ++prot_it) {
774 if (scores.size() == 1) {
776 }
else if (scores.size() == 0) {
792 if (!
f.IsSetXref()) {
795 bool any_removed =
false;
796 CSeq_feat::TXref::iterator xit =
f.SetXref().begin();
797 while (xit !=
f.SetXref().end()) {
798 if ((*xit)->IsSetData() && (*xit)->GetData().IsGene() &&
800 xit =
f.SetXref().erase(xit);
807 if (
f.IsSetXref() &&
f.GetXref().empty()) {
817 bool any_change =
false;
821 if (
fi->IsSetXref()) {
823 new_feat->
Assign(*(
fi->GetOriginalSeq_feat()));
841 if (!
f.IsSetXref()) {
844 bool any_removed =
false;
845 CSeq_feat::TXref::iterator xit =
f.SetXref().begin();
846 while (xit !=
f.SetXref().end()) {
847 if ((*xit)->IsSetData() && (*xit)->GetData().IsGene() &&
848 !(*xit)->GetData().GetGene().IsSuppressed()) {
849 xit =
f.SetXref().erase(xit);
856 if (
f.IsSetXref() &&
f.GetXref().empty()) {
877 bool has_xref =
false;
880 if ((*xit)->IsSetId() && (*xit)->GetId().IsLocal()) {
881 if ((*xit)->GetId().Equals(src.
GetId())) {
920 if (!
f.IsSetId() || !
f.IsSetXref()) {
925 if ((*xit)->IsSetId() && (*xit)->GetId().IsLocal()) {
928 if (far_feats.size() == 1) {
962 if ( !
f.Equals(*feat_ci->GetSeq_feat()) && feat_ci->GetSeq_feat()->IsSetData() && feat_ci->GetSeq_feat()->GetData().IsGene()
963 && feat_ci->GetSeq_feat()->GetData().GetGene().IsSetLocus())
965 locus2 = feat_ci->GetSeq_feat()->GetData().GetGene().GetLocus();
967 if (!locus1.empty() && !locus2.empty() && locus1 == locus2)
978 if (!
f.IsSetXref()) {
981 bool any_removed =
false;
982 CSeq_feat::TXref::iterator xit =
f.SetXref().begin();
983 while (xit !=
f.SetXref().end()) {
984 if ((*xit)->IsSetData() && (*xit)->GetData().IsGene() &&
985 !(*xit)->GetData().GetGene().IsSuppressed() && !
FindMatchingLocusGene(
f, (*xit)->GetData().GetGene(), bsh)) {
986 xit =
f.SetXref().erase(xit);
993 if (
f.IsSetXref() &&
f.GetXref().empty()) {
1010 if ( !
f.Equals(*feat_ci->GetSeq_feat()) && feat_ci->GetSeq_feat()->IsSetData() && feat_ci->GetSeq_feat()->GetData().IsGene()
1011 && feat_ci->GetSeq_feat()->GetData().GetGene().IsSetLocus_tag())
1013 locus_tag2 = feat_ci->GetSeq_feat()->GetData().GetGene().GetLocus_tag();
1015 if (!locus_tag1.empty() && !locus_tag2.empty() && locus_tag1 == locus_tag2)
1026 if (!
f.IsSetXref()) {
1029 bool any_removed =
false;
1030 CSeq_feat::TXref::iterator xit =
f.SetXref().begin();
1031 while (xit !=
f.SetXref().end()) {
1032 if ((*xit)->IsSetData() && (*xit)->GetData().IsGene() &&
1034 xit =
f.SetXref().erase(xit);
1041 if (
f.IsSetXref() &&
f.GetXref().empty()) {
1058 bool changed =
false;
1060 if (pos < loc_start) {
1062 id->Assign(*(loc.
GetId()));
1067 }
else if (pos > loc_stop) {
1069 id->Assign(*(loc.
GetId()));
1089 if (new_loc->
IsMix()) {
1090 last_interval = new_loc->
SetMix().SetLastLoc();
1094 last_interval = new_loc;
1115 last_interval->
SetInt().SetFrom(new_start);
1116 last_interval->
SetInt().SetTo(new_stop);
1117 last_interval->
SetInt().SetId().Assign(*
id);
1131 if (
f.IsSetData() &&
f.GetData().IsCdregion())
1133 if (
f.GetData().GetCdregion().IsSetCode())
1134 code = &(
f.GetData().GetCdregion().GetCode());
1135 if (
f.GetData().GetCdregion().IsSetFrame())
1136 frame =
f.GetData().GetCdregion().GetFrame();
1146 size_t len = orig_len;
1156 vector_loc->
SetInt().SetId().Assign(*(bsh.
GetId().front().GetSeqId()));
1159 vector_loc->
SetInt().SetFrom(0);
1160 vector_loc->
SetInt().SetTo(stop +
mod - 1);
1163 vector_loc->
SetInt().SetFrom(stop -
mod + 1);
1169 size_t usable_size = seq.
size();
1171 if (limit > 0 && usable_size > limit) {
1172 usable_size = limit;
1186 size_t length = usable_size / 3;
1188 for (
i = 0;
i < length; ++
i) {
1190 for (k = 0; k < 3; ++k, ++start) {
1207 bool changed =
false;
1214 if (frame != new_frame) {
1215 cds.
SetData().SetCdregion().SetFrame(new_frame);
1241 switch( (seq_len % 3) + 1 ) {
1256 if (frame != desired_frame) {
1257 frame = desired_frame;
1304 if (!
f.GetData().IsCdregion()) {
1323 if (check_for_stop) {
1360 prot_ref.
SetName().front() +=
"; ";
1362 prot_ref.
SetName().front() += protein_name;
1364 prot_ref.
SetName().push_back(protein_name);
1371 bool used_qual =
false;
1373 for (
auto it = mrna.
SetQual().begin(); it != mrna.
SetQual().end(); it++) {
1375 (*it)->SetVal(protein_name);
1383 mrna.
SetData().SetRna().SetRnaProductName(protein_name, remainder);
1393 for (
auto it = cds.
GetXref().begin(); it != cds.
GetXref().end(); it++) {
1394 if ((*it)->IsSetData() && (*it)->GetData().IsProt()) {
1400 for (
auto it = cds.
GetQual().begin(); it != cds.
GetQual().end(); it++) {
1415 for (
auto it = feat.
SetXref().begin(); it != feat.
SetXref().end(); it++) {
1416 if ((*it)->IsSetData() && (*it)->GetData().IsProt()) {
1423 for (
auto it = feat.
SetQual().begin(); it != feat.
SetQual().end(); it++) {
1426 (*it)->SetVal((*it)->GetVal() +
"; " + protein_name);
1428 (*it)->SetVal(protein_name);
1462 if ((*it)->IsSetData() && (*it)->GetData().IsProt()) {
1471 xref->SetData().SetProt().SetName().push_back(protein_name);
1472 cds.
SetXref().push_back(xref);
1480 if (
prot.IsSetName() && !
prot.GetName().empty()) {
1481 return prot.GetName().front();
1491 if ((*it)->IsSetData() && (*it)->GetData().IsProt()) {
1497 for (
auto it = cds.
GetQual().begin(); it != cds.
GetQual().end(); it++) {
1498 if ((*it)->IsSetQual() && (*it)->IsSetVal() &&
NStr::EqualNocase((*it)->GetQual(),
"product")) {
1499 return (*it)->GetVal();
1542 bool any_change =
false;
1560 }
catch (
const runtime_error&) {
1583 switch (loc.
Which()) {
1601 bool this_is_last = is_last && (*it == mix.
Set().back());
1602 if ((*it)->IsMix() || (*it)->IsPacked_int()) {
1610 if (!this_is_last &&
1627 bool this_is_last = is_last && (*it == pint.
Set().back());
1648 new_feat->
Assign(*(
f->GetSeq_feat()));
1662 if (!
f.IsSetLocation()) {
1665 bool partial =
false;
1667 while (li && !partial) {
1674 bool changed =
false;
1675 if (
f.IsSetPartial() &&
f.GetPartial()) {
1692 bool changed =
false;
1696 string & ec_num = *ec_num_iter;
1697 size_t tlen = ec_num.length();
1699 if (tlen != ec_num.length()) {
1718 bool changed =
false;
1719 CProt_ref::TEc::iterator ec_num_iter = ec_num_list.begin();
1720 while (ec_num_iter != ec_num_list.end()) {
1721 string & ec_num = *ec_num_iter;
1722 size_t tlen = ec_num.length();
1724 if (tlen != ec_num.length()) {
1729 ec_num_iter = ec_num_list.erase(ec_num_iter);
1742 bool any_change =
false;
1745 if (
f->GetData().GetProt().IsSetEc()) {
1746 bool this_change =
false;
1748 new_feat->
Assign(*(
f->GetSeq_feat()));
1752 new_feat->
SetData().SetProt().ResetEc();
1787 if (
len > longest) {
1795 bool changed =
false;
1856 bool needs_molinfo =
true;
1860 if ((*it)->IsMolinfo()) {
1861 needs_molinfo =
false;
1863 (!(*it)->GetMolinfo().IsSetBiomol() ||
1870 if (needs_molinfo) {
1884 needs_molinfo =
false;
1888 return needs_molinfo;
1900 switch (it->Which()) {
1914 string new_defline = sequence::CDeflineGenerator().GenerateDefline(bsh, sequence::CDeflineGenerator::fIgnoreExisting);
1918 bool modified = title_desc.
Set().
SetTitle() != new_defline;
1929 CBioseq::TDescr::Tdata::iterator it = seq_entry.
SetDescr().Set().begin();
1930 while (it != seq_entry.
SetDescr().Set().end()) {
1932 it = seq_entry.
SetDescr().Set().erase(it);
1939 if (seq_entry.
SetDescr().Set().empty()) {
1940 if (seq_entry.
IsSeq()) {
1943 else if (seq_entry.
IsSet()) {
1959 if (descr.
IsSet()) {
1960 for (
auto pDesc : descr.
Set()) {
1962 pDesc->SetUser().UpdateNcbiCleanup(ncbi_cleanup_version);
1970 auto& user = pCleanupObject->SetUser();
1971 user.UpdateNcbiCleanup(ncbi_cleanup_version);
1972 descr.
Set().push_back(pCleanupObject);
1982 if ((*it)->IsSource() && (*it)->GetSource().IsSetOrg()) {
1983 src_descs.push_back(*it);
2001 bool any_changes =
false;
2003 vector<CRef<COrg_ref> > rq_list;
2004 vector<const CSeqdesc* > src_descs;
2005 vector<CConstRef<CSeq_feat> > src_feats;
2008 vector<const CSeqdesc* >::iterator desc_it = src_descs.begin();
2009 while (desc_it != src_descs.end()) {
2012 org->
Assign((*desc_it)->GetSource().GetOrg());
2013 rq_list.push_back(org);
2024 rq_list.push_back(org);
2031 if (rq_list.size() > 0) {
2035 CTaxon3_reply::TReply::const_iterator reply_it = reply->GetReply().begin();
2038 desc_it = src_descs.begin();
2040 while (reply_it != reply->GetReply().end()
2041 && desc_it != src_descs.end()) {
2042 if ((*reply_it)->IsData() &&
2043 !(*desc_it)->GetSource().GetOrg().Equals((*reply_it)->GetData().GetOrg())) {
2054 vector<CConstRef<CSeq_feat> >::iterator feat_it = src_feats.begin();
2055 while (reply_it != reply->GetReply().end()
2056 && feat_it != src_feats.end()) {
2057 if ((*reply_it)->IsData() &&
2058 !(*feat_it)->GetData().GetBiosrc().GetOrg().Equals((*reply_it)->GetData().GetOrg())) {
2061 new_feat->
Assign(**feat_it);
2062 new_feat->
SetData().SetBiosrc().SetOrg().Assign((*reply_it)->GetData().GetOrg());
2090 if (new_product.
Empty()) {
2097 new_product->
SetDescr().Set().push_back(molinfo);
2102 new_product->
SetId().push_back(prot_id);
2105 prot_entry->
SetSeq(*new_product);
2119 if (
set &&
set->IsSetSeq_set()) {
2121 if (
nuc->IsSetDescr()) {
2123 auto it =
nuc->GetDescr().Get().begin();
2124 while (it !=
nuc->GetDescr().Get().end()) {
2125 if (!(*it)->IsMolinfo() && !(*it)->IsTitle() && !(*it)->IsCreate_date()) {
2129 neh.RemoveSeqdesc(**it);
2130 if (
nuc->IsSetDescr()) {
2131 it =
nuc->GetDescr().Get().begin();
2164 if (!bsrc.IsSetOrg() || !bsrc.IsSetOrgname()) {
2168 if (!orgname.IsSetGcode() && !orgname.IsSetMgcode() && !orgname.IsSetPgcode()) {
2173 bool any_changed =
false;
2177 for (; feat_ci; ++feat_ci) {
2180 int cdregionGenCode = (cds.
IsSetCode() ?
2183 if (cdregionGenCode != bioseqGenCode)
2193 new_cds.
SetCode().SetId(bioseqGenCode);
2195 edit_handle.
Replace(*new_feat);
2208 const string & sTitle,
2209 const string & sOrganism,
2212 OrganellePos =
NPOS;
2216 const string sPattern =
" [" + sOrganism +
"]";
2218 answer = sTitle.length() - sPattern.length();
2225 if (answer < 1 || answer ==
NPOS) {
2231 if (answer !=
NPOS) {
2245 if (possible_organelle_start_pos !=
NPOS &&
2247 OrganellePos = possible_organelle_start_pos;
2259 const string & sTitle,
2264 organelle_pos =
NPOS;
2280 bool is_cross_kingdom =
false;
2292 if (first_kingdom.empty()) {
2295 is_cross_kingdom =
true;
2296 second_kingdom = te.
GetName();
2302 return is_cross_kingdom;
2308 string first_kingdom, second_kingdom;
2314 const string & sTitle,
2319 organelle_pos =
NPOS;
2324 if ((*it)->IsSetSubtype() && (*it)->IsSetSubname() &&
2328 if (suffixPos !=
NPOS) {
2338 if (suffixPos !=
NPOS) {
2347 if (suffixPos !=
NPOS) {
2355 if (sep != string::npos) {
2366 if (taxname.empty())
return;
2375 if (!data.
IsProt())
continue;
2379 if (
str.empty())
continue;
2381 if (
len < 5)
continue;
2382 if (
str[
len - 1] !=
']')
continue;
2384 if (cp ==
NPOS)
continue;
2387 if (
suffix.length() != taxlen + 1)
continue;
2407 if ((*seqid_itr)->IsSwissprot()) {
2414 bool bPartial =
false;
2420 if (!molinfo_desc && (*descr_iter)->
IsMolinfo()) {
2421 molinfo_desc = *descr_iter;
2423 if (!src_desc && (*descr_iter)->
IsSource()) {
2424 src_desc = *descr_iter;
2426 if (molinfo_desc && src_desc) {
2430 if (!molinfo_desc || !src_desc) {
2433 for (; bioseq_set; bioseq_set = bioseq_set->
GetParentSet()) {
2435 if (!molinfo_desc && (*descr_iter)->
IsMolinfo()) {
2436 molinfo_desc = *descr_iter;
2438 if (!src_desc && (*descr_iter)->
IsSource()) {
2439 src_desc = *descr_iter;
2441 if (molinfo_desc && src_desc) {
2445 if (molinfo_desc && src_desc) {
2488 if (org->IsSetTaxname() && !
NStr::IsBlank(org->GetTaxname())) {
2498 if ((*d)->IsTitle()) {
2505 string & sTitle = title_desc->
SetTitle();
2507 const string sOriginalTitle = sTitle;
2511 if (partialPos ==
NPOS) {
2512 partialPos =
NStr::Find(sTitle,
", partial (");
2519 if (suffixPos ==
NPOS) {
2524 sTitle.resize(suffixPos);
2525 if (penult !=
NPOS) {
2526 sTitle.resize(penult);
2532 partialPos != string::npos &&
2533 (partialPos == (sTitle.length() - 9)))
2535 sTitle.resize(partialPos);
2540 if (bPartial && partialPos ==
NPOS) {
2541 sTitle +=
", partial";
2544 sTitle +=
" (" +
string(organelle) +
")";
2546 string first_kingdom, second_kingdom;
2548 sTitle +=
" [" + first_kingdom +
"][" + second_kingdom +
"]";
2551 if (org->IsSetTaxname()) {
2552 sTitle += org->GetTaxname();
2557 if (sTitle != sOriginalTitle) {
2576 if (
prot->GetData().GetProt().IsSetName() &&
2577 !
prot->GetData().GetProt().GetName().empty()) {
2578 label =
prot->GetData().GetProt().GetName().front();
2579 }
else if (
prot->GetData().GetProt().IsSetDesc()) {
2580 label =
prot->GetData().GetProt().GetDesc();
2603 bool any_change =
false;
2607 if ((*xit)->IsSetId() && (*xit)->GetId().IsLocal()) {
2613 if (f_start < gene_start) {
2615 gene_start = f_start;
2618 if (f_stop > gene_stop) {
2634 {
"16S", { 1000,
false } },
2635 {
"18S", { 1000,
false } },
2636 {
"23S", { 2000,
false } },
2637 {
"25S", { 1000,
false } },
2638 {
"26S", { 1000,
false } },
2639 {
"28S", { 3300,
false } },
2640 {
"small", { 1000,
false } },
2641 {
"large", { 1000,
false } },
2642 {
"5.8S", { 130,
true } },
2643 {
"5S", { 90,
true } }
2654 bool is_bad =
false;
2656 const CRNA_ref& rrna =
f.GetData().GetRna();
2658 if (rrna_name.empty()) {
2660 if (
f.IsSetQual()) {
2661 for (
auto qit :
f.GetQual()) {
2664 rrna_name = gbq.
GetVal();
2672 if (pos != string::npos && len < it->second.first && !(it->second.second &&
f.IsSetPartial() &&
f.GetPartial()) ) {
2682 bool any_changes =
false;
2684 int protein_id_counter = 1;
2687 for (
CFeat_CI cds_it(entry, sel); cds_it; ++cds_it) {
2688 bool change_this_cds =
false;
2690 new_cds->
Assign(*(cds_it->GetSeq_feat()));
2709 new_cds->
SetProduct().SetWhole().Assign(*new_id);
2710 change_this_cds =
true;
2713 if (new_cds->
IsSetProduct() && instantiate_missing_proteins) {
2729 new_inst->
SetSeq_data().SetNcbieaa().Set(current);
2737 current_name =
"hypothetical protein";
2738 change_this_cds =
true;
2752 bool change_mrna =
false;
2758 for (
auto it = new_mrna->
GetQual().begin(); it != new_mrna->
GetQual().end(); it++) {
2759 if ((*it)->IsSetQual() && (*it)->IsSetVal() &&
NStr::EqualNocase((*it)->GetQual(),
"product")) {
2760 mrna_name = (*it)->GetVal();
2766 || (!
NStr::Equal(current_name,
"hypothetical protein") &&
2783 if (change_this_cds) {
2798 const CSeq_feat& rna_feat = *(rna_it->GetSeq_feat());
2803 bool change_this_rrna =
false;
2805 new_rrna->
Assign(*(rna_it->GetSeq_feat()));
2811 change_this_rrna =
true;
2815 change_this_rrna =
true;
2820 change_this_rrna =
true;
2824 change_this_rrna =
true;
2828 if (change_this_rrna) {
2837 bool change_this_gene;
2839 new_gene->
Assign(*(gene_it->GetSeq_feat()));
2845 if (change_this_gene) {
2858 if (run_extended_cleanup) {
2860 if (pChanged->ChangeCount()>0) {
2901 if (this_strand == prev_strand) {
2902 if (
abs((
long int)this_start - (
long int)prev_end) < min_len) {
2917 bool any_change =
false;
2935 bool any_changes =
false;
2938 for (
CFeat_CI cds_it(entry, sel); cds_it; ++cds_it) {
2939 bool change_this_cds =
false;
2941 new_cds->
Assign(*(cds_it->GetSeq_feat()));
2947 if (change_this_cds) {
3006 const int unknown_seqdesc =
static_cast<int>(1 + sc_SeqdescOrderMap.size());
3009 if (find_iter == sc_SeqdescOrderMap.end()) {
3010 return unknown_seqdesc;
3013 return find_iter->second;
3021 chs1 = desc1->
Which();
3022 chs2 = desc2->
Which();
3044 if (
edit.IsSetDescr()) {
3056 bool removed =
false;
3060 if ((*d)->IsTitle()) {
3065 last_title.
Reset(d->GetPointer());
3075 bool removed =
false;
3076 if (
set.IsSetDescr()) {
3079 if ((*d)->IsTitle()) {
3081 set.RemoveSeqdesc(*last_title);
3084 last_title.
Reset(d->GetPointer());
3106 string & auth_str = *out_authors;
3113 vector<string> name_list;
3117 if ((*auth_it)->IsSetName()) {
3119 (*auth_it)->GetName().GetLabel(&
label);
3120 name_list.push_back(
label);
3125 back_inserter(name_list));
3128 back_inserter(name_list));
3131 if (name_list.size() == 0) {
3133 }
else if (name_list.size() == 1) {
3134 auth_str = name_list.back();
3140 last_author.swap(name_list.back());
3141 name_list.pop_back();
3145 auth_str += last_author;
3152 string *out_authors_string,
const CPubdesc& pd)
3154 string & authors_string = *out_authors_string;
3155 authors_string.clear();
3158 if ((*pub)->IsSetAuthors()) {
3168 vector<TEntrezId>& pmids, vector<TEntrezId>& muids, vector<int>& serials,
3169 vector<string>& published_labels,
3170 vector<string>& unpublished_labels)
3173 bool is_published =
false;
3174 bool need_label =
false;
3180 if ((*it)->IsPmid()) {
3181 pmids.push_back((*it)->GetPmid());
3182 is_published =
true;
3183 }
else if ((*it)->IsMuid()) {
3184 muids.push_back((*it)->GetMuid());
3185 is_published =
true;
3186 }
else if ((*it)->IsGen()) {
3187 if ((*it)->GetGen().IsSetCit()
3191 if ((*it)->GetGen().IsSetSerial_number()) {
3192 serials.push_back((*it)->GetGen().GetSerial_number());
3193 if ((*it)->GetGen().IsSetCit()
3194 || (*it)->GetGen().IsSetJournal()
3195 || (*it)->GetGen().IsSetDate()) {
3201 }
else if ((*it)->IsArticle() && (*it)->GetArticle().IsSetIds()) {
3202 is_published =
true;
3204 if ((*id)->IsPubmed()) {
3205 pmids.push_back((*id)->GetPubmed());
3206 is_published =
true;
3207 }
else if ((*id)->IsMedline()) {
3208 muids.push_back((*id)->GetMedline());
3226 published_labels.push_back(
label);
3228 unpublished_labels.push_back(
label);
3236 vector<CConstRef<CPub> > pub_list;
3241 vector<TEntrezId> pmids;
3242 vector<TEntrezId> muids;
3243 vector<int> serials;
3244 vector<string> published_labels;
3245 vector<string> unpublished_labels;
3247 if (pmids.size() > 0) {
3250 pub_list.push_back(pub);
3251 }
else if (muids.size() > 0) {
3254 pub_list.push_back(pub);
3255 }
else if (serials.size() > 0) {
3258 pub_list.push_back(pub);
3259 }
else if (published_labels.size() > 0) {
3262 pub_list.push_back(pub);
3263 }
else if (unpublished_labels.size() > 0) {
3266 pub_list.push_back(pub);
3274 vector<TEntrezId> pmids;
3275 vector<TEntrezId> muids;
3276 vector<int> serials;
3277 vector<string> published_labels;
3278 vector<string> unpublished_labels;
3279 GetPubdescLabels(
fi->GetData().GetPub(), pmids, muids, serials, published_labels, unpublished_labels);
3280 if (pmids.size() > 0) {
3283 pub_list.push_back(pub);
3284 }
else if (muids.size() > 0) {
3287 pub_list.push_back(pub);
3288 }
else if (serials.size() > 0) {
3291 pub_list.push_back(pub);
3292 }
else if (published_labels.size() > 0) {
3295 pub_list.push_back(pub);
3296 }
else if (unpublished_labels.size() > 0) {
3299 pub_list.push_back(pub);
3310 bool any_change =
false;
3311 CSeq_descr::Tdata::iterator it1 = descr.
Set().begin();
3312 while (it1 != descr.
Set().end()) {
3313 if ((*it1)->IsPub()) {
3314 CSeq_descr::Tdata::iterator it2 = it1;
3316 while (it2 != descr.
Set().end()) {
3317 if ((*it2)->IsPub() && (*it1)->GetPub().Equals((*it2)->GetPub())) {
3318 it2 = descr.
Set().erase(it2);
3337 if (pd1.
GetPub().
Get().front()->Equals(**it)) {
3359 bool is_embl_or_ddbj =
false;
3361 if ((*id)->IsEmbl() || (*id)->IsDdbj()) {
3362 is_embl_or_ddbj =
true;
3366 return !is_embl_or_ddbj;
3417 bool any_change =
false;
3420 if (p->GetLocation().IsInt() &&
3425 if (p->IsSetComment()) {
3458 bool found_non_minimal =
false;
3460 if ((*it)->IsMuid() || (*it)->IsPmid()) {
3461 if (is_refseq_prot) {
3462 found_non_minimal =
true;
3465 }
else if ((*it)->IsGen()) {
3467 if (
gen.IsSetCit() && !
gen.IsSetJournal() &&
3468 !
gen.IsSetAuthors() && !
gen.IsSetVolume() &&
3469 !
gen.IsSetPages()) {
3472 found_non_minimal =
true;
3475 found_non_minimal =
true;
3480 return !found_non_minimal;
3486 bool found_site_ref =
false;
3488 while (
f && !found_site_ref) {
3490 found_site_ref =
true;
3494 if (!found_site_ref) {
3498 bool any_change =
false;
3500 bool is_refseq_prot =
false;
3503 if ((*id_it)->IsOther()) {
3504 is_refseq_prot =
true;
3515 bool is_site_ref =
IsSiteRef(*(p->GetSeq_feat()));
3518 if ((*c)->IsEquiv()) {
3587 if (chs1 < chs2)
return true;
3588 if (chs1 > chs2)
return false;
3617 const string& n1 = s1.
GetName();
3618 const string& n2 = s2.
GetName();
3639 CBioSource::TSubtype::iterator s = biosrc.
SetSubtype().begin();
3640 CBioSource::TSubtype::iterator s_next = s;
3642 while (s_next != biosrc.
SetSubtype().end()) {
3658 bool any_change =
false;
3699 bool any_change =
false;
3755 bool any_change =
false;
3759 if (!
HasMod(org1, *it)) {
3760 org1.
SetMod().push_back(*it);
3771 org1.
SetDb().push_back(
a);
3779 org1.
SetSyn().push_back(*it);
3794 bool any_change =
false;
3795 CSeq_descr::Tdata::iterator src1 = seq_descr.
Set().begin();
3796 while (src1 != seq_descr.
Set().end()) {
3797 if ((*src1)->IsSource() && (*src1)->GetSource().IsSetOrg() && (*src1)->GetSource().GetOrg().IsSetTaxname()) {
3798 CSeq_descr::Tdata::iterator src2 = src1;
3800 while (src2 != seq_descr.
Set().end()) {
3801 if ((*src2)->IsSource() &&
3808 src2 = seq_descr.
Set().erase(src2);
3823 bool any_change =
false;
3824 vector<CConstRef<CBioSource> > src_list;
3825 CSeq_descr::Tdata::iterator d = descr.
Set().begin();
3826 while (d != descr.
Set().end()) {
3827 if ((*d)->IsSource()) {
3830 if ((*d)->GetSource().Equals(**s)) {
3836 d = descr.
Set().erase(d);
3840 src_list.push_back(src);
3853 if (!
f.IsSetData() || !
f.GetData().IsBiosrc()) {
3857 src->
Assign(
f.GetData().GetBiosrc());
3860 if (
f.IsSetComment()) {
3869 if (
f.IsSetDbxref()) {
3873 src->
SetOrg().SetDb().push_back(
a);
3886 bool any_change =
false;
3888 bool transgenic_or_focus =
false;
3890 while (existing_src && !transgenic_or_focus) {
3893 transgenic_or_focus =
true;
3897 if (transgenic_or_focus) {
3901 if (p->GetLocation().IsInt() &&
3945 size_t num_gene_locus = 0;
3946 size_t num_gene_locus_tag = 0;
3947 size_t num_gene_xref_locus = 0;
3948 size_t num_gene_xref_locus_tag = 0;
3951 if (
fi->GetData().IsGene()) {
3952 if (
fi->GetData().GetGene().IsSetLocus()) {
3955 if (
fi->GetData().GetGene().IsSetLocus_tag()) {
3956 num_gene_locus_tag++;
3958 }
else if (
fi->IsSetXref()) {
3961 if (
g->IsSetLocus()) {
3962 num_gene_xref_locus++;
3964 if (
g->IsSetLocus_tag()) {
3965 num_gene_xref_locus_tag++;
3969 if (num_gene_locus > 0) {
3970 if (num_gene_locus_tag > 0) {
3973 if (num_gene_xref_locus > 0) {
3977 if (num_gene_locus_tag > 0) {
3978 if (num_gene_locus > 0) {
3981 if (num_gene_xref_locus_tag > 0) {
3988 bool any_change =
false;
3989 if (num_gene_locus == 0 && num_gene_locus_tag > 0) {
3990 if (num_gene_xref_locus > 0 && num_gene_xref_locus_tag == 0) {
3993 if (!
fi->GetData().IsGene() &&
fi->GetGeneXref()) {
3994 bool this_change =
false;
3996 new_f->
Assign(*(
fi->GetSeq_feat()));
3998 if ((*it)->IsSetData() && (*it)->GetData().IsGene()
3999 && (*it)->GetData().GetGene().IsSetLocus()) {
4000 (*it)->SetData().SetGene().SetLocus_tag((*it)->GetData().GetGene().GetLocus());
4001 (*it)->SetData().SetGene().ResetLocus();
4013 }
else if (num_gene_locus > 0 && num_gene_locus_tag == 0) {
4014 if (num_gene_xref_locus == 0 && num_gene_xref_locus_tag > 0) {
4017 if (!
fi->GetData().IsGene() &&
fi->GetGeneXref()) {
4018 bool this_change =
false;
4020 new_f->
Assign(*(
fi->GetSeq_feat()));
4022 if ((*it)->IsSetData() && (*it)->GetData().IsGene()
4023 && (*it)->GetData().GetGene().IsSetLocus_tag()) {
4024 (*it)->SetData().SetGene().SetLocus((*it)->GetData().GetGene().GetLocus_tag());
4025 (*it)->SetData().SetGene().ResetLocus_tag();
4045 bool strip_serial =
true;
4048 switch (sid.
Which()) {
4054 const string& acc =
GET_FIELD(tsid, Accession);
4055 if (acc.length() == 6) {
4056 strip_serial =
false;
4063 strip_serial =
false;
4080 strip_serial =
false;
4086 return strip_serial;
4092 bool change_made =
false;
4129 bool change_made =
false;
4142 const static struct {
4145 } transformations[] = {
4155 {
"#916",
"Delta" },
4156 {
"#945",
"alpha" },
4158 {
"#947",
"gamma" },
4159 {
"#952",
"theta" },
4160 {
"#955",
"lambda" },
4168 {
"#8710",
"delta" },
4185 idx <
sizeof(transformations)/
sizeof(transformations[0]);
4189 searcher.
AddWord( transformations[idx].src_word, idx );
4201 result.reserve( str_len );
4207 while( amp !=
NPOS && amp < str_len ) {
4213 if (
str[search_pos] ==
' ') {
4216 for( ; search_pos < str_len ; ++search_pos ) {
4217 const char ch =
str[search_pos];
4221 if( ch ==
'&' &&
state == 0 ) {
4233 const string & result_word = transformations[match_idx].result_word;
4234 copy( result_word.begin(), result_word.end(),
4240 if( search_pos >= str_len ) {
4252 const string & result_word = transformations[match_idx].result_word;
4253 copy( result_word.begin(), result_word.end(),
4259 copy(
str.begin() + amp,
str.begin() + search_pos + 1,
4264 if(
str[search_pos] ==
'&' ) {
4270 if(
NPOS == next_amp ) {
4272 copy(
str.begin() + search_pos + 1,
str.end(),
4278 if( (search_pos + 1) < next_amp ) {
4279 copy(
str.begin() + search_pos + 1,
str.begin() + next_amp,
4295 if (require_inframe) {
4298 switch (is_in_frame) {
4328 new_loc = nuc2prot_mapper->
Map(nuc_loc);
4335 if (!sid || (orig_id && sid->
Equals(*orig_id))) {
4355 if (!new_loc->
IsInt() && !new_loc->
IsPnt()) {
4417 bool changed =
false;
4423 if ((*annot_it)->IsSetData() && (*annot_it)->IsFtable()) {
4425 if ((*feat_it)->IsSetData() && (*feat_it)->GetData().IsCdregion()) {
4440 bool any_change =
false;
4466 if (!feat_loc_seq_id) {
4471 string::size_type
len = 0;
4472 string::size_type loc_pos, end_pos;
4473 char protein_letter =
'X';
4476 if (aa_pos == string::npos) {
4478 if (aa_pos != string::npos) {
4481 if (aa_pos != string::npos) {
4488 if (aa_pos != string::npos) {
4504 [pMessageListener](
string msg, TSubcode subcode) {
4509 if (loc_pos == string::npos) {
4510 if (pMessageListener) {
4511 string msg =
"Unable to identify code-break location in '" +
str +
"'";
4512 postMessage(msg, TSubcode::eParseError);
4522 if (end_pos ==
NPOS) {
4524 if (end_pos ==
NPOS) {
4525 end_pos =
str.length();
4532 if (pos.find_first_of(
",") != string::npos) {
4533 pos =
"join(" + pos +
")";
4539 if (pMessageListener) {
4540 string msg =
"Unable to extract code-break location from '" +
str +
"'";
4541 postMessage(msg, TSubcode::eParseError);
4547 if (pMessageListener) {
4548 string msg =
"code-break location exceeds 3 bases";
4549 postMessage(msg, TSubcode::eBadLocation);
4553 if ((break_loc->
IsInt() || break_loc->
IsPnt()) &&
4555 if (pMessageListener) {
4556 string msg =
"code-break location lies outside of coding region";
4557 postMessage(msg, TSubcode::eBadLocation);
4577 newCodeBreak->
SetLoc(*break_loc);
4580 orig_list.push_back(newCodeBreak);
4593 bool any_removed =
false;
4594 CSeq_feat::TQual::iterator it = feat.
SetQual().begin();
4595 while (it != feat.
SetQual().end()) {
4596 if ((*it)->IsSetQual() &&
4598 (*it)->IsSetVal() &&
4600 it = feat.
SetQual().erase(it);
4606 if (feat.
GetQual().size() == 0) {
4625 if (it == flu_map.
end()) {
4627 new_set->AddBioseq(*bi);
4628 flu_map[
key] = new_set;
4630 it->second->AddBioseq(*bi);
4638 for (
auto& entry : flu_map) {
4639 if (entry.second->OkToMakeSet()) {
4640 entry.second->MakeSet();
4654 for (; annot_ci; ++annot_ci) {
4655 if ((*annot_ci).IsFtable()) {
4670 f->SetData().SetImp().SetKey(
"misc_feature");
4671 f->SetLocation().SetInt().SetFrom(0);
4673 f->SetLocation().SetInt().SetId().Assign(*(bh.
GetSeqId()));
4676 f->SetDbxref().push_back(xref);
4678 suppress->SetData().SetGene();
4679 f->SetXref().push_back(suppress);
4693 COrg_ref::TDb::iterator db = org->
SetDb().begin();
4694 while (db != org->
SetDb().end()) {
4695 if ((*db)->IsSetDb() &&
NStr::Equal((*db)->GetDb(),
"IRD")) {
4697 db = org->
SetDb().erase(db);
4703 if (org->
GetDb().size() == 0) {
4767 default : frame = 0;
break;
4772 if (cb->IsSetLoc()) {
4776 ((
offset - frame) / 3 ) + 1 == pos) {
4789 int start =
static_cast<int>((pos-1)*3);
4801 default : frame = 0;
break;
4804 int frame_shift = (start - frame) % 3;
4805 if (frame_shift < 0) {
4808 if (frame_shift == 1)
4810 else if (frame_shift == 2)
4816 int len = loc_iter.GetRange().GetLength();
4817 if (offset <= start && offset + len > start) {
4819 tmp->SetId().Assign(loc_iter.GetSeq_id());
4822 tmp->SetTo(loc_iter.GetRange().GetTo() - (start -
offset) );
4824 tmp->SetFrom(loc_iter.GetRange().GetFrom() + start -
offset);
4826 if (offset <= start + 2 && offset + len > start + 2) {
4828 tmp->SetFrom(loc_iter.GetRange().GetTo() - (start -
offset + 2) );
4830 tmp->SetTo(loc_iter.GetRange().GetFrom() + start -
offset + 2);
4834 tmp->SetFrom(loc_iter.GetRange().GetFrom());
4836 tmp->SetTo(loc_iter.GetRange().GetTo());
4843 tmp->SetId().Assign(loc_iter.GetSeq_id());
4846 tmp->SetTo(loc_iter.GetRange().GetTo());
4848 tmp->SetFrom(loc_iter.GetRange().GetTo() - (start -
offset + 2) );
4850 tmp->SetFrom(loc_iter.GetRange().GetFrom());
4853 tmp->SetFrom(loc_iter.GetRange().GetFrom());
4855 tmp->SetTo(loc_iter.GetRange().GetFrom() + start -
offset + 2);
4857 tmp->SetTo(loc_iter.GetRange().GetTo());
4894 bool any_change =
false;
4915 bool any_changes =
false;
4917 vector<CRef<COrg_ref> > rq_list;
4918 vector<const CSeqdesc* > src_descs;
4919 vector<CConstRef<CSeq_feat> > src_feats;
4922 vector<const CSeqdesc* >::iterator desc_it = src_descs.begin();
4923 while (desc_it != src_descs.end()) {
4924 if ((*desc_it)->GetSource().IsSetSubtype()) {
4928 && s->IsSetName()) {
4929 bool month_ambiguous =
false;
4932 s->SetName(new_date);
4946 bool local_change =
false;
4947 for (
auto s : new_feat->
SetData().SetBiosrc().SetSubtype()) {
4949 && s->IsSetName()) {
4950 bool month_ambiguous =
false;
4953 s->SetName(new_date);
4954 local_change =
true;
4976 bool removed =
true;
4981 if (
ud->GetUser().IsAutodefOptions()) {
eExtreme_Positional: numerical value
eExtreme_Biological: 5' and 3'
User-defined methods of the data storage class.
User-defined methods of the data storage class.
CRef< objects::CSeq_id > GetNewProtId(objects::CBioseq_Handle bsh, int &offset, string &id_label, bool general_only)
bool IsGeneralIdProtPresent(objects::CSeq_entry_Handle tse)
Auth_list.hpp: User-defined methods of the data storage class.
CSeqdesc & Set(bool skip_lookup=false)
static CRef< CUser_object > CreateIDOptions(CSeq_entry_Handle seh)
static bool RegenerateSequenceDefLines(CSeq_entry_Handle se)
static string GetOrganelleByGenome(unsigned int genome)
int GetGenCode(int def=1) const
bool HasSubtype(CSubSource::TSubtype subtype) const
CBioseq_set_EditHandle –.
CConstRef< CBioseq_set > GetParentSet(void) const
CConstRef< CBioseq_set > GetParentSet(void) const
vector< string > GetAllDescriptions() const
vector< EChanges > GetAllChanges() const
vector< string_view > GetDescriptions() const
static string_view GetDescription(EChanges e)
static bool RescueSiteRefPubs(CSeq_entry_Handle seh)
Rescue pubs from Site-ref features.
static bool ConvertDeltaSeqToRaw(CSeq_entry_Handle seh, CSeq_inst::EMol filter=CSeq_inst::eMol_not_set)
static bool RenormalizeNucProtSets(CSeq_entry_Handle seh)
Convert nuc-prot sets with just one sequence to just the sequence; can't be done during the explore ph...
static bool ShouldStripPubSerial(const CBioseq &bs)
static bool RemoveOrphanLocus_tagGeneXrefs(CSeq_feat &f, CBioseq_Handle bsh)
Removes orphaned locus_tag Gene-xrefs.
static bool FixGeneXrefSkew(CSeq_entry_Handle seh)
Examine all genes and gene xrefs in the Seq-entry.
static void MoveOneFeatToPubdesc(CSeq_feat_Handle feat, CRef< CSeqdesc > d, CBioseq_Handle b, bool remove_feat=true)
static bool AddGenBankWrapper(CSeq_entry_Handle seh)
Add GenBank Wrapper Set.
static bool ConvertPubFeatsToPubDescs(CSeq_entry_Handle seh)
Convert full-length publication features to publication descriptors.
static void SetProteinName(CProt_ref &prot, const string &protein_name, bool append)
static void s_SetProductOnFeat(CSeq_feat &feat, const string &protein_name, bool append)
static bool AddPartialToProteinTitle(CBioseq &bioseq)
Adjusts protein title to reflect partialness.
static bool RemovePseudoProduct(CSeq_feat &cds, CScope &scope)
Removes protein product from pseudo coding region.
static bool FixECNumbers(CSeq_entry_Handle entry)
Fix EC numbers.
static bool AddMissingMolInfo(CBioseq &seq, bool is_product)
Adds missing MolInfo descriptor to sequence.
static void SetMrnaName(CSeq_feat &mrna, const string &protein_name)
static CRef< CSeq_entry > AddProtein(const CSeq_feat &cds, CScope &scope)
static bool OkToPromoteNpPub(const CPubdesc &pd)
Some pubs should not be promoted to nuc-prot set from sequence.
static void GetPubdescLabels(const CPubdesc &pd, vector< TEntrezId > &pmids, vector< TEntrezId > &muids, vector< int > &serials, vector< string > &published_labels, vector< string > &unpublished_labels)
For Publication Citations Get labels for a pubdesc.
static bool DecodeXMLMarkChanged(std::string &str)
Decodes various tags, including carriage-return-line-feed constructs.
static bool SetFeaturePartial(CSeq_feat &f)
Set feature partial based on feature location.
static bool AddProteinTitle(CBioseq_Handle bsh)
Creates missing protein title descriptor.
static size_t MakeSmallGenomeSet(CSeq_entry_Handle entry)
static bool ExtendToStopIfShortAndNotPartial(CSeq_feat &f, CBioseq_Handle bsh, bool check_for_stop=true)
Extends a coding region up to 50 nt.
static bool IsGeneXrefUnnecessary(const CSeq_feat &sf, CScope &scope, const CGene_ref &gene_xref)
Calculates whether a Gene-xref is unnecessary (because it refers to the same gene as would be calcula...
static bool RemoveNcbiCleanupObject(CSeq_entry &seq_entry)
Removes NcbiCleanup User Objects in the Seq-entry.
static bool ClearInternalPartials(CSeq_loc &loc, bool is_first=true, bool is_last=true)
Clear internal partials.
static bool RepackageProteins(CSeq_entry_Handle seh)
Find proteins that are not packaged in the same nuc-prot set as the coding region for which they are ...
CCleanup(CScope *scope=nullptr, EScopeOptions scope_handling=eScope_Copy)
static bool ParseCodeBreaks(CSeq_feat &feat, CScope &scope)
Parses all valid transl_except Gb-quals into code-breaks for cdregion, then removes the transl_except...
static bool SetMolinfoTech(CBioseq_Handle seq, CMolInfo::ETech tech)
Sets MolInfo::tech for a sequence.
static bool AddLowQualityException(CSeq_entry_Handle entry)
For table2asn -c s: adds an exception of "low-quality sequence region" to coding regions and mRNAs tha...
static bool RemoveUnseenTitles(CSeq_entry_EditHandle::TSeq seq)
Remove all titles in Seqdescr except the last, because it is the only one that would be displayed in ...
static bool RemoveDupBioSource(CSeq_descr &descr)
Remove duplicate biosource descriptors.
static bool ExtendStopPosition(CSeq_feat &f, const CSeq_feat *cdregion, size_t extension=0)
TChanges ExtendedCleanup(CSeq_entry &se, Uint4 options=0)
Cleanup a Seq-entry.
static bool s_IsProductOnFeat(const CSeq_feat &cds)
static bool SetGenePartialByLongestContainedFeature(CSeq_feat &gene, CScope &scope)
Set partialness of gene to match longest feature contained in gene.
static CConstRef< CCode_break > GetCodeBreakForLocation(size_t pos, const CSeq_feat &cds)
Utility function for finding the code break for a given amino acid position; pos is the position of th...
TChanges BasicCleanup(CSeq_entry &se, Uint4 options=0)
static bool SetCDSPartialsByFrameAndTranslation(CSeq_feat &cds, CScope &scope)
1.
static bool RemoveBadECNumbers(CProt_ref::TEc &ec_num_list)
Delete EC numbers.
void SetScope(CScope *scope)
static bool RepairXrefs(const CSeq_feat &f, const CTSE_Handle &tse)
Repairs non-reciprocal xref pairs for specified feature if xrefs between subtypes are permitted and f...
static bool ExtendToStopCodon(CSeq_feat &f, CBioseq_Handle bsh, size_t limit)
Extends a feature up to limit nt to a stop codon, or to the end of the sequence if limit == 0 (partia...
static CRef< CSeq_loc > GetProteinLocationFromNucleotideLocation(const CSeq_loc &nuc_loc, CScope &scope)
static bool ParseCodeBreak(const CSeq_feat &feat, CCdregion &cds, const CTempString &str, CScope &scope, IObjtoolsListener *pMessageListener=nullptr)
Parse string into code break and add to coding region.
static void SetCodeBreakLocation(CCode_break &cb, size_t pos, const CSeq_feat &cds)
Utility function for setting code break location given offset; pos is the position of the amino acid w...
static bool x_AddLowQualityException(CSeq_feat &feat)
static const string & GetProteinName(const CProt_ref &prot)
static CRef< CBioSource > BioSrcFromFeat(const CSeq_feat &f)
Get BioSource from feature to use for source descriptor.
static bool MergeDupBioSources(CSeq_descr &descr)
static bool SetFrameFromLoc(CCdregion &cdregion, const CSeq_loc &loc, CScope &scope)
Chooses best frame based on location 1.
static bool SetMolinfoBiomol(CBioseq_Handle seq, CMolInfo::EBiomol biomol)
Sets MolInfo::biomol for a sequence.
static bool MoveProteinSpecificFeats(CSeq_entry_Handle seh)
Moves protein-specific features from nucleotide sequences in the Seq-entry to the appropriate protein...
static bool TaxonomyLookup(CSeq_entry_Handle seh)
Looks up Org-refs in the Seq-entry.
static bool PubAlreadyInSet(const CPubdesc &pd, const CSeq_descr &descr)
static bool NormalizeDescriptorOrder(CSeq_descr &descr)
Normalize Descriptor Order on a specific Seq-entry.
static bool ConvertSrcFeatsToSrcDescs(CSeq_entry_Handle seh)
Convert full-length source features to source descriptors.
static bool SetBestFrame(CSeq_feat &cds, CScope &scope)
Translates coding region and selects best frame (without stops, or longest)
static bool x_MergeDupOrgNames(COrgName &on1, const COrgName &add)
static bool FindMatchingLocusGene(CSeq_feat &f, const CGene_ref &gene_xref, CBioseq_Handle bsh)
Detects gene features with matching locus.
static bool MoveFeatToProtein(CSeq_feat_Handle fh)
Moves one feature from nucleotide bioseq to the appropriate protein sequence.
static bool RemoveOrphanLocusGeneXrefs(CSeq_feat &f, CBioseq_Handle bsh)
Removes orphaned locus Gene-xrefs.
static void AddNcbiCleanupObject(int ncbi_cleanup_version, CSeq_descr &descr)
Adds NcbiCleanup User Object to Seq-descr.
static bool AreBioSourcesMergeable(const CBioSource &src1, const CBioSource &src2)
static bool ExpandGeneToIncludeChildren(CSeq_feat &gene, CTSE_Handle &tse)
Expands gene to include features it cross-references.
static bool WGSCleanup(CSeq_entry_Handle entry, bool instantiate_missing_proteins=true, Uint4 options=0, bool run_extended_cleanup=true)
Performs WGS specific cleanup.
static vector< CConstRef< CPub > > GetCitationList(CBioseq_Handle bsh)
Get list of pubs that can be used for citations for Seq-feat on a Bioseq-handle.
static bool LocationMayBeExtendedToMatch(const CSeq_loc &orig, const CSeq_loc &improved)
Checks whether it is possible to extend the original location up to improved one.
static bool UpdateECNumbers(CProt_ref::TEc &ec_num_list)
Update EC numbers.
static bool FixRNAEditingCodingRegion(CSeq_feat &cds)
From GB-7563: An action has been requested that will do the following: 1.
static bool x_HasShortIntron(const CSeq_loc &loc, size_t min_len=11)
static bool SetGeneticCodes(CBioseq_Handle bsh)
Sets genetic codes for coding regions on Bioseq-Handle.
static bool RemoveUnnecessaryGeneXrefs(CSeq_feat &f, CScope &scope)
Removes unnecessary Gene-xrefs.
static bool CleanupCollectionDates(CSeq_entry_Handle seh, bool month_first)