61 #define THIS_FILE "fcleanup.cpp"
75 if (desc->IsSource()) {
83 if (bioseq->IsSetDescr()) {
85 for (TSeqdescList::iterator desc = descrs.begin(); desc != descrs.end(); ++desc) {
86 if ((*desc)->IsSource()) {
89 seq_set.
SetDescr().Set().push_back(*desc);
115 parts = &entry->SetSet();
123 annot.splice(annot.end(), parts->
SetAnnot());
134 for (
const auto& desc : seq_entry.
GetDescr().
Get()) {
135 if (desc->IsMolinfo())
145 parts = &entry->SetSet();
152 for (
const auto& entry : parts->
GetSeq_set()) {
153 if (! entry->IsSeq())
156 const CBioseq& bioseq = entry->GetSeq();
159 if (desc->IsMolinfo() && desc->GetMolinfo().IsSetBiomol()) {
163 else if (biomol != cur_biomol)
172 CBioseq& bioseq = entry->SetSeq();
175 for (TSeqdescList::iterator desc = descrs.begin(); desc != descrs.end(); ++desc) {
176 if ((*desc)->IsMolinfo()) {
177 (*desc)->SetMolinfo().ResetBiomol();
189 seq_entry.
SetDescr().Set().push_back(new_descr);
201 for (TQualVector::iterator qual = feat.
SetQual().begin(); qual != feat.
SetQual().end(); ++qual) {
202 if ((*qual)->IsSetQual() && (*qual)->GetQual() ==
"product" && (*qual)->IsSetVal()) {
204 prot_ref.
SetName().push_back((*qual)->GetVal());
240 size_t sz = ranges.size();
241 for (
size_t i = 1;
i < sz; ++
i) {
242 if (ranges[
i].
first != ranges[
i - 1].second && ranges[
i].
first != ranges[
i - 1].second + 1) {
263 if (
last->IsSetFuzz_from()) {
266 if (
last->IsSetFuzz_to()) {
280 vector<pair<TSeqPos, TSeqPos>> ranges;
281 for (
const auto& cur_loc : locs) {
282 if (cur_loc->IsInt()) {
285 ranges.push_back(make_pair(interval.
GetFrom(), interval.
GetTo()));
287 if (! first_interval) {
288 first_interval = &interval;
290 first_interval = &interval;
293 if (! last_interval) {
294 last_interval = &interval;
296 last_interval = &interval;
298 }
else if (cur_loc->IsPnt()) {
307 if (! first_interval) {
311 sort(ranges.begin(), ranges.end());
314 SetNewInterval(first_interval, last_interval, ranges.front().first, ranges.back().second, loc);
322 vector<pair<TSeqPos, TSeqPos>> ranges;
323 for (
const auto& interval : ints) {
324 ranges.push_back(make_pair(interval->GetFrom(), interval->GetTo()));
327 sort(ranges.begin(), ranges.end());
338 auto& scope = entryHandle.
GetScope();
340 int protein_id_counter = 99;
342 for (
CFeat_CI cds_it(entryHandle, sel); cds_it; ++cds_it) {
343 auto pCds = cds_it->GetSeq_feat();
345 if (! pCds->IsSetProduct()) {
347 pNewCds->Assign(*pCds);
350 edit::GetNewProtId(scope.GetBioseqHandle(pNewCds->GetLocation()), protein_id_counter, idLabel,
false);
351 pNewCds->SetProduct().SetWhole().Assign(*pProteinId);
353 pCdsEditHandle.
Replace(*pNewCds);
355 }
else if (pCds->GetProduct().GetId() &&
356 scope.Exists(*pCds->GetProduct().GetId())) {
361 auto protHandle = scope.GetBioseqHandle(pCds->GetProduct());
367 protName =
"hypothetical protein";
393 for (
auto& entry : seq_entries) {
403 cleanup.ExtendedCleanup(*entry);
415 bool gene_set =
false;
421 if (feat->IsSetData() && feat->GetData().IsGene()) {
430 if (feat->IsSetLocation()) {
432 CSeq_loc& loc = feat->SetLocation();
435 }
else if (loc.
IsMix()) {
437 if (feat->IsSetData() && feat->GetData().IsProt()) {
458 CSeqdesc_CI mol_info(bioseq_h, CSeqdesc::E_Choice::e_Molinfo);
465 const CSeq_id&
id = *ids.front();
@ eExtreme_Biological
5' and 3'
CRef< objects::CSeq_id > GetNewProtId(objects::CBioseq_Handle bsh, int &offset, string &id_label, bool general_only)
static CRef< CSeq_entry > AddProtein(const CSeq_feat &cds, CScope &scope)
static const string & GetProteinName(const CProt_ref &prot)
const CSeq_interval * GetStopInt(ESeqLocExtremes ext) const
const CSeq_interval * GetStartInt(ESeqLocExtremes ext) const
const CSeq_descr & GetDescr(void) const
void SetDescr(CSeq_descr &value)
bool IsSetDescr(void) const
namespace ncbi::objects::
TSeqPos GetStart(ESeqLocExtremes ext) const
TSeqPos GetStop(ESeqLocExtremes ext) const
static bool IsEmptyMolInfo(const CMolInfo &mol_info)
static void LookForProductName(CSeq_feat &feat)
void g_InstantiateMissingProteins(CSeq_entry_Handle entryHandle)
static void MoveSourceDescrToTop(CSeq_entry &entry)
static bool IsConversionPossible(const vector< pair< TSeqPos, TSeqPos >> &ranges)
void FinalCleanup(TEntryList &seq_entries)
static void MoveAnnotToTop(CSeq_entry &seq_entry)
static void MoveBiomolToTop(CSeq_entry &seq_entry)
static void ConvertPackedIntToInterval(CSeq_loc &loc)
static void ConvertMixToInterval(CSeq_loc &loc)
static void SetNewInterval(const CSeq_interval *first, const CSeq_interval *last, const TSeqPos &from, TSeqPos &to, CSeq_loc &loc)
list< CRef< objects::CSeq_entry > > TEntryList
std::list< CRef< objects::CSeqdesc > > TSeqdescList
static void cleanup(void)
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
static TDSRET convert(TDSSOCKET *tds, TDSICONV *conv, TDS_ICONV_DIRECTION direction, const char *from, size_t from_len, char *dest, size_t *dest_len)
unsigned int TSeqPos
Type for sequence locations and lengths.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
void SetId(CSeq_id &id)
set the 'id' field in all parts of this location
const CSeq_id * GetId(void) const
Get the id of the location; return NULL if it has multiple ids or no id at all.
CBeginInfo Begin(C &obj)
Get starting point of object hierarchy.
void AddProteinFeature(const CBioseq &seq, const string &protein_name, const CSeq_feat &cds, CScope &scope)
AddProteinFeature A function to create a protein feature with the specified protein name.
bool IsPseudo(const CSeq_feat &feat, CScope &scope)
Determines whether the given feature is pseudo, using the gene associated with the feature if necessary. Checks to...
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
CScope & GetScope(void) const
Get scope this handle belongs to.
void Replace(const CSeq_feat &new_feat) const
Replace the feature with new Seq-feat object.
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
const TName & GetName(void) const
Get the Name member data.
bool IsSetName(void) const
Protein name. Check if a value has been assigned to Name data member.
TName & SetName(void)
Assign a value to Name data member.
bool IsSetData(void) const
The specific data. Check if a value has been assigned to Data data member.
bool IsSetQual(void) const
Qualifiers. Check if a value has been assigned to Qual data member.
bool IsProt(void) const
Check if variant Prot is selected.
void SetLocation(TLocation &value)
Assign a value to Location data member.
const TQual & GetQual(void) const
Get the Qual member data.
const TData & GetData(void) const
Get the Data member data.
void SetData(TData &value)
Assign a value to Data data member.
TQual & SetQual(void)
Assign a value to Qual data member.
void ResetQual(void)
Reset Qual data member.
bool IsSetLocation(void) const
Feature made from. Check if a value has been assigned to Location data member.
void SetTo(TTo value)
Assign a value to To data member.
bool IsMix(void) const
Check if variant Mix is selected.
list< CRef< CSeq_interval > > Tdata
TPoint GetPoint(void) const
Get the Point member data.
TFrom GetFrom(void) const
Get the From member data.
list< CRef< CSeq_loc > > Tdata
E_Choice Which(void) const
Which variant is currently selected.
void SetFrom(TFrom value)
Assign a value to From data member.
const Tdata & Get(void) const
Get the member data.
void SetFuzz_to(TFuzz_to &value)
Assign a value to Fuzz_to data member.
void SetFuzz_from(TFuzz_from &value)
Assign a value to Fuzz_from data member.
bool IsPacked_int(void) const
Check if variant Packed_int is selected.
TTo GetTo(void) const
Get the To member data.
const TMix & GetMix(void) const
Get the variant data.
const TPacked_int & GetPacked_int(void) const
Get the variant data.
bool IsSetClass(void) const
Check if a value has been assigned to Class data member.
const TDescr & GetDescr(void) const
Get the Descr member data.
TSet & SetSet(void)
Select the variant.
TClass GetClass(void) const
Get the Class member data.
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
const TSet & GetSet(void) const
Get the variant data.
bool IsSeq(void) const
Check if variant Seq is selected.
bool IsSetAnnot(void) const
Check if a value has been assigned to Annot data member.
void ResetAnnot(void)
Reset Annot data member.
bool IsSetDescr(void) const
Check if a value has been assigned to Descr data member.
bool IsSet(void) const
Check if variant Set is selected.
const TSeq_set & GetSeq_set(void) const
Get the Seq_set member data.
void SetDescr(TDescr &value)
Assign a value to Descr data member.
TSeq_set & SetSeq_set(void)
Assign a value to Seq_set data member.
@ eClass_parts
parts for 2 or 3
@ eClass_segset
segmented sequence + parts
bool IsSetCompleteness(void) const
Check if a value has been assigned to Completeness data member.
bool IsSetTechexp(void) const
explanation if tech not enough
bool IsSetBiomol(void) const
Check if a value has been assigned to Biomol data member.
const Tdata & Get(void) const
Get the member data.
list< CRef< CSeq_id > > TId
bool IsSetDescr(void) const
Descriptors. Check if a value has been assigned to Descr data member.
TBiomol GetBiomol(void) const
Get the Biomol member data.
void SetBiomol(TBiomol value)
Assign a value to Biomol data member.
void SetDescr(TDescr &value)
Assign a value to Descr data member.
bool IsSetTech(void) const
Check if a value has been assigned to Tech data member.
list< CRef< CSeq_annot > > TAnnot
const TDescr & GetDescr(void) const
Get the Descr member data.
const TMolinfo & GetMolinfo(void) const
Get the variant data.
TMolinfo & SetMolinfo(void)
Select the variant.
bool IsSetGbmoltype(void) const
Identifies particular ncRNA. Check if a value has been assigned to Gbmoltype data member.
constexpr auto sort(_Init &&init)
const CharType(& source)[N]