63 CollectGaps(sf.GetLocation(), sf.GetScope());
90 for (; seq_map_ci; ++seq_map_ci)
110 for (
TSeqPos i = map_start;
i <= map_stop; ++
i) {
158 size_t skip_left = 0;
159 TGapIntervalList::iterator it =
m_Gaps.begin();
160 while (it !=
m_Gaps.end()) {
166 if (it->second.first <=
m_Start && it->second.second >=
m_Start) {
172 }
else if (it->second.first <=
m_LeftGaps.front().second + 1 && it->second.second >=
m_LeftGaps.front().second) {
173 m_LeftGaps.front().second = it->second.second;
180 TGapIntervalList::reverse_iterator rit =
m_Gaps.rbegin();
181 size_t skip_right = 0;
182 while (rit !=
m_Gaps.rend()) {
187 if (rit->second.first <=
m_Stop && rit->second.second >=
m_Stop) {
193 }
else if (rit->second.first <=
m_RightGaps.front().first - 1 && rit->second.second >=
m_RightGaps.front().second) {
248 size_t start =
b->first;
249 size_t stop =
b->second;
262 size_t start =
b->first;
263 size_t stop =
b->second;
282 size_t start =
b->first;
283 size_t stop =
b->second;
299 locs.push_back(loc2);
302 if (locs.size() > 0) {
305 locs.push_back(left_loc);
307 reverse(locs.begin(), locs.end());
319 if ((*it)->IsSetQual() && (*it)->IsSetVal() &&
322 (id_label.empty() ||
NStr::Equal((*it)->GetVal(), id_label) ||
NStr::Equal((*it)->GetVal(), id_label +
"_1"))) {
323 if (id_label.empty()) {
324 id_label = (*it)->GetVal();
349 prot_seq->
SetInst().ResetExt();
351 prot_seq->
SetInst().SetSeq_data().SetIupacaa().Set(
prot);
358 if ((*id)->Which() == feat_prod.
Which()) {
359 bool do_replace =
false;
360 if ((*id)->IsGeneral()) {
361 if ((*id)->GetGeneral().IsSetDb()) {
381 if ((*mi)->IsMolinfo()) {
395 if ((*ait)->IsFtable()) {
396 CSeq_annot::TData::TFtable::iterator fit = (*ait)->SetData().SetFtable().begin();
397 while (fit != (*ait)->SetData().SetFtable().end()) {
399 new_prot_loc->
Assign((*fit)->GetLocation());
400 bool complete_cut =
false;
401 bool adjusted =
false;
406 if (!complete_cut && stop < orig_len - 1) {
411 fit = (*ait)->SetData().SetFtable().erase(fit);
413 new_prot_loc->
SetId(feat_prod);
417 (*fit)->SetLocation().Assign(*new_prot_loc);
437 CCdregion::TCode_break::iterator cit = cdr.
SetCode_break().begin();
439 bool do_remove =
false;
440 if ((*cit)->IsSetLoc()) {
443 (*cit)->SetLoc().
Assign(*new_loc);
497 vector<CRef<CSeq_feat> > rval;
499 if (!trim && !
split) {
500 rval.push_back(new_feat);
518 const string cds_gap_comment =
"coding region disrupted by sequencing gap";
520 vector<CRef<CSeq_loc> > locs =
Split(new_feat->
GetLocation(), in_intron, make_partial);
521 if (locs.size() > 0) {
529 comment = comment +
"; " + cds_gap_comment;
534 size_t transcript_id_offset = 0;
535 string transcript_id_label;
536 size_t protein_id_offset = 0;
537 string protein_id_label;
538 int protein_seqid_offset = 0;
539 string protein_seqid_label;
545 split_feat->
Assign(*new_feat);
550 x_AdjustOrigLabel(*split_feat, transcript_id_offset, transcript_id_label,
"orig_transcript_id");
551 x_AdjustOrigLabel(*split_feat, protein_id_offset, protein_id_label,
"orig_protein_id");
569 new_id =
GetNewProtId(bsh, protein_seqid_offset, protein_seqid_label, create_general_only);
571 split_feat->
SetProduct().SetWhole().Assign(*new_id);
573 protein_seqid_offset++;
575 rval.push_back(split_feat);
579 rval.push_back(new_feat);
582 rval.push_back(new_feat);
595 frame_adjust = frame_adjust % 3;
596 if (frame_adjust == 0) {
605 if (frame_adjust == 1) {
613 }
else if (frame_adjust == 2) {
631 if (fgap->HasKnown() || fgap->HasUnknown() || fgap->HasNs()) {
632 gapped_feats.push_back(fgap);
660 if (updates.size() == 0) {
664 for (
size_t i = 1;
i < updates.size();
i++) {
677 feat.
SetId().SetLocal().SetId(next_id);
686 for (
size_t i = 1;
i < updates.size();
i++) {
698 if ((*xit)->IsSetId() && (*xit)->GetId().IsLocal() &&
699 (*xit)->GetId().GetLocal().IsId() &&
700 (*xit)->GetId().GetLocal().GetId() == search) {
731 if (orig_id > 0 && new_id > 0 &&
f.IsSetXref()) {
733 if ((*xit)->IsSetId() && (*xit)->GetId().IsLocal() &&
734 (*xit)->GetId().GetLocal().IsId() &&
735 (*xit)->GetId().GetLocal().GetId() == orig_id) {
736 (*xit)->SetId().SetLocal().SetId(new_id);
745 if (updates1.size() != updates2.size()) {
749 if (updates1.size() < 2) {
752 vector<CRef<CSeq_feat> >::iterator u1 = updates1.begin();
753 vector<CRef<CSeq_feat> >::iterator u2 = updates2.begin();
756 if ((*u1)->IsSetId() && (*u1)->GetId().IsLocal() && (*u1)->GetId().GetLocal().IsId()) {
757 orig_id_1 = (*u1)->GetId().GetLocal().GetId();
760 if ((*u2)->IsSetId() && (*u2)->GetId().IsLocal() && (*u2)->GetId().GetLocal().IsId()) {
761 orig_id_2 = (*u2)->GetId().GetLocal().GetId();
766 while (u1 != updates1.end() && u2 != updates2.end()) {
768 if ((*u1)->IsSetId() && (*u1)->GetId().IsLocal() && (*u1)->GetId().GetLocal().IsId()) {
769 new_id_1 = (*u1)->GetId().GetLocal().GetId();
772 if ((*u2)->IsSetId() && (*u2)->GetId().IsLocal() && (*u2)->GetId().GetLocal().IsId()) {
773 new_id_2 = (*u2)->GetId().GetLocal().GetId();
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Positional
numerical value
@ eExtreme_Biological
5' and 3'
User-defined methods of the data storage class.
bool AdjustProteinFeaturePartialsToMatchCDS(CSeq_feat &new_prot, const CSeq_feat &cds)
AdjustProteinFeaturePartialsToMatchCDS A function to change an existing MolInfo to match a coding reg...
CRef< objects::CSeq_id > GetNewProtId(objects::CBioseq_Handle bsh, int &offset, string &id_label, bool general_only)
vector< CRef< objects::CSeq_id > > GetNewProtIdFromExistingProt(objects::CBioseq_Handle bsh, int &offset, string &id_label)
CBioseq_set_EditHandle –.
pair< EGapIntervalType, pair< size_t, size_t > > TGapInterval
CSeq_feat_Handle m_Feature
static void x_AdjustFrame(CCdregion &cdregion, TSeqPos frame_adjust)
bool x_UsableInterval(const TGapInterval &interval, bool unknown_length, bool known_length, bool ns)
TIntervalList m_InsideGaps
void CalculateRelevantIntervals(bool unknown_length, bool known_length, bool ns=false)
bool IsRelatedByCrossRef(const CFeatGapInfo &other) const
void x_AdjustCodebreaks(CSeq_feat &feat)
@ eGapIntervalType_unknown
vector< CRef< CSeq_feat > > AdjustForRelevantGapIntervals(bool make_partial, bool trim, bool split, bool in_intron, bool create_general_only=false)
void x_AdjustOrigLabel(CSeq_feat &feat, size_t &id_offset, string &id_label, const string &qual)
void CollectGaps(const CSeq_loc &feat_loc, CScope &scope)
void x_AdjustAnticodons(CSeq_feat &feat)
bool ShouldRemove() const
void Trim(CSeq_loc &loc, bool make_partial, CScope &scope)
TIntervalList m_RightGaps
vector< CRef< CSeq_loc > > TLocList
CSeq_feat_Handle GetFeature() const
static CRef< CBioseq > AdjustProteinSeq(const CBioseq &seq, const CSeq_feat &feat, const CSeq_feat &orig_cds, CScope &scope)
TLocList Split(const CSeq_loc &orig, bool in_intron, bool make_partial)
CSeq_feat_EditHandle –.
namespace ncbi::objects::
Include a standard set of the NCBI C++ Toolkit most basic headers.
bool s_IsRelated(const CSeq_feat &f1, CObject_id::TId search)
TGappedFeatList ListGappedFeatures(CFeat_CI &feat_it, CScope &scope)
void s_ReplaceFeatureIdXref(CSeq_feat &f, CObject_id::TId orig_id, CObject_id::TId new_id)
void s_FixPartial(CSeq_feat &feat)
void ProcessForTrimAndSplitUpdates(CSeq_feat_Handle cds, vector< CRef< CSeq_feat > > updates)
void FixFeatureIdsForUpdatePair(vector< CRef< CSeq_feat > > &updates1, vector< CRef< CSeq_feat > > &updates2)
void FixFeatureIdsForUpdates(CSeq_feat &feat, CObject_id::TId &next_id)
vector< CRef< CFeatGapInfo > > TGappedFeatList
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
virtual bool Equals(const CSerialObject &object, ESerialRecursionMode how=eRecursive) const
Check if both objects contain the same values.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
static int Score(const CRef< CSeq_id > &id)
Wrappers for use with FindBestChoice from <corelib/ncbiutil.hpp>
bool IsPartialStart(ESeqLocExtremes ext) const
check start or stop of location for e_Lim fuzz
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Override Assign() to incorporate cache invalidation.
void SetId(CSeq_id &id)
set the 'id' field in all parts of this location
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
bool IsSetStrand(EIsSetStrand flag=eIsSetStrand_Any) const
Check if strand is set for any/all part(s) of the seq-loc depending on the flag.
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
void ResetStrand(void)
Reset the strand on this location.
void SetPartialStart(bool val, ESeqLocExtremes ext)
set / remove e_Lim fuzz on start or stop (lt/gt - indicating partial interval)
CRef< CSeq_loc > Intersect(const CSeq_loc &other, TOpFlags flags, ISynonymMapper *syn_mapper) const
Find the intersection with the seq-loc, merge/sort resulting ranges depending on flags.
void SetPartialStop(bool val, ESeqLocExtremes ext)
bool IsPartialStop(ESeqLocExtremes ext) const
TSeqPos GetStop(ESeqLocExtremes ext) const
bool AdjustProteinMolInfoToMatchCDS(CMolInfo &molinfo, const CSeq_feat &cds)
AdjustProteinMolInfoToMatchCDS A function to change an existing MolInfo to match a coding region.
TSeqPos LocationOffset(const CSeq_loc &outer, const CSeq_loc &inner, EOffsetType how=eOffset_FromStart, CScope *scope=0)
returns (TSeqPos)-1 if the locations don't overlap
CRef< CSeq_loc > Seq_loc_Merge(const CSeq_loc &loc, CSeq_loc::TOpFlags flags, CScope *scope)
Merge ranges in the seq-loc.
@ eOffset_FromStart
For positive-orientation strands, start = left and end = right; for reverse-orientation strands,...
static void Translate(const string &seq, string &prot, const CGenetic_code *code, bool include_stop=true, bool remove_trailing_X=false, bool *alt_start=NULL, bool is_5prime_complete=true, bool is_3prime_complete=true)
Translate a string using a specified genetic code.
CRef< CSeq_loc > Map(const CSeq_loc &src_loc)
Map seq-loc.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
@ eLocationToProduct
Map from the feature's location to product.
CConstRef< CBioseq > GetCompleteBioseq(void) const
Get the complete bioseq.
const CSeq_annot_Handle & GetAnnot(void) const
Get handle to seq-annot for this feature.
CBioseq_set_Handle GetParentBioseq_set(void) const
Return a handle for the parent Bioseq-set, or null handle.
virtual CConstRef< CSeq_feat > GetSeq_feat(void) const
CBioseq_EditHandle AttachBioseq(CBioseq &seq, int index=-1) const
Attach a bioseq.
const CSeqFeatData & GetData(void) const
void Remove(void) const
Remove the feature from Seq-annot.
CSeq_feat_EditHandle AddFeat(const CSeq_feat &new_obj) const
virtual const CSeq_loc & GetProduct(void) const
virtual const CSeq_loc & GetLocation(void) const
void Remove(ERemoveMode mode=eRemoveSeq_entry) const
CSeq_annot_EditHandle GetEditHandle(void) const
Get 'edit' version of handle.
CScope & GetScope(void) const
Get scope this handle belongs to.
CConstRef< CSeq_feat > GetOriginalSeq_feat(void) const
void Replace(const CSeq_feat &new_feat) const
Replace the feature with new Seq-feat object.
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
CSeqMap::ESegmentType GetType(void) const
bool IsUnknownLength(void) const
return true if current segment is a gap of unknown length
TSeqPos GetPosition(void) const
return position of current segment in sequence
TSeqPos GetLength(void) const
return length of current segment
static CConstRef< CSeqMap > GetSeqMapForSeq_loc(const CSeq_loc &loc, CScope *scope)
CSeqMap_CI ResolvedRangeIterator(CScope *scope, TSeqPos from, TSeqPos length, ENa_strand strand=eNa_strand_plus, size_t maxResolve=size_t(-1), TFlags flags=fDefaultFlags) const
Iterate segments in the range with specified strand coordinates.
TObjectType * GetPointer(void) THROWS_NONE
Get pointer,.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
C::value_type FindBestChoice(const C &container, F score_func)
Find the best choice (lowest score) for values in a container.
bool IsSetDb(void) const
name of database or system Check if a value has been assigned to Db data member.
bool IsId(void) const
Check if variant Id is selected.
const TDb & GetDb(void) const
Get the Db member data.
TId GetId(void) const
Get the variant data.
const TAnticodon & GetAnticodon(void) const
Get the Anticodon member data.
bool IsTRNA(void) const
Check if variant TRNA is selected.
bool IsSetAnticodon(void) const
location of anticodon Check if a value has been assigned to Anticodon data member.
void SetAnticodon(TAnticodon &value)
Assign a value to Anticodon data member.
bool IsSetExt(void) const
generic fields for ncRNA, tmRNA, miscRNA Check if a value has been assigned to Ext data member.
void ResetAnticodon(void)
Reset Anticodon data member.
const TExt & GetExt(void) const
Get the Ext member data.
bool IsSetComment(void) const
Check if a value has been assigned to Comment data member.
bool IsSetData(void) const
the specific data Check if a value has been assigned to Data data member.
bool IsSetQual(void) const
qualifiers Check if a value has been assigned to Qual data member.
bool IsProt(void) const
Check if variant Prot is selected.
void SetLocation(TLocation &value)
Assign a value to Location data member.
bool IsCdregion(void) const
Check if variant Cdregion is selected.
void SetComment(const TComment &value)
Assign a value to Comment data member.
void ResetCode_break(void)
Reset Code_break data member.
void SetPartial(TPartial value)
Assign a value to Partial data member.
void SetProduct(TProduct &value)
Assign a value to Product data member.
const TId & GetId(void) const
Get the Id member data.
const TLocal & GetLocal(void) const
Get the variant data.
bool IsSetXref(void) const
cite other relevant features Check if a value has been assigned to Xref data member.
const TLocation & GetLocation(void) const
Get the Location member data.
bool IsLocal(void) const
Check if variant Local is selected.
const TData & GetData(void) const
Get the Data member data.
void SetId(TId &value)
Assign a value to Id data member.
void SetData(TData &value)
Assign a value to Data data member.
TCode_break & SetCode_break(void)
Assign a value to Code_break data member.
const TCdregion & GetCdregion(void) const
Get the variant data.
bool IsSetId(void) const
Check if a value has been assigned to Id data member.
const TProduct & GetProduct(void) const
Get the Product member data.
const TComment & GetComment(void) const
Get the Comment member data.
const TXref & GetXref(void) const
Get the Xref member data.
vector< CRef< CSeqFeatXref > > TXref
vector< CRef< CGb_qual > > TQual
const TRna & GetRna(void) const
Get the variant data.
TQual & SetQual(void)
Assign a value to Qual data member.
const TCode_break & GetCode_break(void) const
Get the Code_break member data.
bool IsSetProduct(void) const
product of process Check if a value has been assigned to Product data member.
bool IsRna(void) const
Check if variant Rna is selected.
void SetFrame(TFrame value)
Assign a value to Frame data member.
bool IsSetCode_break(void) const
individual exceptions Check if a value has been assigned to Code_break data member.
@ eFrame_not_set
not set, code uses one
@ eFrame_three
reading frame
bool IsEmpty(void) const
Check if variant Empty is selected.
const TWhole & GetWhole(void) const
Get the variant data.
E_Choice Which(void) const
Which variant is currently selected.
E_Choice Which(void) const
Which variant is currently selected.
const TGeneral & GetGeneral(void) const
Get the variant data.
bool IsWhole(void) const
Check if variant Whole is selected.
bool IsNull(void) const
Check if variant Null is selected.
@ e_not_set
No variant selected.
list< CRef< CSeqdesc > > Tdata
TId & SetId(void)
Assign a value to Id data member.
const TInst & GetInst(void) const
Get the Inst member data.
bool IsSetAnnot(void) const
Check if a value has been assigned to Annot data member.
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
TLength GetLength(void) const
Get the Length member data.
list< CRef< CSeq_id > > TId
void SetInst(TInst &value)
Assign a value to Inst data member.
bool IsSetDescr(void) const
descriptors Check if a value has been assigned to Descr data member.
void SetDescr(TDescr &value)
Assign a value to Descr data member.
list< CRef< CSeq_annot > > TAnnot
@ eRepr_raw
continuous sequence
where boath are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is orig
@ eSplitLocOption_split_in_intron
@ eSplitLocOption_make_partial
@ eSplitLocOption_split_in_exon
void SplitLocationForGap(CSeq_loc &loc1, CSeq_loc &loc2, size_t start, size_t stop, const CSeq_id *seqid, unsigned int options=eSplitLocOption_make_partial|eSplitLocOption_split_in_exon)
void SeqLocAdjustForTrim(CSeq_loc &loc, TSeqPos from, TSeqPos to, const CSeq_id *seqid, bool &bCompleteCut, TSeqPos &trim5, bool &bAdjusted)
void split(std::vector< std::string > *strVec, const std::string &str_, const std::string &split_)
static Uint4 letter(char c)