58 bool stop_at_stop,
bool cleanup_partials,
bool merge_abutting,
60 : m_Src(src), m_Target(target),
62 m_CdsStopAtStopCodon(stop_at_stop),
63 m_CdsCleanupPartials(cleanup_partials),
64 m_MessageListener(pMessageListener),
66 m_MergeAbutting(merge_abutting),
67 m_ExpandOverGaps(
true),
68 m_synonym_mapper(this)
70 if (align.GetSegs().IsDenseg()) {
71 m_Alignment.
Reset(&align);
72 }
else if (align.GetSegs().IsDisc()) {
73 m_Alignment.Reset(&*align.CreateDensegFromDisc());
74 }
else if (align.GetSegs().IsStd()) {
75 m_Alignment.Reset(&*align.CreateDensegFromStdseg());
76 }
else if (align.GetSegs().IsSpliced()) {
82 if (m_MessageListener) {
83 m_MessageListener->PostMessage(
86 eFeaturePropagationProblem_FeatureLocation));
95 bool stop_at_stop,
bool cleanup_partials,
bool merge_abutting,
bool expand_over_gaps,
97 : m_Src(src), m_Target(target),
99 m_CdsStopAtStopCodon(stop_at_stop),
100 m_CdsCleanupPartials(cleanup_partials),
101 m_MessageListener(pMessageListener),
102 m_MaxFeatId(feat_id),
103 m_MergeAbutting(merge_abutting),
104 m_ExpandOverGaps(expand_over_gaps),
105 m_synonym_mapper(this)
150 CMessage_Basic(
"Unable to propagate location of feature " + loc_label +
" to " + target_label,
186 vector<CRef<CSeq_feat> > rval;
191 rval.push_back(new_feat);
200 vector<CRef<CSeq_feat> > rval;
203 auto old_feat =
fi->GetOriginalSeq_feat();
206 rval.push_back(new_feat);
209 failures.push_back(old_feat);
218 vector<CRef<CSeq_feat>> propagated_feats;
220 for (
auto&& it : orig_feats) {
223 propagated_feats.push_back(new_feat);
225 if (it->IsSetProduct()) {
228 propagated_feats.push_back(prot_feat);
236 for (
auto&& it : propagated_feats) {
237 if (it->IsSetXref()) {
238 auto xref_it = it->SetXref().begin();
239 while (xref_it != it->SetXref().end()) {
240 if ((*xref_it)->IsSetId()) {
241 CFeat_id& feat_id = (*xref_it)->SetId();
249 xref_it = it->SetXref().erase(xref_it);
256 if (it->GetXref().empty()) {
263 return propagated_feats;
281 while ( seg < num_segs )
287 strand = denseg.
GetStrands()[seg * num_rows + row];
292 if (start >= 0 && pos >= start && pos < start +
len)
294 res = total_len + pos - start;
298 if (start >= 0 && start > pos && left)
305 if (start >= 0 && start +
len - 1 < pos && !left)
307 found_len = total_len +
len - 1;
336 while ( seg < num_segs )
342 strand = denseg.
GetStrands()[seg * num_rows + row];
347 if (total_len <= pos && total_len + len > pos)
351 res = start + pos - total_len;
369 while ( seg < num_segs )
375 strand = denseg.
GetStrands()[seg * num_rows + row];
398 strand = denseg.
GetStrands()[seg * num_rows + row];
405 res = start +
len - 1;
440 for (
int seg = 0; seg < num_segs; seg++)
448 ret->
SetId().Assign(
id);
469 bool should_be_null =
false;
470 for (
const auto& it : loc.
GetMix().
Get())
472 if (it->IsNull() != should_be_null)
474 should_be_null = !should_be_null;
476 return should_be_null;
484 for (
const auto& it : loc.
GetMix().
Get())
486 mix->
SetMix().Set().push_back(it);
488 null_loc->CSeq_loc_Base::SetNull();
489 mix->
SetMix().Set().push_back(null_loc);
493 mix->
SetMix().Set().pop_back();
510 if (source_row == -1 || target_row == -1)
515 new_loc->
Assign(sourceLoc);
526 if (start_before != start_after)
528 if (stop_before != stop_after)
534 new_loc->
SetId(targetId);
547 bool sub_partial5(
false), sub_partial3(
false);
551 align_start =
SeqPosToAlignPos(start, source_row,
true, sub_partial5, sub_partial3);
552 align_stop =
SeqPosToAlignPos(stop, source_row,
false, sub_partial5, sub_partial3);
563 if (align_start < 0 || align_stop < 0)
585 new_start =
AlignPosToSeqPos(align_start, target_row,
true, sub_partial5, sub_partial3);
586 new_stop =
AlignPosToSeqPos(align_stop, target_row,
false, sub_partial5, sub_partial3);
598 if (new_start < 0 || new_stop < 0 || new_stop < new_start)
626 loc_it.
SetTo(new_stop);
653 if (partial5 && target)
657 if (partial3 && target)
677 unsigned int curLen = 0;
678 for (
CSeq_loc_CI it(loc); it && curLen < truncLen; ++it) {
679 unsigned int thisLen = it.GetRange().GetLength();
681 if (curLen + thisLen <= truncLen) {
683 pTruncated->
Add(*pThisLoc);
686 pTruncated->
Assign(*pThisLoc);
691 unsigned int missingLen = truncLen - curLen;
693 if (missingLen == 1) {
695 pPartialLoc->
SetPnt().SetPoint(start);
699 pPartialLoc->
SetInt().SetFrom(start - missingLen + 1);
700 pPartialLoc->
SetInt().SetTo(start);
702 pPartialLoc->
SetInt().SetFrom(start);
703 pPartialLoc->
SetInt().SetTo(start + missingLen - 1);
711 pTruncated->
Add(*pPartialLoc);
714 pTruncated->
Assign(*pPartialLoc);
716 curLen += missingLen;
740 size_t nuc_len = orig_len;
749 unsigned int mod = nuc_len % 3;
765 const unsigned int usable_size = seq.
size();
776 unsigned int prot_len = usable_size / 3;
778 for (
unsigned int i = 0;
i < prot_len; ++
i) {
780 for (
size_t k = 0; k < 3; ++k, ++start) {
791 new_loc->
Add(*this_loc);
794 new_loc->
Assign(*this_loc);
803 unsigned int extension = ((
i + 1) * 3) -
mod;
804 last_interval->
SetInt().SetId().Assign(*(this_loc->
GetId()));
807 last_interval->
SetInt().SetFrom(this_start - extension);
808 last_interval->
SetInt().SetTo(this_stop);
810 last_interval->
SetInt().SetFrom(this_start);
811 last_interval->
SetInt().SetTo(this_stop + extension);
815 new_loc->
Add(*last_interval);
818 new_loc->
Assign(*last_interval);
831 bool ambiguous =
false;
851 if ((*it)->IsSetLoc()) {
852 const CSeq_loc& codebreak = (*it)->GetLoc();
855 (*it)->SetLoc(*new_codebreak);
859 (*it)->GetLoc().GetLabel(&loc_label);
863 CMessage_Basic(
"Unable to propagate location of translation exception " + loc_label +
" to " + target_label,
907 if (!targetIsPartialStart && !origIsPartialStart) {
915 if (!targetIsPartialStop) {
947 while (it && *it !=
'*') {
952 unsigned int newSize = 3*(it.
GetPos()+1);
986 feat.
SetData().SetRna().SetExt().SetTRNA().SetAnticodon(*new_anticodon);
994 CMessage_Basic(
"Unable to propagate location of anticodon " + loc_label +
" to " + target_label,
998 feat.
SetData().SetRna().SetExt().SetTRNA().ResetAnticodon();
1018 prot_feat->
Assign(orig_prot);
static CRef< CScope > m_Scope
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
bool IsReverse(ENa_strand s)
@ eExtreme_Positional
numerical value
@ eExtreme_Biological
5' and 3'
User-defined methods of the data storage class.
User-defined methods of the data storage class.
const CSeq_id & GetSeq_id(TDim row) const
CMessageListener_Basic * m_MessageListener
vector< CRef< CSeq_feat > > PropagateFeatureList(const vector< CConstRef< CSeq_feat >> &orig_feats)
Propagates a feature list from the source sequence The propagated protein feature is stored right aft...
TSignedSeqPos SeqPosToAlignPos(TSignedSeqPos pos, CDense_seg::TDim row, bool left, bool &partial5, bool &partial3)
void x_CdsCleanupPartials(CSeq_feat &cds, bool origIsPartialStart)
CRef< CSeq_loc > x_MapLocation(const CSeq_loc &sourceLoc, const CSeq_id &targetId)
map< CObject_id::TId, CObject_id::TId > m_FeatIdMap
CConstRef< CSeq_align > m_Alignment
CRef< CSeq_loc > CreateRowSeq_loc(const CSeq_align &align, CDense_seg::TDim row)
CRef< CSeq_loc > MakeOrdered(const CSeq_loc &loc)
CRef< CSeq_loc > x_TruncateToStopCodon(const CSeq_loc &loc, unsigned int truncLen)
CFeaturePropagator(CBioseq_Handle src, CBioseq_Handle target, const CSeq_align &align, bool stop_at_stop=true, bool cleanup_partials=true, bool merge_abutting=true, CMessageListener_Basic *pMessageListener=nullptr, CObject_id::TId *feat_id=nullptr)
void x_CdsStopAtStopCodon(CSeq_feat &cds)
bool IsOrdered(const CSeq_loc &loc)
vector< CRef< CSeq_feat > > PropagateAllReportFailures(vector< CConstRef< CSeq_feat > > &)
@ eFeaturePropagationProblem_CodeBreakLocation
@ eFeaturePropagationProblem_AnticodonLocation
@ eFeaturePropagationProblem_FeatureLocation
void x_PropagatetRNA(CSeq_feat &feat, const CSeq_id &targetId)
void x_CdsMapCodeBreaks(CSeq_feat &feat, const CSeq_id &targetId)
void x_PropagateCds(CSeq_feat &feat, const CSeq_id &targetId, bool origIsPartialStart)
CObject_id::TId * m_MaxFeatId
CSynonymMapper m_synonym_mapper
vector< CRef< CSeq_feat > > PropagateAll()
CRef< CSeq_feat > Propagate(const objects::CSeq_feat &orig_feat)
CRef< CSeq_feat > ConstructProteinFeatureForPropagatedCodingRegion(const CSeq_feat &orig_cds, const CSeq_feat &new_cds)
bool m_CdsStopAtStopCodon
TSignedSeqPos AlignPosToSeqPos(TSignedSeqPos pos, CDense_seg::TDim row, bool left, bool &partial5, bool &partial3)
CRef< CSeq_loc > x_ExtendToStopCodon(CSeq_feat &feat)
CDense_seg::TDim FindRow(const CSeq_align &align, CBioseq_Handle bsh)
bool m_CdsCleanupPartials
static const CTrans_table & GetTransTable(int id)
Default implementation of IMessageListener: collects all messages posted.
Default IMessage implementation: text and severity only.
ESubtype GetSubtype(void) const
CRef< CSeq_align > CreateDensegFromDisc(SSeqIdChooser *SeqIdChooser=0) const
CRef< CSeq_align > CreateDensegFromStdseg(SSeqIdChooser *SeqIdChooser=0) const
---------------------------------------------------------------------------- PRE : the Seq-align has ...
namespace ncbi::objects::
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
CRef< CSeq_align > AsDiscSeg() const
Convert this alignment to a discontinuous segment.
char GetCodonResidue(int state) const
static int NextCodonState(int state, unsigned char ch)
const_iterator end() const
const_iterator find(const key_type &key) const
static const char location[]
Include a standard set of the NCBI C++ Toolkit most basic headers.
static void DLIST_NAME() remove(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
unsigned int TSeqPos
Type for sequence locations and lengths.
int TSignedSeqPos
Type for signed sequence position.
@ eDiag_Error
Error message.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
virtual const char * what(void) const noexcept
Standard report (includes full backlog).
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
virtual bool Equals(const CSerialObject &object, ESerialRecursionMode how=eRecursive) const
Check if both objects contain the same values.
virtual EPostResult PostMessage(const IMessage &message)
Post new message to the listener.
void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const
Append a label for this Seq-id to the supplied string.
CConstRef< CSeq_id > GetSeqId(void) const
CRef< CSeq_loc > MakeSeq_loc(EMakeType make_type=eMake_CompactType) const
return constructed CSeq_loc with all changes
void SetPacked_int(TPacked_int &v)
void SetFrom(TSeqPos from)
Set the range from position.
void SetTo(TSeqPos to)
Set the range to position.
bool IsPartialStart(ESeqLocExtremes ext) const
check start or stop of location for e_Lim fuzz
void Delete(void)
Delete current element, and make iterator to point to the next element.
ENa_strand GetStrand(void) const
Get the location's strand.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Override Assign() to incorporate cache invalidation.
void SetId(CSeq_id &id)
set the 'id' field in all parts of this location
size_t GetPos(void) const
Get iterator's position.
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
CConstRef< CSeq_loc > GetRangeAsSeq_loc(void) const
Get seq-loc for the current iterator position.
CRef< CSeq_loc > Merge(TOpFlags flags, ISynonymMapper *syn_mapper) const
All functions create and return a new seq-loc object.
void Add(const CSeq_loc &other)
Simple adding of seq-locs.
bool IsSetStrand(EIsSetStrand flag=eIsSetStrand_Any) const
Check if strand is set for any/all part(s) of the seq-loc depending on the flag.
bool IsEmpty(void) const
True if the current location is empty.
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
size_t GetSize(void) const
Get number of ranges.
TRange GetRange(void) const
Get the range.
void SetPartialStart(bool val, ESeqLocExtremes ext)
set / remove e_Lim fuzz on start or stop (lt/gt - indicating partial interval)
CRef< CSeq_loc > Intersect(const CSeq_loc &other, TOpFlags flags, ISynonymMapper *syn_mapper) const
Find the intersection with the seq-loc, merge/sort resulting ranges depending on flags.
ENa_strand GetStrand(void) const
void GetLabel(string *label) const
Appends a label suitable for display (e.g., error messages) label must point to an existing string ob...
void SetStrand(ENa_strand strand)
Set the strand for all of the location's ranges.
void SetPartialStop(bool val, ESeqLocExtremes ext)
bool IsPartialStop(ESeqLocExtremes ext) const
TSeqPos GetStop(ESeqLocExtremes ext) const
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
static CRef< CBioseq > TranslateToProtein(const CSeq_feat &cds, CScope &scope)
static CCdregion::EFrame FindBestFrame(const CSeq_feat &cds, CScope &scope)
Find "best" frame for a coding region.
static void Translate(const string &seq, string &prot, const CGenetic_code *code, bool include_stop=true, bool remove_trailing_X=false, bool *alt_start=NULL, bool is_5prime_complete=true, bool is_3prime_complete=true)
Translate a string using a specified genetic code.
@ eGetId_Best
return the "best" gi (uses FindBestScore(), with CSeq_id::CalculateScore() as the score function
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
TInst_Length GetInst_Length(void) const
CConstRef< CSeq_feat > GetOriginalSeq_feat(void) const
CRef< CSeq_loc > GetRangeSeq_loc(TSeqPos start, TSeqPos stop, ENa_strand strand=eNa_strand_unknown) const
Return CSeq_loc referencing the given range and strand on the bioseq If start == 0,...
@ eCoding_Ncbi
Set coding to binary coding (Ncbi4na or Ncbistdaa)
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
TSeqPos GetPos(void) const
void SetCoding(TCoding coding)
const_iterator begin(void) const
void Reset(void)
Reset reference object.
void Reset(void)
Reset reference object.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
bool IsId(void) const
Check if variant Id is selected.
TId & SetId(void)
Select the variant.
TId GetId(void) const
Get the variant data.
const TAnticodon & GetAnticodon(void) const
Get the Anticodon member data.
bool IsTRNA(void) const
Check if variant TRNA is selected.
bool IsSetAnticodon(void) const
location of anticodon Check if a value has been assigned to Anticodon data member.
bool IsSetExt(void) const
generic fields for ncRNA, tmRNA, miscRNA Check if a value has been assigned to Ext data member.
const TExt & GetExt(void) const
Get the Ext member data.
const TTRNA & GetTRNA(void) const
Get the variant data.
const TDenseg & GetDenseg(void) const
Get the variant data.
bool IsSetStrands(void) const
Check if a value has been assigned to Strands data member.
const TStarts & GetStarts(void) const
Get the Starts member data.
const TLens & GetLens(void) const
Get the Lens member data.
const TSpliced & GetSpliced(void) const
Get the variant data.
TDim GetDim(void) const
Get the Dim member data.
bool IsStd(void) const
Check if variant Std is selected.
bool IsDisc(void) const
Check if variant Disc is selected.
bool IsSpliced(void) const
Check if variant Spliced is selected.
TNumseg GetNumseg(void) const
Get the Numseg member data.
const TStrands & GetStrands(void) const
Get the Strands member data.
const TSegs & GetSegs(void) const
Get the Segs member data.
bool IsDenseg(void) const
Check if variant Denseg is selected.
void ResetPartial(void)
Reset Partial data member.
bool IsSetData(void) const
the specific data Check if a value has been assigned to Data data member.
bool IsSetCode(void) const
genetic code used Check if a value has been assigned to Code data member.
void SetLocation(TLocation &value)
Assign a value to Location data member.
bool IsCdregion(void) const
Check if variant Cdregion is selected.
void ResetCode_break(void)
Reset Code_break data member.
void SetPartial(TPartial value)
Assign a value to Partial data member.
const TId & GetId(void) const
Get the Id member data.
const TLocal & GetLocal(void) const
Get the variant data.
const TLocation & GetLocation(void) const
Get the Location member data.
bool IsLocal(void) const
Check if variant Local is selected.
TLocal & SetLocal(void)
Select the variant.
TFrame GetFrame(void) const
Get the Frame member data.
const TData & GetData(void) const
Get the Data member data.
void SetId(TId &value)
Assign a value to Id data member.
const TCode & GetCode(void) const
Get the Code member data.
void SetData(TData &value)
Assign a value to Data data member.
TCode_break & SetCode_break(void)
Assign a value to Code_break data member.
const TCdregion & GetCdregion(void) const
Get the variant data.
bool IsSetId(void) const
Check if a value has been assigned to Id data member.
const TProduct & GetProduct(void) const
Get the Product member data.
void ResetProduct(void)
Reset Product data member.
const TRna & GetRna(void) const
Get the variant data.
const TCode_break & GetCode_break(void) const
Get the Code_break member data.
bool IsSetProduct(void) const
product of process Check if a value has been assigned to Product data member.
bool IsSetCode_break(void) const
individual exceptions Check if a value has been assigned to Code_break data member.
bool IsSetFrame(void) const
Check if a value has been assigned to Frame data member.
bool IsSetLocation(void) const
feature made from Check if a value has been assigned to Location data member.
@ eFrame_three
reading frame
bool IsSet(void) const
Check if a value has been assigned to data member.
void SetTo(TTo value)
Assign a value to To data member.
bool IsMix(void) const
Check if variant Mix is selected.
bool IsEmpty(void) const
Check if variant Empty is selected.
ENa_strand
strand of nucleic acid
const Tdata & Get(void) const
Get the member data.
bool IsSetPoints(void) const
Check if a value has been assigned to Points data member.
bool IsPacked_pnt(void) const
Check if variant Packed_pnt is selected.
const TWhole & GetWhole(void) const
Get the variant data.
void SetId(TId &value)
Assign a value to Id data member.
bool IsSet(void) const
Check if a value has been assigned to data member.
void SetFrom(TFrom value)
Assign a value to From data member.
const Tdata & Get(void) const
Get the member data.
const TPacked_pnt & GetPacked_pnt(void) const
Get the variant data.
bool IsPacked_int(void) const
Check if variant Packed_int is selected.
const TPoints & GetPoints(void) const
Get the Points member data.
bool IsNull(void) const
Check if variant Null is selected.
void SetStrand(TStrand value)
Assign a value to Strand data member.
const TMix & GetMix(void) const
Get the variant data.
const TPacked_int & GetPacked_int(void) const
Get the variant data.
@ e_Ncbieaa
extended ASCII 1 letter aa codes
range(_Ty, _Ty) -> range< _Ty >