71 command->AddCommand(*update_cmd);
84 if (*(new_prot_seq.end() - 1) ==
'*')
85 new_prot_seq.erase(new_prot_seq.end() - 1);
110 string new_protein = GetProteinSeq(*new_cds, *
m_Scope);
111 new_inst->
SetSeq_data().SetNcbieaa().Set(new_protein);
117 if (!prot_it->GetData().GetProt().IsSetProcessed()) {
119 new_prot->
Assign(prot_it->GetOriginalFeature());
131 command->AddCommand(*update_cmd);
163 string orig_prot_seq;
186 TLocs::iterator range_it = range_it_prev;
189 bool changed =
false;
203 TSeqPos stop = range_it->GetTo();
205 start = length - range_it_prev->GetTo();
206 stop = length - range_it->GetFrom();
212 TSeqPos intron_start = range_it_prev->GetTo() + 1 - start;
213 TSeqPos intron_stop = range_it->GetFrom() -1 - start;
215 intron_start = length - range_it_prev->GetFrom() + 1 - start;
216 intron_stop = length - range_it->GetTo() -1 -start;
227 TSeqPos exon_length = range_it->GetTo() - range_it->GetFrom() + 1;
243 swap(range_prev0, *range_it_prev);
244 swap(range0, *range_it);
255 TSeqPos prev_exon_length = range_it_prev->GetTo() - range_it_prev->GetFrom() + 1;
267 swap(range_prev0, *range_it_prev);
268 swap(range0, *range_it);
310 cmd->AddCommand(*chgmRNA);
326 for ( ; origloc_ci && newloc_ci; ++origloc_ci, ++newloc_ci) {
353 cmd->AddCommand(*chgexon);
427 bool s_IsAcceptorSpliceSiteOK(
const string& seqdata,
TSeqPos intron_stop)
429 if (seqdata.empty())
return false;
430 return (seqdata.at(intron_stop - 1) ==
'A' && seqdata.at(intron_stop) ==
'G');
433 bool s_IsDonorSpliceSiteOK(
const string& seqdata,
TSeqPos intron_start)
435 if (seqdata.empty())
return false;
436 return (seqdata.at(intron_start) ==
'G' && (seqdata.at(intron_start + 1) ==
'T' || seqdata.at(intron_start + 1) ==
'C'));
442 if (seqdata.empty())
return false;
444 return s_IsAcceptorSpliceSiteOK(seqdata, intron_stop) && s_IsDonorSpliceSiteOK(seqdata, intron_start);
448 catch (
const exception& e) {
467 if (mRNA_start == cds_start && mRNA_stop == cds_stop) {
484 TLocs mrna_orig_ranges;
486 mrna_orig_ranges.push_back(loc_iter.GetRange());
492 mrna_ranges.push_back(*iter);
495 TLocs::iterator range_it = mrna_ranges.begin();
496 if (mRNA_start != range_it->GetFrom())
497 range_it->SetFrom(mRNA_start);
498 range_it = mrna_ranges.end() - 1 ;
499 if (mRNA_stop != range_it->GetTo())
500 range_it->SetTo(mRNA_stop);
506 TLocs::iterator range_it = mrna_ranges.begin();
507 TLocs::iterator orig_rng_it = mrna_orig_ranges.begin();
510 while (loc_iter && range_it != mrna_ranges.end() && orig_rng_it != mrna_orig_ranges.end()) {
512 if (range_it->GetFrom() != orig_rng_it->GetFrom() || range_it->GetTo() != orig_rng_it->GetTo()) {
562 if (range_it->GetFrom() != orig_rng_it->GetFrom() || range_it->GetTo() != orig_rng_it->GetTo()) {
583 new_loc->
SetInt(*interval);
635 string new_prot_seq = GetProteinSeq(*new_cds, *
m_Scope);
651 string new_prot_seq = GetProteinSeq(*new_cds, *
m_Scope);
664 new_cds->
SetData().SetCdregion().SetFrame(fr);
666 string prot_seq = GetProteinSeq(*new_cds, *
m_Scope);
668 if (fr != orig_frame) {
669 cds.
SetData().SetCdregion().SetFrame(fr);
674 prot_seq.erase(prot_seq.begin());
676 if (fr != orig_frame) {
677 cds.
SetData().SetCdregion().SetFrame(fr);
697 const string new_prot_seq = GetProteinSeq(*new_cds, *
m_Scope);
701 string modified_prot = new_prot_seq.substr(0, new_prot_seq.length() - 1);
707 string shorter_orig_prot = orig_prot_seq.substr(0, orig_prot_seq.length() - 1);
720 if ( ! orig_loc.
IsMix())
730 if (subloc->
IsPnt()) {
733 if (range_it->GetFrom() != orig_rng_it->GetFrom() || range_it->GetTo() != orig_rng_it->GetTo()) {
740 new_subloc->
SetInt().SetId(*new_id);
744 new_subloc->
SetPnt().Assign(pnt);
746 locs.push_back(new_subloc);
748 }
else if (subloc->
IsInt()) {
750 new_subloc->
Assign(*subloc);
752 if (range_it->GetFrom() != orig_rng_it->GetFrom()) {
753 new_subloc->
SetInt().SetFrom(range_it->GetFrom());
755 if (range_it->GetTo() != orig_rng_it->GetTo()) {
756 new_subloc->
SetInt().SetTo(range_it->GetTo());
758 locs.push_back(new_subloc);
775 if (range_it->GetFrom() != orig_rng_it->GetFrom()) {
776 interval.
SetFrom(range_it->GetFrom());
778 if (range_it->GetTo() != orig_rng_it->GetTo()) {
779 interval.
SetTo(range_it->GetTo());
786 CSeq_loc::TIntervals::iterator it = locs.begin();
792 if (range_it->GetFrom() != orig_rng_it->GetFrom()) {
793 interval.
SetFrom(range_it->GetFrom());
795 if (range_it->GetTo() != orig_rng_it->GetTo()) {
796 interval.
SetTo(range_it->GetTo());
879 string orig_prot_seq;
881 if (orig_prot_seq.empty()) {
888 bool changed =
false;
889 if (orig_prot_seq.front() !=
'M' &&
891 !edit::CLocationEditPolicy::Is5AtEndOfSeq(cds.
GetLocation(), bsh)) {
896 if (orig_prot_seq.back() !=
'*' &&
898 !edit::CLocationEditPolicy::Is3AtEndOfSeq(cds.
GetLocation(), bsh)) {
915 if (!bsh)
return false;
924 if (seq_length - start == 1 || seq_length - start == 2) {
929 if (start == 1 || start == 2) {
938 if (!bsh)
return false;
947 if (stop == 1 || stop == 2) {
952 if (seq_length - stop == 1 || seq_length - stop == 2) {
972 bool changed =
false;
979 TSeqPos stop = range_it->GetTo();
981 stop = length - range_it->GetFrom();
987 TSeqPos intron_stop = range_it->GetFrom() - 1 - start;
989 intron_stop = length - range_it->GetTo() - 1 - start;
992 if (intron_stop < seqdata.size() &&
993 intron_stop - 1 < seqdata.size() &&
994 s_IsAcceptorSpliceSiteOK(seqdata, intron_stop)) {
1000 while (
offset < 3 && !changed) {
1001 if (intron_stop -
offset < seqdata.size() &&
1002 intron_stop -
offset - 1 < seqdata.size() &&
1003 s_IsAcceptorSpliceSiteOK(seqdata, intron_stop -
offset)) {
1008 swap(range_orig, *range_it);
1016 intron_stop +
offset < seqdata.size() &&
1017 intron_stop +
offset + 1 < seqdata.size() &&
1018 s_IsAcceptorSpliceSiteOK(seqdata, intron_stop +
offset)) {
1023 swap(range_orig, *range_it);
1034 if (!changed && IsCloseTo5EndOfSeq(cds.
GetLocation(), bsh)) {
1036 while (
offset > 0 && !changed) {
1040 swap(range_orig, *range_it);
1056 bool extended =
false;
1097 bool changed =
false;
1102 TSeqPos start = range_it->GetFrom();
1105 start = length - range_it->GetTo();
1111 TSeqPos intron_start = range_it->GetTo() + 1 - start;
1113 intron_start = length - range_it->GetFrom() + 1 - start;
1116 if (intron_start < seqdata.size() &&
1117 intron_start + 1 < seqdata.size() &&
1118 s_IsDonorSpliceSiteOK(seqdata, intron_start)) {
1125 while (
offset < 3 && !changed) {
1126 if (intron_start +
offset < seqdata.size() &&
1127 intron_start +
offset + 1 < seqdata.size() &&
1128 s_IsDonorSpliceSiteOK(seqdata, intron_start +
offset)) {
1133 swap(range_orig, *range_it);
1141 intron_start -
offset < seqdata.size() &&
1142 intron_start -
offset + 1 < seqdata.size() &&
1143 s_IsDonorSpliceSiteOK(seqdata, intron_start -
offset)) {
1148 swap(range_orig, *range_it);
1159 if (!changed && IsCloseTo3EndOfSeq(cds.
GetLocation(), bsh)) {
1161 while (
offset > 0 && !changed) {
1165 swap(range_orig, *range_it);
1180 bool extended =
false;
1220 cmd->AddCommand(*chgmRNA);
1244 if (cds_start != mrna_start) {
1246 loc_it.
SetTo(cds_start);
1254 if (cds_stop != mrna_stop) {
1255 auto num_intervals = loc_it.
GetSize();
1256 loc_it.
SetPos(num_intervals - 1);
1261 loc_it.
SetTo(cds_stop);
@ eExtreme_Positional
numerical value
@ eExtreme_Biological
5' and 3'
TSeqPos x_IntronLength(const TSeqRange &rng_prev, const TSeqRange &rng)
bool x_HasProteinChanged(const objects::CSeq_feat &cds, const string &orig_prot_seq)
static bool s_IsBioseqGood_AdjustEnds(const objects::CBioseq_Handle &bsh)
void x_InitRanges(const objects::CSeq_feat &cds)
bool AdjustCDS(objects::CSeq_feat &cds)
bool x_HasProteinChangedAt3End(objects::CSeq_feat &cds, const string &orig_prot_seq)
bool x_ExtendStartOfExon(TSeqRange &range, TSeqPos offset, const objects::CSeq_loc &loc, objects::CBioseq_Handle bsh)
vector< TSeqRange > TLocs
CRef< objects::CSeq_loc > x_CreateNewLocation(const objects::CSeq_feat &cds)
void x_UpdateMixLocations(const objects::CSeq_loc &orig_loc, objects::CSeq_loc::TLocations &locs)
void x_UpdateInterval(objects::CSeq_interval &interval)
CRef< CCmdComposite > AdjustmRNAandExonEnds(const objects::CSeq_feat &new_cds, const objects::CSeq_feat &orig_cds)
static bool s_IsBioseqGood_Strict(const objects::CBioseq_Handle &bsh)
static bool s_IsBioseqGood_Relaxed(const objects::CBioseq_Handle &bsh)
void x_ShiftExonPairBackward(TSeqRange &range_prev, TSeqRange &range, TSeqPos offset)
bool x_AlsoAdjustmRNA(objects::CSeq_feat &mrna, const objects::CSeq_loc &loc)
bool x_HasProteinChangedAt5End(objects::CSeq_feat &cds, const string &orig_prot_seq)
CRef< CCmdComposite > GetCommandToAdjustCDSEnds(const objects::CSeq_feat &cds)
void x_TrimStopOfExon(TSeqRange &range, TSeqPos offset)
bool x_ExtendStopOfExon(TSeqRange &range, TSeqPos offset, const objects::CSeq_loc &loc, objects::CBioseq_Handle bsh)
objects::ENa_strand m_Strand
bool x_AdjustCDS3End(objects::CSeq_feat &cds, const string &orig_prot_seq)
bool AdjustmRNAToMatchCDS(const objects::CSeq_feat &edit_cds, objects::CSeq_feat &mrna)
bool x_AdjustCDS5End(objects::CSeq_feat &cds, const string &orig_prot_seq)
void x_TrimStartOfExon(TSeqRange &range, TSeqPos offset)
void x_ShiftExonPairForward(TSeqRange &range_prev, TSeqRange &range, TSeqPos offset)
CRef< CCmdComposite > GetCommand(const objects::CSeq_feat &cds)
AdjustForConsensusSpliceSite Adjust internal intervals of a CDS (and its associated mRNA feature) to ...
static bool s_IsAdjustedSpliceSitePairOK(const string &seqdata, TSeqPos intron_start, TSeqPos intron_stop)
bool AdjustmRNAToMatchCDSEnds(const objects::CSeq_feat &edit_cds, objects::CSeq_feat &mrna)
CRef< objects::CScope > m_Scope
void x_UpdateIntervals(objects::CSeq_loc::TIntervals &locs)
CRef< objects::CSeq_loc > x_UpdateLocation(const objects::CSeq_feat &cds)
void x_UpdateExonFeatures(CCmdComposite *cmd, const objects::CSeq_feat &orig_cds, const objects::CSeq_feat &new_cds)
bool AdjustCDSEnds(objects::CSeq_feat &cds)
CRef< CCmdComposite > AdjustmRNAandExonFeatures(const objects::CSeq_feat &new_cds, const objects::CSeq_feat &orig_cds)
const string & GetLineage(void) const
bool IsSetLineage(void) const
namespace ncbi::objects::
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
void AddInterval(const CSeq_id &id, TSeqPos from, TSeqPos to, ENa_strand strand=eNa_strand_unknown)
void AddSeqLoc(const CSeq_loc &other)
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
void Error(CExceptionArgs_Base &args)
const string & GetMsg(void) const
Get message string.
virtual const char * what(void) const noexcept
Standard report (includes full backlog).
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of Assign(); the generic CSerialObject::Assign is not so efficient.
CRef< CSeq_loc > MakeSeq_loc(EMakeType make_type=eMake_CompactType) const
return constructed CSeq_loc with all changes
void SetPacked_int(TPacked_int &v)
void SetFrom(TSeqPos from)
Set the range from position.
void SetTo(TSeqPos to)
Set the range to position.
bool IsPartialStart(ESeqLocExtremes ext) const
check start or stop of location for e_Lim fuzz
ENa_strand GetStrand(void) const
Get the location's strand.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Override Assign() to incorporate cache invalidation.
CSeq_loc_mix_Base::Tdata TLocations
TRange GetTotalRange(void) const
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
CConstRef< CSeq_loc > GetRangeAsSeq_loc(void) const
Get seq-loc for the current iterator position.
bool IsSetStrand(void) const
Get strand.
bool HasChanges(void) const
return true if any part was changed since initialization
CPacked_seqint_Base::Tdata TIntervals
void SetPos(size_t pos)
Set iterator's position.
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
size_t GetSize(void) const
Get number of ranges.
TRange GetRange(void) const
Get the range.
void SetPartialStart(bool val, ESeqLocExtremes ext)
set / remove e_Lim fuzz on start or stop (lt/gt - indicating partial interval)
ENa_strand GetStrand(void) const
void SetPartialStop(bool val, ESeqLocExtremes ext)
bool IsPartialStop(ESeqLocExtremes ext) const
TSeqPos GetStop(ESeqLocExtremes ext) const
@ eOrder_Biological
Iterate sub-locations in biological order.
CConstRef< CSeq_feat > GetmRNAforCDS(const CSeq_feat &cds, CScope &scope)
GetmRNAforCDS A function to find a CSeq_feat representing the appropriate mRNA for a given CDS.
static void Translate(const string &seq, string &prot, const CGenetic_code *code, bool include_stop=true, bool remove_trailing_X=false, bool *alt_start=NULL, bool is_5prime_complete=true, bool is_3prime_complete=true)
Translate a string using a specified genetic code.
TSeqPos GetBioseqLength(void) const
bool IsProtein(void) const
TInst_Length GetInst_Length(void) const
CSeqVector GetSeqVector(EVectorCoding coding, ENa_strand strand=eNa_strand_plus) const
Get sequence: Iupacna or Iupacaa if use_iupac_coding is true.
@ eCoding_Ncbi
Set coding to binary coding (Ncbi4na or Ncbistdaa)
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
const CSeq_feat & GetMappedFeature(void) const
Feature mapped to the master sequence.
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
void SetCoding(TCoding coding)
TObjectType & GetObject(void)
Get object.
TObjectType & GetObject(void) const
Get object.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
TGenome GetGenome(void) const
Get the Genome member data.
bool IsSetOrg(void) const
Check if a value has been assigned to Org data member.
const TOrg & GetOrg(void) const
Get the Org member data.
void SetFrom(TFrom value)
Assign a value to From data member.
TTo GetTo(void) const
Get the To member data.
TFrom GetFrom(void) const
Get the From member data.
void SetTo(TTo value)
Assign a value to To data member.
void SetLocation(TLocation &value)
Assign a value to Location data member.
bool IsCdregion(void) const
Check if variant Cdregion is selected.
const TLocation & GetLocation(void) const
Get the Location member data.
TFrame GetFrame(void) const
Get the Frame member data.
const TData & GetData(void) const
Get the Data member data.
bool IsSetExcept(void) const
something funny about this? Check if a value has been assigned to Except data member.
bool IsSetExcept_text(void) const
explain if except=TRUE. Check if a value has been assigned to Except_text data member.
void SetData(TData &value)
Assign a value to Data data member.
const TCdregion & GetCdregion(void) const
Get the variant data.
const TProduct & GetProduct(void) const
Get the Product member data.
void ResetLocation(void)
Reset Location data member.
TExcept GetExcept(void) const
Get the Except member data.
bool IsSetProduct(void) const
product of process. Check if a value has been assigned to Product data member.
bool IsSetFrame(void) const
Check if a value has been assigned to Frame data member.
bool IsSetLocation(void) const
feature made from. Check if a value has been assigned to Location data member.
@ eFrame_not_set
not set, code uses one
@ eFrame_three
reading frame
void SetTo(TTo value)
Assign a value to To data member.
bool IsMix(void) const
Check if variant Mix is selected.
ENa_strand
strand of nucleic acid
const TPnt & GetPnt(void) const
Get the variant data.
TPoint GetPoint(void) const
Get the Point member data.
const TId & GetId(void) const
Get the Id member data.
void SetFrom(TFrom value)
Assign a value to From data member.
bool IsPacked_int(void) const
Check if variant Packed_int is selected.
bool IsInt(void) const
Check if variant Int is selected.
bool IsPnt(void) const
Check if variant Pnt is selected.
TRepr GetRepr(void) const
Get the Repr member data.
const TSource & GetSource(void) const
Get the variant data.
bool IsSetBiomol(void) const
Check if a value has been assigned to Biomol data member.
TMol GetMol(void) const
Get the Mol member data.
TBiomol GetBiomol(void) const
Get the Biomol member data.
void SetLength(TLength value)
Assign a value to Length data member.
void SetSeq_data(TSeq_data &value)
Assign a value to Seq_data data member.
const TMolinfo & GetMolinfo(void) const
Get the variant data.
@ eRepr_raw
continuous sequence
@ e_Ncbieaa
extended ASCII 1 letter aa codes
@ e_Molinfo
info on the molecule and techniques
@ e_Source
source of materials, includes Org-ref
range(_Ty, _Ty) -> range< _Ty >
static int match(register const pcre_uchar *eptr, register const pcre_uchar *ecode, const pcre_uchar *mstart, int offset_top, match_data *md, eptrblock *eptrb, unsigned int rdepth)