48 using namespace sequence;
64 ESpliceSiteRead good_donor = ReadDonorSpliceSite(strand, stop, vec_donor, seq_len_donor, donor);
65 ESpliceSiteRead good_acceptor = ReadAcceptorSpliceSite(strand, start, vec_acceptor, seq_len_acceptor, acceptor);
66 bool donor_ok = (good_donor == eSpliceSiteRead_OK || good_donor == eSpliceSiteRead_WrongNT);
67 bool acceptor_ok = (good_acceptor == eSpliceSiteRead_OK || good_acceptor == eSpliceSiteRead_WrongNT);
69 if (donor_ok && acceptor_ok) {
79 m_AcceptorProblems.push_back(
TSpliceProblem(good_acceptor, start));
82 m_AcceptorProblems.push_back(
TSpliceProblem(good_acceptor, start));
96 if (stop > 1 && stop <= seq_len) {
99 bad_seq = (vec[stop - 1] > 250 || vec[stop - 2] > 250);
103 return eSpliceSiteRead_Gap;
104 }
else if (bad_seq) {
105 return eSpliceSiteRead_BadSeq;
109 site[0] = vec[stop - 2];
110 site[1] = vec[stop - 1];
112 return eSpliceSiteRead_OutOfRange;
117 if (stop < seq_len - 2) {
120 bad_seq = (vec[stop + 1] > 250 || vec[stop + 2] > 250);
123 return eSpliceSiteRead_Gap;
124 }
else if (bad_seq) {
125 return eSpliceSiteRead_BadSeq;
127 site[0] = vec[stop + 1];
128 site[1] = vec[stop + 2];
130 return eSpliceSiteRead_OutOfRange;
136 return eSpliceSiteRead_OK;
138 return eSpliceSiteRead_WrongNT;
141 return eSpliceSiteRead_OK;
151 return ReadDonorSpliceSite(strand, stop, vec, seq_len,
site);
165 bool bad_seq =
false;
169 if (start < seq_len - 2) {
172 bad_seq = (vec[start + 1] > 250 || vec[start + 2] > 250);
176 return eSpliceSiteRead_Gap;
177 }
else if (bad_seq) {
178 return eSpliceSiteRead_BadSeq;
180 site[0] = vec[start + 1];
181 site[1] = vec[start + 2];
183 return eSpliceSiteRead_OutOfRange;
188 if (start > 1 && start <= seq_len) {
191 bad_seq = (vec[start - 2] > 250 || vec[start - 1] > 250);
195 return eSpliceSiteRead_Gap;
196 }
else if (bad_seq) {
197 return eSpliceSiteRead_BadSeq;
199 site[0] = vec[start - 2];
200 site[1] = vec[start - 1];
202 return eSpliceSiteRead_OutOfRange;
207 return eSpliceSiteRead_OK;
209 return eSpliceSiteRead_WrongNT;
212 return eSpliceSiteRead_BadSeq;
225 return ReadAcceptorSpliceSite(strand, start, vec, seq_len,
site);
231 bool has_errors =
false;
233 for (
auto it = m_DonorProblems.begin(); it != m_DonorProblems.end() && !has_errors; it++) {
234 if (it->first == eSpliceSiteRead_BadSeq || it->first == eSpliceSiteRead_Gap ||
235 it->first == eSpliceSiteRead_WrongNT) {
240 for (
auto it = m_AcceptorProblems.begin(); it != m_AcceptorProblems.end() && !has_errors; it++) {
241 if (it->first == eSpliceSiteRead_BadSeq || it->first == eSpliceSiteRead_Gap ||
242 it->first == eSpliceSiteRead_WrongNT) {
253 m_DonorProblems.clear();
254 m_AcceptorProblems.clear();
255 m_ExceptionUnnecessary =
false;
256 m_ErrorsNotExpected =
true;
258 bool has_errors =
false, ribo_slip =
false;
274 m_ErrorsNotExpected =
false;
284 m_ErrorsNotExpected =
false;
292 for (CSeq_loc_CI
si(loc);
si; ++
si) {
293 if (
si.IsSetStrand()) {
297 strand =
si.GetStrand();
298 }
else if (strand !=
tmp) {
306 if (!check_all && num_parts < 2) {
320 ValidateSpliceExon(feat, loc_handle, strand);
323 ValidateSpliceMrna(feat, loc_handle, strand);
326 ValidateSpliceCdregion(feat, loc_handle, strand);
332 has_errors = SpliceSitesHaveErrors();
334 if (!m_ErrorsNotExpected && !has_errors && !ribo_slip) {
335 m_ExceptionUnnecessary =
true;
345 bool overlap_feat_partial_5 =
false;
346 bool overlap_feat_partial_3 =
false;
347 TSeqPos overlap_feat_start = 0;
350 bool overlap_feat_exists =
false;
358 overlap_feat_exists =
true;
373 overlap_feat_exists =
true;
394 start =
range.GetTo();
395 stop =
range.GetFrom();
397 start =
range.GetFrom();
398 stop =
range.GetTo();
401 if (overlap_feat_exists) {
403 if (stop == overlap_feat_stop) {
404 if (overlap_feat_partial_3) {
415 if (start == overlap_feat_start) {
416 if (overlap_feat_partial_5) {
418 m_AcceptorProblems.push_back(
TSpliceProblem(good_acceptor, start));
422 m_AcceptorProblems.push_back(
TSpliceProblem(good_acceptor, start));
433 m_AcceptorProblems.push_back(
TSpliceProblem(good_acceptor, start));
440 }
catch (
const std::exception& ) {
449 bool ignore_mrna_partial5 =
false;
450 bool ignore_mrna_partial3 =
false;
464 ignore_mrna_partial5 =
true;
468 ignore_mrna_partial3 =
true;
475 CSeq_loc_CI
head(loc);
478 const CSeq_loc& part =
head.GetEmbeddingSeq_loc();
485 start =
range.GetTo();
487 start =
range.GetFrom();
491 m_AcceptorProblems.push_back(
TSpliceProblem(good_acceptor, start));
495 CSeq_loc_CI tail(loc);
501 for(; tail; ++
head, ++tail) {
506 if (bsh_head && bsh_tail) {
512 start = range_tail.
GetTo();
516 stop = range_head.
GetTo();
518 ValidateDonorAcceptorPair(
530 const CSeq_loc& part =
head.GetEmbeddingSeq_loc();
537 stop =
range.GetFrom();
539 stop =
range.GetTo();
556 CSeq_loc_CI
head(loc);
559 const CSeq_loc& part =
head.GetEmbeddingSeq_loc();
567 start =
range.GetTo();
569 start =
range.GetFrom();
572 m_AcceptorProblems.push_back(
TSpliceProblem(good_acceptor, start));
578 CSeq_loc_CI tail(loc);
584 for(; tail; ++
head, ++tail) {
589 if (bsh_head && bsh_tail) {
595 start = range_tail.
GetTo();
599 stop = range_head.
GetTo();
601 ValidateDonorAcceptorPair(
613 const CSeq_loc& part =
head.GetEmbeddingSeq_loc();
621 stop =
range.GetFrom();
623 stop =
range.GetTo();
659 struct tagSpliceSiteInfo
682 static int size =
sizeof(SpliceSiteInfo) /
sizeof(
struct tagSpliceSiteInfo);
684 for (
int i = 0;
i <
size; ++
i) {
685 struct tagSpliceSiteInfo* entry = &SpliceSiteInfo[
i];
686 if (strand == entry->strand && entry->id == signature) {
687 return (entry->check_donor0(donor[0]) && entry->check_donor1(donor[1]) &&
688 entry->check_acceptor0(acceptor[0]) && entry->check_acceptor1(acceptor[1]));
699 struct tagSpliceSiteInfo
720 static int size =
sizeof(SpliceSiteInfo) /
sizeof(
struct tagSpliceSiteInfo);
722 for (
int i = 0;
i <
size; ++
i) {
723 struct tagSpliceSiteInfo* entry = &SpliceSiteInfo[
i];
724 if (strand == entry->strand && entry->id == signature) {
725 return (entry->check_site0(
site[0]) && entry->check_site1(
site[1]));
@ eExtreme_Biological
5' and 3'
ESubtype GetSubtype(void) const
SeqVector related exceptions.
namespace ncbi::objects::
bool SpliceSitesHaveErrors()
void ValidateSpliceCdregion(const CSeq_feat &feat, const CBioseq_Handle &bsh, ENa_strand strand)
void CalculateSpliceProblems(const CSeq_feat &feat, bool check_all, bool pseudo, CBioseq_Handle loc_handle)
void ValidateSpliceExon(const CSeq_feat &feat, const CBioseq_Handle &bsh, ENa_strand strand)
ESpliceSiteRead ReadAcceptorSpliceSite(ENa_strand strand, TSeqPos start, const CSeqVector &vec, TSeqPos seq_len, TSpliceSite &site)
ESpliceSiteRead ReadDonorSpliceSite(ENa_strand strand, TSeqPos stop, const CSeqVector &vec, TSeqPos seq_len, TSpliceSite &site)
void ValidateSpliceMrna(const CSeq_feat &feat, const CBioseq_Handle &bsh, ENa_strand strand)
pair< size_t, TSeqPos > TSpliceProblem
void ValidateDonorAcceptorPair(ENa_strand strand, TSeqPos stop, const CSeqVector &vec_donor, TSeqPos seq_len_donor, TSeqPos start, const CSeqVector &vec_acceptor, TSeqPos seq_len_acceptor)
Include a standard set of the NCBI C++ Toolkit most basic headers.
The NCBI C++ standard methods for dealing with std::string.
static const char si[8][64]
unsigned int TSeqPos
Type for sequence locations and lengths.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
bool IsPartialStart(ESeqLocExtremes ext) const
check start or stop of location for e_Lim fuzz
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
bool IsPartialStop(ESeqLocExtremes ext) const
TSeqPos GetStop(ESeqLocExtremes ext) const
CMappedFeat GetBestOverlappingFeat(const CMappedFeat &feat, CSeqFeatData::ESubtype need_subtype, sequence::EOverlapType overlap_type, CFeatTree *feat_tree=0, const SAnnotSelector *base_sel=0)
@ eOverlap_Contains
2nd contains 1st extremes
@ eOverlap_Contained
2nd contained within 1st extremes
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
TInst_Length GetInst_Length(void) const
CScope & GetScope(void) const
Get scope this handle belongs to.
CSeqVector GetSeqVector(EVectorCoding coding, ENa_strand strand=eNa_strand_plus) const
Get sequence: Iupacna or Iupacaa if use_iupac_coding is true.
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
bool IsInGap(TSeqPos pos) const
true if sequence at 0-based position 'pos' has gap Note: this method is not MT-safe,...
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
TTo GetTo(void) const
Get the To member data.
TFrom GetFrom(void) const
Get the From member data.
const TLocation & GetLocation(void) const
Get the Location member data.
const TData & GetData(void) const
Get the Data member data.
bool IsSetExcept(void) const
something funny about this? Check if a value has been assigned to Except data member.
const TExcept_text & GetExcept_text(void) const
Get the Except_text member data.
bool IsSetExcept_text(void) const
explain if except=TRUE Check if a value has been assigned to Except_text data member.
ENa_strand
strand of nucleic acid
range(_Ty, _Ty) -> range< _Ty >
const struct ncbi::grid::netcache::search::fields::SIZE size
bool CheckIntronAcceptor(ENa_strand strand, TConstSpliceSite acceptor)
bool CheckIntronSpliceSites(ENa_strand strand, TConstSpliceSite donor, TConstSpliceSite acceptor)
static bool s_EqualsA(Char c)
static bool s_EqualsT(Char c)
static bool s_EqualsG(Char c)
bool CheckAdjacentSpliceSites(const string &signature, ENa_strand strand, TConstSpliceSite donor, TConstSpliceSite acceptor)
static bool s_EqualsC(Char c)
bool CheckIntronDonor(ENa_strand strand, TConstSpliceSite donor)
bool CheckSpliceSite(const string &signature, ENa_strand strand, TConstSpliceSite site)
const string kSpliceSiteGT
const string kSpliceSiteGTAG
Char const (& TConstSpliceSite)[2]
const string kSpliceSiteGC
const string kSpliceSiteAG
const string kSpliceSiteATAC
const string kSpliceSiteGCAG