48 using namespace sequence;
64 ESpliceSiteRead good_donor = ReadDonorSpliceSite(strand, stop, vec_donor, seq_len_donor, donor);
65 ESpliceSiteRead good_acceptor = ReadAcceptorSpliceSite(strand, start, vec_acceptor, seq_len_acceptor, acceptor);
66 bool donor_ok = (good_donor == eSpliceSiteRead_OK || good_donor == eSpliceSiteRead_WrongNT);
67 bool acceptor_ok = (good_acceptor == eSpliceSiteRead_OK || good_acceptor == eSpliceSiteRead_WrongNT);
69 if (donor_ok && acceptor_ok) {
79 m_AcceptorProblems.push_back(
TSpliceProblem(good_acceptor, start));
82 m_AcceptorProblems.push_back(
TSpliceProblem(good_acceptor, start));
96 if (stop > 1 && stop <= seq_len) {
99 bad_seq = (vec[stop - 1] > 250 || vec[stop - 2] > 250);
103 return eSpliceSiteRead_Gap;
104 }
else if (bad_seq) {
105 return eSpliceSiteRead_BadSeq;
109 site[0] = vec[stop - 2];
110 site[1] = vec[stop - 1];
112 return eSpliceSiteRead_OutOfRange;
117 if (stop < seq_len - 2) {
120 bad_seq = (vec[stop + 1] > 250 || vec[stop + 2] > 250);
123 return eSpliceSiteRead_Gap;
124 }
else if (bad_seq) {
125 return eSpliceSiteRead_BadSeq;
127 site[0] = vec[stop + 1];
128 site[1] = vec[stop + 2];
130 return eSpliceSiteRead_OutOfRange;
136 return eSpliceSiteRead_OK;
138 return eSpliceSiteRead_WrongNT;
141 return eSpliceSiteRead_OK;
151 return ReadDonorSpliceSite(strand, stop, vec, seq_len,
site);
166 bool bad_seq =
false;
170 if (start < seq_len - 2) {
173 bad_seq = (vec[start + 1] > 250 || vec[start + 2] > 250);
177 return eSpliceSiteRead_Gap;
178 }
else if (bad_seq) {
179 return eSpliceSiteRead_BadSeq;
181 site[0] = vec[start + 1];
182 site[1] = vec[start + 2];
184 return eSpliceSiteRead_OutOfRange;
189 if (start > 1 && start <= seq_len) {
192 bad_seq = (vec[start - 2] > 250 || vec[start - 1] > 250);
196 return eSpliceSiteRead_Gap;
197 }
else if (bad_seq) {
198 return eSpliceSiteRead_BadSeq;
200 site[0] = vec[start - 2];
201 site[1] = vec[start - 1];
203 return eSpliceSiteRead_OutOfRange;
208 return eSpliceSiteRead_OK;
210 return eSpliceSiteRead_WrongNT;
213 return eSpliceSiteRead_BadSeq;
226 return ReadAcceptorSpliceSite(strand, start, vec, seq_len,
site);
232 bool has_errors =
false;
234 for (
auto it = m_DonorProblems.begin(); it != m_DonorProblems.end() && !has_errors; it++) {
235 if (it->first == eSpliceSiteRead_BadSeq || it->first == eSpliceSiteRead_Gap ||
236 it->first == eSpliceSiteRead_WrongNT) {
241 for (
auto it = m_AcceptorProblems.begin(); it != m_AcceptorProblems.end() && !has_errors; it++) {
242 if (it->first == eSpliceSiteRead_BadSeq || it->first == eSpliceSiteRead_Gap ||
243 it->first == eSpliceSiteRead_WrongNT) {
254 m_DonorProblems.clear();
255 m_AcceptorProblems.clear();
256 m_ExceptionUnnecessary =
false;
257 m_ErrorsNotExpected =
true;
259 bool has_errors =
false, ribo_slip =
false;
275 m_ErrorsNotExpected =
false;
285 m_ErrorsNotExpected =
false;
293 for (CSeq_loc_CI
si(loc);
si; ++
si) {
294 if (
si.IsSetStrand()) {
298 strand =
si.GetStrand();
299 }
else if (strand !=
tmp) {
307 if (!check_all && num_parts < 2) {
321 ValidateSpliceExon(feat, loc_handle, strand);
324 ValidateSpliceMrna(feat, loc_handle, strand);
327 ValidateSpliceCdregion(feat, loc_handle, strand);
333 has_errors = SpliceSitesHaveErrors();
335 if (!m_ErrorsNotExpected && !has_errors && !ribo_slip) {
336 m_ExceptionUnnecessary =
true;
346 bool overlap_feat_partial_5 =
false;
347 bool overlap_feat_partial_3 =
false;
348 TSeqPos overlap_feat_start = 0;
351 bool overlap_feat_exists =
false;
359 overlap_feat_exists =
true;
374 overlap_feat_exists =
true;
395 start =
range.GetTo();
396 stop =
range.GetFrom();
398 start =
range.GetFrom();
399 stop =
range.GetTo();
402 if (overlap_feat_exists) {
404 if (stop == overlap_feat_stop) {
405 if (overlap_feat_partial_3) {
416 if (start == overlap_feat_start) {
417 if (overlap_feat_partial_5) {
419 m_AcceptorProblems.push_back(
TSpliceProblem(good_acceptor, start));
423 m_AcceptorProblems.push_back(
TSpliceProblem(good_acceptor, start));
434 m_AcceptorProblems.push_back(
TSpliceProblem(good_acceptor, start));
441 }
catch (
const std::exception& ) {
450 bool ignore_mrna_partial5 =
false;
451 bool ignore_mrna_partial3 =
false;
465 ignore_mrna_partial5 =
true;
469 ignore_mrna_partial3 =
true;
476 CSeq_loc_CI
head(loc);
479 const CSeq_loc& part =
head.GetEmbeddingSeq_loc();
486 start =
range.GetTo();
488 start =
range.GetFrom();
492 m_AcceptorProblems.push_back(
TSpliceProblem(good_acceptor, start));
496 CSeq_loc_CI tail(loc);
502 for(; tail; ++
head, ++tail) {
507 if (bsh_head && bsh_tail) {
513 start = range_tail.
GetTo();
517 stop = range_head.
GetTo();
519 ValidateDonorAcceptorPair(strand,
530 const CSeq_loc& part =
head.GetEmbeddingSeq_loc();
537 stop =
range.GetFrom();
539 stop =
range.GetTo();
556 CSeq_loc_CI
head(loc);
559 const CSeq_loc& part =
head.GetEmbeddingSeq_loc();
567 start =
range.GetTo();
569 start =
range.GetFrom();
572 m_AcceptorProblems.push_back(
TSpliceProblem(good_acceptor, start));
578 CSeq_loc_CI tail(loc);
584 for(; tail; ++
head, ++tail) {
589 if (bsh_head && bsh_tail) {
596 start = range_tail.
GetTo();
600 stop = range_head.
GetTo();
602 ValidateDonorAcceptorPair(strand,
613 const CSeq_loc& part =
head.GetEmbeddingSeq_loc();
621 stop =
range.GetFrom();
623 stop =
range.GetTo();
660 struct tagSpliceSiteInfo
683 static int size =
sizeof(SpliceSiteInfo) /
sizeof(
struct tagSpliceSiteInfo);
685 for (
int i = 0;
i <
size; ++
i) {
686 struct tagSpliceSiteInfo* entry = &SpliceSiteInfo[
i];
687 if (strand == entry->strand && entry->id == signature) {
688 return (entry->check_donor0(donor[0]) && entry->check_donor1(donor[1]) &&
689 entry->check_acceptor0(acceptor[0]) && entry->check_acceptor1(acceptor[1]));
700 struct tagSpliceSiteInfo
721 static int size =
sizeof(SpliceSiteInfo) /
sizeof(
struct tagSpliceSiteInfo);
723 for (
int i = 0;
i <
size; ++
i) {
724 struct tagSpliceSiteInfo* entry = &SpliceSiteInfo[
i];
725 if (strand == entry->strand && entry->id == signature) {
726 return (entry->check_site0(
site[0]) && entry->check_site1(
site[1]));
@ eExtreme_Biological
5' and 3'
ESubtype GetSubtype(void) const
SeqVector related exceptions.
namespace ncbi::objects::
bool SpliceSitesHaveErrors()
void ValidateSpliceCdregion(const CSeq_feat &feat, const CBioseq_Handle &bsh, ENa_strand strand)
void CalculateSpliceProblems(const CSeq_feat &feat, bool check_all, bool pseudo, CBioseq_Handle loc_handle)
void ValidateSpliceExon(const CSeq_feat &feat, const CBioseq_Handle &bsh, ENa_strand strand)
ESpliceSiteRead ReadAcceptorSpliceSite(ENa_strand strand, TSeqPos start, const CSeqVector &vec, TSeqPos seq_len, TSpliceSite &site)
ESpliceSiteRead ReadDonorSpliceSite(ENa_strand strand, TSeqPos stop, const CSeqVector &vec, TSeqPos seq_len, TSpliceSite &site)
void ValidateSpliceMrna(const CSeq_feat &feat, const CBioseq_Handle &bsh, ENa_strand strand)
pair< size_t, TSeqPos > TSpliceProblem
void ValidateDonorAcceptorPair(ENa_strand strand, TSeqPos stop, const CSeqVector &vec_donor, TSeqPos seq_len_donor, TSeqPos start, const CSeqVector &vec_acceptor, TSeqPos seq_len_acceptor)
Include a standard set of the NCBI C++ Toolkit most basic headers.
The NCBI C++ standard methods for dealing with std::string.
static const char si[8][64]
unsigned int TSeqPos
Type for sequence locations and lengths.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
bool IsPartialStart(ESeqLocExtremes ext) const
check start or stop of location for e_Lim fuzz
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
bool IsPartialStop(ESeqLocExtremes ext) const
TSeqPos GetStop(ESeqLocExtremes ext) const
CMappedFeat GetBestOverlappingFeat(const CMappedFeat &feat, CSeqFeatData::ESubtype need_subtype, sequence::EOverlapType overlap_type, CFeatTree *feat_tree=0, const SAnnotSelector *base_sel=0)
@ eOverlap_Contains
2nd contains 1st extremes
@ eOverlap_Contained
2nd contained within 1st extremes
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
TInst_Length GetInst_Length(void) const
CScope & GetScope(void) const
Get scope this handle belongs to.
CSeqVector GetSeqVector(EVectorCoding coding, ENa_strand strand=eNa_strand_plus) const
Get sequence: Iupacna or Iupacaa if use_iupac_coding is true.
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
bool IsInGap(TSeqPos pos) const
true if sequence at 0-based position 'pos' has gap Note: this method is not MT-safe,...
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
TTo GetTo(void) const
Get the To member data.
TFrom GetFrom(void) const
Get the From member data.
const TLocation & GetLocation(void) const
Get the Location member data.
const TData & GetData(void) const
Get the Data member data.
bool IsSetExcept(void) const
something funny about this? Check if a value has been assigned to Except data member.
const TExcept_text & GetExcept_text(void) const
Get the Except_text member data.
bool IsSetExcept_text(void) const
explain if except=TRUE Check if a value has been assigned to Except_text data member.
ENa_strand
strand of nucleic acid
range(_Ty, _Ty) -> range< _Ty >
const struct ncbi::grid::netcache::search::fields::SIZE size
bool CheckIntronAcceptor(ENa_strand strand, TConstSpliceSite acceptor)
bool CheckIntronSpliceSites(ENa_strand strand, TConstSpliceSite donor, TConstSpliceSite acceptor)
bool CheckAdjacentSpliceSites(const string &signature, ENa_strand strand, TConstSpliceSite donor, TConstSpliceSite acceptor)
bool CheckIntronDonor(ENa_strand strand, TConstSpliceSite donor)
bool CheckSpliceSite(const string &signature, ENa_strand strand, TConstSpliceSite site)
const string kSpliceSiteGT
const string kSpliceSiteGTAG
Char const (& TConstSpliceSite)[2]
const string kSpliceSiteGC
const string kSpliceSiteAG
const string kSpliceSiteATAC
const string kSpliceSiteGCAG