101 odds(
double pos_ = 0.0,
double neg_ = 0.0) :
pos(pos_),
neg(neg_) {}
116 return "(+" + std::to_string(
pos)
117 +
", -" + std::to_string(
neg) +
")";
123 _ASSERT(0 < ratio && ratio < 1);
124 return log( ratio / (1 - ratio) );
129 static double clamp(
double x,
double lo,
double hi)
143 const auto num_gaps = gapped_len - ungapped_len;
148 const auto num_gapopens_between_exons =
151 const auto weighted_gaps =
154 + 4 * num_gapopens_between_exons
155 + 4 * num_frameshifts;
157 const odds gaps_odds(ungapped_len, weighted_gaps);
163 matches_odds += polya_len;
172 splices_odds.pos += 1.1;
175 const bool defect_free = matches_odds.neg == 0
176 && gaps_odds.
neg == 0
177 && splices_odds.neg == 0;
196 const odds matchrun_odds(
197 clamp(longest_matchrun, 1, ungapped_len/4.0),
202 0.5 * matches_odds.logodds()
203 + 0.5 * matchrun_odds.
logodds()
205 + splices_odds.logodds()
206 + exons_odds.logodds()
209 :
clamp(coverage, 0.05, 0.95))
213 cerr <<
"ret:" << ret
214 <<
" ma" << matches_odds.AsString()
215 <<
" mr" << matchrun_odds.
AsString()
217 <<
" s" << splices_odds.AsString()
218 <<
" e" << exons_odds.AsString()
219 <<
" " << (defect_free ?
"perfect" :
"not-perfect")
220 << (
int)
round(100 * ret)
231 const string& acptr)
const
233 const bool have_donor = !(donor ==
"" || donor ==
"NN");
234 const bool have_acptr = !(acptr ==
"" || acptr ==
"NN");
244 (have_donor && have_acptr) ?
245 ( donor ==
"GT" && acptr ==
"AG" ? 3.75
246 : donor ==
"AC" && acptr ==
"AG" ? -4.5
247 : donor ==
"AT" && acptr ==
"AC" ? -6.5
248 : donor ==
"GT" || acptr ==
"AG" ? -7.0
251 : ( !have_donor && !have_acptr ? 0.0
252 : !have_donor ? (acptr ==
"AG" ? 2.0 : -7.0)
253 : !have_acptr ? (donor ==
"GT" ? 2.0 : -7.0)
270 for(
const auto& chunk : exon->GetParts()) {
272 if(chunk->IsMatch()) {
273 curr_len += chunk->GetMatch();
274 }
else if(chunk->IsDiag()) {
275 curr_len += chunk->GetDiag();
277 max_len =
max(max_len, curr_len);
283 return max(curr_len, max_len);
296 return r1.CombinationWith(
r2).GetLength()
303 #pragma push_macro("GET_SCORE")
304 #define GET_SCORE(score_name) \
305 double score_name = 0.0; \
306 bool have_##score_name = aln.GetNamedScore(#score_name, score_name); \
307 (void)score_name; (void)have_##score_name
336 if(!have_num_ident && have_num_mismatch) {
337 have_num_ident =
true;
338 num_ident = ungapped_len - num_mismatch;
341 if(!have_num_positives && have_num_negatives) {
342 have_num_positives =
true;
343 num_positives = ungapped_len - num_negatives;
349 double identity = have_num_ident ? num_ident / ungapped_len
350 : have_num_positives ? num_positives / ungapped_len
353 if(identity < 0 || identity > 1.0) {
355 NCBI_USER_THROW(
"Identity is outside of [0..1] range - problem with alignment scores.");
358 return odds(ungapped_len * identity,
359 ungapped_len * (1 - identity));
370 for(
const auto& chunk : exon->GetParts()) {
371 if(chunk->IsMatch()) {
372 res.
pos += chunk->GetMatch();
373 }
else if(chunk->IsMismatch()) {
374 res.
neg += chunk->GetMismatch();
410 if(have_num_ident && have_num_mismatch) {
411 return odds(num_ident,
418 return odds(num_ident,
419 ungapped_len - num_ident);
422 if(have_num_mismatch) {
423 return odds(ungapped_len - num_mismatch,
429 if(have_pct_identity_ungap) {
430 num_ident = pct_identity_ungap * 0.01 * ungapped_len;
431 return odds(num_ident,
432 ungapped_len - num_ident);
437 if(have_pct_identity_gap) {
439 num_ident = pct_identity_gap * 0.01 * gapped_len;
440 return odds(num_ident,
441 ungapped_len - num_ident);
466 for(
const auto& exon : ss.
GetExons()) {
477 prev->IsSetDonor_after_exon()
478 ?
prev->GetDonor_after_exon().GetBases() :
"",
480 exon->IsSetAcceptor_before_exon()
481 ? exon->GetAcceptor_before_exon().GetBases() :
"");
503 for(
const auto& exon : ss.
GetExons()) {
505 const bool is_terminal = exon == ss.
GetExons().front()
508 const size_t len = exon->GetRowSeq_range(1,
true).GetLength();
511 (exon->IsSetPartial() && exon->GetPartial())
512 || (
len <= (is_terminal ? 15 : 5))
515 res += is_poor ? -1 : 1;
568 if(have_pct_coverage_hiqual) {
569 return pct_coverage_hiqual / 100;
573 if(have_pct_coverage) {
574 return pct_coverage / 100;
585 return ung_aln_len * 1.0 /
len;
595 #pragma pop_macro("GET_SCORE")
603 return get_score(aln);
612 if(sas.
Get().empty()) {
616 using pair_t = pair<int, CRef<CSeq_align> >;
619 for(
const auto aln : sas.
Set()) {
621 _ASSERT( aln->GetSeq_id(0).Equals(
622 sas.
Get().front()->GetSeq_id(0)));
624 v.emplace_back(score_fn(*aln), aln);
629 [](
const pair_t
a,
const pair_t
b) {
630 return b.first < a.first;
633 const auto rank1_count = std::count_if(
635 [&v](pair_t p) { return p.first == v.front().first; });
641 for(
const auto& p : v) {
644 if(rank1_count > 1 && p.first == v.front().first) {
645 aln->SetNamedScore(
"rank", 1);
646 aln->SetNamedScore(
"rank1_index", rank1_index++);
647 aln->SetNamedScore(
"rank1_count", (
int)rank1_count);
649 aln->SetNamedScore(
"rank", rank);
654 aln->SetNamedScore(
"best_placement_score", p.first);
656 sas.
Set().emplace_back(aln);
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
#define GET_SCORE(score_name)
ENa_strand GetSeqStrand(TDim row) const
Get strand (the first one if segments have different strands).
TSeqPos GetAlignLength(bool include_gaps=true) const
Get the length of this alignment.
TSeqPos GetNumFrameshifts(TDim row=-1) const
Retrieves the number of times a given row shifts frames; i.e.
TSeqPos GetNumGapOpenings(TDim row=-1) const
Retrieves the number of gap openings in a given row in an alignment (ignoring how many gaps are in th...
TSeqRange GetRowSeq_range(CSeq_align::TDim row, bool always_as_nuc) const
Return exon's range within this row.
static void Rank(objects::CSeq_align_set &sas, score_fn_t score_fn=&NBestPlacement::GetScore)
Adds the following scores: `best_placement_score` as computed by score_fn `rank` - all top scoring al...
std::function< int(const objects::CSeq_align &)> score_fn_t
static int GetScore(const objects::CSeq_align &aln)
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
#define NCBI_USER_THROW(message)
Throw a quick-and-dirty runtime exception of type 'CException' with the given error message and error...
#define MSerial_AsnText
I/O stream manipulators –.
int64_t Int8
8-byte (64-bit) signed integer
Tdata & Set(void)
Assign a value to data member.
TProduct_length GetProduct_length(void) const
Get the Product_length member data.
bool IsSetPoly_a(void) const
start of poly(A) tail on the transcript For sense transcripts: aligned product positions < poly-a <= ...
TProduct_type GetProduct_type(void) const
Get the Product_type member data.
const TSpliced & GetSpliced(void) const
Get the variant data.
const TExons & GetExons(void) const
Get the Exons member data.
bool IsDisc(void) const
Check if variant Disc is selected.
bool IsSetProduct_length(void) const
length of the product, in bases/residues from this (or from poly-a if present), a 3' unaligned length...
TPoly_a GetPoly_a(void) const
Get the Poly_a member data.
bool IsSpliced(void) const
Check if variant Spliced is selected.
const TDisc & GetDisc(void) const
Get the variant data.
const Tdata & Get(void) const
Get the member data.
const TSegs & GetSegs(void) const
Get the Segs member data.
Magic spell ;-) needed for some weird compilers... very empiric.
static const sljit_gpr r1
static const sljit_gpr r2
#define row(bind, expected)
void operator+=(double x)
odds(double pos_=0.0, double neg_=0.0)
double logodds(int a=2) const
static size_t GetPolyA_length(const CSeq_align &aln)
static bool isProtSS(const CSeq_align &aln)
double GetSpliceConsensusScore(const string &donor, const string &acptr) const
static odds s_GetIdentOdds_protSS(const CSeq_align &aln)
odds GetExonsOdds(const CSeq_align &aln) const
pos + neg = count of exons.
static double clamp(double x, double lo, double hi)
static odds s_GetIdentOdds_nucSS(const CSeq_align &aln)
static odds s_GetIdentOdds_disc(const CSeq_align &aln)
int operator()(const CSeq_align &aln) const
static odds GetIdentOdds(const CSeq_align &aln)
double GetScore(const CSeq_align &aln) const
static double as_logodds(double ratio)
static double GetCoverage(const CSeq_align &aln, size_t ung_aln_len)
odds GetSplicesOdds(const CSeq_align &aln) const
static Int8 GetGapLengthOnRow(const CSpliced_exon &exon1, const CSpliced_exon &exon2, CSeq_align::TDim row)
static size_t GetLongestMatchrunLen(const CSeq_align &aln)
static size_t GetNumGapopensBetweenExons(const CSeq_align &aln)