32 #ifndef COMPARE_FEATS_HPP_
33 #define COMPARE_FEATS_HPP_
59 fCmp_Unknown = 1 << 0,
60 fCmp_Incomplete = 1 << 1,
61 fCmp_NoOverlap = 1 << 2,
62 fCmp_RegionOverlap = 1 << 3,
63 fCmp_Overlap = 1 << 4,
65 fCmp_Superset = 1 << 6,
66 fCmp_intsMissing_internal = 1 << 7,
67 fCmp_intsExtra_internal = 1 << 8,
68 fCmp_intsMissing_3p = 1 << 9,
69 fCmp_intsExtra_3p = 1 << 10,
70 fCmp_intsMissing_5p = 1 << 11,
71 fCmp_intsExtra_5p = 1 << 12,
72 fCmp_3pExtension = 1 << 13,
73 fCmp_3pTruncation = 1 << 14,
74 fCmp_5pExtension = 1 << 15,
75 fCmp_5pTruncation = 1 << 16,
76 fCmp_StrandDifferent = 1 << 17,
77 fCmp_FuzzDifferent = 1 << 18,
90 fCmp_IgnoreStrand = 1 << 0,
99 : m_exon_ordinal1(pos1), m_exon_ordinal2(pos2), m_result(
result), m_position_comparison(pos_comparison) {}
118 m_loc1->Assign(loc1);
120 m_loc2->Assign(loc2);
121 if ( m_flags & fCmp_IgnoreStrand ) {
122 m_loc1->ResetStrand();
123 m_loc2->ResetStrand();
131 this->m_cachedOverlapValues =
false;
139 return GetOverlap(eOverlap_vs_Union);
145 return GetOverlap(eOverlap_vs_Shorter);
151 if(!m_cachedOverlapValues) {
152 x_ComputeOverlapValues();
158 case eOverlap_vs_Union:
159 denom = m_len_seqloc1 + m_len_seqloc2 - m_len_seqloc_overlap;
161 case eOverlap_vs_Shorter:
164 case eOverlap_vs_First:
165 denom = m_len_seqloc1;
167 case eOverlap_vs_Second:
168 denom = m_len_seqloc2;
172 return (denom == 0) ? 0.0 : (
static_cast<double>(m_len_seqloc_overlap) / denom);
177 if(!m_cachedOverlapValues) {
178 x_ComputeOverlapValues();
181 score = m_shared_sites_score;
183 *loc1_intervals = m_loc1_interval_count;
186 *loc2_intervals = m_loc2_interval_count;
192 TCompareLocsFlags GetResult(
string* str_out =
NULL)
const;
217 string GetEvidenceString()
const;
222 return m_IntComparisons;
234 : m_first(0, 0, fCmp_Unknown, 0)
235 , m_last(0, 0, fCmp_Unknown, 0)
236 , m_isReverse(isReverse)
242 return !(m_first.m_exon_ordinal1 == 0 && m_first.m_exon_ordinal2 == 0 && m_last.m_exon_ordinal1 == 0 && m_last.m_exon_ordinal2 == 0);
255 if(
r.m_position_comparison == m_last.m_position_comparison
256 &&
r.m_result == m_last.m_result
257 && ((!m_isReverse &&
r.m_exon_ordinal1 == m_last.m_exon_ordinal1 + 1)
258 || (m_isReverse &&
r.m_exon_ordinal1 == m_last.m_exon_ordinal1 - 1)
259 || (
r.m_exon_ordinal1 == 0 && m_last.m_exon_ordinal1 == 0))
260 && ((!m_isReverse &&
r.m_exon_ordinal2 == m_last.m_exon_ordinal2 + 1)
261 || (m_isReverse &&
r.m_exon_ordinal2 == m_last.m_exon_ordinal2 - 1)
262 || (
r.m_exon_ordinal2 == 0 && m_last.m_exon_ordinal2 == 0)))
286 partially_matched(0),
315 void x_ComputeOverlapValues()
const;
354 ,
double mapped_identity
361 , m_feat1_mapped_loc(&feat1_mapped_loc)
362 , m_feat1_self_loc(&feat1_self_loc)
365 , m_feat2_self_loc(&feat2_self_loc)
369 , m_mapped_identity(mapped_identity)
376 ,
double mapped_identity
380 , m_feat1_mapped_loc(&feat1_mapped_loc)
381 , m_feat1_self_loc(&feat1_self_loc)
384 , m_mapped_identity(mapped_identity)
390 , m_feat2_self_loc(&feat2_self_loc)
393 , m_mapped_identity(mapped_identity)
404 return m_mapped_identity;
411 && m_feat1->CanGetData()
412 && m_feat2->CanGetData()
413 && (m_feat1->GetData().GetSubtype() == m_feat2->GetData().GetSubtype());
419 && m_feat1->CanGetData()
420 && m_feat2->CanGetData()
421 && (m_feat1->GetData().Which() == m_feat2->GetData().Which());
447 string gene_label =
"";
460 bool IsMatch()
const {
return !m_feat1.IsNull() && !m_feat2.IsNull();}
491 eScore_SymmetricPctOverlap
492 , eScore_Feat1PctOverlap
493 , eScore_Feat2PctOverlap
497 fSelectBest = (1<<0),
498 fMergeExons = (1<<1),
499 fDifferentGenesOnly = (1<<2),
500 fCreateSentinelGenes = (1<<3),
501 fSameTypeOnly = (1<<4)
515 ,
EScoreMethod score_method = eScore_SymmetricPctOverlap)
516 : m_loc_q(&query_loc)
520 , m_selector_q(q_sel)
521 , m_selector_t(t_sel)
522 , m_target_id(&target_id)
523 , m_comp_options(options)
524 , m_score_method(score_method)
525 , m_loc_q_ci(*m_scope_q, *m_loc_q, q_sel)
526 , m_already_processed_unmatched_targets(
false)
535 t_whole_loc->
SetInt().SetId(*t_id);
536 t_whole_loc->
SetInt().SetFrom(0);
543 q_whole_loc->
SetInt().SetId(*q_id);
544 q_whole_loc->
SetInt().SetFrom(0);
548 m_self_mapper_q.Reset(
new CSeq_loc_Mapper(*q_whole_loc, *q_whole_loc, m_scope_q));
549 m_self_mapper_t.Reset(
new CSeq_loc_Mapper(*t_whole_loc, *t_whole_loc, m_scope_t));
551 m_seen_targets.clear();
557 m_seen_targets.clear();
567 static int s_GetGeneId(
const CSeq_feat& feat);
573 bool merge_single_range);
CCompareFeats represents the result of a comparison of two features.
CConstRef< CSeq_loc > m_feat1_self_loc
CConstRef< CSeq_feat > GetFeatQ() const
CCompareFeats(const CSeq_feat &feat1, const CSeq_loc &feat1_mapped_loc, double mapped_identity, const CSeq_loc &feat1_self_loc, CScope *scope1)
No matching feat2.
CConstRef< CSeq_loc > GetSelfLocT() const
bool IsSameSubtype() const
CCompareFeats(const CSeq_feat &feat1, const CSeq_loc &feat1_mapped_loc, double mapped_identity, const CSeq_loc &feat1_self_loc, CScope *scope1, const CSeq_feat &feat2, const CSeq_loc &feat2_self_loc, CScope *scope2)
CConstRef< CSeq_feat > m_feat1
CConstRef< CSeq_loc > GetMappedLocQ() const
void SetIrrelevance(int val)
CConstRef< CSeq_loc > m_feat1_mapped_loc
CConstRef< CCompareSeq_locs > GetComparison() const
static string s_GetLocLabel(const CSeq_loc &loc, bool merged=false)
CRef< CCompareSeq_locs > m_compare
CConstRef< CSeq_loc > GetSelfLocQ() const
CConstRef< CSeq_loc > m_feat2_self_loc
CConstRef< CSeq_feat > GetFeatT() const
static string s_GetFeatLabel(const CSeq_feat &gene_feat, feature::TFeatLabelFlags type=feature::fFGL_Both)
CConstRef< CSeq_feat > m_feat2
CCompareFeats(const CSeq_feat &feat2, const CSeq_loc &feat2_self_loc, double mapped_identity, CScope *scope2)
No matching feat1.
int GetIrrelevance() const
double GetMappedIdentity() const
Compare multiple feature annotations on the specified seq_locs.
CCompareSeqRegions(const CSeq_loc &query_loc, CScope *q_scope, CScope *t_scope, ILocMapper &mapper, const SAnnotSelector &q_sel, const SAnnotSelector &t_sel, const CSeq_id &target_id, TComparisonOptions options=fSelectBest|fMergeExons, EScoreMethod score_method=eScore_SymmetricPctOverlap)
const CSeq_loc & GetQueryLoc() const
CRef< CSeq_loc_Mapper > m_self_mapper_q
const SAnnotSelector & m_selector_q
bool m_already_processed_unmatched_targets
EScoreMethod m_score_method
CConstRef< CSeq_id > m_target_id
TComparisonOptions & SetOptions()
TComparisonOptions m_comp_options
CRef< CSeq_loc_Mapper > m_self_mapper_t
TComparisonOptions GetOptions() const
std::set< std::string > m_seen_targets
const SAnnotSelector & m_selector_t
CRef< ILocMapper > m_mapper
CConstRef< CSeq_loc > m_loc_q
CCompareSeq_locs is used for comparing locations of two features on the same coordinate system. It is ...
double GetRelativeOverlap() const
Relative overlap is defined as ratio of the length of the overlap to the length of the shorter featur...
@ eOverlap_vs_Shorter
overlap versus the shorter of the two features
@ eOverlap_vs_Union
overlap versus the union of the two features
@ eOverlap_vs_First
overlap versus the first of the two features
vector< SIntervalComparisonResult > m_IntComparisons
TSeqPos m_len_seqloc_overlap
void GetSplicingSimilarity(float &score, int *loc1_intervals=NULL, int *loc2_intervals=NULL) const
int m_loc1_interval_count
float m_shared_sites_score
CCompareSeq_locs(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope2, TCompareFlags flags=fCmp_Defaults)
const vector< SIntervalComparisonResult > & GetIndividualComparisons() const
Return the vector of individual exon comparisons.
int m_loc2_interval_count
bool m_cachedOverlapValues
double GetSymmetricalOverlap() const
Symmetrical overlap is defined as length(intersection(loc1, loc2)) / (length(loc1) + length(loc2)) int...
@ fCmp_Match
all junctions match (fuzz-agnostic)
void Reset()
Reset cached comparison results.
double GetOverlap(EOverlapMethod method) const
Calculate overlap according to the specified method.
namespace ncbi::objects::
CNcbiOstream & operator<<(CNcbiOstream &out, const CEquivRange &range)
std::ofstream out("events_result.xml")
main entry point for tests
unsigned int TSeqPos
Type for sequence locations and lengths.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
string GetLabel(const CSeq_id &id)
void GetLabel(string *label) const
Appends a label suitable for display (e.g., error messages). The label must point to an existing string ob...
int TFeatLabelFlags
binary OR of FFeatLabelFlags
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
CRef< CSeq_loc > Seq_loc_Merge(const CSeq_loc &loc, CSeq_loc::TOpFlags flags, CScope *scope)
Merge ranges in the seq-loc.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
#define NCBI_XALGOSEQ_EXPORT
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
unsigned missing_internal() const
unsigned extra_internal() const
unsigned partially_matched
This helper struct is used to accumulate the neighboring comparisons of the same class,...
bool Add(const SIntervalComparisonResult &r)
If the comparison is neighboring and of the same class, set the terminal comparison to it and return ...
void Reset(const SIntervalComparisonResult &r)
SIntervalComparisonResult m_last
SIntervalComparisonResultGroup(bool isReverse)
SIntervalComparisonResult m_first
SIntervalComparisonResult()
bool missing_second() const
int m_position_comparison
SIntervalComparisonResult(unsigned pos1, unsigned pos2, FCompareLocs result, int pos_comparison=0)
bool missing_first() const
string ToString(const wxRect &rc)