69 if(!c1->IsMatch() && c2->IsMatch())
return true;
70 if(c1->IsMatch() && !c2->IsMatch())
return false;
71 if(!c1->IsMatch() && !c2->IsMatch())
return c1->GetFeatQ().
IsNull();
74 bool c1_sameType = c1->IsSameType();
75 bool c2_sameType = c2->IsSameType();
76 if(c1_sameType && !c2_sameType)
return false;
77 if(!c1_sameType && c2_sameType)
return true;
82 bool c1_same_product = !c1->GetFeatQ().
IsNull() && c1->GetFeatQ()->CanGetProduct()
83 && !c1->GetFeatT().
IsNull() && c1->GetFeatT()->CanGetProduct()
88 bool c2_same_product = !c2->GetFeatQ().
IsNull() && c2->GetFeatQ()->CanGetProduct()
89 && !c2->GetFeatT().
IsNull() && c2->GetFeatT()->CanGetProduct()
95 if(c1_same_product && !c2_same_product)
return false;
96 if(!c1_same_product && c2_same_product)
return true;
104 const float k = 0.8f;
108 c1->GetComparison()->GetSplicingSimilarity(score1);
109 c2->GetComparison()->GetSplicingSimilarity(score2);
111 score1 = k * score1 + (1.0 - k) * c1->GetComparison()->GetSymmetricalOverlap();
112 score2 = k * score2 + (1.0 - k) * c2->GetComparison()->GetSymmetricalOverlap();
114 if(score1 < score2)
return true;
115 if(score2 < score1)
return false;
118 bool c1_sameSubtype = c1->IsSameSubtype();
119 bool c2_sameSubtype = c2->IsSameSubtype();
120 if(c1_sameSubtype && !c2_sameSubtype)
return false;
121 if(!c1_sameSubtype && c2_sameSubtype)
return true;
152 out.setf(ios::fixed);
153 out.setf(ios::showpoint);
160 out << sResult <<
"\t";
195 strm << s_pos1 <<
":";
198 strm <<
"strand-mismatch(" << s_pos2 <<
")";
235 strm << sep << grp.
ToString() <<
"](" << ii <<
")";
315 unsigned it1_exon_ordinal = 1;
324 ++it1, ++it1_exon_ordinal)
327 unsigned it2_exon_ordinal = 1;
328 bool loc1_found_Overlap =
false;
335 ++it2, ++it2_exon_ordinal)
341 it1_cmp_it2 = adjust_for_strand *
351 && loc2_reported_set.
find(it2_exon_ordinal) == loc2_reported_set.
end()
354 loc2_reported_set.
insert(it2_exon_ordinal);
359 , cmp_res, it1_cmp_it2);
372 loc2_reported_set.
insert(it2_exon_ordinal);
373 loc1_found_Overlap =
true;
377 if(!loc1_found_Overlap) {
384 unsigned it2_exon_ordinal = 1;
385 for(
CSeq_loc_CI it2(*loc2); it2; ++it2, ++it2_exon_ordinal) {
386 if(loc2_reported_set.
find(it2_exon_ordinal) == loc2_reported_set.
end()) {
396 for(vector<SIntervalComparisonResult>::iterator it =
m_IntComparisons.begin();
410 for(vector<SIntervalComparisonResult>::reverse_iterator it =
m_IntComparisons.rbegin();
427 if(it->missing_first()) {
429 }
else if(it->missing_second()) {
489 const float terminal_jitter_thr = 20.0f;
490 const float splice_jitter_thr = 5.0f;
496 float best_match_start = 0.0f;
497 float best_match_stop = 0.0f;
506 bool same_strand = strand1 == strand2
518 float thr = (seg1_start == terminal_start
519 || seg1_stop == terminal_stop
520 || seg2_start == terminal_start
521 || seg2_stop == terminal_stop ) ? terminal_jitter_thr : splice_jitter_thr;
524 float match_stop =
max(0.0f, 1.0f -
abs((
long)seg1_stop - (
long)seg2_stop) /
thr);
525 best_match_stop =
max(match_stop, best_match_stop);
550 if(str_result) *str_result =
"strand mismatch; ";
553 if(str_result) *str_result =
"complete match; ";
592 strm <<
"5'extended; ";
595 strm <<
"5'truncated; ";
600 strm <<
"3'extended; ";
603 strm <<
"3'truncated; ";
613 strm <<
"complete match; ";
619 strm <<
"region overlap; ";
622 strm <<
"no overlap; ";
633 strm <<
"superset; ";
666 if ((*dbxref)->GetDb() ==
"GeneID" || (*dbxref)->GetDb() ==
"LocusID") {
667 return (*dbxref)->GetTag().GetId();
678 if ((*dbxref)->GetDb() ==
"GeneID" || (*dbxref)->GetDb() ==
"LocusID") {
679 return (*dbxref)->GetTag().GetId();
704 std::vector<string> tokens;
709 if(tokens.size() == 4 && (tokens[3] ==
"m" || tokens[3] ==
"p")) {
733 vComparisons.clear();
739 _TRACE(
"Starting next comparison group");
741 for(; m_loc_q_ci; ++m_loc_q_ci) {
745 vector<CRef<CCompareFeats> > feat_matches;
746 x_GetPutativeMatches(feat_matches, feat1);
750 aggregate_match_loc_t.
SetNull();
752 if(!(*it)->GetFeatT().IsNull() && !(*it)->GetSelfLocT().IsNull()) {
753 aggregate_match_loc_t.
SetMix();
754 aggregate_match_loc_t.
Add(*(*it)->GetSelfLocT());
775 vComparisons.insert(vComparisons.end(), feat_matches.begin(), feat_matches.end());
801 if(!vComparisons.empty()) {
805 if(m_already_processed_unmatched_targets) {
813 _TRACE(
"Processing unmatched targets");
820 if(m_seen_targets.find(loc_label) != m_seen_targets.end()) {
834 || feat_self_loc->
IsNull()
837 double mapped = feat_len == 0 ? 0.0f : 1.0 - (len_subtr / feat_len);
846 vComparisons.push_back(cf);
849 m_already_processed_unmatched_targets =
true;
851 _TRACE(
"Finished processing this group");
852 return !vComparisons.empty();
860 double mapped_identity(0);
874 int feat1_gene_id = s_GetGeneId(*feat1);
877 if(feat1_gene_id == 0) {
883 bool had_some_matches =
false;
888 && feat1_gene_id == s_GetGeneId(*feat_t))
893 string loc_label =
"";
895 this->m_seen_targets.insert(loc_label);
911 , usingRangeComparison ? *feat1_mapped_range_loc : *feat1_mapped_loc
913 , usingRangeComparison ? *feat1_self_range_loc : *feat1_self_loc
920 vComparisons.push_back(cf);
921 had_some_matches =
true;
925 if(!had_some_matches) {
938 vComparisons.push_back(cf);
946 bool merge_single_range)
951 CSeq_loc_Mapper& mapper = (scope == m_scope_q ? *m_self_mapper_q : *m_self_mapper_t);
952 new_loc = mapper.
Map(loc);
955 if(merge_single_range){
957 (new_loc.
IsNull() ? loc : *new_loc),
992 return *obj_id1 < *obj_id2;
1037 typedef priority_queue<CRef<CCompareFeats>,
1038 vector<CRef<CCompareFeats> >,
1055 if(!cf->GetFeatQ().
IsNull()) {
1056 q_map[cf->GetFeatQ()].push(cf);
1059 if(!cf->GetFeatT().
IsNull()) {
1060 t_map[cf->GetFeatT()].push(cf);
1069 ITERATE(TMatchesMap, it, q_map) {
1077 if(!best_match.
IsNull() && best_match->IsMatch()) {
1080 ERR_POST(
Info <<
"Best match for " << s0 <<
" : " << s1 <<
", out of " << it->second.size());
1086 ITERATE(TMatchesMap, it, t_map) {
1094 if(!best_match.
IsNull() && best_match->IsMatch()) {
1097 ERR_POST(
Info <<
"Best match for " << s0 <<
" : " << s1 <<
", out of " << it->second.size());
1108 if(q_map[cf->GetFeatQ()].top() == cf && t_map[cf->GetFeatT()].top() == cf) {
1109 cf->SetIrrelevance(0);
1116 if(compset.
find(cf) == compset.
end() &&
1117 (q_map[cf->GetFeatQ()].top() == cf || cf->GetFeatQ().
IsNull()))
1119 cf->SetIrrelevance(1);
1126 if(compset.
find(cf) == compset.
end() &&
1127 (t_map[cf->GetFeatT()].top() == cf || cf->GetFeatT().
IsNull()))
1129 cf->SetIrrelevance(2);
1134 vComparisons.clear();
1144 vComparisons.push_back(*it);
User-defined methods of the data storage class.
bool IsReverse(ENa_strand s)
@ eExtreme_Biological
5' and 3'
bool SameOrientation(ENa_strand a, ENa_strand b)
CCompareFeats represents the result of a comparison of two features.
CConstRef< CSeq_feat > m_feat1
CConstRef< CSeq_loc > m_feat1_mapped_loc
static string s_GetLocLabel(const CSeq_loc &loc, bool merged=false)
CRef< CCompareSeq_locs > m_compare
static string s_GetFeatLabel(const CSeq_feat &gene_feat, feature::TFeatLabelFlags type=feature::fFGL_Both)
CConstRef< CSeq_feat > m_feat2
double GetMappedIdentity() const
static int s_GetGeneId(const CSeq_feat &feat)
CConstRef< CSeq_loc > x_GetSelfLoc(const CSeq_loc &loc, CScope *scope, bool merge_single_range)
bool NextComparisonGroup(vector< CRef< CCompareFeats > > &v)
Return the next group of comparisons on the region (return true iff found any). A group is a set of fe...
void SelectMatches(vector< CRef< CCompareFeats > > &v)
void x_GetPutativeMatches(vector< CRef< CCompareFeats > > &v, CConstRef< CSeq_feat > q_feat)
double GetRelativeOverlap() const
Relative overlap is defined as ratio of the length of the overlap to the length of the shorter featur...
vector< SIntervalComparisonResult > m_IntComparisons
string GetEvidenceString() const
The evidence string is a whitespace-separated list of exon comparisons. Each exon comparison is a pair...
TSeqPos m_len_seqloc_overlap
void x_ComputeOverlapValues() const
Recompute m_len_seqloc_overlap, m_len_seqloc1, and m_len_seqloc2.
TCompareLocsFlags GetResult(string *str_out=NULL) const
str_out will contain human-readable summary of the internal comparison
int m_loc1_interval_count
float m_shared_sites_score
FCompareLocs x_CompareInts(const CSeq_loc &loc1, const CSeq_loc &loc2) const
Compare two exons.
int m_loc2_interval_count
bool m_cachedOverlapValues
double GetSymmetricalOverlap() const
Symmetrical overlap is defined as length(intersection(loc1, loc2) / (length(loc1) + length(loc2)) int...
@ fCmp_Subset
comparison loc is a subset of the reference loc; some interval boundaries do not match
@ fCmp_5pExtension
5' terminal interval extended (other splice junction matches)
@ fCmp_intsExtra_internal
comparison loc has extra interval(s) internally
@ fCmp_5pTruncation
5' terminal interval truncated (other splice junction matches)
@ fCmp_Superset
comparison loc is a superset of the reference loc; some interval boundaries do not match
@ fCmp_intsExtra_5p
comparison loc has extra interval(s) at 5' end
@ fCmp_RegionOverlap
overlap of the extremes
@ fCmp_intsMissing_3p
comparison loc is missing interval(s) at 3' end
@ fCmp_3pExtension
3' terminal interval extended (other splice junction matches)
@ fCmp_3pTruncation
3' terminal interval truncated (other splice junction matches)
@ fCmp_NoOverlap
seq_locs do not overlap at all
@ fCmp_StrandDifferent
different strand
@ fCmp_Unknown
failed to compare
@ fCmp_Overlap
at least one interval overlaps
@ fCmp_intsMissing_internal
comparison loc is missing interval(s) internally
@ fCmp_intsMissing_5p
comparison loc is missing interval(s) at 5' end
@ fCmp_Match
all junctions match (fuzz-agnostic)
@ fCmp_intsExtra_3p
comparison loc has extra interval(s) at 3' end
void x_Compare()
Process the seq_locs and generate the m_IntComparisons vector; Recompute the counts.
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string. Sample usage:
ESubtype GetSubtype(void) const
namespace ncbi::objects::
int Compare(const CSeq_feat &f2) const
Compare relative order of this feature and feature f2, ordering first by features' coordinates,...
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
iterator_bool insert(const value_type &val)
const_iterator find(const key_type &key) const
const_iterator end() const
CNcbiOstream & operator<<(CNcbiOstream &out, const CCompareFeats &cf)
std::ofstream out("events_result.xml")
main entry point for tests
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
void Info(CExceptionArgs_Base &args)
#define MSerial_AsnText
I/O stream manipulators –.
void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const
Append a label for this Seq-id to the supplied string.
string GetLabel(const CSeq_id &id)
@ eContent
Untagged human-readable accession or the like.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Override Assign() to incorporate cache invalidation.
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
void Add(const CSeq_loc &other)
Simple adding of seq-locs.
void GetLabel(string *label) const
Appends a label suitable for display (e.g., error messages) label must point to an existing string ob...
void SetNull(void)
Override all setters to incorporate cache invalidation.
TSeqPos GetStop(ESeqLocExtremes ext) const
@ eEmpty_Allow
ignore empty locations
TSeqPos GetStop(const CSeq_loc &loc, CScope *scope, ESeqLocExtremes ext=eExtreme_Positional)
If only one CBioseq is represented by CSeq_loc, returns the position at the stop of the location.
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
ENa_strand GetStrand(const CSeq_loc &loc, CScope *scope=0)
Returns eNa_strand_unknown if multiple Bioseqs in loc Returns eNa_strand_other if multiple strands in...
TSeqPos GetStart(const CSeq_loc &loc, CScope *scope, ESeqLocExtremes ext=eExtreme_Positional)
If only one CBioseq is represented by CSeq_loc, returns the position at the start of the location.
sequence::ECompare Compare(const CSeq_loc &loc1, const CSeq_loc &loc2, CScope *scope)
Returns the sequence::ECompare containment relationship between CSeq_locs.
CRef< CSeq_loc > Seq_loc_Subtract(const CSeq_loc &loc1, const CSeq_loc &loc2, CSeq_loc::TOpFlags flags, CScope *scope)
Subtract the second seq-loc from the first one.
CRef< CSeq_loc > Seq_loc_Merge(const CSeq_loc &loc, CSeq_loc::TOpFlags flags, CScope *scope)
Merge ranges in the seq-loc.
bool IsOneBioseq(const CSeq_loc &loc, CScope *scope)
Returns true if all embedded CSeq_ids represent the same CBioseq, else false.
CRef< CSeq_loc > Seq_loc_Add(const CSeq_loc &loc1, const CSeq_loc &loc2, CSeq_loc::TOpFlags flags, CScope *scope)
Add two seq-locs.
int TestForOverlap(const CSeq_loc &loc1, const CSeq_loc &loc2, EOverlapType type, TSeqPos circular_len=kInvalidSeqPos, CScope *scope=0)
Calls TestForOverlap64() and if the result is greater than kMax_Int truncates it to kMax_Int.
bool IsSameBioseq(const CSeq_id &id1, const CSeq_id &id2, CScope *scope, CScope::EGetBioseqFlag get_flag=CScope::eGetBioseq_All)
Determines if two CSeq_ids represent the same CBioseq.
@ fCompareOverlapping
Check if seq-locs are overlapping.
@ eOverlap_Simple
any overlap of extremes
@ eContains
First CSeq_loc contains second.
@ eOverlap
CSeq_locs overlap.
@ eSame
CSeq_locs contain each other.
@ eContained
First CSeq_loc contained by second.
@ eNoOverlap
CSeq_locs do not overlap or abut.
CRef< CSeq_loc > Map(const CSeq_loc &src_loc)
Map seq-loc.
SAnnotSelector & SetOverlapIntervals(void)
Check overlapping of individual intervals.
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
void Reset(void)
Reset reference object.
bool IsNull(void) const THROWS_NONE
Check if pointer is null – same effect as Empty().
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static const char label[]
const TDb & GetDb(void) const
Get the Db member data.
const TLocus & GetLocus(void) const
Get the Locus member data.
vector< CRef< CDbtag > > TDbxref
E_Choice Which(void) const
Which variant is currently selected.
const TId & GetId(void) const
Get the Id member data.
const TLocal & GetLocal(void) const
Get the variant data.
const TLocation & GetLocation(void) const
Get the Location member data.
bool IsLocal(void) const
Check if variant Local is selected.
bool IsGene(void) const
Check if variant Gene is selected.
const TData & GetData(void) const
Get the Data member data.
bool CanGetData(void) const
Check if it is safe to call GetData method.
const TDbxref & GetDbxref(void) const
Get the Dbxref member data.
bool CanGetId(void) const
Check if it is safe to call GetId method.
const TProduct & GetProduct(void) const
Get the Product member data.
const TGene & GetGene(void) const
Get the variant data.
bool CanGetProduct(void) const
Check if it is safe to call GetProduct method.
bool IsSetDbxref(void) const
support for xref to other databases Check if a value has been assigned to Dbxref data member.
bool IsRna(void) const
Check if variant Rna is selected.
bool IsEmpty(void) const
Check if variant Empty is selected.
ENa_strand
strand of nucleic acid
const TWhole & GetWhole(void) const
Get the variant data.
E_Choice Which(void) const
Which variant is currently selected.
bool IsNull(void) const
Check if variant Null is selected.
@ eNa_strand_both
in forward orientation
@ e_not_set
No variant selected.
unsigned missing_internal() const
unsigned extra_internal() const
unsigned partially_matched
This helper struct is used to accumulate the neighboring comparisons of the same class,...
bool Add(const SIntervalComparisonResult &r)
if the comparison is neighboring and of the same class, set the terminal comparison to it and return ...
void Reset(const SIntervalComparisonResult &r)
SIntervalComparisonResult m_last
SIntervalComparisonResult m_first
int m_position_comparison
Comparison functor for pqueue storing related comparisons.
bool operator()(const CRef< CCompareFeats > &c1, const CRef< CCompareFeats > &c2) const
bool operator()(CConstRef< CSeq_feat > f1, CConstRef< CSeq_feat > f2) const
CRef< CTestThread > thr[k_NumThreadsMax]