49 bool is_intersecting =
50 r1.first.IntersectingWith(
r2.first);
56 <<
": is_intersecting = "
57 << (is_intersecting ?
"true" :
"false")
61 return is_intersecting;
65 const pair<TSeqRange, TSeqRange>&
r2)
67 bool is_intersecting =
68 r1.second.IntersectingWith(
r2.second);
74 <<
": is_intersecting = "
75 << (is_intersecting ?
"true" :
"false")
79 return is_intersecting;
83 const pair<TSeqRange, TSeqRange>&
r2,
86 bool is_consistent =
false;
88 is_consistent = (
r1.first <=
r2.first &&
r1.second <=
r2.second) ||
89 (
r2.first <=
r1.first &&
r2.second <=
r1.second);
92 is_consistent = (
r1.first <=
r2.first &&
r2.second <=
r1.second) ||
93 (
r2.first <=
r1.first &&
r1.second <=
r2.second);
107 <<
": is_consistent = "
108 << (is_consistent ?
"true" :
"false")
109 <<
" r1.first <= r2.first: "
110 << (
r1.first <=
r2.first ?
"true" :
"false")
111 <<
" r1.second <= r2.second: "
112 << (
r1.second <=
r2.second ?
"true" :
"false")
116 return is_consistent;
121 const pair<TSeqRange, TSeqRange>&
r2,
126 if (
r1.first.GetTo() <
r2.first.GetFrom()) {
127 diff +=
r2.first.GetFrom() -
r1.first.GetTo();
129 else if (
r2.first.GetTo() <
r1.first.GetFrom()) {
130 diff +=
r1.first.GetFrom() -
r2.first.GetTo();
133 if (
r1.second.GetTo() <
r2.second.GetFrom()) {
134 diff +=
r2.second.GetFrom() -
r1.second.GetTo();
136 else if (
r2.second.GetTo() <
r1.second.GetFrom()) {
137 diff +=
r1.second.GetFrom() -
r2.second.GetTo();
161 #ifdef _VERBOSE_DEBUG
168 <<
": diff = " << diff
176 typedef pair<TSeqRange, TSeqRange>
TRange;
186 if(
r1.first.first !=
r2.first.first) {
187 return (
r1.first.first <
r2.first.first);
189 if(
r1.first.second !=
r2.first.second) {
190 return (
r1.first.second <
r2.first.second);
208 r1.first.second.GetLength());
210 r2.first.second.GetLength());
227 return r1.second->GetSeqRange(1) <
r2.second->GetSeqRange(1);
236 int scores[2] = {0, 0};
240 if (scores[0] > scores[1]) {
243 if (scores[1] > scores[0]) {
256 return r1.second->GetSeqRange(1) <
r2.second->GetSeqRange(1);
265 double scores[2] = {0.0, 0.0};
268 if(scores[0] == scores[1]) {
270 r1.first.second.GetLength());
272 r2.first.second.GetLength());
276 if (scores[0] > scores[1]) {
279 if (scores[1] > scores[0]) {
292 return r1.second->GetSeqRange(1) <
r2.second->GetSeqRange(1);
303 if (al1_len > al2_len) {
306 if (al2_len > al1_len) {
328 int scores[2] = {0, 0};
331 return scores[0] > scores[1];
340 double scores[2] = {0.0, 0.0};
343 if(scores[0] == scores[1]) {
346 return scores[0] > scores[1];
355 return &*it1 < &*it2;
388 float diff_len_filter)
393 typedef pair<CSeq_id_Handle, ENa_strand> TIdStrand;
394 typedef pair<TIdStrand, TIdStrand> TIdPair;
396 TAlignments alignments;
442 TIdPair p(TIdStrand(qid, q_strand), TIdStrand(sid, s_strand));
443 alignments[p].push_back(*it);
446 typedef pair<SCompartScore, CRef<CSeq_align_set> > TCompartScore;
447 vector<TCompartScore> scored_compartments;
452 const TIdPair& id_pair = align_it->first;
456 vector< CRef<CSeq_align> >& aligns = align_it->second;
467 #ifdef _VERBOSE_DEBUG
469 cerr <<
"ids: " << id_pair.first.first <<
" x "
470 << id_pair.second.first << endl;
472 cerr <<
" sort by score" << endl;
475 cerr <<
" sort by percent identity" << endl;
478 cerr <<
" sort by size" << endl;
483 << (*it)->GetSeqRange(0) <<
", "
486 << (*it)->GetSeqRange(1) <<
", "
498 vector<TAlignRange> align_ranges;
501 TSeqRange q_range = (*iter)->GetSeqRange(0);
502 TSeqRange s_range = (*iter)->GetSeqRange(1);
507 #ifdef _VERBOSE_DEBUG
510 << id_pair.first.first <<
"/" << q_strand
512 << id_pair.second.first <<
"/" << s_strand
514 vector<TAlignRange>::const_iterator prev_it = align_ranges.end();
515 ITERATE (vector<TAlignRange>, it, align_ranges) {
517 << it->first.first <<
", "
518 << it->first.second <<
")"
519 <<
" [" << it->first.first.GetLength()
520 <<
", " << it->first.second.GetLength() <<
"]";
521 if (prev_it != align_ranges.end()) {
522 cerr <<
" consistent="
524 q_strand, s_strand) ?
"true" :
"false");
541 std::sort(align_ranges.begin(), align_ranges.end(),
545 std::sort(align_ranges.begin(), align_ranges.end(),
549 std::sort(align_ranges.begin(), align_ranges.end(),
553 list< TAlignRangeMultiSet > compartments;
562 list<TAlignRangeMultiSet>::iterator best_compart =
567 compart_it, compartments) {
570 compart_it->lower_bound(*it);
573 bool is_consistent =
false;
574 bool is_intersecting_query =
false;
575 bool is_intersecting_subject =
false;
576 if (place == compart_it->end()) {
580 is_intersecting_query =
582 is_intersecting_subject =
592 if (place == compart_it->begin()) {
595 is_intersecting_query =
597 is_intersecting_subject =
609 is_intersecting_query =
611 is_intersecting_subject =
622 is_intersecting_query |=
624 is_intersecting_subject |=
633 q_strand, s_strand));
637 #ifdef _VERBOSE_DEBUG
638 float diff_len_ratio = double(diff) / it->second->GetAlignLength(
false);
639 cerr <<
" comp_id=" << comp_id
640 <<
" is_consistent=" << (is_consistent ?
"true" :
"false")
641 <<
" is_intersecting_query=" << (is_intersecting_query ?
"true" :
"false")
642 <<
" is_intersecting_subject=" << (is_intersecting_subject ?
"true" :
"false")
643 <<
" allow intersect_query="
645 <<
" allow intersect_subject="
647 <<
" allow intersect_both="
650 <<
" best_diff=" << best_diff
651 <<
" align_len=" << it->second->GetAlignLength(
false)
652 <<
" diff_len_ratio=" << diff_len_ratio
653 <<
" filter=" << (diff_len_ratio <= diff_len_filter ?
"pass" :
"fail" )
657 if ( ((is_consistent && !is_intersecting_query && !is_intersecting_subject) ||
660 is_intersecting_query ) ||
662 is_intersecting_subject ) ||
664 is_intersecting_query && is_intersecting_subject )))) &&
666 best_compart = compart_it;
672 if ( !found || best_compart == compartments.end() ) {
674 compartments.back().insert(*it);
677 best_compart->insert(*it);
680 #ifdef _VERBOSE_DEBUG
682 cerr <<
"compartments: found " << compartments.size() << endl;
684 ITERATE (list<TAlignRangeMultiSet>, it, compartments) {
686 cerr <<
" compartment " <<
count << endl;
689 <<
i->first.first <<
", "
690 <<
i->first.second <<
")"
691 <<
" [" <<
i->first.first.GetLength()
692 <<
", " <<
i->first.second.GetLength() <<
"]"
701 #ifdef DEBUG_VERBOSE_OUTPUT
703 cerr <<
"found " << compartments.size() << endl;
705 ITERATE (list<TAlignRangeMultiSet>, it, compartments) {
707 cerr <<
" compartment " <<
count << endl;
710 <<
i->first.first <<
", "
711 <<
i->first.second <<
")"
712 <<
" [" <<
i->first.first.GetLength()
713 <<
", " <<
i->first.second.GetLength() <<
"]"
721 #ifdef DEBUG_VERBOSE_OUTPUT
722 size_t compart_count = 0;
724 ITERATE (list<TAlignRangeMultiSet>, it, compartments) {
725 #ifdef DEBUG_VERBOSE_OUTPUT
742 for (; second_subject_range != it->end()
743 && second_subject_range->first.second
744 == it->begin()->first.second; ++second_subject_range);
745 bool reverse_subject = second_subject_range != it->end() &&
746 second_subject_range->first.second
747 < it->begin()->first.second;
748 TSeqPos subject_end = (reverse_subject
750 : it->rbegin()->first) . second.GetTo();
752 forward_breaks, backward_breaks;
755 #ifdef _VERBOSE_DEBUG
760 #ifdef _VERBOSE_DEBUG
762 <<
i->first.first <<
", "
763 <<
i->first.second <<
")"
764 <<
" [" <<
i->first.first.GetLength()
765 <<
", " <<
i->first.second.GetLength() <<
"]"
769 (reverse_subject ? subject_end -
i->first.second.GetTo()
770 :
i->first.second.GetFrom());
772 (reverse_subject ? subject_end -
i->first.second.GetFrom()
773 :
i->first.second.GetTo());
774 #ifdef _VERBOSE_DEBUG
775 if (
i != it->begin()) {
776 cerr <<
"At " << ++
count <<
" Gap " <<
int(start_point - current_end_point) <<
" on length " << (current_potential_break - current_end_point) << endl;
779 if (
i != it->begin() &&
780 start_point > current_potential_break)
785 #ifdef _VERBOSE_DEBUG
786 cerr <<
"Break! " << (
count - last_break) <<
" members start_point " << start_point <<
" current break " << current_potential_break << endl;
790 current_end_point =
max(current_potential_break, end_point);
791 current_potential_break = current_end_point +
792 diff_len_filter *
i->second->GetAlignLength(
false);
794 current_potential_break = current_end_point = INT_MAX;
795 #ifdef _VERBOSE_DEBUG
801 #ifdef _VERBOSE_DEBUG
803 <<
i->first.first <<
", "
804 <<
i->first.second <<
")"
805 <<
" [" <<
i->first.first.GetLength()
806 <<
", " <<
i->first.second.GetLength() <<
"]"
810 (reverse_subject ? subject_end -
i->first.second.GetTo()
811 :
i->first.second.GetFrom());
813 (reverse_subject ? subject_end -
i->first.second.GetFrom()
814 :
i->first.second.GetTo());
815 #ifdef _VERBOSE_DEBUG
816 if (
i != it->rbegin()) {
817 cerr <<
"At " << ++
count <<
" Gap " <<
int(current_end_point - end_point) <<
" on length " << (current_end_point - current_potential_break) << endl;
820 if (
i != it->rbegin() &&
821 end_point < current_potential_break)
826 backward_breaks.
insert(breakpoint);
827 #ifdef _VERBOSE_DEBUG
828 if (forward_breaks.count(breakpoint)) {
829 cerr <<
"Double after " << (
count - last_double_break) <<
' ';
830 last_double_break =
count;
832 cerr <<
"Break! " << (
count - last_break) <<
" members end_point " << end_point <<
" current break " << current_potential_break << endl;
836 current_end_point =
min(current_potential_break, start_point);
837 current_potential_break = current_end_point -
838 diff_len_filter *
i->second->GetAlignLength(
false);
840 set_intersection(forward_breaks.
begin(), forward_breaks.
end(),
841 backward_breaks.
begin(), backward_breaks.
end(),
842 inserter(break_positions, break_positions.
end()),
846 if (break_positions.count(
i)) {
847 #ifdef DEBUG_VERBOSE_OUTPUT
848 cerr <<
"compartment " << compart_count <<
" break at " <<
i->first.first.GetFrom() <<
".." <<
i->first.first.GetTo() << endl;
850 TCompartScore sc(score, sas);
851 scored_compartments.push_back(sc);
855 sas->
Set().push_back(
i->second);
861 TCompartScore sc(score, sas);
862 scored_compartments.push_back(sc);
869 std::sort(scored_compartments.begin(), scored_compartments.end());
870 ITERATE (vector<TCompartScore>, it, scored_compartments) {
871 align_sets.push_back(it->second);
881 typedef const list <CRef<CSeq_align> > TConstSeqAlignList;
882 TConstSeqAlignList& alignments = compartment->
Get();
884 ITERATE(TConstSeqAlignList, al_it, alignments) {
885 len += (*al_it)->GetAlignLength(
false);
888 ITERATE(TConstSeqAlignList, al_it, alignments) {
889 TConstSeqAlignList::const_iterator al_it_next = al_it;
892 disc_align_set->
Set().push_back(*al_it);
893 if (al_it_next == alignments.end() ||
894 (*al_it)->GetSeqStop(0) + max_gap_len < (*al_it_next)->GetSeqStart(0) ||
895 (*al_it)->GetSeqStop(1) + max_gap_len < (*al_it_next)->GetSeqStart(1))
900 comp_align->
SetSegs().SetDisc(*disc_align_set);
901 aligns.push_back(comp_align);
902 disc_align_set.
Reset(0);
bool SameOrientation(ENa_strand a, ENa_strand b)
@ eScore_PercentIdentity_Ungapped
CRange< TSeqPos > GetSeqRange(TDim row) const
GetSeqRange NB: On a Spliced-seg, in case the product-type is protein, these only return the amin par...
bool GetNamedScore(const string &id, int &score) const
Get score.
TSeqPos GetAlignLength(bool include_gaps=true) const
Get the length of this alignment.
parent_type::iterator iterator
parent_type::const_iterator const_iterator
iterator_bool insert(const value_type &val)
const_iterator begin() const
const_iterator end() const
Include a standard set of the NCBI C++ Toolkit most basic headers.
pair< TSeqRange, TSeqRange > TRange
bool IsConsistent(const pair< TSeqRange, TSeqRange > &r1, const pair< TSeqRange, TSeqRange > &r2, ENa_strand s1, ENa_strand s2)
void JoinCompartment(const CRef< CSeq_align_set > &compartment, float gap_ratio, list< CRef< CSeq_align > > &aligns)
bool IsIntersectingQuery(const pair< TSeqRange, TSeqRange > &r1, const pair< TSeqRange, TSeqRange > &r2)
multiset< TAlignRange, SRangesByStart > TAlignRangeMultiSet
pair< TRange, CRef< CSeq_align > > TAlignRange
TSeqPos Difference(const pair< TSeqRange, TSeqRange > &r1, const pair< TSeqRange, TSeqRange > &r2, ENa_strand s1, ENa_strand s2)
void FindCompartments(const list< CRef< CSeq_align > > &aligns, list< CRef< CSeq_align_set > > &align_sets, TCompartOptions options, float diff_len_filter)
bool IsIntersectingSubject(const pair< TSeqRange, TSeqRange > &r1, const pair< TSeqRange, TSeqRange > &r2)
@ fCompart_AllowIntersectionsSubject
@ fCompart_SortByPctIdent
@ fCompart_FilterByDiffLen
@ fCompart_AllowIntersectionsBoth
@ fCompart_AllowIntersectionsQuery
@ fCompart_AllowInconsistentIntersection
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
int TSignedSeqPos
Type for signed sequence position.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
#define REVERSE_ITERATE(Type, Var, Cont)
ITERATE macro to reverse sequence through container elements.
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
void Reset(void)
Reset reference object.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
Tdata & Set(void)
Assign a value to data member.
void SetSegs(TSegs &value)
Assign a value to Segs data member.
void SetType(TType value)
Assign a value to Type data member.
const Tdata & Get(void) const
Get the member data.
@ eType_disc
discontinuous alignment
ENa_strand
strand of nucleic acid
unsigned int
A callback function used to compare two keys in a database.
static void hex(unsigned char c)
constexpr auto sort(_Init &&init)
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
static const sljit_gpr r1
static const sljit_gpr r2
bool operator<(const SCompartScore &o) const
bool operator()(TAlignRangeMultiSet::const_iterator it1, TAlignRangeMultiSet::const_iterator it2) const
bool operator()(const TAlignRange &r1, const TAlignRange &r2) const
bool operator()(const TAlignRange &r1, const TAlignRange &r2) const
bool operator()(const TAlignRange &r1, const TAlignRange &r2) const
bool operator()(const TAlignRange &r1, const TAlignRange &r2) const
bool operator()(const CRef< CSeq_align > &al_ref1, const CRef< CSeq_align > &al_ref2) const
bool operator()(const CRef< CSeq_align > &al_ref1, const CRef< CSeq_align > &al_ref2) const
bool operator()(const CRef< CSeq_align > &al_ref1, const CRef< CSeq_align > &al_ref2) const