50 : m_DsCnt(aln_mix_matches->m_DsCnt),
51 m_AlnMixMatches(aln_mix_matches),
52 m_Matches(aln_mix_matches->m_Matches),
53 m_AlnMixSequences(aln_mix_matches->m_AlnMixSequences),
54 m_Seqs(aln_mix_matches->m_Seqs),
55 m_Rows(m_AlnMixSequences->m_Rows),
56 m_ExtraRows(m_AlnMixSequences->m_ExtraRows),
58 m_SingleRefseq(
false),
59 x_CalculateScore(calc_score)
78 (*seq_i)->SetStarts().clear();
79 (*seq_i)->m_ExtraRow = 0;
89 "CAlnMixMerger::Merge(): "
90 "No alignments were added for merging.");
103 bool first_refseq =
true;
112 ds_cnt = (*it)->m_DsCnt;
118 if ( !first_refseq ) {
125 first_refseq =
false;
133 (*seq_i)->m_SeqIdx = seq_idx++;
143 (*seq_i)->m_Width = (*seq_i)->m_IsAA ? 1 : 3;
149 CAlnMixSeq * refseq = 0, * seq1 = 0, * seq2 = 0;
151 int width1 = 0, width2 = 0;
153 CAlnMixSeq::TMatchList::iterator match_list_iter1, match_list_iter2;
154 CAlnMixSeq::TMatchList::iterator match_list_i;
157 refseq = *(
m_Seqs.begin());
158 TMatches::iterator match_i =
m_Matches.begin();
187 if ( !((*it)->m_MatchList.empty()) &&
188 (*it)->m_RefBy == refseq) {
191 "CAlnMixMerger::x_Merge(): "
192 "Infinitue loop detected "
193 "while searching for a connected candidate.");
203 if ( !((*it)->m_MatchList.empty()) ) {
206 "CAlnMixMerger::x_Merge(): "
207 "Infinitue loop detected "
208 "while searching for a new candidate.");
219 first_refseq =
false;
229 match = *match_list_i;
231 if (refseq ==
match->m_AlnSeq2 && refseq ==
match->m_AlnSeq1) {
244 if (
match->m_AlnSeq1 == refseq) {
245 seq1 =
match->m_AlnSeq1;
246 start1 =
match->m_Start1;
248 match_list_iter1 =
match->m_MatchIter1;
249 seq2 =
match->m_AlnSeq2;
250 start2 =
match->m_Start2;
252 match_list_iter2 =
match->m_MatchIter2;
253 }
else if (
match->m_AlnSeq2 == refseq) {
254 seq1 =
match->m_AlnSeq2;
255 start1 =
match->m_Start2;
257 match_list_iter1 =
match->m_MatchIter2;
258 seq2 =
match->m_AlnSeq1;
259 start2 =
match->m_Start1;
261 match_list_iter2 =
match->m_MatchIter1;
263 seq1 =
match->m_AlnSeq1;
264 seq2 =
match->m_AlnSeq2;
268 match->m_MatchIter1 =
269 seq1->m_MatchList.insert(seq1->m_MatchList.end(),
match);
271 match->m_MatchIter2 =
272 seq2->m_MatchList.insert(seq2->m_MatchList.end(),
match);
285 if ( !first_refseq ) {
286 if ( !seq1->m_MatchList.empty() ) {
287 seq1->m_MatchList.erase(match_list_iter1);
288 match_list_iter1 = seq1->m_MatchList.end();
293 match->m_AlnSeq1 = seq1;
294 match->m_Start1 = start1;
295 match->m_AlnSeq2 = seq2;
296 match->m_Start2 = start2;
297 match->m_MatchIter1 = match_list_iter1;
299 match->m_MatchIter2 = match_list_iter2;
305 !
match->m_StrandsDiffer);
310 if ((plane_it =
m_Planes.
find(make_pair(seq1, seq2))) !=
312 plane = &(plane_it->second);
315 match->m_AlnSeq2->m_Width);
316 plane = &(
m_Planes[make_pair(seq1, seq2)] = new_plane);
318 plane->
Diff(substrahend, diff);
333 width1 = seq1->m_Width;
335 width2 = seq2->m_Width;
344 seq1 =
match->m_AlnSeq1;
345 start1 =
match->m_Start1;
346 match_list_iter1 =
match->m_MatchIter1;
347 seq2 =
match->m_AlnSeq2;
348 start2 =
match->m_Start2;
350 match_list_iter2 =
match->m_MatchIter2;
357 bool proper_row_found =
false;
359 if (seq1->m_DsIdx ==
match->m_DsIdx) {
360 proper_row_found =
true;
363 if (seq1->m_ExtraRow) {
364 seq1 =
match->m_AlnSeq1 = seq1->m_ExtraRow;
370 if ( !proper_row_found &&
373 "CAlnMixMerger::x_Merge(): "
374 "Proper row not found for the match. "
375 "Cannot use fQuerySeqMergeOnly?");
382 seq2->m_RefBy = refseq;
385 if ( !first_refseq ) {
386 if ( !seq2->m_MatchList.empty() ) {
388 match_list_iter2 = seq2->m_MatchList.end();
389 match->m_MatchIter2 = match_list_iter2;
394 start_i = starts.
end();
395 lo_start_i = starts.
end();
396 hi_start_i = starts.
end();
422 seq1 =
match->m_AlnSeq1;
423 start1 =
match->m_Start1;
424 match_list_iter1 =
match->m_MatchIter1;
425 seq2 =
match->m_AlnSeq2;
426 start2 =
match->m_Start2;
428 match_list_iter2 =
match->m_MatchIter2;
432 if (!seq2->m_ExtraRow) {
435 row->m_BioseqHandle = seq2->m_BioseqHandle;
436 row->m_SeqId = seq2->m_SeqId;
437 row->m_Width = seq2->m_Width;
438 row->m_Frame = start2 % 3;
439 row->m_SeqIdx = seq2->m_SeqIdx;
440 row->m_ChildIdx = seq2->m_ChildIdx + 1;
445 row->m_ExtraRowIdx = seq2->m_ExtraRowIdx + 1;
446 seq2 =
match->m_AlnSeq2 = seq2->m_ExtraRow =
row;
449 seq2 =
match->m_AlnSeq2 = seq2->m_ExtraRow;
455 seq1 =
match->m_AlnSeq1;
456 start1 =
match->m_Start1;
457 match_list_iter1 =
match->m_MatchIter1;
458 seq2 =
match->m_AlnSeq2;
459 start2 =
match->m_Start2;
461 match_list_iter2 =
match->m_MatchIter2;
468 if (!starts.
size()) {
476 if ( !((*it)->m_MatchList.empty()) ) {
477 (*it)->m_MatchList.clear();
486 seq1->m_PositiveStrand = (seq1->m_StrandScore >= 0);
495 starts[start1] = seg;
497 (seq1, lo_start_i = hi_start_i = starts.
begin());
500 seq2->m_DsIdx =
match->m_DsIdx;
507 if ((lo_start_i = start_i = starts.
lower_bound(start1))
509 start1 < start_i->
first) {
511 if (lo_start_i != starts.
begin()) {
517 if (hi_start_i == starts.
end() && start_i != lo_start_i) {
519 if (lo_start_i->first + prev_seg->
m_Len * width1 >
524 TSeqPos len1 = (start1 - lo_start_i->first) / width1;
531 starts[start1] = seg;
537 tmp_start_i = it->second;
539 seq1->m_PositiveStrand) {
541 [tmp_start_i->first + len1 * seq->
m_Width]
547 "CAlnMixMerger::x_Merge(): "
548 "Internal error: tmp_start_i == seq->GetStarts().end()");
552 [tmp_start_i->first + len2 * seq->
m_Width]
554 seq->
SetStarts()[tmp_start_i->first] = seg;
560 "CAlnMixMerger::x_Merge(): "
561 "Internal error: tmp_start_i == seq->GetStarts().end()");
567 prev_seg->
m_Len = len1;
569 if (start_i != starts.
begin()) {
573 if (lo_start_i != starts.
end()) {
581 while (hi_start_i == starts.
end()) {
582 if (start_i != starts.
end() && start_i->first == start) {
584 if (prev_seg->
m_Len > curr_len) {
592 start += curr_len * width1;
595 prev_seg->
m_Len = curr_len;
601 tmp_start_i = it->second;
603 seq1->m_PositiveStrand) {
611 "CAlnMixMerger::x_Merge(): "
612 "Internal error: tmp_start_i == seq->GetStarts().end()");
618 seq->
SetStarts()[tmp_start_i->first] = seg;
624 "CAlnMixMerger::x_Merge(): "
625 "Internal error: tmp_start_i == seq->GetStarts().end()");
629 #if _DEBUG && _ALNMGR_DEBUG
639 hi_start_i = start_i;
640 }
else if (curr_len == prev_seg->
m_Len) {
643 hi_start_i = start_i;
647 start += prev_seg->
m_Len * width1;
648 curr_len -= prev_seg->
m_Len;
649 if (start_i != starts.
end()) {
656 tmp_start_i = start_i;
657 if (tmp_start_i != starts.
begin()) {
661 if (start_i != starts.
end() &&
662 start + curr_len * width1 > start_i->first) {
665 seg->
m_Len = (start_i->first - start) / width1;
670 seg->
m_Len = curr_len;
672 hi_start_i = start_i;
673 if (hi_start_i != starts.
begin()) {
677 start += seg->
m_Len * width1;
678 curr_len -= seg->
m_Len;
679 if (lo_start_i == start_i) {
680 if (lo_start_i != starts.
begin()) {
692 if (!seq2->m_ExtraRow) {
695 row->m_BioseqHandle = seq2->m_BioseqHandle;
696 row->m_SeqId = seq2->m_SeqId;
697 row->m_Width = seq2->m_Width;
698 row->m_Frame = start2 % 3;
699 row->m_SeqIdx = seq2->m_SeqIdx;
700 row->m_ChildIdx = seq2->m_ChildIdx + 1;
705 row->m_ExtraRowIdx = seq2->m_ExtraRowIdx + 1;
706 seq2 =
match->m_AlnSeq2 = seq2->m_ExtraRow =
row;
709 seq2 =
match->m_AlnSeq2 = seq2->m_ExtraRow;
715 seq1 =
match->m_AlnSeq1;
716 start1 =
match->m_Start1;
717 match_list_iter1 =
match->m_MatchIter1;
718 seq2 =
match->m_AlnSeq2;
719 start2 =
match->m_Start2;
721 match_list_iter2 =
match->m_MatchIter2;
732 start1 =
match->m_Start1;
733 start2 =
match->m_Start2;
738 if (seq2->GetStarts().empty()) {
739 seq2->m_PositiveStrand =
740 (seq1->m_PositiveStrand ?
741 !
match->m_StrandsDiffer :
742 match->m_StrandsDiffer);
750 start_i =
match->m_StrandsDiffer ? hi_start_i : lo_start_i;
752 while(start < start2 +
len * width2) {
753 if (start2_i != starts2.
end() &&
754 start2_i->first == start) {
757 if (start2_i->second != start_i->second) {
768 tmp_start_i = it->second;
769 tmp_start_i->second = start_i->second;
770 start_i->second->SetStartIterator(tmp_seq, tmp_start_i);
772 #if _DEBUG && _ALNMGR_DEBUG
773 start_i->second->StartItsConsistencyCheck(*seq2,
780 seq2->
SetStarts()[start] = start_i->second;
787 if (start_i->second->m_StartIts.find(seq2) !=
788 start_i->second->m_StartIts.end()) {
791 "CAlnMixMerger::x_Merge(): "
793 "Start iterator already exists for seq2.");
795 start_i->second->SetStartIterator(seq2, start2_i);
796 #if _DEBUG && _ALNMGR_DEBUG
797 start_i->second->StartItsConsistencyCheck(*seq2,
804 start += start_i->second->m_Len * width2;
805 if (start2_i != starts2.
end()) {
808 if (
match->m_StrandsDiffer) {
809 if (start_i != starts.
begin()) {
813 if (start_i != starts.
end()) {
842 const int& width1 = seq1->
m_Width;
843 const int& width2 = seq2->
m_Width;
872 !
match->m_StrandsDiffer :
873 match->m_StrandsDiffer)) {
898 starts2_i->second->m_StartIts.find(seq1);
899 if (seq1_start_it_i != starts2_i->second->m_StartIts.end()) {
900 const TSeqPos& existing_start1 = seq1_start_it_i->second->first;
901 if (
match->m_StrandsDiffer) {
902 delta = start1 +
len * width1 - existing_start1;
912 start2 +=
delta * width2;
920 delta = (existing_start1 +
921 starts2_i->second->m_Len - start1) /
925 start1 +=
delta * width1;
940 delta = existing_start1
941 + starts2_i->second->m_Len * width1
952 start1 +=
delta * width1;
953 start2 +=
delta * width2;
961 delta = (existing_start1 - start1) / width1;
981 delta = starts2_i->first + starts2_i->second->m_Len * width2
992 start2 +=
delta * width2;
993 if ( !
match->m_StrandsDiffer ) {
994 start1 +=
delta * width1;
1010 starts2_i->first < start2 +
len * width2) {
1013 starts2_i->second->m_StartIts.find(seq1);
1014 if (seq1_start_it_i != starts2_i->second->m_StartIts.end()) {
1015 const TSeqPos& existing_start1 = seq1_start_it_i->second->first;
1016 if (
match->m_StrandsDiffer) {
1019 delta1 = (start1 - existing_start1) / width1 +
1020 len - starts2_i->second->m_Len;
1024 delta1 = (existing_start1 - start1) / width1;
1026 delta2 = (starts2_i->first - start2) / width2;
1027 if (delta1 != delta2) {
1029 delta = (delta1 < delta2 ? delta1 : delta2);
1031 if (
match->m_StrandsDiffer) {
1053 starts2_i->second->m_StartIts.find(seq1);
1054 if (seq1_start_it_i != starts2_i->second->m_StartIts.end()) {
1055 const TSeqPos& existing_start1 = seq1_start_it_i->second->first;
1056 if (
match->m_StrandsDiffer) {
1057 delta = existing_start1 +
1058 starts2_i->second->m_Len * width1 - start1;
1080 delta = start1 +
len * width1 - existing_start1;
1108 starts1_i->first != start1) {
1116 match->m_StrandsDiffer ? start2 +
len * width2 : start2;
1118 starts1_i->first < start1 +
len * width1) {
1121 starts1_i->second->m_StartIts;
1123 if (
match->m_StrandsDiffer) {
1124 tmp_start -= starts1_i->second->m_Len * width2;
1127 if ((seq2_start_it_i = seg_start_its.
find(seq2)) !=
1128 seg_start_its.
end()) {
1129 if (seq2_start_it_i->second->first != tmp_start) {
1135 if ( !
match->m_StrandsDiffer ) {
1136 tmp_start += starts1_i->second->m_Len * width2;
1162 int num = numrows * numsegs;
1181 ids.resize(numrows);
1182 lens.resize(numsegs);
1183 starts.resize(num, -1);
1186 vector<bool> row_empty(numrows,
true);
1191 ids[numrow++] = (*row_i)->m_SeqId;
1202 lens[numseg] = (*seg_i)->m_Len;
1206 (*seg_i)->m_StartIts) {
1207 starts[
offset + start_its_i->first->m_RowIdx] =
1208 start_its_i->second->first;
1211 row_empty[start_its_i->first->m_RowIdx] =
false;
1218 if ((*row_i)->m_PositiveStrand) {
1225 numseg++;
offset += numrows;
1235 widths->resize(numrows);
1238 (*widths)[numrow++] = (*row_i)->m_Width;
1243 for(
int row = numrows-1;
row >=0; --
row) {
1244 if (row_empty[
row]) {
1245 ids.erase(ids.begin()+
row);
1247 widths->erase(widths->begin()+
row);
1249 for (
int i = (numsegs-1)*numrows +
row;
i > 0;
i -= numrows) {
1250 starts.erase(starts.begin()+
i);
1251 strands.erase(strands.begin()+
i);
1272 if (seq ==
match->m_AlnSeq1) {
1273 frame =
match->m_Start1 % 3;
1275 frame =
match->m_Start2 % 3;
1280 while ((
unsigned)seq->
m_Frame != frame) {
class CAlignRangeCollection<TAlignRange> represents a sorted collection of TAlignRange.
const_iterator insert(const TAlignRange &r)
const_iterator begin() const
CAlignRange Represents an element of pairwise alignment of two sequences.
void x_CreateDenseg(void)
TSecondRowFits x_SecondRowFits(CAlnMixMatch *match) const
void Merge(TMergeFlags flags=0)
@ eSecondRowInconsistency
CRef< CAlnMixMatches > m_AlnMixMatches
CRef< CAlnMixSegments > m_AlnMixSegments
CAlnMixMerger(CRef< CAlnMixMatches > &aln_mix_matches, TCalcScoreMethod calc_score=0)
list< CRef< CAlnMixSeq > > & m_ExtraRows
void x_SetSeqFrame(CAlnMixMatch *match, CAlnMixSeq *&seq)
vector< CRef< CAlnMixSeq > > TSeqs
CAlnMixMatches::TCalcScoreMethod TCalcScoreMethod
CRef< CAlnMixSequences > m_AlnMixSequences
vector< CRef< CAlnMixSeq > > & m_Rows
void StartItsConsistencyCheck(const CAlnMixSeq &seq, const TSeqPos &start, size_t match_idx) const
void SetStartIterator(CAlnMixSeq *seq, CAlnMixStarts::iterator iter)
TStartIterators m_StartIts
void Build(bool gap_join=false, bool min_gap=false, bool remove_leading_and_trailing_gaps=false)
list< CAlnMixSegment * > TSegments
void FillUnalignedRegions(void)
CAlnMixStarts & SetStarts()
const CAlnMixStarts & GetStarts() const
const CBioseq_Handle * m_BioseqHandle
TWidths & SetWidths(void)
void Validate(bool full_test=false) const
void Diff(const TAlnRngColl &substrahend, TAlnRngColl &difference)
Calculate a difference.
void x_SetTaskCompleted(int completed)
void x_SetTaskName(const string &name)
Methods for reporting task progress.
void x_SetTaskTotal(int total)
bool x_InterruptTask()
Check if the task should be interrupted.
container_type::const_iterator const_iterator
container_type::iterator iterator
const_iterator begin() const
const_iterator end() const
const_iterator lower_bound(const key_type &key) const
const_iterator find(const key_type &key) const
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
int TSignedSeqPos
Type for signed sequence position.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
void Reset(void)
Reset reference object.
position_type GetSecondFrom(void) const
position_type GetFirstFrom(void) const
position_type GetLength(void) const
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
TLens & SetLens(void)
Assign a value to Lens data member.
void SetSegs(TSegs &value)
Assign a value to Segs data member.
vector< ENa_strand > TStrands
void SetDim(TDim value)
Assign a value to Dim data member.
vector< TSignedSeqPos > TStarts
void SetDim(TDim value)
Assign a value to Dim data member.
void SetType(TType value)
Assign a value to Type data member.
vector< CRef< CSeq_id > > TIds
TStarts & SetStarts(void)
Assign a value to Starts data member.
TStrands & SetStrands(void)
Assign a value to Strands data member.
void SetNumseg(TNumseg value)
Assign a value to Numseg data member.
TIds & SetIds(void)
Assign a value to Ids data member.
unsigned int
A callback function used to compare two keys in a database.
Int4 delta(size_t dimension_, const Int4 *score_)
static int match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket, PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb)
#define row(bind, expected)