49 TCalcScoreMethod calc_score)
50 : m_AlnMixSequences(aln_mix_sequences),
51 m_Rows(m_AlnMixSequences->m_Rows),
52 m_ExtraRows(m_AlnMixSequences->m_ExtraRows),
53 x_CalculateScore(calc_score)
60 bool remove_leading_and_trailing_gaps)
64 #if _DEBUG && _ALNMGR_DEBUG
71 CAlnMixSequences::TSeqs::iterator refseq_it =
m_Rows.begin();
72 bool orig_refseq =
true;
75 while (refseq_it !=
m_Rows.end()) {
76 refseq = *(refseq_it++);
87 if (gapped_segs.size()) {
105 #if _DEBUG && _ALNMGR_TRACE
106 cerr <<
"refseq is on row " << refseq->
m_RowIdx
107 <<
" seq " << refseq->
m_SeqIdx <<
"\n";
111 stack< CRef<CAlnMixSegment> > seg_stack;
118 #if _DEBUG && _ALNMGR_TRACE
119 cerr <<
" [row " << refseq->
m_RowIdx
121 <<
" start " << refseq_start
122 <<
" was pushed into stack\n";
126 while ( !seg_stack.empty() ) {
131 seg_stack.top()->m_StartIts) {
135 if (
row->GetStarts().current != start_its_i->second) {
137 const TSeqPos& curr_row_start =
row->GetStarts().current->first;
138 const TSeqPos& row_start = start_its_i->second->first;
140 if (
row->m_PositiveStrand ?
146 string(
"CAlnMixSegments::Build():")
147 +
" Internal error: Integrity broken" +
157 (
row->m_PositiveStrand ?
"plus" :
"minus");
161 seg_stack.push(
row->GetStarts().current->second);
164 if (!seg_set.
insert(
row->GetStarts().current->second).second) {
166 string(
"CAlnMixSegments::Build():")
167 +
" Internal error: Infinite loop detected.";
170 #if _DEBUG && _ALNMGR_TRACE
171 cerr <<
" [row " <<
row->m_RowIdx
172 <<
" seq " <<
row->m_SeqIdx
173 <<
" start " << curr_row_start
174 <<
" (left of start " << row_start <<
") "
175 <<
"was pushed into stack\n";
186 seg_stack.top()->m_StartIts) {
190 const TSeqPos& curr_row_start =
row->GetStarts().current->first;
191 const TSeqPos& row_start = start_its_i->second->first;
193 if (
row->m_PositiveStrand &&
196 !
row->m_PositiveStrand &&
200 string(
"CAlnMixSegments::Build():")
201 +
" Internal error: Integrity broken" +
211 (
row->m_PositiveStrand ?
"plus" :
"minus");
216 if (
row->m_PositiveStrand) {
217 row->SetStarts().current++;
219 if (
row->SetStarts().current ==
row->GetStarts().begin()) {
220 row->SetStarts().current =
row->GetStarts().end();
222 row->SetStarts().current--;
227 if (seg_stack.size() > 1) {
229 gapped_segs.push_back(seg_stack.top());
231 #if _DEBUG && _ALNMGR_TRACE
232 cerr <<
" seg popped].\n";
236 if (gapped_segs.size()) {
241 }
else if (min_gap) {
248 seg_i, gapped_segs) {
258 gapped_segs.push_back(seg_stack.top());
261 #if _DEBUG && _ALNMGR_TRACE
262 cerr <<
" refseq seg popped].\n";
272 if (remove_leading_and_trailing_gaps) {
288 vector<TSignedSeqPos> starts;
289 vector<TSeqPos> lens;
290 starts.resize(
m_Rows.size(), -1);
291 lens.resize(
m_Rows.size(), 0);
296 TSegments::iterator seg_i =
m_Segments.begin();
298 len = (*seg_i)->m_Len;
300 (*seg_i)->m_StartIts) {
302 rowidx =
row->m_RowIdx;
304 TSeqPos& prev_len = lens[rowidx];
305 TSeqPos start = start_its_i->second->first;
306 const bool plus =
row->m_PositiveStrand;
307 const int& width =
row->m_Width;
308 TSeqPos prev_start_plus_len = prev_start + prev_len * width;
310 if (prev_start >= 0) {
311 if (
plus && prev_start_plus_len < start ||
316 if (
row->m_PositiveStrand) {
317 new_start = prev_start + prev_len * width;
318 seg->
m_Len = (start - new_start) / width;
320 new_start = start_plus_len;
321 seg->
m_Len = (prev_start - new_start) / width;
323 row->SetStarts()[new_start] = seg;
327 row->m_PositiveStrand ?
346 TSegmentsContainer::iterator seg1_i, seg2_i;
348 seg2_i = seg1_i = gapped_segs.begin();
349 if (seg2_i != gapped_segs.end()) {
359 while (seg2_i != gapped_segs.end()) {
375 (*seg1_i)->m_StartIts) {
376 if (st_it->first == seq2) {
409 if (score2 < 75 * score1 / 100) {
427 seg2_i = gapped_segs.erase(seg2_i);
440 TSegmentsContainer::iterator seg_i, seg_i_end, seg_i_begin;
446 seg_i_begin = seg_i_end = seg_i = gapped_segs.begin();
451 while (seg_i_end != gapped_segs.end()) {
453 len_map[(*seg_i_end)->m_Len];
459 while (seg_i != seg_i_end) {
480 if ( !
possible || seg_i_end == gapped_segs.end()) {
485 TLenMap::iterator len_i = len_map.begin();
486 while (len_i != len_map.end()) {
488 len_i->second->m_Len = len_i->first - len_so_far;
489 len_so_far += len_i->second->m_Len;
495 TLenMap::iterator len_i_end;
497 while (seg_i != seg_i_end) {
498 TSeqPos orig_len = (*seg_i)->m_Len;
501 len_i_end = len_map.find(orig_len);
507 (*seg_i)->m_StartIts) {
510 TSeqPos orig_start = st_it->second->first;
512 len_i = len_map.begin();
515 while (len_i != len_i_end) {
519 TSeqPos this_start = orig_start +
522 orig_len - len_so_far - seg->
m_Len) *
529 len_so_far += seg->
m_Len;
535 new_segs.push_back(len_it->second);
538 seg_i_begin = seg_i_end;
543 gapped_segs.push_back(*new_seg_i);
551 size_t match_idx)
const
555 if ((*st_it_i).second->second !=
this) {
557 string(
"CAlnMixSegment::StartItsConsistencyCheck")
559 +
" The internal consistency check failed for"
560 +
" the segment containing ["
564 ((*st_it_i).first->m_PositiveStrand ?
"plus" :
"minus")
void StartItsConsistencyCheck(const CAlnMixSeq &seq, const TSeqPos &start, size_t match_idx) const
void SetStartIterator(CAlnMixSeq *seq, CAlnMixStarts::iterator iter)
TStartIterators m_StartIts
void Build(bool gap_join=false, bool min_gap=false, bool remove_leading_and_trailing_gaps=false)
list< CRef< CAlnMixSegment > > TSegmentsContainer
void x_MinimizeGaps(TSegmentsContainer &gapped_segs)
CAlnMixSegments(CRef< CAlnMixSequences > &aln_mix_sequences, TCalcScoreMethod calc_score=0)
CRef< CAlnMixSequences > m_AlnMixSequences
TCalcScoreMethod x_CalculateScore
void FillUnalignedRegions(void)
void x_ConsolidateGaps(TSegmentsContainer &gapped_segs)
vector< CRef< CAlnMixSeq > > & m_Rows
CAlnMixStarts & SetStarts()
const CAlnMixStarts & GetStarts() const
void GetSeqString(string &s, TSeqPos start, TSeqPos len, bool positive_strand=true)
void RowsStartItsContsistencyCheck(size_t match_idx)
void InitExtraRowsStartIts()
CAlnMixStarts::const_iterator current
container_type::iterator iterator
const_iterator begin() const
const_iterator end() const
const_iterator find(const key_type &key) const
iterator_bool insert(const value_type &val)
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
int TSignedSeqPos
Type for signed sequence position.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
use n only the LMDB cache</td > n</tr > n< tr > n< td ></td > n< td > use the LMDB cache if at all possible
#define row(bind, expected)