NCBI C++ ToolKit
mm_aligner.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: mm_aligner.cpp 100300 2023-07-18 19:57:36Z mozese2 $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Yuri Kapustin
27  *
28  * File Description: CMMAligner implementation
29  *
30  * ===========================================================================
31  *
32  */
33 
34 #include <ncbi_pch.hpp>
35 #include "mm_aligner_threads.hpp"
36 
37 #include <corelib/ncbimtx.hpp>
39 
41 
43 {
44 }
45 
46 
47 CMMAligner::CMMAligner( const char* seq1, size_t len1,
48  const char* seq2, size_t len2,
49  const SNCBIPackedScoreMatrix* scoremat )
50  : CNWAligner(seq1, len1, seq2, len2, scoremat)
51 {
52 }
53 
54 
55 CMMAligner::CMMAligner(const string& seq1,
56  const string& seq2,
57  const SNCBIPackedScoreMatrix* scoremat )
58  : CNWAligner(seq1, seq2, scoremat)
59 {
60 }
61 
62 
64 {
65  m_terminate = false;
66  if(m_prg_callback) {
70  }
71 
72  if(m_terminate) {
73  return m_score = 0;
74  }
75 
76  m_score = kMin_Int;
77  m_TransList.clear();
78  m_TransList.push_back(eTS_None);
79 
80  SCoordRect m (0, 0, m_SeqLen1 - 1, m_SeqLen2 - 1);
81  x_DoSubmatrix(m, m_TransList.end(), false, false); // top-level call
82 
83  if(m_terminate) {
84  return m_score = 0;
85  }
86 
87  // reverse_copy not supported by some compilers
88  list<ETranscriptSymbol>::const_iterator ii = m_TransList.begin();
89  size_t nsize = m_TransList.size() - 1;
90  m_Transcript.clear();
91  m_Transcript.resize(nsize);
92  for(size_t k = 1; k <= nsize; ++k)
93  m_Transcript[nsize - k] = *++ii;
94 
95  return m_score;
96 }
97 
98 
99 // trace bit coding (four bits per value): D E Ec Fc
100 // D: 1 if diagonal; 0 - otherwise
101 // E: 1 if space in 1st sequence; 0 if space in 2nd sequence
102 // Ec: 1 if gap in 1st sequence was extended; 0 if it is was opened
103 // Fc: 1 if gap in 2nd sequence was extended; 0 if it is was opened
104 //
105 
106 const unsigned char kMaskFc = 0x0001;
107 const unsigned char kMaskEc = 0x0002;
108 const unsigned char kMaskE = 0x0004;
109 const unsigned char kMaskD = 0x0008;
110 
111 DEFINE_STATIC_FAST_MUTEX (masterlist_mutex);
112 
114  list<ETranscriptSymbol>::iterator translist_pos,
115  bool left_top, bool right_bottom )
116 {
117 
118  if( m_terminate ) {
119  return;
120  }
121 
122  const Int8 dimI = submatr.i2 - submatr.i1 + 1;
123  const Int8 dimJ = submatr.j2 - submatr.j1 + 1;
124  if(dimI < 1 || dimJ < 1) return;
125 
126  bool top_level = submatr.i1 == 0 && submatr.j1 == 0 &&
127  submatr.i2 == m_SeqLen1-1 && submatr.j2 == m_SeqLen2-1;
128 
129 
130  if(dimI < 3 || dimJ < 3) { // terminal case
131  CFastMutexGuard guard (masterlist_mutex);
132  list<ETranscriptSymbol> lts;
133  TScore score = x_RunTerm(submatr, left_top, right_bottom, lts);
134  if(top_level) m_score = score;
135  m_TransList.splice(translist_pos, lts);
136  return;
137  }
138 
139  const size_t I = submatr.i1 + dimI / 2;
140  const size_t dim = dimJ + 1;
141 
142  // run Needleman-Wunsch without storing full traceback matrix
143  SCoordRect rtop (submatr.i1, submatr.j1, I, submatr.j2);
144  vector<TScore> vEtop (dim), vFtop (dim), vGtop (dim);
145  vector<unsigned char> trace_top (dim);
146 
147  SCoordRect rbtm (I + 1, submatr.j1, submatr.i2 , submatr.j2);
148  vector<TScore> vEbtm (dim), vFbtm (dim), vGbtm (dim);
149  vector<unsigned char> trace_btm (dim);
150 
152  CThreadRunOnTop* thr2 = new CThreadRunOnTop ( this, &rtop,
153  &vEtop, &vFtop, &vGtop,
154  &trace_top, left_top );
155  thr2->Run();
156  x_RunBtm(rbtm, vEbtm, vFbtm, vGbtm, trace_btm, right_bottom);
157  thr2->Join(0);
158  }
159  else {
160  x_RunTop(rtop, vEtop, vFtop, vGtop, trace_top, left_top);
161  x_RunBtm(rbtm, vEbtm, vFbtm, vGbtm, trace_btm, right_bottom);
162  }
163 
164  if( m_terminate ) {
165  return;
166  }
167 
168  // locate the transition point
169  size_t trans_pos = 0;
170  ETransitionType trans_type = eGG;
171  TScore score1 = x_FindBestJ ( vEtop, vFtop, vGtop, vEbtm, vFbtm, vGbtm,
172  trans_pos, trans_type );
173  if(top_level)
174  m_score = score1;
175 
176  // modify traces at the transition point if applicable
177  if(trans_type == eII) {
178  unsigned char mask_top = kMaskE;
179  if(trace_top[trans_pos] & kMaskEc)
180  mask_top |= kMaskEc;
181  trace_top[trans_pos] = mask_top;
182  trace_btm[trans_pos] = kMaskE | kMaskEc;
183  }
184  if(trans_type == eDD) {
185  trace_top[trans_pos] = (trace_top[trans_pos] & kMaskFc)? kMaskFc: 0;
186  trace_btm[trans_pos] = kMaskFc;
187  }
188 
189  // extend a subpath to both directions
190  vector<unsigned char>::const_iterator trace_it_top =
191  trace_top.begin() + trans_pos;
192  list<ETranscriptSymbol> subpath_left;
193  size_t steps_left = x_ExtendSubpath(trace_it_top, false, subpath_left);
194 
195  vector<unsigned char>::const_iterator trace_it_btm =
196  trace_btm.begin() + trans_pos;
197  list<ETranscriptSymbol> subpath_right;
198  size_t steps_right = x_ExtendSubpath(trace_it_btm, true, subpath_right);
199 
200  // check if we reached left or top line
201  bool bNoLT = false;
202  Int8 nc0 = trans_pos - steps_left;
203  if(nc0 < 0) {
204  NCBI_THROW(CAlgoAlignException, eInternal,
205  "Assertion: LT underflow");
206  }
207  bool bLeft = nc0 == 0;
208  bool bTop = I == submatr.i1;
209  if(bLeft && !bTop) {
210  Int8 jump = I - submatr.i1;
211  subpath_left.insert(subpath_left.begin(), jump, eTS_Delete);
212  }
213  if(!bLeft && bTop) {
214  subpath_left.insert(subpath_left.begin(), nc0, eTS_Insert);
215  }
216  if(bLeft || bTop) {
217  bNoLT = true;
218  }
219 
220  // check if we reached right or bottom line
221  bool bNoRB = false;
222  Int8 nc1 = trans_pos + steps_right;
223  if(nc1 > dimJ) {
224  NCBI_THROW(CAlgoAlignException, eInternal,
225  "Assertion: RB overflow");
226  }
227  bool bRight = nc1 == dimJ;
228  bool bBottom = I == submatr.i2 - 1;
229  if(bRight && !bBottom) {
230  Int8 jump = submatr.i2 - I - 1;
231  subpath_right.insert(subpath_right.end(), jump, eTS_Delete);
232  }
233  if(!bRight && bBottom) {
234  Int8 jump = dimJ - nc1;
235  subpath_right.insert(subpath_right.end(), jump, eTS_Insert);
236  }
237  if(bRight || bBottom) {
238  bNoRB = true;
239  }
240 
241  // find out if we have special left-top and/or right-bottom
242  // on the new sub-matrices
243  bool rb = subpath_left.front() == eTS_Delete;
244  bool lt = subpath_right.back() == eTS_Delete;
245 
246  // coalesce the pieces together and insert into the master list
247  subpath_left.splice( subpath_left.end(), subpath_right );
248 
249  list<ETranscriptSymbol>::iterator ti0, ti1;
250  {{
251  CFastMutexGuard guard (masterlist_mutex);
252  ti0 = translist_pos;
253  --ti0;
254  m_TransList.splice( translist_pos, subpath_left );
255  ++ti0;
256  ti1 = translist_pos;
257  }}
258 
259  // Recurse submatrices:
260  SCoordRect rlt;
261  if(!bNoLT) {
262  // left top
263  size_t left_bnd = submatr.j1 + trans_pos - steps_left - 1;
264  if(left_bnd >= submatr.j1) {
265  rlt.i1 = submatr.i1;
266  rlt.j1 = submatr.j1;
267  rlt.i2 = I - 1;
268  rlt.j2 = left_bnd;
269  }
270  else {
271  NCBI_THROW(CAlgoAlignException, eInternal,
272  "Assertion: Left boundary out of range");
273  }
274  }
275 
276  SCoordRect rrb;
277  if(!bNoRB) {
278  // right bottom
279  size_t right_bnd = submatr.j1 + trans_pos + steps_right;
280  if(right_bnd <= submatr.j2) {
281  rrb.i1 = I + 2;
282  rrb.j1 = right_bnd;
283  rrb.i2 = submatr.i2;
284  rrb.j2 = submatr.j2;
285  }
286  else {
287  NCBI_THROW(CAlgoAlignException, eInternal,
288  "Assertion: Right boundary out of range");
289  }
290  }
291 
292  if(!bNoLT && !bNoRB) {
294  // find out which rect is larger
295  if(rlt.GetArea() > rrb.GetArea()) {
296  // do rb in a second thread
297  CThread* thr2 = new CThreadDoSM( this, &rrb, ti1,
298  lt, right_bottom);
299  thr2->Run();
300  // do lt in this thread
301  x_DoSubmatrix(rlt, ti0, left_top, rb);
302  thr2->Join(0);
303  }
304  else {
305  // do lt in a second thread
306  CThread* thr2 = new CThreadDoSM( this, &rlt, ti0,
307  left_top, rb);
308  thr2->Run();
309  // do rb in this thread
310  x_DoSubmatrix(rrb, ti1, lt, right_bottom);
311  thr2->Join(0);
312  }
313  }
314  else { // just do both
315  x_DoSubmatrix(rlt, ti0, left_top, rb);
316  x_DoSubmatrix(rrb, ti1, lt, right_bottom);
317  }
318  }
319  else if(!bNoLT) {
320  x_DoSubmatrix(rlt, ti0, left_top, rb);
321  }
322  else if(!bNoRB) {
323  x_DoSubmatrix(rrb, ti1, lt, right_bottom);
324  }
325 }
326 
327 
329  const vector<TScore>& vEtop, const vector<TScore>& vFtop,
330  const vector<TScore>& vGtop, const vector<TScore>& vEbtm,
331  const vector<TScore>& vFbtm, const vector<TScore>& vGbtm,
332  size_t& pos,
333  ETransitionType& trans_type ) const
334 {
335  const size_t dim = vEtop.size();
336  TScore score = kMin_Int;
337  TScore trans_alts [9];
338 
339  bool bFreeGapLeft2 = m_esf_L2 && dim == m_SeqLen2 + 1;
340  bool bFreeGapRight2 = m_esf_R2 && dim == m_SeqLen2 + 1;
341 
342  for(size_t i = 0; i < dim ; ++i) {
343  trans_alts [0] = vEtop[i] + vEbtm[i] - m_Wg; // II
344  trans_alts [1] = vFtop[i] + vEbtm[i]; // DI
345  trans_alts [2] = vGtop[i] + vEbtm[i]; // GI
346  trans_alts [3] = vEtop[i] + vFbtm[i]; // ID
347  TScore wg = ((bFreeGapLeft2 && i == 0) || ( bFreeGapRight2 && i == dim -1) )?
348  0: m_Wg;
349  trans_alts [4] = vFtop[i] + vFbtm[i] - wg; // DD
350  trans_alts [5] = vGtop[i] + vFbtm[i]; // GD
351  trans_alts [6] = vEtop[i] + vGbtm[i]; // IG
352  trans_alts [7] = vFtop[i] + vGbtm[i]; // DG
353  trans_alts [8] = vGtop[i] + vGbtm[i]; // GG
354 
355  for(size_t k = 0; k < 9; ++k) {
356  if(trans_alts[k] > score) {
357  score = trans_alts[k];
358  pos = i;
359  trans_type = (ETransitionType)k;
360  }
361  }
362  }
363 
364  return score;
365 }
366 
367 
369  vector<unsigned char>::const_iterator trace_it,
370  bool direction,
371  list<ETranscriptSymbol>& subpath ) const
372 {
373  subpath.clear();
374  size_t step_counter = 0;
375  if(direction) {
376  while(true) {
377  unsigned char Key = *trace_it;
378  if (Key & kMaskD) {
379  subpath.push_back(eTS_Match);
380  ++step_counter;
381  break;
382  }
383  else if (Key & kMaskE) {
384  subpath.push_back(eTS_Insert);
385  ++step_counter;
386  ++trace_it;
387  while(Key & kMaskEc) {
388  Key = *trace_it++;
389  subpath.push_back(eTS_Insert);
390  ++step_counter;
391  }
392  }
393  else if ((Key & kMaskE) == 0) {
394  subpath.push_back(eTS_Delete);
395  break;
396  }
397  else {
398  NCBI_THROW(CAlgoAlignException, eInternal,
399  "Assertion: incorrect backtrace symbol "
400  "(right expansion)");
401  }
402  }
403  }
404  else {
405  while(true) {
406  unsigned char Key = *trace_it;
407  if (Key & kMaskD) {
408  subpath.push_front(eTS_Match);
409  ++step_counter;
410  break;
411  }
412  else if (Key & kMaskE) {
413  subpath.push_front(eTS_Insert);
414  ++step_counter;
415  --trace_it;
416  while(Key & kMaskEc) {
417  Key = *trace_it--;
418  subpath.push_front(eTS_Insert);
419  ++step_counter;
420  }
421  }
422  else if ((Key & kMaskE) == 0) {
423  subpath.push_front(eTS_Delete);
424  break;
425  }
426  else {
427  NCBI_THROW(CAlgoAlignException, eInternal,
428  "Assertion: incorrect backtrace symbol "
429  "(left expansion)");
430  }
431  }
432  }
433  return step_counter;
434 }
435 
436 
437 #ifdef NCBI_THREADS
438 DEFINE_STATIC_FAST_MUTEX (progress_mutex);
439 #endif
440 
441 
442 void CMMAligner::x_RunTop ( const SCoordRect& rect,
443  vector<TScore>& vE, vector<TScore>& vF, vector<TScore>& vG,
444  vector<unsigned char>& trace, bool lt ) const
445 {
446  if( m_terminate ) {
447  return;
448  }
449 
450  const size_t dim1 = rect.i2 - rect.i1 + 1;
451  const size_t dim2 = rect.j2 - rect.j1 + 1;
452  const size_t N1 = dim1 + 1;
453  const size_t N2 = dim2 + 1;
454 
455  vector<TScore> stl_rowV (N2), stl_rowF (N2);
456  TScore* rowV = &stl_rowV [0];
457  TScore* rowF = &stl_rowF [0];
458 
459  TScore* pV = rowV - 1;
460 
461  const char* seq1 = m_Seq1 - 1 + rect.i1;
462  const char* seq2 = m_Seq2 - 1 + rect.j1;
463 
464  const TNCBIScore (*sm) [NCBI_FSM_DIM] = m_ScoreMatrix.s;
465 
466  bool bFreeGapLeft1 = m_esf_L1 && rect.i1 == 0;
467  bool bFreeGapLeft2 = m_esf_L2 && rect.j1 == 0;
468  bool bFreeGapRight2 = m_esf_R2 && rect.j2 == m_SeqLen2 - 1;
469 
470  // progress reporting
471  const size_t prg_rep_rate = 100;
472  const size_t prg_rep_increment = prg_rep_rate*N2;
473 
474  // first row
475 
476  TScore wg = bFreeGapLeft1? 0: m_Wg;
477  TScore ws = bFreeGapLeft1? 0: m_Ws;
478 
479  rowV[0] = wg;
480  size_t i, j;
481  for (j = 1; j < N2; ++j) {
482  rowV[j] = pV[j] + ws;
483  rowF[j] = kInfMinus;
484  }
485  rowV[0] = 0;
486 
487  // recurrences
488 
489  wg = bFreeGapLeft2? 0: m_Wg;
490  ws = bFreeGapLeft2? 0: m_Ws;
491 
492  TScore V = 0;
493  TScore V0 = lt? 0: wg;
494  TScore E, G, n0;
495 
496  for(i = 1; i < N1 - 1; ++i) {
497 
498  V = V0 += ws;
499  E = kInfMinus;
500  unsigned char ci = seq1[i];
501 
502  TScore wg2 = m_Wg, ws2 = m_Ws;
503 
504  for (j = 1; j < N2; ++j) {
505 
506  G = pV[j] + sm[ci][(unsigned char)seq2[j]];
507  pV[j] = V;
508 
509  n0 = V + m_Wg;
510  if(E >= n0)
511  E += m_Ws; // continue the gap
512  else
513  E = n0 + m_Ws; // open a new gap
514 
515  if(j == N2 - 1 && bFreeGapRight2)
516  wg2 = ws2 = 0;
517 
518  n0 = rowV[j] + wg2;
519  if (rowF[j] > n0)
520  rowF[j] += ws2;
521  else
522  rowF[j] = n0 + ws2;
523 
524  V = (E >= rowF[j])? (E >= G? E: G): (rowF[j] >= G? rowF[j]: G);
525  }
526  pV[j] = V;
527 
528  if( m_prg_callback && i % prg_rep_rate == 0 ) {
529 #ifdef NCBI_THREADS
530  CFastMutexGuard guard (progress_mutex);
531 #endif
532  m_prg_info.m_iter_done += prg_rep_increment;
534  break;
535  }
536  }
537  }
538 
539  // the last row (i == N1 - 1)
540  if(!m_terminate) {
541 
542  vG[0] = vE[0] = E = kInfMinus;
543  vF[0] = V = V0 += ws;
544  trace[0] = kMaskFc;
545  unsigned char ci = seq1[i];
546 
547  TScore wg2 = m_Wg, ws2 = m_Ws;
548 
549  unsigned char tracer;
550  for (j = 1; j < N2; ++j) {
551 
552  vG[j] = G = pV[j] + sm[ci][(unsigned char)seq2[j]];
553  pV[j] = V;
554 
555  n0 = V + m_Wg;
556  if(E >= n0) {
557  E += m_Ws; // continue the gap
558  tracer = kMaskEc;
559  }
560  else {
561  E = n0 + m_Ws; // open a new gap
562  tracer = 0;
563  }
564  vE[j] = E;
565 
566  if(j == N2 - 1 && bFreeGapRight2) {
567  wg2 = ws2 = 0;
568  }
569 
570  n0 = rowV[j] + wg2;
571  if(rowF[j] >= n0) {
572  rowF[j] += ws2;
573  tracer |= kMaskFc;
574  }
575  else {
576  rowF[j] = n0 + ws2;
577  }
578  vF[j] = rowF[j];
579 
580  if (E >= rowF[j]) {
581  if(E >= G) {
582  V = E;
583  tracer |= kMaskE;
584  }
585  else {
586  V = G;
587  tracer |= kMaskD;
588  }
589  } else {
590  if(rowF[j] >= G) {
591  V = rowF[j];
592  }
593  else {
594  V = G;
595  tracer |= kMaskD;
596  }
597  }
598  trace[j] = tracer;
599  }
600  }
601 
602  if( m_prg_callback ) {
603 #ifdef NCBI_THREADS
604  CFastMutexGuard guard (progress_mutex);
605 #endif
606  m_prg_info.m_iter_done += (i % prg_rep_rate)*N2;
608  }
609 }
610 
611 
613  vector<TScore>& vE, vector<TScore>& vF, vector<TScore>& vG,
614  vector<unsigned char>& trace, bool rb) const
615 {
616  if( m_terminate ) {
617  return;
618  }
619 
620  const size_t dim1 = rect.i2 - rect.i1 + 1;
621  const size_t dim2 = rect.j2 - rect.j1 + 1;
622  const size_t N1 = dim1 + 1;
623  const size_t N2 = dim2 + 1;
624 
625  vector<TScore> stl_rowV (N2), stl_rowF (N2);
626  TScore* rowV = &stl_rowV [0];
627  TScore* rowF = &stl_rowF [0];
628 
629  TScore* pV = rowV + 1;
630 
631  const char* seq1 = m_Seq1 + rect.i1;
632  const char* seq2 = m_Seq2 + rect.j1;
633 
634  const TNCBIScore (*sm) [NCBI_FSM_DIM] = m_ScoreMatrix.s;
635 
636  bool bFreeGapRight1 = m_esf_R1 && rect.i2 == m_SeqLen1 - 1;
637  bool bFreeGapRight2 = m_esf_R2 && rect.j2 == m_SeqLen2 - 1;
638  bool bFreeGapLeft2 = m_esf_L2 && rect.j1 == 0;
639 
640  // progress reporting
641 
642  const size_t prg_rep_rate = 100;
643  const size_t prg_rep_increment = prg_rep_rate*N2;
644 
645  // bottom row
646 
647  TScore wg = bFreeGapRight1? 0: m_Wg;
648  TScore ws = bFreeGapRight1? 0: m_Ws;
649 
650  rowV[N2 - 1] = wg;
651  Int8 i, j;
652  for (j = N2 - 2; j >= 0; --j) {
653  rowV[j] = pV[j] + ws;
654  rowF[j] = kInfMinus;
655  }
656  rowV[N2 - 1] = 0;
657 
658  // recurrences
659 
660  wg = bFreeGapRight2? 0: m_Wg;
661  ws = bFreeGapRight2? 0: m_Ws;
662 
663  TScore V = 0;
664  TScore V0 = rb? 0: wg;
665  TScore E, G, n0;
666 
667  for(i = N1 - 2; i > 0; --i) {
668 
669  V = V0 += ws;
670  E = kInfMinus;
671  unsigned char ci = seq1[i];
672 
673  TScore wg2 = m_Wg, ws2 = m_Ws;
674 
675  for (j = N2 - 2; j >= 0; --j) {
676 
677  G = pV[j] + sm[ci][(unsigned char)seq2[j]];
678  pV[j] = V;
679 
680  n0 = V + m_Wg;
681  if(E >= n0)
682  E += m_Ws; // continue the gap
683  else
684  E = n0 + m_Ws; // open a new gap
685 
686  if(j == 0 && bFreeGapLeft2) {
687  wg2 = ws2 = 0;
688  }
689 
690  n0 = rowV[j] + wg2;
691  if (rowF[j] > n0)
692  rowF[j] += ws2;
693  else
694  rowF[j] = n0 + ws2;
695 
696  V = (E >= rowF[j])? (E >= G? E: G): (rowF[j] >= G? rowF[j]: G);
697  }
698  pV[j] = V;
699 
700  if( m_prg_callback && (N1 - i) % prg_rep_rate == 0 ) {
701 #ifdef NCBI_THREADS
702  CFastMutexGuard guard (progress_mutex);
703 #endif
704  m_prg_info.m_iter_done += prg_rep_increment;
706  break;
707  }
708  }
709  }
710 
711  // the top row (i == 0)
712  if(!m_terminate) {
713 
714  vF[N2-1] = V = V0 += ws;
715  vG[N2-1] = vE[N2-1] = E = kInfMinus;
716  trace[N2-1] = kMaskFc;
717  unsigned char ci = seq1[i];
718 
719  TScore wg2 = m_Wg, ws2 = m_Ws;
720 
721  unsigned char tracer;
722  for (j = N2 - 2; j >= 0; --j) {
723 
724  vG[j] = G = pV[j] + sm[ci][(unsigned char)seq2[j]];
725  pV[j] = V;
726 
727  n0 = V + m_Wg;
728  if(E >= n0) {
729  E += m_Ws; // continue the gap
730  tracer = kMaskEc;
731  }
732  else {
733  E = n0 + m_Ws; // open a new gap
734  tracer = 0;
735  }
736  vE[j] = E;
737 
738  if(j == 0 && bFreeGapLeft2) {
739  wg2 = ws2 = 0;
740  }
741 
742  n0 = rowV[j] + wg2;
743  if(rowF[j] >= n0) {
744  rowF[j] += ws2;
745  tracer |= kMaskFc;
746  }
747  else {
748  rowF[j] = n0 + ws2;
749  }
750  vF[j] = rowF[j];
751 
752  if (E >= rowF[j]) {
753  if(E >= G) {
754  V = E;
755  tracer |= kMaskE;
756  }
757  else {
758  V = G;
759  tracer |= kMaskD;
760  }
761  } else {
762  if(rowF[j] >= G) {
763  V = rowF[j];
764  }
765  else {
766  V = G;
767  tracer |= kMaskD;
768  }
769  }
770  trace[j] = tracer;
771  }
772  }
773 
774  if( m_prg_callback ) {
775 #ifdef NCBI_THREADS
776  CFastMutexGuard guard (progress_mutex);
777 #endif
778  m_prg_info.m_iter_done += (N1 - i) % prg_rep_rate;
780  }
781 }
782 
783 
785  bool left_top, bool right_bottom,
786  list<ETranscriptSymbol>& subpath)
787 {
788  if( m_terminate ) {
789  return 0;
790  }
791 
792  const size_t N1 = rect.i2 - rect.i1 + 2;
793  const size_t N2 = rect.j2 - rect.j1 + 2;
794 
795  vector<TScore> stl_rowV (N2), stl_rowF (N2);
796  TScore* rowV = &stl_rowV [0];
797  TScore* rowF = &stl_rowF [0];
798 
799  // index calculation: [i,j] = i*n2 + j
800  vector<unsigned char> stl_bm (N1*N2);
801  unsigned char* backtrace = &stl_bm[0];
802 
803  TScore* pV = rowV - 1;
804 
805  const char* seq1 = m_Seq1 + rect.i1 - 1;
806  const char* seq2 = m_Seq2 + rect.j1 - 1;
807 
808  const TNCBIScore (*sm) [NCBI_FSM_DIM] = m_ScoreMatrix.s;
809 
810  bool bFreeGapLeft1 = m_esf_L1 && rect.i1 == 0;
811  bool bFreeGapRight1 = m_esf_R1 && rect.i2 == m_SeqLen1 - 1;
812  bool bFreeGapLeft2 = m_esf_L2 && rect.j1 == 0;
813  bool bFreeGapRight2 = m_esf_R2 && rect.j2 == m_SeqLen2 - 1;
814 
815  TScore wgleft1 = bFreeGapLeft1? 0: m_Wg;
816  TScore wsleft1 = bFreeGapLeft1? 0: m_Ws;
817  TScore wg1 = m_Wg, ws1 = m_Ws;
818 
819  // first row
820  size_t k;
821  {
822  rowV[0] = wgleft1;
823  for (k = 1; k < N2; k++) {
824  rowV[k] = pV[k] + wsleft1;
825  rowF[k] = kInfMinus;
826  backtrace[k] = kMaskE | kMaskEc;
827  }
828  rowV[0] = 0;
829  }
830 
831  // recurrences
832  TScore wgleft2 = bFreeGapLeft2? 0: m_Wg;
833  TScore wsleft2 = bFreeGapLeft2? 0: m_Ws;
834  TScore V = 0;
835  TScore V0 = left_top? 0: wgleft2;
836  TScore E, G, n0;
837  unsigned char tracer;
838 
839  size_t i, j;
840  for(i = 1; i < N1; ++i) {
841 
842  V = V0 += wsleft2;
843  E = kInfMinus;
844  backtrace[k++] = kMaskFc;
845  unsigned char ci = seq1[i];
846 
847  if(i == N1 - 1 && bFreeGapRight1) {
848  wg1 = ws1 = 0;
849  }
850 
851  TScore wg2 = m_Wg, ws2 = m_Ws;
852 
853  for (j = 1; j < N2; ++j, ++k) {
854 
855  G = pV[j] + sm[ci][(unsigned char)seq2[j]];
856  pV[j] = V;
857 
858  n0 = V + wg1;
859  if(E >= n0) {
860  E += ws1; // continue the gap
861  tracer = kMaskEc;
862  }
863  else {
864  E = n0 + ws1; // open a new gap
865  tracer = 0;
866  }
867 
868  if(j == N2 - 1 && bFreeGapRight2) {
869  wg2 = ws2 = 0;
870  }
871  n0 = rowV[j] + ((right_bottom && j == N2 - 1)? 0: wg2);
872  if(rowF[j] >= n0) {
873  rowF[j] += ws2;
874  tracer |= kMaskFc;
875  }
876  else {
877  rowF[j] = n0 + ws2;
878  }
879 
880  if (E >= rowF[j]) {
881  if(E >= G) {
882  V = E;
883  tracer |= kMaskE;
884  }
885  else {
886  V = G;
887  tracer |= kMaskD;
888  }
889  } else {
890  if(rowF[j] >= G) {
891  V = rowF[j];
892  }
893  else {
894  V = G;
895  tracer |= kMaskD;
896  }
897  }
898  backtrace[k] = tracer;
899  }
900 
901  pV[j] = V;
902  }
903 
904  // fill the subpath
905  subpath.clear();
906 
907  // run backtrace
908  k = N1*N2 - 1;
909  while (k != 0) {
910  unsigned char Key = backtrace[k];
911  if (Key & kMaskD) {
912  subpath.push_front(eTS_Match);
913  k -= N2 + 1;
914  }
915  else if (Key & kMaskE) {
916  subpath.push_front(eTS_Insert); --k;
917  while(k > 0 && (Key & kMaskEc)) {
918  subpath.push_front(eTS_Insert);
919  Key = backtrace[k--];
920  }
921  }
922  else {
923  subpath.push_front(eTS_Delete);
924  k -= N2;
925  while(k > 0 && (Key & kMaskFc)) {
926  subpath.push_front(eTS_Delete);
927  Key = backtrace[k];
928  k -= N2;
929  }
930  }
931  }
932 
933  return V;
934 }
935 
936 
938 {
939  return true;
940 }
941 
942 
943 
#define G(x, y, z)
Definition: md4.c:179
static bool trace
TScore x_FindBestJ(const vector< TScore > &vEtop, const vector< TScore > &vFtop, const vector< TScore > &vGtop, const vector< TScore > &vEbtm, const vector< TScore > &vFbtm, const vector< TScore > &vGbtm, size_t &pos, ETransitionType &trans_type) const
Definition: mm_aligner.cpp:328
friend class CThreadRunOnTop
Definition: mm_aligner.hpp:118
virtual TScore x_Run()
Definition: mm_aligner.cpp:63
void x_DoSubmatrix(const SCoordRect &submatr, list< ETranscriptSymbol >::iterator translist_pos, bool left_top, bool right_bottom)
Definition: mm_aligner.cpp:113
unsigned int GetArea()
Definition: mm_aligner.hpp:130
list< ETranscriptSymbol > m_TransList
Definition: mm_aligner.hpp:78
void x_RunBtm(const SCoordRect &rect, vector< TScore > &vE, vector< TScore > &vF, vector< TScore > &vG, vector< unsigned char > &trace, bool rb) const
Definition: mm_aligner.cpp:612
TScore x_RunTerm(const SCoordRect &rect, bool left_top, bool right_bottom, list< ETranscriptSymbol > &subpath)
Definition: mm_aligner.cpp:784
size_t x_ExtendSubpath(vector< unsigned char >::const_iterator trace_it, bool direction, list< ETranscriptSymbol > &subpath) const
Definition: mm_aligner.cpp:368
void x_RunTop(const SCoordRect &rect, vector< TScore > &vE, vector< TScore > &vF, vector< TScore > &vG, vector< unsigned char > &trace, bool lt) const
Definition: mm_aligner.cpp:442
friend class CThreadDoSM
Definition: mm_aligner.hpp:119
virtual bool x_CheckMemoryLimit()
Definition: mm_aligner.cpp:937
bool m_terminate
Definition: nw_aligner.hpp:291
const char * m_Seq1
Definition: nw_aligner.hpp:295
TScore m_Ws
Definition: nw_aligner.hpp:271
SNCBIFullScoreMatrix m_ScoreMatrix
Definition: nw_aligner.hpp:281
TScore m_Wg
Definition: nw_aligner.hpp:270
FProgressCallback m_prg_callback
Definition: nw_aligner.hpp:285
size_t m_SeqLen1
Definition: nw_aligner.hpp:296
const char * m_Seq2
Definition: nw_aligner.hpp:298
size_t m_maxthreads
Definition: nw_aligner.hpp:321
TTranscript m_Transcript
Definition: nw_aligner.hpp:314
TScore m_score
Definition: nw_aligner.hpp:316
size_t m_SeqLen2
Definition: nw_aligner.hpp:299
SProgressInfo m_prg_info
Definition: nw_aligner.hpp:288
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
#define kMin_Int
Definition: ncbi_limits.h:183
int64_t Int8
8-byte (64-bit) signed integer
Definition: ncbitype.h:104
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
bool Run(TRunMode flags=fRunDefault)
Run the thread.
Definition: ncbithr.cpp:724
void Join(void **exit_data=0)
Wait for the thread termination.
Definition: ncbithr.cpp:863
int i
const unsigned char kMaskFc
Definition: mm_aligner.cpp:106
const unsigned char kMaskEc
Definition: mm_aligner.cpp:107
const unsigned char kMaskE
Definition: mm_aligner.cpp:108
const unsigned char kMaskD
Definition: mm_aligner.cpp:109
DEFINE_STATIC_FAST_MUTEX(masterlist_mutex)
bool MM_RequestNewThread(const size_t max_threads)
Multi-threading – mutexes; rw-locks; semaphore.
const double E
#define NCBI_FSM_DIM
Recommended approach: unpack and index directly.
Definition: raw_scoremat.h:85
int TNCBIScore
data types
Definition: raw_scoremat.h:45
TNCBIScore s[128][128]
Definition: raw_scoremat.h:87
Modified on Wed Apr 17 13:09:58 2024 by modify_doxy.py rev. 669887