NCBI C++ ToolKit
nucprot.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /*
2 * $Id: nucprot.cpp 101425 2023-12-12 18:03:47Z dicuccio $
3 *
4 * =========================================================================
5 *
6 * PUBLIC DOMAIN NOTICE
7 * National Center for Biotechnology Information
8 *
9 * This software/database is a "United States Government Work" under the
10 * terms of the United States Copyright Act. It was written as part of
11 * the author's official duties as a United States Government employee and
12 * thus cannot be copyrighted. This software/database is freely available
13 * to the public for use. The National Library of Medicine and the U.S.
14 * Government have not placed any restriction on its use or reproduction.
15 *
16 * Although all reasonable efforts have been taken to ensure the accuracy
17 * and reliability of the software and data, the NLM and the U.S.
18 * Government do not and cannt warrant the performance or results that
19 * may be obtained by using this software or data. The NLM and the U.S.
20 * Government disclaim all warranties, express or implied, including
21 * warranties of performance, merchantability or fitness for any particular
22 * purpose.
23 *
24 * Please cite the author in any work or product based on this material.
25 *
26 * =========================================================================
27 *
28 * Author: Boris Kiryutin
29 *
30 * =========================================================================
31 */
32 
33 #include <ncbi_pch.hpp>
34 
35 #include <corelib/ncbistd.hpp>
36 
38 
39 #include "nucprot.hpp"
40 #include "intron.hpp"
41 #include "Info.hpp"
42 #include "BackAlignInfo.hpp"
43 #include "Ali.hpp"
44 #include "AlignInfo.hpp"
45 #include "scoring.hpp"
46 
48 
50 BEGIN_SCOPE(prosplign)
51 
52 const int infinity = numeric_limits<int>::min()/3;
53 
54 CSubstMatrix::CSubstMatrix(const string& matrix_name, int scaling)
55 {
56  const SNCBIPackedScoreMatrix* packed_mtx = &NCBISM_Blosum62;
57 // NCBISM_GetStandardMatrix(matrix_name.c_str());
58  if (packed_mtx == NULL)
59  NCBI_THROW(CProSplignException, eParam, "unknown scoring matrix: "+matrix_name);
60 
61  m_alphabet = packed_mtx->symbols;
62  m_alphabet = NStr::ToUpper(m_alphabet);
63 
64  if (string::npos == m_alphabet.find('X'))
65  NCBI_THROW(CProSplignException, eParam, "unsupported scoring matrix: "+matrix_name);
66 
68  NCBISM_Unpack(packed_mtx, &mtx);
69 
70  for(int i = 0; i < 256; ++i) {
71  for(int j = 0; j < 256; ++j) {
72  scaled_subst_matrix[i][j] = packed_mtx->defscore*scaling;
73  }
74  }
75 
76  int score;
77  for(const char* p1 = packed_mtx->symbols; *p1; ++p1) {
78  int c = toupper(*p1);
79  int lc = tolower(c);
80  for(const char* p2 = packed_mtx->symbols; *p2; ++p2) {
81  int d = toupper(*p2);
82  int ld = tolower(d);
83  score = mtx.s[(int)(*p1)][(int)(*p2)]*scaling;
84  scaled_subst_matrix[c][d] = score;
85  scaled_subst_matrix[lc][ld] = score;
86  scaled_subst_matrix[c][ld] = score;
87  scaled_subst_matrix[lc][d] = score;
88  }}
89 }
90 
92 {
93  m_trans_table.Reset(trans_table);
94 }
95 
96 CTranslationTable::CTranslationTable(int gcode, bool allow_alt_starts) :
97  m_trans_table(CGen_code_table::GetTransTable(gcode)), m_allow_alt_starts(allow_alt_starts)
98 {
99  for(int i=0; i<5; ++i)
100  for(int j=0; j<5; ++j)
101  for(int k=0; k<5; ++k)
102  aa_table[i*(8*8)+j*8+k] =
104 }
105 
107  if(c == 'A' || c == 'a') return nA;
108  if(c == 'C' || c == 'c') return nC;
109  if(c == 'G' || c == 'g') return nG;
110  if(c == 'T' || c == 't') return nT;
111  return nN;
112 }
113 
115  if(n == nA) return 'A';
116  if(n == nT) return 'T';
117  if(n == nG) return 'G';
118  if(n == nC) return 'C';
119  return 'N';
120 }
121 
122 //fast score access implementation
123 void CFastIScore::Init(const CNSeq& seq, const CSubstMatrix& matrix) {
124  Init(matrix);
125  m_size = seq.size() - 2;
126  m_scores.resize( m_size * matrix.m_alphabet.size() + 1);
127  int j;
128  string::size_type i;
129  int *pos = &m_scores[0];
130  for(i=0; i<matrix.m_alphabet.size(); ++i) {
131  m_gpos = &m_gscores[i * 125];
132  for(j=2; j<seq.size(); ++j) {
133  *++pos = GetScore(seq[j-2], seq[j-1], seq[j]);
134  }
135  }
136 }
137 
138 void CFastIScore::SetAmin(char amin, const CSubstMatrix& matrix) {
139  Init(matrix);
140  string::size_type num = matrix.m_alphabet.find(toupper(amin));
141  if(num == string::npos) num = matrix.m_alphabet.find('X');
142  m_gpos = &m_gscores[num * 125];
143  m_pos = &m_scores[num * m_size];
144 }
145 
146 // bool CFastIScore::m_init = false;
147 // int *CFastIScore::m_gpos;
148 // vector<int> CFastIScore::m_gscores;
149 
150 void CFastIScore::Init(const CSubstMatrix& matrix) {
151  if(m_init) return;
152  m_init = true;
153  m_gscores.resize(125*matrix.m_alphabet.size());
154  int *pos = &m_gscores[0];
155  int arr[] = { nA, nC, nG, nT, nN };
156  int i1, i2, i3;
157  string::size_type i;
158  for(i=0; i<matrix.m_alphabet.size(); ++i) {
159  char amin = matrix.m_alphabet[i];
160  for(i1 = 0; i1<5; ++i1) {
161  for(i2 = 0; i2<5; ++i2) {
162  for(i3 = 0; i3<5; ++i3) *pos++ = matrix.MultScore(arr[i1], arr[i2], arr[i3], amin);
163  }
164  }
165  }
166 }
167 //end of fast score access implementation
168 
169 
170 
171 int FrAlign(const CProSplignInterrupt& interrupt, CBackAlignInfo& bi, const PSEQ& pseq, const CNSeq& nseq, int g/*gap opening*/, int e/*one nuc extension cost*/,
172  int f/*frameshift opening cost*/, const CProSplignScaledScoring& /*scoring*/, const CSubstMatrix& matrix)
173 {
174  int ilen = (int)pseq.size() + 1;
175  int jlen = nseq.size() + 1;
176  CFrAlignRow row1(jlen), row2(jlen);
177  CFrAlignRow *crow = &row1, *prow = &row2;
178  int v1, v2, h1, h2, w1, w2, w3, w4, w5, w6, w7, w8, w9, v0/* = w10*/, h0/* = w11*/, w0;
179  int hpre1, hpre2, hpre3;
180  int i, j;
181  //first row, i.e. i=0
182  for(j=0;j<jlen;j++) {
183  crow->w[j] = 0;
184  crow->v[j] = infinity;
185  }
186  // ******* MAIN LOOP ******************
187  for(i=1;i<ilen;i++) {
188  swap(crow, prow);
189  crow->w[0] = - g - 3*i*e;
190  //leads to artefacts!
191  crow->w[1] = - f - (i*3 - 1)*e;
192  bi.b[i-1][0] = 3;
193  crow->w[2] = - f - (i*3 - 2)*e;
194  bi.b[i-1][1] = 5;
195  /**/
196  crow->v[1] = crow->v[2] = infinity;
197  hpre1 = hpre2 = h0 = infinity;
198  for(j=3;j<jlen;j++) {
199  interrupt.CheckUserInterrupt();
200  char& b = bi.b[i-1][j-1];
201  b = (char)0;
202  hpre3 = hpre2;
203  hpre2 = hpre1;
204  hpre1 = h0;
205  //best v-gap
206  v1 = prow->w[j] - g - 3*e;
207  v2 = prow->v[j] - 3*e;
208  v0 = max(v1, v2);
209  crow->v[j] = v0;
210  if(v0 == v2) b += (char)32;
211  //best h-gap
212  h1 = crow->w[j-3] - g - 3*e;
213  h2 = hpre3 - 3*e;
214  h0 = max(h1, h2);
215  if(h0 == h2) b += (char)16;
216  //rest
217  w1 = prow->w[j-3] + matrix.MultScore(nseq[j-3], nseq[j-2], nseq[j-1], pseq[i-1]);
218  w2 = prow->w[j-1] - f - 2*e;
219  w3 = prow->v[j-1] - (f - g) - 2*e;
220  w4 = prow->w[j-2] - f - e;
221  w5 = prow->v[j-2] - (f - g) - e;
222  w6 = crow->w[j-1] - f - e;
223  w7 = hpre1 - (f - g) - e;
224  w8 = crow->w[j-2] - f - 2*e;
225  w9 = hpre2 - (f - g) - 2*e;
226  w0 = max(w1, max(w2, max(w3, max(w4, max(w5, max(w6, max(w7, max(w8, max(w9, max(v0, h0))))))))));
227  if(w0 == w1) b += 1;
228  else if(w0 == w2) b += 2;
229  else if(w0 == w3) b += 3;
230  else if(w0 == w4) b += 4;
231  else if(w0 == w5) b += 5;
232  else if(w0 == w6) b += 6;
233  else if(w0 == w7) b += 7;
234  else if(w0 == w8) b += 8;
235  else if(w0 == w9) b += 9;
236  else if(w0 == v0) b += 10;
237  else if(w0 == h0) b += 11;
238  crow->w[j] = w0;
239  }
240  }
241  //best from the last row
242  bi.maxi = bi.ilen-1;// == ilen-2
243  int wmax = crow->w[0];
244  int jmax = 0;
245  for(j=1;j<jlen;++j) {
246  if(wmax <= crow->w[j]) {
247  wmax = crow->w[j];
248  jmax = j;
249  }
250  }
251  bi.maxj = jmax - 1;//shift to back align coord
252 /*
253  bi.maxi = bi.ilen-1;// == ilen-2
254  bi.maxj = bi.jlen-1;// == jlen-2
255  return crow->w[jlen-1];
256 */
257  return wmax;
258 }
259 
260 int FindFGapIntronNog(const CProSplignInterrupt& interrupt, vector<pair<int, int> >& igi/*to return end gap/intron set*/, const PSEQ& pseq, const CNSeq& nseq, bool& left_gap, bool& right_gap, const CProSplignScaledScoring& scoring, const CSubstMatrix& matrix)
261 {
262  CIgapIntronPool pool;
263 
264  left_gap = false;
265  right_gap = false;
266  if(nseq.size() < 1) return 0;
267  // in matrices letters starts at [1]
268  int ilen = (int)pseq.size() + 1;
269  int jlen = nseq.size() + 1;
270  CFindGapIntronRow row1(jlen, scoring, pool), row2(jlen, scoring, pool);
271  CFindGapIntronRow *crow = &row1, *prow = &row2;
272 
273  int i, j;
274  int wmax = 0;//max score in the last column
275  //int imax = 0;//row number for wmax
276  int jmax = jlen - 1;//column number for wmax
277  CIgapIntronChain lsb; //last column best, corresponds to wmax
278  lsb.SetPool(pool);
279  lsb.Creat(0, jmax);
280 
281  CFastIScore fiscore;
282  fiscore.Init(nseq, matrix);
283  CFIntron fin(nseq, scoring);
284  // ** prepare for main loop
285  //penalties
286  // CScoring::Init();
287  int e = scoring.sm_Ine;
288  int g = scoring.sm_Ig;
289  int f = scoring.sm_If;
290  int pev1 = - g - 3*e;
291  int pev2 = - 3*e;
292  int pe2 = - f - 2*e;
293  int pe3 = - (f - g) - 2*e;
294  int pe4 = - f - e;
295  int pe5 = - (f - g) - e;
296  int pe6 = f - g - e;
297  int *cv, *cw, *pv, *pv1, *pw1, *pw3, *ch1, *ch2, *ch3;
298 
299  //first row, i.e. i=0
300  crow->w[0] = 0;
301  crow->v[0] = infinity;
302  for(j=1;j<jlen;j++) {
303  crow->w[j] = 0;
304  crow->v[j] = infinity;
305  crow->wis[j].Creat(0, 0);//to differ initial gap from intron starting at 0
306  crow->wis[j].Expand(crow->wis[j], 0, j);
307  }
308 
309  // ******* MAIN LOOP ******************
310  int jlen_1 = jlen - 1;
311  for(i=1;i<ilen;++i) {
312  swap(crow, prow);
313  crow->w[0] = 0;
314  crow->w[1] = 0;
315  crow->w[2] = 0;
316  crow->v[0] = crow->v[1] = crow->v[2] = infinity;
317  crow->h1[0] = crow->h1[1] = crow->h1[2] = infinity;
318  crow->h2[0] = crow->h2[1] = crow->h2[2] = infinity;
319  crow->h3[0] = crow->h3[1] = crow->h3[2] = infinity;
320  int h1 = infinity;
321  int h2 = infinity;
322  int h3 = infinity;
323  //extra init for intron scoring
324  fin.InitRowScores(crow, prow->m_w, 3);
325  fiscore.SetAmin(pseq[i-1], matrix);
326  // pointers
327  cv = &crow->v[2];
328  ch1 = &crow->h1[2];
329  ch2 = &crow->h2[2];
330  ch3 = &crow->h3[2];
331  cw = &crow->w[2];
332  pw3 = &prow->w[0];
333  pw1 = &prow->w[2];
334  pv1 = &prow->v[2];
335  pv = &prow->v[0];
336  // ******* INTERNAL LOOP ******************
337  for(j=3;j<jlen_1;++j) {
338  interrupt.CheckUserInterrupt();
339  const CBestI& bei = fin.Step(j, scoring, fiscore);
340  //rest
341  int w1 = *pw3 + fiscore.GetScore();
342  int w2 = *pw1 + pe2;
343  int w3 = *pv1 + pe3;
344  int w4 = *++pw3 + pe4;
345  int w5 = *++pv + pe5;
346  //best v-gap
347  int v0 = *++pw1 + pev1;
348  int v2 = *++pv1 + pev2;
349  if(bei.v > v0 && bei.v > v2) {
350  v0 = bei.v;
351  int len = fin.GetVlen(j, scoring);
352  crow->vis[j].Expand(crow->vis[j - len], j - len, len);
353  } else if(v2 > v0) {
354  v0 = v2;
355  crow->vis[j].Copy(prow->vis[j]);
356  } else {
357  crow->vis[j].Copy(prow->wis[j]);
358  }
359  *++cv = v0;
360  //best h-gap
361  int h12 = h3 + pe5;
362  h3 = h2 + pe6;
363  if(bei.h3 > h3) {
364  h3 = bei.h3;
365  int len = fin.GetH3len(j, scoring);
366  crow->h3is[j].Expand(crow->h3is[j - len], j - len, len);
367  } else {
368  crow->h3is[j].Copy(crow->h2is[j - 1]);
369  }
370  h2 = h1 - e;
371  if(bei.h2 > h2) {
372  h2 = bei.h2;
373  int len = fin.GetH2len(j, scoring);
374  crow->h2is[j].Expand(crow->h2is[j - len], j - len, len);
375  } else {
376  crow->h2is[j].Copy(crow->h1is[j - 1]);
377  }
378  h1 = *cw + pe4;
379  if(bei.h1 > h1 && bei.h1 > h12) {
380  h1 = bei.h1;
381  int len = fin.GetH1len(j, scoring);
382  crow->h1is[j].Expand(crow->h1is[j - len], j - len, len);
383  } else if(h12 > h1) {
384  h1 = h12;
385  crow->h1is[j].Copy(crow->h3is[j - 1]);
386  } else {
387  crow->h1is[j].Copy(crow->wis[j - 1]);
388  }
389  *++ch1 = h1;
390  *++ch2 = h2;
391  *++ch3 = h3;
392  //max
393  int w0 = max(w1, max(w2, max(w3, max(w4, max(w5, max(h1, max(h2, max(h3, max(v0, max(bei.w2, max(bei.w1, bei.w)))))))))));
394  if(w0 == w1) crow->wis[j].Copy(prow->wis[j - 3]);
395  else if(w0 == v0) crow->wis[j].Copy(crow->vis[j]);
396  else if(w0 == h3) crow->wis[j].Copy(crow->h3is[j]);
397  else if(w0 == h1) crow->wis[j].Copy(crow->h1is[j]);
398  else if(w0 == h2) crow->wis[j].Copy(crow->h2is[j]);
399  else if(w0 == w2) crow->wis[j].Copy(prow->wis[j - 1]);
400  else if(w0 == w3) crow->wis[j].Copy(prow->vis[j - 1]);
401  else if(w0 == w4) crow->wis[j].Copy(prow->wis[j - 2]);
402  else if(w0 == w5) crow->wis[j].Copy(prow->vis[j - 2]);
403  else if(w0 == bei.w1) {
404  int len = fin.GetW1len(j, scoring);
405  crow->wis[j].Expand(prow->wis[j - len - 3], j - len - 2, len);
406  } else if(w0 == bei.w2) {
407  int len = fin.GetW2len(j, scoring);
408  crow->wis[j].Expand(prow->wis[j - len - 3], j - len - 1, len);
409  } else { //w == bei.w
410  int len = fin.GetWlen(j, scoring);
411  crow->wis[j].Expand(crow->wis[j - len], j - len, len);
412  }
413  *++cw = w0;
414  }
415  //the last column !
416  if(2 < jlen_1) { //j == jlen_1
417  const CBestI bei = fin.Step(j, scoring, fiscore);
418  //rest
419  int w1 = *pw3 + fiscore.GetScore();
420  int w12 = prow->w[j-1];
421  int w13 = prow->w[j-2];
422  //best h-gap
423  int h12 = h3 + pe5;
424  h3 = h2 + pe6;
425  if(bei.h3 > h3) {
426  h3 = bei.h3;
427  int len = fin.GetH3len(j, scoring);
428  crow->h3is[j].Expand(crow->h3is[j - len], j - len, len);
429  } else {
430  crow->h3is[j].Copy(crow->h2is[j - 1]);
431  }
432  h2 = h1 - e;
433  if(bei.h2 > h2) {
434  h2 = bei.h2;
435  int len = fin.GetH2len(j, scoring);
436  crow->h2is[j].Expand(crow->h2is[j - len], j - len, len);
437  } else {
438  crow->h2is[j].Copy(crow->h1is[j - 1]);
439  }
440  h1 = *cw + pe4;
441  if(bei.h1 > h1 && bei.h1 > h12) {
442  h1 = bei.h1;
443  int len = fin.GetH1len(j, scoring);
444  crow->h1is[j].Expand(crow->h1is[j - len], j - len, len);
445  } else if(h12 > h1) {
446  h1 = h12;
447  crow->h1is[j].Copy(crow->h3is[j - 1]);
448  } else {
449  crow->h1is[j].Copy(crow->wis[j - 1]);
450  }
451  //max
452  int w0 = max(w1, max(w12, max(w13, max(h1, max(h2, max(h3, max(bei.w2, max(bei.w1, bei.w))))))));
453  if(w0 == w1) crow->wis[j].Copy(prow->wis[j - 3]);
454  else if(w0 == h3) crow->wis[j].Copy(crow->h3is[j]);
455  else if(w0 == h1) crow->wis[j].Copy(crow->h1is[j]);
456  else if(w0 == h2) crow->wis[j].Copy(crow->h2is[j]);
457  else if(w0 == w12) crow->wis[j].Copy(prow->wis[j - 1]);
458  else if(w0 == w13) crow->wis[j].Copy(prow->wis[j - 2]);
459  else if(w0 == bei.w1) {
460  int len = fin.GetW1len(j, scoring);
461  crow->wis[j].Expand(prow->wis[j - len - 3], j - len - 2, len);
462  } else if(w0 == bei.w2) {
463  int len = fin.GetW2len(j, scoring);
464  crow->wis[j].Expand(prow->wis[j - len - 3], j - len - 1, len);
465  } else { //w == bei.w
466  int len = fin.GetWlen(j, scoring);
467  crow->wis[j].Expand(crow->wis[j - len], j - len, len);
468  }
469  crow->w[j] = w0;
470  }
471  prow->ClearIIC();
472  //remember the best W in the last column
473  if(wmax <= crow->w[jlen - 1]) {
474  wmax = crow->w[jlen - 1];
475  //imax = i;
476  lsb.Clear();
477  lsb.Copy(crow->wis[jlen - 1]);
478  }
479  }
480  //at this point wmax - best from the last column
481  //find best from the last row
482  for(j=1;j<jlen;++j) {
483  if(wmax <= crow->w[j]) {
484  wmax = crow->w[j];
485  //imax = ilen - 1;
486  jmax = j;
487  }
488  }
489  igi.clear();
490  CIgapIntron *tmp;
491  if(jmax<jlen-1) {//there is an end gap
492  right_gap = true;
493  igi.push_back(make_pair(jmax, jlen - jmax - 1));
494  tmp = crow->wis[jmax].m_Top;
495  } else tmp = lsb.m_Top;
496  while(tmp) {
497  if(tmp->m_Len > 0) igi.push_back(make_pair(tmp->m_Beg, tmp->m_Len));
498  else left_gap = true;
499  tmp = tmp->m_Prev;
500  }
501  reverse(igi.begin(), igi.end());
502  if(left_gap) _ASSERT(!igi.empty() && igi.front().first == 0);
503  crow->ClearIIC();
504  lsb.Clear();
505  return wmax;
506 }
507 
508 int FindIGapIntrons(const CProSplignInterrupt& interrupt, vector<pair<int, int> >& igi/*to return end gap/intron set*/, const PSEQ& pseq, const CNSeq& nseq, int g/*gap opening*/, int e/*one nuc extension cost*/,
509  int f/*frameshift opening cost*/, const CProSplignScaledScoring& scoring, const CSubstMatrix& matrix)
510 {
511  // in matrices letters starts at [1]
512 
513  CIgapIntronPool pool;
514 
515  int ilen = (int)pseq.size() + 1;
516  int jlen = nseq.size() + 1;
517  CAlignInfo row1(jlen, pool), row2(jlen, pool);
518  CAlignInfo *crow = &row1, *prow = &row2;
519 
520  int i, j;
521  //first row, i.e. i=0
522  for(j=0;j<jlen;j++) {
523  crow->w[j] = 0;
524  crow->wis[j].Creat(0, j);
525  crow->v[j] = infinity;
526  crow->fv[j] = infinity;
527  }
528 
529  // ******* MAIN LOOP ******************
530  for(i=1;i<ilen;i++) {
531  swap(crow, prow);
532  //do not need to init intron-gap chain where no end H-gap
533  crow->w[0] = - g - i*e*3;
534  //second column
535  crow->w[1] = crow->v[1] = - g - i*e*3;
536  crow->wis[1].Creat(0, 1);
537  crow->vis[1].Copy(crow->wis[1]);
538  crow->fv[1] = - f - (i*3 - 1)*e;
539  //third column
540  crow->w[2] = crow->v[2] = - g - i*e*3;
541  crow->wis[2].Creat(0, 2);
542  crow->vis[2].Copy(crow->wis[2]);
543  crow->fv[2] = - f - (i*3 - 2)*e;
544 
545  crow->h[0] = crow->h[1] = crow->h[2] = infinity;
546  crow->fh[0] = crow->fh[1] = crow->fh[2] = infinity;
547  CBestIntron chin(/*i, */2, pseq[i-1], *prow, *crow, nseq, scoring);
548  CHIntronScore spl101, spl102, spl103, spl104, spl105;
549  CHIntronScore dspl101, dspl102, dspl103, dspl104;
550  for(j=3;j<jlen;j++) {
551  interrupt.CheckUserInterrupt();
552  chin.NucStep(scoring, matrix);
553  int d1, d2, d3, d4, d5, d6;
554  int h1, h2;
555  int v1, v2;
556  int fv1, fv2, fv3;
557  int fh1, fh2, fh3;
558  int d101, d102, d103, d104, h101, h102, h103, h104, h105, fv101, fv102, fh101, v101, s0;
559  int d0, v0, h0, fv0, fh0, w0; //maximum values
560 
561  dspl101 = chin.GetW1(matrix);
562  d101 = dspl101.first;
563  dspl102 = chin.GetW2();
564  d102 = dspl102.first;
565  dspl103 = chin.Getw111();
566  d103 = dspl103.first -f - e;
567  dspl104 = chin.Getfv111();
568  d104 = dspl104.first - e;
569  d1 = prow->w[j-3] + matrix.MultScore(nseq[j-3], nseq[j-2], nseq[j-1], pseq[i-1]);
570  d2 = prow->w[j-1] - f - 2*e;
571  d3 = prow->fv[j-1] - 2*e;
572  d4 = prow->v[j-1] - (f - g) - 2*e;
573  d5 = prow->w[j-2] - f - e;
574  d6 = prow->fv[j-2] - e;
575  d0 = max(d1, max(d2, max(d3, max(d4, max(d5, max(d6, max(d101, max(d102, max(d103, d104)))))))));
576 
577  spl101 = chin.Geth000();
578  h101 = spl101.first;
579  spl102 = chin.Getw012();
580  h102 = spl102.first - g - 3*e;
581  spl103 = chin.Geth012();
582  h103 = spl103.first - 3*e;
583  spl104 = chin.Getw021();
584  h104 = spl104.first - g - 3*e;
585  spl105 = chin.Geth021();
586  h105 = spl105.first - 3*e;
587  h1 = crow->w[j-3] - g - 3*e;
588  h2 = crow->h[j-3] - 3*e;
589  h0 = max(h1, max(h2, max(h101, max(h102, max(h103, max(h104, h105))))));
590  if(h0 == h1) crow->his[j].Copy(crow->wis[j-3]);
591  else if(h0 == h2) crow->his[j].Copy(crow->his[j-3]);
592  else if(h0 == h101) crow->his[j].Expand(crow->his[j-spl101.second], j-spl101.second, spl101.second);
593  else if(h0 == h102) crow->his[j].Expand(crow->wis[j-3-spl102.second], j-2-spl102.second, spl102.second);
594  else if(h0 == h103) crow->his[j].Expand(crow->his[j-3-spl103.second], j-2-spl103.second, spl103.second);
595  else if(h0 == h104) crow->his[j].Expand(crow->wis[j-3-spl104.second], j-1-spl104.second, spl104.second);
596  else if(h0 == h105) crow->his[j].Expand(crow->his[j-3-spl105.second], j-1-spl105.second, spl105.second);
597  crow->h[j] = h0;
598 
599  spl101 = chin.Getfh000();
600  fh101 = spl101.first;
601  fh1 = crow->w[j-1] - f - e;
602  fh2 = crow->fh[j-1] - e;
603  fh3 = crow->h[j-1] - (f-g) - e;
604  fh0 = max(fh1, max(fh2, max(fh3, fh101)));
605  if(fh0 == fh1) crow->fhis[j].Copy(crow->wis[j-1]);
606  else if(fh0 == fh2) crow->fhis[j].Copy(crow->fhis[j-1]);
607  else if(fh0 == fh3) crow->fhis[j].Copy(crow->his[j-1]);
608  else crow->fhis[j].Expand(crow->fhis[j-spl101.second], j-spl101.second, spl101.second);
609  crow->fh[j] = fh0;
610 
611  spl101 = chin.Getfv000();
612  fv101 = spl101.first;
613  spl102 = chin.Getw111();
614  fv102 = spl102.first - f - e;
615  fv1 = prow->w[j-2] - f - e;
616  fv2 = prow->w[j-1] - f -2*e;
617  fv3 = prow->fv[j] - 3*e;
618  fv0 = max(fv1, max(fv2, max(fv3, max(fv101, fv102))));
619  if(fv0 == fv1) crow->fvis[j].Copy(prow->wis[j-2]);
620  else if(fv0 == fv2) crow->fvis[j].Copy(prow->wis[j-1]);
621  else if(fv0 == fv3) crow->fvis[j].Copy(prow->fvis[j]);
622  else if(fv0 == fv101) crow->fvis[j].Expand(crow->fvis[j-spl101.second], j-spl101.second, spl101.second);
623  else if(fv0 == fv102) crow->fvis[j].Expand(prow->wis[j-2-spl102.second], j-1-spl102.second, spl102.second);
624  crow->fv[j] = fv0;
625 
626  spl101 = chin.Getv000();
627  v101 = spl101.first;
628  v1 = prow->w[j] - g - 3*e;
629  v2 = prow->v[j] - 3*e;
630  v0 = max(v1, max(v2, v101));
631  if(v0 == v1) crow->vis[j].Copy(prow->wis[j]);
632  else if(v0 == v2) crow->vis[j].Copy(prow->vis[j]);
633  else if(v0 == v101) crow->vis[j].Expand(crow->vis[j-spl101.second], j-spl101.second, spl101.second);
634  crow->v[j] = v0;
635 
636  spl101 = chin.Getw000();
637  s0 = spl101.first;
638 
639  w0 = max(d0, max(v0, max(h0, max(fv0, max(fh0, s0)))));
640  if(w0 == d0) {
641  if(d0 == d1) crow->wis[j].Copy(prow->wis[j-3]);
642  else if(d0 == d2) crow->wis[j].Copy(prow->wis[j-1]);
643  else if(d0 == d3) crow->wis[j].Copy(prow->fvis[j-1]);
644  else if(d0 == d4) crow->wis[j].Copy(prow->vis[j-1]);
645  else if(d0 == d5) crow->wis[j].Copy(prow->wis[j-2]);
646  else if(d0 == d6) crow->wis[j].Copy(prow->fvis[j-2]);
647  else if(d0 == d101) crow->wis[j].Expand(prow->wis[j-3-dspl101.second], j - 2 - dspl101.second, dspl101.second);
648  else if(d0 == d102) crow->wis[j].Expand(prow->wis[j-3-dspl102.second], j - 1 - dspl102.second, dspl102.second);
649  else if(d0 == d103) crow->wis[j].Expand(prow->wis[j-2-dspl103.second], j - 1 - dspl103.second, dspl103.second);
650  else if(d0 == d104) crow->wis[j].Expand(prow->fvis[j-2-dspl104.second], j - 1 - dspl104.second, dspl104.second);
651  }
652  else if(w0 == h0) crow->wis[j].Copy(crow->his[j]);
653  else if(w0 == v0) crow->wis[j].Copy(crow->vis[j]);
654  else if(w0 == fv0) crow->wis[j].Copy(crow->fvis[j]);
655  else if(w0 == fh0) crow->wis[j].Copy(crow->fhis[j]);
656  else crow->wis[j].Expand(crow->wis[j-spl101.second], j-spl101.second, spl101.second);
657  crow->w[j] = w0;
658 
659  }
660  prow->ClearIIC();
661 
662  }
663  //best from the last row
664  int wmax = crow->w[0];
665  int jmax = 0;
666  for(j=1;j<jlen;++j) {
667  if(wmax < crow->w[j]) {
668  wmax = crow->w[j];
669  jmax = j;
670  }
671  }
672  igi.clear();
673  if(jmax<jlen-1) igi.push_back(make_pair(jmax, jlen - jmax - 1));
674  CIgapIntron *tmp = crow->wis[jmax].m_Top;
675  while(tmp) {
676  if(tmp->m_Len > 0) igi.push_back(make_pair(tmp->m_Beg, tmp->m_Len));
677  tmp = tmp->m_Prev;
678  }
679  reverse(igi.begin(), igi.end());
680  crow->ClearIIC();//not really needed, it will die by itself
681  return wmax;
682 }
683 
684 void FrBackAlign(CBackAlignInfo& bi, CAli& ali) {
685  CAliCreator alic(ali);
686  int i, j;
687  for(i = bi.ilen-1; i>bi.maxi; --i) {
688  alic.Add(eVP, 3);
689  }
690  for(j = bi.jlen-1; j>bi.maxj; --j) {
691  alic.Add(eHP, 1);
692  }
693  int curGAPmode = eD;
694  while(i>=0 && j>=0) {
695  char b = bi.b[i][j];
696  char h1mode = b&64;
697  char vmode = b&32;
698  char hmode = b&16;
699  b &= 0xf;
700  if(b == 12) {//handles new case (FrAlignNog() and FrAlignNog1()), does not affect old one (FrAlign())
701  alic.Add(eVP, 2);
702  alic.Add(eMP, 1);
703  i -= 1;
704  j -= 1;
705  curGAPmode = eD;
706  } else if(b == 13) {//handles new case (FrAlignNog() and FrAlignNog1()), does not affect old one (FrAlign())
707  alic.Add(eVP, 1);
708  alic.Add(eMP, 2);
709  i -= 1;
710  j -= 2;
711  curGAPmode = eD;
712  } else if( curGAPmode == eV || ( curGAPmode == eD && b == 10 ) ) {
713  alic.Add(eVP, 3);
714  i -= 1;
715  if(vmode) curGAPmode = eV;
716  else curGAPmode = eD;
717  } else if(curGAPmode == eH || ( curGAPmode == eD && b == 11 ) ) {
718  alic.Add(eHP, 3);
719  j -= 3;
720  if(hmode) curGAPmode = eH;
721  else curGAPmode = eD;
722  } else if(curGAPmode == eH3 || ( curGAPmode == eD && b == 15) ) {//FrAlignNog1() only
723  alic.Add(eHP, 2);
724  j -= 2;
725  curGAPmode = eH1;
726  } else if(curGAPmode == eH2 || ( curGAPmode == eD && b == 14) ) {//FrAlignNog1() only
727  alic.Add(eHP, 1);
728  j -= 1;
729  curGAPmode = eH1;
730  } else if(curGAPmode == eH1 || ( curGAPmode == eD && b == 0) ) {//FrAlignNog1() only
731  alic.Add(eHP, 1);
732  j -= 1;
733  if(h1mode) curGAPmode = eH3;
734  else curGAPmode = eD;
735  } else {//eD mode
736  if(b == 1) {
737  alic.Add(eMP, 3);
738  i -= 1;
739  j -= 3;
740  } else if( b == 2 || b == 3 ) {
741  alic.Add(eMP, 1);
742  alic.Add(eVP, 2);
743  i -= 1;
744  j -= 1;
745  } else if( b == 4 || b == 5 ) {
746  alic.Add(eMP, 2);
747  alic.Add(eVP, 1);
748  i -= 1;
749  j -= 2;
750  } else if( b == 6 || b == 7 ) {
751  alic.Add(eHP, 1);
752  j -= 1;
753  } else if( b == 8 || b == 9 ) {
754  alic.Add(eHP, 2);
755  j -= 2;
756  }
757  else NCBI_THROW(CProSplignException, eBackAli, "wrong value for FR back alignment");
758  if( b == 3 || b == 5 ) curGAPmode = eV;
759  else if( b == 7 || b == 9 ) curGAPmode = eH;
760  }
761  }
762  for(int j1 = j; j1 >= 0; j1--) {//rest of nuc
763  alic.Add(eHP, 1);
764  }
765  for(int i1 = i; i1>= 0 ; i1--) { //rest of amins
766  alic.Add(eVP, 3);
767  }
768 
769  alic.Fini();
770 }
771 
772 
773 
774 int FrAlignFNog1(const CProSplignInterrupt& interrupt, CBackAlignInfo& bi, const PSEQ& pseq, const CNSeq& nseq,
775  // int g/*gap opening*/, int e/*one nuc extension cost*/, int f/*frameshift opening cost*/,
776  const CProSplignScaledScoring& scoring, const CSubstMatrix& matrix,
777  bool left_gap, bool right_gap)
778 {
779  int g/*gap opening*/ = scoring.GetGapOpeningCost();
780  int e/*one nuc extension cost*/ = scoring.GetGapExtensionCost();
781  int f/*frameshift opening cost*/ = scoring.GetFrameshiftOpeningCost();
782 
783  if(nseq.size() < 1) return 0;
784  int ilen = (int)pseq.size() + 1;
785  int jlen = nseq.size() + 1;
786  CFrAlignRow row1(jlen), row2(jlen);
787  CFrAlignRow *crow = &row1, *prow = &row2;
788  int i, j;
789  CFastIScore fiscore;
790  fiscore.Init(nseq, matrix);
791  //first row, i.e. i=0
792  for(j=0;j<jlen;j++) {
793  crow->w[j] = 0;
794  crow->v[j] = infinity;
795  }
796  int wmax = 0;
797  int imax = 0;
798  int jmax = jlen - 1;
799  // ** prepare for main loop
800  //penalties
801  int pev1 = - g - 3*e;
802  int pev2 = - 3*e;
803  int pe2 = - f - 2*e;
804  int pe3 = - (f - g) - 2*e;
805  int pe4 = - f - e;
806  int pe5 = - (f - g) - e;
807  int pe6 = f - g - e;
808  char *pb, bb;
809  int *cv, *cw, *pv, *pv1, *pw1, *pw3;
810  // ******* MAIN LOOP ******************
811  int jlen_1;
812  if(right_gap) {//penalty for end gap because there was a H-gap on the first stage
813  jlen_1 = jlen;
814  } else {
815  jlen_1 = jlen - 1;
816  }
817  for(i=1;i<ilen;++i) {
818  swap(crow, prow);
819  if(left_gap) {//penalty for end gap because there was a H-gap on the first stage
820  crow->w[0] = -g -3*e*i;
821  crow->w[1] = - f - (i*3 - 1)*e;
822  crow->w[2] = - f - (i*3 - 2)*e;
823  } else {
824  crow->w[0] = 0;
825  crow->w[1] = 0;
826  crow->w[2] = 0;
827  }
828  bi.b[i-1][0] = 3;
829  bi.b[i-1][1] = 5;
830  /**/
831  crow->v[1] = crow->v[2] = infinity;
832  int h1 = infinity;
833  int h2 = infinity;
834  int h3 = infinity;
835  pb = &bi.b[i-1][1];
836  cv = &crow->v[2];
837  cw = &crow->w[2];
838  pw3 = &prow->w[0];
839  pw1 = &prow->w[2];
840  pv1 = &prow->v[2];
841  pv = &prow->v[0];
842  fiscore.SetAmin(pseq[i-1], matrix);
843  for(j=3;j<jlen_1;++j) {
844  interrupt.CheckUserInterrupt();
845  bb = 0;
846  //rest
847  int w1 = *pw3 + fiscore.GetScore();
848  int w2 = *pw1 + pe2;
849  int w3 = *pv1 + pe3;
850  int w4 = *++pw3 + pe4;
851  int w5 = *++pv + pe5;
852  //best v-gap
853  int v0 = *++pw1 + pev1;
854  int v2 = *++pv1 + pev2;
855  if(v2 > v0) {
856  v0 = v2;
857  bb |= 32;
858  }
859  *++cv = v0;
860  //best h-gap
861  int h12 = h3 + pe5;
862  h3 = h2 + pe6;
863  h2 = h1 - e;
864  h1 = *cw + pe4;
865  if(h12 > h1) {
866  h1 = h12;
867  bb |= 64;
868  }
869  int w0 = max(w1, max(w2, max(w3, max(w4, max(w5, max(h1, max(h2, max(h3, v0))))))));
870  if(w0 == w1) bb += 1;
871  else if(w0 == v0) bb += 10;
872  else if(w0 == h3) bb += 15;
873  else if(w0 == h2) bb += 14;
874  else if(w0 == w2) bb += 2;
875  else if(w0 == w3) bb += 3;
876  else if(w0 == w4) bb += 4;
877  else if(w0 == w5) bb += 5;
878  //default w=h1, b += 0;
879  *++cw = w0;
880  *++pb = bb;
881  }
882  //the last column !
883  if(!right_gap) {//last column is different if we don't charge for end gap
884  if(2 < jlen_1) { //j == jlen_1
885  _ASSERT(j == jlen_1);
886  char& b = bi.b[i-1][j-1];
887  b = 0;
888  //v-gap is not needed
889  //best h-gap
890  int h12 = h3 + pe5;
891  h3 = h2 + pe6;
892  h2 = h1 - e;
893  h1 = crow->w[j-1] + pe4;
894  if(h12 > h1) {
895  h1 = h12;
896  b |= 64;
897  }
898  //rest
899  int w1 = prow->w[j-3] + matrix.MultScore(nseq[j-3], nseq[j-2], nseq[j-1], pseq[i-1]);
900  int w12 = prow->w[j-1];
901  int w13 = prow->w[j-2];
902  int w0 = max(w1, max(w12, max(w13, max(h1, max(h2, h3)))));
903  if(w0 == w1) b += 1;
904  else if(w0 == w12) b += 12;
905  else if(w0 == w13) b += 13;
906  else if(w0 == h2) b += 14;
907  else if(w0 == h3) b += 15;
908  //default w=h1, b += 0;
909  crow->w[j] = w0;
910  }
911  //remember the best W in the last column
912  if(wmax <= crow->w[jlen - 1]) {
913  wmax = crow->w[jlen - 1];
914  imax = i;
915  }
916  } else {//regular score for the right v-gap
917  wmax = crow->w[jlen - 1];
918  imax = i;
919  jmax = jlen - 1;
920  }
921  }
922  //at this point wmax - best from the last column
923  //find best from the last row
924  for(j=1;j<jlen;++j) {
925  if(wmax <= crow->w[j]) {
926  wmax = crow->w[j];
927  imax = ilen - 1;
928  jmax = j;
929  }
930  }
931  bi.maxi = imax - 1;//shift to back align coord
932  bi.maxj = jmax - 1;//shift to back align coord
933  return wmax;
934 }
935 
936 
937 int AlignFNog(const CProSplignInterrupt& interrupt, CTBackAlignInfo<CBMode>& bi, const PSEQ& pseq, const CNSeq& nseq, const CProSplignScaledScoring& scoring, const CSubstMatrix& matrix)
938 {
939  if(nseq.size() < 1) return 0;
940  int ilen = (int)pseq.size() + 1;
941  int jlen = nseq.size() + 1;
942  CAlignRow row1(jlen, scoring), row2(jlen, scoring);
943  CAlignRow *crow = &row1, *prow = &row2;
944  int i, j;
945  CFastIScore fiscore;
946  fiscore.Init(nseq, matrix);
947  CFIntron fin(nseq, scoring);
948  //first row, i.e. i=0
949  for(j=0;j<jlen;j++) {
950  crow->w[j] = 0;
951  crow->v[j] = infinity;
952  }
953  int wmax = 0;
954  int imax = 0;
955  int jmax = jlen - 1;
956  // ** prepare for main loop
957  //penalties
958  // CScoring::Init();
959  int e = scoring.sm_Ine;
960  int g = scoring.sm_Ig;
961  int f = scoring.sm_If;
962  int pev1 = - g - 3*e;
963  int pev2 = - 3*e;
964  int pe2 = - f - 2*e;
965  int pe3 = - (f - g) - 2*e;
966  int pe4 = - f - e;
967  int pe5 = - (f - g) - e;
968  int pe6 = f - g - e;
969  int *cv, *cw, *pv, *pv1, *pw1, *pw3, *ch1, *ch2, *ch3;
970  // ******* MAIN LOOP ******************
971  for(i=1;i<ilen;++i) {
972  swap(crow, prow);
973  //set first few columns
974  crow->w[0] = 0;
975  crow->w[1] = 0;
976  bi.b[i-1][0].wmode = 4;
977  crow->w[2] = 0;
978  bi.b[i-1][1].wmode = 6;
979  crow->v[1] = crow->v[2] = infinity;
980  int h1 = infinity;
981  int h2 = infinity;
982  int h3 = infinity;
983  //extra init for intron scoring
984  fin.InitRowScores(crow, prow->m_w, 3);
985  // pointers
986  CBMode *pb = &bi.b[i-1][2];
987  cv = &crow->v[2];
988  ch1 = &crow->h1[2];
989  ch2 = &crow->h2[2];
990  ch3 = &crow->h3[2];
991  cw = &crow->w[2];
992  pw3 = &prow->w[0];
993  pw1 = &prow->w[2];
994  pv1 = &prow->v[2];
995  pv = &prow->v[0];
996  fiscore.SetAmin(pseq[i-1], matrix);
997  int jlen_1 = jlen - 1;
998  // ******* INTERNAL LOOP ******************
999  for(j=3;j<jlen_1;++j) {
1000  interrupt.CheckUserInterrupt();
1001  int bb = 0;
1002  const CBestI& bei = fin.Step(j, scoring, fiscore);
1003  //rest
1004  int w1 = *pw3 + fiscore.GetScore();
1005  int w2 = *pw1 + pe2;
1006  int w3 = *pv1 + pe3;
1007  int w4 = *++pw3 + pe4;
1008  int w5 = *++pv + pe5;
1009  //best v-gap
1010  int v0 = *++pw1 + pev1;
1011  int v2 = *++pv1 + pev2;
1012  if(bei.v > v0 && bei.v > v2) {
1013  v0 = bei.v;
1014  pb->vlen = fin.GetVlen(j, scoring);
1015  bb |= 32;
1016  } else if(v2 > v0) {
1017  v0 = v2;
1018  bb |= 512;
1019  }
1020  *++cv = v0;
1021  //best h-gap
1022  int h12 = h3 + pe5;
1023  h3 = h2 + pe6;
1024  if(bei.h3 > h3) {
1025  h3 = bei.h3;
1026  pb->h3len = fin.GetH3len(j, scoring);
1027  bb |= 256;
1028  }
1029  h2 = h1 - e;
1030  if(bei.h2 > h2) {
1031  h2 = bei.h2;
1032  pb->h2len = fin.GetH2len(j, scoring);
1033  bb |= 128;
1034  }
1035  h1 = *cw + pe4;
1036  if(bei.h1 > h1 && bei.h1 > h12) {
1037  h1 = bei.h1;
1038  pb->h1len = fin.GetH1len(j, scoring);
1039  bb |= 64;
1040  } else if(h12 > h1) {
1041  h1 = h12;
1042  bb |= 1024;
1043  }
1044  *++ch1 = h1;
1045  *++ch2 = h2;
1046  *++ch3 = h3;
1047  //max
1048  int w0 = max(w1, max(w2, max(w3, max(w4, max(w5, max(h1, max(h2, max(h3, max(v0, max(bei.w2, max(bei.w1, bei.w)))))))))));
1049  if(w0 == w1) bb += 3;
1050  else if(w0 == v0) bb += 1;
1051  else if(w0 == h3) bb += 11;
1052  else if(w0 == h1) bb += 8;
1053  else if(w0 == h2) bb += 10;
1054  else if(w0 == w2) bb += 4;
1055  else if(w0 == w3) bb += 5;
1056  else if(w0 == w4) bb += 6;
1057  else if(w0 == w5) bb += 7;
1058  else if(w0 == bei.w1) {
1059  bb += 21;
1060  pb->wlen = fin.GetW1len(j, scoring);
1061  } else if(w0 == bei.w2) {
1062  bb += 22;
1063  pb->wlen = fin.GetW2len(j, scoring);
1064  } else { //w == bei.w
1065  bb += 20;
1066  pb->wlen = fin.GetWlen(j, scoring);
1067  }
1068  *++cw = w0;
1069  (pb++)->wmode = bb;
1070  }
1071  //the last column !
1072  if(2 < jlen_1) { //j == jlen_1
1073  int& bb = bi.b[i-1][j-1].wmode;
1074  bb = 0;
1075  const CBestI bei = fin.Step(j, scoring, fiscore);
1076  //rest
1077  int w1 = *pw3 + fiscore.GetScore();
1078  int w12 = prow->w[j-1];
1079  int w13 = prow->w[j-2];
1080  //best h-gap
1081  int h12 = h3 + pe5;
1082  h3 = h2 + pe6;
1083  if(bei.h3 > h3) {
1084  h3 = bei.h3;
1085  pb->h3len = fin.GetH3len(j, scoring);
1086  bb |= 256;
1087  }
1088  h2 = h1 - e;
1089  if(bei.h2 > h2) {
1090  h2 = bei.h2;
1091  pb->h2len = fin.GetH2len(j, scoring);
1092  bb |= 128;
1093  }
1094  h1 = *cw + pe4;
1095  if(bei.h1 > h1 && bei.h1 > h12) {
1096  h1 = bei.h1;
1097  pb->h1len = fin.GetH1len(j, scoring);
1098  bb |= 64;
1099  } else if(h12 > h1) {
1100  h1 = h12;
1101  bb |= 1024;
1102  }
1103  //max
1104  int w0 = max(w1, max(w12, max(w13, max(h1, max(h2, max(h3, max(bei.w2, max(bei.w1, bei.w))))))));
1105  if(w0 == w1) bb += 3;
1106  else if(w0 == h3) bb += 11;
1107  else if(w0 == h1) bb += 8;
1108  else if(w0 == h2) bb += 10;
1109  else if(w0 == w12) bb += 12;
1110  else if(w0 == w13) bb += 13;
1111  else if(w0 == bei.w1) {
1112  bb += 21;
1113  pb->wlen = fin.GetW1len(j, scoring);
1114  } else if(w0 == bei.w2) {
1115  bb += 22;
1116  pb->wlen = fin.GetW2len(j, scoring);
1117  } else { //w == bei.w
1118  bb += 20;
1119  pb->wlen = fin.GetWlen(j, scoring);
1120  }
1121  crow->w[j] = w0;
1122  }
1123  //remember the best W in the last column
1124  if(wmax <= crow->w[jlen - 1]) {
1125  wmax = crow->w[jlen - 1];
1126  imax = i;
1127  }
1128  }
1129  //at this point wmax - best from the last column
1130  //find best from the last row
1131  for(j=1;j<jlen;++j) {
1132  if(wmax <= crow->w[j]) {
1133  wmax = crow->w[j];
1134  imax = ilen - 1;
1135  jmax = j;
1136  }
1137  }
1138  bi.maxi = imax - 1;//shift to back align coord
1139  bi.maxj = jmax - 1;//shift to back align coord
1140  return wmax;
1141 }
1142 
1144 {
1145  CAliCreator alic(ali);
1146  int i, j;
1147  for(i = bi.ilen-1; i>bi.maxi; --i) {
1148  alic.Add(eVP, 3);
1149  }
1150  for(j = bi.jlen-1; j>bi.maxj; --j) {
1151  alic.Add(eHP, 1);
1152  }
1153  int curGAPmode = eD;
1154  int vs, h1s, h2s, h3s, vmode, h1mode, wm;
1155  while(i>=0 && j>=0) {
1156  CBMode bm = bi.b[i][j];
1157  wm = bm.wmode;
1158  vs = wm&32;
1159  h1s = wm&64;
1160  h2s = wm&128;
1161  h3s = wm&256;
1162  vmode = wm&512;
1163  h1mode = wm&1024;
1164  wm &= 31;
1165  if(curGAPmode == eV || (curGAPmode == eD && wm == 1)) {
1166  if(vs) {//v-splice
1167  alic.Add(eSP, bm.vlen);
1168  j -= bm.vlen;
1169  curGAPmode = eV;
1170  } else {
1171  alic.Add(eVP, 3);
1172  i -= 1;
1173  if(vmode) curGAPmode = eV;
1174  else curGAPmode = eD;
1175  }
1176  } else if(curGAPmode == eH1 || (curGAPmode == eD && wm == 8)) {
1177  if(h1s) {//h1-splice
1178  alic.Add(eSP, bm.h1len);
1179  j -= bm.h1len;
1180  curGAPmode = eH1;
1181  } else {
1182  alic.Add(eHP, 1);
1183  j -= 1;
1184  if(h1mode) curGAPmode = eH3;
1185  else curGAPmode = eD;
1186  }
1187  } else if(curGAPmode == eH2 || (curGAPmode == eD && wm == 10)) {
1188  if(h2s) {//h2-splice
1189  alic.Add(eSP, bm.h2len);
1190  j -= bm.h2len;
1191  curGAPmode = eH2;
1192  } else {
1193  alic.Add(eHP, 1);
1194  j -= 1;
1195  curGAPmode = eH1;
1196  }
1197  } else if(curGAPmode == eH3 || (curGAPmode == eD && wm == 11)) {
1198  if(h3s) {//h3-splice
1199  alic.Add(eSP, bm.h3len);
1200  j -= bm.h3len;
1201  curGAPmode = eH3;
1202  } else {
1203  alic.Add(eHP, 1);
1204  j -= 1;
1205  curGAPmode = eH2;
1206  }
1207  } else if(curGAPmode == eD && ((wm > 2 && wm < 8)
1208  || wm == 20 || wm == 21 || wm == 22 || wm == 12 || wm == 13)) {
1209  if(wm == 20) {
1210  alic.Add(eSP, bm.wlen);
1211  j -= bm.wlen;
1212  } else if(wm == 21) {
1213  alic.Add(eMP, 2);
1214  alic.Add(eSP, bm.wlen);
1215  alic.Add(eMP, 1);
1216  j -= 3;
1217  j -= bm.wlen;
1218  i -= 1;
1219  } else if(wm == 22) {
1220  alic.Add(eMP, 1);
1221  alic.Add(eSP, bm.wlen);
1222  alic.Add(eMP, 2);
1223  j -= 3;
1224  j -= bm.wlen;
1225  i -= 1;
1226  } else if(wm == 3) {
1227  alic.Add(eMP, 3);
1228  j -= 3;
1229  i -= 1;
1230  } else if(wm == 4 || wm == 5) {
1231  alic.Add(eMP, 1);
1232  alic.Add(eVP, 2);
1233  i -= 1;
1234  j -= 1;
1235  } else if(wm == 6 || wm == 7) {
1236  alic.Add(eMP, 2);
1237  alic.Add(eVP, 1);
1238  i -= 1;
1239  j -= 2;
1240  } else if(wm == 12) {//right end
1241  alic.Add(eVP, 2);
1242  alic.Add(eMP, 1);
1243  i -= 1;
1244  j -= 1;
1245  } else if(wm == 13) {//right end
1246  alic.Add(eVP, 1);
1247  alic.Add(eMP, 2);
1248  i -= 1;
1249  j -= 2;
1250  }
1251  if(wm == 5 || wm == 7) curGAPmode = eV;
1252  } else NCBI_THROW(CProSplignException, eBackAli, "wrong value for 1 stage back alignment");
1253  }
1254  if(( j!= -1 && i != -1) || j < -1 || i < -1) NCBI_THROW(CProSplignException, eBackAli, "wrong data for 1 stage back alignment");
1255  for(int j1 = j; j1 >= 0; j1--) {//rest of nuc
1256  alic.Add(eHP, 1);
1257  }
1258  for(int i1 = i; i1>= 0 ; i1--) { //rest of amins
1259  alic.Add(eVP, 3);
1260  }
1261 
1262  alic.Fini();
1263 }
1264 
1265 
1266 END_SCOPE(prosplign)
@ eMP
Definition: Ali.hpp:44
@ eHP
Definition: Ali.hpp:46
@ eVP
Definition: Ali.hpp:45
@ eSP
Definition: Ali.hpp:47
@ nA
Definition: NSeq.hpp:47
@ nG
Definition: NSeq.hpp:47
@ nT
Definition: NSeq.hpp:47
@ nN
Definition: NSeq.hpp:47
@ nC
Definition: NSeq.hpp:47
vector< char > PSEQ
Definition: PSeq.hpp:48
const int infinity
Definition: nucprot.cpp:52
void FrBackAlign(CBackAlignInfo &bi, CAli &ali)
Definition: nucprot.cpp:684
int FindIGapIntrons(const CProSplignInterrupt &interrupt, vector< pair< int, int > > &igi, const PSEQ &pseq, const CNSeq &nseq, int g, int e, int f, const CProSplignScaledScoring &scoring, const CSubstMatrix &matrix)
Definition: nucprot.cpp:508
int FindFGapIntronNog(const CProSplignInterrupt &interrupt, vector< pair< int, int > > &igi, const PSEQ &pseq, const CNSeq &nseq, bool &left_gap, bool &right_gap, const CProSplignScaledScoring &scoring, const CSubstMatrix &matrix)
Definition: nucprot.cpp:260
int FrAlign(const CProSplignInterrupt &interrupt, CBackAlignInfo &bi, const PSEQ &pseq, const CNSeq &nseq, int g, int e, int f, const CProSplignScaledScoring &, const CSubstMatrix &matrix)
Definition: nucprot.cpp:171
void BackAlignNog(CTBackAlignInfo< CBMode > &bi, CAli &ali)
Definition: nucprot.cpp:1143
int FrAlignFNog1(const CProSplignInterrupt &interrupt, CBackAlignInfo &bi, const PSEQ &pseq, const CNSeq &nseq, const CProSplignScaledScoring &scoring, const CSubstMatrix &matrix, bool left_gap, bool right_gap)
Definition: nucprot.cpp:774
int AlignFNog(const CProSplignInterrupt &interrupt, CTBackAlignInfo< CBMode > &bi, const PSEQ &pseq, const CNSeq &nseq, const CProSplignScaledScoring &scoring, const CSubstMatrix &matrix)
Definition: nucprot.cpp:937
void Add(EAliPieceType type, int len)
Definition: Ali.hpp:82
void Fini(void)
Definition: Ali.hpp:77
Definition: Ali.hpp:60
vector< int > v
Definition: AlignInfo.hpp:53
void ClearIIC(void)
Definition: AlignInfo.cpp:73
vector< int > fv
Definition: AlignInfo.hpp:53
CIgapIntronChain * his
Definition: AlignInfo.hpp:54
CIgapIntronChain * wis
Definition: AlignInfo.hpp:54
CIgapIntronChain * vis
Definition: AlignInfo.hpp:54
vector< int > w
Definition: AlignInfo.hpp:53
vector< int > fh
Definition: AlignInfo.hpp:53
vector< int > h
Definition: AlignInfo.hpp:53
CIgapIntronChain * fhis
Definition: AlignInfo.hpp:54
CIgapIntronChain * fvis
Definition: AlignInfo.hpp:54
int * v
Definition: AlignInfo.hpp:79
int * h2
Definition: AlignInfo.hpp:79
vector< int > m_w
Definition: AlignInfo.hpp:78
int * h1
Definition: AlignInfo.hpp:79
int * h3
Definition: AlignInfo.hpp:79
int * w
Definition: AlignInfo.hpp:79
CHIntronScore Getw111()
Definition: intron.hpp:384
CHIntronScore Getw012()
Definition: intron.hpp:386
CHIntronScore GetW2()
Definition: intron.hpp:383
CHIntronScore Getw021()
Definition: intron.hpp:388
CHIntronScore Getfv111()
Definition: intron.hpp:385
CHIntronScore Getfv000()
Definition: intron.hpp:394
CHIntronScore Geth021()
Definition: intron.hpp:389
CHIntronScore Geth012()
Definition: intron.hpp:387
CHIntronScore Getfh000()
Definition: intron.hpp:393
CHIntronScore Geth000()
Definition: intron.hpp:390
void NucStep(const CProSplignScaledScoring scoring, const CSubstMatrix &matrix)
Definition: intron.hpp:349
CHIntronScore Getw000()
Definition: intron.hpp:392
CHIntronScore Getv000()
Definition: intron.hpp:391
CHIntronScore GetW1(const CSubstMatrix &matrix)
Definition: intron.hpp:381
const CBestI & Step(int j, const CProSplignScaledScoring &scoring, const CFastIScore &fiscore)
Definition: intron.hpp:615
int GetH1len(int j, const CProSplignScaledScoring &scoring) const
Definition: intron.hpp:734
int GetWlen(int j, const CProSplignScaledScoring &scoring) const
Definition: intron.hpp:732
int GetH2len(int j, const CProSplignScaledScoring &scoring) const
Definition: intron.hpp:735
void InitRowScores(CAlignRow *row, vector< int > &prevw, int j)
Definition: intron.cpp:215
int GetVlen(int j, const CProSplignScaledScoring &scoring) const
Definition: intron.hpp:733
int GetH3len(int j, const CProSplignScaledScoring &scoring) const
Definition: intron.hpp:736
int GetW2len(int j, const CProSplignScaledScoring &scoring) const
Definition: intron.hpp:737
int GetW1len(int j, const CProSplignScaledScoring &scoring) const
Definition: intron.hpp:758
vector< int > m_scores
Definition: nucprot.hpp:152
int GetScore()
Definition: nucprot.hpp:148
int * m_gpos
Definition: nucprot.hpp:156
void SetAmin(char amin, const CSubstMatrix &matrix)
Definition: nucprot.cpp:138
void Init(const CNSeq &seq, const CSubstMatrix &matrix)
Definition: nucprot.cpp:123
bool m_init
Definition: nucprot.hpp:159
vector< int > m_gscores
Definition: nucprot.hpp:157
int * m_pos
Definition: nucprot.hpp:153
CIgapIntronChain * wis
Definition: AlignInfo.hpp:93
void ClearIIC(void)
Definition: AlignInfo.cpp:115
CIgapIntronChain * h3is
Definition: AlignInfo.hpp:93
CIgapIntronChain * h2is
Definition: AlignInfo.hpp:93
CIgapIntronChain * vis
Definition: AlignInfo.hpp:93
CIgapIntronChain * h1is
Definition: AlignInfo.hpp:93
vector< int > v
Definition: AlignInfo.hpp:69
vector< int > w
Definition: AlignInfo.hpp:69
CIgapIntron * m_Top
void Expand(CIgapIntronChain &source, int beg, int len)
void Creat(int beg, int len)
void SetPool(CIgapIntronPool &pool)
void Copy(CIgapIntronChain &source)
Definition: NSeq.hpp:52
int size(void) const
Definition: NSeq.hpp:71
void CheckUserInterrupt(void) const
Definition: nucprot.hpp:192
int GetGapOpeningCost() const
Definition: prosplign.cpp:480
int GetFrameshiftOpeningCost() const
Definition: prosplign.cpp:500
int GetGapExtensionCost() const
Definition: prosplign.cpp:490
Substitution Matrix for Scoring Amino-Acid Alignments.
Definition: nucprot.hpp:123
CConstRef< CTranslationTable > m_trans_table
Definition: nucprot.hpp:138
void SetTranslationTable(const CTranslationTable *trans_table)
Definition: nucprot.cpp:91
int MultScore(int nuc1, int nuc2, int nuc3, char amin) const
Definition: nucprot.hpp:129
string m_alphabet
Definition: nucprot.hpp:135
static char NucToChar(int n)
Definition: nucprot.cpp:114
char aa_table[8 *8 *8]
Definition: nucprot.hpp:119
char TranslateTriplet(char n1, char n2, char n3) const
Definition: nucprot.hpp:92
CTranslationTable(int gcode, bool allow_alt_starts)
Definition: nucprot.cpp:96
static int CharToNuc(char c)
Definition: nucprot.cpp:106
Include a standard set of the NCBI C++ Toolkit most basic headers.
static ulg bb
struct parameters_t * pb[]
static int lc
Definition: getdata.c:30
static char tmp[3200]
Definition: utf8.c:42
static tds_mutex mtx
Definition: condition.c:43
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
Definition: ncbimisc.hpp:1508
#define NULL
Definition: ncbistd.hpp:225
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
const CVect2< U > & v2
Definition: globals.hpp:440
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:1439
#define numeric_limits
Pre-declaration of the "numeric_limits<>" template Forcibly overrides (using preprocessor) the origin...
Definition: ncbi_limits.hpp:92
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
static string & ToUpper(string &str)
Convert string to upper case – string& version.
Definition: ncbistr.cpp:424
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
int i
yy_size_t n
int len
#include<zmmintrin.h>
Definition: bm.h:78
static const BitmapCharRec ch3
Definition: ncbi_10x20.c:1809
static const BitmapCharRec ch1
Definition: ncbi_10x20.c:1827
static const BitmapCharRec ch2
Definition: ncbi_10x20.c:1819
int tolower(Uchar c)
Definition: ncbictype.hpp:72
int toupper(Uchar c)
Definition: ncbictype.hpp:73
T max(T x_, T y_)
T min(T x_, T y_)
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
@ eH2
Definition: nucprot.hpp:59
@ eH
Definition: nucprot.hpp:57
@ eV
Definition: nucprot.hpp:56
@ eH3
Definition: nucprot.hpp:60
@ eH1
Definition: nucprot.hpp:58
@ eD
Definition: nucprot.hpp:55
const SNCBIPackedScoreMatrix NCBISM_Blosum62
Definition: sm_blosum62.c:92
void NCBISM_Unpack(const SNCBIPackedScoreMatrix *psm, SNCBIFullScoreMatrix *fsm)
Expand a packed score matrix into an unpacked one, which callers can proceed to index directly by sta...
Definition: raw_scoremat.c:81
int h2
Definition: intron.hpp:409
int h1
Definition: intron.hpp:409
int w
Definition: intron.hpp:409
int h3
Definition: intron.hpp:409
int w1
Definition: intron.hpp:409
int v
Definition: intron.hpp:409
int w2
Definition: intron.hpp:409
TNCBIScore defscore
score for unknown residues
Definition: raw_scoremat.h:49
const char * symbols
order of residues
Definition: raw_scoremat.h:47
#define _ASSERT
int g(Seg_Gsm *spe, Seq_Mtf *psm, Thd_Gsm *tdg)
Definition: thrddgri.c:44
#define const
Definition: zconf.h:232
Modified on Wed Sep 04 15:02:46 2024 by modify_doxy.py rev. 669887