NCBI C++ ToolKit
nw_pssm_aligner.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: nw_pssm_aligner.cpp 100300 2023-07-18 19:57:36Z mozese2 $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Jason Papadopoulos
27  *
28  * File Description: CPSSMAligner implementation
29  *
30  * ===========================================================================
31  *
32  */
33 
34 
35 #include <ncbi_pch.hpp>
36 #include <math.h>
37 #include "messages.hpp"
40 
41 
43 
// Member-initializer list and (empty) body of a CPSSMAligner constructor.
// NOTE(review): the declarator line (listing line 44) is missing from this
// extract; presumably this is the default constructor -- confirm against
// the header.
// All PSSM / profile / sequence pointers start out null, the frequency
// scale is 1, and both the start and the end gap penalties are seeded from
// GetDefaultWg() / GetDefaultWs().
45  : CNWAligner(),
46  m_Pssm1(0), m_Freq1(0),
47  m_Seq2(0), m_Freq2(0),
48  m_FreqScale(1),
49  m_StartWg(GetDefaultWg()),
50  m_StartWs(GetDefaultWs()),
51  m_EndWg(GetDefaultWg()),
52  m_EndWs(GetDefaultWs())
53 {
54 }
55 
56 
// Construct an aligner for the PSSM-versus-sequence case.
//
// pssm1 / len1 : score rows for the first input and the number of rows
// seq2  / len2 : residues of the second input and their count
//
// The initializer list stores the two pointers and seeds the start/end gap
// penalties from GetDefaultWg() / GetDefaultWs(); the body then delegates
// to SetSequences(pssm1, len1, seq2, len2), which validates the arguments
// and records the lengths. The 'verify' argument of that overload is left
// at its default here.
57 CPSSMAligner::CPSSMAligner(const TScore** pssm1, size_t len1,
58  const char* seq2, size_t len2)
59  : CNWAligner(),
60  m_Pssm1(pssm1), m_Freq1(0),
61  m_Seq2(seq2), m_Freq2(0),
62  m_FreqScale(1),
63  m_StartWg(GetDefaultWg()),
64  m_StartWs(GetDefaultWs()),
65  m_EndWg(GetDefaultWg()),
66  m_EndWs(GetDefaultWs())
67 {
68  SetSequences(pssm1, len1, seq2, len2);
69 }
70 
71 
// Construct an aligner for the profile-versus-profile case.
//
// freq1 / len1 : frequency columns of the first profile and their count
// freq2 / len2 : frequency columns of the second profile and their count
// scoremat     : packed score matrix, handed to SetScoreMatrix()
// scale        : stored in m_FreqScale (multiplies the column-vs-column
//                score in the profile alignment code of this file)
//
// The score matrix is installed first, then the profiles are registered
// through SetSequences(freq1, len1, freq2, len2, scale).
72 CPSSMAligner::CPSSMAligner(const double** freq1, size_t len1,
73  const double** freq2, size_t len2,
74  const SNCBIPackedScoreMatrix *scoremat,
75  const int scale)
76  : CNWAligner(),
77  m_Pssm1(0), m_Freq1(freq1),
78  m_Seq2(0), m_Freq2(freq2),
79  m_FreqScale(scale),
80  m_StartWg(GetDefaultWg()),
81  m_StartWs(GetDefaultWs()),
82  m_EndWg(GetDefaultWg()),
83  m_EndWs(GetDefaultWs())
84 {
85  SetScoreMatrix(scoremat);
86  SetSequences(freq1, len1, freq2, len2, scale);
87 }
88 
// Plain sequence-versus-sequence input.
// Clears every PSSM / profile pointer held by this object and forwards the
// arguments unchanged to CNWAligner::SetSequences(). With m_Pssm1 and
// m_Freq1 null, x_Align() in this file takes the CNWAligner::x_Align() path.
// NOTE(review): the unqualified m_Seq2 cleared here is presumably this
// class's own member; the base-class one is spelled CNWAligner::m_Seq2
// elsewhere in this file -- confirm against the header.
89 void CPSSMAligner::SetSequences(const char* seq1, size_t len1,
90  const char* seq2, size_t len2,
91  bool verify)
92 {
93  m_Pssm1 = 0;
94  m_Freq1 = 0;
95  m_Seq2 = 0;
96  m_Freq2 = 0;
97  CNWAligner::SetSequences(seq1, len1, seq2, len2, verify);
98 }
99 
100 
// PSSM-versus-sequence input.
//
// pssm1 / len1 : score rows for the first input and the number of rows
// seq2  / len2 : residues of the second input and their count
// verify       : when true, every element of seq2 is range-checked
//
// Throws CAlgoAlignException (eBadParameter) if any pointer is null or any
// length is zero, and CAlgoAlignException (eInvalidCharacter) if 'verify'
// is set and a residue falls outside [0, kPSSM_ColumnSize).
// On success the PSSM/sequence pointers and both lengths are stored, the
// profile pointers are cleared, and the base-class sequence pointers are
// reset to null.
101 void CPSSMAligner::SetSequences(const TScore** pssm1, size_t len1,
102  const char* seq2, size_t len2,
103  bool verify)
104 {
105  if(!pssm1 || !len1 || !seq2 || !len2) {
106  NCBI_THROW(CAlgoAlignException, eBadParameter,
    // NOTE(review): listing line 107 (the remaining NCBI_THROW argument)
    // is missing from this extract.
108  }
109 
110  if(verify) {
111  for (size_t i = 0; i < len2; i++) {
    // each residue is later used as a column index into a PSSM row
112  if (seq2[i] < 0 || seq2[i] >= kPSSM_ColumnSize) {
113  NCBI_THROW(CAlgoAlignException, eInvalidCharacter,
    // NOTE(review): listing line 114 (the remaining NCBI_THROW argument)
    // is missing from this extract.
115  }
116  }
117  }
118  m_Pssm1 = pssm1;
119  m_Freq1 = 0;
120  m_SeqLen1 = len1;
121  m_Seq2 = seq2;
122  m_Freq2 = 0;
123  m_SeqLen2 = len2;
    // the base-class sequence pointers are not used in this mode
124  CNWAligner::m_Seq1 = 0;
125  CNWAligner::m_Seq2 = 0;
126 }
127 
128 
// Profile-versus-profile input.
//
// freq1 / len1 : frequency columns of the first profile and their count
// freq2 / len2 : frequency columns of the second profile and their count
// scale        : stored in m_FreqScale
//
// Throws CAlgoAlignException (eBadParameter) if any pointer is null or any
// length is zero. The contents of the columns are not inspected here.
// On success the profile pointers, lengths and scale are stored, the
// PSSM / sequence pointers are cleared, and the base-class sequence
// pointers are reset to null.
129 void CPSSMAligner::SetSequences(const double** freq1, size_t len1,
130  const double** freq2, size_t len2,
131  const int scale)
132 {
133  if(!freq1 || !len1 || !freq2 || !len2) {
134  NCBI_THROW(CAlgoAlignException, eBadParameter,
    // NOTE(review): listing line 135 (the remaining NCBI_THROW argument)
    // is missing from this extract.
136  }
137  m_Pssm1 = 0;
138  m_Freq1 = freq1;
139  m_SeqLen1 = len1;
140  m_Seq2 = 0;
141  m_Freq2 = freq2;
142  m_SeqLen2 = len2;
143  m_FreqScale = scale;
144  CNWAligner::m_Seq1 = 0;
145  CNWAligner::m_Seq2 = 0;
146 }
147 
148 
// Body of the score-matrix setter.
// NOTE(review): the signature line (listing line 149) is missing from this
// extract; presumably
// void CPSSMAligner::SetScoreMatrix(const SNCBIPackedScoreMatrix *scoremat).
//
// Throws CAlgoAlignException (eBadParameter) when 'scoremat' is null.
// Otherwise the matrix is installed through CNWAligner::SetScoreMatrix(),
// entry [0][0] is forced to zero, and the leading
// kPSSM_ColumnSize x kPSSM_ColumnSize corner is mirrored into
// m_DScoreMatrix as doubles for the profile scoring code.
150 {
151  // unpacking the score matrix will automatically arrange
152  // its entries in the correct order to support NCBIstdaa
153 
154  if(!scoremat) {
155  NCBI_THROW(CAlgoAlignException, eBadParameter,
    // NOTE(review): listing line 156 (the remaining NCBI_THROW argument)
    // is missing from this extract.
157  }
158  CNWAligner::SetScoreMatrix(scoremat);
159 
160  // no penalty for aligning gaps with each other
161  m_ScoreMatrix.s[0][0] = 0;
162 
    // keep a floating-point copy so the profile code avoids per-cell casts
163  for (int i = 0; i < kPSSM_ColumnSize; i++) {
164  for (int j = 0; j < kPSSM_ColumnSize; j++) {
165  m_DScoreMatrix[i][j] = (double)m_ScoreMatrix.s[i][j];
166  }
167  }
168 }
169 
170 
// Body of the alignment entry point; returns m_score.
// NOTE(review): the signature (listing line 171) and listing lines 174 and
// 177 are missing from this extract. Presumably this is
// CPSSMAligner::Run(), line 174 is the failure action taken when
// x_CheckMemoryLimit() returns false, and line 177 is the statement that
// computes m_score -- confirm against the original source.
172 {
173  if(!x_CheckMemoryLimit()) {
175  }
176 
178 
179  return m_score;
180 }
181 
182 
// Transcript symbol for a diagonal step.
// NOTE(review): the declarator (listing line 183) is missing from this
// extract; presumably
// ETranscriptSymbol CPSSMAligner::x_GetDiagTS(size_t i1, size_t i2).
// When a profile or a PSSM is loaded, eTS_Match is always returned;
// otherwise the decision is delegated to CNWAligner::x_GetDiagTS(i1, i2).
184 const
185 {
186  if (m_Freq1 || m_Pssm1) {
187  return eTS_Match; // makes no difference for profile alignments
188  }
189  else {
190  return CNWAligner::x_GetDiagTS(i1, i2);
191  }
192 }
193 
194 
// Dispatch to the alignment routine that matches the loaded input.
// NOTE(review): the signature (listing line 195) is missing from this
// extract; presumably
// CNWAligner::TScore CPSSMAligner::x_Align(SAlignInOut* data).
// A loaded profile (m_Freq1) takes precedence over a loaded PSSM (m_Pssm1);
// with neither set, the base-class aligner is used.
196 {
197  if (m_Freq1)
198  return x_AlignProfile(data); // profile-profile
199  else if (m_Pssm1)
200  return x_AlignPSSM(data); // PSSM-sequence
201  else
202  return CNWAligner::x_Align(data); // sequence-sequence
203 }
204 
205 
206 // evaluate score for each possible alignment;
207 // fill out backtrace matrix
208 // bit coding (four bits per value): D E Ec Fc
209 // D: 1 if diagonal; 0 - otherwise
210 // E: 1 if space in 1st sequence; 0 if space in 2nd sequence
211 // Ec: 1 if gap in 1st sequence was extended; 0 if it was opened
212 // Fc: 1 if gap in 2nd sequence was extended; 0 if it was opened
213 //
214 
// Bit flags combined into the per-cell value stored in the backtrace matrix
// by the alignment routines in this file.
// set when the gap tracked in rowF[] was extended rather than opened
215 const unsigned char kMaskFc = 0x01;
// set when the gap tracked in E was extended rather than opened
216 const unsigned char kMaskEc = 0x02;
// set when the cell's best score came from E
217 const unsigned char kMaskE = 0x04;
// set when the cell's best score came from the diagonal term G
218 const unsigned char kMaskD = 0x08;
219 
// PSSM-versus-sequence dynamic programming with affine gap penalties.
// NOTE(review): the signature (listing line 220) is missing from this
// extract; presumably
// CNWAligner::TScore CPSSMAligner::x_AlignPSSM(SAlignInOut* data).
//
// Works on the sub-problem described by 'data' (m_offset1/m_len1 and
// m_offset2/m_len2). Only one row of best scores (rowV) and one row of
// gap scores (rowF) are kept; each cell's decision is written into a
// CBacktraceMatrix4 as a combination of the kMask* flags. Unless
// m_terminate was raised, x_DoBackTrace() is called with that matrix.
// Returns V, the score of the last cell that was computed.
221 {
222  const size_t N1 = data->m_len1 + 1;
223  const size_t N2 = data->m_len2 + 1;
224 
225  vector<TScore> stl_rowV (N2), stl_rowF(N2);
226 
227  TScore* rowV = &stl_rowV[0];
228  TScore* rowF = &stl_rowF[0];
229 
    // pV[j] aliases rowV[j-1]; only indices >= 1 are dereferenced below.
    // NOTE(review): forming 'rowV - 1' is formally out-of-range pointer
    // arithmetic.
230  TScore* pV = rowV - 1;
231 
    // shifted by one so that the 1-based loop indices i and j can be used
232  const TScore** pssm_row = m_Pssm1 + data->m_offset1 - 1;
233  const char* seq2 = m_Seq2 + data->m_offset2 - 1;
234 
235  m_terminate = false;
236 
237  if(m_prg_callback) {
238  m_prg_info.m_iter_total = N1*N2;
    // NOTE(review): listing lines 239-240 are missing from this extract;
    // presumably they invoke the progress callback and open the inner block
    // that returns 0 on a termination request.
241  return 0;
242  }
243  }
244 
    // Gap opening (wg) / extension (ws) penalties for the four borders of
    // the sub-problem; interior values unless a border coincides with a
    // true end of the input.
245  TScore wg1L = m_Wg;
246  TScore wg1R = m_Wg;
247  TScore wg2L = m_Wg;
248  TScore wg2R = m_Wg;
249 
250  TScore ws1L = m_Ws;
251  TScore ws1R = m_Ws;
252  TScore ws2L = m_Ws;
253  TScore ws2R = m_Ws;
254 
    // At a true end, gaps are free when the matching m_esf_* flag of 'data'
    // is set; otherwise the dedicated start/end penalties apply.
255  if (data->m_offset1 == 0) {
256  if (data->m_esf_L1) {
257  wg1L = ws1L = 0;
258  }
259  else {
260  wg1L = m_StartWg;
261  ws1L = m_StartWs;
262  }
263  }
264 
265  if (m_SeqLen1 == data->m_offset1 + data->m_len1) {
266  if (data->m_esf_R1) {
267  wg1R = ws1R = 0;
268  }
269  else {
270  wg1R = m_EndWg;
271  ws1R = m_EndWs;
272  }
273  }
274 
275  if (data->m_offset2 == 0) {
276  if (data->m_esf_L2) {
277  wg2L = ws2L = 0;
278  }
279  else {
280  wg2L = m_StartWg;
281  ws2L = m_StartWs;
282  }
283  }
284 
285  if (m_SeqLen2 == data->m_offset2 + data->m_len2) {
286  if (data->m_esf_R2) {
287  wg2R = ws2R = 0;
288  }
289  else {
290  wg2R = m_EndWg;
291  ws2R = m_EndWs;
292  }
293  }
294 
295  TScore wgleft1 = wg1L;
296  TScore wsleft1 = ws1L;
297  TScore wg1 = m_Wg, ws1 = m_Ws;
298 
299  // index calculation: [i,j] = i*n2 + j
300  CBacktraceMatrix4 backtrace_matrix (N1 * N2);
301  backtrace_matrix.SetAt(0, 0);
302 
303  // first row
    // rowV[0] temporarily holds the opening penalty so that the loop yields
    // rowV[k] = wgleft1 + k * wsleft1; it is reset to 0 afterwards.
304  size_t k;
305  rowV[0] = wgleft1;
306  for (k = 1; k < N2; k++) {
307  rowV[k] = pV[k] + wsleft1;
308  rowF[k] = kInfMinus;
309  backtrace_matrix.SetAt(k, kMaskE | kMaskEc);
310  }
311  backtrace_matrix.Purge(k);
312  rowV[0] = 0;
313 
314  if(m_prg_callback) {
    // NOTE(review): listing lines 315-316 are missing from this extract.
317  }
318 
319  // recurrences
320  TScore wgleft2 = wg2L;
321  TScore wsleft2 = ws2L;
322  TScore V = rowV[N2 - 1];
323  TScore V0 = wgleft2;
324  TScore E, G, n0;
325  unsigned char tracer;
326 
327  size_t i, j;
328  for(i = 1; i < N1 && !m_terminate; ++i) {
329 
    // first column of row i: wgleft2 + i * wsleft2
330  V = V0 += wsleft2;
331  E = kInfMinus;
332  backtrace_matrix.SetAt(k++, kMaskFc);
333 
    // the last row uses the right-border penalties for the E gap
334  if(i == N1 - 1) {
335  wg1 = wg1R;
336  ws1 = ws1R;
337  }
338 
339  TScore wg2 = m_Wg, ws2 = m_Ws;
340 
341  for (j = 1; j < N2; ++j, ++k) {
342 
    // pV[j] (= rowV[j-1]) still holds the previous row's value, i.e. the
    // diagonal predecessor; it is overwritten with the current row's
    // value right after being read.
343  G = pV[j] + pssm_row[i][(unsigned char)seq2[j]];
344 
345  pV[j] = V;
346 
    // E: gap carried along the current row; ties favour extension
347  n0 = V + wg1;
348  if(E >= n0) {
349  E += ws1;
350  tracer = kMaskEc;
351  }
352  else {
353  E = n0 + ws1;
354  tracer = 0;
355  }
356 
    // the last column uses the right-border penalties for the rowF gap
357  if(j == N2 - 1) {
358  wg2 = wg2R;
359  ws2 = ws2R;
360  }
    // rowF[j]: gap carried down column j; ties favour extension
361  n0 = rowV[j] + wg2;
362  if(rowF[j] >= n0) {
363  rowF[j] += ws2;
364  tracer |= kMaskFc;
365  }
366  else {
367  rowF[j] = n0 + ws2;
368  }
369 
    // best of E, rowF[j] and G; on ties a gap wins over the diagonal and
    // E wins over rowF[j]
370  if (E >= rowF[j]) {
371  if(E >= G) {
372  V = E;
373  tracer |= kMaskE;
374  }
375  else {
376  V = G;
377  tracer |= kMaskD;
378  }
379  } else {
380  if(rowF[j] >= G) {
381  V = rowF[j];
382  }
383  else {
384  V = G;
385  tracer |= kMaskD;
386  }
387  }
388  backtrace_matrix.SetAt(k, tracer);
389  }
390 
    // j == N2 here: store the last cell of the row into rowV[N2-1]
391  pV[j] = V;
392 
393  if(m_prg_callback) {
    // NOTE(review): listing lines 394-395 are missing from this extract.
396  break;
397  }
398  }
399  }
400  backtrace_matrix.Purge(k);
401 
402  if(!m_terminate) {
403  x_DoBackTrace(backtrace_matrix, data);
404  }
405  return V;
406 }
407 
408 
// Profile-versus-profile dynamic programming with affine gap penalties.
// NOTE(review): the signature (listing line 409) is missing from this
// extract; presumably
// CNWAligner::TScore CPSSMAligner::x_AlignProfile(SAlignInOut* data).
//
// Same overall structure as the PSSM routine, but scores are accumulated
// in double precision. Each input column is an array of kPSSM_ColumnSize
// frequencies whose entry 0 is treated as the gap frequency (it scales the
// gap opening penalties and is excluded from the residue loops).
// Unless m_terminate was raised, x_DoBackTrace() is called with the
// recorded decisions. Returns the final score converted to TScore.
410 {
411  const size_t N1 = data->m_len1 + 1;
412  const size_t N2 = data->m_len2 + 1;
413 
414  vector<double> stl_rowV (N2), stl_rowF(N2);
415 
416  double* rowV = &stl_rowV[0];
417  double* rowF = &stl_rowF[0];
418 
    // pV[j] aliases rowV[j-1]; only indices >= 1 are dereferenced below.
    // NOTE(review): forming 'rowV - 1' is formally out-of-range pointer
    // arithmetic.
419  double* pV = rowV - 1;
420 
    // shifted by one so that the 1-based loop indices i and j can be used
421  const double** freq1_row = m_Freq1 + data->m_offset1 - 1;
422  const double** freq2_row = m_Freq2 + data->m_offset2 - 1;
423 
424  m_terminate = false;
425 
426  if(m_prg_callback) {
427  m_prg_info.m_iter_total = N1*N2;
    // NOTE(review): listing lines 428-429 are missing from this extract;
    // presumably they invoke the progress callback and open the inner block
    // that returns 0 on a termination request.
430  return 0;
431  }
432  }
433 
    // Gap opening (wg) / extension (ws) penalties for the four borders of
    // the sub-problem; interior values unless a border coincides with a
    // true end of the input.
434  TScore wg1L = m_Wg;
435  TScore wg1R = m_Wg;
436  TScore wg2L = m_Wg;
437  TScore wg2R = m_Wg;
438 
439  TScore ws1L = m_Ws;
440  TScore ws1R = m_Ws;
441  TScore ws2L = m_Ws;
442  TScore ws2R = m_Ws;
443 
    // At a true end, gaps are free when the matching m_esf_* flag of 'data'
    // is set; otherwise the dedicated start/end penalties apply.
444  if (data->m_offset1 == 0) {
445  if (data->m_esf_L1) {
446  wg1L = ws1L = 0;
447  }
448  else {
449  wg1L = m_StartWg;
450  ws1L = m_StartWs;
451  }
452  }
453 
454  if (m_SeqLen1 == data->m_offset1 + data->m_len1) {
455  if (data->m_esf_R1) {
456  wg1R = ws1R = 0;
457  }
458  else {
459  wg1R = m_EndWg;
460  ws1R = m_EndWs;
461  }
462  }
463 
464  if (data->m_offset2 == 0) {
465  if (data->m_esf_L2) {
466  wg2L = ws2L = 0;
467  }
468  else {
469  wg2L = m_StartWg;
470  ws2L = m_StartWs;
471  }
472  }
473 
474  if (m_SeqLen2 == data->m_offset2 + data->m_len2) {
475  if (data->m_esf_R2) {
476  wg2R = ws2R = 0;
477  }
478  else {
479  wg2R = m_EndWg;
480  ws2R = m_EndWs;
481  }
482  }
483 
484  TScore wgleft1 = wg1L;
485  TScore wsleft1 = ws1L;
486  TScore wg1 = m_Wg, ws1 = m_Ws;
487 
488  // index calculation: [i,j] = i*n2 + j
489  CBacktraceMatrix4 backtrace_matrix (N1 * N2);
490 
491  // first row
    // The opening penalty is scaled by (1 - gap frequency) of the first
    // column of profile 2; the guard avoids reading freq2_row[1] when the
    // second input is empty.
492  size_t k = 1;
493  if (N2 > 1) {
494  rowV[0] = wgleft1 * (1.0 - freq2_row[1][0]);
495  for (k = 1; k < N2; k++) {
496  rowV[k] = pV[k] + wsleft1;
497  rowF[k] = kInfMinus;
498  backtrace_matrix.SetAt(k, kMaskE | kMaskEc);
499  }
500  backtrace_matrix.Purge(k);
501  }
502  rowV[0] = 0;
503 
504  if(m_prg_callback) {
    // NOTE(review): listing lines 505-506 are missing from this extract.
507  }
508 
509  // recurrences
510  TScore wgleft2 = wg2L;
511  TScore wsleft2 = ws2L;
512  double V = rowV[N2 - 1];
513  double V0 = 0;
514  double E, G, n0;
515  unsigned char tracer;
516 
    // first-column opening penalty, scaled by (1 - gap frequency) of the
    // first column of profile 1
517  if (N1 > 1)
518  V0 = wgleft2 * (1.0 - freq1_row[1][0]);
519 
520  size_t i, j;
521  for(i = 1; i < N1 && !m_terminate; ++i) {
522 
523  V = V0 += wsleft2;
524  E = kInfMinus;
525  backtrace_matrix.SetAt(k++, kMaskFc);
526 
    // the last row uses the right-border penalties for the E gap
527  if(i == N1 - 1) {
528  wg1 = wg1R;
529  ws1 = ws1R;
530  }
531 
532  TScore wg2 = m_Wg, ws2 = m_Ws;
533 
534  for (j = 1; j < N2; ++j, ++k) {
535 
    // the last column uses the right-border penalties for the rowF gap
536  if(j == N2 - 1) {
537  wg2 = wg2R;
538  ws2 = ws2R;
539  }
540  const double *profile1 = freq1_row[i];
541  const double *profile2 = freq2_row[j];
    // only the opening penalty is scaled by the opposite column's
    // (1 - gap frequency); the extension penalty is used unchanged
542  const double scaled_wg1 = wg1 * (1.0 - profile2[0]);
543  const double scaled_ws1 = ws1;
544  const double scaled_wg2 = wg2 * (1.0 - profile1[0]);
545  const double scaled_ws2 = ws2;
546 
547  double accum = 0.0, sum = 0.0;
548  int num_zeros1 = 0, num_zeros2 = 0;
549  double diff_freq1[kPSSM_ColumnSize];
550  double diff_freq2[kPSSM_ColumnSize];
551 
552  // separate the residue frequencies into two components:
553  // a component that is the same for both columns, and
554  // a component that is different. The all-against-all
555  // score computation only takes place on the components
556  // that are different, so this will assign a higher score
557  // to more similar frequency columns
558  //
559  // Begin by separating out the common portion of each
560  // profile
561 
    // entry 0 of diff_freq1 / diff_freq2 is never written or read
562  for (int m = 1; m < kPSSM_ColumnSize; m++) {
563  if (profile1[m] < profile2[m]) {
564  accum += profile1[m] * m_DScoreMatrix[m][m];
565  diff_freq1[m] = 0.0;
566  diff_freq2[m] = profile2[m] - profile1[m];
567  num_zeros1++;
568  }
569  else {
570  accum += profile2[m] * m_DScoreMatrix[m][m];
571  diff_freq1[m] = profile1[m] - profile2[m];
572  diff_freq2[m] = 0.0;
573  num_zeros2++;
574  }
575  }
576 
577  // normalize difference for profile with smaller gap
578  if (profile1[0] <= profile2[0]) {
579  for (int m = 1; m < kPSSM_ColumnSize; m++)
580  sum += diff_freq1[m];
581  } else {
582  for (int m = 1; m < kPSSM_ColumnSize; m++)
583  sum += diff_freq2[m];
584  }
585 
    // a zero sum means there is no differing residue mass to cross-score
586  if (sum > 0) {
587  sum = 1.0 / sum;
588  if (profile1[0] <= profile2[0]) {
589  for (int m = 1; m < kPSSM_ColumnSize; m++)
590  diff_freq1[m] *= sum;
591  } else {
592  for (int m = 1; m < kPSSM_ColumnSize; m++)
593  diff_freq2[m] *= sum;
594  }
595 
596  // Add in the cross terms (not counting gaps).
597  // Note that the following assumes a symmetric
598  // score matrix
599 
    // iterate the outer loop over the side with more zero entries so
    // that more inner loops are skipped; 'sum' is reused as a scratch
    // accumulator from here on
600  if (num_zeros1 > num_zeros2) {
601  for (int m = 1; m < kPSSM_ColumnSize; m++) {
602  if (diff_freq1[m] > 0) {
603  sum = 0.0;
604  double *matrix_row = m_DScoreMatrix[m];
605  for (int n = 1; n < kPSSM_ColumnSize; n++) {
606  sum += diff_freq2[n] * matrix_row[n];
607  }
608  accum += diff_freq1[m] * sum;
609  }
610  }
611  } else {
612  for (int m = 1; m < kPSSM_ColumnSize; m++) {
613  if (diff_freq2[m] > 0) {
614  sum = 0.0;
615  double *matrix_row = m_DScoreMatrix[m];
616  for (int n = 1; n < kPSSM_ColumnSize; n++) {
617  sum += diff_freq1[n] * matrix_row[n];
618  }
619  accum += diff_freq2[m] * sum;
620  }
621  }
622  }
623  }
624 
    // diagonal score: scaled residue term plus an m_Ws-weighted term for
    // a gap in one column facing a non-gap in the other; pV[j] still
    // holds the previous row's value at column j-1
625  G = pV[j] + accum * m_FreqScale +
626  profile1[0] * m_Ws * (1-profile2[0]) +
627  profile2[0] * m_Ws * (1-profile1[0]);
628 
629  pV[j] = V;
630 
631  n0 = V + scaled_wg1;
632  if(E >= n0) {
633  E += scaled_ws1; // continue the gap
634  tracer = kMaskEc;
635  }
636  else {
637  E = n0 + scaled_ws1; // open a new gap
638  tracer = 0;
639  }
640 
641  n0 = rowV[j] + scaled_wg2;
642  if(rowF[j] >= n0) {
643  rowF[j] += scaled_ws2;
644  tracer |= kMaskFc;
645  }
646  else {
647  rowF[j] = n0 + scaled_ws2;
648  }
649 
    // best of E, rowF[j] and G; on ties a gap wins over the diagonal and
    // E wins over rowF[j]
650  if (E >= rowF[j]) {
651  if(E >= G) {
652  V = E;
653  tracer |= kMaskE;
654  }
655  else {
656  V = G;
657  tracer |= kMaskD;
658  }
659  } else {
660  if(rowF[j] >= G) {
661  V = rowF[j];
662  }
663  else {
664  V = G;
665  tracer |= kMaskD;
666  }
667  }
668  backtrace_matrix.SetAt(k, tracer);
669  }
670 
    // j == N2 here: store the last cell of the row into rowV[N2-1]
671  pV[j] = V;
672 
673  if(m_prg_callback) {
    // NOTE(review): listing lines 674-675 are missing from this extract.
676  break;
677  }
678  }
679  }
680  backtrace_matrix.Purge(k);
681 
682  if(!m_terminate) {
683  x_DoBackTrace(backtrace_matrix, data);
684  }
    // NOTE(review): adding 0.5 before the truncating cast rounds to nearest
    // only for non-negative V; negative scores are rounded toward zero.
685  return (TScore)(V + 0.5);
686 }
687 
688 
689 // The present implementation works with full transcripts only,
690 // e.g. aligner.ScoreFromTranscript(aligner.GetTranscript(false));
691 //
// Recomputes the alignment score by walking a transcript.
// NOTE(review): the first line of the signature (listing line 692) is
// missing from this extract; presumably
// CNWAligner::TScore CPSSMAligner::ScoreFromTranscript(.
//
// With neither a profile nor a PSSM loaded, the call is forwarded to
// CNWAligner::ScoreFromTranscript(). Otherwise start1 / start2 are not
// used: scoring always begins before the first element of both inputs.
// Throws CAlgoAlignException (eInternal) on a transcript symbol other than
// eTS_Replace, eTS_Match, eTS_Insert or eTS_Delete.
693  const TTranscript& transcript,
694  size_t start1, size_t start2) const
695 {
696  if (m_Freq1 == 0 && m_Pssm1 == 0) {
697  return CNWAligner::ScoreFromTranscript(transcript, start1, start2);
698  }
699 
700  TScore score = 0;
701 
702  int state1 = 0; // 0 = normal, 1 = gap
703  int state2 = 0; // 0 = normal, 1 = gap
704 
705  const TNCBIScore (*sm) [NCBI_FSM_DIM] = m_ScoreMatrix.s;
    // index of the last consumed element of each input; -1 = none yet
706  int offset1 = -1;
707  int offset2 = -1;
708 
709  const size_t dim = transcript.size();
710 
711  if (m_Pssm1) { // PSSM-sequence score
712  for(size_t i = 0; i < dim; ++i) {
713 
    // penalties default to zero, i.e. free gaps, when the matching
    // end-space-free flag is set
714  TScore wg = 0, ws = 0;
715 
    // a single else-if chain: only the first border condition that holds
    // determines the penalties for this step
716  if (offset1 < 0) {
717  if (!m_esf_L1) {
718  wg = m_StartWg; ws = m_StartWs;
719  }
720  } else if (offset2 < 0) {
721  if (!m_esf_L2) {
722  wg = m_StartWg; ws = m_StartWs;
723  }
724  } else if (offset1 == (int)m_SeqLen1 - 1) {
725  if (!m_esf_R1) {
726  wg = m_EndWg; ws = m_EndWs;
727  }
728  } else if (offset2 == (int)m_SeqLen2 - 1) {
729  if (!m_esf_R2) {
730  wg = m_EndWg; ws = m_EndWs;
731  }
732  } else {
733  wg = m_Wg; ws = m_Ws;
734  }
735 
736  ETranscriptSymbol ts = transcript[i];
737  switch(ts) {
738 
739  case eTS_Replace:
740  case eTS_Match: {
741  ++offset1; ++offset2;
742  state1 = state2 = 0;
743  score += m_Pssm1[offset1][(unsigned char)m_Seq2[offset2]];
744  }
745  break;
746 
    // the opening penalty is charged only on the first step of a gap run
747  case eTS_Insert: {
748  ++offset2;
749  if(state1 != 1) score += wg;
750  state1 = 1; state2 = 0;
751  score += ws;
752  }
753  break;
754 
755  case eTS_Delete: {
756  ++offset1;
757  if(state2 != 1) score += wg;
758  state1 = 0; state2 = 1;
759  score += ws;
760  }
761  break;
762 
763  default: {
764  NCBI_THROW(CAlgoAlignException, eInternal,
    // NOTE(review): listing line 765 (the remaining NCBI_THROW argument)
    // is missing from this extract.
766  }
767  }
768  }
769  } else { // profile-profile score
770  double dscore = 0.0;
771 
772  for(size_t i = 0; i < dim; ++i) {
773 
    // unlike the PSSM branch, the penalties of the two inputs are
    // selected independently of each other
774  TScore wg1 = 0, ws1 = 0;
775  TScore wg2 = 0, ws2 = 0;
776 
777  if (offset1 < 0) {
778  if (!m_esf_L1) {
779  wg1 = m_StartWg; ws1 = m_StartWs;
780  }
781  } else if (offset1 == (int)m_SeqLen1 - 1) {
782  if (!m_esf_R1) {
783  wg1 = m_EndWg; ws1 = m_EndWs;
784  }
785  } else {
786  wg1 = m_Wg; ws1 = m_Ws;
787  }
788 
789  if (offset2 < 0) {
790  if (!m_esf_L2) {
791  wg2 = m_StartWg; ws2 = m_StartWs;
792  }
793  } else if (offset2 == (int)m_SeqLen2 - 1) {
794  if (!m_esf_R2) {
795  wg2 = m_EndWg; ws2 = m_EndWs;
796  }
797  } else {
798  wg2 = m_Wg; ws2 = m_Ws;
799  }
800 
801  ETranscriptSymbol ts = transcript[i];
802  switch(ts) {
803 
804  case eTS_Replace:
805  case eTS_Match: {
806  state1 = state2 = 0;
807  ++offset1; ++offset2;
808  double accum = 0.0, sum = 0.0;
809  double diff_freq1[kPSSM_ColumnSize];
810  double diff_freq2[kPSSM_ColumnSize];
811 
    // split each residue frequency into the part shared by both columns
    // (scored on the matrix diagonal) and the per-column remainder
812  for (int m = 1; m < kPSSM_ColumnSize; m++) {
813  if (m_Freq1[offset1][m] < m_Freq2[offset2][m]) {
814  accum += m_Freq1[offset1][m] * (double)sm[m][m];
815  diff_freq1[m] = 0.0;
816  diff_freq2[m] = m_Freq2[offset2][m] -
817  m_Freq1[offset1][m];
818  }
819  else {
820  accum += m_Freq2[offset2][m] * (double)sm[m][m];
821  diff_freq1[m] = m_Freq1[offset1][m] -
822  m_Freq2[offset2][m];
823  diff_freq2[m] = 0.0;
824  }
825  }
826 
    // the remainder of the column with the smaller (or equal) gap
    // frequency is the one that gets normalized
827  if (m_Freq1[offset1][0] <= m_Freq2[offset2][0]) {
828  for (int m = 1; m < kPSSM_ColumnSize; m++)
829  sum += diff_freq1[m];
830  } else {
831  for (int m = 1; m < kPSSM_ColumnSize; m++)
832  sum += diff_freq2[m];
833  }
834 
835  if (sum > 0) {
836  if (m_Freq1[offset1][0] <= m_Freq2[offset2][0]) {
837  for (int m = 1; m < kPSSM_ColumnSize; m++)
838  diff_freq1[m] /= sum;
839  } else {
840  for (int m = 1; m < kPSSM_ColumnSize; m++)
841  diff_freq2[m] /= sum;
842  }
843 
    // all-against-all cross terms over the residue entries (index 0,
    // the gap entry, is excluded)
844  for (int m = 1; m < kPSSM_ColumnSize; m++) {
845  for (int n = 1; n < kPSSM_ColumnSize; n++) {
846  accum += diff_freq1[m] *
847  diff_freq2[n] *
848  (double)sm[m][n];
849  }
850  }
851  }
852  dscore += accum * m_FreqScale +
853  m_Freq1[offset1][0] * m_Ws * (1-m_Freq2[offset2][0]) +
854  m_Freq2[offset2][0] * m_Ws * (1-m_Freq1[offset1][0]);
855  }
856  break;
857 
    // the opening penalty is charged once per gap run and scaled by
    // (1 - gap frequency) of the column just consumed
858  case eTS_Insert: {
859  ++offset2;
860  if(state1 != 1) dscore += wg1 * (1.0 - m_Freq2[offset2][0]);
861  state1 = 1; state2 = 0;
862  dscore += ws1;
863  }
864  break;
865 
866  case eTS_Delete: {
867  ++offset1;
868  if(state2 != 1) dscore += wg2 * (1.0 - m_Freq1[offset1][0]);
869  state1 = 0; state2 = 1;
870  dscore += ws2;
871  }
872  break;
873 
874  default: {
875  NCBI_THROW(CAlgoAlignException, eInternal,
    // NOTE(review): listing line 876 (the remaining NCBI_THROW argument)
    // is missing from this extract.
877  }
878  }
879  }
    // NOTE(review): adding 0.5 before the truncating cast rounds to nearest
    // only for non-negative dscore; negative scores round toward zero.
880  score = (TScore)(dscore + 0.5);
881  }
882 
883  return score;
884 }
885 
#define G(x, y, z)
Definition: md4.c:179
void SetAt(size_t i, Uint1 v)
Definition: nw_aligner.hpp:346
virtual bool x_CheckMemoryLimit(void)
double m_DScoreMatrix[kPSSM_ColumnSize][kPSSM_ColumnSize]
bool m_terminate
Definition: nw_aligner.hpp:291
static const int kPSSM_ColumnSize
virtual TScore x_Align(SAlignInOut *data)
Definition: nw_aligner.cpp:229
virtual TScore ScoreFromTranscript(const TTranscript &transcript, size_t start1=kMax_UInt, size_t start2=kMax_UInt) const
virtual ETranscriptSymbol x_GetDiagTS(size_t i1, size_t i2) const
Definition: nw_aligner.cpp:707
const char * m_Seq1
Definition: nw_aligner.hpp:295
TScore m_Ws
Definition: nw_aligner.hpp:271
virtual void SetSequences(const char *seq1, size_t len1, const char *seq2, size_t len2, bool verify=true)
Definition: nw_aligner.cpp:140
SNCBIFullScoreMatrix m_ScoreMatrix
Definition: nw_aligner.hpp:281
virtual CNWAligner::TScore Run(void)
const double ** m_Freq2
virtual TScore ScoreFromTranscript(const TTranscript &transcript, size_t start1=0, size_t start2=0) const
void SetScoreMatrix(const SNCBIPackedScoreMatrix *scoremat)
TScore m_Wg
Definition: nw_aligner.hpp:270
const double ** m_Freq1
FProgressCallback m_prg_callback
Definition: nw_aligner.hpp:285
const char * m_Seq2
virtual TScore x_Run(void)
Definition: nw_aligner.cpp:533
size_t m_SeqLen1
Definition: nw_aligner.hpp:296
const char * m_Seq2
Definition: nw_aligner.hpp:298
TScore x_AlignProfile(SAlignInOut *data)
vector< ETranscriptSymbol > TTranscript
Definition: nw_aligner.hpp:199
virtual TScore x_Align(SAlignInOut *data)
void SetScoreMatrix(const SNCBIPackedScoreMatrix *scoremat)
void SetSequences(const char *seq1, size_t len1, const char *seq2, size_t len2, bool verify=true)
virtual ETranscriptSymbol x_GetDiagTS(size_t i1, size_t i2) const
TScore m_score
Definition: nw_aligner.hpp:316
void x_DoBackTrace(const CBacktraceMatrix4 &backtrace, SAlignInOut *data)
Definition: nw_aligner.cpp:726
TScore x_AlignPSSM(SAlignInOut *data)
size_t m_SeqLen2
Definition: nw_aligner.hpp:299
SProgressInfo m_prg_info
Definition: nw_aligner.hpp:288
const TScore ** m_Pssm1
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
int i
yy_size_t n
#define verify(expr)
Definition: ncbi_assert.h:51
const double E
const unsigned char kMaskFc
const unsigned char kMaskEc
const unsigned char kMaskE
const unsigned char kMaskD
#define NCBI_FSM_DIM
Recommended approach: unpack and index directly.
Definition: raw_scoremat.h:85
int TNCBIScore
data types
Definition: raw_scoremat.h:45
const char g_msg_HitSpaceLimit[]
Definition: messages.hpp:34
const char g_msg_InvalidTranscriptSymbol[]
Definition: messages.hpp:10
const char g_msg_NullParameter[]
Definition: messages.hpp:23
const char g_msg_InvalidSequenceChars[]
Definition: messages.hpp:18
TNCBIScore s[128][128]
Definition: raw_scoremat.h:87
Modified on Sun Feb 25 03:01:24 2024 by modify_doxy.py rev. 669887