NCBI C++ ToolKit
hmm.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: hmm.cpp 92081 2020-12-21 15:13:00Z grichenk $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software / database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software / database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Alexandre Souvorov
27  *
28  * File Description:
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
34 #include <algo/gnomon/gnomon.hpp>
35 #include "hmm.hpp"
36 #include "hmm_inlines.hpp"
37 #include <serial/serial.hpp>
38 #include <serial/objistr.hpp>
39 
41 BEGIN_SCOPE(gnomon)
42 
44 
45 const double kLnHalf = log(0.5);
46 const double kLnThree = log(3.);
47 
48 double CLorentz::ClosingScore(int l) const
49 {
50  if(l >= MaxLen()) return BadScore();
51  int i = (l-1)/m_step;
52  int delx = min((i+1)*m_step,MaxLen())-l;
53  double dely = (i == 0 ? 1 : m_clscore[i-1])-m_clscore[i];
54  return log(dely/m_step*delx+m_clscore[i]);
55 }
56 
57 CHMM_State::CHMM_State(EStrand strn, int point, const CSeqScores& seqscr)
58  : m_stop(point), m_strand(strn),
59  m_score(BadScore()), m_leftstate(0), m_terminal(0),
60  m_seqscr(&seqscr)
61 {
62 }
63 
64 /*
65 int CHMM_State::MinLen() const
66 {
67  int minlen = 1;
68 
69  if(!NoLeftEnd())
70  {
71  if(isPlus()) minlen += m_leftstate->m_terminal->Right();
72  else minlen += m_leftstate->m_terminal->Left();
73  }
74 
75  if(!NoRightEnd())
76  {
77  if(isPlus()) minlen += m_terminal->Left();
78  else minlen += m_terminal->Right();
79  }
80 
81  return minlen;
82 }
83 */
84 
86 {
87  if(NoLeftEnd()) return 0;
88  else
89  {
90  int a = m_leftstate->m_stop+1;
91  if(isPlus()) a += m_leftstate->m_terminal->Right();
92  else a += m_leftstate->m_terminal->Left();
93  return min(a,m_seqscr->SeqLen()-1);
94  }
95 }
96 
98 {
99  if(NoRightEnd()) return m_seqscr->SeqLen()-1;
100  else
101  {
102  int b = m_stop;
103  if(isPlus()) b -= m_terminal->Left();
104  else b -= m_terminal->Right();
105  return max(0,b);
106  }
107 }
108 
109 CExon::CExon(EStrand strn, int point, int ph, const CSeqScores& seqscr, const CExonParameters& exon_params)
110  : CHMM_State(strn,point,seqscr), m_phase(ph),
111  m_prevexon(0), m_mscore(BadScore()),
112  m_param(&exon_params)
113 {
114  if(!m_param || !m_param->m_initialised)
115  CInputModel::Error("CExon is not initialised\n");
116 }
117 
118 bool CExon::StopInside() const
119 {
120  int frame;
121  if(isPlus())
122  {
123  frame = (Phase()-Stop())%3; // Stop()-Phase() is left codon end
124  if(frame < 0) frame += 3;
125  }
126  else
127  {
128  frame = (Phase()+Stop())%3; // Stop()+Phase() is right codon end
129  }
130 
131  return m_seqscr->StopInside(Start(),Stop(),Strand(),frame);
132 }
133 
134 bool CExon::OpenRgn() const
135 {
136  int frame;
137  if(isPlus())
138  {
139  frame = (Phase()-Stop())%3; // Stop()-Phase() is left codon end
140  if(frame < 0) frame += 3;
141  }
142  else
143  {
144  frame = (Phase()+Stop())%3; // Stop()+Phase() is right codon end
145  }
146 
147  return m_seqscr->OpenCodingRegion(Start(),Stop(),Strand(),frame);
148 }
149 
150 double CExon::RgnScore() const
151 {
152  int frame;
153  if(isPlus())
154  {
155  frame = (Phase()-Stop())%3;
156  if(frame < 0) frame += 3;
157  }
158  else
159  {
160  frame = (Phase()+Stop())%3;
161  }
162 
163  double score = m_seqscr->CodingScore(RegionStart(),RegionStop(),Strand(),frame);
164 
165  return score;
166 }
167 
169 {
170  m_mscore = max(e.Score(),e.MScore());
171  m_prevexon = &e;
172  while(m_prevexon != 0 && m_prevexon->Score() <= Score()) m_prevexon = m_prevexon->m_prevexon;
173 }
174 
175 CSingleExon::CSingleExon(EStrand strn, int point, const CSeqScores& seqscr, const CExonParameters& exon_params)
176  : CExon(strn,point,2,seqscr,exon_params)
177 {
178  m_terminal = isPlus() ? &m_seqscr->Stop() : &m_seqscr->Start();
179  if(isMinus()) m_phase = 0;
180 
181  EvaluateInitialScore(*this);
182 }
183 
185 {
186  return m_param->m_singlelen.Score(Stop()-Start()+1)+kLnThree;
187 }
188 
190 {
191  if(isPlus()) return m_seqscr->StopScore(Stop(),Strand());
192  else return m_seqscr->StartScore(Stop(),Strand());
193 }
194 
196 {
197  if(isPlus() || (Stop()-Start())%3 == 2) return kLnHalf;
198  else return BadScore();
199 }
200 
201 CFirstExon::CFirstExon(EStrand strn, int ph, int point, const CSeqScores& seqscr, const CExonParameters& exon_params)
202  : CExon(strn,point,ph,seqscr,exon_params)
203 {
204  if(isPlus())
205  {
206  m_terminal = &m_seqscr->Donor();
207  }
208  else
209  {
210  m_phase = 0;
211  m_terminal = &m_seqscr->Start();
212  }
213 
214  EvaluateInitialScore(*this);
215 }
216 
218 {
219  int last = Stop()-Start();
221 }
222 
223 double CFirstExon::TermScore() const
224 {
225  if(isPlus()) return m_seqscr->DonorScore(Stop(),Strand());
226  else return m_seqscr->StartScore(Stop(),Strand());
227 }
228 
230 {
231  if(Strand() != next.Strand()) return BadScore();
232 
233  int ph = isPlus() ? Phase() : Phase()+Stop()-Start();
234 
235  if((ph+1)%3 == next.Phase()) return 0;
236  else return BadScore();
237 }
238 
239 CInternalExon::CInternalExon(EStrand strn, int ph, int point, const CSeqScores& seqscr, const CExonParameters& exon_params)
240  : CExon(strn,point,ph,seqscr,exon_params)
241 {
243 
244  EvaluateInitialScore(*this);
245 }
246 
248 {
249  int ph0,ph1;
250  int last = Stop()-Start();
251  if(isPlus())
252  {
253  ph1 = Phase();
254  ph0 = (ph1-last)%3;
255  if(ph0 < 0) ph0 += 3;
256  }
257  else
258  {
259  ph0 = Phase();
260  ph1 = (ph0+last)%3;
261  }
262 
264 }
265 
267 {
268  if(isPlus()) return m_seqscr->DonorScore(Stop(),Strand());
269  else return m_seqscr->AcceptorScore(Stop(),Strand());
270 }
271 
272 CLastExon::CLastExon(EStrand strn, int ph, int point, const CSeqScores& seqscr, const CExonParameters& exon_params)
273  : CExon(strn,point,ph,seqscr,exon_params)
274 {
275  if(isPlus())
276  {
277  m_phase = 2;
278  m_terminal = &m_seqscr->Stop();
279  }
280  else
281  {
283  }
284 
285  EvaluateInitialScore(*this);
286 }
287 
288 double CLastExon::LengthScore() const
289 {
290  int last = Stop()-Start();
291  return m_param->m_lastlen.Score(last+1)+kLnThree;
292 }
293 
294 double CLastExon::TermScore() const
295 {
296  if(isPlus()) return m_seqscr->StopScore(Stop(),Strand());
297  else return m_seqscr->AcceptorScore(Stop(),Strand());
298 }
299 
300 double CLastExon::BranchScore(const CIntergenic&) const
301 {
302  if(isPlus() || (Phase()+Stop()-Start())%3 == 2) return kLnHalf;
303  else return BadScore();
304 }
305 
306 CIntron::CIntron(EStrand strn, int ph, int point, const CSeqScores& seqscr, const CIntronParameters& intron_params)
307  : CHMM_State(strn,point,seqscr), m_phase(ph),
308  m_param(&intron_params)
309 {
310  if(!m_param || !m_param->m_initialised) CInputModel::Error("Intron is not initialised\n");
312 
313  EvaluateInitialScore(*this);
314 }
315 
317 {
318  return m_param->m_intronlen.ClosingScore(Stop()-Start()+1);
319 }
320 
321 double CIntron::BranchScore(const CLastExon& next) const
322 {
323  if(Strand() != next.Strand()) return BadScore();
324 
325  if(isPlus())
326  {
327  int shift = next.Stop()-next.Start();
328  if((Phase()+shift)%3 == next.Phase()) return m_param->m_lnTerminal;
329  }
330  else if(Phase() == next.Phase()) return m_param->m_lnTerminal;
331 
332  return BadScore();
333 }
334 
335 double CIntron::RgnScore() const {
336  return 0; // Intron scores are substructed from all others
337 }
338 
339 
340 CIntergenic::CIntergenic(EStrand strn, int point, const CSeqScores& seqscr, const CIntergenicParameters& intergenic_params)
341  : CHMM_State(strn,point,seqscr),
342  m_param(&intergenic_params)
343 {
344  if(!m_param || !m_param->m_initialised) CInputModel::Error("Intergenic is not initialised\n");
345  m_terminal = isPlus() ? &m_seqscr->Start() : &m_seqscr->Stop();
346 
347  EvaluateInitialScore(*this);
348 }
349 
351 {
353 }
354 
355 double CIntergenic::RgnScore() const
356 {
358 }
359 
361 {
362  if(isPlus()) return m_seqscr->StartScore(Stop(),Strand());
363  else return m_seqscr->StopScore(Stop(),Strand());
364 }
365 
367 {
368  if(&next == m_leftstate)
369  {
370  if(next.isMinus()) return m_param->m_lnMulti;
371  else return BadScore();
372  }
373  else if(isPlus() && next.isPlus())
374  {
375  if((next.Stop()-next.Start())%3 == next.Phase()) return m_param->m_lnMulti;
376  else return BadScore();
377  }
378  else return BadScore();
379 }
380 
382 {
383  if(&next == m_leftstate)
384  {
385  if(next.isMinus()) return m_param->m_lnSingle;
386  else return BadScore();
387  }
388  else if(isPlus() && next.isPlus() &&
389  (next.Stop()-next.Start())%3 == 2) return m_param->m_lnSingle;
390  else return BadScore();
391 }
392 
394 {
395  Init(from);
396  toScore();
397 }
398 
400 {
401  if (from.GetOrder() != 0)
402  CInputModel::Error("Wrong Markov Chain order");
403  CMarkov_chain_params::TProbabilities::const_iterator i = from.GetProbabilities().begin();
404  m_score[enA] = (*i++)->GetValue();
405  m_score[enC] = (*i++)->GetValue();
406  m_score[enG] = (*i++)->GetValue();
407  m_score[enT] = (*i++)->GetValue();
408  if (i != from.GetProbabilities().end())
409  CInputModel::Error("Too many values in Markov Chain");
410  m_score[enN] = 0.25*(m_score[enA]+m_score[enC]+m_score[enG]+m_score[enT]);
411 }
412 
413 void CMarkovChain<0>::Average(Type& mc0, Type& mc1, Type& mc2, Type& mc3)
414 {
415  m_score[enA] = 0.25*(mc0.m_score[enA]+mc1.m_score[enA]+mc2.m_score[enA]+mc3.m_score[enA]);
416  m_score[enC] = 0.25*(mc0.m_score[enC]+mc1.m_score[enC]+mc2.m_score[enC]+mc3.m_score[enC]);
417  m_score[enG] = 0.25*(mc0.m_score[enG]+mc1.m_score[enG]+mc2.m_score[enG]+mc3.m_score[enG]);
418  m_score[enT] = 0.25*(mc0.m_score[enT]+mc1.m_score[enT]+mc2.m_score[enT]+mc3.m_score[enT]);
419  m_score[enN] = 0.25*(m_score[enA]+m_score[enC]+m_score[enG]+m_score[enT]);
420 }
421 
423 {
424  m_score[enA] = (m_score[enA] <= 0) ? BadScore() : log(4*m_score[enA]);
425  m_score[enC] = (m_score[enC] <= 0) ? BadScore() : log(4*m_score[enC]);
426  m_score[enG] = (m_score[enG] <= 0) ? BadScore() : log(4*m_score[enG]);
427  m_score[enT] = (m_score[enT] <= 0) ? BadScore() : log(4*m_score[enT]);
428  m_score[enN] = (m_score[enN] <= 0) ? BadScore() : log(4*m_score[enN]);
429 }
430 
432 
434 {
435  m_inexon = from.GetStart().GetIn_exon();
436  m_inintron = from.GetStart().GetIn_intron();
437 
438  m_left = m_inintron;
439  m_right = m_inexon;
440 
442 }
443 
444 double CWMM_Start::Score(const CEResidueVec& seq, int i) const
445 {
446  int first = i-m_left+1;
447  int last = i+m_right;
448  if(first < 0 || last >= (int)seq.size()) return BadScore(); // out of range
449  if(seq[i-2] != enA || seq[i-1] != enT || seq[i] != enG) return BadScore(); // no ATG
450 
451  return m_matrix.Score(&seq[first]);
452 }
453 
455 {
456  m_inexon = from.GetStop().GetIn_exon();
457  m_inintron = from.GetStop().GetIn_intron();
458 
459  m_left = m_inexon;
461 
463 }
464 
465 double CWAM_Stop::Score(const CEResidueVec& seq, int i) const
466 {
467  int first = i-m_left+1;
468  int last = i+m_right;
469  if(first-1 < 0 || last >= (int)seq.size()) return BadScore(); // out of range
470  if((seq[i+1] != enT || seq[i+2] != enA || seq[i+3] != enA) &&
471  (seq[i+1] != enT || seq[i+2] != enA || seq[i+3] != enG) &&
472  (seq[i+1] != enT || seq[i+2] != enG || seq[i+3] != enA)) return BadScore(); // no stop-codon
473 
474  return m_matrix.Score(&seq[first]);
475 }
476 
477 
479 {
480  m_A = from.GetA();
481  m_L = from.GetL();
482  m_minl = from.GetRange().GetMin();
483  m_maxl = from.GetRange().GetMax();
484  m_step = from.GetStep();
485 
486  int num = (m_maxl-1)/m_step+1;
487  m_maxl = (num-1)*m_step+1;
488 
489  try
490  {
491  m_score.resize(num,0);
492  m_clscore.resize(num,0);
493  }
494  catch(bad_alloc)
495  {
496  NCBI_THROW(CGnomonException, eMemoryLimit, "Not enough memory for CLorentz");
497  }
498 
499  int i = 0;
501  m_score[i] = *s;
502  if((i+1)*m_step < m_minl)
503  m_score[i] = 0;
504  ++i;
505  }
506  while(i < num)
507  {
508  m_score[i] = m_A/(m_L*m_L+pow((i+0.5)*m_step,2));
509  ++i;
510  }
511 
512  double sum = 0;
513  m_avlen = 0;
514  for(int l = m_minl; l <= m_maxl; ++l)
515  {
516  double scr = m_score[(l-1)/m_step];
517  sum += scr;
518  m_avlen += l*scr;
519  }
520 
521  m_avlen /= sum;
522  for(int i = 0; i < num; ++i) m_score[i] /= sum;
523 
524  m_clscore[num-1] = 0;
525  for(int i = num-2; i >= 0; --i)
526  m_clscore[i] = m_clscore[i+1]+m_score[i+1]*m_step;
527 
528  for(int i = 0; i < num; ++i)
529  m_score[i] = (m_score[i] == 0) ? -25 : log(m_score[i]);
530 }
531 
532 double CLorentz::Through(int seqlen) const
533 {
534  if(seqlen >= MaxLen()) return BadScore();
535 
536  double through = 0;
537 
538  if(seqlen >= MinLen()) {
539  int ifirst = (MinLen()-1)/m_step;
540  if(m_score[ifirst] != BadScore()) {
541  int w = (ifirst+1)*m_step-MinLen()+1;
542  int l2 = (MinLen()+(ifirst+1)*m_step);
543  through = (l2-2*seqlen)*w/2*exp(m_score[ifirst]);
544  }
545 
546  int ilast = (seqlen-1)/m_step;
547  for(int i = 0; i < ilast; ++i) {
548  if(m_score[i] == BadScore()) continue;
549  int l2 = (i+1)*m_step+i*m_step+1;
550  through += (l2-2*seqlen)*m_step/2*exp(m_score[i]);
551  }
552 
553  if(m_score[ilast] != BadScore()) {
554  int w = seqlen-ilast*m_step;
555  int l2 = ilast*m_step+1+seqlen;
556  through += (l2-2*seqlen)*w/2*exp(m_score[ilast]);
557  }
558  }
559 
560  through = (AvLen()-seqlen-through)/AvLen();
561  through = (through <= 0) ? BadScore() : log(through);
562  return through;
563 }
564 
566 {
567  string label = class_id();
568 
569  {
570  int i = 0;
572  if (i<3)
573  m_firstphase[i] = log(*p);
574  else
575  Error(label+" Too long First_exon_phase_probabilities");
576  ++i;
577  }
578  }
579 
580  {
581  CExon_params::TInternal_exon_phase_probabilities::const_iterator p = from.GetExon().GetInternal_exon_phase_probabilities().begin();
582  for(int i = 0; i < 3; ++i) {
583  for(int j = 0; j < 3; ++j) {
584  m_internalphase[i][j] = log(*p++);
585  }
586  }
587  if (p != from.GetExon().GetInternal_exon_phase_probabilities().end())
588  Error(label+" Too long Internal_exon_phase_probabilities");
589  }
590 
595 
596  m_initialised = true;
597 }
598 
599 
601 {
602  m_initp = from.GetIntron().GetInitp();
603  m_initp = m_initp/2; // two strands
604 
605  int i = 0;
607  if (i<3)
608  m_phasep[i] = *p;
609  else
610  Error(class_id()+" Too long Phase_probabilities");
611  ++i;
612  }
613 
614  double toterm = from.GetIntron().GetTo_term();
615  m_lnTerminal = log(toterm);
616  m_lnInternal = log(1-toterm);
617 
619 }
620 
621 void CIntronParameters::SetSeqLen(int seqlen) const
622 {
623  double lnthrough = m_intronlen.Through(seqlen);
624  for(int i = 0; i < 3; ++i)
625  {
626  m_lnDen[i] = log(m_initp*m_phasep[i]);
627  m_lnThrough[i] = (lnthrough == BadScore()) ? BadScore() : m_lnDen[i]+lnthrough;
628  }
629 
630  m_initialised = true;
631 }
632 
633 
635 {
636  m_initp = from.GetIntergenic().GetInitp();
637  m_initp = m_initp/2; // two strands
638 
639  double tosingle = from.GetIntergenic().GetTo_single();
640 
641  m_lnSingle = log(tosingle);
642  m_lnMulti = log(1-tosingle);
643 
645 }
646 
647 void CIntergenicParameters::SetSeqLen(int seqlen) const
648 {
649  double lnthrough = m_intergeniclen.Through(seqlen);
650  m_lnDen = log(m_initp);
651  m_lnThrough = (lnthrough == BadScore()) ? BadScore() : m_lnDen+lnthrough;
652 
653  m_initialised = true;
654 }
655 
657 {
659 }
661 {
662  ITERATE(vector<CInputModel*>, i, all_created_models)
663  delete *i;
664  all_created_models.clear();
665  params.clear();
666 }
667 
669 {
670  TParamMap::iterator i = params.insert(make_pair(type,TCGContentList())).first;
671  if (i->second.empty())
672  i->second.push_back(make_pair<int,CInputModel*>(101,NULL));
673 
674  return i->second;
675 }
676 
677 void CHMMParameters::SDetails::StoreParam( const string& type, CInputModel* input_model, int low, int high )
678 {
679  TCGContentList& list = GetCGList(type);
680 
681  int lowest = 0;
682  TCGContentList::iterator i = list.begin();
683  while( i->first <= low ) {
684  lowest = i->first;
685  ++i;
686  }
687 
688  if (lowest < low) {
689  i = list.insert(i, *i );
690  i->first = low;
691  ++i;
692  }
693 
694  if (high < i->first) {
695  i = list.insert(i, *i );
696  i->first = high;
697  } else if (high > i->first) {
699  }
700 
701  i->second = input_model;
702 }
703 
704 CHMMParameters::CHMMParameters(const CGnomon_params& hmm_params_asn) : m_details( new SDetails(hmm_params_asn) )
705 {
706 }
707 
709 {
710  unique_ptr<CObjectIStream> inp(CObjectIStream::Open(format,hmm_params_istr));
711  CRef<CGnomon_params> params_asn(new CGnomon_params);
712  *inp >> *params_asn;
713  m_details.Reset( new SDetails(*params_asn) );
714 }
715 
717 {
718 }
719 
720 template <class CParam>
722 {
723  ITERATE(CGnomon_params::Tdata, p, hmm_params_asn.Get()) {
724  if ((*p)->GetParam().Which() != choice)
725  continue;
726 
727  int low = (*p)->GetGc_content_range().GetFrom();
728  int high = (*p)->GetGc_content_range().GetTo();
729  if (!( 0<=low && low < high && high <= 100) )
730  CInputModel::Error(CParam::class_id());
731 
732  CInputModel* input_model( new CParam((*p)->GetParam()) );
733  if (input_model==NULL)
734  continue;
735 
736  all_created_models.push_back(input_model);
737 
738  StoreParam(CParam::class_id(), input_model, low, high);
739  }
740 }
741 
743 {
744  DeleteAllCreatedModels();
745 
746  ReadParameters<CWAM_Donor<2> >(hmm_params_asn, CGnomon_param::C_Param::e_Donor);
747  ReadParameters<CWAM_Acceptor<2> >(hmm_params_asn, CGnomon_param::C_Param::e_Acceptor);
748  ReadParameters<CWMM_Start>(hmm_params_asn, CGnomon_param::C_Param::e_Start);
749  ReadParameters<CWAM_Stop>(hmm_params_asn, CGnomon_param::C_Param::e_Stop);
750  ReadParameters<CMC3_CodingRegion<5> >(hmm_params_asn, CGnomon_param::C_Param::e_Coding_region);
751  ReadParameters<CMC_NonCodingRegion<5> >(hmm_params_asn, CGnomon_param::C_Param::e_Non_coding_region);
752  ReadParameters<CIntronParameters>(hmm_params_asn, CGnomon_param::C_Param::e_Intron);
753  ReadParameters<CIntergenicParameters>(hmm_params_asn, CGnomon_param::C_Param::e_Intergenic);
754  ReadParameters<CExonParameters>(hmm_params_asn, CGnomon_param::C_Param::e_Exon);
755 
756 }
757 
758 const CInputModel& CHMMParameters::GetParameter(const string& type, int cgcontent) const
759 {
760  return m_details->GetParameter(type, cgcontent);
761 }
762 
763 const CInputModel& CHMMParameters::SDetails::GetParameter(const string& type, int cgcontent) const
764 {
765  if (cgcontent < 0)
766  cgcontent = 0;
767  else if (cgcontent >= 100)
768  cgcontent = 99;
769 
770  TParamMap::const_iterator i_param = params.find(type);
771  if (i_param == params.end())
773 
774  ITERATE( TCGContentList, i, i_param->second) {
775  if (cgcontent < i->first) {
776  if (i->second == NULL) {
778  }
779  return *i->second;
780  }
781  }
782 
783  _ASSERT( false);
785  return *params.begin()->second.front().second;
786 }
787 
788 
789 END_SCOPE(gnomon)
double m_firstphase[3]
Definition: hmm.hpp:316
static string class_id()
Definition: hmm.hpp:312
CLorentz m_internallen
Definition: hmm.hpp:317
double m_internalphase[3][3]
Definition: hmm.hpp:316
CLorentz m_singlelen
Definition: hmm.hpp:317
bool m_initialised
Definition: hmm.hpp:318
CExonParameters(const objects::CGnomon_param::C_Param &from)
Definition: hmm.cpp:565
CLorentz m_lastlen
Definition: hmm.hpp:317
CLorentz m_firstlen
Definition: hmm.hpp:317
Definition: hmm.hpp:322
double m_mscore
Definition: hmm.hpp:344
void UpdatePrevExon(const CExon &e)
Definition: hmm.cpp:168
bool OpenRgn() const
Definition: hmm.cpp:134
const CExonParameters * m_param
Definition: hmm.hpp:346
double MScore() const
Definition: hmm.hpp:336
bool StopInside() const
Definition: hmm.cpp:118
CExon(EStrand strn, int point, int ph, const CSeqScores &seqscr, const CExonParameters &params)
Definition: hmm.cpp:109
const CExon * m_prevexon
Definition: hmm.hpp:343
double RgnScore() const
Definition: hmm.cpp:150
int m_phase
Definition: hmm.hpp:342
int Phase() const
Definition: hmm.hpp:327
double TermScore() const
Definition: hmm.cpp:223
double BranchScore(const CHMM_State &) const
Definition: hmm.hpp:380
CFirstExon(EStrand strn, int ph, int point, const CSeqScores &seqscr, const CExonParameters &params)
Definition: hmm.cpp:201
double LengthScore() const
Definition: hmm.cpp:217
CGnomon_params –.
void DeleteAllCreatedModels()
Definition: hmm.cpp:660
void ReadParameters(const objects::CGnomon_params &hmm_params_asn, objects::CGnomon_param::C_Param::E_Choice choice)
vector< pair< int, CInputModel * > > TCGContentList
Definition: hmm.hpp:569
TCGContentList & GetCGList(const string &type)
Definition: hmm.cpp:668
void StoreParam(const string &type, CInputModel *input_model, int low, int high)
Definition: hmm.cpp:677
SDetails(const objects::CGnomon_params &hmm_params_asn)
Definition: hmm.cpp:742
const CInputModel & GetParameter(const string &type, int cgcontent) const
Definition: hmm.cpp:763
CHMMParameters(const objects::CGnomon_params &hmm_params_asn)
CRef< SDetails > m_details
Definition: gnomon.hpp:62
const CInputModel & GetParameter(const string &type, int cgcontent) const
Definition: hmm.cpp:758
~CHMMParameters()
Definition: hmm.cpp:716
const CSeqScores * m_seqscr
Definition: hmm.hpp:304
int Start() const
Definition: hmm.hpp:283
bool NoRightEnd() const
Definition: hmm.hpp:284
int RegionStop() const
Definition: hmm.cpp:97
bool isPlus() const
Definition: hmm.hpp:280
int RegionStart() const
Definition: hmm.cpp:85
bool NoLeftEnd() const
Definition: hmm.hpp:285
double Score() const
Definition: hmm.hpp:282
EStrand Strand() const
Definition: hmm.hpp:279
int Stop() const
Definition: hmm.hpp:286
const CTerminal * m_terminal
Definition: hmm.hpp:303
CHMM_State(EStrand strn, int point, const CSeqScores &seqscr)
Definition: hmm.cpp:57
bool isMinus() const
Definition: hmm.hpp:281
const CHMM_State * m_leftstate
Definition: hmm.hpp:302
int m_stop
Definition: hmm.hpp:299
static void Error(const string &label)
Definition: hmm.hpp:100
virtual ~CInputModel()=0
Definition: hmm.cpp:431
double m_lnThrough
Definition: hmm.hpp:528
void SetSeqLen(int seqlen) const
Definition: hmm.cpp:647
CIntergenicParameters(const objects::CGnomon_param::C_Param &from)
Definition: hmm.cpp:634
CLorentz m_intergeniclen
Definition: hmm.hpp:530
CIntergenic(EStrand strn, int point, const CSeqScores &seqscr, const CIntergenicParameters &params)
Definition: hmm.cpp:340
double BranchScore(const CHMM_State &) const
Definition: hmm.hpp:551
const CIntergenicParameters * m_param
Definition: hmm.hpp:558
double RgnScore() const
Definition: hmm.cpp:355
double TermScore() const
Definition: hmm.cpp:360
bool OpenRgn() const
Definition: hmm.cpp:350
double LengthScore() const
Definition: hmm.cpp:247
double TermScore() const
Definition: hmm.cpp:266
CInternalExon(EStrand strn, int ph, int point, const CSeqScores &seqscr, const CExonParameters &params)
Definition: hmm.cpp:239
CLorentz m_intronlen
Definition: hmm.hpp:438
bool m_initialised
Definition: hmm.hpp:441
double m_lnThrough[3]
Definition: hmm.hpp:435
double m_lnTerminal
Definition: hmm.hpp:437
double m_lnDen[3]
Definition: hmm.hpp:436
double m_initp
Definition: hmm.hpp:440
double m_lnInternal
Definition: hmm.hpp:437
static string class_id()
Definition: hmm.hpp:428
void SetSeqLen(int seqlen) const
Definition: hmm.cpp:621
CIntronParameters(const objects::CGnomon_param::C_Param &from)
Definition: hmm.cpp:600
double m_phasep[3]
Definition: hmm.hpp:440
const CIntronParameters * m_param
Definition: hmm.hpp:492
double BranchScore(const CHMM_State &) const
Definition: hmm.hpp:473
int Phase() const
Definition: hmm.hpp:453
double RgnScore() const
Definition: hmm.cpp:335
CIntron(int j_in, char amin_in, char don11, char don21, char acs11, char acs21, const CAlignInfo &prev, const CAlignInfo &cur, const CNSeq &nseq_in, const CProSplignScaledScoring &scoring)
Definition: intron.hpp:317
double ClosingLengthScore() const
Definition: hmm.cpp:316
double LengthScore() const
Definition: hmm.cpp:288
CLastExon(EStrand strn, int ph, int point, const CSeqScores &seqscr, const CExonParameters &params)
Definition: hmm.cpp:272
double TermScore() const
Definition: hmm.cpp:294
double BranchScore(const CHMM_State &) const
Definition: hmm.hpp:417
CLength_distribution_params –.
int MaxLen() const
Definition: hmm.hpp:255
double Score(int l) const
Definition: hmm.hpp:252
int m_minl
Definition: hmm.hpp:260
void Init(const objects::CLength_distribution_params &from)
Definition: hmm.cpp:478
double Through(int seqlen) const
Definition: hmm.cpp:532
int MinLen() const
Definition: hmm.hpp:254
int m_step
Definition: hmm.hpp:260
double m_A
Definition: hmm.hpp:261
double ClosingScore(int l) const
Definition: hmm.cpp:48
double AvLen() const
Definition: hmm.hpp:256
double m_avlen
Definition: hmm.hpp:261
TDVec m_score
Definition: hmm.hpp:262
int m_maxl
Definition: hmm.hpp:260
double m_L
Definition: hmm.hpp:261
TDVec m_clscore
Definition: hmm.hpp:262
void InitScore(int l, const objects::CMarkov_chain_array &from)
Definition: hmm_inlines.hpp:94
double Score(const EResidue *seq) const
Definition: hmm_inlines.hpp:82
double m_score[5]
Definition: hmm.hpp:82
void Init(const objects::CMarkov_chain_params &hmm_param_asn)
Definition: hmm_inlines.hpp:50
void toScore()
Definition: hmm_inlines.hpp:73
void InitScore(const objects::CMarkov_chain_params &hmm_param_asn)
Definition: hmm_inlines.hpp:44
void Average(Type &mc0, Type &mc1, Type &mc2, Type &mc3)
Definition: hmm_inlines.hpp:64
CMarkov_chain_params –.
CParam.
Definition: ncbi_param.hpp:447
CRef –.
Definition: ncbiobj.hpp:618
bool OpenCodingRegion(int a, int b, int strand, int frame) const
Definition: score.cpp:120
const CTerminal & Acceptor() const
Definition: score.hpp:73
const CTerminal & Stop() const
Definition: score.hpp:76
double StartScore(int i, int strand) const
Definition: score.hpp:71
bool OpenIntergenicRegion(int a, int b) const
Definition: score.cpp:146
double IntergenicScore(int a, int b, int strand) const
Definition: score.cpp:151
const CTerminal & Start() const
Definition: score.hpp:75
double AcceptorScore(int i, int strand) const
Definition: score.hpp:69
double CodingScore(int a, int b, int strand, int frame) const
Definition: score.cpp:130
double DonorScore(int i, int strand) const
Definition: score.hpp:70
int SeqLen() const
Definition: score.hpp:90
const CTerminal & Donor() const
Definition: score.hpp:74
bool StopInside(int a, int b, int strand, int frame) const
Definition: score.cpp:115
double StopScore(int i, int strand) const
Definition: score.hpp:72
double BranchScore(const CHMM_State &) const
Definition: hmm.hpp:360
double LengthScore() const
Definition: hmm.cpp:184
CSingleExon(EStrand strn, int point, const CSeqScores &seqscr, const CExonParameters &params)
Definition: hmm.cpp:175
double TermScore() const
Definition: hmm.cpp:189
int m_right
Definition: hmm.hpp:118
int m_inintron
Definition: hmm.hpp:118
int Right() const
Definition: hmm.hpp:114
int m_inexon
Definition: hmm.hpp:118
int Left() const
Definition: hmm.hpp:113
int m_left
Definition: hmm.hpp:118
double Score(const CEResidueVec &seq, int i) const
Definition: hmm.cpp:465
CWAM_Stop(const objects::CGnomon_param::C_Param &from)
Definition: hmm.cpp:454
CMarkovChainArray< 1 > m_matrix
Definition: hmm.hpp:168
CWMM_Start(const objects::CGnomon_param::C_Param &from)
Definition: hmm.cpp:433
CMarkovChainArray< 0 > m_matrix
Definition: hmm.hpp:156
double Score(const CEResidueVec &seq, int i) const
Definition: hmm.cpp:444
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:51
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:56
@ enN
@ enC
@ enG
@ enT
@ enA
double BadScore()
EStrand
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NULL
Definition: ncbistd.hpp:225
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
ESerialDataFormat
Data file format.
Definition: serialdef.hpp:71
static CObjectIStream * Open(ESerialDataFormat format, CNcbiIstream &inStream, bool deleteInStream)
Create serial object reader and attach it to an input stream.
Definition: objistr.cpp:195
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
static const char label[]
list< double > TFirst_exon_phase_probabilities
TMin GetMin(void) const
Get the Min member data.
TL GetL(void) const
Get the L member data.
const TSingle_exon_length & GetSingle_exon_length(void) const
Get the Single_exon_length member data.
const TLast_exon_length & GetLast_exon_length(void) const
Get the Last_exon_length member data.
TA GetA(void) const
Get the A member data.
const Tdata & Get(void) const
Get the member data.
TStep GetStep(void) const
Get the Step member data.
const TRange & GetRange(void) const
Get the Range member data.
const TStop & GetStop(void) const
Get the variant data.
const TLength & GetLength(void) const
Get the Length member data.
const TInternal_exon_length & GetInternal_exon_length(void) const
Get the Internal_exon_length member data.
const TStart & GetStart(void) const
Get the variant data.
TIn_intron GetIn_intron(void) const
Get the In_intron member data.
TTo_term GetTo_term(void) const
Get the To_term member data.
const TIntron & GetIntron(void) const
Get the variant data.
const TFirst_exon_length & GetFirst_exon_length(void) const
Get the First_exon_length member data.
TTo_single GetTo_single(void) const
Get the To_single member data.
TIn_exon GetIn_exon(void) const
Get the In_exon member data.
const TP & GetP(void) const
Get the P member data.
TInitp GetInitp(void) const
Get the Initp member data.
const TFirst_exon_phase_probabilities & GetFirst_exon_phase_probabilities(void) const
Get the First_exon_phase_probabilities member data.
TMax GetMax(void) const
Get the Max member data.
list< double > TPhase_probabilities
const TProbabilities & GetProbabilities(void) const
Get the Probabilities member data.
TInitp GetInitp(void) const
Get the Initp member data.
const TPhase_probabilities & GetPhase_probabilities(void) const
Get the Phase_probabilities member data.
const TLength & GetLength(void) const
Get the Length member data.
list< CRef< CGnomon_param > > Tdata
const TIntergenic & GetIntergenic(void) const
Get the variant data.
const TExon & GetExon(void) const
Get the variant data.
const TInternal_exon_phase_probabilities & GetInternal_exon_phase_probabilities(void) const
Get the Internal_exon_phase_probabilities member data.
TOrder GetOrder(void) const
Get the Order member data.
@ e_Coding_region
three elements (per phase)
USING_SCOPE(objects)
const double kLnThree
Definition: hmm.cpp:46
const double kLnHalf
Definition: hmm.cpp:45
void EvaluateInitialScore(State &r)
int i
unsigned int a
Definition: ncbi_localip.c:102
T max(T x_, T y_)
T min(T x_, T y_)
static Format format
Definition: njn_ioutil.cpp:53
Definition: type.c:6
#define _ASSERT
#define const
Definition: zconf.h:232
Modified on Sat Jun 22 10:43:01 2024 by modify_doxy.py rev. 669887