NCBI C++ ToolKit
gnomon_model.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef ALGO_GNOMON___GNOMON_MODEL__HPP
2 #define ALGO_GNOMON___GNOMON_MODEL__HPP
3 
4 /* $Id: gnomon_model.hpp 100465 2023-08-03 17:24:45Z souvorov $
5  * ===========================================================================
6  *
7  * PUBLIC DOMAIN NOTICE
8  * National Center for Biotechnology Information
9  *
10  * This software/database is a "United States Government Work" under the
11  * terms of the United States Copyright Act. It was written as part of
12  * the author's official duties as a United States Government employee and
13  * thus cannot be copyrighted. This software/database is freely available
14  * to the public for use. The National Library of Medicine and the U.S.
15  * Government have not placed any restriction on its use or reproduction.
16  *
17  * Although all reasonable efforts have been taken to ensure the accuracy
18  * and reliability of the software and data, the NLM and the U.S.
19  * Government do not and cannot warrant the performance or results that
20  * may be obtained by using this software or data. The NLM and the U.S.
21  * Government disclaim all warranties, express or implied, including
22  * warranties of performance, merchantability or fitness for any particular
23  * purpose.
24  *
25  * Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Authors: Alexandre Souvorov
30  *
31  * File Description:
32  *
33  */
34 
35 #include <corelib/ncbiobj.hpp>
36 #include <corelib/ncbistd.hpp>
37 #include <corelib/ncbi_limits.hpp>
38 #include "set.hpp"
39 
40 #include <set>
41 #include <vector>
42 #include <algorithm>
43 #include <math.h>
44 
45 #include <objmgr/seq_vector_ci.hpp> // CSeqVectorTypes::TResidue
46 #include <util/range.hpp> // TSignedSeqRange
47 
50 class CSeq_align;
51 class CSeq_id;
52 class CGenetic_code;
55 BEGIN_SCOPE(gnomon)
56 
57 class CGnomonEngine;
58 
/// Returns the "bad score" marker: the negated largest finite double.
///
/// This is an inline function rather than a kBadScore constant because,
/// on the WorkShop compiler, such a constant is implicitly static and
/// therefore unavailable for use in inline functions.
inline double BadScore()
{
    const double largest_finite = std::numeric_limits<double>::max();
    return -largest_finite;
}
64 
/// Strand designator: plus or minus.
enum EStrand { ePlus, eMinus };

/// Returns the strand opposite to @a s.
/// Any value other than ePlus is mapped to ePlus.
inline EStrand OtherStrand(EStrand s)
{
    if (s == ePlus) {
        return eMinus;
    }
    return ePlus;
}
67 
69 
70 typedef vector<TResidue> CResidueVec;
71 typedef vector<int> TIVec;
72 typedef vector<double> TDVec;
73 
74 
75 inline bool Precede(TSignedSeqRange l, TSignedSeqRange r) { return l.GetTo() < r.GetFrom(); }
76 inline bool Include(TSignedSeqRange big, TSignedSeqRange small) { return (big.GetFrom()<=small.GetFrom() && small.GetTo()<=big.GetTo()); }
77 inline bool Include(TSignedSeqRange r, TSignedSeqPos p) { return (r.GetFrom()<=p && p<=r.GetTo()); }
78 inline bool Enclosed(TSignedSeqRange big, TSignedSeqRange small) { return (big != small && Include(big, small)); }
79 
81 {
82 public:
83  CSupportInfo(Int8 model_id, bool core=false);
84 
85 
86  Int8 GetId() const;
87  void SetCore(bool core);
88  bool IsCore() const;
89  bool operator==(const CSupportInfo& s) const;
90  bool operator<(const CSupportInfo& s) const;
91 
92 private:
95 };
96 
98 
99 class CAlignModel;
100 
102 public:
103 
104  struct SSource {
105  SSource() : m_strand(ePlus) {}
106  string m_acc;
109  };
110 
111  enum EType {eDel, eIns, eMism};
112  enum EStatus {eGenomeNotCorrect, eGenomeCorrect, eUnknown};
113 
114  CInDelInfo(TSignedSeqPos l, int len, EType type, const string& v = kEmptyStr, const SSource& s = SSource()) { Init(l, len, type, v, s); }
115 
116  TSignedSeqPos Loc() const { return m_loc; }
117  int Len() const { return m_len; }
118  int InDelEnd() const { return ((IsInsertion() || IsMismatch()) ? Loc()+Len() : Loc()); } // first base "after" correction
119  bool IsInsertion() const { return m_type == eIns; }
120  bool IsDeletion() const { return m_type == eDel; }
121  bool IsMismatch() const { return m_type == eMism; }
122  bool IntersectingWith(TSignedSeqPos a, TSignedSeqPos b) const // insertion/mismatch at least partially inside, deletion inside or flanking
123  {
124  return (IsDeletion() && Loc() >= a && Loc() <= b+1) ||
125  ((IsInsertion() || IsMismatch()) && Loc() <= b && a <= Loc()+Len()-1);
126  }
127  bool operator<(const CInDelInfo& fsi) const // source is ignored!!!!!!!!!!!
128  {
129  if(m_loc != fsi.m_loc)
130  return m_loc < fsi.m_loc;
131  else if(m_type != fsi.m_type)
132  return m_type < fsi.m_type; // if location is same deletion first
133  else if(m_len != fsi.m_len)
134  return m_len < fsi.m_len;
135  else
136  return m_indelv < fsi.m_indelv;
137  }
138  bool operator!=(const CInDelInfo& fsi) const { return (*this < fsi || fsi < *this); }
139  bool operator==(const CInDelInfo& fsi) const { return !(*this != fsi); }
140  string GetInDelV() const { return m_indelv; }
141  const SSource& GetSource() const { return m_source; }
142  EType GetType() const { return m_type; };
143  void SetStatus(EStatus s) { m_status = s; }
144  EStatus GetStatus() const { return m_status; }
145  void SetLoc(TSignedSeqPos l) { m_loc = l; }
146 
147 private:
148  void Init(TSignedSeqPos l, int len, EType type, const string& v, const SSource& s) {
149  m_loc = l;
150  m_len = len;
151  m_type = type;
152  m_status = eUnknown;
153  m_indelv = v;
154  m_source = s;
155  _ASSERT(m_indelv.empty() || (int)m_indelv.length() == len);
156  _ASSERT(m_indelv.empty() || m_type != eIns);
157  if((IsDeletion() || IsMismatch()) && GetInDelV().empty())
158  m_indelv.insert( m_indelv.end(), Len(),'N');
159  }
160 
161  TSignedSeqPos m_loc; // left location for insertion, deletion is before m_loc
162  // insertion - when there are extra bases in the genome
163  int m_len;
166  string m_indelv;
168 };
169 
170 typedef vector<CInDelInfo> TInDels;
171 
172 template <class Res>
173 bool IsStartCodon(const Res * seq, int strand = ePlus); // seq points to the first base in biological order
174 template <class Res>
175 bool IsStopCodon(const Res * seq, int strand = ePlus); // seq points to the first base in biological order
176 
177 
179 public:
180  virtual ~CRangeMapper() {}
181  virtual TSignedSeqRange operator()(TSignedSeqRange r, bool withextras = true) const = 0;
182 };
183 
185 public:
186  CModelExon(TSignedSeqPos f = 0, TSignedSeqPos s = 0, bool fs = false, bool ss = false, const string& fsig = "", const string& ssig = "", double ident = 0, const string& seq = "", const CInDelInfo::SSource& src = CInDelInfo::SSource()) :
187  m_fsplice(fs), m_ssplice(ss), m_fsplice_sig(fsig), m_ssplice_sig(ssig), m_ident(ident), m_seq(seq), m_source(src), m_range(f,s)
188  {
189  _ASSERT(m_seq.empty() || m_range.Empty());
190  };
191 
192  bool operator==(const CModelExon& p) const
193  {
194  return (m_range==p.m_range && m_fsplice == p.m_fsplice && m_ssplice == p.m_ssplice);
195  }
196  bool operator!=(const CModelExon& p) const
197  {
198  return !(*this == p);
199  }
200  bool operator<(const CModelExon& p) const { return Precede(Limits(),p.Limits()); }
201 
202  operator TSignedSeqRange() const { return m_range; }
203  const TSignedSeqRange& Limits() const { return m_range; }
204  TSignedSeqRange& Limits() { return m_range; }
205  TSignedSeqPos GetFrom() const { return m_range.GetFrom(); }
206  TSignedSeqPos GetTo() const { return m_range.GetTo(); }
207  void Extend(const CModelExon& e);
208  void AddFrom(int d) { m_range.SetFrom( m_range.GetFrom() +d ); }
209  void AddTo(int d) { m_range.SetTo( m_range.GetTo() +d ); }
210 
211  bool m_fsplice, m_ssplice;
212  string m_fsplice_sig, m_ssplice_sig; // obeys strand
213  double m_ident;
214  string m_seq; // exon sequence if in gap; obeys strand
216 
217  void Remap(const CRangeMapper& mapper) { m_range = mapper(m_range); }
218 private:
220 };
221 
222 class CAlignMap;
223 
224 class CCDSInfo {
225 public:
227 
228  bool operator== (const CCDSInfo& another) const;
229 
230  //CDS mapped to transcript should be used only for final models (not alignments)
231  //Change in indels or 5' UTR will invalidate the coordinates (in particular conversion from CAlignModel to CGeneModel);
232  bool IsMappedToGenome() const { return m_genomic_coordinates; }
233  CCDSInfo MapFromOrigToEdited(const CAlignMap& amap) const;
234  CCDSInfo MapFromEditedToOrig(const CAlignMap& amap) const; // returns 'empty' CDS if can't map
235 
238  TSignedSeqRange Cds() const { return Start()+ReadingFrame()+Stop(); }
240 
241  TSignedSeqRange Start() const {return m_start; }
242  TSignedSeqRange Stop() const {return m_stop; }
243  bool HasStart() const { return Start().NotEmpty(); }
244  bool HasStop () const { return Stop().NotEmpty(); }
245  bool ConfirmedStart() const { return m_confirmed_start; } // start is confirmed by protein alignment
246  bool ConfirmedStop() const { return m_confirmed_stop; } // stop is confirmed by protein alignment
247 
248  bool OpenCds() const { return m_open; } // "optimal" CDS is not internal
249  double Score() const { return m_score; }
250 
251  void SetReadingFrame(TSignedSeqRange r, bool protein = false);
252  void SetStart(TSignedSeqRange r, bool confirmed = false);
253  void SetStop(TSignedSeqRange r, bool confirmed = false);
255  void Clear5PrimeCdsLimit();
256  void SetScore(double score, bool open=false);
257 
258  void CombineWith(const CCDSInfo& another_cds_info);
259  void Remap(const CRangeMapper& mapper);
261  void Cut(TSignedSeqRange hole);
262  void Clear();
263 
264  int Strand() const; // -1 (minus), 0 (unknown), 1 (plus)
265 
267  struct SPStop : public TSignedSeqRange {
269 
270  //not overloaded == is used for uniquing and finding intervals
271  //overloaded < is used for sorting before uniquing
272  bool operator<(const SPStop& stp) const {
273  if(operator==(stp)) // == is not overloaded
274  return m_status < stp.m_status;
275  else
276  return TSignedSeqRange::operator<(stp);
277  }
278 
280  };
281 
282  typedef vector<SPStop> TPStops;
283  const TPStops& PStops() const { return m_p_stops; }
284  bool PStop(bool includeall = true) const; // has premature stop(s)
285  void AddPStop(SPStop stp) { m_p_stops.push_back(stp); _ASSERT( Invariant() ); }
286  void AddPStop(TSignedSeqRange r, EStatus status);
287  void ClearPStops() { m_p_stops.clear(); }
288 
289  bool Invariant() const
290  {
291 #ifdef _DEBUG
292  if (ReadingFrame().Empty()) {
293  _ASSERT( !HasStop() && !HasStart() );
295  _ASSERT( !ConfirmedStart() );
296  _ASSERT( !ConfirmedStop() );
297  // _ASSERT( !PStop() );
298  _ASSERT( !OpenCds() );
299  _ASSERT( Score()==BadScore() );
300  return true;
301  }
302 
303  _ASSERT( !Start().IntersectingWith(ReadingFrame()) );
304  _ASSERT( !Stop().IntersectingWith(ReadingFrame()) );
306  _ASSERT( Include( MaxCdsLimits(), Cds() ) );
307 
308  if (!HasStop() && !HasStart()) {
310  } else if (HasStart() && !HasStop()) {
311  if (Precede(Start(), ReadingFrame())) {
313  } else {
315  }
316  } else if (HasStart() && HasStop()) {
318  }
319  if (HasStop()) {
320  if (Precede(ReadingFrame(),Stop())) {
321  _ASSERT( MaxCdsLimits().GetTo()==Stop().GetTo() );
322  } else {
323  _ASSERT( MaxCdsLimits().GetFrom()==Stop().GetFrom() );
324  }
325  }
326 
327  if (ConfirmedStart()) {
328  _ASSERT( HasStart() );
329  }
330 
331  if (ConfirmedStop()) {
332  _ASSERT( HasStop() );
333  }
334 
335  // ITERATE(TPStops, s, PStops())
336  // _ASSERT( Include(MaxCdsLimits(), *s) );
337 #endif
338 
339  return true;
340  }
341 
342 private:
347 
350 
351  bool m_open;
352  double m_score;
353 
355 };
356 
357 
359 {
360 public:
361  enum EType {
362  eWall = 1,
363  eNested = 2,
364  eSR = 4,
365  eEST = 8,
366  emRNA = 16,
367  eProt = 32,
368  eNotForChaining = 64,
369  eChain = 128,
370  eGnomon = 256
371  };
372  static string TypeToString(int type);
373 
374  enum EStatus {
375  ecDNAIntrons = 1,
376  eReversed = 2,
377  eSkipped = 4,
378  eLeftTrimmed = 8,
379  eRightTrimmed = 16,
380  eFullSupCDS = 32,
381  ePseudo = 64,
382  ePolyA = 128,
383  eCap = 256,
384  eBestPlacement = 512,
385  eUnknownOrientation = 1024,
386  eConsistentCoverage = 2048,
387  eGapFiller = 4096,
388  eUnmodifiedAlign = 8192,
389  eChangedByFilter = 16384,
390  eTSA = 32768,
391  eLeftConfirmed = 65536,
392  eRightConfirmed = 131072,
393  eLeftFlexible = 262144,
394  eRightFlexible = 524288
395  };
396 
397  CGeneModel(EStrand s = ePlus, Int8 id = 0, int type = 0) :
398  m_type(type), m_id(id), m_status(0), m_ident(0), m_weight(1), m_expecting_hole(false), m_strand(s), m_geneid(0), m_rank_in_gene(0) {}
399  virtual ~CGeneModel() {}
400 
401  void AddExon(TSignedSeqRange exon, const string& fs = "", const string& ss = "", double ident = 0, const string& seq = "", const CInDelInfo::SSource& src = CInDelInfo::SSource());
402  void AddHole(); // between model and next exons
403  void AddGgapExon(double ident, const string& seq, const CInDelInfo::SSource& src, bool infront);
404  void AddNormalExon(TSignedSeqRange exon, const string& fs, const string& ss, double ident, bool infront);
405 
406  typedef vector<CModelExon> TExons;
407  const TExons& Exons() const { return m_exons; }
408  TExons& Exons() { return m_exons; }
409  void ClearExons() {
410  m_exons.clear();
411  m_fshifts.clear();
412  m_range = TSignedSeqRange::GetEmpty();
413  m_cds_info = CCDSInfo();
414  m_edge_reading_frames.clear();
415  }
416  void SetSplices(int i, const string& f_sig, const string& s_sig) { m_exons[i].m_fsplice_sig = f_sig; m_exons[i].m_ssplice_sig = s_sig; }
417 
418  void ReverseComplementModel();
419 
420  void Remap(const CRangeMapper& mapper);
421  enum EClipMode { eRemoveExons, eDontRemoveExons };
422  virtual void Clip(TSignedSeqRange limits, EClipMode mode, bool ensure_cds_invariant = true); // drops the score!!!!!!!!!
423  virtual void CutExons(TSignedSeqRange hole); // clip or remove exons, dangerous, should be completely in or outside the cds, should not cut an exon in two
424  void ExtendLeft(int amount);
425  void ExtendRight(int amount);
426  void Extend(const CGeneModel& a, bool ensure_cds_invariant = true);
427  void RemoveShortHolesAndRescore(const CGnomonEngine& gnomon); // removes holes shorter than min intron (may add frameshifts/stops)
428 
429  TSignedSeqRange TranscriptExon(int i) const;
430 
431  TSignedSeqRange Limits() const { return m_range; }
432  TSignedSeqRange TranscriptLimits() const;
433  int AlignLen() const ;
434  void RecalculateLimits();
435 
436  // ReadingFrame doesn't include start/stop. It's always on codon boundaries
437  TSignedSeqRange ReadingFrame() const { return m_cds_info.ReadingFrame(); }
438  // CdsLimits include start/stop if any, goes to model limit if no start/stop
439  TSignedSeqRange RealCdsLimits() const;
440  int RealCdsLen() const ; // %3!=0 is possible
441  // MaxCdsLimits - longest cds. include start/stop if any,
442  // goes to 5' limit if no upstream stop, goes to 3' limit if no stop
443  TSignedSeqRange MaxCdsLimits() const;
444 
445  const CCDSInfo& GetCdsInfo() const { return m_cds_info; }
446  void SetCdsInfo(const CCDSInfo& cds_info);
447  void SetCdsInfo(const CGeneModel& a);
448  void CombineCdsInfo(const CGeneModel& a, bool ensure_cds_invariant = true);
449  void CombineCdsInfo(const CCDSInfo& cds_info, bool ensure_cds_invariant = true);
450 
451  bool IntersectingWith(const CGeneModel& a) const
452  {
453  return Limits().IntersectingWith(a.Limits());
454  }
455 
456  double Ident() const { return m_ident; }
457  void SetIdent(double i) { m_ident = i; }
458 
459  double Weight() const { return m_weight; }
460  void SetWeight(double w) { m_weight = w; }
461 
462  void SetStrand(EStrand s) { m_strand = s; }
463  EStrand Strand() const { return m_strand; }
465  bool notreversed = (Status()&CGeneModel::eReversed) == 0;
466  bool plusstrand = Strand() == ePlus;
467  return (notreversed == plusstrand) ? ePlus : eMinus;
468  }
469 
470  void SetType(int t) { m_type = t; }
471  int Type() const { return m_type; }
472  Int8 GeneID() const { return m_geneid; }
473  void SetGeneID(Int8 id) { m_geneid = id; }
474  int RankInGene() const { return m_rank_in_gene; }
475  void SetRankInGene(int rank) { m_rank_in_gene = rank; }
476  Int8 ID() const { return m_id; }
477  void SetID(Int8 id) { m_id = id; }
478  const CSupportInfoSet& Support() const { return m_support; }
479  bool AddSupport(const CSupportInfo& support) { return m_support.insert(support); }
480  void ReplaceSupport(const CSupportInfoSet& support_set) { m_support = support_set; }
481  const string& ProteinHit() const { return m_protein_hit; }
482  string& ProteinHit() { return m_protein_hit; }
483 
484  unsigned int& Status() { return m_status; }
485  const unsigned int& Status() const { return m_status; }
486  void ClearStatus() { m_status = 0; }
487 
488  const string& GetComment() const { return m_comment; }
489  void SetComment(const string& comment) { m_comment = comment; }
490  void AddComment(const string& comment) { m_comment += " " + comment; }
491 
492  bool operator<(const CGeneModel& a) const { return Precede(Limits(),a.Limits()); }
493 
494  double Score() const { return m_cds_info.Score(); }
495 
496  bool Continuous() const // no "holes" in alignment
497  {
498  for(unsigned int i = 1; i < Exons().size(); ++i)
499  if (!Exons()[i-1].m_ssplice || !Exons()[i].m_fsplice)
500  return false;
501  return true;
502  }
503  bool HasStart() const { return m_cds_info.HasStart(); }
504  bool HasStop () const { return m_cds_info.HasStop (); }
505  bool LeftComplete() const { return Strand() == ePlus ? HasStart() : HasStop(); }
506  bool RightComplete() const { return Strand() == ePlus ? HasStop() : HasStart(); }
507  bool FullCds() const { return HasStart() && HasStop() && Continuous(); }
508  bool CompleteCds() const { return FullCds() && (!Open5primeEnd() || ConfirmedStart()); }
509 
511  {
512  _ASSERT( !(OpenCds()&&ConfirmedStart()) );
513  return (ReadingFrame().Empty() || (!OpenCds() && FullCds()));
514  }
515 
516  bool Open5primeEnd() const
517  {
518  return (Strand() == ePlus ? OpenLeftEnd() : OpenRightEnd());
519  }
520  bool OpenLeftEnd() const
521  {
522  return ReadingFrame().NotEmpty() && GetCdsInfo().MaxCdsLimits().GetFrom()==TSignedSeqRange::GetWholeFrom();
523  }
524  bool OpenRightEnd() const
525  {
526  return ReadingFrame().NotEmpty() && GetCdsInfo().MaxCdsLimits().GetTo()==TSignedSeqRange::GetWholeTo();
527  }
528 
529  bool OpenCds() const { return m_cds_info.OpenCds(); } // "optimal" CDS is not internal
530  bool PStop(bool includeall = true) const { return m_cds_info.PStop(includeall); } // has premature stop(s)
531 
532  bool ConfirmedStart() const { return m_cds_info.ConfirmedStart(); } // start is confirmed by protein alignment
533  bool ConfirmedStop() const { return m_cds_info.ConfirmedStop(); } // stop is confirmed by protein alignment
534 
535  bool isNMD(int limit = 50) const;
536 
537  TInDels& FrameShifts() { return m_fshifts; }
538  const TInDels& FrameShifts() const { return m_fshifts; }
539  TInDels FrameShifts(TSignedSeqPos a, TSignedSeqPos b) const;
540  TInDels GetInDels(bool fs_only) const;
541  TInDels GetInDels(TSignedSeqPos a, TSignedSeqPos b, bool fs_only) const;
542 
543  int FShiftedLen(TSignedSeqRange ab, bool withextras = true) const; // won't work if a/b is insertion
544  int FShiftedLen(TSignedSeqPos a, TSignedSeqPos b, bool withextras = true) const { return FShiftedLen(TSignedSeqRange(a,b),withextras); }
545 
546  // move along mrna skipping introns
547  TSignedSeqPos FShiftedMove(TSignedSeqPos pos, int len) const; // may return <0 if hits a deletion at the end of move
548 
549  virtual CAlignMap GetAlignMap() const;
550 
551  string GetCdsDnaSequence (const CResidueVec& contig_sequence) const;
552  string GetProtein (const CResidueVec& contig_sequence) const;
553  string GetProtein (const CResidueVec& contig_sequence, const CGenetic_code* gencode) const;
554 
555  // Below comparisons ignore CDS completely, first 3 assume that alignments are the same strand
556 
557  int isCompatible(const CGeneModel& a) const; // returns 0 for notcompatible or (number of common splices)+1
558  bool IsSubAlignOf(const CGeneModel& a) const;
559  int MutualExtension(const CGeneModel& a) const; // returns 0 for notcompatible or (number of introns) + 1
560 
561  bool IdenticalAlign(const CGeneModel& a) const
562  { return Strand()==a.Strand() && Limits()==a.Limits() && Exons() == a.Exons() && FrameShifts()==a.FrameShifts() &&
563  GetCdsInfo().PStops() == a.GetCdsInfo().PStops() && Type() == a.Type() && Status() == a.Status(); }
564  bool operator==(const CGeneModel& a) const
565  {
566  return IdenticalAlign(a) && Type()==a.Type() && m_id==a.m_id && m_support==a.m_support;
567  }
568 
569  const list< CRef<CSeq_id> >& TrustedmRNA() const { return m_trusted_mrna; }
570  void InsertTrustedmRNA(CRef<CSeq_id> g) { m_trusted_mrna.push_back(g); };
571  void ClearTrustedmRNA() { m_trusted_mrna.clear(); };
572 
573  const list< CRef<CSeq_id> >& TrustedProt() const { return m_trusted_prot; }
574  void InsertTrustedProt(CRef<CSeq_id> g) { m_trusted_prot.push_back(g); };
575  void ClearTrustedProt() { m_trusted_prot.clear(); };
576 
577  const vector<CCDSInfo>* GetEdgeReadingFrames() const { return &m_edge_reading_frames; }
578  vector<CCDSInfo>* SetEdgeReadingFrames() { return &m_edge_reading_frames; }
579 
580 
581 #ifdef _DEBUG
583 #endif
584 
585 private:
586  void RemoveExtraFShifts(int left, int right);
587  void TrimEdgesToFrameInOtherAlignGaps(const TExons& exons_with_gaps);
588 
589  int m_type;
591  unsigned int m_status;
592 
593  double m_ident;
594  double m_weight;
595 
597  TExons& MyExons() { return m_exons; }
599 
603 
605  bool CdsInvariant(bool check_start_stop = true) const;
606 
611  string m_comment;
612  list< CRef<CSeq_id> > m_trusted_prot;
613  list< CRef<CSeq_id> > m_trusted_mrna;
614 
615 
616  vector<CCDSInfo> m_edge_reading_frames;
617 
618  friend class CChain;
619 };
620 
621 
622 class CAlignMap {
623 public:
626 
627  CAlignMap() {};
629  m_orig_ranges.push_back(SMapRange(SMapRangeEdge(orig_a), SMapRangeEdge(orig_b), kEmptyStr));
631  m_target_len = FShiftedLen(orig_a, orig_b);
632  }
633  CAlignMap(TSignedSeqPos orig_a, TSignedSeqPos orig_b, TInDels::const_iterator fsi_begin, const TInDels::const_iterator fsi_end) : m_orientation(ePlus) {
634  EEdgeType atype = eBoundary;
635  EEdgeType btype = eBoundary;
636  if(fsi_begin != fsi_end) {
637  if(fsi_begin->Loc() == orig_a && !fsi_begin->IsMismatch()) {
638  _ASSERT(!fsi_begin->IsInsertion()); // no reason to have insertion
639  atype = eInDel;
640  }
641  TInDels::const_iterator fs = fsi_end-1;
642  if(fs->Loc() == orig_b+1 && fs->IsDeletion())
643  btype = eInDel;
644  }
645  InsertIndelRangesForInterval(orig_a, orig_b, 0, fsi_begin, fsi_end, atype, btype, "", "");
646  m_target_len = FShiftedLen(orig_a, orig_b);
647  }
648  CAlignMap(const CGeneModel::TExons& exons, const vector<TSignedSeqRange>& transcript_exons, const TInDels& indels, EStrand orientation, int targetlen ); //orientation == strand if not Reversed
649  CAlignMap(const CGeneModel::TExons& exons, const TInDels& frameshifts, EStrand strand, TSignedSeqRange lim = TSignedSeqRange::GetWhole(), int holelen = 0, int polyalen = 0);
651  TSignedSeqPos MapEditedToOrig(TSignedSeqPos edited_pos) const;
653  TSignedSeqRange MapRangeOrigToEdited(TSignedSeqRange orig_range, bool withextras = true) const { return MapRangeOrigToEdited(orig_range, withextras?eLeftEnd:eSinglePoint, withextras?eRightEnd:eSinglePoint); }
654  TSignedSeqRange MapRangeEditedToOrig(TSignedSeqRange edited_range, bool withextras = true) const;
655  template <class In, class Out>
656  void EditedSequence(const In& original_sequence, Out& edited_sequence, bool includeholes = false) const;
657  int FShiftedLen(TSignedSeqRange ab, ERangeEnd lend, ERangeEnd rend) const;
658  int FShiftedLen(TSignedSeqRange ab, bool withextras = true) const;
659  int FShiftedLen(TSignedSeqPos a, TSignedSeqPos b, bool withextras = true) const { return FShiftedLen(TSignedSeqRange(a,b), withextras); }
660  //snap to codons works by analyzing transcript coordinates (MUST be a protein or reading frame cutout)
661  TSignedSeqRange ShrinkToRealPoints(TSignedSeqRange orig_range, bool snap_to_codons = false) const;
663  TSignedSeqPos FShiftedMove(TSignedSeqPos orig_pos, int len) const; // may return < 0 if hits a gap
664  // TInDels GetInDels(bool fs_only) const;
665  // TInDels GetAllCorrections() const;
666  int TargetLen() const { return m_target_len; }
667  EStrand Orientation() const { return m_orientation; }
668  void MoveOrigin(TSignedSeqPos shift) {
669  for(auto& mrange : m_orig_ranges)
670  mrange.MoveOrigin(shift);
671  }
672 
673 // private: // breaks SMapRange on WorkShop. :-/
674  struct SMapRangeEdge {
676  bool operator<(const SMapRangeEdge& mre) const { return m_pos < mre.m_pos; }
677  bool operator==(const SMapRangeEdge& mre) const { return m_pos == mre.m_pos; }
678 
681  string m_extra_seq;
682  };
683 
684  class SMapRange {
685  public:
686  SMapRange(SMapRangeEdge from, SMapRangeEdge to, const string& mism) : m_from(from), m_to(to), m_mism_seq(mism) {}
687  SMapRangeEdge GetEdgeFrom() const { return m_from; }
688  SMapRangeEdge GetEdgeTo() const { return m_to; }
689  void SetEdgeFrom(SMapRangeEdge from) { m_from = from; }
690  void SetEdgeTo(SMapRangeEdge to) { m_to = to; }
691  void MoveOrigin(TSignedSeqPos shift) {
692  m_from.m_pos -= shift;
693  m_to.m_pos -= shift;
694  }
695  TSignedSeqPos GetFrom() const { return m_from.m_pos; }
696  TSignedSeqPos GetTo() const { return m_to.m_pos; }
700  string GetExtraSeqFrom() const { return m_from.m_extra_seq; }
701  TSignedSeqPos GetExtraTo() const { return m_to.m_extra; }
702  string GetExtraSeqTo() const { return m_to.m_extra_seq; }
704  EEdgeType GetTypeTo() const { return m_to.m_edge_type; }
705  const string& GetMismatch() const { return m_mism_seq; }
706  bool operator<(const SMapRange& mr) const {
707  if(m_from.m_pos == mr.m_from.m_pos) return m_to.m_pos < mr.m_to.m_pos;
708  else return m_from.m_pos < mr.m_from.m_pos;
709  }
710 
711  private:
713  string m_mism_seq;
714  };
715 
716  // static TInDels RemoveExtraIndels(const TInDels& indels, const CGeneModel::TExons& exons);
717 
718 private:
719  static TSignedSeqPos MapAtoB(const vector<CAlignMap::SMapRange>& a, const vector<CAlignMap::SMapRange>& b, TSignedSeqPos p, ERangeEnd move_mode);
720  static TSignedSeqRange MapRangeAtoB(const vector<CAlignMap::SMapRange>& a, const vector<CAlignMap::SMapRange>& b, TSignedSeqRange r, ERangeEnd lend, ERangeEnd rend);
721  static TSignedSeqRange MapRangeAtoB(const vector<CAlignMap::SMapRange>& a, const vector<CAlignMap::SMapRange>& b, TSignedSeqRange r, bool withextras ) {
722  return MapRangeAtoB(a, b, r, withextras?eLeftEnd:eSinglePoint, withextras?eRightEnd:eSinglePoint);
723  };
724  static int FindLowerRange(const vector<CAlignMap::SMapRange>& a, TSignedSeqPos p);
725 
726  void InsertOneToOneRange(TSignedSeqPos orig_start, TSignedSeqPos edited_start, TSignedSeqPos len, const string& mism, TSignedSeqPos left_orige, TSignedSeqPos left_edite, TSignedSeqPos right_orige, TSignedSeqPos right_edite,
727  EEdgeType left_type, EEdgeType right_type, const string& left_edit_extra_seq, const string& right_edit_extra_seq);
728  TSignedSeqPos InsertIndelRangesForInterval(TSignedSeqPos orig_a, TSignedSeqPos orig_b, TSignedSeqPos edit_a, TInDels::const_iterator fsi_begin, TInDels::const_iterator fsi_end, EEdgeType type_a, EEdgeType type_b, const string& gseq_a, const string& gseq_b);
729 
730  vector<SMapRange> m_orig_ranges, m_edited_ranges;
733 };
734 
735 
736 
737 
739 public:
741  CAlignModel(const objects::CSeq_align& seq_align);
742  CAlignModel(const CGeneModel& g, const CAlignMap& a);
743  virtual CAlignMap GetAlignMap() const { return m_alignmap; }
744  void ResetAlignMap();
745 
746  virtual void Clip(TSignedSeqRange limits, EClipMode mode, bool ensure_cds_invariant = true) { // drops the score!!!!!!!!!
747  CGeneModel::Clip(limits,mode,ensure_cds_invariant);
748  RecalculateAlignMap(limits.GetFrom(), limits.GetTo());
749  }
750  virtual void CutExons(TSignedSeqRange hole) { // clip or remove exons, dangerous, should be completely in or outside the cds, should not cut an exon in two
751  CGeneModel::CutExons(hole);
752  RecalculateAlignMap(hole.GetTo()+1, hole.GetFrom()-1);
753  }
754 
755  string TargetAccession() const;
756  void SetTargetId(const objects::CSeq_id& id) { m_target_id.Reset(&id); }
757  CConstRef<objects::CSeq_id> GetTargetId() const { return m_target_id; }
758  int TargetLen() const { return m_alignmap.TargetLen(); }
759  int PolyALen() const;
760  CRef<objects::CSeq_align> MakeSeqAlign(const string& contig) const; // should be used for alignments only; for chains and models will produce a Splign alignment of mRNA
761 
762 private:
763  void RecalculateAlignMap(int left, int right);
766 };
767 
768 
769 
770 
772  const string& m_contig;
773  explicit setcontig(const string& cntg) : m_contig(cntg) {}
774 };
776  string& m_contig;
777  explicit getcontig(string& cntg) : m_contig(cntg) {}
778 };
781 
785 
786 
787 template<class Model>
788 class NCBI_XALGOGNOMON_EXPORT CModelCluster : public list<Model> {
789 public:
790  typedef Model TModel;
791  CModelCluster(int f = numeric_limits<int>::max(), int s = 0) : m_limits(f,s) {}
793  void Insert(const Model& a) {
794  m_limits.CombineWith(a.Limits());
795  this->push_back(a);
796  }
797  void Splice(CModelCluster& c) { // elements removed from c and inserted into *this
798  m_limits.CombineWith(c.Limits());
799  this->splice(list<Model>::end(),c);
800  }
801  TSignedSeqRange Limits() const { return m_limits; }
802  bool operator<(const CModelCluster& c) const { return Precede(m_limits, c.m_limits); }
804  list<Model>::clear();
805  m_limits.SetFrom( first );
806  m_limits.SetTo( second );
807  }
808 
809 private:
811 };
812 
815 
816 typedef list<CGeneModel> TGeneModelList;
817 typedef list<CAlignModel> TAlignModelList;
818 
819 
820 template<class Cluster>
822  public:
825  void Insert(const typename Cluster::TModel& a) {
826  Cluster clust;
827  clust.Insert(a);
828  Titerator first = set<Cluster>::lower_bound(Cluster(a.Limits().GetFrom(),a.Limits().GetFrom()));
829  Titerator second = set<Cluster>::upper_bound(Cluster(a.Limits().GetTo(),a.Limits().GetTo()));
830  for(Titerator it = first; it != second;) {
831  clust.Splice(const_cast<Cluster&>(*it));
832  this->erase(it++);
833  }
834  const_cast<Cluster&>(*this->insert(second,Cluster(clust.Limits()))).Splice(clust);
835  }
836 };
837 
840 
841 
843 
844 class EResidue {
845 public :
846  EResidue() : data(enN) {}
848 
849  operator int() const { return int(data); }
850 
851 private:
852  unsigned char data;
853 };
854 
856 {
857  switch(c)
858  {
859  case 'A':
860  return 'T';
861  case 'a':
862  return 't';
863  case 'C':
864  return 'G';
865  case 'c':
866  return 'g';
867  case 'G':
868  return 'C';
869  case 'g':
870  return 'c';
871  case 'T':
872  return 'A';
873  case 't':
874  return 'a';
875  default:
876  return 'N';
877  }
878 }
879 
880 extern const EResidue k_toMinus[5];
881 extern const char *const k_aa_table;
882 
884 {
885  return k_toMinus[c];
886 }
887 
/// Reverse-complements the range [first, last) in place: every element
/// is replaced by Complement(element), then the element order is reversed.
template <class BidirectionalIterator>
void ReverseComplement(const BidirectionalIterator& first, const BidirectionalIterator& last)
{
    BidirectionalIterator cursor(first);
    while (cursor != last) {
        *cursor = Complement(*cursor);
        ++cursor;
    }
    reverse(first, last);
}
895 
896 template<class Model>
897 list<Model> GetAlignParts(const Model& algn, bool settrimflags) {
898  list<Model> parts;
899  int left = algn.Limits().GetFrom();
900  for(unsigned int i = 1; i < algn.Exons().size(); ++i) {
901  if (!algn.Exons()[i-1].m_ssplice || !algn.Exons()[i].m_fsplice) {
902  Model m = algn;
903  m.Status() &= ~CGeneModel::ePolyA;
904  m.Status() &= ~CGeneModel::eCap;
905  m.Clip(TSignedSeqRange(left,algn.Exons()[i-1].GetTo()),CGeneModel::eRemoveExons);
906  if(!parts.empty() && settrimflags) {
907  parts.back().Status() &= ~CGeneModel::eRightTrimmed;
908  m.Status() &= ~CGeneModel::eLeftTrimmed;
909  }
910  parts.push_back(m);
911  left = algn.Exons()[i].GetFrom();
912  }
913  }
914  if(!parts.empty()) {
915  Model m = algn;
916  m.Clip(TSignedSeqRange(left,algn.Limits().GetTo()),CGeneModel::eRemoveExons);
917  m.Status() &= ~CGeneModel::ePolyA;
918  m.Status() &= ~CGeneModel::eCap;
919  if(settrimflags) {
920  parts.back().Status() &= ~CGeneModel::eRightTrimmed;
921  m.Status() &= ~CGeneModel::eLeftTrimmed;
922  }
923  parts.push_back(m);
924 
925  if(algn.Status()&CGeneModel::ePolyA) {
926  if(algn.Strand() == ePlus)
927  parts.back().Status() |= CGeneModel::ePolyA;
928  else
929  parts.front().Status() |= CGeneModel::ePolyA;
930  }
931  if(algn.Status()&CGeneModel::eCap) {
932  if(algn.Strand() == ePlus)
933  parts.front().Status() |= CGeneModel::eCap;
934  else
935  parts.back().Status() |= CGeneModel::eCap;
936  }
937  }
938 
939  return parts;
940 }
941 
942 void MapAlignsToOrigContig(TAlignModelList& aligns, const TInDels& corrections, int contig_size);
943 
944 
945 
946 END_SCOPE(gnomon)
948 
949 #endif // ALGO_GNOMON___GNOMON_MODEL__HPP
EStatus
#define false
Definition: bool.h:36
TSignedSeqPos GetTo() const
const string & GetMismatch() const
TSignedSeqPos GetExtendedFrom() const
void SetEdgeFrom(SMapRangeEdge from)
EEdgeType GetTypeTo() const
string GetExtraSeqFrom() const
void MoveOrigin(TSignedSeqPos shift)
bool operator<(const SMapRange &mr) const
TSignedSeqPos GetExtraTo() const
SMapRangeEdge m_from
string GetExtraSeqTo() const
SMapRangeEdge GetEdgeFrom() const
TSignedSeqPos GetFrom() const
TSignedSeqPos GetExtraFrom() const
SMapRangeEdge GetEdgeTo() const
TSignedSeqPos GetExtendedTo() const
EEdgeType GetTypeFrom() const
void SetEdgeTo(SMapRangeEdge to)
SMapRange(SMapRangeEdge from, SMapRangeEdge to, const string &mism)
CAlignMap(TSignedSeqPos orig_a, TSignedSeqPos orig_b, TInDels::const_iterator fsi_begin, const TInDels::const_iterator fsi_end)
TSignedSeqPos FShiftedMove(TSignedSeqPos orig_pos, int len) const
Definition: gnomon_seq.cpp:819
static int FindLowerRange(const vector< CAlignMap::SMapRange > &a, TSignedSeqPos p)
Definition: gnomon_seq.cpp:674
TSignedSeqRange ShrinkToRealPointsOnEdited(TSignedSeqRange edited_range) const
Definition: gnomon_seq.cpp:688
TSignedSeqRange MapRangeOrigToEdited(TSignedSeqRange orig_range, bool withextras=true) const
int TargetLen() const
void MoveOrigin(TSignedSeqPos shift)
int FShiftedLen(TSignedSeqPos a, TSignedSeqPos b, bool withextras=true) const
void InsertOneToOneRange(TSignedSeqPos orig_start, TSignedSeqPos edited_start, TSignedSeqPos len, const string &mism, TSignedSeqPos left_orige, TSignedSeqPos left_edite, TSignedSeqPos right_orige, TSignedSeqPos right_edite, EEdgeType left_type, EEdgeType right_type, const string &left_edit_extra_seq, const string &right_edit_extra_seq)
Definition: gnomon_seq.cpp:396
static TSignedSeqRange MapRangeAtoB(const vector< CAlignMap::SMapRange > &a, const vector< CAlignMap::SMapRange > &b, TSignedSeqRange r, ERangeEnd lend, ERangeEnd rend)
Definition: gnomon_seq.cpp:902
vector< SMapRange > m_edited_ranges
CAlignMap(TSignedSeqPos orig_a, TSignedSeqPos orig_b)
TSignedSeqPos InsertIndelRangesForInterval(TSignedSeqPos orig_a, TSignedSeqPos orig_b, TSignedSeqPos edit_a, TInDels::const_iterator fsi_begin, TInDels::const_iterator fsi_end, EEdgeType type_a, EEdgeType type_b, const string &gseq_a, const string &gseq_b)
Definition: gnomon_seq.cpp:413
TSignedSeqRange MapRangeEditedToOrig(TSignedSeqRange edited_range, bool withextras=true) const
Definition: gnomon_seq.cpp:956
static TSignedSeqRange MapRangeAtoB(const vector< CAlignMap::SMapRange > &a, const vector< CAlignMap::SMapRange > &b, TSignedSeqRange r, bool withextras)
TSignedSeqPos MapOrigToEdited(TSignedSeqPos orig_pos) const
Definition: gnomon_seq.cpp:884
void EditedSequence(const In &original_sequence, Out &edited_sequence, bool includeholes=false) const
Definition: gnomon_seq.cpp:622
TSignedSeqPos MapEditedToOrig(TSignedSeqPos edited_pos) const
Definition: gnomon_seq.cpp:893
TSignedSeqRange ShrinkToRealPoints(TSignedSeqRange orig_range, bool snap_to_codons=false) const
Definition: gnomon_seq.cpp:754
int FShiftedLen(TSignedSeqRange ab, ERangeEnd lend, ERangeEnd rend) const
Definition: gnomon_seq.cpp:983
static TSignedSeqPos MapAtoB(const vector< CAlignMap::SMapRange > &a, const vector< CAlignMap::SMapRange > &b, TSignedSeqPos p, ERangeEnd move_mode)
Definition: gnomon_seq.cpp:832
EStrand m_orientation
EStrand Orientation() const
TSignedSeqRange MapRangeOrigToEdited(TSignedSeqRange orig_range, ERangeEnd lend, ERangeEnd rend) const
Definition: gnomon_seq.cpp:928
vector< SMapRange > m_orig_ranges
virtual void Clip(TSignedSeqRange limits, EClipMode mode, bool ensure_cds_invariant=true)
CAlignMap m_alignmap
CConstRef< objects::CSeq_id > m_target_id
CConstRef< objects::CSeq_id > GetTargetId() const
virtual void CutExons(TSignedSeqRange hole)
int TargetLen() const
virtual CAlignMap GetAlignMap() const
void SetTargetId(const objects::CSeq_id &id)
CAlignModel(const objects::CSeq_align &seq_align)
void Cut(TSignedSeqRange hole)
CCDSInfo MapFromEditedToOrig(const CAlignMap &amap) const
bool PStop(bool includeall=true) const
bool HasStop() const
void SetStart(TSignedSeqRange r, bool confirmed=false)
bool m_confirmed_stop
TPStops m_p_stops
CCDSInfo MapFromOrigToEdited(const CAlignMap &amap) const
TSignedSeqRange MaxCdsLimits() const
bool OpenCds() const
void Set5PrimeCdsLimit(TSignedSeqPos p)
CCDSInfo(bool gcoords=true)
double m_score
bool HasStart() const
void Remap(const CRangeMapper &mapper)
bool IsMappedToGenome() const
void SetScore(double score, bool open=false)
bool m_genomic_coordinates
TSignedSeqRange m_reading_frame_from_proteins
TSignedSeqRange Start() const
bool ConfirmedStart() const
TSignedSeqRange m_reading_frame
void Clip(TSignedSeqRange limits)
void AddPStop(SPStop stp)
TSignedSeqRange Cds() const
void CombineWith(const CCDSInfo &another_cds_info)
int Strand() const
double Score() const
TSignedSeqRange ReadingFrame() const
bool m_confirmed_start
TSignedSeqRange m_max_cds_limits
TSignedSeqRange m_start
TSignedSeqRange ProtReadingFrame() const
void Clear()
TSignedSeqRange m_stop
void SetStop(TSignedSeqRange r, bool confirmed=false)
const TPStops & PStops() const
bool Invariant() const
vector< SPStop > TPStops
bool ConfirmedStop() const
void Clear5PrimeCdsLimit()
void SetReadingFrame(TSignedSeqRange r, bool protein=false)
bool operator==(const CCDSInfo &another) const
TSignedSeqRange Stop() const
void ClearPStops()
bool OpenLeftEnd() const
void SetGeneID(Int8 id)
void SetSplices(int i, const string &f_sig, const string &s_sig)
const list< CRef< CSeq_id > > & TrustedProt() const
bool operator<(const CGeneModel &a) const
const unsigned int & Status() const
EStrand m_strand
bool GoodEnoughToBeAnnotation() const
double Score() const
virtual ~CGeneModel()
double m_ident
string m_comment
bool Open5primeEnd() const
void ClearExons()
int FShiftedLen(TSignedSeqPos a, TSignedSeqPos b, bool withextras=true) const
virtual void CutExons(TSignedSeqRange hole)
Int8 GeneID() const
bool IntersectingWith(const CGeneModel &a) const
TExons & MyExons()
EStrand Orientation() const
void InsertTrustedProt(CRef< CSeq_id > g)
TSignedSeqRange m_range
list< CRef< CSeq_id > > m_trusted_mrna
void SetRankInGene(int rank)
bool IdenticalAlign(const CGeneModel &a) const
void ClearTrustedProt()
double Weight() const
unsigned int & Status()
const CSupportInfoSet & Support() const
bool OpenRightEnd() const
void ClearTrustedmRNA()
const TExons & Exons() const
vector< CCDSInfo > * SetEdgeReadingFrames()
TExons & Exons()
bool LeftComplete() const
CCDSInfo m_cds_info
TSignedSeqRange ReadingFrame() const
const TInDels & FrameShifts() const
void SetWeight(double w)
list< CRef< CSeq_id > > m_trusted_prot
bool RightComplete() const
CGeneModel(EStrand s=ePlus, Int8 id=0, int type=0)
bool Continuous() const
void SetType(int t)
CSupportInfoSet m_support
bool FullCds() const
unsigned int m_status
void SetID(Int8 id)
void SetStrand(EStrand s)
TExons m_exons
void ReplaceSupport(const CSupportInfoSet &support_set)
virtual void Clip(TSignedSeqRange limits, EClipMode mode, bool ensure_cds_invariant=true)
bool CompleteCds() const
int RankInGene() const
bool ConfirmedStop() const
void InsertTrustedmRNA(CRef< CSeq_id > g)
Int8 ID() const
string m_protein_hit
void ClearStatus()
bool AddSupport(const CSupportInfo &support)
TSignedSeqRange Limits() const
int Type() const
const list< CRef< CSeq_id > > & TrustedmRNA() const
void AddComment(const string &comment)
TInDels m_fshifts
double Ident() const
void SetIdent(double i)
const CCDSInfo & GetCdsInfo() const
const string & GetComment() const
bool operator==(const CGeneModel &a) const
vector< CModelExon > TExons
bool HasStop() const
string & ProteinHit()
const string & ProteinHit() const
double m_weight
bool ConfirmedStart() const
bool HasStart() const
const vector< CCDSInfo > * GetEdgeReadingFrames() const
bool OpenCds() const
vector< CCDSInfo > m_edge_reading_frames
bool PStop(bool includeall=true) const
bool m_expecting_hole
TInDels & FrameShifts()
void SetComment(const string &comment)
EStrand Strand() const
const SSource & GetSource() const
EStatus m_status
bool IsDeletion() const
string m_indelv
void SetStatus(EStatus s)
int Len() const
TSignedSeqPos Loc() const
EType GetType() const
bool IsInsertion() const
bool operator==(const CInDelInfo &fsi) const
EStatus GetStatus() const
int InDelEnd() const
void Init(TSignedSeqPos l, int len, EType type, const string &v, const SSource &s)
void SetLoc(TSignedSeqPos l)
string GetInDelV() const
TSignedSeqPos m_loc
bool operator<(const CInDelInfo &fsi) const
bool IsMismatch() const
bool operator!=(const CInDelInfo &fsi) const
bool IntersectingWith(TSignedSeqPos a, TSignedSeqPos b) const
SSource m_source
CInDelInfo(TSignedSeqPos l, int len, EType type, const string &v=kEmptyStr, const SSource &s=SSource())
set< Cluster >::iterator Titerator
void Insert(const typename Cluster::TModel &a)
CModelCluster(TSignedSeqRange limits)
void Insert(const Model &a)
void Init(TSignedSeqPos first, TSignedSeqPos second)
CModelCluster(int f=numeric_limits< int >::max(), int s=0)
TSignedSeqRange m_limits
TSignedSeqRange Limits() const
void Splice(CModelCluster &c)
bool operator<(const CModelCluster &c) const
bool operator==(const CModelExon &p) const
CInDelInfo::SSource m_source
CModelExon(TSignedSeqPos f=0, TSignedSeqPos s=0, bool fs=false, bool ss=false, const string &fsig="", const string &ssig="", double ident=0, const string &seq="", const CInDelInfo::SSource &src=CInDelInfo::SSource())
bool operator<(const CModelExon &p) const
double m_ident
void AddTo(int d)
string m_ssplice_sig
TSignedSeqPos GetFrom() const
void AddFrom(int d)
const TSignedSeqRange & Limits() const
void Remap(const CRangeMapper &mapper)
TSignedSeqRange m_range
TSignedSeqPos GetTo() const
TSignedSeqRange & Limits()
bool operator!=(const CModelExon &p) const
virtual ~CRangeMapper()
virtual TSignedSeqRange operator()(TSignedSeqRange r, bool withextras=true) const =0
Int8 GetId() const
void SetCore(bool core)
bool operator==(const CSupportInfo &s) const
CSupportInfo(Int8 model_id, bool core=false)
bool operator<(const CSupportInfo &s) const
bool IsCore() const
unsigned char data
EResidue(EResidueNames e)
Definition: set.hpp:45
const_iterator upper_bound(const key_type &key) const
Definition: set.hpp:139
const_iterator lower_bound(const key_type &key) const
Definition: set.hpp:138
Include a standard set of the NCBI C++ Toolkit most basic headers.
#define Len
static void Init(void)
Definition: cursor6.c:76
@ eTSA
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:51
bool Empty(const CNcbiOstrstream &src)
Definition: fileutil.cpp:523
static int type
Definition: getdata.c:31
USING_SCOPE(objects)
vector< TResidue > CResidueVec
bool Precede(TSignedSeqRange l, TSignedSeqRange r)
CVectorSet< CSupportInfo > CSupportInfoSet
TResidue Complement(TResidue c)
vector< int > TIVec
CModelCluster< CAlignModel > TAlignModelCluster
vector< double > TDVec
const char *const k_aa_table
Definition: gnomon_seq.cpp:41
CModelCluster< CGeneModel > TGeneModelCluster
EResidueNames
@ enN
@ enC
@ enG
@ enT
@ enA
list< CAlignModel > TAlignModelList
CModelClusterSet< TAlignModelCluster > TAlignModelClusterSet
double BadScore()
CNcbiOstream & operator<<(CNcbiOstream &s, const setcontig &c)
EStrand
@ eMinus
@ ePlus
CNcbiIstream & operator>>(CNcbiIstream &s, const getcontig &c)
bool Include(TSignedSeqRange big, TSignedSeqRange small)
const EResidue k_toMinus[5]
Definition: gnomon_seq.cpp:40
void ReverseComplement(const BidirectionalIterator &first, const BidirectionalIterator &last)
CModelClusterSet< TGeneModelCluster > TGeneModelClusterSet
list< CGeneModel > TGeneModelList
bool IsStopCodon(const Res *seq, int strand=ePlus)
Definition: gnomon_seq.cpp:124
EStrand OtherStrand(EStrand s)
vector< CInDelInfo > TInDels
objects::CSeqVectorTypes::TResidue TResidue
bool Enclosed(TSignedSeqRange big, TSignedSeqRange small)
void MapAlignsToOrigContig(TAlignModelList &aligns, const TInDels &corrections, int contig_size)
list< Model > GetAlignParts(const Model &algn, bool settrimflags)
bool IsStartCodon(const Res *seq, int strand=ePlus)
Definition: gnomon_seq.cpp:108
int TSignedSeqPos
Type for signed sequence position.
Definition: ncbimisc.hpp:887
int64_t Int8
8-byte (64-bit) signed integer
Definition: ncbitype.h:104
bool NotEmpty(void) const
Definition: range.hpp:152
bool operator<(const TThisType &r) const
Definition: range.hpp:223
static TThisType GetEmpty(void)
Definition: range.hpp:306
static position_type GetWholeFrom(void)
Definition: range.hpp:256
CRange< TSignedSeqPos > TSignedSeqRange
Definition: range.hpp:420
static TThisType GetWhole(void)
Definition: range.hpp:272
static position_type GetWholeTo(void)
Definition: range.hpp:264
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
#define kEmptyStr
Definition: ncbistr.hpp:123
#define NCBI_XALGOGNOMON_EXPORT
Definition: ncbi_export.h:1001
TTo GetTo(void) const
Get the To member data.
Definition: Range_.hpp:269
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
@ eEST
int i
int len
constexpr bool empty(list< Ts... >) noexcept
mdb_mode_t mode
Definition: lmdb++.h:38
const struct ncbi::grid::netcache::search::fields::SIZE size
unsigned int a
Definition: ncbi_localip.c:102
EIPRangeType t
Definition: ncbi_localip.c:101
Portable reference counted smart and weak pointers using CWeakRef, CRef, CObject and CObjectEx.
T max(T x_, T y_)
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
static const TDS_WORD limits[]
Definition: num_limits.h:85
void Out(T t, int w, CNcbiOstream &to=cout)
Definition: parse.cpp:467
bool operator<(const SMapRangeEdge &mre) const
SMapRangeEdge(TSignedSeqPos p, TSignedSeqPos e=0, EEdgeType t=eBoundary, const string &seq=kEmptyStr)
bool operator==(const SMapRangeEdge &mre) const
SPStop(TSignedSeqRange r, EStatus s)
bool operator<(const SPStop &stp) const
TSignedSeqRange m_range
getcontig(string &cntg)
string & m_contig
const string & m_contig
setcontig(const string &cntg)
Definition: type.c:6
#define _ASSERT
#define Type
int g(Seg_Gsm *spe, Seq_Mtf *psm, Thd_Gsm *tdg)
Definition: thrddgri.c:44
void AddExon(CRef< CSeq_entry > seq, const string &number, TSeqPos start)
#define const
Definition: zconf.h:230
Modified on Sat Dec 09 04:47:12 2023 by modify_doxy.py rev. 669887