NCBI C++ ToolKit
gnomon_model.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef ALGO_GNOMON___GNOMON_MODEL__HPP
2 #define ALGO_GNOMON___GNOMON_MODEL__HPP
3 
4 /* $Id: gnomon_model.hpp 101798 2024-02-13 17:18:22Z souvorov $
5  * ===========================================================================
6  *
7  * PUBLIC DOMAIN NOTICE
8  * National Center for Biotechnology Information
9  *
10  * This software/database is a "United States Government Work" under the
11  * terms of the United States Copyright Act. It was written as part of
12  * the author's official duties as a United States Government employee and
13  * thus cannot be copyrighted. This software/database is freely available
14  * to the public for use. The National Library of Medicine and the U.S.
15  * Government have not placed any restriction on its use or reproduction.
16  *
17  * Although all reasonable efforts have been taken to ensure the accuracy
18  * and reliability of the software and data, the NLM and the U.S.
19  * Government do not and cannot warrant the performance or results that
20  * may be obtained by using this software or data. The NLM and the U.S.
21  * Government disclaim all warranties, express or implied, including
22  * warranties of performance, merchantability or fitness for any particular
23  * purpose.
24  *
25  * Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Authors: Alexandre Souvorov
30  *
31  * File Description:
32  *
33  */
34 
35 #include <corelib/ncbiobj.hpp>
36 #include <corelib/ncbistd.hpp>
37 #include <corelib/ncbi_limits.hpp>
38 
39 #include <set>
40 #include <vector>
41 #include <algorithm>
42 #include <math.h>
43 
44 #include <objmgr/seq_vector_ci.hpp> // CSeqVectorTypes::TResidue
45 #include <util/range.hpp> // TSignedSeqRange
46 
49 class CSeq_align;
50 class CSeq_id;
51 class CGenetic_code;
54 BEGIN_SCOPE(gnomon)
55 
56 class CGnomonEngine;
57 
// A named constant (kBadScore) would read better, but on WorkShop such a
// constant is implicitly static and therefore cannot be used from inline
// functions, so the sentinel is provided by a function instead.
// Returns the most negative finite double.
inline double BadScore()
{
    return std::numeric_limits<double>::lowest();
}
63 
enum EStrand { ePlus, eMinus};

// Returns the strand opposite to s: eMinus for ePlus, ePlus for anything else.
inline EStrand OtherStrand(EStrand s)
{
    if (s == ePlus)
        return eMinus;
    return ePlus;
}
66 
68 
69 typedef vector<TResidue> CResidueVec;
70 typedef vector<int> TIVec;
71 typedef vector<double> TDVec;
72 
73 
74 inline bool Precede(TSignedSeqRange l, TSignedSeqRange r) { return l.GetTo() < r.GetFrom(); }
75 inline bool Include(TSignedSeqRange big, TSignedSeqRange small) { return (big.GetFrom()<=small.GetFrom() && small.GetTo()<=big.GetTo()); }
76 inline bool Include(TSignedSeqRange r, TSignedSeqPos p) { return (r.GetFrom()<=p && p<=r.GetTo()); }
77 inline bool Enclosed(TSignedSeqRange big, TSignedSeqRange small) { return (big != small && Include(big, small)); }
78 
80 {
81 public:
82  CSupportInfo(Int8 model_id, bool core=false);
83 
84 
85  Int8 GetId() const;
86  void SetCore(bool core);
87  bool IsCore() const;
88  bool operator==(const CSupportInfo& s) const;
89  bool operator<(const CSupportInfo& s) const;
90 
91 private:
94 };
95 
97 
98 class CAlignModel;
99 
101 public:
102 
103  struct SSource {
104  SSource() : m_strand(ePlus) {}
105  string m_acc;
108  };
109 
110  enum EType {eDel, eIns, eMism};
111  enum EStatus {eGenomeNotCorrect, eGenomeCorrect, eUnknown};
112 
113  CInDelInfo(TSignedSeqPos l, int len, EType type, const string& v = kEmptyStr, const SSource& s = SSource()) { Init(l, len, type, v, s); }
114 
115  TSignedSeqPos Loc() const { return m_loc; }
116  int Len() const { return m_len; }
117  int InDelEnd() const { return ((IsInsertion() || IsMismatch()) ? Loc()+Len() : Loc()); } // first base "after" correction
118  bool IsInsertion() const { return m_type == eIns; }
119  bool IsDeletion() const { return m_type == eDel; }
120  bool IsMismatch() const { return m_type == eMism; }
121  bool IntersectingWith(TSignedSeqPos a, TSignedSeqPos b) const // insertion/mismatch at least partially inside, deletion inside or flanking
122  {
123  return (IsDeletion() && Loc() >= a && Loc() <= b+1) ||
124  ((IsInsertion() || IsMismatch()) && Loc() <= b && a <= Loc()+Len()-1);
125  }
126  bool operator<(const CInDelInfo& fsi) const // source is ignored!!!!!!!!!!!
127  {
128  if(m_loc != fsi.m_loc)
129  return m_loc < fsi.m_loc;
130  else if(m_type != fsi.m_type)
131  return m_type < fsi.m_type; // if location is same deletion first
132  else if(m_len != fsi.m_len)
133  return m_len < fsi.m_len;
134  else
135  return m_indelv < fsi.m_indelv;
136  }
137  bool operator!=(const CInDelInfo& fsi) const { return (*this < fsi || fsi < *this); }
138  bool operator==(const CInDelInfo& fsi) const { return !(*this != fsi); }
139  string GetInDelV() const { return m_indelv; }
140  const SSource& GetSource() const { return m_source; }
141  EType GetType() const { return m_type; };
142  void SetStatus(EStatus s) { m_status = s; }
143  EStatus GetStatus() const { return m_status; }
144  void SetLoc(TSignedSeqPos l) { m_loc = l; }
145 
146 private:
147  void Init(TSignedSeqPos l, int len, EType type, const string& v, const SSource& s) {
148  m_loc = l;
149  m_len = len;
150  m_type = type;
151  m_status = eUnknown;
152  m_indelv = v;
153  m_source = s;
154  _ASSERT(m_indelv.empty() || (int)m_indelv.length() == len);
155  _ASSERT(m_indelv.empty() || m_type != eIns);
156  if((IsDeletion() || IsMismatch()) && GetInDelV().empty())
157  m_indelv.insert( m_indelv.end(), Len(),'N');
158  }
159 
160  TSignedSeqPos m_loc; // left location for insertion, deletion is before m_loc
161  // insertion - when there are extra bases in the genome
162  int m_len;
165  string m_indelv;
167 };
168 
169 typedef vector<CInDelInfo> TInDels;
170 
171 template <class Res>
172 bool IsStartCodon(const Res * seq, int strand = ePlus); // seq points to the first base in biological order
173 template <class Res>
174 bool IsStopCodon(const Res * seq, int strand = ePlus); // seq points to the first base in biological order
175 
176 
178 public:
179  virtual ~CRangeMapper() {}
180  virtual TSignedSeqRange operator()(TSignedSeqRange r, bool withextras = true) const = 0;
181 };
182 
184 public:
185  CModelExon(TSignedSeqPos f = 0, TSignedSeqPos s = 0, bool fs = false, bool ss = false, const string& fsig = "", const string& ssig = "", double ident = 0, const string& seq = "", const CInDelInfo::SSource& src = CInDelInfo::SSource()) :
186  m_fsplice(fs), m_ssplice(ss), m_fsplice_sig(fsig), m_ssplice_sig(ssig), m_ident(ident), m_seq(seq), m_source(src), m_range(f,s)
187  {
188  _ASSERT(m_seq.empty() || m_range.Empty());
189  };
190 
191  bool operator==(const CModelExon& p) const
192  {
193  return (m_range==p.m_range && m_fsplice == p.m_fsplice && m_ssplice == p.m_ssplice);
194  }
195  bool operator!=(const CModelExon& p) const
196  {
197  return !(*this == p);
198  }
199  bool operator<(const CModelExon& p) const { return Precede(Limits(),p.Limits()); }
200 
201  operator TSignedSeqRange() const { return m_range; }
202  const TSignedSeqRange& Limits() const { return m_range; }
203  TSignedSeqRange& Limits() { return m_range; }
204  TSignedSeqPos GetFrom() const { return m_range.GetFrom(); }
205  TSignedSeqPos GetTo() const { return m_range.GetTo(); }
206  void Extend(const CModelExon& e);
207  void AddFrom(int d) { m_range.SetFrom( m_range.GetFrom() +d ); }
208  void AddTo(int d) { m_range.SetTo( m_range.GetTo() +d ); }
209 
210  bool m_fsplice, m_ssplice;
211  string m_fsplice_sig, m_ssplice_sig; // obeys strand
212  double m_ident;
213  string m_seq; // exon sequence if in gap; obeys strand
215 
216  void Remap(const CRangeMapper& mapper) { m_range = mapper(m_range); }
217 private:
219 };
220 
221 class CAlignMap;
222 
223 class CCDSInfo {
224 public:
226 
227  bool operator== (const CCDSInfo& another) const;
228 
 //CDS mapped to transcript should be used only for final models (not alignments)
 //Change in indels or 5' UTR will invalidate the coordinates (in particular conversion from CAlignModel to CGeneModel);
231  bool IsMappedToGenome() const { return m_genomic_coordinates; }
232  CCDSInfo MapFromOrigToEdited(const CAlignMap& amap) const;
233  CCDSInfo MapFromEditedToOrig(const CAlignMap& amap) const; // returns 'empty' CDS if can't map
234 
237  TSignedSeqRange Cds() const { return Start()+ReadingFrame()+Stop(); }
239 
240  TSignedSeqRange Start() const {return m_start; }
241  TSignedSeqRange Stop() const {return m_stop; }
242  bool HasStart() const { return Start().NotEmpty(); }
243  bool HasStop () const { return Stop().NotEmpty(); }
244  bool ConfirmedStart() const { return m_confirmed_start; } // start is confirmed by protein alignment
245  bool ConfirmedStop() const { return m_confirmed_stop; } // stop is confirmed by protein alignment
246 
247  bool OpenCds() const { return m_open; } // "optimal" CDS is not internal
248  double Score() const { return m_score; }
249 
250  void SetReadingFrame(TSignedSeqRange r, bool protein = false);
251  void SetStart(TSignedSeqRange r, bool confirmed = false);
252  void SetStop(TSignedSeqRange r, bool confirmed = false);
254  void Clear5PrimeCdsLimit();
255  void SetScore(double score, bool open=false);
256 
257  void CombineWith(const CCDSInfo& another_cds_info);
258  void Remap(const CRangeMapper& mapper);
260  void Cut(TSignedSeqRange hole);
261  void Clear();
262 
263  int Strand() const; // -1 (minus), 0 (unknown), 1 (plus)
264 
266  struct SPStop : public TSignedSeqRange {
268 
269  //not overloaded == is used for uniquing and finding intervals
270  //overloaded < is used for sorting before uniquing
271  bool operator<(const SPStop& stp) const {
272  if(operator==(stp)) // == is not overloaded
273  return m_status < stp.m_status;
274  else
275  return TSignedSeqRange::operator<(stp);
276  }
277 
279  };
280 
281  typedef vector<SPStop> TPStops;
282  const TPStops& PStops() const { return m_p_stops; }
283  bool PStop(bool includeall = true) const; // has premature stop(s)
284  void AddPStop(SPStop stp) { m_p_stops.push_back(stp); _ASSERT( Invariant() ); }
285  void AddPStop(TSignedSeqRange r, EStatus status);
286  void ClearPStops() { m_p_stops.clear(); }
287 
288  bool Invariant() const
289  {
290 #ifdef _DEBUG
291  if (ReadingFrame().Empty()) {
292  _ASSERT( !HasStop() && !HasStart() );
294  _ASSERT( !ConfirmedStart() );
295  _ASSERT( !ConfirmedStop() );
296  // _ASSERT( !PStop() );
297  _ASSERT( !OpenCds() );
298  _ASSERT( Score()==BadScore() );
299  return true;
300  }
301 
302  _ASSERT( !Start().IntersectingWith(ReadingFrame()) );
303  _ASSERT( !Stop().IntersectingWith(ReadingFrame()) );
305  _ASSERT( Include( MaxCdsLimits(), Cds() ) );
306 
307  if (!HasStop() && !HasStart()) {
309  } else if (HasStart() && !HasStop()) {
310  if (Precede(Start(), ReadingFrame())) {
312  } else {
314  }
315  } else if (HasStart() && HasStop()) {
317  }
318  if (HasStop()) {
319  if (Precede(ReadingFrame(),Stop())) {
320  _ASSERT( MaxCdsLimits().GetTo()==Stop().GetTo() );
321  } else {
322  _ASSERT( MaxCdsLimits().GetFrom()==Stop().GetFrom() );
323  }
324  }
325 
326  if (ConfirmedStart()) {
327  _ASSERT( HasStart() );
328  }
329 
330  if (ConfirmedStop()) {
331  _ASSERT( HasStop() );
332  }
333 
334  // ITERATE(TPStops, s, PStops())
335  // _ASSERT( Include(MaxCdsLimits(), *s) );
336 #endif
337 
338  return true;
339  }
340 
341 private:
346 
349 
350  bool m_open;
351  double m_score;
352 
354 };
355 
356 
358 {
359 public:
360  enum EType {
361  eWall = 1,
362  eNested = 2,
363  eSR = 4,
364  eEST = 8,
365  emRNA = 16,
366  eProt = 32,
367  eNotForChaining = 64,
368  eChain = 128,
369  eGnomon = 256
370  };
371  static string TypeToString(int type);
372 
373  enum EStatus {
374  ecDNAIntrons = 1,
375  eReversed = 2,
376  eSkipped = 4,
377  eLeftTrimmed = 8,
378  eRightTrimmed = 16,
379  eFullSupCDS = 32,
380  ePseudo = 64,
381  ePolyA = 128,
382  eCap = 256,
383  eBestPlacement = 512,
384  eUnknownOrientation = 1024,
385  eConsistentCoverage = 2048,
386  eGapFiller = 4096,
387  eUnmodifiedAlign = 8192,
388  eChangedByFilter = 16384,
389  eTSA = 32768,
390  eLeftConfirmed = 65536,
391  eRightConfirmed = 131072,
392  eLeftFlexible = 262144,
393  eRightFlexible = 524288
394  };
395 
396  CGeneModel(EStrand s = ePlus, Int8 id = 0, int type = 0) :
397  m_type(type), m_id(id), m_status(0), m_ident(0), m_weight(1), m_expecting_hole(false), m_strand(s), m_geneid(0), m_rank_in_gene(0) {}
398  virtual ~CGeneModel() {}
399 
400  void AddExon(TSignedSeqRange exon, const string& fs = "", const string& ss = "", double ident = 0, const string& seq = "", const CInDelInfo::SSource& src = CInDelInfo::SSource());
401  void AddHole(); // between model and next exons
402  void AddGgapExon(double ident, const string& seq, const CInDelInfo::SSource& src, bool infront);
403  void AddNormalExon(TSignedSeqRange exon, const string& fs, const string& ss, double ident, bool infront);
404 
405  typedef vector<CModelExon> TExons;
406  const TExons& Exons() const { return m_exons; }
407  TExons& Exons() { return m_exons; }
408  void ClearExons() {
409  m_exons.clear();
410  m_fshifts.clear();
411  m_range = TSignedSeqRange::GetEmpty();
412  m_cds_info = CCDSInfo();
413  m_edge_reading_frames.clear();
414  }
415  void SetSplices(int i, const string& f_sig, const string& s_sig) { m_exons[i].m_fsplice_sig = f_sig; m_exons[i].m_ssplice_sig = s_sig; }
416 
417  void ReverseComplementModel();
418 
419  void Remap(const CRangeMapper& mapper);
420  enum EClipMode { eRemoveExons, eDontRemoveExons };
421  virtual void Clip(TSignedSeqRange limits, EClipMode mode, bool ensure_cds_invariant = true); // drops the score!!!!!!!!!
422  virtual void CutExons(TSignedSeqRange hole); // clip or remove exons, dangerous, should be completely in or outside the cds, should not cut an exon in two
423  void ExtendLeft(int amount);
424  void ExtendRight(int amount);
425  void Extend(const CGeneModel& a, bool ensure_cds_invariant = true);
426  void RemoveShortHolesAndRescore(const CGnomonEngine& gnomon); // removes holes shorter than min intron (may add frameshifts/stops)
427 
428  TSignedSeqRange TranscriptExon(int i) const;
429 
430  TSignedSeqRange Limits() const { return m_range; }
431  TSignedSeqRange TranscriptLimits() const;
432  int AlignLen() const ;
433  void RecalculateLimits();
434 
435  // ReadingFrame doesn't include start/stop. It's always on codon boundaries
436  TSignedSeqRange ReadingFrame() const { return m_cds_info.ReadingFrame(); }
437  // CdsLimits include start/stop if any, goes to model limit if no start/stop
438  TSignedSeqRange RealCdsLimits() const;
439  int RealCdsLen() const ; // %3!=0 is possible
440  // MaxCdsLimits - longest cds. include start/stop if any,
441  // goes to 5' limit if no upstream stop, goes to 3' limit if no stop
442  TSignedSeqRange MaxCdsLimits() const;
443 
444  const CCDSInfo& GetCdsInfo() const { return m_cds_info; }
445  void SetCdsInfo(const CCDSInfo& cds_info);
446  void SetCdsInfo(const CGeneModel& a);
447  void CombineCdsInfo(const CGeneModel& a, bool ensure_cds_invariant = true);
448  void CombineCdsInfo(const CCDSInfo& cds_info, bool ensure_cds_invariant = true);
449 
450  bool IntersectingWith(const CGeneModel& a) const
451  {
452  return Limits().IntersectingWith(a.Limits());
453  }
454 
455  double Ident() const { return m_ident; }
456  void SetIdent(double i) { m_ident = i; }
457 
458  double Weight() const { return m_weight; }
459  void SetWeight(double w) { m_weight = w; }
460 
461  void SetStrand(EStrand s) { m_strand = s; }
462  EStrand Strand() const { return m_strand; }
464  bool notreversed = (Status()&CGeneModel::eReversed) == 0;
465  bool plusstrand = Strand() == ePlus;
466  return (notreversed == plusstrand) ? ePlus : eMinus;
467  }
468 
469  void SetType(int t) { m_type = t; }
470  int Type() const { return m_type; }
471  Int8 GeneID() const { return m_geneid; }
472  void SetGeneID(Int8 id) { m_geneid = id; }
473  int RankInGene() const { return m_rank_in_gene; }
474  void SetRankInGene(int rank) { m_rank_in_gene = rank; }
475  Int8 ID() const { return m_id; }
476  void SetID(Int8 id) { m_id = id; }
477  const CSupportInfoSet& Support() const { return m_support; }
478  bool AddSupport(const CSupportInfo& support) { return m_support.insert(support).second; }
479  void ReplaceSupport(const CSupportInfoSet& support_set) { m_support = support_set; }
480  const string& ProteinHit() const { return m_protein_hit; }
481  string& ProteinHit() { return m_protein_hit; }
482 
483  unsigned int& Status() { return m_status; }
484  const unsigned int& Status() const { return m_status; }
485  void ClearStatus() { m_status = 0; }
486 
487  const string& GetComment() const { return m_comment; }
488  void SetComment(const string& comment) { m_comment = comment; }
489  void AddComment(const string& comment) { m_comment += " " + comment; }
490 
491  bool operator<(const CGeneModel& a) const { return Precede(Limits(),a.Limits()); }
492 
493  double Score() const { return m_cds_info.Score(); }
494 
495  bool Continuous() const // no "holes" in alignment
496  {
497  for(unsigned int i = 1; i < Exons().size(); ++i)
498  if (!Exons()[i-1].m_ssplice || !Exons()[i].m_fsplice)
499  return false;
500  return true;
501  }
502  bool HasStart() const { return m_cds_info.HasStart(); }
503  bool HasStop () const { return m_cds_info.HasStop (); }
504  bool LeftComplete() const { return Strand() == ePlus ? HasStart() : HasStop(); }
505  bool RightComplete() const { return Strand() == ePlus ? HasStop() : HasStart(); }
506  bool FullCds() const { return HasStart() && HasStop() && Continuous(); }
507  bool CompleteCds() const { return FullCds() && (!Open5primeEnd() || ConfirmedStart()); }
508 
510  {
511  _ASSERT( !(OpenCds()&&ConfirmedStart()) );
512  return (ReadingFrame().Empty() || (!OpenCds() && FullCds()));
513  }
514 
515  bool Open5primeEnd() const
516  {
517  return (Strand() == ePlus ? OpenLeftEnd() : OpenRightEnd());
518  }
519  bool OpenLeftEnd() const
520  {
521  return ReadingFrame().NotEmpty() && GetCdsInfo().MaxCdsLimits().GetFrom()==TSignedSeqRange::GetWholeFrom();
522  }
523  bool OpenRightEnd() const
524  {
525  return ReadingFrame().NotEmpty() && GetCdsInfo().MaxCdsLimits().GetTo()==TSignedSeqRange::GetWholeTo();
526  }
527 
528  bool OpenCds() const { return m_cds_info.OpenCds(); } // "optimal" CDS is not internal
529  bool PStop(bool includeall = true) const { return m_cds_info.PStop(includeall); } // has premature stop(s)
530 
531  bool ConfirmedStart() const { return m_cds_info.ConfirmedStart(); } // start is confirmed by protein alignment
532  bool ConfirmedStop() const { return m_cds_info.ConfirmedStop(); } // stop is confirmed by protein alignment
533 
534  bool isNMD(int limit = 50) const;
535 
536  TInDels& FrameShifts() { return m_fshifts; }
537  const TInDels& FrameShifts() const { return m_fshifts; }
538  TInDels FrameShifts(TSignedSeqPos a, TSignedSeqPos b) const;
539  TInDels GetInDels(bool fs_only) const;
540  TInDels GetInDels(TSignedSeqPos a, TSignedSeqPos b, bool fs_only) const;
541 
542  int FShiftedLen(TSignedSeqRange ab, bool withextras = true) const; // won't work if a/b is insertion
543  int FShiftedLen(TSignedSeqPos a, TSignedSeqPos b, bool withextras = true) const { return FShiftedLen(TSignedSeqRange(a,b),withextras); }
544 
545  // move along mrna skipping introns
546  TSignedSeqPos FShiftedMove(TSignedSeqPos pos, int len) const; // may retun <0 if hits a deletion at the end of move
547 
548  virtual CAlignMap GetAlignMap() const;
549 
550  string GetCdsDnaSequence (const CResidueVec& contig_sequence) const;
551  string GetProtein (const CResidueVec& contig_sequence) const;
552  string GetProtein (const CResidueVec& contig_sequence, const CGenetic_code* gencode) const;
553 
554  // Below comparisons ignore CDS completely, first 3 assume that alignments are the same strand
555 
556  int HasCompatibleOverlap(const CGeneModel& a, int min_overlap = 2) const; // returns 0 for notcompatible or (number of common splices)+1; neither alignment can have holes
557  int isCompatible(const CGeneModel& a) const; // returns 0 for notcompatible or (number of common splices)+1
558  bool IsSubAlignOf(const CGeneModel& a) const;
559  int MutualExtension(const CGeneModel& a) const; // returns 0 for notcompatible or (number of introns) + 1
560 
561  bool IdenticalAlign(const CGeneModel& a) const
562  { return Strand()==a.Strand() && Limits()==a.Limits() && Exons() == a.Exons() && FrameShifts()==a.FrameShifts() &&
563  GetCdsInfo().PStops() == a.GetCdsInfo().PStops() && Type() == a.Type() && Status() == a.Status(); }
564  bool operator==(const CGeneModel& a) const
565  {
566  return IdenticalAlign(a) && Type()==a.Type() && m_id==a.m_id && m_support==a.m_support;
567  }
568 
569  const list< CRef<CSeq_id> >& TrustedmRNA() const { return m_trusted_mrna; }
570  void InsertTrustedmRNA(CRef<CSeq_id> g) { m_trusted_mrna.push_back(g); };
571  void ClearTrustedmRNA() { m_trusted_mrna.clear(); };
572 
573  const list< CRef<CSeq_id> >& TrustedProt() const { return m_trusted_prot; }
574  void InsertTrustedProt(CRef<CSeq_id> g) { m_trusted_prot.push_back(g); };
575  void ClearTrustedProt() { m_trusted_prot.clear(); };
576 
577  const vector<CCDSInfo>* GetEdgeReadingFrames() const { return &m_edge_reading_frames; }
578  vector<CCDSInfo>* SetEdgeReadingFrames() { return &m_edge_reading_frames; }
579 
580 
581 #ifdef _DEBUG
583 #endif
584 
585 private:
586  void RemoveExtraFShifts(int left, int right);
587  void TrimEdgesToFrameInOtherAlignGaps(const TExons& exons_with_gaps);
588 
589  int m_type;
591  unsigned int m_status;
592 
593  double m_ident;
594  double m_weight;
595 
597  TExons& MyExons() { return m_exons; }
599 
603 
605  bool CdsInvariant(bool check_start_stop = true) const;
606 
611  string m_comment;
612  list< CRef<CSeq_id> > m_trusted_prot;
613  list< CRef<CSeq_id> > m_trusted_mrna;
614 
615 
616  vector<CCDSInfo> m_edge_reading_frames;
617 
618  friend class CChain;
619 };
620 
621 
622 class CAlignMap {
623 public:
626 
627  CAlignMap() {};
629  m_orig_ranges.push_back(SMapRange(SMapRangeEdge(orig_a), SMapRangeEdge(orig_b), kEmptyStr));
631  m_target_len = FShiftedLen(orig_a, orig_b);
632  }
633  CAlignMap(TSignedSeqPos orig_a, TSignedSeqPos orig_b, TInDels::const_iterator fsi_begin, const TInDels::const_iterator fsi_end) : m_orientation(ePlus) {
634  EEdgeType atype = eBoundary;
635  EEdgeType btype = eBoundary;
636  if(fsi_begin != fsi_end) {
637  if(fsi_begin->Loc() == orig_a && !fsi_begin->IsMismatch()) {
638  _ASSERT(!fsi_begin->IsInsertion()); // no reason to have insertion
639  atype = eInDel;
640  }
641  TInDels::const_iterator fs = fsi_end-1;
642  if(fs->Loc() == orig_b+1 && fs->IsDeletion())
643  btype = eInDel;
644  }
645  InsertIndelRangesForInterval(orig_a, orig_b, 0, fsi_begin, fsi_end, atype, btype, "", "");
646  m_target_len = FShiftedLen(orig_a, orig_b);
647  }
648  CAlignMap(const CGeneModel::TExons& exons, const vector<TSignedSeqRange>& transcript_exons, const TInDels& indels, EStrand orientation, int targetlen ); //orientation == strand if not Reversed
649  CAlignMap(const CGeneModel::TExons& exons, const TInDels& frameshifts, EStrand strand, TSignedSeqRange lim = TSignedSeqRange::GetWhole(), int holelen = 0, int polyalen = 0);
651  TSignedSeqPos MapEditedToOrig(TSignedSeqPos edited_pos) const;
653  TSignedSeqRange MapRangeOrigToEdited(TSignedSeqRange orig_range, bool withextras = true) const { return MapRangeOrigToEdited(orig_range, withextras?eLeftEnd:eSinglePoint, withextras?eRightEnd:eSinglePoint); }
654  TSignedSeqRange MapRangeEditedToOrig(TSignedSeqRange edited_range, bool withextras = true) const;
655  template <class In, class Out>
656  void EditedSequence(const In& original_sequence, Out& edited_sequence, bool includeholes = false) const;
657  int FShiftedLen(TSignedSeqRange ab, ERangeEnd lend, ERangeEnd rend) const;
658  int FShiftedLen(TSignedSeqRange ab, bool withextras = true) const;
659  int FShiftedLen(TSignedSeqPos a, TSignedSeqPos b, bool withextras = true) const { return FShiftedLen(TSignedSeqRange(a,b), withextras); }
 //snap to codons works by analysing transcript coordinates (MUST be a protein or reading frame cutout)
661  TSignedSeqRange ShrinkToRealPoints(TSignedSeqRange orig_range, bool snap_to_codons = false) const;
663  TSignedSeqPos FShiftedMove(TSignedSeqPos orig_pos, int len) const; // may reurn < 0 if hits a gap
664  // TInDels GetInDels(bool fs_only) const;
665  // TInDels GetAllCorrections() const;
666  int TargetLen() const { return m_target_len; }
667  EStrand Orientation() const { return m_orientation; }
668  void MoveOrigin(TSignedSeqPos shift) {
669  for(auto& mrange : m_orig_ranges)
670  mrange.MoveOrigin(shift);
671  }
672 
673 // private: // breaks SMapRange on WorkShop. :-/
674  struct SMapRangeEdge {
676  bool operator<(const SMapRangeEdge& mre) const { return m_pos < mre.m_pos; }
677  bool operator==(const SMapRangeEdge& mre) const { return m_pos == mre.m_pos; }
678 
681  string m_extra_seq;
682  };
683 
684  class SMapRange {
685  public:
686  SMapRange(SMapRangeEdge from, SMapRangeEdge to, const string& mism) : m_from(from), m_to(to), m_mism_seq(mism) {}
687  SMapRangeEdge GetEdgeFrom() const { return m_from; }
688  SMapRangeEdge GetEdgeTo() const { return m_to; }
689  void SetEdgeFrom(SMapRangeEdge from) { m_from = from; }
690  void SetEdgeTo(SMapRangeEdge to) { m_to = to; }
691  void MoveOrigin(TSignedSeqPos shift) {
692  m_from.m_pos -= shift;
693  m_to.m_pos -= shift;
694  }
695  TSignedSeqPos GetFrom() const { return m_from.m_pos; }
696  TSignedSeqPos GetTo() const { return m_to.m_pos; }
700  string GetExtraSeqFrom() const { return m_from.m_extra_seq; }
701  TSignedSeqPos GetExtraTo() const { return m_to.m_extra; }
702  string GetExtraSeqTo() const { return m_to.m_extra_seq; }
704  EEdgeType GetTypeTo() const { return m_to.m_edge_type; }
705  const string& GetMismatch() const { return m_mism_seq; }
706  bool operator<(const SMapRange& mr) const {
707  if(m_from.m_pos == mr.m_from.m_pos) return m_to.m_pos < mr.m_to.m_pos;
708  else return m_from.m_pos < mr.m_from.m_pos;
709  }
710 
711  private:
713  string m_mism_seq;
714  };
715 
716  // static TInDels RemoveExtraIndels(const TInDels& indels, const CGeneModel::TExons& exons);
717 
718 private:
719  static TSignedSeqPos MapAtoB(const vector<CAlignMap::SMapRange>& a, const vector<CAlignMap::SMapRange>& b, TSignedSeqPos p, ERangeEnd move_mode);
720  static TSignedSeqRange MapRangeAtoB(const vector<CAlignMap::SMapRange>& a, const vector<CAlignMap::SMapRange>& b, TSignedSeqRange r, ERangeEnd lend, ERangeEnd rend);
721  static TSignedSeqRange MapRangeAtoB(const vector<CAlignMap::SMapRange>& a, const vector<CAlignMap::SMapRange>& b, TSignedSeqRange r, bool withextras ) {
722  return MapRangeAtoB(a, b, r, withextras?eLeftEnd:eSinglePoint, withextras?eRightEnd:eSinglePoint);
723  };
724  static int FindLowerRange(const vector<CAlignMap::SMapRange>& a, TSignedSeqPos p);
725 
726  void InsertOneToOneRange(TSignedSeqPos orig_start, TSignedSeqPos edited_start, TSignedSeqPos len, const string& mism, TSignedSeqPos left_orige, TSignedSeqPos left_edite, TSignedSeqPos right_orige, TSignedSeqPos right_edite,
727  EEdgeType left_type, EEdgeType right_type, const string& left_edit_extra_seq, const string& right_edit_extra_seq);
728  TSignedSeqPos InsertIndelRangesForInterval(TSignedSeqPos orig_a, TSignedSeqPos orig_b, TSignedSeqPos edit_a, TInDels::const_iterator fsi_begin, TInDels::const_iterator fsi_end, EEdgeType type_a, EEdgeType type_b, const string& gseq_a, const string& gseq_b);
729 
730  vector<SMapRange> m_orig_ranges, m_edited_ranges;
733 };
734 
735 
736 
737 
739 public:
741  CAlignModel(const objects::CSeq_align& seq_align);
742  CAlignModel(const CGeneModel& g, const CAlignMap& a);
743  virtual CAlignMap GetAlignMap() const { return m_alignmap; }
744  void ResetAlignMap();
745 
746  virtual void Clip(TSignedSeqRange limits, EClipMode mode, bool ensure_cds_invariant = true) { // drops the score!!!!!!!!!
747  CGeneModel::Clip(limits,mode,ensure_cds_invariant);
748  RecalculateAlignMap(limits.GetFrom(), limits.GetTo());
749  }
750  virtual void CutExons(TSignedSeqRange hole) { // clip or remove exons, dangerous, should be completely in or outside the cds, should not cut an exon in two
751  CGeneModel::CutExons(hole);
752  RecalculateAlignMap(hole.GetTo()+1, hole.GetFrom()-1);
753  }
754 
755  string TargetAccession() const;
756  void SetTargetId(const objects::CSeq_id& id) { m_target_id.Reset(&id); }
757  CConstRef<objects::CSeq_id> GetTargetId() const { return m_target_id; }
758  int TargetLen() const { return m_alignmap.TargetLen(); }
759  int PolyALen() const;
760  CRef<objects::CSeq_align> MakeSeqAlign(const string& contig) const; // should be used for alignments only; for chains and models will produce a Splign alignment of mRNA
761 
762 private:
763  void RecalculateAlignMap(int left, int right);
766 };
767 
768 
769 
770 
772  const string& m_contig;
773  explicit setcontig(const string& cntg) : m_contig(cntg) {}
774 };
776  string& m_contig;
777  explicit getcontig(string& cntg) : m_contig(cntg) {}
778 };
781 
785 
786 
787 template<class Model>
788 class NCBI_XALGOGNOMON_EXPORT CModelCluster : public list<Model> {
789 public:
790  typedef Model TModel;
791  CModelCluster(int f = numeric_limits<int>::max(), int s = 0) : m_limits(f,s) {}
793  void Insert(const Model& a) {
794  m_limits.CombineWith(a.Limits());
795  this->push_back(a);
796  }
797  void Splice(CModelCluster& c) { // elements removed from c and inserted into *this
798  m_limits.CombineWith(c.Limits());
799  this->splice(list<Model>::end(),c);
800  }
801  TSignedSeqRange Limits() const { return m_limits; }
802  bool operator<(const CModelCluster& c) const { return Precede(m_limits, c.m_limits); }
804  list<Model>::clear();
805  m_limits.SetFrom( first );
806  m_limits.SetTo( second );
807  }
808 
809 private:
811 };
812 
815 
816 typedef list<CGeneModel> TGeneModelList;
817 typedef list<CAlignModel> TAlignModelList;
818 
819 
820 template<class Cluster>
822  public:
825  void Insert(const typename Cluster::TModel& a) {
826  Cluster clust;
827  clust.Insert(a);
828  Titerator first = set<Cluster>::lower_bound(Cluster(a.Limits().GetFrom(),a.Limits().GetFrom()));
829  Titerator second = set<Cluster>::upper_bound(Cluster(a.Limits().GetTo(),a.Limits().GetTo()));
830  for(Titerator it = first; it != second;) {
831  clust.Splice(const_cast<Cluster&>(*it));
832  this->erase(it++);
833  }
834  const_cast<Cluster&>(*this->insert(second,Cluster(clust.Limits()))).Splice(clust);
835  }
836 };
837 
840 
841 
843 
844 class EResidue {
845 public :
846  EResidue() : data(enN) {}
848 
849  operator int() const { return int(data); }
850 
851 private:
852  unsigned char data;
853 };
854 
856 {
857  switch(c)
858  {
859  case 'A':
860  return 'T';
861  case 'a':
862  return 't';
863  case 'C':
864  return 'G';
865  case 'c':
866  return 'g';
867  case 'G':
868  return 'C';
869  case 'g':
870  return 'c';
871  case 'T':
872  return 'A';
873  case 't':
874  return 'a';
875  default:
876  return 'N';
877  }
878 }
879 
880 extern const EResidue k_toMinus[5];
881 extern const char *const k_aa_table;
882 
884 {
885  return k_toMinus[c];
886 }
887 
// Replaces every element of [first, last) with Complement(element) and then
// reverses the range in place.
template <class BidirectionalIterator>
void ReverseComplement(const BidirectionalIterator& first, const BidirectionalIterator& last)
{
    BidirectionalIterator cursor( first );
    while (cursor != last) {
        *cursor = Complement(*cursor);
        ++cursor;
    }
    reverse(first, last);
}
895 
896 template<class Model>
897 list<Model> GetAlignParts(const Model& algn, bool settrimflags) { // if no parts result empty
898  list<Model> parts;
899  int left = algn.Limits().GetFrom();
900  for(unsigned int i = 1; i < algn.Exons().size(); ++i) {
901  if (!algn.Exons()[i-1].m_ssplice || !algn.Exons()[i].m_fsplice) {
902  Model m = algn;
903  m.Clip(TSignedSeqRange(left,algn.Exons()[i-1].GetTo()),CGeneModel::eRemoveExons);
904  if(!parts.empty() && settrimflags) {
905  parts.back().Status() &= ~CGeneModel::eRightTrimmed;
906  m.Status() &= ~CGeneModel::eLeftTrimmed;
907  }
908  parts.push_back(m);
909  left = algn.Exons()[i].GetFrom();
910  }
911  }
912  if(!parts.empty()) {
913  Model m = algn;
914  m.Clip(TSignedSeqRange(left,algn.Limits().GetTo()),CGeneModel::eRemoveExons);
915  if(settrimflags) {
916  parts.back().Status() &= ~CGeneModel::eRightTrimmed;
917  m.Status() &= ~CGeneModel::eLeftTrimmed;
918  }
919  parts.push_back(m);
920  }
921 
922  return parts;
923 }
924 
925 /*
926 template<class Model>
927 list<Model> GetAlignParts(const Model& algn, bool settrimflags) {
928  list<Model> parts;
929  int left = algn.Limits().GetFrom();
930  for(unsigned int i = 1; i < algn.Exons().size(); ++i) {
931  if (!algn.Exons()[i-1].m_ssplice || !algn.Exons()[i].m_fsplice) {
932  Model m = algn;
933  m.Status() &= ~CGeneModel::ePolyA;
934  m.Status() &= ~CGeneModel::eCap;
935  m.Clip(TSignedSeqRange(left,algn.Exons()[i-1].GetTo()),CGeneModel::eRemoveExons);
936  if(!parts.empty() && settrimflags) {
937  parts.back().Status() &= ~CGeneModel::eRightTrimmed;
938  m.Status() &= ~CGeneModel::eLeftTrimmed;
939  }
940  parts.push_back(m);
941  left = algn.Exons()[i].GetFrom();
942  }
943  }
944  if(!parts.empty()) {
945  Model m = algn;
946  m.Clip(TSignedSeqRange(left,algn.Limits().GetTo()),CGeneModel::eRemoveExons);
947  m.Status() &= ~CGeneModel::ePolyA;
948  m.Status() &= ~CGeneModel::eCap;
949  if(settrimflags) {
950  parts.back().Status() &= ~CGeneModel::eRightTrimmed;
951  m.Status() &= ~CGeneModel::eLeftTrimmed;
952  }
953  parts.push_back(m);
954 
955  if(algn.Status()&CGeneModel::ePolyA) {
956  if(algn.Strand() == ePlus)
957  parts.back().Status() |= CGeneModel::ePolyA;
958  else
959  parts.front().Status() |= CGeneModel::ePolyA;
960  }
961  if(algn.Status()&CGeneModel::eCap) {
962  if(algn.Strand() == ePlus)
963  parts.front().Status() |= CGeneModel::eCap;
964  else
965  parts.back().Status() |= CGeneModel::eCap;
966  }
967  }
968 
969  return parts;
970 }
971 */
972 
973 void MapAlignsToOrigContig(TAlignModelList& aligns, const TInDels& corrections, int contig_size);
974 
975 
976 
977 END_SCOPE(gnomon)
979 
980 #endif // ALGO_GNOMON___GNOMON_MODEL__HPP
EStatus
TSignedSeqPos GetTo() const
const string & GetMismatch() const
TSignedSeqPos GetExtendedFrom() const
void SetEdgeFrom(SMapRangeEdge from)
EEdgeType GetTypeTo() const
string GetExtraSeqFrom() const
void MoveOrigin(TSignedSeqPos shift)
bool operator<(const SMapRange &mr) const
TSignedSeqPos GetExtraTo() const
SMapRangeEdge m_from
string GetExtraSeqTo() const
SMapRangeEdge GetEdgeFrom() const
TSignedSeqPos GetFrom() const
TSignedSeqPos GetExtraFrom() const
SMapRangeEdge GetEdgeTo() const
TSignedSeqPos GetExtendedTo() const
EEdgeType GetTypeFrom() const
void SetEdgeTo(SMapRangeEdge to)
SMapRange(SMapRangeEdge from, SMapRangeEdge to, const string &mism)
CAlignMap(TSignedSeqPos orig_a, TSignedSeqPos orig_b, TInDels::const_iterator fsi_begin, const TInDels::const_iterator fsi_end)
TSignedSeqPos FShiftedMove(TSignedSeqPos orig_pos, int len) const
Definition: gnomon_seq.cpp:829
static int FindLowerRange(const vector< CAlignMap::SMapRange > &a, TSignedSeqPos p)
Definition: gnomon_seq.cpp:684
TSignedSeqRange ShrinkToRealPointsOnEdited(TSignedSeqRange edited_range) const
Definition: gnomon_seq.cpp:698
TSignedSeqRange MapRangeOrigToEdited(TSignedSeqRange orig_range, bool withextras=true) const
int TargetLen() const
void MoveOrigin(TSignedSeqPos shift)
int FShiftedLen(TSignedSeqPos a, TSignedSeqPos b, bool withextras=true) const
void InsertOneToOneRange(TSignedSeqPos orig_start, TSignedSeqPos edited_start, TSignedSeqPos len, const string &mism, TSignedSeqPos left_orige, TSignedSeqPos left_edite, TSignedSeqPos right_orige, TSignedSeqPos right_edite, EEdgeType left_type, EEdgeType right_type, const string &left_edit_extra_seq, const string &right_edit_extra_seq)
Definition: gnomon_seq.cpp:406
static TSignedSeqRange MapRangeAtoB(const vector< CAlignMap::SMapRange > &a, const vector< CAlignMap::SMapRange > &b, TSignedSeqRange r, ERangeEnd lend, ERangeEnd rend)
Definition: gnomon_seq.cpp:912
vector< SMapRange > m_edited_ranges
CAlignMap(TSignedSeqPos orig_a, TSignedSeqPos orig_b)
TSignedSeqPos InsertIndelRangesForInterval(TSignedSeqPos orig_a, TSignedSeqPos orig_b, TSignedSeqPos edit_a, TInDels::const_iterator fsi_begin, TInDels::const_iterator fsi_end, EEdgeType type_a, EEdgeType type_b, const string &gseq_a, const string &gseq_b)
Definition: gnomon_seq.cpp:423
TSignedSeqRange MapRangeEditedToOrig(TSignedSeqRange edited_range, bool withextras=true) const
Definition: gnomon_seq.cpp:966
static TSignedSeqRange MapRangeAtoB(const vector< CAlignMap::SMapRange > &a, const vector< CAlignMap::SMapRange > &b, TSignedSeqRange r, bool withextras)
TSignedSeqPos MapOrigToEdited(TSignedSeqPos orig_pos) const
Definition: gnomon_seq.cpp:894
void EditedSequence(const In &original_sequence, Out &edited_sequence, bool includeholes=false) const
Definition: gnomon_seq.cpp:632
TSignedSeqPos MapEditedToOrig(TSignedSeqPos edited_pos) const
Definition: gnomon_seq.cpp:903
TSignedSeqRange ShrinkToRealPoints(TSignedSeqRange orig_range, bool snap_to_codons=false) const
Definition: gnomon_seq.cpp:764
int FShiftedLen(TSignedSeqRange ab, ERangeEnd lend, ERangeEnd rend) const
Definition: gnomon_seq.cpp:993
static TSignedSeqPos MapAtoB(const vector< CAlignMap::SMapRange > &a, const vector< CAlignMap::SMapRange > &b, TSignedSeqPos p, ERangeEnd move_mode)
Definition: gnomon_seq.cpp:842
EStrand m_orientation
EStrand Orientation() const
TSignedSeqRange MapRangeOrigToEdited(TSignedSeqRange orig_range, ERangeEnd lend, ERangeEnd rend) const
Definition: gnomon_seq.cpp:938
vector< SMapRange > m_orig_ranges
virtual void Clip(TSignedSeqRange limits, EClipMode mode, bool ensure_cds_invariant=true)
CAlignMap m_alignmap
CConstRef< objects::CSeq_id > m_target_id
CConstRef< objects::CSeq_id > GetTargetId() const
virtual void CutExons(TSignedSeqRange hole)
int TargetLen() const
virtual CAlignMap GetAlignMap() const
void SetTargetId(const objects::CSeq_id &id)
CAlignModel(const objects::CSeq_align &seq_align)
void Cut(TSignedSeqRange hole)
CCDSInfo MapFromEditedToOrig(const CAlignMap &amap) const
bool PStop(bool includeall=true) const
bool HasStop() const
void SetStart(TSignedSeqRange r, bool confirmed=false)
bool m_confirmed_stop
TPStops m_p_stops
CCDSInfo MapFromOrigToEdited(const CAlignMap &amap) const
TSignedSeqRange MaxCdsLimits() const
bool OpenCds() const
void Set5PrimeCdsLimit(TSignedSeqPos p)
CCDSInfo(bool gcoords=true)
double m_score
bool HasStart() const
void Remap(const CRangeMapper &mapper)
bool IsMappedToGenome() const
void SetScore(double score, bool open=false)
bool m_genomic_coordinates
TSignedSeqRange m_reading_frame_from_proteins
TSignedSeqRange Start() const
bool ConfirmedStart() const
TSignedSeqRange m_reading_frame
void Clip(TSignedSeqRange limits)
void AddPStop(SPStop stp)
TSignedSeqRange Cds() const
void CombineWith(const CCDSInfo &another_cds_info)
int Strand() const
double Score() const
TSignedSeqRange ReadingFrame() const
bool m_confirmed_start
TSignedSeqRange m_max_cds_limits
TSignedSeqRange m_start
TSignedSeqRange ProtReadingFrame() const
void Clear()
TSignedSeqRange m_stop
void SetStop(TSignedSeqRange r, bool confirmed=false)
const TPStops & PStops() const
bool Invariant() const
vector< SPStop > TPStops
bool ConfirmedStop() const
void Clear5PrimeCdsLimit()
void SetReadingFrame(TSignedSeqRange r, bool protein=false)
bool operator==(const CCDSInfo &another) const
TSignedSeqRange Stop() const
void ClearPStops()
bool OpenLeftEnd() const
void SetGeneID(Int8 id)
void SetSplices(int i, const string &f_sig, const string &s_sig)
const list< CRef< CSeq_id > > & TrustedProt() const
bool operator<(const CGeneModel &a) const
const unsigned int & Status() const
EStrand m_strand
bool GoodEnoughToBeAnnotation() const
double Score() const
virtual ~CGeneModel()
double m_ident
string m_comment
bool Open5primeEnd() const
void ClearExons()
int FShiftedLen(TSignedSeqPos a, TSignedSeqPos b, bool withextras=true) const
virtual void CutExons(TSignedSeqRange hole)
Int8 GeneID() const
bool IntersectingWith(const CGeneModel &a) const
TExons & MyExons()
EStrand Orientation() const
void InsertTrustedProt(CRef< CSeq_id > g)
TSignedSeqRange m_range
list< CRef< CSeq_id > > m_trusted_mrna
void SetRankInGene(int rank)
bool IdenticalAlign(const CGeneModel &a) const
void ClearTrustedProt()
double Weight() const
unsigned int & Status()
const CSupportInfoSet & Support() const
bool OpenRightEnd() const
void ClearTrustedmRNA()
const TExons & Exons() const
vector< CCDSInfo > * SetEdgeReadingFrames()
TExons & Exons()
bool LeftComplete() const
CCDSInfo m_cds_info
TSignedSeqRange ReadingFrame() const
const TInDels & FrameShifts() const
void SetWeight(double w)
list< CRef< CSeq_id > > m_trusted_prot
bool RightComplete() const
CGeneModel(EStrand s=ePlus, Int8 id=0, int type=0)
bool Continuous() const
void SetType(int t)
CSupportInfoSet m_support
bool FullCds() const
unsigned int m_status
void SetID(Int8 id)
void SetStrand(EStrand s)
TExons m_exons
void ReplaceSupport(const CSupportInfoSet &support_set)
virtual void Clip(TSignedSeqRange limits, EClipMode mode, bool ensure_cds_invariant=true)
bool CompleteCds() const
int RankInGene() const
bool ConfirmedStop() const
void InsertTrustedmRNA(CRef< CSeq_id > g)
Int8 ID() const
string m_protein_hit
void ClearStatus()
bool AddSupport(const CSupportInfo &support)
TSignedSeqRange Limits() const
int Type() const
const list< CRef< CSeq_id > > & TrustedmRNA() const
void AddComment(const string &comment)
TInDels m_fshifts
double Ident() const
void SetIdent(double i)
const CCDSInfo & GetCdsInfo() const
const string & GetComment() const
bool operator==(const CGeneModel &a) const
vector< CModelExon > TExons
bool HasStop() const
string & ProteinHit()
const string & ProteinHit() const
double m_weight
bool ConfirmedStart() const
bool HasStart() const
const vector< CCDSInfo > * GetEdgeReadingFrames() const
bool OpenCds() const
vector< CCDSInfo > m_edge_reading_frames
bool PStop(bool includeall=true) const
bool m_expecting_hole
TInDels & FrameShifts()
void SetComment(const string &comment)
EStrand Strand() const
const SSource & GetSource() const
EStatus m_status
bool IsDeletion() const
string m_indelv
void SetStatus(EStatus s)
int Len() const
TSignedSeqPos Loc() const
EType GetType() const
bool IsInsertion() const
bool operator==(const CInDelInfo &fsi) const
EStatus GetStatus() const
int InDelEnd() const
void Init(TSignedSeqPos l, int len, EType type, const string &v, const SSource &s)
void SetLoc(TSignedSeqPos l)
string GetInDelV() const
TSignedSeqPos m_loc
bool operator<(const CInDelInfo &fsi) const
bool IsMismatch() const
bool operator!=(const CInDelInfo &fsi) const
bool IntersectingWith(TSignedSeqPos a, TSignedSeqPos b) const
SSource m_source
CInDelInfo(TSignedSeqPos l, int len, EType type, const string &v=kEmptyStr, const SSource &s=SSource())
set< Cluster >::iterator Titerator
void Insert(const typename Cluster::TModel &a)
CModelCluster(TSignedSeqRange limits)
void Insert(const Model &a)
void Init(TSignedSeqPos first, TSignedSeqPos second)
CModelCluster(int f=numeric_limits< int >::max(), int s=0)
TSignedSeqRange m_limits
TSignedSeqRange Limits() const
void Splice(CModelCluster &c)
bool operator<(const CModelCluster &c) const
bool operator==(const CModelExon &p) const
CInDelInfo::SSource m_source
CModelExon(TSignedSeqPos f=0, TSignedSeqPos s=0, bool fs=false, bool ss=false, const string &fsig="", const string &ssig="", double ident=0, const string &seq="", const CInDelInfo::SSource &src=CInDelInfo::SSource())
bool operator<(const CModelExon &p) const
double m_ident
void AddTo(int d)
string m_ssplice_sig
TSignedSeqPos GetFrom() const
void AddFrom(int d)
const TSignedSeqRange & Limits() const
void Remap(const CRangeMapper &mapper)
TSignedSeqRange m_range
TSignedSeqPos GetTo() const
TSignedSeqRange & Limits()
bool operator!=(const CModelExon &p) const
virtual ~CRangeMapper()
virtual TSignedSeqRange operator()(TSignedSeqRange r, bool withextras=true) const =0
Int8 GetId() const
void SetCore(bool core)
bool operator==(const CSupportInfo &s) const
CSupportInfo(Int8 model_id, bool core=false)
bool operator<(const CSupportInfo &s) const
bool IsCore() const
unsigned char data
EResidue(EResidueNames e)
const_iterator upper_bound(const key_type &key) const
Definition: set.hpp:139
const_iterator lower_bound(const key_type &key) const
Definition: set.hpp:138
Include a standard set of the NCBI C++ Toolkit most basic headers.
#define Len
@ eTSA
bool Empty(const CNcbiOstrstream &src)
Definition: fileutil.cpp:523
#define false
Definition: bool.h:36
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:51
static void Init(void)
Definition: cursor6.c:76
static int type
Definition: getdata.c:31
static const TDS_WORD limits[]
Definition: num_limits.h:85
USING_SCOPE(objects)
vector< TResidue > CResidueVec
bool Precede(TSignedSeqRange l, TSignedSeqRange r)
set< CSupportInfo > CSupportInfoSet
TResidue Complement(TResidue c)
vector< int > TIVec
CModelCluster< CAlignModel > TAlignModelCluster
vector< double > TDVec
const char *const k_aa_table
Definition: gnomon_seq.cpp:41
CModelCluster< CGeneModel > TGeneModelCluster
EResidueNames
@ enN
@ enC
@ enG
@ enT
@ enA
list< CAlignModel > TAlignModelList
CModelClusterSet< TAlignModelCluster > TAlignModelClusterSet
double BadScore()
CNcbiOstream & operator<<(CNcbiOstream &s, const setcontig &c)
EStrand
@ eMinus
@ ePlus
CNcbiIstream & operator>>(CNcbiIstream &s, const getcontig &c)
bool Include(TSignedSeqRange big, TSignedSeqRange small)
const EResidue k_toMinus[5]
Definition: gnomon_seq.cpp:40
void ReverseComplement(const BidirectionalIterator &first, const BidirectionalIterator &last)
CModelClusterSet< TGeneModelCluster > TGeneModelClusterSet
list< CGeneModel > TGeneModelList
bool IsStopCodon(const Res *seq, int strand=ePlus)
Definition: gnomon_seq.cpp:124
EStrand OtherStrand(EStrand s)
vector< CInDelInfo > TInDels
objects::CSeqVectorTypes::TResidue TResidue
bool Enclosed(TSignedSeqRange big, TSignedSeqRange small)
void MapAlignsToOrigContig(TAlignModelList &aligns, const TInDels &corrections, int contig_size)
list< Model > GetAlignParts(const Model &algn, bool settrimflags)
bool IsStartCodon(const Res *seq, int strand=ePlus)
Definition: gnomon_seq.cpp:108
int TSignedSeqPos
Type for signed sequence position.
Definition: ncbimisc.hpp:887
int64_t Int8
8-byte (64-bit) signed integer
Definition: ncbitype.h:104
bool NotEmpty(void) const
Definition: range.hpp:152
bool operator<(const TThisType &r) const
Definition: range.hpp:223
static TThisType GetEmpty(void)
Definition: range.hpp:306
static position_type GetWholeFrom(void)
Definition: range.hpp:256
CRange< TSignedSeqPos > TSignedSeqRange
Definition: range.hpp:420
static TThisType GetWhole(void)
Definition: range.hpp:272
static position_type GetWholeTo(void)
Definition: range.hpp:264
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
#define kEmptyStr
Definition: ncbistr.hpp:123
#define NCBI_XALGOGNOMON_EXPORT
Definition: ncbi_export.h:1001
TTo GetTo(void) const
Get the To member data.
Definition: Range_.hpp:269
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
@ eEST
int i
int len
constexpr bool empty(list< Ts... >) noexcept
mdb_mode_t mode
Definition: lmdb++.h:38
const struct ncbi::grid::netcache::search::fields::SIZE size
unsigned int a
Definition: ncbi_localip.c:102
EIPRangeType t
Definition: ncbi_localip.c:101
Portable reference counted smart and weak pointers using CWeakRef, CRef, CObject and CObjectEx.
T max(T x_, T y_)
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
void Out(T t, int w, CNcbiOstream &to=cout)
Definition: parse.cpp:467
#define Loc
static SLJIT_INLINE sljit_ins l(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
bool operator<(const SMapRangeEdge &mre) const
SMapRangeEdge(TSignedSeqPos p, TSignedSeqPos e=0, EEdgeType t=eBoundary, const string &seq=kEmptyStr)
bool operator==(const SMapRangeEdge &mre) const
SPStop(TSignedSeqRange r, EStatus s)
bool operator<(const SPStop &stp) const
TSignedSeqRange m_range
getcontig(string &cntg)
string & m_contig
const string & m_contig
setcontig(const string &cntg)
Definition: type.c:6
#define _ASSERT
#define Type
int g(Seg_Gsm *spe, Seq_Mtf *psm, Thd_Gsm *tdg)
Definition: thrddgri.c:44
void AddExon(CRef< CSeq_entry > seq, const string &number, TSeqPos start)
#define const
Definition: zconf.h:232
Modified on Wed Sep 04 15:03:19 2024 by modify_doxy.py rev. 669887