1 #ifndef ALGO_GNOMON___GNOMON_MODEL__HPP
2 #define ALGO_GNOMON___GNOMON_MODEL__HPP
117 int Len()
const {
return m_len; }
118 int InDelEnd()
const {
return ((IsInsertion() || IsMismatch()) ? Loc()+
Len() : Loc()); }
124 return (IsDeletion() && Loc() >=
a && Loc() <=
b+1) ||
125 ((IsInsertion() || IsMismatch()) && Loc() <=
b &&
a <= Loc()+
Len()-1);
129 if(m_loc != fsi.m_loc)
130 return m_loc < fsi.m_loc;
131 else if(m_type != fsi.m_type)
132 return m_type < fsi.m_type;
133 else if(m_len != fsi.m_len)
134 return m_len < fsi.m_len;
136 return m_indelv < fsi.m_indelv;
155 _ASSERT(m_indelv.empty() || (
int)m_indelv.length() ==
len);
156 _ASSERT(m_indelv.empty() || m_type != eIns);
157 if((IsDeletion() || IsMismatch()) && GetInDelV().
empty())
158 m_indelv.insert( m_indelv.end(),
Len(),
'N');
187 m_fsplice(fs), m_ssplice(ss), m_fsplice_sig(fsig), m_ssplice_sig(ssig), m_ident(ident), m_seq(seq), m_source(src), m_range(
f,s)
189 _ASSERT(m_seq.empty() || m_range.Empty());
198 return !(*
this == p);
208 void AddFrom(
int d) { m_range.SetFrom( m_range.GetFrom() +d ); }
209 void AddTo(
int d) { m_range.SetTo( m_range.GetTo() +d ); }
// Declaration only; the definition is not part of this excerpt.
// `open` is optional and defaults to false.
256 void SetScore(
double score,
bool open=
false);
// Declaration only; the definition is not part of this excerpt.
// Const query returning bool; `includeall` is optional and defaults to true.
284 bool PStop(
bool includeall =
true)
const;
368 eNotForChaining = 64,
// Static helper (no object needed) that takes an int `type` and returns a string.
// Declaration only; the mapping itself is defined outside this excerpt.
372 static string TypeToString(
int type);
384 eBestPlacement = 512,
385 eUnknownOrientation = 1024,
386 eConsistentCoverage = 2048,
388 eUnmodifiedAlign = 8192,
389 eChangedByFilter = 16384,
391 eLeftConfirmed = 65536,
392 eRightConfirmed = 131072,
393 eLeftFlexible = 262144,
394 eRightFlexible = 524288
398 m_type(
type), m_id(id), m_status(0), m_ident(0), m_weight(1), m_expecting_hole(
false), m_strand(s), m_geneid(0), m_rank_in_gene(0) {}
// Declaration only; the definition is not part of this excerpt.
// Takes an identity value, a sequence string, a CInDelInfo::SSource and an
// `infront` flag.
// NOTE(review): `infront` presumably selects which end of the exon list
// receives the new exon - confirm against the definition.
403 void AddGgapExon(
double ident,
const string& seq,
const CInDelInfo::SSource& src,
bool infront);
// Declaration only; the definition is not part of this excerpt.
// Takes the exon range, two strings (`fs`, `ss`), an identity value and an
// `infront` flag.
404 void AddNormalExon(
TSignedSeqRange exon,
const string& fs,
const string& ss,
double ident,
bool infront);
414 m_edge_reading_frames.clear();
416 void SetSplices(
int i,
const string& f_sig,
const string& s_sig) { m_exons[
i].m_fsplice_sig = f_sig; m_exons[
i].m_ssplice_sig = s_sig; }
// The members below are declarations only; their definitions are not part
// of this excerpt, so the notes describe signatures, not behaviour.

// Takes no arguments and returns nothing.
418 void ReverseComplementModel();
// Takes an int `amount`.
424 void ExtendLeft(
int amount);
// Takes an int `amount`.
425 void ExtendRight(
int amount);
// Takes another CGeneModel; `ensure_cds_invariant` is optional and defaults to true.
426 void Extend(
const CGeneModel&
a,
bool ensure_cds_invariant =
true);
// Takes a CGnomonEngine by const reference.
427 void RemoveShortHolesAndRescore(
const CGnomonEngine& gnomon);
// Const query returning int.
433 int AlignLen()
const ;
// Takes no arguments and returns nothing.
434 void RecalculateLimits();
// Const query returning int.
440 int RealCdsLen()
const ;
// Takes a CCDSInfo by const reference.
446 void SetCdsInfo(
const CCDSInfo& cds_info);
// Overload taking another CGeneModel; `ensure_cds_invariant` defaults to true.
448 void CombineCdsInfo(
const CGeneModel&
a,
bool ensure_cds_invariant =
true);
// Overload taking a CCDSInfo directly; `ensure_cds_invariant` defaults to true.
449 void CombineCdsInfo(
const CCDSInfo& cds_info,
bool ensure_cds_invariant =
true);
453 return Limits().IntersectingWith(
a.Limits());
456 double Ident()
const {
return m_ident; }
459 double Weight()
const {
return m_weight; }
466 bool plusstrand = Strand() ==
ePlus;
467 return (notreversed == plusstrand) ?
ePlus :
eMinus;
471 int Type()
const {
return m_type; }
484 unsigned int&
Status() {
return m_status; }
485 const unsigned int&
Status()
const {
return m_status; }
489 void SetComment(
const string& comment) { m_comment = comment; }
490 void AddComment(
const string& comment) { m_comment +=
" " + comment; }
494 double Score()
const {
return m_cds_info.Score(); }
498 for(
unsigned int i = 1;
i < Exons().size(); ++
i)
499 if (!Exons()[
i-1].m_ssplice || !Exons()[
i].m_fsplice)
503 bool HasStart()
const {
return m_cds_info.HasStart(); }
504 bool HasStop ()
const {
return m_cds_info.HasStop (); }
507 bool FullCds()
const {
return HasStart() && HasStop() && Continuous(); }
508 bool CompleteCds()
const {
return FullCds() && (!Open5primeEnd() || ConfirmedStart()); }
512 _ASSERT( !(OpenCds()&&ConfirmedStart()) );
513 return (ReadingFrame().
Empty() || (!OpenCds() && FullCds()));
518 return (Strand() ==
ePlus ? OpenLeftEnd() : OpenRightEnd());
529 bool OpenCds()
const {
return m_cds_info.OpenCds(); }
530 bool PStop(
bool includeall =
true)
const {
return m_cds_info.PStop(includeall); }
// The members below are declarations only; their definitions are not part
// of this excerpt, so the notes describe signatures, not behaviour.

// Const query returning bool; `limit` is optional and defaults to 50.
535 bool isNMD(
int limit = 50)
const;
// Const query returning a TInDels collection; `fs_only` has no default.
540 TInDels GetInDels(
bool fs_only)
const;
// Const query returning a string, given the contig sequence as a CResidueVec.
551 string GetCdsDnaSequence (
const CResidueVec& contig_sequence)
const;
// Const query returning a string, given the contig sequence as a CResidueVec.
552 string GetProtein (
const CResidueVec& contig_sequence)
const;
562 {
return Strand()==
a.Strand() && Limits()==
a.Limits() && Exons() ==
a.Exons() && FrameShifts()==
a.FrameShifts() &&
563 GetCdsInfo().PStops() ==
a.GetCdsInfo().PStops() &&
Type() ==
a.Type() && Status() ==
a.Status(); }
566 return IdenticalAlign(
a) &&
Type()==
a.Type() && m_id==
a.m_id && m_support==
a.m_support;
569 const list< CRef<CSeq_id> >&
TrustedmRNA()
const {
return m_trusted_mrna; }
573 const list< CRef<CSeq_id> >&
TrustedProt()
const {
return m_trusted_prot; }
// The members below are declarations only; their definitions are not part
// of this excerpt, so the notes describe signatures, not behaviour.

// Takes two ints, `left` and `right`.
586 void RemoveExtraFShifts(
int left,
int right);
// Takes a TExons collection by const reference.
587 void TrimEdgesToFrameInOtherAlignGaps(
const TExons& exons_with_gaps);
// Const query returning bool; `check_start_stop` is optional and defaults to true.
605 bool CdsInvariant(
bool check_start_stop =
true)
const;
636 if(fsi_begin != fsi_end) {
637 if(fsi_begin->Loc() == orig_a && !fsi_begin->IsMismatch()) {
638 _ASSERT(!fsi_begin->IsInsertion());
641 TInDels::const_iterator fs = fsi_end-1;
642 if(fs->Loc() == orig_b+1 && fs->IsDeletion())
// Member template declaration; the definition is not part of this excerpt.
// `In` is the type of the input sequence (taken by const reference) and
// `Out` the type of the output sequence (taken by non-const reference, so
// the result is delivered through `edited_sequence`).
// `includeholes` is optional and defaults to false.
655 template <
class In,
class Out>
656 void EditedSequence(
const In& original_sequence,
Out& edited_sequence,
bool includeholes =
false)
const;
670 mrange.MoveOrigin(shift);
727 EEdgeType left_type,
EEdgeType right_type,
const string& left_edit_extra_seq,
const string& right_edit_extra_seq);
// Declaration only; takes no arguments and returns nothing.
744 void ResetAlignMap();
// Declaration only; const query returning a string.
755 string TargetAccession()
const;
756 void SetTargetId(
const objects::CSeq_id&
id) { m_target_id.Reset(&
id); }
758 int TargetLen()
const {
return m_alignmap.TargetLen(); }
// Declaration only; const query returning int.
759 int PolyALen()
const;
// Declaration only; takes two ints, `left` and `right`.
763 void RecalculateAlignMap(
int left,
int right);
773 explicit setcontig(
const string& cntg) : m_contig(cntg) {}
787 template<
class Model>
794 m_limits.CombineWith(
a.Limits());
798 m_limits.CombineWith(c.
Limits());
799 this->splice(list<Model>::end(),c);
804 list<Model>::clear();
805 m_limits.SetFrom(
first );
806 m_limits.SetTo( second );
820 template<
class Cluster>
825 void Insert(
const typename Cluster::TModel&
a) {
831 clust.Splice(
const_cast<Cluster&
>(*it));
834 const_cast<Cluster&
>(*this->insert(second,Cluster(clust.Limits()))).Splice(clust);
888 template <
class B
idirectionalIterator>
896 template<
class Model>
899 int left = algn.Limits().GetFrom();
900 for(
unsigned int i = 1;
i < algn.Exons().
size(); ++
i) {
901 if (!algn.Exons()[
i-1].m_ssplice || !algn.Exons()[
i].m_fsplice) {
906 if(!parts.empty() && settrimflags) {
907 parts.back().Status() &= ~
CGeneModel::eRightTrimmed;
911 left = algn.
Exons()[
i].GetFrom();
926 if(algn.Strand() ==
ePlus)
932 if(algn.Strand() ==
ePlus)
TSignedSeqPos GetTo() const
const string & GetMismatch() const
TSignedSeqPos GetExtendedFrom() const
void SetEdgeFrom(SMapRangeEdge from)
EEdgeType GetTypeTo() const
string GetExtraSeqFrom() const
void MoveOrigin(TSignedSeqPos shift)
bool operator<(const SMapRange &mr) const
TSignedSeqPos GetExtraTo() const
string GetExtraSeqTo() const
SMapRangeEdge GetEdgeFrom() const
TSignedSeqPos GetFrom() const
TSignedSeqPos GetExtraFrom() const
SMapRangeEdge GetEdgeTo() const
TSignedSeqPos GetExtendedTo() const
EEdgeType GetTypeFrom() const
void SetEdgeTo(SMapRangeEdge to)
SMapRange(SMapRangeEdge from, SMapRangeEdge to, const string &mism)
CAlignMap(TSignedSeqPos orig_a, TSignedSeqPos orig_b, TInDels::const_iterator fsi_begin, const TInDels::const_iterator fsi_end)
TSignedSeqPos FShiftedMove(TSignedSeqPos orig_pos, int len) const
static int FindLowerRange(const vector< CAlignMap::SMapRange > &a, TSignedSeqPos p)
TSignedSeqRange ShrinkToRealPointsOnEdited(TSignedSeqRange edited_range) const
TSignedSeqRange MapRangeOrigToEdited(TSignedSeqRange orig_range, bool withextras=true) const
void MoveOrigin(TSignedSeqPos shift)
int FShiftedLen(TSignedSeqPos a, TSignedSeqPos b, bool withextras=true) const
void InsertOneToOneRange(TSignedSeqPos orig_start, TSignedSeqPos edited_start, TSignedSeqPos len, const string &mism, TSignedSeqPos left_orige, TSignedSeqPos left_edite, TSignedSeqPos right_orige, TSignedSeqPos right_edite, EEdgeType left_type, EEdgeType right_type, const string &left_edit_extra_seq, const string &right_edit_extra_seq)
static TSignedSeqRange MapRangeAtoB(const vector< CAlignMap::SMapRange > &a, const vector< CAlignMap::SMapRange > &b, TSignedSeqRange r, ERangeEnd lend, ERangeEnd rend)
vector< SMapRange > m_edited_ranges
CAlignMap(TSignedSeqPos orig_a, TSignedSeqPos orig_b)
TSignedSeqPos InsertIndelRangesForInterval(TSignedSeqPos orig_a, TSignedSeqPos orig_b, TSignedSeqPos edit_a, TInDels::const_iterator fsi_begin, TInDels::const_iterator fsi_end, EEdgeType type_a, EEdgeType type_b, const string &gseq_a, const string &gseq_b)
TSignedSeqRange MapRangeEditedToOrig(TSignedSeqRange edited_range, bool withextras=true) const
static TSignedSeqRange MapRangeAtoB(const vector< CAlignMap::SMapRange > &a, const vector< CAlignMap::SMapRange > &b, TSignedSeqRange r, bool withextras)
TSignedSeqPos MapOrigToEdited(TSignedSeqPos orig_pos) const
void EditedSequence(const In &original_sequence, Out &edited_sequence, bool includeholes=false) const
TSignedSeqPos MapEditedToOrig(TSignedSeqPos edited_pos) const
TSignedSeqRange ShrinkToRealPoints(TSignedSeqRange orig_range, bool snap_to_codons=false) const
int FShiftedLen(TSignedSeqRange ab, ERangeEnd lend, ERangeEnd rend) const
static TSignedSeqPos MapAtoB(const vector< CAlignMap::SMapRange > &a, const vector< CAlignMap::SMapRange > &b, TSignedSeqPos p, ERangeEnd move_mode)
EStrand Orientation() const
TSignedSeqRange MapRangeOrigToEdited(TSignedSeqRange orig_range, ERangeEnd lend, ERangeEnd rend) const
vector< SMapRange > m_orig_ranges
virtual void Clip(TSignedSeqRange limits, EClipMode mode, bool ensure_cds_invariant=true)
CConstRef< objects::CSeq_id > m_target_id
CConstRef< objects::CSeq_id > GetTargetId() const
virtual void CutExons(TSignedSeqRange hole)
virtual CAlignMap GetAlignMap() const
void SetTargetId(const objects::CSeq_id &id)
CAlignModel(const objects::CSeq_align &seq_align)
void Cut(TSignedSeqRange hole)
CCDSInfo MapFromEditedToOrig(const CAlignMap &amap) const
bool PStop(bool includeall=true) const
void SetStart(TSignedSeqRange r, bool confirmed=false)
CCDSInfo MapFromOrigToEdited(const CAlignMap &amap) const
TSignedSeqRange MaxCdsLimits() const
void Set5PrimeCdsLimit(TSignedSeqPos p)
CCDSInfo(bool gcoords=true)
void Remap(const CRangeMapper &mapper)
bool IsMappedToGenome() const
void SetScore(double score, bool open=false)
bool m_genomic_coordinates
TSignedSeqRange m_reading_frame_from_proteins
TSignedSeqRange Start() const
bool ConfirmedStart() const
TSignedSeqRange m_reading_frame
void Clip(TSignedSeqRange limits)
void AddPStop(SPStop stp)
TSignedSeqRange Cds() const
void CombineWith(const CCDSInfo &another_cds_info)
TSignedSeqRange ReadingFrame() const
TSignedSeqRange m_max_cds_limits
TSignedSeqRange ProtReadingFrame() const
void SetStop(TSignedSeqRange r, bool confirmed=false)
const TPStops & PStops() const
bool ConfirmedStop() const
void Clear5PrimeCdsLimit()
void SetReadingFrame(TSignedSeqRange r, bool protein=false)
bool operator==(const CCDSInfo &another) const
TSignedSeqRange Stop() const
void SetSplices(int i, const string &f_sig, const string &s_sig)
const list< CRef< CSeq_id > > & TrustedProt() const
bool operator<(const CGeneModel &a) const
const unsigned int & Status() const
bool GoodEnoughToBeAnnotation() const
bool Open5primeEnd() const
int FShiftedLen(TSignedSeqPos a, TSignedSeqPos b, bool withextras=true) const
virtual void CutExons(TSignedSeqRange hole)
bool IntersectingWith(const CGeneModel &a) const
EStrand Orientation() const
void InsertTrustedProt(CRef< CSeq_id > g)
list< CRef< CSeq_id > > m_trusted_mrna
void SetRankInGene(int rank)
bool IdenticalAlign(const CGeneModel &a) const
const CSupportInfoSet & Support() const
bool OpenRightEnd() const
const TExons & Exons() const
vector< CCDSInfo > * SetEdgeReadingFrames()
bool LeftComplete() const
TSignedSeqRange ReadingFrame() const
const TInDels & FrameShifts() const
list< CRef< CSeq_id > > m_trusted_prot
bool RightComplete() const
CGeneModel(EStrand s=ePlus, Int8 id=0, int type=0)
CSupportInfoSet m_support
void SetStrand(EStrand s)
void ReplaceSupport(const CSupportInfoSet &support_set)
virtual void Clip(TSignedSeqRange limits, EClipMode mode, bool ensure_cds_invariant=true)
bool ConfirmedStop() const
void InsertTrustedmRNA(CRef< CSeq_id > g)
bool AddSupport(const CSupportInfo &support)
TSignedSeqRange Limits() const
const list< CRef< CSeq_id > > & TrustedmRNA() const
void AddComment(const string &comment)
const CCDSInfo & GetCdsInfo() const
const string & GetComment() const
bool operator==(const CGeneModel &a) const
vector< CModelExon > TExons
const string & ProteinHit() const
bool ConfirmedStart() const
const vector< CCDSInfo > * GetEdgeReadingFrames() const
vector< CCDSInfo > m_edge_reading_frames
bool PStop(bool includeall=true) const
void SetComment(const string &comment)
const SSource & GetSource() const
void SetStatus(EStatus s)
TSignedSeqPos Loc() const
bool operator==(const CInDelInfo &fsi) const
EStatus GetStatus() const
void Init(TSignedSeqPos l, int len, EType type, const string &v, const SSource &s)
void SetLoc(TSignedSeqPos l)
bool operator<(const CInDelInfo &fsi) const
bool operator!=(const CInDelInfo &fsi) const
bool IntersectingWith(TSignedSeqPos a, TSignedSeqPos b) const
CInDelInfo(TSignedSeqPos l, int len, EType type, const string &v=kEmptyStr, const SSource &s=SSource())
set< Cluster >::iterator Titerator
void Insert(const typename Cluster::TModel &a)
CModelCluster(TSignedSeqRange limits)
void Insert(const Model &a)
void Init(TSignedSeqPos first, TSignedSeqPos second)
CModelCluster(int f=numeric_limits< int >::max(), int s=0)
TSignedSeqRange Limits() const
void Splice(CModelCluster &c)
bool operator<(const CModelCluster &c) const
bool operator==(const CModelExon &p) const
CInDelInfo::SSource m_source
CModelExon(TSignedSeqPos f=0, TSignedSeqPos s=0, bool fs=false, bool ss=false, const string &fsig="", const string &ssig="", double ident=0, const string &seq="", const CInDelInfo::SSource &src=CInDelInfo::SSource())
bool operator<(const CModelExon &p) const
TSignedSeqPos GetFrom() const
const TSignedSeqRange & Limits() const
void Remap(const CRangeMapper &mapper)
TSignedSeqPos GetTo() const
TSignedSeqRange & Limits()
bool operator!=(const CModelExon &p) const
virtual TSignedSeqRange operator()(TSignedSeqRange r, bool withextras=true) const =0
bool operator==(const CSupportInfo &s) const
CSupportInfo(Int8 model_id, bool core=false)
bool operator<(const CSupportInfo &s) const
EResidue(EResidueNames e)
const_iterator upper_bound(const key_type &key) const
const_iterator lower_bound(const key_type &key) const
Include a standard set of the NCBI C++ Toolkit most basic headers.
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
bool Empty(const CNcbiOstrstream &src)
vector< TResidue > CResidueVec
bool Precede(TSignedSeqRange l, TSignedSeqRange r)
CVectorSet< CSupportInfo > CSupportInfoSet
TResidue Complement(TResidue c)
CModelCluster< CAlignModel > TAlignModelCluster
const char *const k_aa_table
CModelCluster< CGeneModel > TGeneModelCluster
list< CAlignModel > TAlignModelList
CModelClusterSet< TAlignModelCluster > TAlignModelClusterSet
CNcbiOstream & operator<<(CNcbiOstream &s, const setcontig &c)
CNcbiIstream & operator>>(CNcbiIstream &s, const getcontig &c)
bool Include(TSignedSeqRange big, TSignedSeqRange small)
const EResidue k_toMinus[5]
void ReverseComplement(const BidirectionalIterator &first, const BidirectionalIterator &last)
CModelClusterSet< TGeneModelCluster > TGeneModelClusterSet
list< CGeneModel > TGeneModelList
bool IsStopCodon(const Res *seq, int strand=ePlus)
EStrand OtherStrand(EStrand s)
vector< CInDelInfo > TInDels
objects::CSeqVectorTypes::TResidue TResidue
bool Enclosed(TSignedSeqRange big, TSignedSeqRange small)
void MapAlignsToOrigContig(TAlignModelList &aligns, const TInDels &corrections, int contig_size)
list< Model > GetAlignParts(const Model &algn, bool settrimflags)
bool IsStartCodon(const Res *seq, int strand=ePlus)
int TSignedSeqPos
Type for signed sequence position.
int64_t Int8
8-byte (64-bit) signed integer
bool NotEmpty(void) const
bool operator<(const TThisType &r) const
static TThisType GetEmpty(void)
static position_type GetWholeFrom(void)
CRange< TSignedSeqPos > TSignedSeqRange
static TThisType GetWhole(void)
static position_type GetWholeTo(void)
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
#define NCBI_XALGOGNOMON_EXPORT
TTo GetTo(void) const
Get the To member data.
TFrom GetFrom(void) const
Get the From member data.
unsigned int
A callback function used to compare two keys in a database.
constexpr bool empty(list< Ts... >) noexcept
const struct ncbi::grid::netcache::search::fields::SIZE size
Portable reference counted smart and weak pointers using CWeakRef, CRef, CObject and CObjectEx.
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
double f(double x_, const double &y_)
static const TDS_WORD limits[]
void Out(T t, int w, CNcbiOstream &to=cout)
bool operator<(const SMapRangeEdge &mre) const
SMapRangeEdge(TSignedSeqPos p, TSignedSeqPos e=0, EEdgeType t=eBoundary, const string &seq=kEmptyStr)
bool operator==(const SMapRangeEdge &mre) const
SPStop(TSignedSeqRange r, EStatus s)
bool operator<(const SPStop &stp) const
setcontig(const string &cntg)
int g(Seg_Gsm *spe, Seq_Mtf *psm, Thd_Gsm *tdg)
void AddExon(CRef< CSeq_entry > seq, const string &number, TSeqPos start)