1 #ifndef ALGO_GNOMON___GNOMON_MODEL__HPP
2 #define ALGO_GNOMON___GNOMON_MODEL__HPP
116 int Len()
const {
return m_len; }
123 return (IsDeletion() &&
Loc() >=
a &&
Loc() <=
b+1) ||
124 ((IsInsertion() || IsMismatch()) &&
Loc() <=
b &&
a <=
Loc()+
Len()-1);
128 if(m_loc != fsi.m_loc)
129 return m_loc < fsi.m_loc;
130 else if(m_type != fsi.m_type)
131 return m_type < fsi.m_type;
132 else if(m_len != fsi.m_len)
133 return m_len < fsi.m_len;
135 return m_indelv < fsi.m_indelv;
154 _ASSERT(m_indelv.empty() || (
int)m_indelv.length() ==
len);
155 _ASSERT(m_indelv.empty() || m_type != eIns);
156 if((IsDeletion() || IsMismatch()) && GetInDelV().
empty())
157 m_indelv.insert( m_indelv.end(),
Len(),
'N');
186 m_fsplice(fs), m_ssplice(ss), m_fsplice_sig(fsig), m_ssplice_sig(ssig), m_ident(ident), m_seq(seq), m_source(src), m_range(
f,s)
188 _ASSERT(m_seq.empty() || m_range.Empty());
197 return !(*
this == p);
207 void AddFrom(
int d) { m_range.SetFrom( m_range.GetFrom() +d ); }
208 void AddTo(
int d) { m_range.SetTo( m_range.GetTo() +d ); }
255 void SetScore(
double score,
bool open=
false);
283 bool PStop(
bool includeall =
true)
const;
367 eNotForChaining = 64,
371 static string TypeToString(
int type);
383 eBestPlacement = 512,
384 eUnknownOrientation = 1024,
385 eConsistentCoverage = 2048,
387 eUnmodifiedAlign = 8192,
388 eChangedByFilter = 16384,
390 eLeftConfirmed = 65536,
391 eRightConfirmed = 131072,
392 eLeftFlexible = 262144,
393 eRightFlexible = 524288
397 m_type(
type), m_id(id), m_status(0), m_ident(0), m_weight(1), m_expecting_hole(
false), m_strand(s), m_geneid(0), m_rank_in_gene(0) {}
402 void AddGgapExon(
double ident,
const string& seq,
const CInDelInfo::SSource& src,
bool infront);
403 void AddNormalExon(
TSignedSeqRange exon,
const string& fs,
const string& ss,
double ident,
bool infront);
413 m_edge_reading_frames.clear();
415 void SetSplices(
int i,
const string& f_sig,
const string& s_sig) { m_exons[
i].m_fsplice_sig = f_sig; m_exons[
i].m_ssplice_sig = s_sig; }
417 void ReverseComplementModel();
423 void ExtendLeft(
int amount);
424 void ExtendRight(
int amount);
425 void Extend(
const CGeneModel&
a,
bool ensure_cds_invariant =
true);
426 void RemoveShortHolesAndRescore(
const CGnomonEngine& gnomon);
432 int AlignLen()
const ;
433 void RecalculateLimits();
439 int RealCdsLen()
const ;
445 void SetCdsInfo(
const CCDSInfo& cds_info);
447 void CombineCdsInfo(
const CGeneModel&
a,
bool ensure_cds_invariant =
true);
448 void CombineCdsInfo(
const CCDSInfo& cds_info,
bool ensure_cds_invariant =
true);
452 return Limits().IntersectingWith(
a.Limits());
455 double Ident()
const {
return m_ident; }
458 double Weight()
const {
return m_weight; }
465 bool plusstrand = Strand() ==
ePlus;
466 return (notreversed == plusstrand) ?
ePlus :
eMinus;
470 int Type()
const {
return m_type; }
483 unsigned int&
Status() {
return m_status; }
484 const unsigned int&
Status()
const {
return m_status; }
488 void SetComment(
const string& comment) { m_comment = comment; }
489 void AddComment(
const string& comment) { m_comment +=
" " + comment; }
493 double Score()
const {
return m_cds_info.Score(); }
497 for(
unsigned int i = 1;
i < Exons().size(); ++
i)
498 if (!Exons()[
i-1].m_ssplice || !Exons()[
i].m_fsplice)
502 bool HasStart()
const {
return m_cds_info.HasStart(); }
503 bool HasStop ()
const {
return m_cds_info.HasStop (); }
506 bool FullCds()
const {
return HasStart() && HasStop() && Continuous(); }
507 bool CompleteCds()
const {
return FullCds() && (!Open5primeEnd() || ConfirmedStart()); }
511 _ASSERT( !(OpenCds()&&ConfirmedStart()) );
512 return (ReadingFrame().
Empty() || (!OpenCds() && FullCds()));
517 return (Strand() ==
ePlus ? OpenLeftEnd() : OpenRightEnd());
528 bool OpenCds()
const {
return m_cds_info.OpenCds(); }
529 bool PStop(
bool includeall =
true)
const {
return m_cds_info.PStop(includeall); }
534 bool isNMD(
int limit = 50)
const;
539 TInDels GetInDels(
bool fs_only)
const;
550 string GetCdsDnaSequence (
const CResidueVec& contig_sequence)
const;
551 string GetProtein (
const CResidueVec& contig_sequence)
const;
556 int HasCompatibleOverlap(
const CGeneModel&
a,
int min_overlap = 2)
const;
562 {
return Strand()==
a.Strand() && Limits()==
a.Limits() && Exons() ==
a.Exons() && FrameShifts()==
a.FrameShifts() &&
563 GetCdsInfo().PStops() ==
a.GetCdsInfo().PStops() &&
Type() ==
a.Type() && Status() ==
a.Status(); }
566 return IdenticalAlign(
a) &&
Type()==
a.Type() && m_id==
a.m_id && m_support==
a.m_support;
569 const list< CRef<CSeq_id> >&
TrustedmRNA()
const {
return m_trusted_mrna; }
573 const list< CRef<CSeq_id> >&
TrustedProt()
const {
return m_trusted_prot; }
586 void RemoveExtraFShifts(
int left,
int right);
587 void TrimEdgesToFrameInOtherAlignGaps(
const TExons& exons_with_gaps);
605 bool CdsInvariant(
bool check_start_stop =
true)
const;
636 if(fsi_begin != fsi_end) {
637 if(fsi_begin->Loc() == orig_a && !fsi_begin->IsMismatch()) {
638 _ASSERT(!fsi_begin->IsInsertion());
641 TInDels::const_iterator fs = fsi_end-1;
642 if(fs->Loc() == orig_b+1 && fs->IsDeletion())
655 template <
class In,
class Out>
656 void EditedSequence(
const In& original_sequence,
Out& edited_sequence,
bool includeholes =
false)
const;
670 mrange.MoveOrigin(shift);
727 EEdgeType left_type,
EEdgeType right_type,
const string& left_edit_extra_seq,
const string& right_edit_extra_seq);
744 void ResetAlignMap();
755 string TargetAccession()
const;
756 void SetTargetId(
const objects::CSeq_id&
id) { m_target_id.Reset(&
id); }
758 int TargetLen()
const {
return m_alignmap.TargetLen(); }
759 int PolyALen()
const;
763 void RecalculateAlignMap(
int left,
int right);
773 explicit setcontig(
const string& cntg) : m_contig(cntg) {}
787 template<
class Model>
794 m_limits.CombineWith(
a.Limits());
798 m_limits.CombineWith(c.
Limits());
799 this->splice(list<Model>::end(),c);
804 list<Model>::clear();
805 m_limits.SetFrom(
first );
806 m_limits.SetTo( second );
820 template<
class Cluster>
825 void Insert(
const typename Cluster::TModel&
a) {
831 clust.Splice(
const_cast<Cluster&
>(*it));
834 const_cast<Cluster&
>(*this->insert(second,Cluster(clust.Limits()))).Splice(clust);
888 template <
class B
idirectionalIterator>
896 template<
class Model>
899 int left = algn.Limits().GetFrom();
900 for(
unsigned int i = 1;
i < algn.Exons().
size(); ++
i) {
901 if (!algn.Exons()[
i-1].m_ssplice || !algn.Exons()[
i].m_fsplice) {
904 if(!parts.empty() && settrimflags) {
905 parts.back().Status() &= ~
CGeneModel::eRightTrimmed;
909 left = algn.
Exons()[
i].GetFrom();
916 parts.back().Status() &= ~
CGeneModel::eRightTrimmed;
TSignedSeqPos GetTo() const
const string & GetMismatch() const
TSignedSeqPos GetExtendedFrom() const
void SetEdgeFrom(SMapRangeEdge from)
EEdgeType GetTypeTo() const
string GetExtraSeqFrom() const
void MoveOrigin(TSignedSeqPos shift)
bool operator<(const SMapRange &mr) const
TSignedSeqPos GetExtraTo() const
string GetExtraSeqTo() const
SMapRangeEdge GetEdgeFrom() const
TSignedSeqPos GetFrom() const
TSignedSeqPos GetExtraFrom() const
SMapRangeEdge GetEdgeTo() const
TSignedSeqPos GetExtendedTo() const
EEdgeType GetTypeFrom() const
void SetEdgeTo(SMapRangeEdge to)
SMapRange(SMapRangeEdge from, SMapRangeEdge to, const string &mism)
CAlignMap(TSignedSeqPos orig_a, TSignedSeqPos orig_b, TInDels::const_iterator fsi_begin, const TInDels::const_iterator fsi_end)
TSignedSeqPos FShiftedMove(TSignedSeqPos orig_pos, int len) const
static int FindLowerRange(const vector< CAlignMap::SMapRange > &a, TSignedSeqPos p)
TSignedSeqRange ShrinkToRealPointsOnEdited(TSignedSeqRange edited_range) const
TSignedSeqRange MapRangeOrigToEdited(TSignedSeqRange orig_range, bool withextras=true) const
void MoveOrigin(TSignedSeqPos shift)
int FShiftedLen(TSignedSeqPos a, TSignedSeqPos b, bool withextras=true) const
void InsertOneToOneRange(TSignedSeqPos orig_start, TSignedSeqPos edited_start, TSignedSeqPos len, const string &mism, TSignedSeqPos left_orige, TSignedSeqPos left_edite, TSignedSeqPos right_orige, TSignedSeqPos right_edite, EEdgeType left_type, EEdgeType right_type, const string &left_edit_extra_seq, const string &right_edit_extra_seq)
static TSignedSeqRange MapRangeAtoB(const vector< CAlignMap::SMapRange > &a, const vector< CAlignMap::SMapRange > &b, TSignedSeqRange r, ERangeEnd lend, ERangeEnd rend)
vector< SMapRange > m_edited_ranges
CAlignMap(TSignedSeqPos orig_a, TSignedSeqPos orig_b)
TSignedSeqPos InsertIndelRangesForInterval(TSignedSeqPos orig_a, TSignedSeqPos orig_b, TSignedSeqPos edit_a, TInDels::const_iterator fsi_begin, TInDels::const_iterator fsi_end, EEdgeType type_a, EEdgeType type_b, const string &gseq_a, const string &gseq_b)
TSignedSeqRange MapRangeEditedToOrig(TSignedSeqRange edited_range, bool withextras=true) const
static TSignedSeqRange MapRangeAtoB(const vector< CAlignMap::SMapRange > &a, const vector< CAlignMap::SMapRange > &b, TSignedSeqRange r, bool withextras)
TSignedSeqPos MapOrigToEdited(TSignedSeqPos orig_pos) const
void EditedSequence(const In &original_sequence, Out &edited_sequence, bool includeholes=false) const
TSignedSeqPos MapEditedToOrig(TSignedSeqPos edited_pos) const
TSignedSeqRange ShrinkToRealPoints(TSignedSeqRange orig_range, bool snap_to_codons=false) const
int FShiftedLen(TSignedSeqRange ab, ERangeEnd lend, ERangeEnd rend) const
static TSignedSeqPos MapAtoB(const vector< CAlignMap::SMapRange > &a, const vector< CAlignMap::SMapRange > &b, TSignedSeqPos p, ERangeEnd move_mode)
EStrand Orientation() const
TSignedSeqRange MapRangeOrigToEdited(TSignedSeqRange orig_range, ERangeEnd lend, ERangeEnd rend) const
vector< SMapRange > m_orig_ranges
virtual void Clip(TSignedSeqRange limits, EClipMode mode, bool ensure_cds_invariant=true)
CConstRef< objects::CSeq_id > m_target_id
CConstRef< objects::CSeq_id > GetTargetId() const
virtual void CutExons(TSignedSeqRange hole)
virtual CAlignMap GetAlignMap() const
void SetTargetId(const objects::CSeq_id &id)
CAlignModel(const objects::CSeq_align &seq_align)
void Cut(TSignedSeqRange hole)
CCDSInfo MapFromEditedToOrig(const CAlignMap &amap) const
bool PStop(bool includeall=true) const
void SetStart(TSignedSeqRange r, bool confirmed=false)
CCDSInfo MapFromOrigToEdited(const CAlignMap &amap) const
TSignedSeqRange MaxCdsLimits() const
void Set5PrimeCdsLimit(TSignedSeqPos p)
CCDSInfo(bool gcoords=true)
void Remap(const CRangeMapper &mapper)
bool IsMappedToGenome() const
void SetScore(double score, bool open=false)
bool m_genomic_coordinates
TSignedSeqRange m_reading_frame_from_proteins
TSignedSeqRange Start() const
bool ConfirmedStart() const
TSignedSeqRange m_reading_frame
void Clip(TSignedSeqRange limits)
void AddPStop(SPStop stp)
TSignedSeqRange Cds() const
void CombineWith(const CCDSInfo &another_cds_info)
TSignedSeqRange ReadingFrame() const
TSignedSeqRange m_max_cds_limits
TSignedSeqRange ProtReadingFrame() const
void SetStop(TSignedSeqRange r, bool confirmed=false)
const TPStops & PStops() const
bool ConfirmedStop() const
void Clear5PrimeCdsLimit()
void SetReadingFrame(TSignedSeqRange r, bool protein=false)
bool operator==(const CCDSInfo &another) const
TSignedSeqRange Stop() const
void SetSplices(int i, const string &f_sig, const string &s_sig)
const list< CRef< CSeq_id > > & TrustedProt() const
bool operator<(const CGeneModel &a) const
const unsigned int & Status() const
bool GoodEnoughToBeAnnotation() const
bool Open5primeEnd() const
int FShiftedLen(TSignedSeqPos a, TSignedSeqPos b, bool withextras=true) const
virtual void CutExons(TSignedSeqRange hole)
bool IntersectingWith(const CGeneModel &a) const
EStrand Orientation() const
void InsertTrustedProt(CRef< CSeq_id > g)
list< CRef< CSeq_id > > m_trusted_mrna
void SetRankInGene(int rank)
bool IdenticalAlign(const CGeneModel &a) const
const CSupportInfoSet & Support() const
bool OpenRightEnd() const
const TExons & Exons() const
vector< CCDSInfo > * SetEdgeReadingFrames()
bool LeftComplete() const
TSignedSeqRange ReadingFrame() const
const TInDels & FrameShifts() const
list< CRef< CSeq_id > > m_trusted_prot
bool RightComplete() const
CGeneModel(EStrand s=ePlus, Int8 id=0, int type=0)
CSupportInfoSet m_support
void SetStrand(EStrand s)
void ReplaceSupport(const CSupportInfoSet &support_set)
virtual void Clip(TSignedSeqRange limits, EClipMode mode, bool ensure_cds_invariant=true)
bool ConfirmedStop() const
void InsertTrustedmRNA(CRef< CSeq_id > g)
bool AddSupport(const CSupportInfo &support)
TSignedSeqRange Limits() const
const list< CRef< CSeq_id > > & TrustedmRNA() const
void AddComment(const string &comment)
const CCDSInfo & GetCdsInfo() const
const string & GetComment() const
bool operator==(const CGeneModel &a) const
vector< CModelExon > TExons
const string & ProteinHit() const
bool ConfirmedStart() const
const vector< CCDSInfo > * GetEdgeReadingFrames() const
vector< CCDSInfo > m_edge_reading_frames
bool PStop(bool includeall=true) const
void SetComment(const string &comment)
const SSource & GetSource() const
void SetStatus(EStatus s)
TSignedSeqPos Loc() const
bool operator==(const CInDelInfo &fsi) const
EStatus GetStatus() const
void Init(TSignedSeqPos l, int len, EType type, const string &v, const SSource &s)
void SetLoc(TSignedSeqPos l)
bool operator<(const CInDelInfo &fsi) const
bool operator!=(const CInDelInfo &fsi) const
bool IntersectingWith(TSignedSeqPos a, TSignedSeqPos b) const
CInDelInfo(TSignedSeqPos l, int len, EType type, const string &v=kEmptyStr, const SSource &s=SSource())
set< Cluster >::iterator Titerator
void Insert(const typename Cluster::TModel &a)
CModelCluster(TSignedSeqRange limits)
void Insert(const Model &a)
void Init(TSignedSeqPos first, TSignedSeqPos second)
CModelCluster(int f=numeric_limits< int >::max(), int s=0)
TSignedSeqRange Limits() const
void Splice(CModelCluster &c)
bool operator<(const CModelCluster &c) const
bool operator==(const CModelExon &p) const
CInDelInfo::SSource m_source
CModelExon(TSignedSeqPos f=0, TSignedSeqPos s=0, bool fs=false, bool ss=false, const string &fsig="", const string &ssig="", double ident=0, const string &seq="", const CInDelInfo::SSource &src=CInDelInfo::SSource())
bool operator<(const CModelExon &p) const
TSignedSeqPos GetFrom() const
const TSignedSeqRange & Limits() const
void Remap(const CRangeMapper &mapper)
TSignedSeqPos GetTo() const
TSignedSeqRange & Limits()
bool operator!=(const CModelExon &p) const
virtual TSignedSeqRange operator()(TSignedSeqRange r, bool withextras=true) const =0
bool operator==(const CSupportInfo &s) const
CSupportInfo(Int8 model_id, bool core=false)
bool operator<(const CSupportInfo &s) const
EResidue(EResidueNames e)
const_iterator upper_bound(const key_type &key) const
const_iterator lower_bound(const key_type &key) const
Include a standard set of the NCBI C++ Toolkit most basic headers.
bool Empty(const CNcbiOstrstream &src)
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
static const TDS_WORD limits[]
vector< TResidue > CResidueVec
bool Precede(TSignedSeqRange l, TSignedSeqRange r)
set< CSupportInfo > CSupportInfoSet
TResidue Complement(TResidue c)
CModelCluster< CAlignModel > TAlignModelCluster
const char *const k_aa_table
CModelCluster< CGeneModel > TGeneModelCluster
list< CAlignModel > TAlignModelList
CModelClusterSet< TAlignModelCluster > TAlignModelClusterSet
CNcbiOstream & operator<<(CNcbiOstream &s, const setcontig &c)
CNcbiIstream & operator>>(CNcbiIstream &s, const getcontig &c)
bool Include(TSignedSeqRange big, TSignedSeqRange small)
const EResidue k_toMinus[5]
void ReverseComplement(const BidirectionalIterator &first, const BidirectionalIterator &last)
CModelClusterSet< TGeneModelCluster > TGeneModelClusterSet
list< CGeneModel > TGeneModelList
bool IsStopCodon(const Res *seq, int strand=ePlus)
EStrand OtherStrand(EStrand s)
vector< CInDelInfo > TInDels
objects::CSeqVectorTypes::TResidue TResidue
bool Enclosed(TSignedSeqRange big, TSignedSeqRange small)
void MapAlignsToOrigContig(TAlignModelList &aligns, const TInDels &corrections, int contig_size)
list< Model > GetAlignParts(const Model &algn, bool settrimflags)
bool IsStartCodon(const Res *seq, int strand=ePlus)
int TSignedSeqPos
Type for signed sequence position.
int64_t Int8
8-byte (64-bit) signed integer
bool NotEmpty(void) const
bool operator<(const TThisType &r) const
static TThisType GetEmpty(void)
static position_type GetWholeFrom(void)
CRange< TSignedSeqPos > TSignedSeqRange
static TThisType GetWhole(void)
static position_type GetWholeTo(void)
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
#define NCBI_XALGOGNOMON_EXPORT
TTo GetTo(void) const
Get the To member data.
TFrom GetFrom(void) const
Get the From member data.
unsigned int
A callback function used to compare two keys in a database.
constexpr bool empty(list< Ts... >) noexcept
const struct ncbi::grid::netcache::search::fields::SIZE size
Portable reference counted smart and weak pointers using CWeakRef, CRef, CObject and CObjectEx.
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
double f(double x_, const double &y_)
void Out(T t, int w, CNcbiOstream &to=cout)
static SLJIT_INLINE sljit_ins l(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
bool operator<(const SMapRangeEdge &mre) const
SMapRangeEdge(TSignedSeqPos p, TSignedSeqPos e=0, EEdgeType t=eBoundary, const string &seq=kEmptyStr)
bool operator==(const SMapRangeEdge &mre) const
SPStop(TSignedSeqRange r, EStatus s)
bool operator<(const SPStop &stp) const
setcontig(const string &cntg)
int g(Seg_Gsm *spe, Seq_Mtf *psm, Thd_Gsm *tdg)
void AddExon(CRef< CSeq_entry > seq, const string &number, TSeqPos start)