69 seq_align.
GetId().back()->GetIdType(
id);
84 bool is_protein =
false;
90 bool is_product_reversed =
false;
93 is_product_reversed =
true;
108 bool prev_3_prime_splice =
false;
110 int target_len = product_len;
112 vector<TSignedSeqRange> transcript_exons;
126 if((*i)->IsSetType() && (*i)->GetType().IsStr()) {
127 string type = (*i)->GetType().GetStr();
128 if(
type ==
"RNASeq-Counts") {
130 if(*j && (*j)->CanGetLabel() && (*j)->GetLabel().IsStr()) {
131 string label = (*j)->GetLabel().GetStr();
133 count += (*j)->GetData().GetInt();
136 }
else if(
type ==
"MismatchedBases") {
137 mismatches = (*i)->GetData().front()->GetData().GetStr();
138 }
else if(
type ==
"MismatchedBasesStatus") {
139 mismstatus = (*i)->GetData().front()->GetData().GetStr();
146 reverse(mismatches.begin(),mismatches.end());
147 reverse(mismstatus.begin(),mismstatus.end());
152 bool ggap_model=
false;
159 if (is_product_reversed) {
160 int tmp = prod_cur_start;
161 prod_cur_start = product_len - prod_cur_end -1;
162 prod_cur_end = product_len -
tmp -1;
170 if (prod_prev+1 != prod_cur_start || !prev_3_prime_splice || !cur_5_prime_splice) {
172 if(!mismatches.empty())
173 mismatches = mismatches.substr(prod_cur_start - prod_prev -1);
194 if((*it)->CanGetId() && (*it)->GetId().IsStr()) {
195 if((*it)->GetId().GetStr() ==
"idty") {
196 eident = (*it)->GetValue().GetReal();
213 string fill_seq = transcript.substr(nuc_cur_start, nuc_cur_end-nuc_cur_start+1);
225 AddGgapExon(eident, fill_seq, fill_src, Strand() ==
eMinus);
232 _ASSERT(transcript_exons.back().NotEmpty());
235 int prod_pos = prod_cur_start;
241 if(mismstatus[0] ==
'n')
243 else if(mismstatus[0] ==
'c')
245 mismstatus = mismstatus.substr(1);
250 if(!mismatches.empty()) {
251 v = mismatches.substr(0,product_ins);
252 mismatches = mismatches.substr(product_ins);
255 reverse(v.begin(),v.end());
257 if (Strand() ==
ePlus)
258 indels.push_back(fs);
260 indels.insert(indels.begin(), fs);
274 string v(mismatch_len,
'N');
275 if(!mismatches.empty()) {
276 _ASSERT(mismatch_len <= (
int)mismatches.length());
277 v = mismatches.substr(0,mismatch_len);
278 mismatches = mismatches.substr(mismatch_len);
280 if(Strand() ==
ePlus) {
283 reverse(v.begin(),v.end());
287 prod_pos += mismatch_len;
293 if(Strand() ==
ePlus)
294 indels.back().SetStatus(indelstatus);
296 indels.front().SetStatus(indelstatus);
300 prod_prev = prod_cur_end;
303 _ASSERT(mismatches.empty() || (product_len - prod_prev - 1 == (
int)mismatches.length()));
305 sort(transcript_exons.begin(),transcript_exons.end());
306 bool minusstrand = Strand() ==
eMinus;
309 reverse(transcript_exons.begin(),transcript_exons.end());
312 _ASSERT(orientation == Strand());
315 if ((*m)->IsStop_codon_found()) {
317 if (Strand() ==
ePlus) {
319 _ASSERT((transcript_exons.back().GetTo()+1)%3 == 0);
320 transcript_exons.back().SetTo(transcript_exons.back().GetTo()+3);
323 _ASSERT((transcript_exons.front().GetTo()+1)%3 == 0);
324 transcript_exons.front().SetTo(transcript_exons.front().GetTo()+3);
332 bool keepdoing =
true;
336 TInDels::iterator indl_next = indl;
337 if(++indl_next == indels.end())
340 if(indl->InDelEnd() == indl_next->Loc()) {
341 string new_seq = indl->GetInDelV()+indl_next->GetInDelV();
343 if(indl->GetType() == indl_next->GetType()) {
344 *indl =
CInDelInfo(indl->Loc(), indl->Len()+indl_next->Len(), indl->GetType(), new_seq);
345 indels.erase(indl_next);
347 }
else if(!indl->IsMismatch() && !indl_next->IsMismatch()) {
348 if(indl->Len() == indl_next->Len()) {
350 indels.erase(indl_next);
351 }
else if(indl->Len() < indl_next->Len()) {
353 *indl_next =
CInDelInfo(indl->InDelEnd(), indl_next->Len()-indl->Len(), indl_next->GetType(), new_seq.substr(indl->Len()));
355 *indl =
CInDelInfo(indl->Loc(), indl->Len()-indl_next->Len(), indl->GetType(), new_seq.substr(0,new_seq_len-indl_next->Len()));
364 m_alignmap =
CAlignMap(Exons(), transcript_exons, indels, orientation, target_len );
365 FrameShifts() = indels;
367 TSignedSeqRange newlimits = m_alignmap.ShrinkToRealPoints(Limits(),is_protein);
368 if(newlimits != Limits()) {
372 for (CGeneModel::TExons::const_iterator piece_begin = Exons().
begin(); piece_begin != Exons().end(); ++piece_begin) {
373 _ASSERT( !piece_begin->m_fsplice );
375 if(piece_begin->Limits().Empty()) {
376 _ASSERT(piece_begin->m_ssplice);
378 _ASSERT(piece_begin->Limits().NotEmpty());
381 CGeneModel::TExons::const_iterator piece_end;
382 for (piece_end = piece_begin; piece_end != Exons().end() && piece_end->m_ssplice; ++piece_end) ;
385 CGeneModel::TExons::const_iterator piece_end_g = piece_end;
386 if(piece_end_g->Limits().Empty()) {
387 _ASSERT(piece_end_g->m_fsplice);
389 _ASSERT(piece_end_g->Limits().NotEmpty());
392 TSignedSeqRange piece_range(piece_begin->GetFrom(),piece_end_g->GetTo());
394 piece_range = m_alignmap.ShrinkToRealPoints(piece_range, is_protein);
411 _ASSERT(piece_range.NotEmpty());
412 _ASSERT(piece_range.IntersectingWith(piece_begin->Limits()) && piece_range.IntersectingWith(piece_end_g->Limits()));
414 if(piece_range.GetFrom() != piece_begin->GetFrom() || piece_range.GetTo() != piece_end_g->GetTo()) {
419 piece_begin = piece_end;
423 TSignedSeqRange reading_frame = m_alignmap.MapRangeOrigToEdited(Limits(),
true);
427 if ((*m)->IsStart_codon_found()) {
430 }
else if ((*m)->IsStop_codon_found()) {
443 cds_info_t.
SetStop(stop,
false);
447 SetCdsInfo(cds_info_g);
449 SetCdsInfo(cds_info_t);
459 if((*it)->CanGetId() && (*it)->GetId().IsStr()) {
460 string scr = (*it)->GetId().GetStr();
461 if((scr ==
"N of matches") || (scr ==
"num_ident") || (scr ==
"matches")) {
462 double ident = (*it)->GetValue().GetInt();
465 }
else if(scr ==
"rank" && (*it)->GetValue().GetInt() == 1) {
467 }
else if(scr ==
"ambiguous_orientation") {
469 }
else if(scr ==
"count") {
470 _ASSERT(Weight() == 1 || Weight() == (*it)->GetValue().GetInt());
471 SetWeight((*it)->GetValue().GetInt());
495 string cds_seq(cds_len,
'A');
496 copy(mrna.begin()+cds_start, mrna.begin()+cds_start+cds_len, cds_seq.begin());
519 prot_seq[(stp->GetFrom()- cds_info.
Cds().
GetFrom())/3] =
'U';
532 if (prot_seq[0] ==
'-') {
533 string first_triplet = cds_seq.substr(0, 3);
536 prot_seq = first_aa+prot_seq.substr(1);
547 prot_seq[(stp->GetFrom()- cds_info.
Cds().
GetFrom())/3] =
'U';
559 CRef<CSeq_loc> s_ExonDataToLoc(
const vector<TSignedSeqRange>& vec,
565 ITERATE (vector<TSignedSeqRange>, iter, vec) {
567 ival->SetFrom(iter->GetFrom());
568 ival->SetTo(iter->GetTo());
569 ival->SetStrand(strand);
570 ival->SetId().Assign(
id);
572 data.push_back(ival);
575 if (data.size() == 1) {
576 loc->SetInt(*data.front());
578 loc->SetPacked_int().Set().swap(data);
592 annot->SetNameDesc(
"Gnomon gene scan output");
596 unsigned int counter = 0;
597 string locus_tag_base(
"GNOMON_");
600 int strand = igene.
Strand();
603 vector<TSignedSeqRange> mrna_vec;
604 copy(igene.
Exons().begin(), igene.
Exons().end(), back_inserter(mrna_vec));
605 vector<TSignedSeqRange> cds_vec;
607 for (
size_t j = 0; j < mrna_vec.size(); ++j) {
609 if (!intersect.
Empty()) {
610 cds_vec.push_back(intersect);
616 if (strand ==
ePlus) {
617 _ASSERT(cds_vec.back().GetLength()>=3);
618 cds_vec.back().SetTo(cds_vec.back().GetTo() - 3);
620 _ASSERT(cds_vec.front().GetLength()>=3);
621 cds_vec.front().SetFrom(cds_vec.front().GetFrom() + 3);
628 if (mrna_vec.size()) {
632 (*s_ExonDataToLoc(mrna_vec,
641 if (!cds_vec.empty()) {
646 (*s_ExonDataToLoc(cds_vec,
655 sprintf(
buf,
"%04u", ++counter);
656 string name(locus_tag_base);
658 feat_gene->
SetData().SetGene().SetLocus_tag(name);
669 ftable.push_back(feat_gene);
670 ftable.push_back(feat_mrna);
672 ftable.push_back(feat_cds);
696 unsigned int gc_count = 0;
698 for( ; xcript_iter; ++xcript_iter) {
699 if (*xcript_iter ==
'G' || *xcript_iter ==
'C') {
703 *gccontent =
static_cast<unsigned int>(100.0 * gc_count / xcript_vec.
size() + 0.5);
710 vec.SetIupacCoding();
712 seq.reserve(vec.size());
714 for( ; iter; ++iter) {
721 for (
unsigned int i = 5;
i < seq.size() - 3; ++
i)
732 int totallen = xcript_vec.
size();
735 int extrabases = start.
Left()+2;
738 if(startposition < extrabases) {
740 extraNs5p = extrabases-startposition;
742 left = startposition-extrabases;
744 right =
min(startposition+2+start.
Right(),totallen-1);
747 if(startposition+extrabases >= totallen) {
749 extraNs5p = startposition+extrabases-totallen+1;
751 right = startposition+extrabases;
753 left =
max(0,startposition-2-start.
Right());
769 sttseq.resize(extraNs5p,
enN);
770 for(
unsigned int i = 0;
i < sttvec.
size(); ++
i) {
775 *startscore = start.
Score(sttseq, extrabases+2);
777 for(
unsigned int i = 5;
i < sttseq.size(); ++
i) {
778 *startscore -= ncdr.
Score(sttseq,
i);
User-defined methods of the data storage class.
void EditedSequence(const In &original_sequence, Out &edited_sequence, bool includeholes=false) const
CCDSInfo MapFromEditedToOrig(const CAlignMap &amap) const
void SetStart(TSignedSeqRange r, bool confirmed=false)
CCDSInfo MapFromOrigToEdited(const CAlignMap &amap) const
bool IsMappedToGenome() const
TSignedSeqRange Cds() const
TSignedSeqRange ReadingFrame() const
void SetStop(TSignedSeqRange r, bool confirmed=false)
const TPStops & PStops() const
void SetReadingFrame(TSignedSeqRange r, bool protein=false)
static double GetScore(CConstRef< CHMMParameters > hmm_params, const objects::CSeq_loc &cds, objects::CScope &scope, int *const gccontent, double *const startscore=0)
string GetProtein(const CResidueVec &contig_sequence) const
TSignedSeqRange TranscriptLimits() const
const TExons & Exons() const
TSignedSeqRange ReadingFrame() const
virtual CAlignMap GetAlignMap() const
TSignedSeqRange RealCdsLimits() const
string GetCdsDnaSequence(const CResidueVec &contig_sequence) const
const CCDSInfo & GetCdsInfo() const
bool PStop(bool includeall=true) const
list< CGeneModel > GetGenes() const
CRef< objects::CSeq_annot > GetAnnot(const objects::CSeq_id &id)
const CInputModel & GetParameter(const string &type, int cgcontent) const
static string ToString(const CSeq_id &id)
double Score(const CEResidueVec &seq, int i, int codonshift) const
double Score(const CEResidueVec &seq, int i) const
TSeqPos GetAlignLength(bool include_gaps=true) const
Get the length of this alignment.
namespace ncbi::objects::
double Score(const CEResidueVec &seq, int i) const
constexpr auto begin(const ct_const_array< T, N > &in) noexcept
constexpr auto end(const ct_const_array< T, N > &in) noexcept
bool Empty(const CNcbiOstrstream &src)
vector< TResidue > CResidueVec
void ReverseComplement(const BidirectionalIterator &first, const BidirectionalIterator &last)
list< CGeneModel > TGeneModelList
vector< CInDelInfo > TInDels
USING_SCOPE(ncbi::objects)
Int8 GetModelId(const CSeq_align &seq_align)
EResidue fromACGT(TResidue c)
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
int TSignedSeqPos
Type for signed sequence position.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
ENa_strand GetStrand(void) const
Get the location's strand.
TRange GetTotalRange(void) const
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
CSeqVector GetSeqVector(EVectorCoding coding, ENa_strand strand=eNa_strand_plus) const
Get sequence: Iupacna or Iupacaa if use_iupac_coding is true.
void SetIupacCoding(void)
Set coding to either Iupacaa or Iupacna depending on molecule type.
int64_t Int8
8-byte (64-bit) signed integer
position_type GetLength(void) const
bool NotEmpty(void) const
CRange< TSignedSeqPos > TSignedSeqRange
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
static const char label[]
void SetFrom(TFrom value)
Assign a value to From data member.
TTo GetTo(void) const
Get the To member data.
TFrom GetFrom(void) const
Get the From member data.
void SetTo(TTo value)
Assign a value to To data member.
vector< CRef< CUser_field > > TData
const TDonor_after_exon & GetDonor_after_exon(void) const
Get the Donor_after_exon member data.
const TId & GetId(void) const
Get the Id member data.
bool CanGetProduct_length(void) const
Check if it is safe to call GetProduct_length method.
const TGenomic_id & GetGenomic_id(void) const
Get the Genomic_id member data.
bool CanGetAcceptor_before_exon(void) const
Check if it is safe to call GetAcceptor_before_exon method.
bool CanGetBases(void) const
Check if it is safe to call GetBases method.
bool CanGetGenomic_id(void) const
Check if it is safe to call GetGenomic_id method.
vector< CRef< CScore > > TScore
TMatch GetMatch(void) const
Get the variant data.
list< CRef< CScore > > Tdata
const TProduct_id & GetProduct_id(void) const
Get the Product_id member data.
TGenomic_start GetGenomic_start(void) const
Get the Genomic_start member data.
const TAcceptor_before_exon & GetAcceptor_before_exon(void) const
Get the Acceptor_before_exon member data.
bool CanGetGenomic_id(void) const
Check if it is safe to call GetGenomic_id method.
bool IsMismatch(void) const
Check if variant Mismatch is selected.
TProduct_length GetProduct_length(void) const
Get the Product_length member data.
bool CanGetProduct_type(void) const
Check if it is safe to call GetProduct_type method.
bool IsSetPoly_a(void) const
start of poly(A) tail on the transcript For sense transcripts: aligned product positions < poly-a <= ...
bool CanGetModifiers(void) const
Check if it is safe to call GetModifiers method.
TDiag GetDiag(void) const
Get the variant data.
bool CanGetScore(void) const
Check if it is safe to call GetScore method.
TProduct_type GetProduct_type(void) const
Get the Product_type member data.
TMismatch GetMismatch(void) const
Get the variant data.
list< CRef< CUser_object > > TExt
bool CanGetProduct_strand(void) const
Check if it is safe to call GetProduct_strand method.
const TParts & GetParts(void) const
Get the Parts member data.
const TProduct_start & GetProduct_start(void) const
Get the Product_start member data.
const TProduct_end & GetProduct_end(void) const
Get the Product_end member data.
const TSpliced & GetSpliced(void) const
Get the variant data.
list< CRef< CSpliced_seg_modifier > > TModifiers
bool IsGenomic_ins(void) const
Check if variant Genomic_ins is selected.
bool IsMatch(void) const
Check if variant Match is selected.
bool CanGetGenomic_strand(void) const
Check if it is safe to call GetGenomic_strand method.
TGenomic_ins GetGenomic_ins(void) const
Get the variant data.
const TScores & GetScores(void) const
Get the Scores member data.
bool CanGetExt(void) const
Check if it is safe to call GetExt method.
list< CRef< CSpliced_exon > > TExons
const TExons & GetExons(void) const
Get the Exons member data.
TGenomic_strand GetGenomic_strand(void) const
Get the Genomic_strand member data.
bool CanGetScores(void) const
Check if it is safe to call GetScores method.
const TExt & GetExt(void) const
Get the Ext member data.
const TBases & GetBases(void) const
Get the Bases member data.
list< CRef< CSpliced_exon_chunk > > TParts
bool CanGetDonor_after_exon(void) const
Check if it is safe to call GetDonor_after_exon method.
TGenomic_end GetGenomic_end(void) const
Get the Genomic_end member data.
const Tdata & Get(void) const
Get the member data.
TProduct_strand GetProduct_strand(void) const
Get the Product_strand member data.
bool CanGet(void) const
Check if it is safe to call Get method.
bool IsProduct_ins(void) const
Check if variant Product_ins is selected.
const TScore & GetScore(void) const
Get the Score member data.
const TModifiers & GetModifiers(void) const
Get the Modifiers member data.
TProduct_ins GetProduct_ins(void) const
Get the variant data.
const TSegs & GetSegs(void) const
Get the Segs member data.
const TGenomic_id & GetGenomic_id(void) const
Get the Genomic_id member data.
bool CanGetId(void) const
Check if it is safe to call GetId method.
void SetLocation(TLocation &value)
Assign a value to Location data member.
const TLocation & GetLocation(void) const
Get the Location member data.
void SetData(TData &value)
Assign a value to Data data member.
void SetTo(TTo value)
Assign a value to To data member.
list< CRef< CSeq_interval > > Tdata
ENa_strand
strand of nucleic acid
void SetId(TId &value)
Assign a value to Id data member.
void SetFrom(TFrom value)
Assign a value to From data member.
void SetStrand(TStrand value)
Assign a value to Strand data member.
list< CRef< CSeq_feat > > TFtable
string GetDNASequence(CConstRef< objects::CSeq_id > id, CScope &scope)
constexpr auto sort(_Init &&init)
Magic spell ;-) needed for some weird compilers... very empiric.
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)