49 : scope(a_scope), generator(a_scope)
62 starts.
insert(
r->first.GetFrom());
67 return make_pair(starts, stops);
76 pair<bool, bool> trim_by_contig(
false,
false);
88 padding =
min(
padding, ((stop > start ? genomic_length : 0) - (stop - start +1))/2);
94 if (start <= 2 && !is_circular) {
95 trim_by_contig.first =
true;
97 if (stop >= genomic_length-3 && !is_circular) {
98 trim_by_contig.second =
true;
102 start = is_circular ? start + genomic_length : 0;
104 if (stop >= genomic_length) {
105 stop = is_circular ? stop - genomic_length : genomic_length-1;
119 query_loc->
SetInt(*spl.
GetExons().front()->CreateRowSeq_interval(0, spl));
124 if (seq.size()%3 != 0) {
134 code->Set().push_back(c_e);
137 const size_t kUnknownState = tbl.
SetCodonState(
'N',
'N',
'N');
144 string codon =
"NNN";
153 if (
state == kUnknownState)
158 query_loc->
SetInt().SetFrom((k-3)/3);
159 query_loc->
SetInt().SetTo((k-3)/3);
161 query_loc->
SetInt().SetFrom(k-3);
162 query_loc->
SetInt().SetTo(k-1);
170 starts[
TSeqRange(mapped_pos, mapped_pos2)] = codon;
178 if (gaps !=
nullptr) {
183 region_loc->
SetInt().SetFrom(0);
187 region_loc->
SetInt().SetTo(genomic_length-1);
203 id->Assign(*region_loc->
GetId());
207 for (
auto s: region_seq) {
213 }
else if (gap_end == k) {
214 query_loc->
SetInt().SetFrom(gap_begin);
215 query_loc->
SetInt().SetTo(gap_end-1);
217 auto mapped_loc = mapper.
Map(*query_loc);
230 if (trim_by_contig.first) {
233 if (!gaps->
empty() && gaps->
begin()->GetFrom()==0) {
234 gap_stop = gaps->
begin()->GetTo();
239 if (!gaps->
empty() && gaps->
begin()->GetTo()==0) {
240 gap_stop = gaps->
begin()->GetFrom();
246 if (trim_by_contig.second) {
247 int gap_start = genomic_length;
249 if (!gaps->
empty() && gaps->rbegin()->GetTo()==genomic_length-1) {
250 gap_start = gaps->rbegin()->GetFrom();
255 if (!gaps->
empty() && gaps->rbegin()->GetFrom()==genomic_length-1) {
256 gap_start = gaps->rbegin()->GetTo();
264 return make_pair(starts, stops);
287 int next_prod_start = 0;
302 cds_ranges.push_back(loc_ci.GetRange());
305 cds_ranges.back().SetTo(cds_ranges.back().GetTo()-3);
313 int prod_pos_start = (*exon)->GetProduct_start().AsSeqPos();
316 subject_loc->
SetInt(*(*exon)->CreateRowSeq_interval(1, spliced_seg));
323 if (next_prod_start < prod_pos_start) {
324 mRNA.append(prod_pos_start - next_prod_start,
'N');
325 next_prod_start = prod_pos_start;
328 if ((*exon)->IsSetParts()) {
330 pair<int, int> chunk =
ChunkSize(**part_it);
331 prod_pos_start += chunk.second;
332 if (chunk.first == 0) {
333 if (next_prod_start < prod_pos_start) {
334 mRNA.append(prod_pos_start - next_prod_start,
'N');
335 next_prod_start = prod_pos_start;
337 }
else if (chunk.second > 0) {
338 if (next_prod_start < prod_pos_start) {
339 mRNA.append(subject_seq, subj_pos+chunk.second-(prod_pos_start - next_prod_start), prod_pos_start - next_prod_start);
340 next_prod_start = prod_pos_start;
343 subj_pos += chunk.first;
346 mRNA.append(subject_seq);
347 next_prod_start += subject_seq.size();
351 if (cds_ranges.front().IsWhole()) {
356 if (
range.GetFrom() >= mRNA.size()) {
359 cds_seq += mRNA.substr(
range.GetFrom(),
range.GetLength());
370 switch (chunk.
Which()) {
373 return make_pair(
len, 0);
376 return make_pair(0,
len);
389 return make_pair(
len,
len);
@ eExtreme_Positional
numerical value
@ eExtreme_Biological
5' and 3'
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
CConstRef< objects::CSeq_align > AdjustAlignment(const objects::CSeq_align &align, TSeqRange range, EProductPositionsMode mode=eForceProductFrom0)
void SetFlags(TFeatureGeneratorFlags)
void SetAllowedUnaligned(TSeqPos)
CConstRef< objects::CSeq_align > CleanAlignment(const objects::CSeq_align &align)
Clean an alignment according to our best guess of its biological representation.
static const CTrans_table & GetTransTable(int id)
string GetCDSNucleotideSequence(const CSeq_align &align)
set< TSeqPos > FindStops(const CSeq_align &align)
pair< map< TSeqRange, string >, set< TSeqRange > > FindStartStopRanges(const CSeq_align &align, int padding=0, set< TSignedSeqRange > *gaps=nullptr)
bool HasInternalStops(const CSeq_align &align)
pair< set< TSeqPos >, set< TSeqPos > > FindStartsStops(const CSeq_align &align, int padding=0)
CFeatureGenerator generator
CInternalStopFinder(CScope &scope)
CRef< CSeq_loc > CreateRowSeq_loc(TDim row) const
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
bool IsOrfStart(int state) const
static int SetCodonState(unsigned char ch1, unsigned char ch2, unsigned char ch3)
static int NextCodonState(int state, unsigned char ch)
bool IsOrfStop(int state) const
iterator_bool insert(const value_type &val)
const_iterator begin() const
const_iterator end() const
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
CMappedFeat GetCdsOnMrna(const objects::CSeq_id &rna_id, CScope &scope)
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
#define NCBI_USER_THROW(message)
Throw a quick-and-dirty runtime exception of type 'CException' with the given error message and error code 'eUnknown'.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
#define MSerial_AsnText
I/O stream manipulators.
ENa_strand GetStrand(void) const
Get the location's strand.
TSeqPos GetStart(ESeqLocExtremes ext) const
Return start and stop positions of the seq-loc.
CRef< CSeq_loc > Merge(TOpFlags flags, ISynonymMapper *syn_mapper) const
All functions create and return a new seq-loc object.
CPacked_seqint::TRanges TRanges
const CSeq_id * GetId(void) const
Get the id of the location; return NULL if it has multiple ids or no id at all.
bool IsPartialStop(ESeqLocExtremes ext) const
TSeqPos GetStop(ESeqLocExtremes ext) const
CRef< CSeq_loc > Map(const CSeq_loc &src_loc)
Map seq-loc.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
TSeqPos GetBioseqLength(void) const
TInst_Topology GetInst_Topology(void) const
@ eCoding_Iupac
Set coding to printable coding (Iupacna or Iupacaa)
const CSeq_loc & GetLocation(void) const
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
const_iterator begin(void) const
const_iterator end(void) const
CRange< TSeqPos > TSeqRange
typedefs for sequence ranges
CRange< TSignedSeqPos > TSignedSeqRange
static TThisType GetWhole(void)
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
TMatch GetMatch(void) const
Get the variant data.
const TProduct_id & GetProduct_id(void) const
Get the Product_id member data.
TDiag GetDiag(void) const
Get the variant data.
TProduct_type GetProduct_type(void) const
Get the Product_type member data.
TMismatch GetMismatch(void) const
Get the variant data.
const TSpliced & GetSpliced(void) const
Get the variant data.
TGenomic_ins GetGenomic_ins(void) const
Get the variant data.
list< CRef< CSpliced_exon > > TExons
const TExons & GetExons(void) const
Get the Exons member data.
list< CRef< CSpliced_exon_chunk > > TParts
bool IsSpliced(void) const
Check if variant Spliced is selected.
TProduct_ins GetProduct_ins(void) const
Get the variant data.
const TSegs & GetSegs(void) const
Get the Segs member data.
E_Choice Which(void) const
Which variant is currently selected.
@ e_Product_ins
insertion in product sequence (i.e. gap in the genomic sequence)
@ e_Diag
both sequences are represented, there is sufficient similarity between product and genomic sequences....
@ e_Genomic_ins
insertion in genomic sequence (i.e. gap in the product sequence)
@ e_Match
both sequences represented, product and genomic sequences match
@ e_Mismatch
both sequences represented, product and genomic sequences do not match
@ eProduct_type_transcript
pair< int, int > ChunkSize(const CSpliced_exon_chunk &chunk)
map< TSeqRange, string > TStarts
range(_Ty, _Ty) -> range< _Ty >
int GetGeneticCode(const CBioseq_Handle &bsh)
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)