64 return (*
GetExons().begin())->GetProduct_strand();
75 return (*
GetExons().begin())->GetGenomic_strand();
83 "CSpliced_seg::GetSeqStrand(): Invalid row number");
96 bool last_exon_gen_rev =
false;
97 bool last_exon_prod_rev =
false;
103 bool exon_gen_rev =
false;
109 exon_gen_rev = aln_gen_rev;
111 if (exon_it !=
GetExons().begin() && last_exon_gen_rev != exon_gen_rev)
return false;
112 last_exon_gen_rev = exon_gen_rev;
114 bool exon_prod_rev =
false;
120 exon_prod_rev = aln_prod_rev;
122 if (exon_it !=
GetExons().begin() && last_exon_prod_rev != exon_prod_rev)
return false;
123 last_exon_prod_rev = exon_prod_rev;
140 auto prod_start = pstart.
IsNucpos() ? pstart.GetNucpos() :
141 pstart.GetProtpos().GetAmin() * 3 + pstart.GetProtpos().GetFrame() - 1;
142 auto prod_end = pend.IsNucpos() ? pend.GetNucpos() :
143 pend.GetProtpos().GetAmin() * 3 + pend.GetProtpos().GetFrame() - 1;
146 if (prod_end > last_prod_start)
return false;
149 if (prod_start < last_prod_end)
return false;
152 last_prod_start = prod_start;
153 last_prod_end = prod_end;
166 product_position_limits.
SetTo((
prot ? product_length * 3 : product_length) -1);
171 "CSpliced_seg::Validate(): poly-a on a protein");
174 if (poly_a > 0 &&
TSeqPos(poly_a) > product_position_limits.
GetTo()+1) {
176 "CSpliced_seg::Validate(): poly-a > product-length");
179 product_position_limits.
SetFrom(poly_a+1);
181 product_position_limits.
SetTo(poly_a-1);
188 "CSpliced_seg::Validate(): Spiced-seg is empty (has no exons)");
198 if (product_start > product_end) {
200 "CSpliced_seg::Validate(): product_start > product_end");
202 if (product_start < product_position_limits.
GetFrom() || product_position_limits.
GetTo() < product_end) {
204 "CSpliced_seg::Validate(): illegal product position in regard to poly-a and/or product-length ");
208 "CSpliced_seg::Validate(): genomic_start > genomic_end");
215 "product-id not set.");
219 "product-id should be set on the level of Spliced-seg XOR Spliced-exon.");
223 "genomic-id not set.");
230 "product-strand can be set on level of Spliced-seg XOR Spliced-exon.");
232 bool product_plus =
true;
238 if (
prot && !product_plus) {
240 "Protein product cannot have a negative strand.");
254 switch (chunk.
Which()) {
256 chunk_product_len = chunk_genomic_len = chunk.
GetMatch();
259 chunk_product_len = chunk_genomic_len = chunk.
GetDiag();
262 chunk_product_len = chunk_genomic_len = chunk.
GetMismatch();
273 exon_product_len += chunk_product_len;
274 exon_genomic_len += chunk_genomic_len;
276 if (exon_product_len != product_end - product_start + 1) {
278 "Product exon range length is not consistent with exon chunks.");
280 if (exon_genomic_len !=
283 "Genomic exon range length is not consistent with exon chunks.");
286 TSeqPos exon_product_len = product_end - product_start + 1;
288 if (exon_product_len != exon_genomic_len) {
290 "Product and genomic exon range lengths are not consistent.");
303 "CSpliced_seg::GetSeqRange(): Spiced-seg is empty (has no exons)");
313 ((*exon_it)->GetProduct_start().GetNucpos(),
314 (*exon_it)->GetProduct_end().GetNucpos()));
321 ((*exon_it)->GetProduct_start().GetProtpos().GetAmin(),
322 (*exon_it)->GetProduct_end().GetProtpos().GetAmin()));
328 "Invalid product type");
335 ((*exon_it)->GetGenomic_start(),
336 (*exon_it)->GetGenomic_end()));
341 "CSpliced_seg::GetSeqRange(): Invalid row number");
368 static vector<TSignedSeqPos>
372 vector<TSignedSeqPos> rv;
373 rv.reserve(lens.size());
382 rv.push_back((end + 1) -
offset);
384 rv.push_back(start +
offset);
400 vector<TSeqPos> product_lens;
401 vector<TSeqPos> genomic_lens;
406 product_lens.push_back(part.
GetMatch());
407 genomic_lens.push_back(part.
GetMatch());
411 }
else if (part.
IsDiag()) {
412 product_lens.push_back(part.
GetDiag());
413 genomic_lens.push_back(part.
GetDiag());
416 genomic_lens.push_back(0);
418 product_lens.push_back(0);
421 throw runtime_error(
"unhandled part type in Spliced-enon");
427 genomic_lens.push_back(
len);
428 product_lens.push_back(
len);
432 lens.reserve(product_lens.size());
433 for (
size_t i = 0;
i < product_lens.size(); ++
i) {
434 lens.push_back(
max(product_lens[
i], genomic_lens[
i]));
444 vector<TSignedSeqPos> product_starts;
449 vector<TSignedSeqPos> genomic_starts =
455 starts.reserve(product_starts.size() + genomic_starts.size());
456 for (
size_t i = 0;
i < lens.size(); ++
i) {
457 starts.push_back(product_starts[
i]);
458 starts.push_back(genomic_starts[
i]);
469 for (
size_t i = 0;
i < lens.size(); ++
i) {
470 strands.push_back(product_strand);
471 strands.push_back(genomic_strand);
514 product_strand, genomic_strand,
515 product_id, genomic_id);
517 ds_align->
SetSegs().SetDenseg(*ds);
519 disc->
SetSegs().SetDisc().Set().push_back(ds_align);
537 product_strand, genomic_strand,
538 product_id, genomic_id);
542 ds_align->
SetSegs().SetDenseg(*ds);
544 disc->
SetSegs().SetDisc().Set().push_back(ds_align);
551 "unhandled product type in spliced seg");
bool IsReverse(ENa_strand s)
static vector< TSignedSeqPos > s_CalculateStarts(const vector< TSeqPos > &lens, ENa_strand strand, TSeqPos start, TSeqPos end)
static CRef< CDense_seg > s_ExonToDenseg(const CSpliced_exon &exon, ENa_strand product_strand, ENa_strand genomic_strand, const CSeq_id &product_id, const CSeq_id &genomic_id)
User-defined methods of the data storage class.
TWidths & SetWidths(void)
void Compact()
Join adjacent mergeable segments to create a more compact alignment.
TSeqRange GetRowSeq_range(CSeq_align::TDim row, bool always_as_nuc) const
Return exon's range within this row.
TSeqPos GetSeqStop(TDim row) const
CRange< TSeqPos > GetSeqRange(TDim row) const
GetSeqRange. NB: If the product-type is protein, these return only the amin part of Prot-pos.
bool IsConsistentBiologicalOrder(void) const
TSeqPos GetSeqStart(TDim row) const
void Validate(bool full_test=false) const
Validators.
ENa_strand GetSeqStrand(TDim row) const
Get strand (the first one if segments have different strands).
CRef< CSeq_align > AsDiscSeg() const
Convert this alignment to a discontinuous segment.
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
int TSignedSeqPos
Type for signed sequence position.
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
C * SerialClone(const C &src)
Create on heap a clone of the source object.
static TThisType GetWhole(void)
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
void SetFrom(TFrom value)
Assign a value to From data member.
TTo GetTo(void) const
Get the To member data.
TFrom GetFrom(void) const
Get the From member data.
void SetTo(TTo value)
Assign a value to To data member.
bool IsSetProduct_strand(void) const
Should be 'plus' or 'minus'. Check if a value has been assigned to Product_strand data member.
TLens & SetLens(void)
Assign a value to Lens data member.
const TGenomic_id & GetGenomic_id(void) const
Get the Genomic_id member data.
bool IsSetParts(void) const
Basic segments are always in biologic order. Check if a value has been assigned to Parts data member.
TMatch GetMatch(void) const
Get the variant data.
list< CRef< CScore > > Tdata
bool IsSetProduct_strand(void) const
Should be 'plus' or 'minus'. Check if a value has been assigned to Product_strand data member.
const TProduct_id & GetProduct_id(void) const
Get the Product_id member data.
TGenomic_start GetGenomic_start(void) const
Get the Genomic_start member data.
const TProduct_id & GetProduct_id(void) const
Get the Product_id member data.
bool IsMismatch(void) const
Check if variant Mismatch is selected.
void SetSegs(TSegs &value)
Assign a value to Segs data member.
vector< ENa_strand > TStrands
TProduct_length GetProduct_length(void) const
Get the Product_length member data.
bool IsSetPoly_a(void) const
Start of poly(A) tail on the transcript. For sense transcripts: aligned product positions < poly-a <= ...
TScores & SetScores(void)
Assign a value to Scores data member.
TDiag GetDiag(void) const
Get the variant data.
vector< TSignedSeqPos > TStarts
TProduct_type GetProduct_type(void) const
Get the Product_type member data.
TMismatch GetMismatch(void) const
Get the variant data.
TGenomic_strand GetGenomic_strand(void) const
Get the Genomic_strand member data.
bool IsSetGenomic_strand(void) const
Genomic-strand represents the strand of translation. Check if a value has been assigned to Genomic_str...
bool CanGetProduct_strand(void) const
Check if it is safe to call GetProduct_strand method.
bool IsSetProduct_id(void) const
Product is either protein or transcript (cDNA). Check if a value has been assigned to Product_id data ...
void SetType(TType value)
Assign a value to Type data member.
const TParts & GetParts(void) const
Get the Parts member data.
const TProduct_start & GetProduct_start(void) const
Get the Product_start member data.
const TProduct_end & GetProduct_end(void) const
Get the Product_end member data.
bool IsSetProduct_id(void) const
Product is either protein or transcript (cDNA). Check if a value has been assigned to Product_id data ...
bool IsGenomic_ins(void) const
Check if variant Genomic_ins is selected.
bool IsMatch(void) const
Check if variant Match is selected.
bool CanGetGenomic_strand(void) const
Check if it is safe to call GetGenomic_strand method.
TGenomic_ins GetGenomic_ins(void) const
Get the variant data.
bool IsSetGenomic_strand(void) const
Check if a value has been assigned to Genomic_strand data member.
const TScores & GetScores(void) const
Get the Scores member data.
TStarts & SetStarts(void)
Assign a value to Starts data member.
TStrands & SetStrands(void)
Assign a value to Strands data member.
list< CRef< CSpliced_exon > > TExons
const TExons & GetExons(void) const
Get the Exons member data.
TGenomic_strand GetGenomic_strand(void) const
Get the Genomic_strand member data.
TProduct_strand GetProduct_strand(void) const
Get the Product_strand member data.
bool IsDiag(void) const
Check if variant Diag is selected.
void SetNumseg(TNumseg value)
Assign a value to Numseg data member.
list< CRef< CSpliced_exon_chunk > > TParts
bool IsSetProduct_length(void) const
Length of the product, in bases/residues. From this (or from poly-a if present), a 3' unaligned length...
bool CanGetProduct_strand(void) const
Check if it is safe to call GetProduct_strand method.
TPoly_a GetPoly_a(void) const
Get the Poly_a member data.
TGenomic_end GetGenomic_end(void) const
Get the Genomic_end member data.
const Tdata & Get(void) const
Get the member data.
TProduct_strand GetProduct_strand(void) const
Get the Product_strand member data.
TIds & SetIds(void)
Assign a value to Ids data member.
bool IsSetGenomic_id(void) const
Check if a value has been assigned to Genomic_id data member.
bool IsProduct_ins(void) const
Check if variant Product_ins is selected.
bool IsNucpos(void) const
Check if variant Nucpos is selected.
TProduct_ins GetProduct_ins(void) const
Get the variant data.
const TGenomic_id & GetGenomic_id(void) const
Get the Genomic_id member data.
bool CanGetGenomic_strand(void) const
Check if it is safe to call GetGenomic_strand method.
bool IsSetGenomic_id(void) const
Check if a value has been assigned to Genomic_id data member.
bool IsSetScores(void) const
Scores for this exon. Check if a value has been assigned to Scores data member.
E_Choice Which(void) const
Which variant is currently selected.
@ e_Product_ins
insertion in product sequence (i.e. gap in the genomic sequence)
@ e_Diag
both sequences are represented, there is sufficient similarity between product and genomic sequences....
@ e_Genomic_ins
insertion in genomic sequence (i.e. gap in the product sequence)
@ e_Match
both sequences represented, product and genomic sequences match
@ e_Mismatch
both sequences represented, product and genomic sequences do not match
@ eType_partial
mapping pieces together
@ eType_disc
discontinuous alignment
@ eProduct_type_transcript
ENa_strand
strand of nucleic acid
constexpr bool empty(list< Ts... >) noexcept
#define row(bind, expected)