61 # define CRangeCollection CRangeSet
99 "Unsupported alignment type: e_Std.");
103 "Unsupported alignment type: e_Sparse.");
107 "Invalid CSeq_align::TSegs type.");
141 "Unsupported CSeq_align::TSegs type.");
149 #ifdef _TRACE_CSegmentedRangeCollection
152 out <<
"CRangeCollection<CPairwiseAln::TPos>" << endl;
164 typedef ncbi::CRangeCollection<CPairwiseAln::TPos>
TParent;
173 #ifdef _TRACE_CSegmentedRangeCollection
174 cerr <<
"=====================" << endl;
175 cerr <<
"Original:" << *
this;
181 #ifdef _TRACE_CSegmentedRangeCollection
182 cerr <<
"After the cut:" << *
this << endl;
190 if ( !addition.empty() ) {
191 #ifdef _TRACE_CSegmentedRangeCollection
192 cerr <<
"Addition: " << addition << endl;
195 #ifndef USE_RANGE_SET
198 TRange rr(add_it->GetFrom(), add_it->GetTo());
199 while (it != TParent::m_vRanges.
end() &&
200 rr.
GetFrom() >= it->GetFrom()) {
203 it = TParent::m_vRanges.insert(it, rr);
210 #ifdef _TRACE_CSegmentedRangeCollection
212 cerr <<
"No addition." << endl << endl;
215 #ifdef _TRACE_CSegmentedRangeCollection
216 cerr <<
"Result: " << *
this;
217 cerr <<
"=====================" << endl << endl;
231 TAnchorSegments anchor_segments;
234 anchor_segments.insert(
CPairwiseAln::TRng(rng_i->GetFirstFrom(), rng_i->GetFirstTo()));
240 for (
auto anchor_segments_seg = anchor_segments.begin(); anchor_segments_seg != anchor_segments.end(); ++anchor_segments_seg ) {
243 diag->SetIds().resize(dim);
246 id->Assign(anchored_aln.
GetId(dim -
row - 1)->GetSeqId());
247 diag->SetIds()[
row] = id;
250 diag->SetStrands().resize(dim);
251 diag->SetLen(anchor_segments_seg->GetLength());
252 diags.push_back(diag);
256 CSeq_align::TSegs::TDendiag::iterator diag_it = diags.begin();
258 TAnchorSegments::const_iterator seg_i = anchor_segments.begin();
260 pairwises[dim -
row - 1]->begin();
261 bool direct = aln_rng_i->IsDirect();
264 while (seg_i != anchor_segments.end()) {
265 if (aln_rng_i != pairwises[dim -
row - 1]->end() &&
266 seg_i->GetFrom() >= aln_rng_i->GetFirstFrom()) {
267 _ASSERT(seg_i->GetToOpen() <= aln_rng_i->GetFirstToOpen());
268 if (seg_i->GetToOpen() > aln_rng_i->GetFirstToOpen()) {
270 "seg_i->GetToOpen() > aln_rng_i->GetFirstToOpen()");
274 _ASSERT(right_delta >= seg_i->GetLength());
277 "right_delta < seg_i->GetLength()");
279 right_delta -= seg_i->GetLength();
281 (*diag_it)->SetStarts()[
row] =
283 aln_rng_i->GetSecondFrom() + left_delta :
284 aln_rng_i->GetSecondFrom() + right_delta);
287 left_delta += seg_i->GetLength();
289 if (right_delta == 0) {
290 _ASSERT(left_delta == aln_rng_i->GetLength());
292 if (aln_rng_i != pairwises[dim -
row - 1]->end()) {
293 direct = aln_rng_i->IsDirect();
295 right_delta = aln_rng_i->GetLength();
299 (*diag_it)->SetStrands()[
row] =
310 while (
row < starts.size()) {
312 starts.erase(starts.begin() +
row);
314 ids.erase(ids.begin() +
row);
316 strands.erase(strands.begin() +
row);
321 if (diag.
GetStarts().size() < 2)
continue;
326 dd.push_back(*diag_it);
338 TAnchorSegments anchor_segments;
341 anchor_segments.insert(
CPairwiseAln::TRng(rng_i->GetFirstFrom(), rng_i->GetFirstTo()));
352 dim = anchored_aln.
GetDim();
363 SerialAssign<CSeq_id>(*ids[
row], anchored_aln.
GetId(dim -
row - 1)->GetSeqId());
369 TAnchorSegments::const_iterator seg_i = anchor_segments.begin();
370 for (seg = 0; seg < numseg; ++seg, ++seg_i) {
371 lens[seg] = seg_i->GetLength();
374 int matrix_size = dim * numseg;
382 starts.resize(matrix_size, -1);
385 int matrix_row_pos =
row;
386 seg_i = anchor_segments.begin();
388 bool direct = aln_rng_i->IsDirect();
391 while (seg_i != anchor_segments.end()) {
394 if (aln_rng_i != pairwises[dim -
row - 1]->end() &&
395 seg_i->GetFrom() >= aln_rng_i->GetFirstFrom()) {
396 _ASSERT(seg_i->GetToOpen() <= aln_rng_i->GetFirstToOpen());
397 if (seg_i->GetToOpen() > aln_rng_i->GetFirstToOpen()) {
399 "seg_i->GetToOpen() > aln_rng_i->GetFirstToOpen()");
403 _ASSERT(right_delta >= seg_i->GetLength());
406 "right_delta < seg_i->GetLength()");
408 right_delta -= seg_i->GetLength();
410 starts[matrix_row_pos] =
412 aln_rng_i->GetSecondFrom() + left_delta :
413 aln_rng_i->GetSecondFrom() + right_delta);
416 left_delta += seg_i->GetLength();
418 if (right_delta == 0) {
419 _ASSERT(left_delta == aln_rng_i->GetLength());
421 if (aln_rng_i != pairwises[dim -
row - 1]->end()) {
422 direct = aln_rng_i->IsDirect();
424 right_delta = aln_rng_i->GetLength();
431 matrix_row_pos += dim;
453 int matrix_size = 2 * numseg;
459 starts.resize(matrix_size, -1);
467 SerialAssign<CSeq_id>(*ids[0], pairwise_aln.
GetFirstId()->GetSeqId());
469 SerialAssign<CSeq_id>(*ids[1], pairwise_aln.
GetSecondId()->GetSeqId());
479 starts[matrix_pos++] = aln_rng_i->GetFirstFrom();
480 if ( !aln_rng_i->IsDirect() ) {
486 starts[matrix_pos++] = aln_rng_i->GetSecondFrom();
487 lens[seg++] = aln_rng_i->GetLength();
489 _ASSERT(matrix_pos == matrix_size);
541 TPos last_prod_end = 0;
542 TPos last_gen_end = 0;
545 bool gen_direct = rg_it == pairwise_aln.
end() || rg_it->IsDirect();
546 bool prod_direct =
prot ||
547 rg_it == pairwise_aln.
end() || rg_it->IsFirstDirect();
549 if ( !prod_direct ) {
550 gen_direct = !gen_direct;
561 TPos prod_skip, gen_skip;
562 if (rg_it == pairwise_aln.
begin()) {
568 gen_skip = gen_direct == prod_direct ?
577 _ASSERT(exon->IsSetProduct_start());
578 _ASSERT(exon->IsSetGenomic_start());
579 _ASSERT(exon->IsSetProduct_end());
580 _ASSERT(exon->IsSetGenomic_end());
582 exons.push_back(exon);
585 exons.push_front(exon);
588 ex_prod_rg = TRng::GetEmpty();
589 ex_gen_rg = TRng::GetEmpty();
595 if (prod_skip > 0 || gen_skip > 0) {
600 chunk->SetMismatch(mismatch);
602 exon->SetParts().push_back(chunk);
605 exon->SetParts().push_front(chunk);
607 prod_skip -= mismatch;
608 gen_skip -= mismatch;
612 chunk->SetProduct_ins(prod_skip);
614 exon->SetParts().push_back(chunk);
617 exon->SetParts().push_front(chunk);
622 chunk->SetGenomic_ins(gen_skip);
624 exon->SetParts().push_back(chunk);
627 exon->SetParts().push_front(chunk);
639 exon->SetPartial(
true);
642 exon->SetProduct_strand(prod_direct
645 exon->SetGenomic_strand(gen_direct
652 exon->SetParts().push_back(chunk);
655 exon->SetParts().push_front(chunk);
663 exon->SetProduct_start().SetProtpos().SetAmin(ex_prod_rg.GetFrom() / 3);
664 exon->SetProduct_start().SetProtpos().SetFrame(ex_prod_rg.GetFrom() % 3 + 1);
665 exon->SetProduct_end().SetProtpos().SetAmin(ex_prod_rg.GetTo() / 3);
666 exon->SetProduct_end().SetProtpos().SetFrame(ex_prod_rg.GetTo() % 3 + 1);
668 exon->SetProduct_start().SetNucpos(ex_prod_rg.GetFrom());
669 exon->SetProduct_end().SetNucpos(ex_prod_rg.GetTo());
671 exon->SetGenomic_start(ex_gen_rg.GetFrom());
672 exon->SetGenomic_end(ex_gen_rg.GetTo());
676 last_gen_end = gen_direct == prod_direct ?
680 _ASSERT(exon->IsSetProduct_start());
681 _ASSERT(exon->IsSetGenomic_start());
682 _ASSERT(exon->IsSetProduct_end());
683 _ASSERT(exon->IsSetGenomic_end());
685 exons.push_back(exon);
688 exons.push_front(exon);
691 else if ( !exons.empty() ) {
693 exon = prod_direct ? exons.front() : exons.back();
699 if ( exon->GetProduct_end().IsNucpos() ) {
700 prod_end = exon->GetProduct_end().GetNucpos();
703 prod_end = exon->GetProduct_end().GetProtpos().GetAmin();
709 exon->SetPartial(
true);
777 out_seqaligns.resize(pairwises.size() - 1);
781 if (
row == anchor)
continue;
817 "Unsupported CSeq_align::TSegs type.");
821 "Invalid CSeq_align::TSegs type.");
823 out_seqaligns[sa_idx++].Reset(sa);
837 TAnchorSegments anchor_segments;
840 anchor_segments.insert(
CPairwiseAln::TRng(rng_i->GetFirstFrom(), rng_i->GetFirstTo()));
851 vector< CRef<CDense_seg> > dsegs;
852 dsegs.resize(numseg);
853 for (
size_t i = 0;
i < dsegs.size(); ++
i) {
863 SerialAssign<CSeq_id>(*ids[
row], anchored_aln.
GetId(dim -
row - 1)->GetSeqId());
873 TAnchorSegments::const_iterator seg_i = anchor_segments.begin();
875 bool direct = aln_rng_i->IsDirect();
878 while (seg_i != anchor_segments.end()) {
881 dseg.
SetLens()[0] = seg_i->GetLength();
885 if (aln_rng_i != pairwises[dim -
row - 1]->end() &&
886 seg_i->GetFrom() >= aln_rng_i->GetFirstFrom()) {
887 _ASSERT(seg_i->GetToOpen() <= aln_rng_i->GetFirstToOpen());
888 if (seg_i->GetToOpen() > aln_rng_i->GetFirstToOpen()) {
890 "seg_i->GetToOpen() > aln_rng_i->GetFirstToOpen()");
894 _ASSERT(right_delta >= seg_i->GetLength());
897 "right_delta < seg_i->GetLength()");
899 right_delta -= seg_i->GetLength();
903 aln_rng_i->GetSecondFrom() + left_delta :
904 aln_rng_i->GetSecondFrom() + right_delta);
907 left_delta += seg_i->GetLength();
909 if (right_delta == 0) {
910 _ASSERT(left_delta == aln_rng_i->GetLength());
912 if (aln_rng_i != pairwises[dim -
row - 1]->end()) {
913 direct = aln_rng_i->IsDirect();
915 right_delta = aln_rng_i->GetLength();
920 if (starts[0] >= 0 && starts[1] >= 0) {
924 disc->
Set().push_back(seg_aln);
925 seg_aln->
SetSegs().SetDenseg(dseg);
944 vector< CRef<CDense_seg> > dsegs;
945 dsegs.resize(numseg);
946 for (
size_t i = 0;
i < dsegs.size(); ++
i) {
950 disc->
Set().push_back(seg_aln);
952 dsegs[
i].Reset(&dseg);
959 SerialAssign<CSeq_id>(*ids[0], pairwise_aln.
GetFirstId()->GetSeqId());
961 SerialAssign<CSeq_id>(*ids[1], pairwise_aln.
GetSecondId()->GetSeqId());
972 dseg.
SetStarts()[0] = aln_rng_i->GetFirstFrom();
973 if ( !aln_rng_i->IsDirect() ) {
979 dseg.
SetStarts()[1] = aln_rng_i->GetSecondFrom();
980 dseg.
SetLens()[0] = aln_rng_i->GetLength();
995 TAnchorSegments anchor_segments;
998 anchor_segments.insert(
CPairwiseAln::TRng(rng_i->GetFirstFrom(), rng_i->GetFirstTo()));
1009 dim = anchored_aln.
GetDim();
1020 SerialAssign<CSeq_id>(*ids[
row], anchored_aln.
GetId(dim -
row - 1)->GetSeqId());
1025 lens.resize(numseg);
1026 TAnchorSegments::const_iterator seg_i = anchor_segments.begin();
1027 for (seg = 0; seg < numseg; ++seg, ++seg_i) {
1028 lens[seg] = seg_i->GetLength();
1031 int matrix_size = dim * numseg;
1035 present.resize(matrix_size);
1043 starts.resize(matrix_size, 0);
1046 int matrix_row_pos =
row;
1047 seg_i = anchor_segments.begin();
1049 bool direct = aln_rng_i->IsDirect();
1052 while (seg_i != anchor_segments.end()) {
1056 if (aln_rng_i != pairwises[dim -
row - 1]->end() &&
1057 seg_i->GetFrom() >= aln_rng_i->GetFirstFrom()) {
1058 _ASSERT(seg_i->GetToOpen() <= aln_rng_i->GetFirstToOpen());
1059 if (seg_i->GetToOpen() > aln_rng_i->GetFirstToOpen()) {
1061 "seg_i->GetToOpen() > aln_rng_i->GetFirstToOpen()");
1065 _ASSERT(right_delta >= seg_i->GetLength());
1068 "right_delta < seg_i->GetLength()");
1070 right_delta -= seg_i->GetLength();
1073 aln_rng_i->GetSecondFrom() + left_delta
1074 : aln_rng_i->GetSecondFrom() + right_delta);
1075 starts[matrix_row_pos] = start;
1080 left_delta += seg_i->GetLength();
1082 if (right_delta == 0) {
1083 _ASSERT(left_delta == aln_rng_i->GetLength());
1085 if (aln_rng_i != pairwises[dim -
row - 1]->end()) {
1086 direct = aln_rng_i->IsDirect();
1088 right_delta = aln_rng_i->GetLength();
1094 matrix_row_pos += dim;
1113 int matrix_size = 2 * numseg;
1116 lens.resize(numseg);
1119 starts.resize(matrix_size, 0);
1122 present.resize(matrix_size, 0);
1129 SerialAssign<CSeq_id>(*ids[0], pairwise_aln.
GetFirstId()->GetSeqId());
1131 SerialAssign<CSeq_id>(*ids[1], pairwise_aln.
GetSecondId()->GetSeqId());
1142 starts[matrix_pos++] = start;
1143 if ( !aln_rng_i->IsDirect() ) {
1144 if ( !ps->IsSetStrands() ) {
1149 start = aln_rng_i->GetSecondFrom();
1151 starts[matrix_pos++] = start;
1152 lens[seg++] = aln_rng_i->GetLength();
1154 _ASSERT(matrix_pos == matrix_size);
1171 id_extract(src, ids);
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
CRef< CAnchoredAln > CreateAnchoredAlnFromAln(const _TAlnStats &aln_stats, size_t aln_idx, const CAlnUserOptions &options, objects::CSeq_align::TDim explicit_anchor_row=-1)
Create an anchored alignment from Seq-align using hints.
vector< TAlnSeqIdIRef > TAlnSeqIdVec
void InitSplicedsegFromPairwiseAln(CSpliced_seg &spliced_seg, const CPairwiseAln &pairwise_aln, CScope *scope)
CRef< CSeq_align > CreateSeqAlignFromPairwiseAln(const CPairwiseAln &pairwise_aln, CSeq_align::TSegs::E_Choice choice, CScope *scope)
Convert CPairwiseAln to seq-align of the selected type.
CRef< CDense_seg > CreateDensegFromAnchoredAln(const CAnchoredAln &anchored_aln, CScope *scope)
CRef< CSpliced_seg > CreateSplicedsegFromPairwiseAln(const CPairwiseAln &pairwise_aln, CScope *scope)
CRef< CSeq_align > ConvertSeq_align(const CSeq_align &src, CSeq_align::TSegs::E_Choice dst_choice, CSeq_align::TDim anchor_row, CScope *scope)
Convert source alignment to a new type.
CRef< CPacked_seg > CreatePackedsegFromPairwiseAln(const CPairwiseAln &pairwise_aln, CScope *scope)
CRef< CPacked_seg > CreatePackedsegFromAnchoredAln(const CAnchoredAln &anchored_aln, CScope *scope)
void CreateDense_diagFromAnchoredAln(CSeq_align::TSegs::TDendiag &dd, const CAnchoredAln &anchored_aln, CScope *scope)
void s_TranslatePairwise(CPairwiseAln &out_pw, const CPairwiseAln &pw, const CPairwiseAln &tr)
CRef< CSpliced_seg > CreateSplicedsegFromAnchoredAln(const CAnchoredAln &anchored_aln, CScope *scope)
CRef< CSeq_align > CreateSeqAlignFromAnchoredAln(const CAnchoredAln &anchored_aln, CSeq_align::TSegs::E_Choice choice, CScope *scope)
Convert CAnchoredAln to seq-align of the selected type.
CRef< CSeq_align_set > CreateAlignSetFromPairwiseAln(const CPairwiseAln &pairwise_aln, CScope *scope)
void CreateSeqAlignFromEachPairwiseAln(const CAnchoredAln::TPairwiseAlnVector pairwises, TDim anchor, vector< CRef< CSeq_align > > &out_seqaligns, CSeq_align::TSegs::E_Choice choice, CScope *scope)
Create seq-align from each of the pairwise alignments vs the selected anchor row.
CRef< CSeq_align_set > CreateAlignSetFromAnchoredAln(const CAnchoredAln &anchored_aln, CScope *scope)
CRef< CDense_seg > CreateDensegFromPairwiseAln(const CPairwiseAln &pairwise_aln, CScope *scope)
static const TSignedSeqPos kMaxSplicedExonIndelLength
class CAlignRangeCollectionList<TAlignRange> represents a sorted collection of TAlignRange.
const_iterator begin() const
const_iterator insert(const TAlignRange &r)
TSignedSeqPos GetSecondPosByFirstPos(position_type pos, ESearchDirection dir=eNone) const
@ fMixedDir
contains at least one reversed range
const_iterator end() const
CAlignRange Represents an element of pairwise alignment of two sequences.
Container mapping seq-aligns to vectors of participating seq-ids.
void push_back(const CSeq_align &aln)
Adding an alignment.
Helper class which collects seq-align statistics: seq-ids participating in alignments and rows,...
Options for different alignment manager operations.
Query-anchored alignment can be 2 or multi-dimensional.
const TPairwiseAlnVector & GetPairwiseAlns(void) const
The vector of pairwise alns.
vector< CRef< CPairwiseAln > > TPairwiseAlnVector
const TAlnSeqIdIRef & GetId(TDim row) const
Seq ids of the rows.
TDim GetDim(void) const
How many rows.
TDim GetAnchorRow(void) const
Which is the anchor row?
void Validate(bool full_test=false) const
A pairwise aln is a collection of ranges for a pair of rows.
int GetSecondBaseWidth(void) const
Base width of the second row.
const TAlnSeqIdIRef & GetFirstId(void) const
Get first sequence id.
int GetFirstBaseWidth(void) const
Base width of the first row.
const TAlnSeqIdIRef & GetSecondId(void) const
Get second sequence id.
const_iterator end() const
iterator find_nc(position_type pos)
TRanges::iterator iterator
CPairwiseAln::TPos position_type
TThisType & CombineWithAndKeepAbutting(const TThisType &c)
TThisType & DivideAfter(const position_type &p)
If position is in middle of range, divide into two consecutive ranges after this position.
Scope-aware seq-id converter.
void insert(const TRange &r)
void CutAtPosition(position_type pos)
ncbi::CRangeSet< CPairwiseAln::TPos > TParent
void Validate(bool full_test=false) const
Validators.
CNcbiOstream & operator<<(CNcbiOstream &out, const CEquivRange &range)
std::ofstream out("events_result.xml")
main entry point for tests
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
int TSignedSeqPos
Type for signed sequence position.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized replacement for the generic CSerialObject::Assign implementation, which is not so efficient.
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
TSeqPos GetBioseqLength(void) const
void Reset(void)
Reset reference object.
position_type GetFirstToOpen(void) const
position_type GetFirstTo(void) const
position_type GetSecondFrom(void) const
TThisType & SetFirstFrom(position_type from)
position_type GetSecondTo(void) const
position_type GetFirstFrom(void) const
position_type GetSecondToOpen(void) const
position_type GetLength(void) const
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
TFrom GetFrom(void) const
Get the From member data.
vector< CRef< CSeq_id > > TIds
vector< TSeqPos > TStarts
Tdata & Set(void)
Assign a value to data member.
vector< ENa_strand > TStrands
TLens & SetLens(void)
Assign a value to Lens data member.
bool IsSetStrands(void) const
Check if a value has been assigned to Strands data member.
void SetProduct_id(TProduct_id &value)
Assign a value to Product_id data member.
void SetSegs(TSegs &value)
Assign a value to Segs data member.
vector< ENa_strand > TStrands
TExons & SetExons(void)
Assign a value to Exons data member.
void SetDim(TDim value)
Assign a value to Dim data member.
TStrands & SetStrands(void)
Assign a value to Strands data member.
void SetDim(TDim value)
Assign a value to Dim data member.
vector< TSignedSeqPos > TStarts
void SetDim(TDim value)
Assign a value to Dim data member.
vector< TSeqPos > TStarts
void SetType(TType value)
Assign a value to Type data member.
vector< CRef< CSeq_id > > TIds
vector< CRef< CSeq_id > > TIds
TStarts & SetStarts(void)
Assign a value to Starts data member.
TStarts & SetStarts(void)
Assign a value to Starts data member.
void SetProduct_type(TProduct_type value)
Assign a value to Product_type data member.
TStrands & SetStrands(void)
Assign a value to Strands data member.
list< CRef< CSpliced_exon > > TExons
vector< ENa_strand > TStrands
void SetGenomic_id(TGenomic_id &value)
Assign a value to Genomic_id data member.
void SetNumseg(TNumseg value)
Assign a value to Numseg data member.
const TStarts & GetStarts(void) const
Get the Starts member data.
TIds & SetIds(void)
Assign a value to Ids data member.
list< CRef< CDense_diag > > TDendiag
TIds & SetIds(void)
Assign a value to Ids data member.
@ e_not_set
No variant selected.
@ eType_partial
mapping pieces together
@ eProduct_type_transcript
double value_type
The numeric datatype used by the parser.
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
#define row(bind, expected)