66 #define INIT_MEMBERS \
68 , mf_MarkIndels( fGap ) \
70 , mf_UseSpliced( false ) \
71 , mf_UseSparse( false ) \
72 , mf_ShowIndels( true ) \
73 , mf_RowsInvalidated( false )
111 x_Init( sparce_aln, threshold );
119 x_Init( spliced_seg, scope, threshold );
127 if( input_aligns.size() == 1 && input_aligns[0]->GetSegs().IsSpliced() ){
129 const_cast<CSpliced_seg*
>( &input_aligns[0]->GetSegs().GetSpliced() )
136 if( input_aligns.size() > 0 ){
138 container.
insert( **ait );
153 LOG_POST(
Error <<
"Skipping this alignment: " << e.what() );
162 LOG_POST(
Error <<
"Input alignments cannot be anchored because they don't share at least one common sequence." );
165 eUnknown,
"Input alignments cannot be anchored because they don't share at least one common sequence."
190 x_Init( *sparse_aln, threshold );
241 *handle -> GetBioseqHandle().
GetSeqId(),
367 vector< IAlnSegmentIterator* > iters;
385 vector<bool> seq_touched(
m_NumSeqs,
false);
399 while (a_range.
GetTo() < (
int)pos && ++it) {
400 if (!seq_touched[
row])
401 seq_touched[
row] =
true;
412 }
else if (too < next_pos) {
424 vector<string> seq_spans;
431 bool seg_has_gap =
false;
432 bool seg_has_unaln =
false;
433 bool seg_is_translated =
false;
448 seg_has_unaln = seq_touched[
row];
464 m_SparseAlns[0]->GetAlnSeqString(
row, seq_span, seq_range, seg_is_translated);
467 if (seg_is_translated) {
470 seq_span.swap(new_seq);
474 if (!seq_span.empty()) {
475 seq_spans.push_back(seq_span);
480 if (seq_spans.size() <= 1) {
482 if (seq_spans.empty()) {
483 LOG_POST(
Warning <<
"All gaps/discontinuity for aln range [ " << pos <<
", " << (next_pos - 1) <<
"]");
492 if (newRow.
type == 0) {
501 aln_spans.push_back(newRow);
519 int length = next_pos - pos;
521 int size = (
int)seq_spans.size();
530 bool f_mismatch =
false;
531 for (
int j = 1; j <
size; j++) {
532 _ASSERT(
i < (
int)seq_spans[j].length());
533 if (seq_spans[j][
i] != seq_spans[0][
i]) {
541 if (new_span_type != f_span_type) {
550 if (span_start >
i - 1)
551 LOG_POST(
Error <<
"Miscalculating span: pos=" << pos <<
", start=" << span_start <<
", oend=" <<
i);
552 newRow.
type = f_span_type;
555 aln_spans.push_back(newRow);
559 f_span_type = new_span_type;
561 }
while (
i < length);
566 for (
int row_to_delete = 0; row_to_delete <
m_NumSeqs; row_to_delete++) {
567 delete iters[row_to_delete];
572 TAlnSpans::iterator iter = aln_spans.begin();
573 for (; iter != aln_spans.end();) {
589 if (from == -1 || to == -1) {
598 }
else if (iter->type &
fGap) {
619 vector<bool> seq_strand(
m_NumSeqs,
true);
622 for (
int seq_ix = 0; seq_ix <
m_NumSeqs; seq_ix++) {
623 seq_strand[seq_ix] =
m_SparseAlns[0]->IsPositiveStrand(seq_ix);
626 for (
int seq_ix = 0; seq_ix <
m_NumSeqs; seq_ix++) {
631 vector<TSignedSeqPos> seq_pos(
m_NumSeqs, -1);
636 for (
int seq_ix = 0; seq_ix <
m_NumSeqs; seq_ix++) {
639 if (!span_row.
ranges[seq_ix].Empty()) {
645 string splice3, splice5;
647 if (seq_strand[seq_ix]) {
649 if (cur_seq_pos > 0 && cur_span_from > cur_seq_pos) {
665 seq_ix, intron, seq_pos[seq_ix], cur_span_from - 1
667 if (intron.length() > 1) {
668 string splice5 = intron.substr(0, 2);
669 string splice3 = intron.substr(intron.length() - 2);
677 iter = rows.insert(iter, gap_row);
680 seq_pos[seq_ix] = cur_span_to + 1;
684 if (cur_seq_pos > 0 && cur_span_to < cur_seq_pos) {
700 seq_ix, intron, cur_span_to + 1, seq_pos[seq_ix]
702 if (intron.length() > 1) {
703 string splice5 = intron.substr(0, 2);
704 string splice3 = intron.substr(intron.length() - 2);
712 iter = rows.insert(iter, gap_row);
715 seq_pos[seq_ix] = cur_span_from - 1;
731 TAlnSpans::iterator iter = rows.begin();
732 TAlnSpans::iterator
prev = rows.end();
734 for (; iter != rows.end();
prev = iter, ++iter) {
738 || iter->type ==
fTail
747 bool merge_prev =
false;
748 bool merge_next =
false;
750 TAlnSpans::iterator
next = iter + 1;
771 if (
row >= (
int)merged_row.
ranges.size()) {
775 }
else if (
row >= (
int)
prev->ranges.size()) {
793 if (
row >= (
int)merged_row.
ranges.size()) {
797 }
else if (
row >= (
int)
next->ranges.size()) {
806 TAlnSpans::iterator from = merge_prev ? iter :
next;
807 TAlnSpans::iterator to = merge_next ? (
next + 1) :
next;
809 rows.erase(from, to);
826 TAlnSpans::iterator iter = rows.begin();
827 for (; iter != rows.end();) {
828 if (pos < (
int)iter->aln_range.GetFrom()) {
841 pos = iter->aln_range.GetTo() + 1;
863 unsigned warning_cnt = 0;
864 TAlnSpans::iterator iter =
m_AlnSpans.begin();
867 if (iter->length == 0) {
868 iter->length = iter->aln_range.GetLength() /
m_BaseWidth;
870 if (iter->length == 0) {
872 if (warning_cnt < 10) {
875 if (warning_cnt == 10) {
888 ((double)(iter->length - iter->mismatch)) / iter->length
890 iter->identity = floor(identity * 10000 + 0.5) / 100.0;
893 iter->identity = 0.0;
921 if (from == -1 || to == -1) {
930 }
else if (iter->type &
fGap) {
956 span_row.
locs.push_back(loc);
974 span_row.
locs.push_back(loc);
983 int product_length = 0;
1003 if( poly_a < start ){
1011 span_rows.push_back( newRow );
1013 if( poly_a < start -1 ){
1018 newRow.
length = start - 1 - poly_a;
1021 span_rows.push_back( newRow );
1033 newRow.
length = start - 1;
1036 span_rows.push_back( newRow );
1061 switch (chunk.
Which()) {
1073 span_rows.push_back(newRow);
1075 prod_pos += prod_len;
1089 span_rows.push_back(newRow);
1091 prod_pos += prod_len;
1108 if (aln_to < aln_from)
1109 swap(aln_to, aln_from);
1115 from += 3 - from % 3;
1119 }
else if (off == 2) {
1129 m_SparseAlns[0]->GetAlnSeqString(1, p_str, seq_range,
false);
1131 _ASSERT(g_str.size() == p_str.size());
1132 for (
auto i = 0;
i < g_str.size(); ++
i) {
1133 if (
i < p_str.size()) {
1134 if (g_str[
i] != p_str[
i])
1137 newRow.
mismatch += (g_str.size() - p_str.size());
1149 span_rows.push_back(newRow);
1151 prod_pos += prod_len;
1166 newRow.
length = prod_len;
1167 span_rows.push_back( newRow );
1169 prod_pos += prod_len;
1179 span_rows.push_back( newRow );
1198 if( poly_a >= product_length ){
1203 if( stop < poly_a ){
1204 if( stop+1 < poly_a ){
1209 newRow.
length = (poly_a - stop) + 1;
1211 span_rows.push_back( newRow );
1218 newRow.
length = (product_length - poly_a);
1220 span_rows.push_back( newRow );
1226 if( stop < product_length - 1){
1230 newRow.
length = product_length - stop;
1233 span_rows.push_back( newRow );
1237 for (
auto&& aln_row : span_rows) {
1238 if (!aln_row.ranges.empty() && aln_row.ranges[0].NotEmpty()) {
1239 TSeqPos aln_from =
m_SparseAlns[0]->GetAlnPosFromSeqPos((
int)0, aln_row.ranges[0].GetFrom());
1241 if (aln_to < aln_from)
1242 swap(aln_to, aln_from);
1243 aln_row.aln_range.Set(aln_from, aln_to + 1);
1337 return wxT(
"string");
1347 return wxT(
"double");
1350 return wxT(
"string");
1355 return wxT(
"string");
1361 if ( !dst.empty() ) {
1374 eUnknown,
"CAlnSpanVertModel::GetValueAt(): Array index out-of-bounds"
1382 switch( extra_col ){
1420 return wxVariant( spanRow.
length );
1423 return wxVariant( spanRow.
mismatch );
1426 return wxVariant( spanRow.
gap );
1429 return wxVariant( spanRow.
identity );
1436 || col >= (
int)spanRow.
ranges.size()
1437 || spanRow.
ranges[col].Empty()
1480 eUnknown,
"CAlnSpanVertModel::GetData(): Array index out-of-bounds"
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
void BuildAln(TAnchoredAlnVec &in_alns, CAnchoredAln &out_aln, const CAlnUserOptions &options, TAlnSeqIdIRef pseudo_seqid=TAlnSeqIdIRef())
Build anchored alignment from a set of alignments.
void CreateAnchoredAlnVec(_TAlnStats &aln_stats, TAnchoredAlnVec &out_vec, const CAlnUserOptions &options)
Create anchored alignment from each seq-align in the stats.
static void s_Append(string &dst, const string &src)
const_iterator insert(const CSeq_align &seq_align)
Insert new CSeq_align into the list.
size_type size(void) const
Container mapping seq-aligns to vectors of participating seq-ids.
void push_back(const CSeq_align &aln)
Adding an alignment.
void x_ConvertRowsToSpans(TAlnSpans &rows)
finally, convert our spans into rows for display
void x_MergeRows(TAlnSpans &rows)
CRange< TSignedSeqPos > m_AlnRange
void SetThreshold(int th)
virtual int GetNumColumns() const
Returns the number of columns in the model.
TAlnSpans m_AlnSpans
the alignments we represent
void SetShowIndels(bool indel)
virtual wxVariant GetValueAt(int i, int j) const
vector< string > m_ColNames
const SSpanRow & GetData(size_t row) const
access a given row's data
void x_PopulateRows(TAlnSpans &rows)
virtual int GetNumRows() const
Returns the number of rows in the model.
virtual wxString GetColumnName(int aColIx) const
Returns a default name for the column using spreadsheet conventions: A, B, C, ...
CRef< CSpliced_seg > m_SplicedSeg
vector< CRef< CSparseAln > > m_SparseAlns
virtual wxString GetColumnType(int aColIx) const
Tries to extract actual type from row 0 value if it exists.
vector< SSpanRow > TAlnSpans
void x_InsertIntrons(TAlnSpans &rows)
CAnchoredAln m_AnchoredAln
CRef< IAlnMultiDataSource > m_AlnSrc
void x_PopulateRowsSpliced(TAlnSpans &rows)
Helper class which collects seq-align statistics: seq-ids participating in alignments and rows,...
bool CanBeAnchored(void) const
Check if there are any ids which can be used as anchors for the whole set of alignments.
Options for different alignment manager operations.
@ ePreserveRows
Preserve all rows as they were in the input (e.g.
Query-anchored alignment can be 2 or multi-dimensional.
TDim GetDim(void) const
How many rows.
static void TranslateNAToAA(const string &na, string &aa, int gen_code=kDefaultGenCode)
TSeqPos GetSeqStop(TDim row) const
TDim CheckNumRows(void) const
TSeqPos GetSeqStart(TDim row) const
ENa_strand GetSeqStrand(TDim row) const
Get strand (the first one if segments have different strands).
virtual void x_FireDataChanged()
IAlignRowHandle provides an abstract way to access alignment row data.
virtual bool UsesAATranslation() const =0
virtual const objects::CBioseq_Handle & GetBioseqHandle() const =0
virtual bool CanGetSeqString() const =0
virtual string & GetAlnSeqString(string &buffer, const IAlnExplorer::TSignedRange &aln_rng) const =0
IAlnMultiDataSource - interface to a data source representing an abstract multiple alignment.
virtual IAlnExplorer::EAlignType GetAlignType() const =0
virtual TSignedSeqPos GetSeqPosFromAlnPos(TNumrow for_row, TSeqPos aln_pos, IAlnExplorer::ESearchDirection dir=IAlnExplorer::eNone, bool try_reverse_dir=true) const =0
virtual const IAlignRowHandle * GetRowHandle(TNumrow row) const =0
virtual bool IsPositiveStrand(TNumrow row) const =0
virtual TSeqPos GetAlnStart(void) const =0
virtual bool CanGetId(TNumrow row) const =0
virtual IAlnSegmentIterator * CreateSegmentIterator(TNumrow row, const IAlnExplorer::TSignedRange &range, IAlnSegmentIterator::EFlags flags) const =0
virtual TNumrow GetNumRows(void) const =0
number of rows in alignment
virtual TSeqPos GetAlnStop(void) const =0
virtual const objects::CSeq_id & GetSeqId(TNumrow row) const =0
Alignment segment iterator interface.
@ eAllSegments
Iterate all segments.
@ fIndel
Either anchor or the selected row is not present in the segment.
@ fGap
Both anchor row and the selected row are not included in the segment (some other row is present and t...
virtual TSegTypeFlags GetType(void) const =0
Get current segment type.
virtual const TSignedRange & GetAlnRange(void) const =0
Get alignment range for the segment.
virtual int GetGenCode(IAlnExplorer::TNumrow row) const =0
bool IsConsensusSplice(const string &splice5, const string &splice3)
Consensus splice is GY..AG or AT..AC.
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
int TSignedSeqPos
Type for signed sequence position.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
void Error(CExceptionArgs_Base &args)
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
void Warning(CExceptionArgs_Base &args)
static void GetLabel(const CObject &obj, string *label, ELabelType type=eDefault)
void SetId(CSeq_id &id)
set the 'id' field in all parts of this location
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
void Reset(void)
Reset reference object.
position_type GetLength(void) const
position_type GetToOpen(void) const
static position_type GetPositionMax(void)
static TThisType GetEmpty(void)
TThisType & Set(position_type from, position_type to)
CRange< TSeqPos > TSeqRange
typedefs for sequence ranges
CRange< TSignedSeqPos > TSignedSeqRange
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
@ fWithCommas
Use commas as thousands separator.
static const char label[]
void SetFrom(TFrom value)
Assign a value to From data member.
TTo GetTo(void) const
Get the To member data.
TFrom GetFrom(void) const
Get the From member data.
void SetTo(TTo value)
Assign a value to To data member.
const TProtpos & GetProtpos(void) const
Get the variant data.
const TGenomic_id & GetGenomic_id(void) const
Get the Genomic_id member data.
bool IsSetParts(void) const
Basic segments are always in biologic order. Check if a value has been assigned to Parts data member.
TMatch GetMatch(void) const
Get the variant data.
const TProduct_id & GetProduct_id(void) const
Get the Product_id member data.
TGenomic_start GetGenomic_start(void) const
Get the Genomic_start member data.
TProduct_length GetProduct_length(void) const
Get the Product_length member data.
bool IsSetPoly_a(void) const
Start of poly(A) tail on the transcript. For sense transcripts: aligned product positions < poly-a <= ...
TDiag GetDiag(void) const
Get the variant data.
TProduct_type GetProduct_type(void) const
Get the Product_type member data.
TMismatch GetMismatch(void) const
Get the variant data.
TAmin GetAmin(void) const
Get the Amin member data.
const TParts & GetParts(void) const
Get the Parts member data.
const TProduct_start & GetProduct_start(void) const
Get the Product_start member data.
TGenomic_ins GetGenomic_ins(void) const
Get the variant data.
list< CRef< CSpliced_exon > > TExons
const TExons & GetExons(void) const
Get the Exons member data.
list< CRef< CSpliced_exon_chunk > > TParts
bool IsSetProduct_length(void) const
length of the product, in bases/residues from this (or from poly-a if present), a 3' unaligned length...
TPoly_a GetPoly_a(void) const
Get the Poly_a member data.
TProduct_ins GetProduct_ins(void) const
Get the variant data.
TNucpos GetNucpos(void) const
Get the variant data.
E_Choice Which(void) const
Which variant is currently selected.
@ e_Product_ins
insertion in product sequence (i.e. gap in the genomic sequence)
@ e_Diag
both sequences are represented, there is sufficient similarity between product and genomic sequences....
@ e_Genomic_ins
insertion in genomic sequence (i.e. gap in the product sequence)
@ e_Match
both sequences represented, product and genomic sequences match
@ e_Mismatch
both sequences represented, product and genomic sequences do not match
unsigned int
A callback function used to compare two keys in a database.
<!DOCTYPE HTML >< html > n< header > n< title > PubSeq Gateway Help Page</title > n< style > n th
range(_Ty, _Ty) -> range< _Ty >
const struct ncbi::grid::netcache::search::fields::SIZE size
const CharType(& source)[N]
NCBI C++ stream class wrappers for triggering between "new" and "old" C++ stream libraries.
vector< CRef< CAnchoredAln > > TAnchoredAlnVec
Collection of anchored alignments.
static bool GetSeqId(const T &d, set< string > &labels, const string name="", bool detect=false, bool found=false)
#define row(bind, expected)
the alignment we store with its parsed data
vector< CConstRef< objects::CSeq_loc > > locs
vector< string > str_ranges
visible values
vector< TSignedSeqRange > ranges
wxString ToWxString(const string &s)