NCBI C++ ToolKit
alnspan_vertmodel.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: alnspan_vertmodel.cpp 47479 2023-05-02 13:24:02Z ucko $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Yury Voronov
27  *
28  * File Description:
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 #include <corelib/ncbistre.hpp>
34 
36 
39 
45 #include <objmgr/align_ci.hpp>
46 #include <objmgr/seq_vector.hpp>
47 
58 #include <gui/objutils/label.hpp>
59 #include <serial/iterator.hpp>
60 
61 #include <math.h>
62 
65 
// Member-initializer list shared by the CAlnSpanVertModel constructors in
// this file (each of them starts with ": INIT_MEMBERS"), so every
// constructor begins from the same default state.
66 #define INIT_MEMBERS \
67  m_Threshold( 0 ) \
68  , mf_MarkIndels( fGap ) \
69  , m_NumSeqs ( 0 ) \
70  , mf_UseSpliced( false ) \
71  , mf_UseSparse( false ) \
72  , mf_ShowIndels( true ) \
73  , mf_RowsInvalidated( false )
74 
75 
77  : INIT_MEMBERS
78 {
    // No data source is attached here: members keep the INIT_MEMBERS
    // defaults and only the argument-less x_Init() is run.
79  x_Init();
80 }
81 
82 /*
83 CAlnSpanVertModel::CAlnSpanVertModel( IAlnMultiDataSource& source )
84  : INIT_MEMBERS
85 {
86  x_Init( source );
87 
88  UpdateRows();
89 }
90 
91 CAlnSpanVertModel::CAlnSpanVertModel( CSparseAln& source )
92  : INIT_MEMBERS
93 {
94  x_Init( source );
95 
96  UpdateRows();
97 }
98 */
99 
101  : INIT_MEMBERS
102 {
    // Attach the source and threshold, then build the rows immediately.
103  x_Init( source, threshold );
104 
105  UpdateRows();
106 }
107 
109  : INIT_MEMBERS
110 {
    // Attach the alignment and threshold, then build the rows immediately.
111  x_Init( sparce_aln, threshold );
112 
113  UpdateRows();
114 }
115 
116 CAlnSpanVertModel::CAlnSpanVertModel( const CSpliced_seg& spliced_seg, CScope& scope, int threshold )
117 : INIT_MEMBERS
118 {
119  x_Init( spliced_seg, scope, threshold );
120 
121  UpdateRows();
122 }
123 
/// Construct the model from a set of Seq-aligns.
///
/// The alignments are collected into a container, checked for a common
/// anchor sequence, merged with BuildAln() and wrapped in a CSparseAln that
/// is handed to x_Init().
///
/// @param input_aligns  alignments to merge
/// @param scope         scope used to construct the CSparseAln
/// @param threshold     threshold forwarded to x_Init()
/// @throws CException (eUnknown) when the alignments cannot be anchored
124 CAlnSpanVertModel::CAlnSpanVertModel( vector< CConstRef<CSeq_align> >& input_aligns, CScope& scope, int threshold )
125 : INIT_MEMBERS
126 {
    // A single spliced alignment additionally switches the model into
    // spliced mode.
127  if( input_aligns.size() == 1 && input_aligns[0]->GetSegs().IsSpliced() ){
129  const_cast<CSpliced_seg*>( &input_aligns[0]->GetSegs().GetSpliced() )
130  );
131  mf_UseSpliced = true;
132  }
133 
134  CAlnContainer container;
135 
136  if( input_aligns.size() > 0 ){
137  ITERATE( vector< CConstRef<CSeq_align> >, ait, input_aligns ){
138  container.insert( **ait );
139  }
140 
141  } else {
    // An empty input vector only trips the assertion; processing continues
    // with an empty container.
142  _ASSERT(false);
143  }
144 
145  /// Create a vector of seq-ids per seq-align
146  TIdExtract id_extract;
147  TAlnIdMap aln_id_map( id_extract, container.size() );
148  ITERATE( CAlnContainer, aln_it, container ){
149  try {
150  aln_id_map.push_back( **aln_it );
151 
    // NOTE(review): the exception is caught by value, which copies (and may
    // slice) it; catching by const reference is the usual form.
152  } catch( CAlnException e ){
153  LOG_POST( Error << "Skipping this alignment: " << e.what() );
154  }
155  }
156 
157  /// Create align statistics object
158  TAlnStats aln_stats( aln_id_map );
159 
160  /// Can the alignments be anchored?
161  if ( !aln_stats.CanBeAnchored() ) {
162  LOG_POST( Error << "Input alignments cannot be anchored because they don't share at least one common sequence." );
163  NCBI_THROW(
164  CException,
165  eUnknown, "Input alignments cannot be anchored because they don't share at least one common sequence."
166  );
167  }
168 
169  /// Create user options
170  CAlnUserOptions aln_user_options;
171 
172  /// Construct a vector of anchored alignments
173  TAnchoredAlnVec anchored_aln_vec;
174  CreateAnchoredAlnVec( aln_stats, anchored_aln_vec, aln_user_options );
175 
176  BuildAln( anchored_aln_vec, m_AnchoredAln, aln_user_options );
177 
    // A merged result with a single dimension is rebuilt with the
    // ePreserveRows merge algorithm.
178  if( m_AnchoredAln.GetDim() == 1 ){
180 
181  aln_user_options.m_MergeAlgo = CAlnUserOptions::ePreserveRows;
182 
183  BuildAln( anchored_aln_vec, m_AnchoredAln, aln_user_options );
184  }
185 
186  /// Get sequence:
187  CRef<CSparseAln> sparse_aln;
188  sparse_aln.Reset( new CSparseAln( m_AnchoredAln, scope ) );
189 
190  x_Init( *sparse_aln, threshold );
191 
    // Unlike the other constructors, UpdateRows() is not called here; the
    // rows stay flagged as invalidated by x_Init().
192 // UpdateRows();
193 }
194 
196 {
    // Appends one column name to m_ColNames for every extra-column index in
    // [0, eMaxExtraCols), in index order.
197  //LOG_POST( Info << "ASWM::x_Init: with no source..." );
198 
199  for( int i = 0; i < eMaxExtraCols; i++ ){
200  switch( i ){
201  case eLengthCol:
202  m_ColNames.push_back( "Length" );
203  break;
204  case eIdenticalCol:
205  m_ColNames.push_back( "% Identity" );
206  break;
207  case eTypeCol:
208  m_ColNames.push_back( "Type" );
209  break;
210  case eMismatchCountCol:
211  m_ColNames.push_back( "Mismatch" );
212  break;
213  case eGapLengthCol:
214  m_ColNames.push_back( "Gap" );
215  break;
216  default:
    // An index without a case above adds no name and only asserts.
217  _ASSERT( false ); // Unknown column
218  }
219  }
220  //m_ColNames.push_back( "Aln Range" );
221 
222  //LOG_POST( Info << "ASWM::x_Init: threshhold = " << m_Threshold );
223  //LOG_POST( Info << "ASWM::x_Init: num of seqs = " << m_NumSeqs );
224  //LOG_POST( Info << "ASWM::x_Init: use sparse = " << mf_UseSparse );
225 }
226 
/// Attach a multi-row data source.
///
/// Stores the source and threshold, appends one label column per row, then
/// appends the fixed extra columns through the argument-less x_Init() and
/// flags the rows as invalidated.
///
/// @param aDataSource  data source stored in m_AlnSrc
/// @param aThresh      value stored in m_Threshold
227 void CAlnSpanVertModel::x_Init( IAlnMultiDataSource& aDataSource, int aThresh )
228 {
229  //LOG_POST( Info << "ASWM::x_Init: with a data source..." );
230 
231  m_AlnSrc = &aDataSource;
232  m_Threshold = aThresh;
233 
235 
    // One label column per sequence row, in row order.
236  for( int row = 0; row < m_NumSeqs; row++ ){
237  const IAlignRowHandle* handle = aDataSource.GetRowHandle( row );
238  string label;
239 
241  *handle -> GetBioseqHandle().GetSeqId(),
243  &handle->GetBioseqHandle().GetScope()
244  );
245 
246  m_ColNames.push_back( label );
247  }
248 
    // The extra columns are appended after the per-row label columns.
249  x_Init();
250 
251  mf_RowsInvalidated = true;
252 }
253 
/// Attach a sparse alignment.
///
/// Appends the alignment to m_SparseAlns, takes the row count from
/// m_SparseAlns[0], switches the model into sparse mode, appends one label
/// column per row followed by the fixed extra columns, and flags the rows as
/// invalidated.
///
/// @param aSparseAln  alignment wrapped in a CRef and stored
/// @param aThresh     value stored in m_Threshold
254 void CAlnSpanVertModel::x_Init( CSparseAln& aSparseAln, int aThresh )
255 {
256  //LOG_POST( Info << "ASVM::x_Init: with a sparse aln..." );
257 
258  //m_SparseAln = &aSparseAln;
259  m_SparseAlns.push_back( CRef<CSparseAln>( &aSparseAln ) );
260  m_Threshold = aThresh;
261 
    // Element 0 is consulted, not the element just appended; the two are
    // the same only when m_SparseAlns was empty before this call.
262  m_NumSeqs = m_SparseAlns[0]->GetNumRows();
263  mf_UseSparse = true;
264 
265  for( int row = 0; row < m_NumSeqs; row++ ){
266  string label;
267 
269  m_SparseAlns[0]->GetSeqId( row ),
271  m_SparseAlns[0]->GetScope()
272  );
273 
274  m_ColNames.push_back( label );
275  }
276 
277  x_Init();
278 
279  mf_RowsInvalidated = true;
280 }
281 
/// Attach a spliced segment.
///
/// Stores the segment, scope and threshold, switches the model into spliced
/// mode, appends two label columns followed by the fixed extra columns, and
/// flags the rows as invalidated.
///
/// @param spliced_seg  segment stored in m_SplicedSeg (constness is cast away)
/// @param scope        scope stored in m_Scope
/// @param aThresh      value stored in m_Threshold
282 void CAlnSpanVertModel::x_Init( const CSpliced_seg& spliced_seg, CScope& scope, int aThresh )
283 {
284  m_SplicedSeg.Reset( const_cast<CSpliced_seg*>(&spliced_seg) );
285  m_Scope.Reset( &scope );
286  m_Threshold = aThresh;
287 
288  m_NumSeqs = m_SplicedSeg->CheckNumRows(); // always 2
289  mf_UseSpliced = true;
290 
291  string label;
292 
296  m_Scope
297  );
298  m_ColNames.push_back( label );
299 
303  m_Scope
304  );
305  m_ColNames.push_back( label );
306 
307  x_Init();
308 
309  mf_RowsInvalidated = true;
310 }
311 
312 
314 {
    // Drops the attached sources, the computed spans and the column names,
    // and zeroes the threshold and sequence count. The mode flags
    // (mf_UseSparse, mf_UseSpliced, ...) are not touched here.
315  m_AlnSrc.Reset();
316  //m_SparseAln.Reset();
317  m_SparseAlns.clear();
318 
319  m_AlnSpans.clear();
320  m_ColNames.clear();
321 
322  m_Threshold = 0;
323  m_NumSeqs = 0;
324 }
325 
326 
328 {
    // Row-building pipeline: populate raw spans, insert indel/intron rows,
    // merge short spans, convert to display rows, then cache locations.
329  TAlnSpans aln_spans;
330 
    // In sparse mode the alignment range is copied from the first sparse
    // alignment, and a translated alignment gets a base width of 3.
331  if (mf_UseSparse) {
332  m_AlnRange.SetFrom(m_SparseAlns[0]->GetAlnRange().GetFrom());
333  m_AlnRange.SetTo(m_SparseAlns[0]->GetAlnRange().GetTo());
334  if (m_SparseAlns[0]->IsTranslated())
335  m_BaseWidth = 3;
336  } else {
340  m_BaseWidth = 3;
341  }
342 
343  if (mf_UseSpliced) {
344  x_PopulateRowsSpliced(aln_spans);
345  } else {
346  x_PopulateRows(aln_spans);
347  }
348  x_InsertIntrons(aln_spans);
349  x_MergeRows(aln_spans);
350  ///
351  /// finally, convert our spans into rows for display
352  ///
353  x_ConvertRowsToSpans(aln_spans);
354  ///
355  /// last stage: final book keeping
356  /// we need to make sure that all spans have correct meta-information
357  ///
359  ///
360  /// after last stage: caching locs in span rows
361  ///
362  x_UpdateLocs();
363 }
364 
366 {
    // Scans the alignment range with one segment iterator per row and
    // appends match / mismatch / gap rows to aln_spans; afterwards fills in
    // the per-sequence ranges of every appended row.
    //
    // NOTE(review): the iterators are raw owning pointers released by the
    // delete loop after the scan; if anything in between throws, they are
    // never released.
367  vector< IAlnSegmentIterator* > iters;
368  iters.reserve(m_NumSeqs);
370 
371  if (mf_UseSparse) {
372  for (int row = 0; row < m_NumSeqs; row++) {
373  iters.push_back(m_SparseAlns[0]->CreateSegmentIterator(row, m_AlnRange, flags));
374  }
375  } else {
376  for (int row = 0; row < m_NumSeqs; row++) {
377  iters.push_back(m_AlnSrc->CreateSegmentIterator(row, m_AlnRange, flags));
378  }
379  }
380 
381  ///
382  /// first, scan for mismatches and gaps
383  ///
384 
    // seq_touched[row] becomes true once that row's iterator has been
    // advanced past at least one segment.
385  vector<bool> seq_touched(m_NumSeqs, false);
386 
388  while (pos <= m_AlnRange.GetTo()) {
    // next_pos ends up as the nearest segment boundary after pos across all
    // rows, so [pos, next_pos) is homogeneous in every row.
389  int next_pos = CRange<TSignedSeqPos>::GetPositionMax();
390 
392 
393  for (row = 0; row < m_NumSeqs; ++row) {
394  IAlnSegmentIterator& it = *iters[row];
395  if (!it)
396  continue;
    // NOTE(review): a_range is bound before the iterator is advanced in the
    // loop below; whether it then reflects the new current segment depends
    // on what GetAlnRange() returns a reference to - confirm.
397  const IAlnSegment::TSignedRange& a_range = it->GetAlnRange();
398 
399  while (a_range.GetTo() < (int)pos && ++it) {
400  if (!seq_touched[row])
401  seq_touched[row] = true;
402  continue;
403  }
404  if (!it)
405  continue;
406 
407  TSignedSeqPos from = a_range.GetFrom();
408  TSignedSeqPos too = a_range.GetToOpen();
409  if (from > pos) {
410  if (from < next_pos)
411  next_pos = from;
412  } else if (too < next_pos) {
413  next_pos = too;
414  }
415  }
    // With a base width of 3, pos is rounded down to a multiple of 3 and
    // the remainder is kept in off.
416  TSeqPos off = 0;
417  if (m_BaseWidth == 3) {
418  if (pos % 3 > 0) {
419  off = pos % 3;
420  pos -= pos % 3;
421  }
422  }
423 
424  vector<string> seq_spans;
425 
426  ///
427  /// scan to see if we have a gap
428  /// if no gaps, so we inspect for mismatches
429  /// we will break this range into match and mismatch segments
430  ///
431  bool seg_has_gap = false;
432  bool seg_has_unaln = false;
433  bool seg_is_translated = false;
434 
435  for (int row = 0; row < m_NumSeqs; row++) {
436  IAlnSegmentIterator& it = *iters[row];
437  if (!it)
438  continue;
439  if (mf_UseSparse) {
440  seg_is_translated = m_SparseAlns[0]->IsTranslated()
441  && m_SparseAlns[0]->GetBaseWidth(row) == 1;
442  } else {
443  seg_is_translated = m_AlnSrc->GetAlignType() == IAlnExplorer::fMixed
445  }
446 
    // A row whose current segment starts after this position contributes no
    // residues; it counts as "unaligned" only if that row was seen before.
447  if (it->GetAlnRange().GetFrom() > pos + off) {
448  seg_has_unaln = seq_touched[row];
449  continue;
450  } else if (it->GetType() &(IAlnSegment::fGap | IAlnSegment::fIndel)) {
451  seg_has_gap = true;
452  continue;
453  } //else//
454  //TSignedSeqPos aln_from = it->GetAlnRange().GetFrom();
455  //TSignedSeqPos aln_to = it->GetAlnRange().GetTo();
456  bool is_aa = false;
457 
459 
460  CRange<TSignedSeqPos> seq_range(pos, next_pos - 1);
461 
462  string seq_span;
463  if (mf_UseSparse) {
464  m_SparseAlns[0]->GetAlnSeqString(row, seq_span, seq_range, seg_is_translated);
465  } else {
466  m_AlnSrc->GetRowHandle(row)->GetAlnSeqString(seq_span, seq_range);
467  if (seg_is_translated) {
468  string new_seq;
469  CSparseAln::TranslateNAToAA(seq_span, new_seq, m_AlnSrc->GetGenCode(row));
470  seq_span.swap(new_seq);
471  }
472  }
473  // we collect non-empty strings only
474  if (!seq_span.empty()) {
475  seq_spans.push_back(seq_span);
476  }
477  }
478  }
479 
    // Fewer than two rows with residues: nothing to compare, so the whole
    // range becomes a single gap / discontinuity row.
480  if (seq_spans.size() <= 1) {
481 
482  if (seq_spans.empty()) {
483  LOG_POST(Warning << "All gaps/discontinuity for aln range [ " << pos << ", " << (next_pos - 1) << "]");
484  }
485 
486  SSpanRow newRow;
487  newRow.aln_range = TSeqRange(pos, next_pos - 1);
488  newRow.length = newRow.aln_range.GetLength();
489 
490  newRow.type = (seg_has_gap ? fGap : 0) | (seg_has_unaln ? fDiscontig : 0);
491 
492  if (newRow.type == 0) {
493  newRow.type = (m_NumSeqs == 1) ? fAligned : fInvalid;
494 
495  } else {
496  newRow.gap = newRow.aln_range.GetLength();
497  if (!seg_is_translated && m_BaseWidth == 3)
498  newRow.gap /= 3;
499  }
500 
501  aln_spans.push_back(newRow);
502  pos = next_pos;
503 
504  continue;
505  }
506  /*
507  if (base_width == 3) {
508  // adjust start/stop to trim incomplete codons
509  // GetAlnSeqString does the same
510  if (pos % 3 > 0) {
511  pos -= pos % 3;
512  pos += 3; // skip incomplete codon
513  }
514 
515  if (next_pos > pos && next_pos % 3 > 0)
516  next_pos -= next_pos % 3; // skip incomplete codon
517  }
518  */
519  int length = next_pos - pos;
520  length /= m_BaseWidth;
521  int size = (int)seq_spans.size();
522 
523  int f_span_type = fInvalid;
524  int span_start = 0;
525 
    // Walk the columns, comparing every collected string against the first
    // one. A row is emitted each time the match/mismatch state changes; the
    // extra pass with i == length yields fInvalid and flushes the last run.
526  int i = -1;
527  do {
528  int new_span_type = fInvalid;
529  if (++i < length) {
530  bool f_mismatch = false;
531  for (int j = 1; j < size; j++) {
532  _ASSERT(i < (int)seq_spans[j].length());
533  if (seq_spans[j][i] != seq_spans[0][i]) {
534  f_mismatch = true;
535  break;
536  }
537  }
538  new_span_type = f_mismatch ? fMismatch : fAligned;
539  }
540 
541  if (new_span_type != f_span_type) {
542 
543  if (f_span_type != fInvalid) {
544  SSpanRow newRow;
545  newRow.aln_range = TSeqRange(
546  pos + span_start * m_BaseWidth,
547  max(pos + span_start * m_BaseWidth, pos + (i * m_BaseWidth) - 1));
548  // should be always pos +i -1
549  newRow.length = newRow.aln_range.GetLength() / m_BaseWidth;
550  if (span_start > i - 1)
551  LOG_POST(Error << "Miscalculating span: pos=" << pos << ", start=" << span_start << ", oend=" << i);
552  newRow.type = f_span_type;
553  newRow.mismatch = f_span_type == fMismatch ? (newRow.aln_range.GetLength() / m_BaseWidth) : 0;
554  newRow.type |= (seg_has_gap ? fGap : 0) | (seg_has_unaln ? fDiscontig : 0);
555  aln_spans.push_back(newRow);
556  }
557 
558  span_start = i;
559  f_span_type = new_span_type;
560  }
561  } while (i < length);
562 
563  pos = next_pos;
564  }
565 
566  for (int row_to_delete = 0; row_to_delete < m_NumSeqs; row_to_delete++) {
567  delete iters[row_to_delete];
568  }
569  ///
570  /// Preliminary span set
571  ///
    // For every span, append the sequence range of each row that does not
    // have one yet (an empty range when either end maps to -1).
572  TAlnSpans::iterator iter = aln_spans.begin();
573  for (; iter != aln_spans.end();) {
574 
575  for (size_t row = iter->ranges.size(); (int)row < m_NumSeqs; row++) {
576 
577  TSignedSeqPos from, to;
578  bool is_aa = false;
579  if (mf_UseSparse) {
580  from = m_SparseAlns[0]->GetSeqPosFromAlnPos((IAlnExplorer::TNumrow)row, iter->aln_range.GetFrom());
581  to = m_SparseAlns[0]->GetSeqPosFromAlnPos((IAlnExplorer::TNumrow)row, iter->aln_range.GetTo());
582  is_aa = m_SparseAlns[0]->GetBaseWidth((IAlnExplorer::TNumrow)row) == 3;
583  } else {
584  from = m_AlnSrc->GetSeqPosFromAlnPos((IAlnExplorer::TNumrow)row, iter->aln_range.GetFrom());
585  to = m_AlnSrc->GetSeqPosFromAlnPos((IAlnExplorer::TNumrow)row, iter->aln_range.GetTo());
586  is_aa = m_AlnSrc->GetRowHandle((IAlnExplorer::TNumrow)row)->UsesAATranslation();
587  }
588 
589  if (from == -1 || to == -1) {
590  iter->ranges.push_back(TSignedSeqRange::GetEmpty());
591  } else {
592  if (from > to) {
593  swap(from, to);
594  }
595  if (is_aa) {
596  from /= m_BaseWidth;
597  to /= m_BaseWidth;
598  } else if (iter->type & fGap) {
    // NOTE(review): this branch runs once per qualifying row, so length and
    // gap can be divided more than once for the same span - confirm intent.
599  iter->length /= m_BaseWidth;
600  iter->gap /= m_BaseWidth;
601  }
602  iter->ranges.push_back(TSignedSeqRange(from, to));
603  }
604  }
605 
606  ++iter;
607  }
608 
609 }
610 
611 
613 {
    // Walks the rows in order and, per sequence, tracks the next expected
    // sequence position. Where a row's range does not continue from that
    // position, a row covering the skipped stretch of that one sequence is
    // inserted in front of it. Does nothing when mf_ShowIndels is false.
614  ///
615  /// insert extra gaps, if needed
616  ///
617  if (!mf_ShowIndels)
618  return;
619  vector<bool> seq_strand(m_NumSeqs, true);
620 
621  if (mf_UseSparse) {
622  for (int seq_ix = 0; seq_ix < m_NumSeqs; seq_ix++) {
623  seq_strand[seq_ix] = m_SparseAlns[0]->IsPositiveStrand(seq_ix);
624  }
625  } else {
626  for (int seq_ix = 0; seq_ix < m_NumSeqs; seq_ix++) {
627  seq_strand[seq_ix] = m_AlnSrc->IsPositiveStrand(seq_ix);
628  }
629  }
630 
    // -1 means "no range seen yet for this sequence".
631  vector<TSignedSeqPos> seq_pos(m_NumSeqs, -1);
632 
633  NON_CONST_ITERATE(TAlnSpans, iter, rows)
634  {
635 
636  for (int seq_ix = 0; seq_ix < m_NumSeqs; seq_ix++) {
    // Re-bound on every pass: after an insertion below, iter (and therefore
    // span_row) refers to the inserted row for the remaining sequences.
637  SSpanRow& span_row = *iter;
638 
639  if (!span_row.ranges[seq_ix].Empty()) {
640 
641  TSignedSeqPos cur_seq_pos = seq_pos[seq_ix];
642  TSignedSeqPos cur_span_from = span_row.ranges[seq_ix].GetFrom();
643  TSignedSeqPos cur_span_to = span_row.ranges[seq_ix].GetTo();
644 
    // NOTE(review): these two strings are never used; the inner blocks
    // declare their own splice5/splice3 that shadow them.
645  string splice3, splice5;
646 
647  if (seq_strand[seq_ix]) {
648 
    // NOTE(review): "> 0" also excludes a tracked position of exactly 0,
    // not only the initial -1 - confirm that is intended.
649  if (cur_seq_pos > 0 && cur_span_from > cur_seq_pos) {
650  SSpanRow gap_row;
651 
652  gap_row.ranges.assign(m_NumSeqs, TSignedSeqRange());
653  gap_row.ranges[seq_ix] =
654  TSignedSeqRange(seq_pos[seq_ix], cur_span_from - 1)
655  ;
656 
657  gap_row.type = mf_MarkIndels;
658  gap_row.length = gap_row.gap = gap_row.ranges[seq_ix].GetLength();
659  gap_row.mismatch = 0;
660  gap_row.identity = 0.0;
661 
    // When indels are marked as introns (sparse mode only), the first and
    // last two bases of the skipped stretch are tested as a splice pair.
662  if (mf_MarkIndels == fIntron && mf_UseSparse) {
663  string intron;
664  m_SparseAlns[0]->GetSeqString(
665  seq_ix, intron, seq_pos[seq_ix], cur_span_from - 1
666  );
667  if (intron.length() > 1) {
668  string splice5 = intron.substr(0, 2);
669  string splice3 = intron.substr(intron.length() - 2);
670 
671  if (!IsConsensusSplice(splice5, splice3)) {
672  gap_row.type = fIntronNC;
673  }
674  }
675  }
676 
677  iter = rows.insert(iter, gap_row);
678  }
679 
680  seq_pos[seq_ix] = cur_span_to + 1;
681 
682  } else {
683 
    // Minus strand: positions are expected to decrease from row to row.
684  if (cur_seq_pos > 0 && cur_span_to < cur_seq_pos) {
685  SSpanRow gap_row;
686 
687  gap_row.ranges.assign(m_NumSeqs, TSignedSeqRange());
688  gap_row.ranges[seq_ix] =
689  TSignedSeqRange(cur_span_to + 1, seq_pos[seq_ix])
690  ;
691 
692  gap_row.type = mf_MarkIndels;
693  gap_row.length = gap_row.gap = gap_row.ranges[seq_ix].GetLength();
694  gap_row.mismatch = 0;
695  gap_row.identity = 0.0;
696 
697  if (mf_MarkIndels == fIntron && mf_UseSparse) {
698  string intron;
699  m_SparseAlns[0]->GetSeqString(
700  seq_ix, intron, cur_span_to + 1, seq_pos[seq_ix]
701  );
702  if (intron.length() > 1) {
703  string splice5 = intron.substr(0, 2);
704  string splice3 = intron.substr(intron.length() - 2);
705 
706  if (!IsConsensusSplice(splice5, splice3)) {
707  gap_row.type = fIntronNC;
708  }
709  }
710  }
711 
712  iter = rows.insert(iter, gap_row);
713  }
714 
715  seq_pos[seq_ix] = cur_span_from - 1;
716  }
717  }
718  }
719  }
720 }
721 
722 
724 {
    // Folds short, purely aligned rows (alignment length <= m_Threshold)
    // into a neighbouring aligned/mismatch row on either side, accumulating
    // length, mismatch, gap and the per-sequence ranges.
725  ///
726  /// next, coalesce our spans
727  ///
728 
729  if (rows.empty())
730  return;
731  TAlnSpans::iterator iter = rows.begin();
732  TAlnSpans::iterator prev = rows.end();
733 
734  for (; iter != rows.end(); prev = iter, ++iter) {
    // Intron, tail and poly(A) rows are never merge candidates.
735  if (
736  iter->type == fIntron
737  || iter->type == fIntronNC
738  || iter->type == fTail
739  || iter->type == fPolyA
740  ) {
741  continue;
742  }
743 
744  if (iter->type != fAligned || (int)iter->aln_range.GetLength() > m_Threshold)
745  continue;
746 
747  bool merge_prev = false;
748  bool merge_next = false;
749 
750  TAlnSpans::iterator next = iter + 1;
751 
752  if (prev != rows.end() && (prev->type & (fMismatch | fAligned))) {
753  merge_prev = true;
754  }
755 
756  if (next != rows.end() && (next->type & (fMismatch | fAligned))) {
757  merge_next = true;
758  }
759 
    // Accumulate into a copy first; the originals are erased further down.
760  SSpanRow merged_row = *iter;
761 
762  if (merge_prev) {
763  merged_row.type |= prev->type;
764  merged_row.length += prev->length;
765  merged_row.mismatch += prev->mismatch;
766  merged_row.gap += prev->gap;
767 
768  merged_row.aln_range.SetFrom(prev->aln_range.GetFrom());
769 
770  for (int row = 0; row < m_NumSeqs; row++) {
771  if (row >= (int)merged_row.ranges.size()) {
772  LOG_POST(Warning << "ASV: Not enough rows!");
773  break;
774 
775  } else if (row >= (int)prev->ranges.size()) {
776  LOG_POST(Warning << "ASV: Not enough rows!");
777  break;
778  }
779 
780  merged_row.ranges[row] += prev->ranges[row];
781  }
782  }
783 
784  if (merge_next) {
785  merged_row.type |= next->type;
786  merged_row.length += next->length;
787  merged_row.mismatch += next->mismatch;
788  merged_row.gap += next->gap;
789 
790  merged_row.aln_range.SetTo(next->aln_range.GetTo());
791 
792  for (int row = 0; row < m_NumSeqs; row++) {
793  if (row >= (int)merged_row.ranges.size()) {
794  LOG_POST(Warning << "ASV: Not enough rows!");
795  break;
796 
797  } else if (row >= (int)next->ranges.size()) {
798  LOG_POST(Warning << "ASV: Not enough rows!");
799  break;
800  }
801 
802  merged_row.ranges[row] += next->ranges[row];
803  }
804  }
805 
    // Erase the rows that were absorbed: the current row when merging into
    // prev, and/or the next row. With no merge the erased range is empty.
806  TAlnSpans::iterator from = merge_prev ? iter : next;
807  TAlnSpans::iterator to = merge_next ? (next + 1) : next;
808 
809  rows.erase(from, to);
810 
    // The merged result is written to the surviving slot: prev when it was
    // merged, otherwise the current position.
811  if (merge_prev) {
812  iter = prev;
813  }
814  *iter = merged_row;
815  }
816 }
817 
818 ///
819 /// finally, convert our spans into rows for display
820 ///
822 {
    // Copies the rows into m_AlnSpans in order, inserting an untyped filler
    // row wherever the next row does not start at the current position.
823 
825  m_AlnSpans.reserve(rows.size() * 2);
826  TAlnSpans::iterator iter = rows.begin();
827  for (; iter != rows.end();) {
    // The iterator is advanced only in the else branch, so a filler row is
    // emitted first and the same input row is re-examined afterwards.
828  if (pos < (int)iter->aln_range.GetFrom()) {
829  SSpanRow newRow;
830  newRow.aln_range.SetFrom(pos);
831  newRow.aln_range.SetTo(iter->aln_range.GetFrom() - 1);
832  newRow.length = newRow.aln_range.GetLength() / m_BaseWidth;
833  newRow.identity = 0;
834  newRow.type = 0;
835  newRow.gap = 0;
836  pos = newRow.aln_range.GetTo() + 1;
837 
838  m_AlnSpans.push_back(newRow);
839  } else {
840  m_AlnSpans.push_back(*iter);
841  pos = iter->aln_range.GetTo() + 1;
842  ++iter;
843  }
844  }
    // NOTE(review): the trailing filler stops at GetTo() - 1 and is skipped
    // when pos == GetTo(); if m_AlnRange.GetTo() is an inclusive end, the
    // last position is left uncovered - confirm.
845  if (pos < m_AlnRange.GetTo()) {
846  SSpanRow newRow;
847  newRow.aln_range.SetFrom(pos);
848  newRow.aln_range.SetTo(m_AlnRange.GetTo() - 1);
849  newRow.length = newRow.aln_range.GetLength() / m_BaseWidth;
850  newRow.identity = 0;
851  newRow.type = 0;
852  newRow.gap = 0;
853  pos = newRow.aln_range.GetTo() + 1;
854 
855  m_AlnSpans.push_back(newRow);
856 
857  }
858 }
859 
860 
862 {
    // Final bookkeeping over m_AlnSpans: fills in missing lengths, drops
    // zero-length spans, defaults the type, computes percent identity and
    // appends any per-sequence ranges that are still missing.
863  unsigned warning_cnt = 0; // warning counter to limit excessive reporting
864  TAlnSpans::iterator iter = m_AlnSpans.begin();
865  for (; iter != m_AlnSpans.end();) {
866 
867  if (iter->length == 0) {
868  iter->length = iter->aln_range.GetLength() / m_BaseWidth;
869  }
    // Still zero after recomputation: remove the span. Only the first ten
    // removals are logged.
870  if (iter->length == 0) {
871  ++warning_cnt;
872  if (warning_cnt < 10) {
873  LOG_POST(Warning << "Zero length span!");
874  } else
875  if (warning_cnt == 10) {
876  LOG_POST(Warning << "Zero length span! (Further warnings supressed)");
877  }
878  iter = m_AlnSpans.erase(iter);
879  continue;
880  }
881 
882  if (!iter->type) {
883  iter->type = fAligned;
884  }
885 
    // Identity is a percentage rounded to two decimals; spans whose type has
    // no bits beyond fGap / fDiscontig get 0.
886  if (iter->type & ~(fGap | fDiscontig)) {
887  double identity =
888  ((double)(iter->length - iter->mismatch)) / iter->length
889  ;
890  iter->identity = floor(identity * 10000 + 0.5) / 100.0;
891 
892  } else {
893  iter->identity = 0.0;
894  }
895 
896  for (size_t row = iter->ranges.size(); (int)row < m_NumSeqs; row++) {
897 
898  TSignedSeqPos from, to;
899  bool is_aa = false;
900 
901  if (mf_UseSparse) {
902  from = m_SparseAlns[0]->GetSeqPosFromAlnPos((IAlnExplorer::TNumrow)row, iter->aln_range.GetFrom());
903  to = m_SparseAlns[0]->GetSeqPosFromAlnPos((IAlnExplorer::TNumrow)row, iter->aln_range.GetTo());
904  is_aa = m_SparseAlns[0]->GetBaseWidth((IAlnExplorer::TNumrow)row) == 3;
905  } else {
906  from = m_AlnSrc->GetSeqPosFromAlnPos((IAlnExplorer::TNumrow)row, iter->aln_range.GetFrom());
907  to = m_AlnSrc->GetSeqPosFromAlnPos((IAlnExplorer::TNumrow)row, iter->aln_range.GetTo());
908  is_aa = m_AlnSrc->GetRowHandle((IAlnExplorer::TNumrow)row)->UsesAATranslation();
909  }
910 
911 
912  /*
913  if( from == -1 && iter->aln_range.GetFrom() > 0 ){
914  from = m_AlnSrc->GetSeqPosFromAlnPos( row, iter->aln_range.GetFrom() -1 );
915  }
916  if( to == -1 ){
917  to = m_AlnSrc->GetSeqPosFromAlnPos( row, iter->aln_range.GetTo() +1 );
918  }
919  */
920 
921  if (from == -1 || to == -1) {
922  iter->ranges.push_back(TSignedSeqRange::GetEmpty());
923  } else {
924  if (from > to) {
925  swap(from, to);
926  }
927  if (is_aa) {
928  from /= 3;
929  to /= 3;
930  } else if (iter->type & fGap) {
    // NOTE(review): runs once per qualifying row, after identity has already
    // been computed from length; with several such rows length and gap are
    // divided by 3 repeatedly. The divisor is a literal 3 here rather than
    // m_BaseWidth - confirm both are intended.
931  iter->length /= 3;
932  iter->gap /= 3;
933  }
934 
935  iter->ranges.push_back(TSignedSeqRange(from, to));
936  }
937  }
938 
939  ++iter;
940  }
941 }
942 
943 
944 
946 {
    // Caches a CSeq_loc per sequence row in each span: an "empty" location
    // for rows with an empty range, otherwise an interval with strand and id.
948  {
949  SSpanRow& span_row = *iter;
950  for (int row = 0; row < m_NumSeqs; row++) {
951  if (span_row.ranges[row].Empty()) {
952  CRef<CSeq_loc> loc(new CSeq_loc());
953  auto id = Ref(new CSeq_id);
954  id->Assign((mf_UseSparse) ? m_SparseAlns[0]->GetSeqId(row) : m_AlnSrc->GetSeqId(row));
955  loc->SetEmpty(*id);
956  span_row.locs.push_back(loc);
957  continue;
958  }
    // When this condition is false nothing is pushed for the row, so locs
    // can end up shorter than ranges and no longer index-aligned with it.
959  if (mf_UseSparse || m_AlnSrc->CanGetId(row)) {
960  CRef<CSeq_loc> loc(new CSeq_loc());
961  loc->SetInt().SetFrom(span_row.ranges[row].GetFrom());
962  loc->SetInt().SetTo(span_row.ranges[row].GetTo());
963  if (mf_UseSparse) {
964  loc->SetInt().SetStrand(
965  m_SparseAlns[0]->IsPositiveStrand(row) ? eNa_strand_plus : eNa_strand_minus
966  );
967  loc->SetId(m_SparseAlns[0]->GetSeqId(row));
968  } else {
969  loc->SetInt().SetStrand(
971  );
972  loc->SetId(m_AlnSrc->GetSeqId(row));
973  }
974  span_row.locs.push_back(loc);
975  }
976  }
977  }
978 }
979 
980 
982 {
    // Builds span rows directly from the spliced segment: optional leading
    // poly(A) / unaligned rows, one row per exon chunk, optional trailing
    // unaligned / poly(A) rows, then alignment ranges for every row that has
    // a non-empty range in slot 0. Rows carry two ranges; going by the
    // variable names, slot 0 is genomic and slot 1 is product.
983  int product_length = 0;
984  bool is_protein =
986  ;
987 
989  product_length = m_SplicedSeg->GetProduct_length();
990  }
991 
992  if(
995  ){
996  int poly_a = m_SplicedSeg->GetPoly_a();
997 
998  if( poly_a <= -1 ){
999  // inferred poly(A) tail at transcript's start
1000  } else {
1001  int start = m_SplicedSeg->GetSeqStart( 0 );
1002 
1003  if( poly_a < start ){
1004  // range [0, poly_a] - poly(A)
1005  SSpanRow newRow;
1006  newRow.ranges.push_back( TSignedSeqRange() );
1007  newRow.ranges.push_back(TSignedSeqRange(0, poly_a));
    // NOTE(review): the range [0, poly_a] holds poly_a + 1 positions while
    // length is set to poly_a - confirm.
1008  newRow.length = poly_a;
1009  newRow.type = fPolyA;
1010 
1011  span_rows.push_back( newRow );
1012 
1013  if( poly_a < start -1 ){
1014  // range [poly_a+1, start-1] - unaligned head
1015  SSpanRow newRow;
1016  newRow.ranges.push_back( TSignedSeqRange() );
1017  newRow.ranges.push_back(TSignedSeqRange(poly_a + 1, start - 1));
1018  newRow.length = start - 1 - poly_a;
1019  newRow.type = fTail;
1020 
1021  span_rows.push_back( newRow );
1022  }
1023  }
1024  }
1025  } else {
1026  int start = m_SplicedSeg->GetSeqStart( 0 );
1027 
1028  if( start > 0 ){
1029  SSpanRow newRow;
1030  newRow.aln_range = TSeqRange( 0, start-1 );
1031  newRow.ranges.push_back(TSignedSeqRange());
1032  newRow.ranges.push_back(TSignedSeqRange());
    // NOTE(review): aln_range [0, start-1] holds start positions while
    // length is set to start - 1 - confirm.
1033  newRow.length = start - 1;
1034  newRow.type = fTail;
1035 
1036  span_rows.push_back( newRow );
1037  }
1038  }
1039 
1041 
1042  ITERATE( CSpliced_seg::TExons, ex_itr, exons ){
1043 
1044  const CSpliced_exon& exon = **ex_itr;
1045 
    // Running product / genomic positions, advanced chunk by chunk.
1046  int prod_pos =
1047  is_protein
1048  ? exon.GetProduct_start().GetProtpos().GetAmin()
1049  : exon.GetProduct_start().GetNucpos()
1050  ;
1051  int gen_pos = exon.GetGenomic_start();
1052 
    // Exons without parts contribute no rows.
1053  if( exon.IsSetParts() ){
1054  ITERATE( CSpliced_exon::TParts, part_itr, exon.GetParts() ){
1055  const CSpliced_exon_chunk& chunk = **part_itr;
1056 
1057  SSpanRow newRow;
1058  int len = 0;
1059  int prod_len = 0;
1060 
1061  switch (chunk.Which()) {
1063  len = prod_len = chunk.GetMatch();
1064  if (is_protein)
1065  prod_len /= 3;
1066  else
1067  prod_len -= 1;
1068 
1069  newRow.ranges.push_back(TSignedSeqRange(gen_pos, gen_pos + len - 1));
1070  newRow.ranges.push_back(TSignedSeqRange(prod_pos, prod_pos + prod_len));
1071  newRow.type = fAligned;
1072 
1073  span_rows.push_back(newRow);
1074 
    // NOTE(review): for a nucleotide product prod_len is len - 1 here, so
    // prod_pos advances one position less than gen_pos and the next chunk
    // starts on the last product position of this one. The same pattern
    // repeats in the cases below - confirm.
1075  prod_pos += prod_len;
1076  gen_pos += len;
1077 
1078  break;
1080  len = prod_len = chunk.GetMismatch();
1081  if (is_protein)
1082  prod_len /= 3;
1083  else
1084  prod_len -= 1;
1085 
1086  newRow.ranges.push_back(TSignedSeqRange(gen_pos, gen_pos + len - 1));
1087  newRow.ranges.push_back(TSignedSeqRange(prod_pos, prod_pos + prod_len));
1088  newRow.type = fMismatch;
1089  span_rows.push_back(newRow);
1090 
1091  prod_pos += prod_len;
1092  gen_pos += len;
1093 
1094  break;
1096  {
1097  len = prod_len = chunk.GetDiag();
1098  if (is_protein)
1099  prod_len /= 3;
1100  else
1101  prod_len -= 1;
1102 
1103 
1104  TSeqPos from = gen_pos;
1105  TSeqPos to = gen_pos + len - 1;
1106  TSeqPos aln_from = m_SparseAlns[0]->GetAlnPosFromSeqPos((int)0, from);
1107  TSeqPos aln_to = m_SparseAlns[0]->GetAlnPosFromSeqPos((int)0, to);
1108  if (aln_to < aln_from)
1109  swap(aln_to, aln_from);
1110  newRow.aln_range.Set(aln_from, aln_to + 1);
1111  from = aln_from;
1112  to = aln_to;
    // For a protein product the row-0 range is moved onto codon boundaries:
    // from is rounded up to a multiple of 3, to is adjusted by its remainder.
1113  if (is_protein) {
1114  if (from % 3)
1115  from += 3 - from % 3;
1116  int off = to % 3;
1117  if (off == 1) {
1118  --to;
1119  } else if (off == 2) {
1120  ++to;
1121  }
1122  }
1123  // Diag type includes segments with mismatches
1124  // and we have to calculate them
1125  CRange<TSignedSeqPos> seq_range(aln_from, aln_to);
1126  string g_str;
1127  m_SparseAlns[0]->GetAlnSeqString(0, g_str, CRange<TSignedSeqPos>(from, to), is_protein);
1128  string p_str;
1129  m_SparseAlns[0]->GetAlnSeqString(1, p_str, seq_range, false);
1130 
    // The sizes are asserted equal, but the loop still copes with a shorter
    // p_str by counting the excess of g_str as mismatches.
    // NOTE(review): "auto i = 0" is an int compared against size_t values.
1131  _ASSERT(g_str.size() == p_str.size());
1132  for (auto i = 0; i < g_str.size(); ++i) {
1133  if (i < p_str.size()) {
1134  if (g_str[i] != p_str[i])
1135  ++newRow.mismatch;
1136  } else {
1137  newRow.mismatch += (g_str.size() - p_str.size());
1138  break;
1139  }
1140  }
1141 
1142  newRow.ranges.push_back(TSignedSeqRange(gen_pos, gen_pos + len - 1));
1143  newRow.ranges.push_back(TSignedSeqRange(prod_pos, prod_pos + prod_len));
1144 
1145  newRow.type = fAligned;
1146  if (newRow.mismatch > 0)
1147  newRow.type |= fMismatch;
1148 
1149  span_rows.push_back(newRow);
1150 
1151  prod_pos += prod_len;
1152  gen_pos += len;
1153 
1154  break;
1155  }
1157  prod_len = chunk.GetProduct_ins();
1158  if (is_protein)
1159  prod_len /= 3;
1160  else
1161  prod_len -= 1;
1162 
1163  newRow.ranges.push_back( TSignedSeqRange() );
1164  newRow.ranges.push_back(TSignedSeqRange(prod_pos, prod_pos + prod_len));
1165  newRow.type = fGap;
1166  newRow.length = prod_len;
1167  span_rows.push_back( newRow );
1168 
1169  prod_pos += prod_len;
1170 
1171  break;
1173  len = chunk.GetGenomic_ins();
1174 
1175  newRow.ranges.push_back( TSignedSeqRange( gen_pos, gen_pos+len-1 ) );
1176  newRow.ranges.push_back(TSignedSeqRange());
1177  newRow.type = fGap;
1178  newRow.length = len;
1179  span_rows.push_back( newRow );
1180 
1181  gen_pos += len;
1182 
1183  break;
1184  default:
1185  ;
1186  }
1187  }
1188  }
1189  }
1190 
1191 
1192  if(
1194  && m_SplicedSeg->IsSetPoly_a()
1195  ){
1196  int poly_a = m_SplicedSeg->GetPoly_a();
1197 
1198  if( poly_a >= product_length ){
1199  // inferred poly(A) tail at transcript's end
1200  } else {
1201  int stop = m_SplicedSeg->GetSeqStop( 0 );
1202 
1203  if( stop < poly_a ){
1204  if( stop+1 < poly_a ){
1205  // range [stop+1, poly_a] - unaligned tail
1206  SSpanRow newRow;
1207  newRow.ranges.push_back( TSignedSeqRange() );
1208  newRow.ranges.push_back(TSignedSeqRange(stop + 1, poly_a - 1));
    // NOTE(review): the range [stop+1, poly_a-1] holds poly_a - stop - 1
    // positions while length is set to (poly_a - stop) + 1 - confirm.
1209  newRow.length = (poly_a - stop) + 1;
1210  newRow.type = fTail;
1211  span_rows.push_back( newRow );
1212  }
1213 
1214  // range [poly_a, product_length] - poly(A)
1215  SSpanRow newRow;
1216  newRow.ranges.push_back( TSignedSeqRange() );
1217  newRow.ranges.push_back(TSignedSeqRange(poly_a, product_length - 1));
1218  newRow.length = (product_length - poly_a);
1219  newRow.type = fPolyA;
1220  span_rows.push_back( newRow );
1221  }
1222  }
1223  } else {
1224  int stop = m_SplicedSeg->GetSeqStop( 0 );
1225 
1226  if( stop < product_length - 1){
1227  SSpanRow newRow;
1228  newRow.ranges.push_back( TSignedSeqRange() );
1229  newRow.ranges.push_back(TSignedSeqRange(stop + 1, product_length - 1));
    // NOTE(review): the range [stop+1, product_length-1] holds
    // product_length - stop - 1 positions while length is set to
    // product_length - stop - confirm.
1230  newRow.length = product_length - stop;
1231  newRow.type = fTail;
1232 
1233  span_rows.push_back( newRow );
1234  }
1235  }
1236 
    // Derive aln_range for every row with a non-empty slot-0 range. This
    // also overwrites the aln_range already set in the diag case above.
1237  for (auto&& aln_row : span_rows) {
1238  if (!aln_row.ranges.empty() && aln_row.ranges[0].NotEmpty()) {
1239  TSeqPos aln_from = m_SparseAlns[0]->GetAlnPosFromSeqPos((int)0, aln_row.ranges[0].GetFrom());
1240  TSeqPos aln_to = m_SparseAlns[0]->GetAlnPosFromSeqPos((int)0, aln_row.ranges[0].GetTo());
1241  if (aln_to < aln_from)
1242  swap(aln_to, aln_from);
1243  aln_row.aln_range.Set(aln_from, aln_to + 1);
1244  }
1245  }
1246 
1247 }
1248 
1250 {
1251  if( mf_RowsInvalidated ){
1252 
1253  m_AlnSpans.clear();
1254  x_CreateRows();
1255 
1257 
1258  mf_RowsInvalidated = false;
1259  }
1260 }
1261 
1263 {
1264  if( th != m_Threshold && th >= 0 ){
1265 
1266  m_Threshold = th;
1267 
1268  mf_RowsInvalidated = true;
1269  }
1270 }
1271 
1273 {
1274  if( indel != mf_ShowIndels ){
1275 
1276  mf_ShowIndels = indel;
1277 
1278  mf_RowsInvalidated = true;
1279  }
1280 }
1281 
1282 
1284 {
1285  //TODO revive or remove this
1286  /*
1287  for( TAlnSegments::iterator iter = m_AlnSpans.begin(); iter != NULL; ++iter ){
1288  const SSpanRow& proxy = *iter;
1289  if( proxy.align->IsSetScore() ){
1290  ITERATE (CSeq_align::TScore, score_iter, proxy.align->GetScore()) {
1291  const CScore& score = **score_iter;
1292  string str;
1293  if (score.GetId().IsStr()) {
1294  str = score.GetId().GetStr();
1295  } else {
1296  str = NStr::IntToString(score.GetId().GetId());
1297  }
1298 
1299  proxy.scores[str] = *score_iter;
1300 
1301  TScoreColNames::iterator col_iter = m_ColNames.find(str);
1302  if (col_iter == m_ColNames.end()) {
1303  size_t col_idx = CAlnSpanVertModel::eScoreStart;
1304  col_idx += m_ColNames.size();
1305  m_ColNames [str ] = col_idx;
1306  m_ColIndices[col_idx] = str;
1307  }
1308  }
1309  }
1310  }
1311  */
1312 }
1313 
1315 {
1316  return static_cast<int>(m_AlnSpans.size());
1317 }
1318 
1319 
1321 {
1322  return static_cast<int>(m_ColNames.size());
1323 }
1324 
1325 
1326 wxString CAlnSpanVertModel::GetColumnName( int aColIdx ) const
1327 {
1328  return ToWxString(m_ColNames[(size_t)aColIdx]);
1329 }
1330 
1331 
1332 wxString CAlnSpanVertModel::GetColumnType(int aColIdx) const
1333 {
1334  aColIdx -= m_NumSeqs;
1335 
1336  if( aColIdx < 0 ){
1337  return wxT("string");
1338  }
1339 
1340  switch( aColIdx ){
1341  case eLengthCol:
1342  case eMismatchCountCol:
1343  case eGapLengthCol:
1344  return wxT("int");
1345 
1346  case eIdenticalCol:
1347  return wxT("double");
1348 
1349  case eTypeCol:
1350  return wxT("string");
1351  default:
1352  ;//_ASSERT( false ); // Unknown column
1353  }
1354 
1355  return wxT("string");
1356 }
1357 
1358 
/// Appends src to dst, inserting ", " first when dst already has content.
///
/// @param dst  String being built up; modified in place.
/// @param src  Text to append.
static void s_Append(string& dst, const string& src)
{
    if ( dst.empty() ) {
        // Nothing accumulated yet, so no separator is needed.
        dst += src;
        return;
    }

    dst += ", ";
    dst += src;
}
1366 
1367 
1368 wxVariant CAlnSpanVertModel::GetValueAt( int row, int col ) const
1369 {
1370  _ASSERT( row < (int)m_AlnSpans.size() );
1371  if( row >= (int)m_AlnSpans.size() ){
1372  NCBI_THROW(
1373  CException,
1374  eUnknown, "CAlnSpanVertModel::GetValueAt(): Array index out-of-bounds"
1375  );
1376  }
1377 
1378  const SSpanRow& spanRow = m_AlnSpans[row];
1379 
1380  int extra_col = col - m_NumSeqs;
1381 
1382  switch( extra_col ){
1383  case eTypeCol:
1384  if (spanRow.str_type.empty()) {
1385  // Type
1386  if( spanRow.type & fAligned ){
1387  if( spanRow.type & fMismatch ){
1388  s_Append( spanRow.str_type, "Mixed" );
1389  } else {
1390  s_Append( spanRow.str_type, "Aligned" );
1391  }
1392  } else if( spanRow.type & fMismatch ){
1393  s_Append( spanRow.str_type, "Mismatch" );
1394  }
1395  if( spanRow.type & fGap ){
1396  s_Append( spanRow.str_type, "Gap" );
1397  }
1398  if( spanRow.type & fInvalid ){
1399  s_Append( spanRow.str_type, "Invalid" );
1400  }
1401  if( spanRow.type & fDiscontig ){
1402  s_Append( spanRow.str_type, "Discontig" );
1403  }
1404  if( spanRow.type & fIntron ){
1405  s_Append( spanRow.str_type, "Intron" );
1406  }
1407  if( spanRow.type & fIntronNC ){
1408  s_Append( spanRow.str_type, "Intron (non-consensus)" );
1409  }
1410  if( spanRow.type & fPolyA ){
1411  s_Append( spanRow.str_type, "Poly(A)" );
1412  }
1413  if( spanRow.type & fTail ){
1414  s_Append( spanRow.str_type, "Tail" );
1415  }
1416  }
1417  return ToWxString( spanRow.str_type );
1418 
1419  case eLengthCol:
1420  return wxVariant( spanRow.length );
1421 
1422  case eMismatchCountCol:
1423  return wxVariant( spanRow.mismatch );
1424 
1425  case eGapLengthCol:
1426  return wxVariant( spanRow.gap );
1427 
1428  case eIdenticalCol:
1429  return wxVariant( spanRow.identity );
1430  }
1431 
1432  spanRow.str_ranges.resize( m_NumSeqs +1 );
1433  if( col < m_NumSeqs ){
1434  if(
1435  col < 0
1436  || col >= (int)spanRow.ranges.size()
1437  || spanRow.ranges[col].Empty()
1438  ){
1439  return wxString();
1440  }
1441  if( spanRow.str_ranges[col].empty() ){
1442  TSignedSeqRange range = spanRow.ranges[col];
1443 
1444  spanRow.str_ranges[col] =
1445  NStr::IntToString( range.GetFrom() + 1, NStr::fWithCommas )
1446  ;
1447  spanRow.str_ranges[col] += "-";
1448  spanRow.str_ranges[col] +=
1449  NStr::IntToString( range.GetTo() + 1, NStr::fWithCommas )
1450  ;
1451  }
1452 
1453  return ToWxString( spanRow.str_ranges[col] );
1454 
1455  } else {
1456 
1457  if( spanRow.str_ranges[m_NumSeqs].empty() ){
1458  TSeqRange range = spanRow.aln_range;
1459 
1460  spanRow.str_ranges[m_NumSeqs] =
1461  NStr::IntToString( range.GetFrom() + 1, NStr::fWithCommas )
1462  ;
1463  spanRow.str_ranges[m_NumSeqs] += "-";
1464  spanRow.str_ranges[m_NumSeqs] +=
1465  NStr::IntToString( range.GetTo() + 1, NStr::fWithCommas )
1466  ;
1467  }
1468 
1469  return ToWxString( spanRow.str_ranges[m_NumSeqs] );
1470  }
1471 }
1472 
1473 
1475 {
1476  _ASSERT( row < m_AlnSpans.size() );
1477  if( row >= m_AlnSpans.size() ){
1478  NCBI_THROW(
1479  CException,
1480  eUnknown, "CAlnSpanVertModel::GetData(): Array index out-of-bounds"
1481  );
1482  }
1483  return m_AlnSpans[row];
1484 }
1485 
1487 
1488 
1489 /*
1490  * ===========================================================================
1491  * $Log$
1492  * ===========================================================================
1493  */
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
void BuildAln(TAnchoredAlnVec &in_alns, CAnchoredAln &out_aln, const CAlnUserOptions &options, TAlnSeqIdIRef pseudo_seqid=TAlnSeqIdIRef())
Build anchored alignment from a set of alignments.
void CreateAnchoredAlnVec(_TAlnStats &aln_stats, TAnchoredAlnVec &out_vec, const CAlnUserOptions &options)
Create anchored alignment from each seq-align in the stats.
USING_SCOPE(objects)
#define INIT_MEMBERS
static void s_Append(string &dst, const string &src)
CSeq_align container.
const_iterator insert(const CSeq_align &seq_align)
Insert new CSeq_align into the list.
size_type size(void) const
Container mapping seq-aligns to vectors of participating seq-ids.
Definition: aln_tests.hpp:56
void push_back(const CSeq_align &aln)
Adding an alignment.
Definition: aln_tests.hpp:87
IAlnSeqId extracting functor.
void x_ConvertRowsToSpans(TAlnSpans &rows)
finally, convert our spans into rows for display
void x_MergeRows(TAlnSpans &rows)
CRange< TSignedSeqPos > m_AlnRange
virtual int GetNumColumns() const
Returns the number of columns in the model.
TAlnSpans m_AlnSpans
the alignments we represent
void SetShowIndels(bool indel)
virtual wxVariant GetValueAt(int i, int j) const
vector< string > m_ColNames
const SSpanRow & GetData(size_t row) const
access a given row's data
void x_PopulateRows(TAlnSpans &rows)
virtual int GetNumRows() const
Returns the number of rows in the model.
virtual wxString GetColumnName(int aColIx) const
Returns a default name for the column using spreadsheet conventions: A, B, C, ...
CRef< CSpliced_seg > m_SplicedSeg
vector< CRef< CSparseAln > > m_SparseAlns
virtual wxString GetColumnType(int aColIx) const
Tries to extract actual type from row 0 value if it exists.
vector< SSpanRow > TAlnSpans
void x_InsertIntrons(TAlnSpans &rows)
CAnchoredAln m_AnchoredAln
CRef< CScope > m_Scope
CRef< IAlnMultiDataSource > m_AlnSrc
void x_PopulateRowsSpliced(TAlnSpans &rows)
Helper class which collects seq-align statistics: seq-ids participating in alignments and rows,...
Definition: aln_stats.hpp:57
bool CanBeAnchored(void) const
Check if there are any ids which can be used as anchors for the whole set of alignments.
Definition: aln_stats.hpp:284
Options for different alignment manager operations.
EMergeAlgo m_MergeAlgo
@ ePreserveRows
Preserve all rows as they were in the input (e.g.
Query-anchored alignment can be 2 or multi-dimensional.
TDim GetDim(void) const
How many rows.
CScope –.
Definition: scope.hpp:92
Sparse alignment.
Definition: sparse_aln.hpp:51
static void TranslateNAToAA(const string &na, string &aa, int gen_code=kDefaultGenCode)
Definition: sparse_aln.cpp:463
CSpliced_exon_chunk –.
TSeqPos GetSeqStop(TDim row) const
TDim CheckNumRows(void) const
Definition: Spliced_seg.hpp:70
TSeqPos GetSeqStart(TDim row) const
ENa_strand GetSeqStrand(TDim row) const
Get strand (the first one if segments have different strands).
Definition: Spliced_seg.cpp:56
virtual void x_FireDataChanged()
IAlignRowHandle provides an abstract way to access alignment row data.
Definition: alnmulti_ds.hpp:59
virtual bool UsesAATranslation() const =0
virtual const objects::CBioseq_Handle & GetBioseqHandle() const =0
virtual bool CanGetSeqString() const =0
virtual string & GetAlnSeqString(string &buffer, const IAlnExplorer::TSignedRange &aln_rng) const =0
IAlnMultiDataSource - interface to a data source representing an abstract multiple alignment.
virtual IAlnExplorer::EAlignType GetAlignType() const =0
virtual TSignedSeqPos GetSeqPosFromAlnPos(TNumrow for_row, TSeqPos aln_pos, IAlnExplorer::ESearchDirection dir=IAlnExplorer::eNone, bool try_reverse_dir=true) const =0
virtual const IAlignRowHandle * GetRowHandle(TNumrow row) const =0
virtual bool IsPositiveStrand(TNumrow row) const =0
virtual TSeqPos GetAlnStart(void) const =0
virtual bool CanGetId(TNumrow row) const =0
virtual IAlnSegmentIterator * CreateSegmentIterator(TNumrow row, const IAlnExplorer::TSignedRange &range, IAlnSegmentIterator::EFlags flags) const =0
virtual TNumrow GetNumRows(void) const =0
number of rows in alignment
virtual TSeqPos GetAlnStop(void) const =0
virtual const objects::CSeq_id & GetSeqId(TNumrow row) const =0
Alignment segment iterator interface.
EFlags
Iterator options.
@ eAllSegments
Iterate all segments.
@ fIndel
Either anchor or the selected row is not present in the segment.
@ fGap
Both anchor row and the selected row are not included in the segment (some other row is present and t...
virtual TSegTypeFlags GetType(void) const =0
Get current segment type.
virtual const TSignedRange & GetAlnRange(void) const =0
Get alignment range for the segment.
virtual int GetGenCode(IAlnExplorer::TNumrow row) const =0
bool IsConsensusSplice(const string &splice5, const string &splice3)
Consensus splice is GY..AG or AT..AC.
static uch flags
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:61
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:56
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
int TSignedSeqPos
Type for signed sequence position.
Definition: ncbimisc.hpp:887
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
Definition: ncbimisc.hpp:1508
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
Definition: ncbidiag.hpp:226
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
void Warning(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1191
static void GetLabel(const CObject &obj, string *label, ELabelType type=eDefault)
Definition: label.cpp:140
@ eDefault
Definition: label.hpp:73
void SetId(CSeq_id &id)
set the 'id' field in all parts of this location
Definition: Seq_loc.cpp:3474
void SetEmpty(TEmpty &v)
Definition: Seq_loc.hpp:981
void SetInt(TInt &v)
Definition: Seq_loc.hpp:983
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
Definition: ncbiobj.hpp:2015
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
position_type GetLength(void) const
Definition: range.hpp:158
position_type GetToOpen(void) const
Definition: range.hpp:138
static position_type GetPositionMax(void)
Definition: range.hpp:250
static TThisType GetEmpty(void)
Definition: range.hpp:306
TThisType & Set(position_type from, position_type to)
Definition: range.hpp:188
CRange< TSeqPos > TSeqRange
typedefs for sequence ranges
Definition: range.hpp:419
CRange< TSignedSeqPos > TSignedSeqRange
Definition: range.hpp:420
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5086
@ fWithCommas
Use commas as thousands separator.
Definition: ncbistr.hpp:254
static const char label[]
void SetFrom(TFrom value)
Assign a value to From data member.
Definition: Range_.hpp:231
TTo GetTo(void) const
Get the To member data.
Definition: Range_.hpp:269
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
void SetTo(TTo value)
Assign a value to To data member.
Definition: Range_.hpp:278
const TProtpos & GetProtpos(void) const
Get the variant data.
const TGenomic_id & GetGenomic_id(void) const
Get the Genomic_id member data.
bool IsSetParts(void) const
Basic segments are always in biologic order. Check if a value has been assigned to Parts data member.
TMatch GetMatch(void) const
Get the variant data.
const TProduct_id & GetProduct_id(void) const
Get the Product_id member data.
TGenomic_start GetGenomic_start(void) const
Get the Genomic_start member data.
TProduct_length GetProduct_length(void) const
Get the Product_length member data.
bool IsSetPoly_a(void) const
start of poly(A) tail on the transcript For sense transcripts: aligned product positions < poly-a <= ...
TDiag GetDiag(void) const
Get the variant data.
TProduct_type GetProduct_type(void) const
Get the Product_type member data.
TMismatch GetMismatch(void) const
Get the variant data.
TAmin GetAmin(void) const
Get the Amin member data.
Definition: Prot_pos_.hpp:220
const TParts & GetParts(void) const
Get the Parts member data.
const TProduct_start & GetProduct_start(void) const
Get the Product_start member data.
TGenomic_ins GetGenomic_ins(void) const
Get the variant data.
list< CRef< CSpliced_exon > > TExons
const TExons & GetExons(void) const
Get the Exons member data.
list< CRef< CSpliced_exon_chunk > > TParts
bool IsSetProduct_length(void) const
length of the product, in bases/residues from this (or from poly-a if present), a 3' unaligned length...
TPoly_a GetPoly_a(void) const
Get the Poly_a member data.
TProduct_ins GetProduct_ins(void) const
Get the variant data.
TNucpos GetNucpos(void) const
Get the variant data.
E_Choice Which(void) const
Which variant is currently selected.
@ e_Product_ins
insertion in product sequence (i.e. gap in the genomic sequence)
@ e_Diag
both sequences are represented, there is sufficient similarity between product and genomic sequences....
@ e_Genomic_ins
insertion in genomic sequence (i.e. gap in the product sequence)
@ e_Match
both sequences represented, product and genomic sequences match
@ e_Mismatch
both sequences represented, product and genomic sequences do not match
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
<!DOCTYPE HTML >< html > n< header > n< title > PubSeq Gateway Help Page</title > n< style > n th
int i
int len
#define wxT(x)
Definition: muParser.cpp:41
range(_Ty, _Ty) -> range< _Ty >
const struct ncbi::grid::netcache::search::fields::SIZE size
const CharType(& source)[N]
Definition: pointer.h:1149
NCBI C++ stream class wrappers for triggering between "new" and "old" C++ stream libraries.
T max(T x_, T y_)
vector< CRef< CAnchoredAln > > TAnchoredAlnVec
Collection of anchored alignments.
static bool GetSeqId(const T &d, set< string > &labels, const string name="", bool detect=false, bool found=false)
#define row(bind, expected)
Definition: string_bind.c:73
the alignment we store with its parsed data
vector< CConstRef< objects::CSeq_loc > > locs
vector< string > str_ranges
visible values
vector< TSignedSeqRange > ranges
#define _ASSERT
CScope & GetScope()
wxString ToWxString(const string &s)
Definition: wx_utils.hpp:173
Modified on Wed Sep 04 15:02:24 2024 by modify_doxy.py rev. 669887