NCBI C++ ToolKit
sparse_ci.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: sparse_ci.cpp 77988 2017-05-17 15:53:12Z grichenk $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Andrey Yazhuk
27  *
28  * File Description:
29  *
30  */
31 
32 #include <ncbi_pch.hpp>
33 
35 
38 
39 ////////////////////////////////////////////////////////////////////////////////
40 /// CSparseSegment - IAlnSegment implementation for CAlnMap::CAlnChunk
41 
43  : m_Type(fInvalid),
44  m_AlnRange(TSignedRange::GetEmpty()),
45  m_RowRange(TSignedRange::GetEmpty())
46 {
47 }
48 
49 
/// Validity test for the segment: converts to true unless IsInvalidType()
/// reports the segment as invalid.
/// NOTE(review): IsInvalidType() is declared outside this listing; presumably
/// it checks the fInvalid flag that the default constructor stores in m_Type -
/// confirm against the IAlnSegment declaration.
50 CSparseSegment::operator bool(void) const
51 {
52  return !IsInvalidType();
53 }
54 
55 
57 {
58  return m_Type;
59 }
60 
61 
63 {
64  return m_AlnRange;
65 }
66 
67 
69 {
70  return m_RowRange;
71 }
72 
73 
74 ////////////////////////////////////////////////////////////////////////////////
75 /// CSparse_CI
76 
78 {
79  // Update anchor and row directions - mixed strand alignments are now allowed.
80  // If an iterator is not valid, use the last known direction.
81  if ( m_AnchorIt ) {
83  }
84  if ( m_RowIt ) {
86  }
87 
88  bool anchor_gap = !m_AnchorIt ||
90  bool row_gap = !m_RowIt ||
92 
95 
96  TSignedSeqPos from = 0;
97  TSignedSeqPos to = 0;
98  TSignedSeqPos left_offset = 0;
99  TSignedSeqPos right_offset = 0;
100 
101  if ( !m_AnchorIt ) {
102  if ( !m_RowIt ) {
103  // End of the iterator
104  m_Aln.Reset();
105  aln_rg = TSignedRange::GetEmpty();
106  row_rg = TSignedRange::GetEmpty();
108  return;
109  }
110  // Only row iterator is valid. Gap or indel. Use the whole remaining
111  // range of the row segment.
112  aln_rg = m_NextRowRg;
113  row_rg = m_RowIt.GetSecondRange();
114  from = m_NextRowRg.GetFrom();
115  to = m_NextRowRg.GetToOpen();
116  left_offset = m_NextRowRg.GetFrom() - m_RowIt.GetFirstRange().GetFrom();
117  right_offset = 0;
118  }
119  else if ( !m_RowIt ) {
120  from = m_NextAnchorRg.GetFrom();
121  to = m_NextAnchorRg.GetToOpen();
122  // Row sequence is missing - set offsets to make the range empty
123  // at the last range end.
124  left_offset = row_rg.GetLength();
125  right_offset = 0;
126  }
127  else {
128  if ( m_AnchorDirect ) {
129  // Both iterators are valid - select nearest segment start.
131  // Calculate offset from the pairwise row segment start (to skip it).
132  left_offset = from - m_RowIt.GetFirstRange().GetFrom();
133  if (m_NextAnchorRg.GetFrom() > from) {
134  // Use part of row range up to the anchor segment start
135  // or the whole row segment if the anchor starts later.
137  right_offset = m_NextRowRg.GetToOpen() - to;
138  }
139  else if (m_NextRowRg.GetFrom() > from) {
140  // Use part of anchor range up to the row segment start
141  // or the whole anchor segment if the row starts later.
143  // Row range will become empty starting at the nearest row
144  // segment from/to depending on the strand.
145  left_offset = 0;
146  right_offset = m_RowIt.GetSecondRange().GetLength();
147  }
148  else {
149  // Both ranges start at the same point - find the nearest end.
151  right_offset = m_NextRowRg.GetToOpen() - to;
152  }
153 
154  // Adjust gap flags if one of the pairwise segments starts past
155  // the sparse segment end.
156  anchor_gap = anchor_gap ||
157  m_AnchorIt.GetFirstRange().GetFrom() >= to;
158  row_gap = row_gap ||
159  m_RowIt.GetFirstRange().GetFrom() >= to;
160  }
161  else {
162  // Both iterators are valid - select nearest segment end.
164  right_offset = m_RowIt.GetFirstRange().GetToOpen() - to;
165  if (m_NextAnchorRg.GetToOpen() < to) {
167  left_offset = from - m_NextRowRg.GetFrom();
168  }
169  else if (m_NextRowRg.GetToOpen() < to) {
171  right_offset = 0;
172  left_offset = m_RowIt.GetSecondRange().GetLength();
173  }
174  else {
176  left_offset = from - m_NextRowRg.GetFrom();
177  }
178 
179  anchor_gap = anchor_gap ||
180  m_AnchorIt.GetFirstRange().GetToOpen() <= from;
181  row_gap = row_gap ||
182  m_RowIt.GetFirstRange().GetToOpen() <= from;
183  }
184  }
185 
186  aln_rg.SetOpen(from, to);
187 
188  // Trim ranges to leave only unused range
189  if ( m_AnchorDirect ) {
190  if (m_NextAnchorRg.GetFrom() < to) {
192  }
193  if (m_NextRowRg.GetFrom() < to) {
194  m_NextRowRg.SetFrom(to);
195  }
196  }
197  else {
198  if (m_NextAnchorRg.GetToOpen() > from) {
200  }
201  if (m_NextRowRg.GetToOpen() > from) {
202  m_NextRowRg.SetToOpen(from);
203  }
204  }
205 
206  // Adjust row range according to the alignment range.
207  _ASSERT(left_offset >= 0);
208  _ASSERT(right_offset >= 0);
209  if ( !m_RowDirect ) {
210  swap(left_offset, right_offset);
211  }
212  if ( m_RowIt ) {
213  row_rg = m_RowIt.GetSecondRange();
214  }
215  // Adjust offsets so that the range length is never negative.
216  if (left_offset > row_rg.GetLength()) {
217  left_offset = row_rg.GetLength();
218  }
219  if (right_offset > row_rg.GetLength() - left_offset) {
220  right_offset = row_rg.GetLength() - left_offset;
221  }
222  row_rg.SetOpen(row_rg.GetFrom() + left_offset,
223  row_rg.GetToOpen() - right_offset);
224 
225  // Set segment type.
226  if ( row_gap ) {
227  if ( aln_rg.Empty() ) {
229  }
230  else {
231  m_Segment.m_Type = anchor_gap ?
233  }
234  }
235  else {
236  m_Segment.m_Type = anchor_gap ?
238  }
239 
240  // The flag shows relative row direction.
241  if ( !m_RowDirect ) {
243  }
244 }
245 
246 // assuming clipping range
248 {
249  if (m_Row >= TDim(m_Aln->GetPairwiseAlns().size())) {
250  // Invalid row selected - nothing to iterate.
251  m_Aln.Reset();
252  return;
253  }
254  const CPairwiseAln& anchor_pw =
256  const CPairwiseAln& pw = *m_Aln->GetPairwiseAlns()[m_Row];
257  // If at least one of the rows has mixed strand, force all iterators to be direct.
258  bool mixed_strand =
261  m_AnchorIt = CPairwise_CI(anchor_pw, m_TotalRange, mixed_strand);
262  m_RowIt = CPairwise_CI(pw, m_TotalRange, mixed_strand);
263  if ( m_AnchorIt ) {
265  }
266  else {
268  }
269  if ( m_RowIt ) {
271  }
272  else {
274  }
276  x_InitSegment();
277  x_CheckSegment();
278 }
279 
280 
282 {
283  if (m_Flags == eAllSegments) {
284  return;
285  }
286  while ( *this ) {
287  if (m_Flags == eSkipGaps) {
288  if ( m_Segment.IsAligned() ) {
289  break;
290  }
291  }
292  else {
293  // Distinguish between insertions and deletions.
296  if ((m_Flags == eInsertsOnly && ins) ||
297  (m_Flags == eSkipInserts && !ins)) {
298  break;
299  }
300  }
301  x_NextSegment();
302  }
303 }
304 
305 
307 {
308  if ( !*this ) return;
309  if (m_AnchorIt && m_NextAnchorRg.Empty()) {
310  // Advance anchor iterator, skip unaligned segments if any.
311  do {
312  ++m_AnchorIt;
313  }
314  while (m_AnchorIt && m_AnchorIt.GetFirstRange().Empty());
315  if ( m_AnchorIt ) {
317  }
318  }
319  if (m_RowIt && m_NextRowRg.Empty()) {
320  ++m_RowIt;
321  if ( m_RowIt ) {
323  }
324  }
325  x_InitSegment();
326 }
327 
328 
/// Member-wise equality between this iterator and another CSparse_CI.
///
/// Returns true only when the alignment reference, iteration flags, selected
/// row, total range, both pairwise iterators, both "next" (not yet consumed)
/// ranges and the current segment all compare equal.
/// The direction flags m_AnchorDirect and m_RowDirect are not part of the
/// comparison.
/// NOTE(review): m_Aln is a CConstRef, so the first comparison is presumably
/// by referenced object identity rather than by content - confirm.
///
/// @param other  Iterator to compare against.
/// @return       true if all compared members are equal, false otherwise.
329 bool CSparse_CI::x_Equals(const CSparse_CI& other) const
330 {
331  return m_Aln == other.m_Aln &&
332  m_Flags == other.m_Flags &&
333  m_Row == other.m_Row &&
334  m_TotalRange == other.m_TotalRange &&
335  m_AnchorIt == other.m_AnchorIt &&
336  m_RowIt == other.m_RowIt &&
337  m_NextAnchorRg == other.m_NextAnchorRg &&
338  m_NextRowRg == other.m_NextRowRg &&
339  m_Segment == other.m_Segment;
340 }
341 
342 
344 : m_Flags(eAllSegments),
345  m_Aln(NULL),
346  m_Row(0),
347  m_AnchorDirect(true),
348  m_RowDirect(true)
349 {
353 }
354 
355 
357  TDim row,
358  EFlags flags)
359  : m_Flags(flags),
360  m_Aln(aln.m_Aln),
361  m_Row(row),
362  m_TotalRange(TSignedRange::GetWhole()),
363  m_AnchorDirect(true),
364  m_RowDirect(true)
365 {
366  x_InitIterator();
367 }
368 
369 
371  TDim row,
372  EFlags flags,
373  const TSignedRange& range)
374  : m_Flags(flags),
375  m_Aln(aln.m_Aln),
376  m_Row(row),
377  m_TotalRange(range),
378  m_AnchorDirect(true),
379  m_RowDirect(true)
380 {
381  x_InitIterator();
382 }
383 
384 
386 {
387  *this = orig;
388 }
389 
390 
392 {
393 }
394 
395 
397 {
398  return new CSparse_CI(*this);
399 }
400 
401 
/// Validity test for the iterator.
///
/// The iterator is valid while it still holds an alignment (m_Aln is set) and
/// at least one of the two pairwise iterators (anchor or selected row) is
/// itself valid. In this listing m_Aln is reset in x_InitSegment() when both
/// pairwise iterators are exhausted and in x_InitIterator() when the requested
/// row is out of range, so either case makes this conversion yield false.
402 CSparse_CI::operator bool(void) const
403 {
404  return m_Aln && (m_AnchorIt || m_RowIt);
405 }
406 
407 
409 {
410  x_NextSegment();
411  x_CheckSegment();
412  return *this;
413 }
414 
415 
417 {
418  if(typeid(*this) == typeid(it)) {
419  const CSparse_CI* sparse_it = dynamic_cast<const CSparse_CI*>(&it);
420  return x_Equals(*sparse_it);
421  }
422  return false;
423 }
424 
425 
427 {
428  if(typeid(*this) == typeid(it)) {
429  const CSparse_CI* sparse_it = dynamic_cast<const CSparse_CI*>(&it);
430  return !x_Equals(*sparse_it);
431  }
432  return true;
433 }
434 
435 
437 {
438  _ASSERT(*this);
439  return m_Segment;
440 }
441 
442 
444 {
445  _ASSERT(*this);
446  return &m_Segment;
447 }
448 
449 
const TPairwiseAlnVector & GetPairwiseAlns(void) const
The vector of pairwise alns.
TDim GetAnchorRow(void) const
Which is the anchor row?
A pairwise aln is a collection of ranges for a pair of rows.
CPairwiseAln iterator. Iterates over aligned ranges and gaps.
bool IsDirect(void) const
Direction of the second sequence relative to the first one.
@ eGap
Gap or unaligned range.
bool IsFirstDirect(void) const
Absolute direction of the first sequence.
ESegType GetSegType(void) const
Get current segment type.
const TSignedRange & GetSecondRange(void) const
Current range on the second sequence. May be empty when in a gap.
const TSignedRange & GetFirstRange(void) const
Current range on the first sequence.
Sparse alignment.
Definition: sparse_aln.hpp:51
virtual const TSignedRange & GetAlnRange(void) const
Get alignment range for the segment.
Definition: sparse_ci.cpp:62
virtual TSegTypeFlags GetType(void) const
Get current segment type.
Definition: sparse_ci.cpp:56
virtual const TSignedRange & GetRange(void) const
Get the selected row range.
Definition: sparse_ci.cpp:68
TSegTypeFlags m_Type
Definition: sparse_ci.hpp:65
TSignedRange m_RowRange
Definition: sparse_ci.hpp:67
TSignedRange m_AlnRange
Definition: sparse_ci.hpp:66
CSparseSegment(void)
CSparseSegment - IAlnSegment implementation for CAlnMap::CAlnChunk.
Definition: sparse_ci.cpp:42
Implementation of IAlnSegmentIterator for CSparseAln.
Definition: sparse_ci.hpp:73
virtual const value_type & operator*(void) const
Definition: sparse_ci.cpp:436
TSignedRange m_TotalRange
Definition: sparse_ci.hpp:125
void x_InitSegment(void)
CSparse_CI.
Definition: sparse_ci.cpp:77
virtual ~CSparse_CI(void)
Definition: sparse_ci.cpp:391
virtual IAlnSegmentIterator * Clone(void) const
Create a copy of the iterator.
Definition: sparse_ci.cpp:396
CConstRef< CAnchoredAln > m_Aln
Definition: sparse_ci.hpp:123
void x_CheckSegment(void)
Definition: sparse_ci.cpp:281
virtual const value_type * operator->(void) const
Definition: sparse_ci.cpp:443
bool m_RowDirect
Definition: sparse_ci.hpp:131
CPairwise_CI m_AnchorIt
Definition: sparse_ci.hpp:126
CPairwise_CI m_RowIt
Definition: sparse_ci.hpp:127
virtual bool operator==(const IAlnSegmentIterator &it) const
Compare iterators.
Definition: sparse_ci.cpp:416
CSparseSegment m_Segment
Definition: sparse_ci.hpp:122
void x_NextSegment(void)
Definition: sparse_ci.cpp:306
virtual bool operator!=(const IAlnSegmentIterator &it) const
Definition: sparse_ci.cpp:426
void x_InitIterator(void)
Definition: sparse_ci.cpp:247
TSignedRange m_NextRowRg
Definition: sparse_ci.hpp:129
CSparse_CI(void)
Create 'empty' iterator.
Definition: sparse_ci.cpp:343
CSparseAln::TDim TDim
Definition: sparse_ci.hpp:76
virtual IAlnSegmentIterator & operator++(void)
Advance to the next segment.
Definition: sparse_ci.cpp:408
EFlags m_Flags
Definition: sparse_ci.hpp:121
bool m_AnchorDirect
Definition: sparse_ci.hpp:130
TSignedRange m_NextAnchorRg
Definition: sparse_ci.hpp:128
bool x_Equals(const CSparse_CI &other) const
Definition: sparse_ci.cpp:329
Alignment segment iterator interface.
EFlags
Iterator options.
@ eSkipInserts
Iterate segments where at least some rows are aligned (including gap segments)
@ eInsertsOnly
Iterate only ranges not participating in the alignment (unaligned segments)
@ eAllSegments
Iterate all segments.
@ eSkipGaps
Skip gap segments (show only aligned ranges)
Alignment segment interface.
@ fAligned
Aligned segment.
@ fInvalid
The iterator is in bad state.
@ fIndel
Either anchor or the selected row is not present in the segment.
@ fUnaligned
The range on the selected sequence does not participate in the alignment (the alignment range of the ...
@ fReversed
The selected row is reversed (relative to the anchor).
@ fGap
Both anchor row and the selected row are not included in the segment (some other row is present and t...
bool IsAligned(void) const
unsigned TSegTypeFlags
static uch flags
@ fInvalid
#define true
Definition: bool.h:35
#define bool
Definition: bool.h:34
int TSignedSeqPos
Type for signed sequence position.
Definition: ncbimisc.hpp:887
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
Definition: ncbimisc.hpp:1508
#define NULL
Definition: ncbistd.hpp:225
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:1439
position_type GetLength(void) const
Definition: range.hpp:158
TThisType & SetToOpen(position_type toOpen)
Definition: range.hpp:175
position_type GetToOpen(void) const
Definition: range.hpp:138
static TThisType GetEmpty(void)
Definition: range.hpp:306
bool Empty(void) const
Definition: range.hpp:148
TThisType & SetOpen(position_type from, position_type toOpen)
Definition: range.hpp:184
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
void SetFrom(TFrom value)
Assign a value to From data member.
Definition: Range_.hpp:231
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
where boath are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is orig
range(_Ty, _Ty) -> range< _Ty >
T max(T x_, T y_)
T min(T x_, T y_)
USING_SCOPE(ncbi::objects)
#define row(bind, expected)
Definition: string_bind.c:73
#define _ASSERT
Modified on Wed Sep 04 15:04:55 2024 by modify_doxy.py rev. 669887