NCBI C++ ToolKit
seq_vector_ci.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef SEQ_VECTOR_CI__HPP
2 #define SEQ_VECTOR_CI__HPP
3 
4 /* $Id: seq_vector_ci.hpp 78227 2017-06-05 19:13:54Z vasilche $
5 * ===========================================================================
6 *
7 * PUBLIC DOMAIN NOTICE
8 * National Center for Biotechnology Information
9 *
10 * This software/database is a "United States Government Work" under the
11 * terms of the United States Copyright Act. It was written as part of
12 * the author's official duties as a United States Government employee and
13 * thus cannot be copyrighted. This software/database is freely available
14 * to the public for use. The National Library of Medicine and the U.S.
15 * Government have not placed any restriction on its use or reproduction.
16 *
17 * Although all reasonable efforts have been taken to ensure the accuracy
18 * and reliability of the software and data, the NLM and the U.S.
19 * Government do not and cannot warrant the performance or results that
20 * may be obtained by using this software or data. The NLM and the U.S.
21 * Government disclaim all warranties, express or implied, including
22 * warranties of performance, merchantability or fitness for any particular
23 * purpose.
24 *
25 * Please cite the author in any work or product based on this material.
26 *
27 * ===========================================================================
28 *
29 * Author: Aleksey Grichenko, Michael Kimelman, Eugene Vasilchenko
30 *
31 * File Description:
32 * Seq-vector iterator
33 *
34 */
35 
36 
37 #include <objmgr/seq_map_ci.hpp>
38 #include <objects/seq/Seq_data.hpp>
39 #include <iterator>
40 
42 
43 /** @addtogroup ObjectManagerIterators
44  *
45  * @{
46  */
47 
48 
49 class CRandom;
50 
52 
53 
55 {
56 public:
57  typedef unsigned char TResidue;
60  typedef TSeqPos size_type;
62  typedef std::random_access_iterator_tag iterator_category;
63  typedef const TResidue* pointer;
64  typedef const TResidue& reference;
65 
69  eCaseConversion_lower
70  };
71 
72 protected:
73  static TResidue sx_GetGapChar(TCoding coding,
74  ECaseConversion case_cvt);
75  static const char* sx_GetConvertTable(TCoding src, TCoding dst,
76  bool reverse,
77  ECaseConversion case_cvt);
78  static const char sm_TrivialTable[256];
79 };
80 
81 class CSeqVector;
82 
83 
84 /////////////////////////////////////////////////////////////////////////////
85 // INcbi2naRandomizer interface is used to randomize ambiguous nucleotides
86 // when generating unambiguous Ncbi2na encoding.
87 // The source encoding is prepared by CSeqVector as unpacked Ncbi4na and
88 // must be converted to unpacked Ncbi2na with possible randomization.
89 // The extra parameter, giving the position of the current buffer within the
90 // whole sequence, can be used to generate the same bases for the same ambiguous positions.
92 {
93 public:
94  virtual ~INcbi2naRandomizer(void);
95 
96  /// Convert count unpacked bases in buffer 4na -> 2na with randomization.
97  /// The argument pos will contain position of the buffer in
98  /// the sequence, and can be used to give the same random base
99  /// at the same ambiguous position.
100  virtual void RandomizeData(char* buffer, size_t count, TSeqPos pos) = 0;
101 };
102 
103 
105 {
106 public:
107  CSeqVector_CI(void);
108  ~CSeqVector_CI(void);
109  explicit
110  CSeqVector_CI(const CSeqVector& seq_vector, TSeqPos pos = 0);
111  CSeqVector_CI(const CSeqVector& seq_vector, TSeqPos pos,
112  ECaseConversion case_cvt);
113  // Use the same CSeqVector source object, but with different strand.
114  CSeqVector_CI(const CSeqVector& seq_vector, ENa_strand strand,
115  TSeqPos pos = 0, ECaseConversion = eCaseConversion_none);
116  CSeqVector_CI(const CSeqVector_CI& sv_it);
117  CSeqVector_CI& operator=(const CSeqVector_CI& sv_it);
118 
119  bool operator==(const CSeqVector_CI& iter) const;
120  bool operator!=(const CSeqVector_CI& iter) const;
121  bool operator<(const CSeqVector_CI& iter) const;
122  bool operator>(const CSeqVector_CI& iter) const;
123  bool operator>=(const CSeqVector_CI& iter) const;
124  bool operator<=(const CSeqVector_CI& iter) const;
125 
126  /// Check if the sequence can be obtained for the interval [start, stop)
127  bool CanGetRange(TSeqPos start, TSeqPos stop);
128  /// Fill the buffer string with the sequence data for the interval
129  /// [start, stop).
130  void GetSeqData(TSeqPos start, TSeqPos stop, string& buffer);
131  /// Fill the buffer string with the count bytes of sequence data
132  /// starting with current iterator position
133  void GetSeqData(string& buffer, TSeqPos count);
134 
135  /// Get number of chars from current position to the current buffer end
136  TSeqPos GetBufferSize(void) const;
137  /// Get pointer to current char in the buffer
138  const char* GetBufferPtr(void) const;
139  /// Get pointer to current position+size.
140  /// Throw exception if current pos + size is not in the buffer.
141  const char* GetBufferEnd(size_t size) const;
142 
143  CSeqVector_CI& operator++(void);
144  CSeqVector_CI& operator--(void);
145 
146  /// special temporary holder for return value from postfix operators
148  {
149  public:
151  : m_Value(value)
152  {
153  }
154 
155  value_type operator*(void) const
156  {
157  return m_Value;
158  }
159  private:
161  };
162  /// Restricted postfix operators.
163  /// They only allow reading the value at the old position via operator*,
164  /// as in the commonly used copy loop:
165  /// CSeqVector_CI src;
166  /// for ( ... ) {
167  /// *dst++ = *src++;
168  /// }
170  {
171  value_type value(**this);
172  ++*this;
173  return value;
174  }
176  {
177  value_type value(**this);
178  --*this;
179  return value;
180  }
181 
182  TSeqPos GetPos(void) const;
183  CSeqVector_CI& SetPos(TSeqPos pos);
184 
185  TCoding GetCoding(void) const;
186  void SetCoding(TCoding coding);
187 
188  // The CSeqVector_CI strand is relative to the CSeqVector's base object.
189  // Default CSeqVector_CI strand is equal to the strand in the CSeqVector.
190  ENa_strand GetStrand(void) const;
191  void SetStrand(ENa_strand strand);
192 
193  void SetRandomizeAmbiguities(void);
194  void SetRandomizeAmbiguities(Uint4 seed);
195  void SetRandomizeAmbiguities(CRandom& random_gen);
196  void SetRandomizeAmbiguities(CRef<INcbi2naRandomizer> randomizer);
197  void SetNoAmbiguities(void);
198 
199  TResidue operator*(void) const;
200  bool IsValid(void) const;
201 
203 
204  const CSeqMap_CI& GetCurrentSeqMap_CI() const;
205 
206  /// true if current position of CSeqVector_CI is inside of sequence gap
207  bool IsInGap(void) const;
208  /// returns gap Seq-data object ref
209  /// returns null if it's not a gap or an unspecified gap
210  CConstRef<CSeq_literal> GetGapSeq_literal(void) const;
211  /// returns character representation of gap in sequence
212  TResidue GetGapChar(void) const;
213  /// returns number of gap symbols ahead including current symbol
214  /// returns 0 if current position is not in gap
215  TSeqPos GetGapSizeForward(void) const;
216  /// returns number of gap symbols before current symbol
217  /// returns 0 if current position is not in gap
218  TSeqPos GetGapSizeBackward(void) const;
219  /// skip current gap forward
220  /// returns number of skipped gap symbols
221  /// does nothing and returns 0 if current position is not in gap
222  TSeqPos SkipGap(void);
223  /// skip current gap backward
224  /// returns number of skipped gap symbols
225  /// does nothing and returns 0 if current position is not in gap
226  TSeqPos SkipGapBackward(void);
227  /// true if there is zero-length gap before current position
228  bool HasZeroGapBefore(void);
229 
232 
233 private:
234  TSeqPos x_GetSize(void) const;
235  TCoding x_GetCoding(TCoding cacheCoding, TCoding dataCoding) const;
236 
237  void x_SetPos(TSeqPos pos);
238  void x_InitializeCache(void);
239  void x_ClearCache(void);
240  void x_ResizeCache(size_t size);
241  void x_SwapCache(void);
242  void x_UpdateCacheUp(TSeqPos pos);
243  void x_UpdateCacheDown(TSeqPos pos);
244  void x_FillCache(TSeqPos start, TSeqPos count);
245  void x_UpdateSeg(TSeqPos pos);
246  void x_InitSeg(TSeqPos pos);
247  void x_IncSeg(void);
248  void x_DecSeg(void);
249  void x_CheckForward(void);
250  void x_CheckBackward(void);
251  void x_InitRandomizer(CRandom& random_gen);
252 
253  void x_NextCacheSeg(void);
254  void x_PrevCacheSeg(void);
255 
256  TSeqPos x_CachePos(void) const;
257  TSeqPos x_CacheSize(void) const;
258  TSeqPos x_CacheEndPos(void) const;
259  TSeqPos x_BackupPos(void) const;
260  TSeqPos x_BackupSize(void) const;
261  TSeqPos x_BackupEndPos(void) const;
262 
263  TSeqPos x_CacheOffset(void) const;
264 
265  void x_ResetCache(void);
266  void x_ResetBackup(void);
267 
268  void x_ThrowOutOfRange(void) const;
269 
270  friend class CSeqVector;
271  void x_SetVector(CSeqVector& seq_vector);
272 
274  typedef char* TCache_I;
275 
279  vector<CTSE_Handle> m_UsedTSEs;
283  // Current CSeqMap segment
285  // Current cache pointer
287  // Current cache
291  // Backup cache
295  // optional ambiguities randomizer
297  // scanned range
298  TSeqPos m_ScannedStart, m_ScannedEnd;
299 };
300 
301 
302 /////////////////////////////////////////////////////////////////////
303 //
304 // Inline methods
305 //
306 /////////////////////////////////////////////////////////////////////
307 
308 
309 inline
311 {
312  return m_Coding;
313 }
314 
315 
316 inline
318 {
319  return m_Strand;
320 }
321 
322 
323 inline
325 {
326  return m_CachePos;
327 }
328 
329 
330 inline
332 {
333  return TSeqPos(m_CacheEnd - m_CacheData.get());
334 }
335 
336 
337 inline
339 {
340  return x_CachePos() + x_CacheSize();
341 }
342 
343 
344 inline
346 {
347  return m_BackupPos;
348 }
349 
350 
351 inline
353 {
354  return TSeqPos(m_BackupEnd - m_BackupData.get());
355 }
356 
357 
358 inline
360 {
361  return x_BackupPos() + x_BackupSize();
362 }
363 
364 
365 inline
367 {
368  return TSeqPos(m_Cache - m_CacheData.get());
369 }
370 
371 
372 inline
374 {
375  return x_CachePos() + x_CacheOffset();
376 }
377 
378 
379 inline
381 {
383 }
384 
385 
386 inline
388 {
390 }
391 
392 
393 inline
395 {
399  m_Cache = m_CacheData.get();
400 }
401 
402 
403 inline
405 {
406  TCache_I cache = m_CacheData.get();
407  TSeqPos offset = pos - m_CachePos;
408  TSeqPos size = TSeqPos(m_CacheEnd - cache);
409  if ( offset >= size ) {
410  x_SetPos(pos);
411  }
412  else {
413  m_Cache = cache + offset;
414  }
415  return *this;
416 }
417 
418 
/// Tell whether the iterator currently refers to a position inside its cache.
/// @return true while the current cache pointer (m_Cache) lies strictly
///         before the end of the current cache (m_CacheEnd), false otherwise.
419 inline
420 bool CSeqVector_CI::IsValid(void) const
421 {
422  return m_Cache < m_CacheEnd;
423 }
424 
425 
426 inline
428 {
429  return GetPos() == iter.GetPos();
430 }
431 
432 
433 inline
435 {
436  return GetPos() != iter.GetPos();
437 }
438 
439 
440 inline
442 {
443  return GetPos() < iter.GetPos();
444 }
445 
446 
447 inline
449 {
450  return GetPos() > iter.GetPos();
451 }
452 
453 
454 inline
456 {
457  return GetPos() <= iter.GetPos();
458 }
459 
460 
461 inline
463 {
464  return GetPos() >= iter.GetPos();
465 }
466 
467 
468 inline
470 {
471  if ( !bool(*this) ) {
473  }
474  return *m_Cache;
475 }
476 
477 
478 inline
480 {
481  return m_Seg;
482 }
483 
484 
/// Tell whether the current position is inside a sequence gap.
/// @return true only if the iterator itself evaluates to true in a boolean
///         context and the current segment (m_Seg) reports the type
///         CSeqMap::eSeqGap.
485 inline
486 bool CSeqVector_CI::IsInGap(void) const
487 {
     // The boolean test on *this comes first, so the segment type is only
     // queried when that test succeeds.
488  return *this && m_Seg.GetType() == CSeqMap::eSeqGap;
489 }
490 
491 
492 inline
494 {
495  if ( ++m_Cache >= m_CacheEnd ) {
496  x_NextCacheSeg();
497  }
498  return *this;
499 }
500 
501 
502 inline
504 {
505  TCache_I cache = m_Cache;
506  if ( cache == m_CacheData.get() ) {
507  x_PrevCacheSeg();
508  }
509  else {
510  m_Cache = cache - 1;
511  }
512  return *this;
513 }
514 
515 
516 inline
518 {
519  SetPos(start);
520  if (start > stop) {
521  buffer.erase();
522  return;
523  }
524  GetSeqData(buffer, stop - start);
525 }
526 
527 
528 inline
530 {
531  return TSeqPos(m_CacheEnd - m_Cache);
532 }
533 
534 
/// Get a read-only pointer to the current character in the cache buffer.
/// @return the current cache pointer (m_Cache), unchanged.
535 inline
536 const char* CSeqVector_CI::GetBufferPtr(void) const
537 {
538  return m_Cache;
539 }
540 
541 
/// Get a pointer `size` characters past the current cache position.
/// @param size  number of characters to advance from the current position.
/// @return m_Cache + size.
542 inline
543 const char* CSeqVector_CI::GetBufferEnd(size_t size) const
544 {
545  const char* ptr = m_Cache + size;
     // Range check: the result must not lie before the current position
     // and must not lie beyond the end of the current cache (m_CacheEnd).
     // NOTE(review): the statement inside this branch (listing line 547) is not
     // visible here; presumably it reports the out-of-range error - confirm
     // against the real header.
546  if (ptr < m_Cache || ptr > m_CacheEnd) {
548  }
549  return ptr;
550 }
551 
552 
553 inline
555 {
556  SetPos(GetPos() + value);
557  return *this;
558 }
559 
560 
561 inline
563 {
564  SetPos(GetPos() - value);
565  return *this;
566 }
567 
568 
569 inline
571 {
572  CSeqVector_CI ret(iter);
573  ret += value;
574  return ret;
575 }
576 
577 
578 inline
580 {
581  CSeqVector_CI ret(iter);
582  ret -= value;
583  return ret;
584 }
585 
586 
587 inline
589 {
590  CSeqVector_CI ret(iter);
591  ret.SetPos(iter.GetPos() + value);
592  return ret;
593 }
594 
595 
596 inline
598 {
599  CSeqVector_CI ret(iter);
600  ret.SetPos(iter.GetPos() - value);
601  return ret;
602 }
603 
604 
605 inline
607  const CSeqVector_CI& iter2)
608 {
609  return iter1.GetPos() - iter2.GetPos();
610 }
611 
612 
613 inline
615  TCoding dataCoding) const
616 {
617  return cacheCoding != CSeq_data::e_not_set? cacheCoding: dataCoding;
618 }
619 
620 
621 inline
623 {
625 }
626 
627 
628 /* @} */
629 
630 
633 
634 #endif // SEQ_VECTOR_CI__HPP
bool operator!=(const _Ht_iterator< _Val, _Nonconst_traits< _Val >, _Key, _HF, _ExK, _EqK, _All > &__x, const _Ht_iterator< _Val, _Const_traits< _Val >, _Key, _HF, _ExK, _EqK, _All > &__y)
Definition: _hashtable.h:173
bool GetSeqData(ParserPtr pp, const DataBlk &entry, CBioseq &bioseq, Int4 nodetype, unsigned char *seqconv, Uint1 seq_data_type)
Definition: asci_blk.cpp:1678
CConstRef –.
Definition: ncbiobj.hpp:1266
CObject –.
Definition: ncbiobj.hpp:180
CRandom::
Definition: random_gen.hpp:66
Iterator over CSeqMap.
Definition: seq_map_ci.hpp:252
special temporary holder for return value from postfix operators
CSeqVector –.
Definition: seq_vector.hpp:65
bool operator<(const CEquivRange &A, const CEquivRange &B)
bool operator==(const CEquivRange &A, const CEquivRange &B)
int offset
Definition: replacements.h:160
objects::CSeqVectorTypes::TResidue TResidue
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
int TSignedSeqPos
Type for signed sequence position.
Definition: ncbimisc.hpp:887
element_type * get(void) const
Get pointer.
Definition: ncbimisc.hpp:581
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
Definition: ncbimisc.hpp:1508
CVect2< NCBI_PROMOTE(int,U) > operator*(int v1, const CVect2< U > &v2)
Definition: globals.hpp:371
CExpression operator>=(CREATED, time_point)
CExpression operator<=(time_point, CREATED)
ENa_strand GetStrand(const CSeq_loc &loc, CScope *scope=0)
Returns eNa_strand_unknown if multiple Bioseqs in loc Returns eNa_strand_other if multiple strands in...
bool IsValid(const CSeq_point &pt, CScope *scope)
Checks that point >= 0 and point < length of Bioseq.
const TResidue * pointer
CSeqVector_CI & SetPos(TSeqPos pos)
virtual void RandomizeData(char *buffer, size_t count, TSeqPos pos)=0
Convert count unpacked bases in buffer 4na -> 2na with randomization.
const char * GetBufferPtr(void) const
Get pointer to current char in the buffer.
ENa_strand GetStrand(void) const
CTempValue(value_type value)
void x_ResetCache(void)
CSeq_data::E_Choice TCoding
TSeqPos x_CacheEndPos(void) const
TSeqPos x_BackupPos(void) const
bool operator>(const CSeqVector_CI &iter) const
CTSE_Handle m_TSE
TSignedSeqPos difference_type
CTempValue operator--(int)
TResidue operator*(void) const
const TResidue & reference
CSeqVector_CI & operator++(void)
TSeqPos GetPos(void) const
vector< CTSE_Handle > m_UsedTSEs
TCoding GetCoding(void) const
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer)
Fill the buffer string with the sequence data for the interval [start, stop).
const char * GetBufferEnd(size_t size) const
Get pointer to current position+size.
TSeqPos x_CacheOffset(void) const
CSeqVector_CI & operator+=(TSeqPos value)
CRef< INcbi2naRandomizer > m_Randomizer
void x_SetPos(TSeqPos pos)
TCacheData m_CacheData
bool operator>=(const CSeqVector_CI &iter) const
ECaseConversion m_CaseConversion
bool operator!=(const CSeqVector_CI &iter) const
TCache_I m_CacheEnd
unsigned char TResidue
void x_ClearCache(void)
void x_ThrowOutOfRange(void) const
value_type operator*(void) const
TCache_I m_BackupEnd
TSeqPos x_CachePos(void) const
TSeqPos GetBufferSize(void) const
Get number of chars from current position to the current buffer end.
DECLARE_OPERATOR_BOOL(IsValid())
CSeqMap::ESegmentType GetType(void) const
Definition: seq_map_ci.hpp:651
AutoArray< char > TCacheData
TSeqPos m_ScannedStart
CTempValue operator++(int)
Restricted postfix operators.
TResidue GetGapChar(void) const
returns character representation of gap in sequence
void x_PrevCacheSeg(void)
void x_NextCacheSeg(void)
CSeqVector_CI operator-(const CSeqVector_CI &iter, TSeqPos value)
TSeqPos x_BackupEndPos(void) const
TSeqPos x_CacheSize(void) const
bool operator<(const CSeqVector_CI &iter) const
CConstRef< CSeqMap > m_SeqMap
CSeqVector_CI & operator--(void)
void x_ResetBackup(void)
ENa_strand m_Strand
bool operator<=(const CSeqVector_CI &iter) const
CSeqVector_CI operator+(const CSeqVector_CI &iter, TSeqPos value)
CHeapScope m_Scope
static TResidue sx_GetGapChar(TCoding coding, ECaseConversion case_cvt)
void x_SwapCache(void)
TCacheData m_BackupData
bool IsValid(void) const
TSeqPos x_BackupSize(void) const
TCoding x_GetCoding(TCoding cacheCoding, TCoding dataCoding) const
bool operator==(const CSeqVector_CI &iter) const
CSeqMap_CI m_Seg
const CSeqMap_CI & GetCurrentSeqMap_CI() const
bool IsInGap(void) const
true if current position of CSeqVector_CI is inside of sequence gap
CSeqVector_CI & operator-=(TSeqPos value)
std::random_access_iterator_tag iterator_category
@ eSeqGap
gap
Definition: seq_map.hpp:97
CObject & operator=(const CObject &src) THROWS_NONE
Assignment operator.
Definition: ncbiobj.hpp:482
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define NCBI_XOBJMGR_EXPORT
Definition: ncbi_export.h:1307
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
E_Choice
Choice variants.
Definition: Seq_data_.hpp:102
@ e_not_set
No variant selected.
Definition: Seq_data_.hpp:103
CNcbiMatrix< T > & operator+=(CNcbiMatrix< T > &, const CNcbiMatrix< U > &)
global addition: matrix += matrix
Definition: matrix.hpp:570
CNcbiMatrix< T > & operator-=(CNcbiMatrix< T > &, const CNcbiMatrix< U > &)
global subtraction: matrix -= matrix
Definition: matrix.hpp:622
const struct ncbi::grid::netcache::search::fields::SIZE size
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
S & operator--(CNetRef< S > &r, int)
static pcre_uint8 * buffer
Definition: pcretest.c:1051
static int seed
Definition: test_table.cpp:132
bool operator>(const typename tree< T, tree_node_allocator >::iterator_base &one, const typename tree< T, tree_node_allocator >::iterator_base &two)
Definition: tree_msvc7.hpp:426
Modified on Wed May 22 11:30:31 2024 by modify_doxy.py rev. 669887