NCBI C++ ToolKit
seq_vector.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef SEQ_VECTOR__HPP
2 #define SEQ_VECTOR__HPP
3 
4 /* $Id: seq_vector.hpp 62074 2014-03-12 16:29:48Z vasilche $
5 * ===========================================================================
6 *
7 * PUBLIC DOMAIN NOTICE
8 * National Center for Biotechnology Information
9 *
10 * This software/database is a "United States Government Work" under the
11 * terms of the United States Copyright Act. It was written as part of
12 * the author's official duties as a United States Government employee and
13 * thus cannot be copyrighted. This software/database is freely available
14 * to the public for use. The National Library of Medicine and the U.S.
15 * Government have not placed any restriction on its use or reproduction.
16 *
17 * Although all reasonable efforts have been taken to ensure the accuracy
18 * and reliability of the software and data, the NLM and the U.S.
19 * Government do not and cannot warrant the performance or results that
20 * may be obtained by using this software or data. The NLM and the U.S.
21 * Government disclaim all warranties, express or implied, including
22 * warranties of performance, merchantability or fitness for any particular
23 * purpose.
24 *
25 * Please cite the author in any work or product based on this material.
26 *
27 * ===========================================================================
28 *
29 * Author: Aleksey Grichenko, Michael Kimelman, Eugene Vasilchenko
30 *
31 * File Description:
32 * Sequence data container for object manager
33 *
34 */
35 
36 #include <objmgr/bioseq_handle.hpp>
37 #include <objmgr/scope.hpp>
38 #include <objmgr/seq_map.hpp>
39 #include <objmgr/seq_vector_ci.hpp>
41 
43 
44 /** @addtogroup ObjectManagerSequenceRep
45  *
46  * @{
47  */
48 
49 class CRandom;
50 
52 
53 class CScope;
54 class CSeq_loc;
55 class CSeqMap;
56 class CSeqVector_CI;
57 
58 /////////////////////////////////////////////////////////////////////////////
59 ///
60 /// CSeqVector --
61 ///
62 /// Provide sequence data in the selected coding
63 
65 {
66 public:
69  typedef CMutex TMutex;
71 
72  CSeqVector(void);
73  explicit
74  CSeqVector(const CBioseq_Handle& bioseq,
77  CSeqVector(const CSeqMap& seqMap, CScope& scope,
80  CSeqVector(const CSeqMap& seqMap, const CTSE_Handle& top_tse,
83  CSeqVector(const CSeq_loc& loc, CScope& scope,
86  CSeqVector(const CSeq_loc& loc, const CTSE_Handle& top_tse,
89  CSeqVector(const CBioseq& bioseq,
90  CScope* scope = 0,
93  CSeqVector(const CSeqVector& vec);
94 
95  virtual ~CSeqVector(void);
96 
97  CSeqVector& operator= (const CSeqVector& vec);
98 
99  bool empty(void) const;
100  TSeqPos size(void) const;
101 
102  /// Get mutex for a few non-MT-safe methods to make them MT-safe at a cost
103  /// of performance.
104  TMutex& GetMutex(void) const;
105 
106  /// 0-based array of residues
107  /// Note: this method is not MT-safe;
108  /// do not call it in parallel with any other method, even an MT-safe one.
109  /// It will be MT-safe to call this method after locking GetMutex().
110  TResidue operator[] (TSeqPos pos) const;
111 
112  /// true if sequence at 0-based position 'pos' has gap
113  /// Note: this method is not MT-safe;
114  /// do not call it in parallel with any other method, even an MT-safe one.
115  /// It will be MT-safe to call this method after locking GetMutex().
116  bool IsInGap(TSeqPos pos) const;
117 
118  /// returns number of gap symbols ahead including base at position 'pos'
119  /// returns 0 if the position is not in gap
120  TSeqPos GetGapSizeForward(TSeqPos pos) const;
121 
122  /// returns gap Seq-literal object ref
123  /// returns null if it's not a gap or an unspecified gap
124  CConstRef<CSeq_literal> GetGapSeq_literal(TSeqPos pos) const;
125 
126  /// Check if the sequence data is available for the interval [start, stop).
127  bool CanGetRange(TSeqPos start, TSeqPos stop) const;
128  bool CanGetRange(const const_iterator& start,
129  const const_iterator& stop) const;
130 
131  /// Fill the buffer string with the sequence data for the interval
132  /// [start, stop).
133  void GetSeqData(TSeqPos start, TSeqPos stop, string& buffer) const;
134  void GetSeqData(const const_iterator& start,
135  const const_iterator& stop,
136  string& buffer) const;
137  void GetPackedSeqData(string& buffer,
138  TSeqPos start = 0,
139  TSeqPos stop = kInvalidSeqPos);
140 
142 
143  TMol GetSequenceType(void) const;
144  bool IsProtein(void) const;
145  bool IsNucleotide(void) const;
146 
147  CScope& GetScope(void) const;
148  const CSeqMap& GetSeqMap(void) const;
149  ENa_strand GetStrand(void) const;
150  void SetStrand(ENa_strand strand);
151 
152  /// Target sequence coding. CSeq_data::e_not_set -- do not
153  /// convert sequence (use GetCoding() to check the real coding).
154  TCoding GetCoding(void) const;
155  void SetCoding(TCoding coding);
156  /// Set coding to either Iupacaa or Iupacna depending on molecule type
157  void SetIupacCoding(void);
158  /// Set coding to either Ncbi8aa or Ncbi8na depending on molecule type
159  void SetNcbiCoding(void);
160  /// Set coding to either Iupac or Ncbi binary coding (Ncbi4na or Ncbistdaa)
161  void SetCoding(EVectorCoding coding);
162 
163  /// Return gap symbol corresponding to the selected coding
164  TResidue GetGapChar(ECaseConversion case_cvt = eCaseConversion_none) const;
165 
166  const_iterator begin(void) const;
167  const_iterator end(void) const;
168 
169  /// Randomization of ambiguities and gaps in ncbi2na coding
170  void SetRandomizeAmbiguities(void);
171  void SetRandomizeAmbiguities(Uint4 seed);
172  void SetRandomizeAmbiguities(CRandom& random_gen);
173  void SetRandomizeAmbiguities(CRef<INcbi2naRandomizer> randomizer);
174  void SetNoAmbiguities(void);
175 
176 private:
177 
178  friend class CBioseq_Handle;
179  friend class CSeqVector_CI;
180 
181  void x_InitSequenceType(void);
182 
183  // this internal method is not MT-safe and must be guarded if necessary
184  CSeqVector_CI& x_GetIterator(TSeqPos pos) const;
185  // this internal method is not MT-safe and must be guarded if necessary
186  CSeqVector_CI* x_CreateIterator(TSeqPos pos) const;
187 
188  void x_ResetIterator(void) const;
189 
190  void x_InitRandomizer(CRandom& random_gen);
191 
192  void x_GetPacked8SeqData(string& dst_str,
193  TSeqPos src_pos, TSeqPos src_end);
194  void x_GetPacked4naSeqData(string& dst_str,
195  TSeqPos src_pos, TSeqPos src_end);
196  void x_GetPacked2naSeqData(string& dst_str,
197  TSeqPos src_pos, TSeqPos src_end);
198 
207 
210 };
211 
212 
213 /////////////////////////////////////////////////////////////////////////////
214 ///
215 /// CNcbi2naRandomizer --
216 ///
217 
219 {
220 public:
221  // If seed == 0 then use random number for seed
223  ~CNcbi2naRandomizer(void);
224 
225  void RandomizeData(char* buffer, // buffer to be randomized
226  size_t count, // number of bases in the buffer
227  TSeqPos pos); // sequence pos of the buffer
228 
229 private:
230  enum {
231  kRandomizerPosMask = 0x3f,
232  kRandomDataSize = kRandomizerPosMask + 1,
233  kRandomValue = 16
234  };
235 
236  char m_FixedTable[16];
237  char m_RandomTable[16][kRandomDataSize];
238 };
239 
240 
241 /////////////////////////////////////////////////////////////////////
242 //
243 // Inline methods
244 //
245 /////////////////////////////////////////////////////////////////////
246 
247 
248 inline
250 {
251  CSeqVector_CI* iter = m_Iterator.get();
252  if ( !iter ) {
253  iter = x_CreateIterator(pos);
254  }
255  else {
256  iter->SetPos(pos);
257  }
258  return *iter;
259 }
260 
261 
262 inline
264 {
265  return m_IteratorMutex;
266 }
267 
268 
269 inline
271 {
272  return *x_GetIterator(pos);
273 }
274 
275 
276 inline
278 {
279  return x_GetIterator(pos).IsInGap();
280 }
281 
282 
283 inline
284 bool CSeqVector::empty(void) const
285 {
286  return size() == 0;  // size() reports m_Size, so this holds exactly when m_Size is zero
287 }
288 
289 
290 inline
292 {
293  return m_Size;
294 }
295 
296 
297 inline
299 {
300  return CSeqVector_CI(*this, 0);
301 }
302 
303 
304 inline
306 {
307  return CSeqVector_CI(*this, size());
308 }
309 
310 
311 inline
313 {
314  return m_Coding;
315 }
316 
317 inline
319 {
320  return sx_GetGapChar(GetCoding(), case_cvt);
321 }
322 
323 inline
324 const CSeqMap& CSeqVector::GetSeqMap(void) const  // read-only access to the vector's sequence map
325 {
326  return *m_SeqMap;  // m_SeqMap is a CConstRef<CSeqMap>; NOTE(review): null-reference behavior is defined by CConstRef::operator* -- confirm
327 }
328 
329 inline
331 {
332  return m_Scope;
333 }
334 
335 inline
337 {
338  return m_Strand;
339 }
340 
341 
342 inline
344 {
345  return m_Mol;
346 }
347 
348 
349 inline
350 bool CSeqVector::IsProtein(void) const
351 {
353 }
354 
355 
356 inline
357 bool CSeqVector::IsNucleotide(void) const
358 {
360 }
361 
362 
363 inline
365  const const_iterator& stop) const
366 {
367  return CanGetRange(start.GetPos(), stop.GetPos());
368 }
369 
370 
371 inline
373  const const_iterator& stop,
374  string& buffer) const
375 {
376  GetSeqData(start.GetPos(), stop.GetPos(), buffer);
377 }
378 
379 
380 /* @} */
381 
382 
385 
386 #endif // SEQ_VECTOR__HPP
User-defined methods of the data storage class.
bool GetSeqData(ParserPtr pp, const DataBlk &entry, CBioseq &bioseq, Int4 nodetype, unsigned char *seqconv, Uint1 seq_data_type)
Definition: asci_blk.cpp:1632
CBioseq_Handle –.
CConstRef –.
Definition: ncbiobj.hpp:1266
CMutex –.
Definition: ncbimtx.hpp:749
CNcbi2naRandomizer –.
Definition: seq_vector.hpp:219
CObject –.
Definition: ncbiobj.hpp:180
CRandom::
Definition: random_gen.hpp:66
CScope –.
Definition: scope.hpp:92
CSeqMap –.
Definition: seq_map.hpp:93
CSeqVector –.
Definition: seq_vector.hpp:65
bool IsAa(void) const
Definition: Seq_inst.hpp:113
bool IsNa(void) const
Definition: Seq_inst.hpp:106
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
element_type * get(void) const
Get pointer.
Definition: ncbimisc.hpp:469
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
Definition: ncbimisc.hpp:878
ENa_strand GetStrand(const CSeq_loc &loc, CScope *scope=0)
Returns eNa_strand_unknown if multiple Bioseqs in loc. Returns eNa_strand_other if multiple strands in...
EVectorCoding
CSeqVector constructor flags.
@ eCoding_Ncbi
Set coding to binary coding (Ncbi4na or Ncbistdaa)
CSeqVector_CI & SetPos(TSeqPos pos)
TSeqPos GetPos(void) const
unsigned char TResidue
static TResidue sx_GetGapChar(TCoding coding, ECaseConversion case_cvt)
void x_InitRandomizer(CRandom &random_gen)
bool IsInGap(void) const
true if current position of CSeqVector_CI is inside of sequence gap
ENa_strand GetStrand(void) const
Definition: seq_vector.hpp:336
AutoPtr< CSeqVector_CI > m_Iterator
Definition: seq_vector.hpp:209
TCoding GetCoding(void) const
Target sequence coding.
Definition: seq_vector.hpp:312
friend class CSeqVector_CI
Definition: seq_vector.hpp:179
bool IsInGap(TSeqPos pos) const
true if sequence at 0-based position 'pos' has gap. Note: this method is not MT-safe,...
Definition: seq_vector.hpp:277
bool CanGetRange(TSeqPos start, TSeqPos stop) const
Check if the sequence data is available for the interval [start, stop).
Definition: seq_vector.cpp:292
const CSeqMap & GetSeqMap(void) const
Definition: seq_vector.hpp:324
CConstRef< CSeqMap > m_SeqMap
Definition: seq_vector.hpp:200
TCoding m_Coding
Definition: seq_vector.hpp:205
TMutex m_IteratorMutex
Definition: seq_vector.hpp:208
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer) const
Fill the buffer string with the sequence data for the interval [start, stop).
Definition: seq_vector.cpp:304
CBioseq_Handle::EVectorCoding EVectorCoding
Definition: seq_vector.hpp:67
CSeqVector_CI & x_GetIterator(TSeqPos pos) const
Definition: seq_vector.hpp:249
CTSE_Handle m_TSE
Definition: seq_vector.hpp:201
CMutex TMutex
Definition: seq_vector.hpp:69
CHeapScope m_Scope
Definition: seq_vector.hpp:199
CRef< INcbi2naRandomizer > m_Randomizer
Definition: seq_vector.hpp:206
TSeqPos m_Size
Definition: seq_vector.hpp:202
void x_InitSequenceType(void)
TResidue operator[](TSeqPos pos) const
0-based array of residues. Note: this method is not MT-safe, do not call it in parallel with other,...
Definition: seq_vector.hpp:270
TMutex::TWriteLockGuard TMutexGuard
Definition: seq_vector.hpp:70
CSeqVector_CI * x_CreateIterator(TSeqPos pos) const
Definition: seq_vector.cpp:261
TSeqPos size(void) const
Definition: seq_vector.hpp:291
TMutex & GetMutex(void) const
Get mutex for a few non-MT-safe methods to make them MT-safe at a cost of performance.
Definition: seq_vector.hpp:263
bool IsProtein(void) const
Definition: seq_vector.hpp:350
bool empty(void) const
Definition: seq_vector.hpp:284
CSeq_inst::TMol TMol
Definition: seq_vector.hpp:141
CSeqVector_CI const_iterator
Definition: seq_vector.hpp:68
ENa_strand m_Strand
Definition: seq_vector.hpp:204
const_iterator begin(void) const
Definition: seq_vector.hpp:298
bool IsNucleotide(void) const
Definition: seq_vector.hpp:357
const_iterator end(void) const
Definition: seq_vector.hpp:305
TMol GetSequenceType(void) const
Definition: seq_vector.hpp:343
CScope & GetScope(void) const
Definition: seq_vector.hpp:330
TResidue GetGapChar(ECaseConversion case_cvt=eCaseConversion_none) const
Return gap symbol corresponding to the selected coding.
Definition: seq_vector.hpp:318
CObject & operator=(const CObject &src) THROWS_NONE
Assignment operator.
Definition: ncbiobj.hpp:482
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define NCBI_XOBJMGR_EXPORT
Definition: ncbi_export.h:1307
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
@ eNa_strand_unknown
Definition: Na_strand_.hpp:65
E_Choice
Choice variants.
Definition: Seq_data_.hpp:102
EMol
molecule class in living organism
Definition: Seq_inst_.hpp:108
constexpr bool empty(list< Ts... >) noexcept
const struct ncbi::grid::netcache::search::fields::SIZE size
static pcre_uint8 * buffer
Definition: pcretest.c:1051
Uint4 GetSequenceType(const CBioseq_Handle &bsh)
Return a (corrected) set of flags identifying the sequence type.
Definition: sequtils.cpp:42
static int seed
Definition: test_table.cpp:132
CScope & GetScope()
Modified on Tue Apr 16 20:07:19 2024 by modify_doxy.py rev. 669887