NCBI C++ ToolKit
seq_vector_ci.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: seq_vector_ci.cpp 58888 2013-07-16 14:29:35Z vasilche $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Aleksey Grichenko, Eugene Vasilchenko
27 *
28 * File Description:
29 * Seq-vector iterator
30 *
31 */
32 
33 
34 #include <ncbi_pch.hpp>
35 #include <objmgr/seq_vector.hpp>
36 #include <objmgr/seq_vector_ci.hpp>
37 #include <objects/seq/NCBI8aa.hpp>
38 #include <objects/seq/NCBIpaa.hpp>
40 #include <objects/seq/NCBIeaa.hpp>
41 #include <objects/seq/NCBIpna.hpp>
42 #include <objects/seq/NCBI8na.hpp>
43 #include <objects/seq/NCBI4na.hpp>
44 #include <objects/seq/NCBI2na.hpp>
45 #include <objects/seq/IUPACaa.hpp>
46 #include <objects/seq/IUPACna.hpp>
47 #include <algorithm>
50 #include <util/random_gen.hpp>
51 
54 
55 
57 
58 void ThrowOutOfRangeSeq_inst(size_t pos)
59 {
 // Reports a reference that falls outside the Seq-inst data; the message
 // below is streamed together with the offending position `pos`.
 // NOTE(review): the statement opener that consumes this message is not
 // visible in this listing (presumably an NCBI_THROW_FMT call) - confirm
 // against the real file before editing.
61  "reference out of range of Seq-inst data: "<<pos);
62 }
63 
64 // CSeqVector_CI::
65 
66 
68  : m_Strand(eNa_strand_unknown),
69  m_Coding(CSeq_data::e_not_set),
70  m_CaseConversion(eCaseConversion_none),
71  m_Cache(0),
72  m_CachePos(0),
73  m_CacheData(),
74  m_CacheEnd(0),
75  m_BackupPos(0),
76  m_BackupData(),
77  m_BackupEnd(0),
78  m_ScannedStart(0),
79  m_ScannedEnd(0)
80 {
 // Nothing to do in the body: the initializer list leaves the iterator
 // with unknown strand, unset coding, no case conversion, null cache
 // pointers and zeroed cache/backup/scanned positions.
81 }
82 
83 
85 {
86 }
87 
88 
90  : m_Strand(eNa_strand_unknown),
91  m_Coding(CSeq_data::e_not_set),
92  m_CaseConversion(eCaseConversion_none),
93  m_Cache(0),
94  m_CachePos(0),
95  m_CacheData(),
96  m_CacheEnd(0),
97  m_BackupPos(0),
98  m_BackupData(),
99  m_BackupEnd(0),
100  m_Randomizer(sv_it.m_Randomizer),
101  m_ScannedStart(0),
102  m_ScannedEnd(0)
103 {
 // The members are first put into an empty, well-defined state (plus the
 // source's randomizer) and the real copy is then delegated to the
 // assignment operator, so the copy logic lives in one place.
104  *this = sv_it;
105 }
106 
107 
109  : m_Scope(seq_vector.m_Scope),
110  m_SeqMap(seq_vector.m_SeqMap),
111  m_TSE(seq_vector.m_TSE),
112  m_Strand(seq_vector.m_Strand),
113  m_Coding(seq_vector.m_Coding),
114  m_CaseConversion(eCaseConversion_none),
115  m_Cache(0),
116  m_CachePos(0),
117  m_CacheData(),
118  m_CacheEnd(0),
119  m_BackupPos(0),
120  m_BackupData(),
121  m_BackupEnd(0),
122  m_Randomizer(seq_vector.m_Randomizer),
123  m_ScannedStart(0),
124  m_ScannedEnd(0)
125 {
 // Scope, sequence map, TSE, strand, coding and randomizer are taken from
 // seq_vector; case conversion is fixed to "none" and both caches start
 // empty. x_SetPos() then positions the iterator at `pos`.
126  x_SetPos(pos);
127 }
128 
129 
131  ECaseConversion case_cvt)
132  : m_Scope(seq_vector.m_Scope),
133  m_SeqMap(seq_vector.m_SeqMap),
134  m_TSE(seq_vector.m_TSE),
135  m_Strand(seq_vector.m_Strand),
136  m_Coding(seq_vector.m_Coding),
137  m_CaseConversion(case_cvt),
138  m_Cache(0),
139  m_CachePos(0),
140  m_CacheData(),
141  m_CacheEnd(0),
142  m_BackupPos(0),
143  m_BackupData(),
144  m_BackupEnd(0),
145  m_Randomizer(seq_vector.m_Randomizer),
146  m_ScannedStart(0),
147  m_ScannedEnd(0)
148 {
 // Same initialization as the two-argument form, except that the caller
 // supplies the case conversion mode. x_SetPos() then positions the
 // iterator at `pos`.
149  x_SetPos(pos);
150 }
151 
152 
154  TSeqPos pos, ECaseConversion case_cvt)
155  : m_Scope(seq_vector.m_Scope),
156  m_SeqMap(seq_vector.m_SeqMap),
157  m_TSE(seq_vector.m_TSE),
158  m_Strand(strand),
159  m_Coding(seq_vector.m_Coding),
160  m_CaseConversion(case_cvt),
161  m_Cache(0),
162  m_CachePos(0),
163  m_CacheData(),
164  m_CacheEnd(0),
165  m_BackupPos(0),
166  m_BackupData(),
167  m_BackupEnd(0),
168  m_Randomizer(seq_vector.m_Randomizer),
169  m_ScannedStart(0),
170  m_ScannedEnd(0)
171 {
 // Unlike the other seq_vector-based constructors, the strand comes from
 // the explicit `strand` argument rather than from seq_vector.m_Strand.
 // Everything else is copied from seq_vector; caches start empty and
 // x_SetPos() positions the iterator at `pos`.
172  x_SetPos(pos);
173 }
174 
175 
177 {
 // Re-binds this iterator to another CSeqVector.
178  if ( m_SeqMap ) {
 // The iterator was already attached to a sequence: drop the segment
 // iterator and both caches, since they describe the old sequence.
179  // reset old values
180  m_Seg = CSeqMap_CI();
181  x_ResetCache();
182  x_ResetBackup();
183  }
184 
 // Take over scope, map, TSE, strand, coding and randomizer from the
 // new vector.
185  m_Scope = seq_vector.m_Scope;
186  m_SeqMap = seq_vector.m_SeqMap;
187  m_TSE = seq_vector.m_TSE;
188  m_Strand = seq_vector.m_Strand;
189  m_Coding = seq_vector.m_Coding;
 // The cache position is set to the vector size, i.e. the same value
 // x_SetPos() stores for the past-the-end position.
190  m_CachePos = seq_vector.size();
191  m_Randomizer = seq_vector.m_Randomizer;
 // NOTE(review): the listing skips one source line here - confirm what it
 // resets before relying on this description.
193 }
194 
195 
196 inline
198 {
200 }
201 
202 
// Upper bound (10,000,000 bases) applied to the `check` length computed by
// the forward/backward scan helpers below.
203 static const TSeqPos kMaxPreloadBases = 10*1000*1000;
204 
205 
207 {
 // Checks whether sequence data can be obtained for [start, stop) and, on
 // success, records the interval in m_ScannedStart/m_ScannedEnd.
 // Returns false for an inverted range or on any std exception.
 // NOTE(review): several lines of this function (selector construction,
 // the resolvability test, the start-side merge) are missing from this
 // listing; the comments cover only what is visible.
208  try {
209  if ( stop < start ) {
210  return false;
211  }
 // The selector is restricted to this iterator's strand and to the
 // requested interval, expressed as (start, length).
213  sel.SetStrand(m_Strand).SetRange(start, stop-start);
216  return false;
217  }
 // The request does not touch the already scanned window: replace the
 // window. Otherwise the window is extended.
218  if ( start > m_ScannedEnd || stop < m_ScannedStart ) {
219  m_ScannedStart = start;
220  m_ScannedEnd = stop;
221  }
222  else {
224  m_ScannedEnd = max(m_ScannedEnd, stop);
225  }
226  return true;
227  }
228  catch ( exception& /*ignored*/ ) {
 // Failure to resolve is reported as "cannot get range", not as an error.
229  return false;
230  }
231 }
232 
233 
235 {
 // Computes how many bases past the scanned window should be checked:
 // the smallest of (a) the length already scanned, (b) the bases that
 // remain between the scanned end and the sequence end, and
 // (c) kMaxPreloadBases.
236  TSeqPos scanned = m_ScannedEnd - m_ScannedStart;
237  TSeqPos more = x_GetSize() - m_ScannedEnd;
238  TSeqPos check = min(min(scanned, more), kMaxPreloadBases);
239  if ( check > 0 ) {
 // NOTE(review): the action taken for a non-zero `check` is not visible
 // in this listing (presumably a CanGetRange() probe) - confirm.
241  }
242 }
243 
244 
246 {
 // Mirror image of the forward check: the length to check before the
 // scanned window is the smallest of (a) the length already scanned,
 // (b) the bases available before the scanned start, and
 // (c) kMaxPreloadBases.
247  TSeqPos scanned = m_ScannedEnd - m_ScannedStart;
248  TSeqPos more = m_ScannedStart;
249  TSeqPos check = min(min(scanned, more), kMaxPreloadBases);
250  if ( check > 0 ) {
 // NOTE(review): the action taken for a non-zero `check` is not visible
 // in this listing (presumably a CanGetRange() probe) - confirm.
252  }
253 }
254 
255 
256 inline
258 {
261  if ( pos == m_ScannedEnd ) {
262  x_CheckForward();
263  }
264  else if ( pos < m_ScannedStart || pos > m_ScannedEnd ) {
266  }
270 }
271 
272 
273 inline
275 {
 // Advances the segment iterator by one segment. When the current segment
 // ends exactly at the scanned-window end, the forward check runs first.
276  if ( m_Seg.GetEndPosition() == m_ScannedEnd ) {
277  x_CheckForward();
278  }
279  ++m_Seg;
 // NOTE(review): the listing skips one source line after the increment -
 // confirm what follows in the real file.
281 }
282 
283 
284 inline
286 {
 // Moves the segment iterator back by one segment. When the current
 // segment starts exactly at the scanned-window start, the backward check
 // runs first.
287  if ( m_Seg.GetPosition() == m_ScannedStart ) {
288  x_CheckBackward();
289  }
290  --m_Seg;
 // NOTE(review): the listing skips one source line after the decrement -
 // confirm what follows in the real file.
292 }
293 
294 
296 {
 // Throws CSeqVectorException (eOutOfRange); the message carries the
 // current position and the sequence size.
297  NCBI_THROW_FMT(CSeqVectorException, eOutOfRange,
298  "iterator out of range: "<<GetPos()<<">="<<x_GetSize());
299 }
300 
301 
303 {
 // Switches the coding of the residues returned by the iterator.
 // No-op when the coding is unchanged.
304  if ( m_Coding != coding ) {
 // Remember the position before the caches are discarded, so the
 // iterator can be placed back on the same residue.
305  TSeqPos pos = GetPos();
306  m_Coding = coding;
 // Cached residues were produced with the old coding: drop them.
307  x_ResetBackup();
308  if ( x_CacheSize() ) {
309  x_ResetCache();
 // Re-position (and thus refill in the new coding) only if the
 // segment iterator is valid.
310  if ( m_Seg ) {
311  x_SetPos(pos);
312  }
313  }
314  }
315 }
316 
317 
319 {
 // Changes the strand the iterator reads.
 // If the new strand has the same direction as the old one (both reverse
 // or both not reverse), the cached data stay valid and only the stored
 // strand value is updated.
320  if ( IsReverse(m_Strand) == IsReverse(strand) ) {
321  m_Strand = strand;
322  return;
323  }
324 
 // Direction flips: remember the position, discard both caches and the
 // segment iterator, then re-position so data are rebuilt for the new
 // direction.
325  TSeqPos pos = GetPos();
326  m_Strand = strand;
327  x_ResetBackup();
328  if ( x_CacheSize() ) {
329  x_ResetCache();
330  if ( m_Seg ) {
331  m_Seg = CSeqMap_CI();
332  x_SetPos(pos);
333  }
334  }
335 }
336 
337 
338 // returns gap Seq-data object ref
339 // returns null if the current position is not in a gap, or if the gap is unspecified
341 {
 // Outside of a gap there is no literal to return.
342  if ( !IsInGap() ) {
343  return null;
344  }
 // Delegates to the segment iterator for the gap literal.
345  return m_Seg.GetRefGapLiteral();
346 }
347 
348 
349 // returns number of gap symbols ahead including current symbol
350 // returns 0 if current position is not in gap
352 {
353  if ( !IsInGap() ) {
354  return 0;
355  }
 // Distance from the current position to the (exclusive) end of the
 // current segment.
356  return m_Seg.GetEndPosition() - GetPos();
357 }
358 
359 
360 // returns number of gap symbols before current symbol
361 // returns 0 if current position is not in gap
363 {
364  if ( !IsInGap() ) {
365  return 0;
366  }
 // Distance from the start of the current segment to the current
 // position; the current symbol itself is not counted.
367  return GetPos() - m_Seg.GetPosition();
368 }
369 
370 
371 // skip current gap forward
372 // returns number of skipped gap symbols
373 // does nothing and returns 0 if current position is not in gap
375 {
376  if ( !IsInGap() ) {
377  return 0;
378  }
 // Jump by the number of gap symbols ahead (current symbol included),
 // which lands on the end position of the current gap segment.
379  TSeqPos skip = GetGapSizeForward();
380  SetPos(GetPos()+skip);
381  return skip;
382 }
383 
384 
385 // skip current gap backward
386 // returns number of skipped gap symbols
387 // does nothing and returns 0 if current position is not in gap
389 {
390  if ( !IsInGap() ) {
391  return 0;
392  }
 // The extra +1 moves past the first gap symbol as well, so the target is
 // one position before the start of the gap segment.
 // NOTE(review): if the gap segment starts at position 0, GetPos()-skip
 // wraps around (TSeqPos is unsigned); confirm that SetPos() handles
 // such a value as intended.
393  TSeqPos skip = GetGapSizeBackward()+1;
394  SetPos(GetPos()-skip);
395  return skip;
396 }
397 
398 
399 // return true if there is zero-length gap before current position
400 // it might happen only if current position is at the beginning of buffer
402 {
 // Not at the first element of the cache: answer is always false.
403  if ( x_CacheOffset() != 0 ) {
404  return false;
405  }
406  TSeqPos pos = GetPos();
 // On a reverse strand the position is mirrored (size - pos) before the
 // lookup.
407  if ( IsReverse(m_Strand) ) {
408  pos = x_GetSize() - pos;
409  }
 // NOTE(review): the final return statement is missing from this listing
 // (presumably a HasZeroGapAt(pos, ...) query on the sequence map) -
 // confirm.
411 }
412 
413 
415 {
 // Copy assignment. Returns *this.
 // Self-assignment guard: the memcpy calls below must not run with
 // identical source and destination buffers.
416  if ( this == &sv_it ) {
417  return *this;
418  }
419 
 // Plain state: scope, map, TSE, strand, coding, segment iterator, cache
 // position, randomizer and scanned window.
 // NOTE(review): the listing skips two source lines in this run of
 // assignments - confirm which members they copy.
420  m_Scope = sv_it.m_Scope;
421  m_SeqMap = sv_it.m_SeqMap;
422  m_TSE = sv_it.m_TSE;
423  m_Strand = sv_it.m_Strand;
424  m_Coding = sv_it.GetCoding();
426  m_Seg = sv_it.m_Seg;
427  m_CachePos = sv_it.x_CachePos();
428  m_Randomizer = sv_it.m_Randomizer;
430  m_ScannedEnd = sv_it.m_ScannedEnd;
431  // copy cache if any
432  size_t cache_size = sv_it.x_CacheSize();
433  if ( cache_size ) {
 // The cache bytes are copied into this object's own buffer, and the
 // current/end pointers are rebuilt relative to that buffer using the
 // source's size and offset.
 // NOTE(review): the listing skips one source line at the top of this
 // branch (presumably the buffer initialization) - confirm.
435  m_CacheEnd = m_CacheData.get() + cache_size;
436  m_Cache = m_CacheData.get() + sv_it.x_CacheOffset();
437  memcpy(m_CacheData.get(), sv_it.m_CacheData.get(), cache_size);
438 
 // The backup cache is considered only when the primary cache is
 // non-empty.
439  // copy backup cache if any
440  size_t backup_size = sv_it.x_BackupSize();
441  if ( backup_size ) {
442  m_BackupPos = sv_it.x_BackupPos();
443  m_BackupEnd = m_BackupData.get() + backup_size;
444  memcpy(m_BackupData.get(), sv_it.m_BackupData.get(), backup_size);
445  }
446  else {
447  x_ResetBackup();
448  }
449  }
450  else {
 // Source has no cached data: make sure this object has none either.
451  x_ResetCache();
452  x_ResetBackup();
453  }
454  return *this;
455 }
456 
457 
459 {
 // First use (m_Cache is null): allocate both the cache and the backup
 // buffer, kCacheSize bytes each. Later calls only reset the cache.
460  if ( !m_Cache ) {
461  m_CacheData.reset(new char[kCacheSize]);
462  m_BackupData.reset(new char[kCacheSize]);
 // NOTE(review): the listing skips two source lines here (presumably the
 // initial pointer setup) - confirm.
465  }
466  else {
467  x_ResetCache();
468  }
469 }
470 
471 
472 inline
474 {
 // Prepares the cache to hold `size` residues; `size` may not exceed
 // kCacheSize (checked by assertion only).
475  _ASSERT(size <= kCacheSize);
476  if ( !m_CacheData.get() ) {
 // NOTE(review): the allocation performed when no buffer exists yet is
 // not visible in this listing - confirm.
478  }
 // The current pointer is rewound to the start of the buffer.
479  m_Cache = m_CacheData.get();
 // NOTE(review): the listing skips one source line here (presumably the
 // update of m_CacheEnd) - confirm.
481 }
482 
483 
485 {
 // Fills the cache so that it STARTS at `pos` and extends forward.
 // Precondition (asserted): `pos` lies inside the current segment.
486  _ASSERT(pos < x_GetSize());
487 
488  TSeqPos segEnd = m_Seg.GetEndPosition();
489  _ASSERT(pos >= m_Seg.GetPosition() && pos < segEnd);
490 
 // Never read past the end of the current segment, and never more than
 // kCacheSize residues.
491  TSeqPos cache_size = min(kCacheSize, segEnd - pos);
492  x_FillCache(pos, cache_size);
 // The current pointer is the first cached residue.
493  m_Cache = m_CacheData.get();
494  _ASSERT(GetPos() == pos);
495 }
496 
497 
499 {
 // Fills the cache so that it ENDS at `pos` (inclusive) and extends
 // backward. Precondition (asserted): `pos` lies inside the current
 // segment.
500  _ASSERT(pos < x_GetSize());
501 
502  TSeqPos segStart = m_Seg.GetPosition();
503  _ASSERT(pos >= segStart && pos < m_Seg.GetEndPosition());
504 
 // Number of residues cached before `pos`: limited by the segment start
 // and by the buffer size (kCacheSize - 1, because `pos` itself also
 // occupies one slot).
505  TSeqPos cache_offset = min(kCacheSize - 1, pos - segStart);
506  x_FillCache(pos - cache_offset, cache_offset + 1);
 // The current pointer is the last cached residue, i.e. `pos`.
507  m_Cache = m_CacheData.get() + cache_offset;
508  _ASSERT(GetPos() == pos);
509 }
510 
511 
513 {
 // Fills the cache with `count` residues of the current segment, starting
 // at sequence position `start`, and records `start` as the cache
 // position. Throws CSeqVectorException (eCodingError) for incompatible,
 // unimplemented or invalid codings.
 // NOTE(review): the scraped listing drops several lines of this function
 // (one leading line, the `case` labels of the inner switch, parts of the
 // gap branch and of the final throw); comments describe visible code only.
515  _ASSERT(start >= m_Seg.GetPosition());
516  _ASSERT(start < m_Seg.GetEndPosition());
517 
518  x_ResizeCache(count);
519 
520  switch ( m_Seg.GetType() ) {
521  case CSeqMap::eSeqData:
522  {
523  const CSeq_data& data = m_Seg.GetRefData();
 // The segment type is queried again after the data were fetched; if it
 // now reports a gap, the fill is restarted so the gap branch handles it.
524  if ( data.IsGap() && m_Seg.GetType() == CSeqMap::eSeqGap ) {
525  // workaround for erroneously split gap Seq-data
526  x_FillCache(start, count);
527  return;
528  }
529 
530  TCoding dataCoding = data.Which();
531  TCoding cacheCoding = x_GetCoding(m_Coding, dataCoding);
532  bool reverse = m_Seg.GetRefMinusStrand();
533 
 // 2na output requested from data stored in another coding, with a
 // randomizer installed: unpack as 4na first and let the randomizer
 // post-process the buffer (see the RandomizeData call below).
534  bool randomize = false;
535  if ( cacheCoding != dataCoding &&
536  cacheCoding == CSeq_data::e_Ncbi2na &&
537  m_Randomizer) {
538  cacheCoding = CSeq_data::e_Ncbi4na;
539  randomize = true;
540  }
541 
 // A conversion table is looked up when the coding differs or the
 // segment is on the minus strand (a further condition is cut from this
 // listing). A missing table is an error only if the codings differ.
542  const char* table = 0;
543  if ( cacheCoding != dataCoding || reverse ||
545  table = sx_GetConvertTable(dataCoding, cacheCoding,
546  reverse, m_CaseConversion);
547  if ( !table && cacheCoding != dataCoding ) {
548  NCBI_THROW_FMT(CSeqVectorException, eCodingError,
549  "Incompatible sequence codings: "<<
550  dataCoding<<" -> "<<cacheCoding);
551  }
552  }
553 
 // Translate the sequence position into an offset inside the referenced
 // data. For a minus-strand segment the offset is measured back from the
 // referenced end, and `count` is subtracted so dataPos is the lowest
 // offset of the block to read.
554  TSeqPos dataPos;
555  if ( reverse ) {
556  // Revert segment offset
557  dataPos = m_Seg.GetRefEndPosition() -
558  (start - m_Seg.GetPosition()) - count;
559  }
560  else {
561  dataPos = m_Seg.GetRefPosition() +
562  (start - m_Seg.GetPosition());
563  }
564 
 // One copy routine per storage width: 8-bit, 4-bit and 2-bit packed
 // containers. The probability codings (Ncbipna, Ncbipaa) are rejected.
565  switch ( dataCoding ) {
567  copy_8bit_any(m_Cache, count, data.GetIupacna().Get(), dataPos,
568  table, reverse);
569  break;
571  copy_8bit_any(m_Cache, count, data.GetIupacaa().Get(), dataPos,
572  table, reverse);
573  break;
575  copy_2bit_any(m_Cache, count, data.GetNcbi2na().Get(), dataPos,
576  table, reverse);
577  break;
579  copy_4bit_any(m_Cache, count, data.GetNcbi4na().Get(), dataPos,
580  table, reverse);
581  break;
583  copy_8bit_any(m_Cache, count, data.GetNcbi8na().Get(), dataPos,
584  table, reverse);
585  break;
587  NCBI_THROW(CSeqVectorException, eCodingError,
588  "Ncbipna conversion not implemented");
590  copy_8bit_any(m_Cache, count, data.GetNcbi8aa().Get(), dataPos,
591  table, reverse);
592  break;
594  copy_8bit_any(m_Cache, count, data.GetNcbieaa().Get(), dataPos,
595  table, reverse);
596  break;
598  NCBI_THROW(CSeqVectorException, eCodingError,
599  "Ncbipaa conversion not implemented");
601  copy_8bit_any(m_Cache, count, data.GetNcbistdaa().Get(), dataPos,
602  table, reverse);
603  break;
604  default:
605  NCBI_THROW_FMT(CSeqVectorException, eCodingError,
606  "Invalid data coding: "<<dataCoding);
607  }
 // Second half of the 2na randomization set up above.
608  if ( randomize ) {
609  m_Randomizer->RandomizeData(m_Cache, count, start);
610  }
611  break;
612  }
 // Gap segment: the buffer is filled with a constant symbol; one branch
 // additionally passes it through the randomizer, the other uses
 // GetGapChar(). The branch condition is cut from this listing.
613  case CSeqMap::eSeqGap:
615  fill_n(m_Cache, count,
617  m_Randomizer->RandomizeData(m_Cache, count, start);
618  }
619  else {
620  fill_n(m_Cache, count, GetGapChar());
621  }
622  break;
623  default:
625  "Invalid segment type: "<<m_Seg.GetType());
626  }
627  m_CachePos = start;
628 }
629 
630 
632 {
 // Positions the iterator at `pos` when `pos` is outside the current
 // cache (asserted below). Positions at or beyond the sequence size are
 // all mapped to the end position.
633  TSeqPos size = x_GetSize();
634  if ( pos >= size ) {
635  if ( x_CacheSize() ) {
636  // save current cache as backup
637  x_SwapCache();
638  x_ResetCache();
639  }
640  _ASSERT(x_CacheSize() == 0 && x_CacheOffset() == 0);
 // End position is represented by an empty cache positioned at `size`.
641  m_CachePos = size;
642  _ASSERT(GetPos() == size);
643  return;
644  }
645 
 // Unsigned arithmetic: the test also holds when pos is below the cache
 // start, because the subtraction then wraps to a large value.
646  _ASSERT(pos - x_CachePos() >= x_CacheSize());
647 
648  // update current segment
649  x_UpdateSeg(pos);
650 
651  // save current cache as backup and restore old backup
652  x_SwapCache();
653 
654  // check if old backup is suitable
655  TSeqPos cache_offset = pos - x_CachePos();
656  TSeqPos cache_size = x_CacheSize();
657  if ( cache_offset < cache_size ) {
658  // can use backup
 // NOTE(review): the last clause of this assertion is cut from the
 // listing.
659  _ASSERT(x_CacheSize() &&
660  x_CachePos() >= m_Seg.GetPosition() &&
662  m_Cache = m_CacheData.get() + cache_offset;
663  }
664  else {
665  // cannot use backup
 // NOTE(review): the listing skips one source line here (presumably the
 // cache initialization) - confirm.
667  TSeqPos old_pos = x_BackupPos();
 // `pos` lies within kCacheSize before the previous cache start and the
 // current segment reaches up to that start: fill the cache backward so
 // it ends right where the previous cache began, then point at `pos`.
668  if ( pos < old_pos && pos >= old_pos - kCacheSize &&
669  m_Seg.GetEndPosition() >= old_pos ) {
670  x_UpdateCacheDown(old_pos - 1);
671  cache_offset = pos - x_CachePos();
672  m_Cache = m_CacheData.get() + cache_offset;
673  }
674  else {
 // Otherwise fill forward starting at `pos`.
675  x_UpdateCacheUp(pos);
676  }
677  }
679  _ASSERT(GetPos() == pos);
680 }
681 
682 
684 {
 // Makes m_Seg the segment that contains `pos`. Stepping one non-empty
 // segment at a time is tried first; x_InitSeg(pos) is the fallback when
 // a single step does not reach `pos`.
685  if ( m_Seg.IsInvalid() ) {
686  x_InitSeg(pos);
687  }
688  else if ( m_Seg.GetPosition() > pos ) {
689  // segment is ahead
690  do {
691  x_DecSeg();
692  } while ( m_Seg && m_Seg.GetLength() == 0 ); // skip 0 length segments
693  if ( !m_Seg || m_Seg.GetPosition() > pos ) {
694  // too far
695  x_InitSeg(pos);
696  }
697  }
698  else if ( m_Seg.GetEndPosition() <= pos ) {
699  // segment is behind
700  do {
701  x_IncSeg();
702  } while ( m_Seg && m_Seg.GetLength() == 0 ); // skip 0 length segments
703  if ( !m_Seg || m_Seg.GetEndPosition() <= pos ) {
704  // too far
705  x_InitSeg(pos);
706  }
707  }
708  if ( !m_Seg && pos == x_GetSize() ) {
709  // it's ok to position to the very end
710  return;
711  }
 // Any other failure to land on a segment covering `pos` is reported
 // with the message below.
 // NOTE(review): the opening line of the throw statement is cut from
 // this listing.
712  if ( !m_Seg || pos<m_Seg.GetPosition() || pos>=m_Seg.GetEndPosition() ) {
714  "CSeqVector_CI: cannot locate segment at "<<pos);
715  }
716  _ASSERT(m_Seg && pos>=m_Seg.GetPosition() && pos<m_Seg.GetEndPosition());
717 }
718 
719 
721 {
 // Replaces the contents of `buffer` with up to `count` residues starting
 // at the current position, and advances the iterator past them. `count`
 // is clipped to the number of residues left in the sequence.
722  buffer.erase();
723  TSeqPos pos = GetPos();
724  _ASSERT(pos <= x_GetSize());
725  count = min(count, x_GetSize() - pos);
726  if ( !count ) {
727  return;
728  }
729 
 // With a TSE attached, the whole range is checked up front and a
 // failure is reported with the message below.
 // NOTE(review): the opening line of the throw statement is cut from
 // this listing.
730  if ( m_TSE && !CanGetRange(pos, pos+count) ) {
732  "CSeqVector_CI::GetSeqData: "
733  "cannot get seq-data in range: "
734  <<pos<<"-"<<pos+count);
735  }
736 
737  buffer.reserve(count);
 // Copy cache-sized chunks: take what the current cache still holds (or
 // what is still needed), then either move to the next cache segment or
 // just advance the pointer inside the current cache.
738  while ( count ) {
739  TCache_I cache = m_Cache;
740  TCache_I cache_end = m_CacheEnd;
741  TSeqPos chunk_count = min(count, TSeqPos(cache_end - cache));
742  _ASSERT(chunk_count > 0);
743  TCache_I chunk_end = cache + chunk_count;
744  buffer.append(cache, chunk_end);
745  count -= chunk_count;
746  //if ( count == 0 ) break;
747  if ( chunk_end == cache_end ) {
748  x_NextCacheSeg();
749  }
750  else {
751  m_Cache = chunk_end;
752  }
753  }
754 }
755 
756 
758 {
 // Moves the cache to the data that follow the current cache.
 // Throws CSeqVectorException (eOutOfRange) when the iterator is already
 // at the end and cannot advance.
759  _ASSERT(m_SeqMap);
760  TSeqPos pos = x_CacheEndPos();
761  TSeqPos size = x_GetSize();
762  if ( pos >= size ) {
 // The current cache reaches the end of the sequence.
763  if ( x_CachePos() < pos ) {
 // Non-empty cache: keep it as backup and become the end position
 // (empty cache positioned at `pos`).
764  x_SwapCache();
765  x_ResetCache();
766  m_CachePos = pos;
767  return;
768  }
769  else {
770  // Can not go further
771  NCBI_THROW(CSeqVectorException, eOutOfRange,
772  "Can not update cache: iterator beyond end");
773  }
774  }
775  // save current cache in backup
776  _ASSERT(x_CacheSize());
777  x_SwapCache();
778  // update segment if needed
779  x_UpdateSeg(pos);
780  if ( !m_Seg ) {
781  // end of sequence
 // NOTE(review): the opening line of the throw statement is cut from
 // this listing.
783  "CSeqVector_CI: invalid sequence length: "
784  <<pos<<" <> "<<size);
785  }
786  // Try to re-use backup cache
 // After the swap the former backup is the active cache; it is reused
 // when it already covers `pos`.
787  if ( pos < x_CacheEndPos() && pos >= x_CachePos() ) {
788  m_Cache = m_CacheData.get() + pos - x_CachePos();
789  }
790  else {
791  // can not use backup cache
792  x_ResetCache();
793  x_UpdateCacheUp(pos);
794  _ASSERT(GetPos() == pos);
795  _ASSERT(x_CacheSize());
796  _ASSERT(x_CachePos() == pos);
797  }
798 }
799 
800 
802 {
 // Moves the cache to the data that precede the current cache.
 // Throws CSeqVectorException (eOutOfRange) when the cache already starts
 // at position 0.
803  _ASSERT(m_SeqMap);
804  TSeqPos pos = x_CachePos();
 // Test-then-decrement: after this line `pos` is the last position before
 // the current cache.
805  if ( pos-- == 0 ) {
806  // Can not go further
807  NCBI_THROW(CSeqVectorException, eOutOfRange,
808  "Can not update cache: iterator beyond start");
809  }
810  TSeqPos size = x_GetSize();
811  // save current cache in backup
812  x_SwapCache();
813  // update segment if needed
814  if ( m_Seg.IsInvalid() ) {
815  x_InitSeg(pos);
816  }
817  else {
 // Step back until the segment no longer starts after `pos`.
818  while ( m_Seg && m_Seg.GetPosition() > pos ) {
819  x_DecSeg();
820  }
821  }
822  if ( !m_Seg ) {
 // NOTE(review): the opening line of the throw statement is cut from
 // this listing.
824  "CSeqVector_CI: invalid sequence length: "
825  <<pos<<" <> "<<size);
826  }
827  // Try to re-use backup cache
 // After the swap the former backup is the active cache; it is reused
 // when it already covers `pos`.
828  if ( pos >= x_CachePos() && pos < x_CacheEndPos() ) {
829  m_Cache = m_CacheData.get() + pos - x_CachePos();
830  }
831  else {
832  // can not use backup cache
833  x_ResetCache();
834  x_UpdateCacheDown(pos);
835  _ASSERT(GetPos() == pos);
836  _ASSERT(x_CacheSize());
837  _ASSERT(x_CacheEndPos() == pos+1);
838  }
839 }
840 
841 
843 {
 // Installs a different randomizer object. No-op when the same randomizer
 // is already set.
844  if ( randomizer != m_Randomizer ) {
 // Remember the position before the caches are discarded, so the
 // iterator can be placed back on the same residue.
845  TSeqPos pos = GetPos();
846  m_Randomizer = randomizer;
 // Cached residues were produced with the previous randomizer setting:
 // drop them and, if the segment iterator is valid, re-position.
847  x_ResetBackup();
848  if ( x_CacheSize() ) {
849  x_ResetCache();
850  if ( m_Seg ) {
851  x_SetPos(pos);
852  }
853  }
854  }
855 }
856 
857 
859 {
 // Builds a CNcbi2naRandomizer from the supplied random generator and
 // installs it through SetRandomizeAmbiguities().
860  CRef<INcbi2naRandomizer> randomizer(new CNcbi2naRandomizer(random_gen));
861  SetRandomizeAmbiguities(randomizer);
862 }
863 
864 
866 {
 // Uses a default-constructed CRandom as the source for the randomizer.
867  CRandom random_gen;
868  x_InitRandomizer(random_gen);
869 }
870 
871 
873 {
 // Uses a CRandom constructed from the caller's `seed` as the source for
 // the randomizer.
874  CRandom random_gen(seed);
875  x_InitRandomizer(random_gen);
876 }
877 
878 
880 {
 // Uses the caller-supplied random generator as the source for the
 // randomizer.
881  x_InitRandomizer(random_gen);
882 }
883 
884 
886 {
888 }
889 
890 
static CRef< CScope > m_Scope
#define static
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
bool IsReverse(ENa_strand s)
Definition: Na_strand.hpp:75
CConstRef –.
Definition: ncbiobj.hpp:1266
CScope * GetScopeOrNull(void) const
Definition: heap_scope.cpp:74
CNcbi2naRandomizer –.
Definition: seq_vector.hpp:219
CRandom::
Definition: random_gen.hpp:66
Iterator over CSeqMap.
Definition: seq_map_ci.hpp:252
SeqVector related exceptions.
CSeqVector –.
Definition: seq_vector.hpp:65
#define check(s)
Definition: describecol2.c:21
char data[12]
Definition: iconv.c:80
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
element_type * get(void) const
Get pointer.
Definition: ncbimisc.hpp:581
void reset(element_type *p=0)
Reset will delete the old pointer, set content to the new value, and assume the ownership upon the ne...
Definition: ncbimisc.hpp:598
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
#define NCBI_THROW_FMT(exception_class, err_code, message)
The same as NCBI_THROW but with message processed as output to ostream.
Definition: ncbiexpt.hpp:719
void x_InitSeg(TSeqPos pos)
CSeqVector_CI & SetPos(TSeqPos pos)
virtual void RandomizeData(char *buffer, size_t count, TSeqPos pos)=0
Convert count unpacked bases in buffer 4na -> 2na with randomization.
TSeqPos GetEndPosition(void) const
return end position of current segment in sequence (exclusive)
Definition: seq_map_ci.hpp:679
const CSeq_data & GetRefData(void) const
will allow any data segments, user should check for position and strand
Definition: seq_map_ci.cpp:282
void x_ResetCache(void)
TSeqPos x_CacheEndPos(void) const
TSeqPos x_BackupPos(void) const
SSeqMapSelector & SetLinkUsedTSE(bool link=true)
Definition: seq_map_ci.hpp:157
void x_CheckBackward(void)
TSeqPos SkipGap(void)
Skip current gap forward. Returns number of skipped gap symbols. Does nothing and returns 0 if current position is not in gap.
void x_CheckForward(void)
CTSE_Handle m_TSE
TSeqPos GetPos(void) const
TSeqPos GetRefPosition(void) const
Definition: seq_map_ci.hpp:693
vector< CTSE_Handle > m_UsedTSEs
TSeqPos m_ScannedEnd
void x_FillCache(TSeqPos start, TSeqPos count)
TCoding GetCoding(void) const
void GetSeqData(TSeqPos start, TSeqPos stop, string &buffer)
Fill the buffer string with the sequence data for the interval [start, stop).
void x_ResizeCache(size_t size)
TSeqPos x_CacheOffset(void) const
CRef< INcbi2naRandomizer > m_Randomizer
TSeqPos GetGapSizeForward(void) const
Returns number of gap symbols ahead including current symbol. Returns 0 if current position is not in gap.
CConstRef< CSeq_literal > GetGapSeq_literal(void) const
Returns gap Seq-data object ref. Returns null if the current position is not in a gap, or if the gap is unspecified.
CSeqVector_CI & operator=(const CSeqVector_CI &sv_it)
void x_SetPos(TSeqPos pos)
TSeqPos x_GetSize(void) const
TCacheData m_CacheData
void SetCoding(TCoding coding)
ECaseConversion m_CaseConversion
TCache_I m_CacheEnd
void x_ThrowOutOfRange(void) const
bool GetRefMinusStrand(void) const
Definition: seq_map_ci.hpp:700
TCache_I m_BackupEnd
TSeqPos x_CachePos(void) const
TSeqPos GetGapSizeBackward(void) const
Returns number of gap symbols before current symbol. Returns 0 if current position is not in gap.
CSeqMap::ESegmentType GetType(void) const
Definition: seq_map_ci.hpp:651
bool HasZeroGapBefore(void)
true if there is zero-length gap before current position
TSeqPos m_ScannedStart
TResidue GetGapChar(void) const
returns character representation of gap in sequence
void x_PrevCacheSeg(void)
void x_NextCacheSeg(void)
static const char * sx_GetConvertTable(TCoding src, TCoding dst, bool reverse, ECaseConversion case_cvt)
TSeqPos x_CacheSize(void) const
void x_UpdateCacheUp(TSeqPos pos)
void x_InitializeCache(void)
SSeqMapSelector & SetRange(TSeqPos start, TSeqPos length)
Set range for iterator.
Definition: seq_map_ci.hpp:127
CConstRef< CSeqMap > m_SeqMap
bool CanGetRange(TSeqPos start, TSeqPos stop)
Check if the sequence can be obtained for the interval [start, stop)
void SetNoAmbiguities(void)
void SetStrand(ENa_strand strand)
void x_IncSeg(void)
void x_ResetBackup(void)
ENa_strand m_Strand
void x_UpdateSeg(TSeqPos pos)
TSeqPos GetRefEndPosition(void) const
Definition: seq_map_ci.hpp:707
CHeapScope m_Scope
SSeqMapSelector & SetStrand(ENa_strand strand)
Set strand to iterate over.
Definition: seq_map_ci.hpp:144
static TResidue sx_GetGapChar(TCoding coding, ECaseConversion case_cvt)
bool IsInvalid(void) const
Definition: seq_map_ci.hpp:686
void x_SwapCache(void)
TCacheData m_BackupData
TSeqPos GetPosition(void) const
return position of current segment in sequence
Definition: seq_map_ci.hpp:665
void SetRandomizeAmbiguities(void)
TSeqPos SkipGapBackward(void)
Skip current gap backward. Returns number of skipped gap symbols. Does nothing and returns 0 if current position is not in gap.
void x_SetVector(CSeqVector &seq_vector)
TSeqPos x_BackupSize(void) const
TCoding x_GetCoding(TCoding cacheCoding, TCoding dataCoding) const
TSeqPos GetLength(void) const
return length of current segment
Definition: seq_map_ci.hpp:672
CConstRef< CSeq_literal > GetRefGapLiteral(void) const
return CSeq_literal with gap data, or null if either the segment is not a gap, or an unspecified gap
Definition: seq_map_ci.cpp:292
void x_InitRandomizer(CRandom &random_gen)
CSeqMap_CI m_Seg
void x_DecSeg(void)
bool IsInGap(void) const
true if current position of CSeqVector_CI is inside of sequence gap
void x_UpdateCacheDown(TSeqPos pos)
bool HasZeroGapAt(TSeqPos pos, CScope *scope=0) const
Returns true if there is zero-length gap at position.
Definition: seq_map.cpp:907
CConstRef< CSeqMap > m_SeqMap
Definition: seq_vector.hpp:200
TCoding m_Coding
Definition: seq_vector.hpp:205
CTSE_Handle m_TSE
Definition: seq_vector.hpp:201
CHeapScope m_Scope
Definition: seq_vector.hpp:199
CRef< INcbi2naRandomizer > m_Randomizer
Definition: seq_vector.hpp:206
TSeqPos size(void) const
Definition: seq_vector.hpp:291
bool CanResolveRange(CScope *scope, const SSeqMapSelector &sel) const
Definition: seq_map.cpp:986
ENa_strand m_Strand
Definition: seq_vector.hpp:204
TSeqPos GetLength(CScope *scope) const
Definition: seq_map.hpp:482
@ fDefaultFlags
Definition: seq_map.hpp:140
@ eSeqEnd
Definition: seq_map.hpp:101
@ eSeqData
real sequence data
Definition: seq_map.hpp:98
@ eSeqGap
gap
Definition: seq_map.hpp:97
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
#define kMax_UInt
Definition: ncbi_limits.h:185
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
@ eNa_strand_unknown
Definition: Na_strand_.hpp:65
E_Choice
Choice variants.
Definition: Seq_data_.hpp:102
@ e_Ncbipna
nucleic acid probabilities
Definition: Seq_data_.hpp:109
@ e_Ncbieaa
extended ASCII 1 letter aa codes
Definition: Seq_data_.hpp:111
@ e_Ncbistdaa
consecutive codes for std aas
Definition: Seq_data_.hpp:113
@ e_Ncbi2na
2 bit nucleic acid code
Definition: Seq_data_.hpp:106
@ e_Iupacna
IUPAC 1 letter nuc acid code.
Definition: Seq_data_.hpp:104
@ e_Ncbipaa
amino acid probabilities
Definition: Seq_data_.hpp:112
@ e_Ncbi8na
8 bit extended nucleic acid code
Definition: Seq_data_.hpp:108
@ e_Ncbi4na
4 bit nucleic acid code
Definition: Seq_data_.hpp:107
@ e_Iupacaa
IUPAC 1 letter amino acid code.
Definition: Seq_data_.hpp:105
@ e_Ncbi8aa
8 bit extended amino acid codes
Definition: Seq_data_.hpp:110
<!DOCTYPE HTML >< html > n< header > n< title > PubSeq Gateway Help Page</title > n< style > n table
@ e_not_set
const struct ncbi::grid::netcache::search::fields::SIZE size
T max(T x_, T y_)
T min(T x_, T y_)
static pcre_uint8 * buffer
Definition: pcretest.c:1051
void ThrowOutOfRangeSeq_inst(size_t pos)
static const TSeqPos kMaxPreloadBases
static const TSeqPos kCacheSize
void copy_8bit_any(DstIter dst, size_t count, const SrcCont &srcCont, size_t srcPos, const char *table, bool reverse)
void copy_4bit_any(DstIter dst, size_t count, const SrcCont &srcCont, size_t srcPos, const char *table, bool reverse)
void copy_2bit_any(DstIter dst, size_t count, const SrcCont &srcCont, size_t srcPos, const char *table, bool reverse)
Selector used in CSeqMap methods returning iterators.
Definition: seq_map_ci.hpp:113
#define _ASSERT
static int seed
Definition: test_table.cpp:132
#define const
Definition: zconf.h:232
Modified on Thu Apr 11 15:04:23 2024 by modify_doxy.py rev. 669887