NCBI C++ ToolKit
seq_masker_window.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: seq_masker_window.cpp 86312 2019-04-29 12:24:20Z morgulis $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Aleksandr Morgulis
27  *
28  * File Description:
29  * CSeqMaskerWindow class member and method definitions.
30  *
31  */
32 
33 #include <ncbi_pch.hpp>
34 #include <string>
35 
36 #include <corelib/ncbi_limits.h>
37 
39 #include <objmgr/seq_vector.hpp>
40 
43 
44 
45 //-------------------------------------------------------------------------
47 
48 //-------------------------------------------------------------------------
50  Uint1 arg_unit_size,
51  Uint1 arg_window_size,
52  Uint4 arg_window_step,
53  Uint1 arg_unit_step,
54  Uint4 winstart,
55  Uint4 arg_winend )
56  : data(arg_data), state( false ),
57  unit_size( arg_unit_size ), unit_step( arg_unit_step ),
58  window_size( arg_window_size ), window_step( arg_window_step ),
59  end( 0 ), first_unit( 0 ), unit_mask( 0 ), winend( arg_winend )
60 {
    // Constructor: stores the window geometry, initialises the base lookup
    // table on first use, sizes the unit array, computes the unit mask and
    // positions the first window at winstart.
    //
    // One-time initialisation of the shared LOOKUP table, guarded by a
    // function-local flag. Codes are stored off by one (A=1 .. T=4); the
    // readers in this file test the looked-up value for 0 and then
    // decrement it, which yields the 2-bit codes 0..3.
    // NOTE(review): the plain bool guard is not synchronised - confirm that
    // windows are never first constructed concurrently.
61  static bool first_call = true;
62 
63  if( first_call )
64  {
65  LOOKUP[unsigned('A')] = 1;
66  LOOKUP[unsigned('C')] = 2;
67  LOOKUP[unsigned('G')] = 3;
68  LOOKUP[unsigned('T')] = 4;
69  first_call = false;
70  }
71 
    // The two argument checks below are placeholders: both branches are
    // empty, so inconsistent sizes are currently accepted silently.
72  if( data.size() < window_size ) {
73  // TODO Throw an exception.
74  }
75  if( unit_size > window_size ) {
76  // TODO Throw an exception.
77  }
78 
    // One zero-initialised slot per unit of a window.
79  units.resize( NumUnits(), 0 );
    // Mask keeping the low 2*unit_size bits of a unit (two bits per base);
    // unit_size == 16 is special-cased to the full 32-bit mask.
80  unit_mask = (unit_size == 16) ? 0xFFFFFFFF : (1ULL << (unit_size << 1)) - 1;
81 
    // A winend of 0 means "up to the end of the sequence data".
82  if( winend == 0 )
83  winend = data.size();
84 
    // Build the first window starting at winstart.
85  FillWindow( winstart );
86 }
87 
87 
89 {
    // Empty body: no explicit cleanup is performed here.
90 }
91 
92 //-------------------------------------------------------------------------
94 {
    // Slides the window forward by `step` bases, updating the circular
    // array of units incrementally where possible.
    //
    // Incremental sliding is only done for steps shorter than the window
    // and a unit step of 1; otherwise the window is rebuilt from scratch
    // at the new start position.
95  if( step >= window_size || unit_step > 1 )
96  {
97  FillWindow( start + step );
98  return;
99  }
100 
    // `units` is used as a circular buffer: the last unit of the window
    // sits just before first_unit, wrapping around at index 0.
101  Uint1 num_units = NumUnits();
102  Uint1 last_unit = first_unit ? first_unit - 1 : num_units - 1;
103  Uint4 unit = units[last_unit];
104  Uint4 iter = 0;
105 
    // Each iteration consumes one base at position `end`. Note that `end`
    // is pre-incremented in the condition before the bounds and step
    // checks are evaluated.
106  for( ; ++end < winend && iter < step ; ++iter, ++start )
107  {
108  Uint1 letter = LOOKUP[unsigned(data[end])];
109 
    // A zero lookup value means the character has no base code in
    // LOOKUP: rebuild the window starting at this position and stop.
    // Otherwise the post-decrement turns the stored code into 0..3.
110  if( !(letter--) )
111  {
112  FillWindow( end );
113  return;
114  }
115 
    // Shift the new base into the low two bits and drop the bits that
    // fall outside the unit.
116  unit = ((unit<<2)&unit_mask) + letter;
117 
    // Rotate the circular buffer by one slot and store the new last unit.
118  if( ++first_unit == num_units ) first_unit = 0;
119 
120  if( ++last_unit == num_units ) last_unit = 0;
121 
122  units[last_unit] = unit;
123  }
124 
    // Undo the extra pre-increment made by the loop condition that ended
    // the loop.
125  --end;
126 
    // Fewer than `step` bases were consumed before reaching winend, so
    // the window is marked as not valid.
127  if( iter != step ) state = false;
128 }
129 
130 //-------------------------------------------------------------------------
132 {
    // Rebuilds the window from scratch, scanning forward from winstart
    // until window_size consecutive bases with a LOOKUP code have been
    // read or the sequence data runs out.
133  first_unit = 0;
134  TUnit unit = 0;
135  Int4 iter = 0;
136  end = winstart;
137 
    // `iter` counts the bases accepted into the current candidate window.
    // NOTE(review): this loop is bounded by data.size(), whereas Advance()
    // is bounded by winend - confirm whether that difference is intended.
138  for( ; iter < window_size && end < data.size(); ++iter, ++end )
139  {
140  Uint1 letter = LOOKUP[unsigned(data[end])];
141 
    // No base code for this character: set iter to -1 so that the loop
    // increment brings it back to 0 and counting restarts after it.
142  if( !(letter--) )
143  {
144  iter = -1;
145  continue;
146  }
147 
    // Shift the new base into the low two bits, masked to the unit width.
148  unit = ((unit<<2)&unit_mask) + letter;
149 
    // Once at least unit_size bases have been accepted, store a unit at
    // every unit_step-th offset from the beginning of the window.
150  if( iter >= unit_size - 1 )
151  if( !((iter + 1 - unit_size)%unit_step) )
152  units[(iter + 1- unit_size)/unit_step] = unit;
153  }
154 
    // `end` is one past the last position read at this point; the
    // post-decrement leaves it on that last position.
    // NOTE(review): if fewer than window_size positions were read, the
    // unsigned subtraction wraps; `state` is false in that case.
155  start = (end--) - window_size;
    // The window is valid only if a full window_size run was collected.
156  state = (iter == window_size);
157 }
158 
159 
Uint1 window_size
The window size.
static Uint1 LOOKUP[]
Table used to translate bases from iupacna to ncbi2na format.
virtual ~CSeqMaskerWindow()
Object destructor.
TUnits units
The array of units.
virtual void Advance(Uint4 step)
Slide the window by the given number of bases.
TUnits::size_type first_unit
The position in the array of units of the first unit of the current window.
Uint1 unit_step
The distance between consecutive units within a window.
Uint4 start
The start of the current window.
void FillWindow(Uint4 winstart)
Uint1 NumUnits() const
Get the number of units in a window.
Uint4 TUnit
Integer type used to represent units within a window.
const objects::CSeqVector & data
The sequence data in iupacna format.
Uint1 unit_size
The unit size.
Uint4 winend
Final position in the sequence.
CSeqMaskerWindow(const objects::CSeqVector &arg_data, Uint1 arg_unit_size, Uint1 arg_window_size, Uint4 window_step, Uint1 unit_step=1, Uint4 winstart=0, Uint4 winend=0)
Object constructor.
Uint4 end
The end of the current window.
TUnit unit_mask
The mask to use when accessing the integer value of a unit.
CSeqVector –.
Definition: seq_vector.hpp:65
static ulg window_size
#define false
Definition: bool.h:36
char data[12]
Definition: iconv.c:80
uint8_t Uint1
1-byte (8-bit) unsigned integer
Definition: ncbitype.h:99
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
#define kMax_UI1
Definition: ncbi_limits.h:213
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
USING_SCOPE(objects)
static Uint4 letter(char c)
Modified on Fri Sep 20 14:57:37 2024 by modify_doxy.py rev. 669887