NCBI C++ ToolKit
seq_masker_window_ambig.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: seq_masker_window_ambig.cpp 46615 2010-07-26 12:40:34Z dicuccio $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Aleksandr Morgulis
27  *
28  * File Description:
29  * CSeqMaskerWindowAmbig class member and method definitions.
30  *
31  */
32 
33 #include <ncbi_pch.hpp>
34 #include <string>
35 
37 #include <objmgr/seq_vector.hpp>
38 
41 
42 
43 //-------------------------------------------------------------------------
// NOTE(review): the line that opens this definition is missing from this
// listing; per the member index at the end of the page this is the
// CSeqMaskerWindowAmbig constructor, whose first parameter is arg_data.
45  Uint1 arg_unit_size,
46  Uint1 arg_window_size,
47  Uint4 arg_window_step,
48  TUnit arg_ambig_unit,
49  Uint4 window_start,
50  Uint1 arg_unit_step )
// Forward the common window parameters to the CSeqMaskerWindow base, remember
// the value to store for units that contain an ambiguity, and start with the
// ambiguity flag cleared.
51  : CSeqMaskerWindow( arg_data, arg_unit_size,
52  arg_window_size, arg_window_step, arg_unit_step ),
53  ambig_unit( arg_ambig_unit ), ambig( false )
54 {
// Compute the units of the first window, beginning at window_start.
55  FillWindow( window_start );
56 }
57 
58 //-------------------------------------------------------------------------
// NOTE(review): the signature line is missing from this listing; the body
// uses `step` and matches the index entry "virtual void Advance(Uint4 step)".
// Moves the window forward by `step` characters, updating the units either
// incrementally or by recomputing the whole window.
60 {
// The incremental path below is only taken when the current window has no
// ambiguity, the step is smaller than the window, and unit_step is 1.
// Otherwise the window is recomputed from scratch at the new start position.
61  if( ambig || step >= window_size || unit_step > 1 )
62  {
63  FillWindow( start + step );
64  return;
65  }
66 
67  Uint1 num_units = NumUnits();
// units is used as a circular array: the last unit of the window sits in the
// slot just before first_unit, wrapping around to the end of the array.
68  Uint1 last_unit = first_unit ? first_unit - 1 : num_units - 1;
69  Uint4 unit = units[last_unit];
70  Uint4 iter = 0;
71  Uint4 newstart = start + step;
72 
// Consume up to `step` new characters. Note that `end` is pre-incremented
// every time the condition is evaluated, including the evaluation that
// terminates the loop.
73  for( ; ++end < data.size() && iter < step ; ++iter )
74  {
75  Uint1 letter = LOOKUP[unsigned(data[end])];
76 
// A lookup value of 0 is handled as an ambiguity: the window is rebuilt at
// the new start by FillWindow. For any other value the post-decrement leaves
// `letter` holding the lookup value minus one (presumably the 2-bit base
// code - confirm against the LOOKUP table).
77  if( !(letter--) )
78  {
79  FillWindow( newstart );
80  return;
81  }
82 
// Shift the previous unit left by two bits, keep only the bits selected by
// unit_mask, and append the new letter.
83  unit = ((unit<<2)&unit_mask) + letter;
84 
// Rotate the circular array by one position: the oldest unit is dropped and
// the newly computed unit becomes the last one.
85  if( ++first_unit == num_units ) first_unit = 0;
86 
87  if( ++last_unit == num_units ) last_unit = 0;
88 
89  units[last_unit] = unit;
90  }
91 
// Undo the extra pre-increment performed by the final loop-condition check,
// then derive the window start from its end.
92  --end;
93  start = end - window_size + 1;
94 
// Fewer than `step` characters could be consumed, so the end of the data was
// reached: mark the window state as false.
95  if( iter != step ) state = false;
96 }
97 
98 //-------------------------------------------------------------------------
// NOTE(review): the signature line is missing from this listing; the body
// uses `winstart` and matches the index entry "void FillWindow(Uint4 winstart)".
// Recomputes every unit of the window that begins at position winstart and
// records whether any character in it was an ambiguity.
100 {
101  first_unit = 0;
102  TUnit unit = 0;
103  Int4 iter = 0;
// ambig_pos >= 0 means the running unit still contains an ambiguous
// character; it is reset to unit_size - 1 when one is seen and decremented
// on every iteration.
104  Int4 ambig_pos = -1;
105  start = end = winstart;
106  ambig = false;
107 
// Read up to window_size characters, stopping early at the end of the data.
108  for( ; iter < window_size && end < data.size();
109  ++iter, ++end, --ambig_pos )
110  {
111  Uint1 letter = LOOKUP[unsigned(data[end])];
112 
// A lookup value of 0 marks an ambiguous character. Unlike Advance, the scan
// continues; units overlapping this position are replaced by ambig_unit
// below. The post-decrement leaves `letter` at lookup value minus one (for
// the ambiguous case the resulting bits never reach a stored unit while
// ambig_pos >= 0).
113  if( !(letter--) )
114  {
115  ambig_pos = unit_size - 1;
116  ambig = true;
117  }
118 
// Shift the running unit left by two bits, mask it, and append the letter.
119  unit = ((unit<<2)&unit_mask) + letter;
120 
// Once a full unit's worth of characters has been read, store a unit at
// every unit_step-th position: ambig_unit if the unit overlaps an ambiguous
// character, otherwise the computed value.
121  if( iter >= unit_size - 1 ) {
122  if( !((iter + 1 - unit_size)%unit_step) ) {
123  if( ambig_pos >= 0 )
124  units[(iter + 1 - unit_size)/unit_step] = ambig_unit;
125  else
126  units[(iter + 1- unit_size)/unit_step] = unit;
127  }
128  }
129  }
130 
// The loop leaves `end` one past the last character read; step back by one.
131  --end;
// The window is valid only if a full window_size characters were read.
132  state = (iter == window_size);
133 }
134 
135 
bool ambig
Ambiguity status of the window.
CSeqMaskerWindowAmbig(const objects::CSeqVector &arg_data, Uint1 arg_unit_size, Uint1 arg_window_size, Uint4 window_step, TUnit arg_ambig_unit, Uint4 window_start=0, Uint1 arg_unit_step=1)
Object constructor.
void FillWindow(Uint4 winstart)
Computes the units starting at specified position.
TUnit ambig_unit
Value to use for units containing ambiguity characters.
virtual void Advance(Uint4 step)
Advance the window by a specified number of characters.
Sliding window skipping over the ambiguities.
Uint1 window_size
The window size.
static Uint1 LOOKUP[]
Table used to translate bases from iupacna to ncbi2na format.
TUnits units
The array of units.
TUnits::size_type first_unit
The position in the array of units of the first unit of the current window.
Uint1 unit_step
The distance between consecutive units within a window.
Uint4 start
The start of the current window.
Uint1 NumUnits() const
Get the number of units in a window.
Uint4 TUnit
Integer type used to represent units within a window.
const objects::CSeqVector & data
The sequence data in iupacna format.
Uint1 unit_size
The unit size.
Uint4 end
The end of the current window.
TUnit unit_mask
The mask to use when accessing the integer value of a unit.
CSeqVector –.
Definition: seq_vector.hpp:65
#define false
Definition: bool.h:36
uint8_t Uint1
1-byte (8-bit) unsigned integer
Definition: ncbitype.h:99
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
USING_SCOPE(objects)
static Uint4 letter(char c)
static bool ambig(char c)
Modified on Fri Sep 20 14:58:12 2024 by modify_doxy.py rev. 669887