NCBI C++ ToolKit
win_mask_sdust_masker.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: win_mask_sdust_masker.cpp 91949 2020-12-17 12:52:59Z grichenk $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Aleksandr Morgulis
27  *
28  * File Description:
29  * CSDustMasker class implementation.
30  *
31  */
32 
33 #include <ncbi_pch.hpp>
34 #include <vector>
35 #include <algorithm>
36 
38 
41 
42 //------------------------------------------------------------------------------
// Translate an IUPACNA nucleotide letter (either case) into a numeric code.
//
// The recognised letters are A, C, G, T, R, Y, M, K, W, S, B, D, H, V and
// they yield 0 through 13 in that order. 'N' and every other character
// yield 14.
static inline char iupacna_to_blastna( char c )
{
    // Upper/lower-case pairs laid out so that (position / 2) is the code
    // of the letter stored at that position.
    static const char kLetterPairs[] = "AaCcGgTtRrYyMmKkWwSsBbDdHhVv";

    // sizeof - 1 keeps the string terminator out of the scan, so that
    // '\0' is reported as unknown rather than matched.
    const unsigned int pair_chars = sizeof( kLetterPairs ) - 1;

    for( unsigned int pos = 0; pos < pair_chars; ++pos )
    {
        if( kLetterPairs[pos] == c )
        {
            return static_cast< char >( pos / 2 );
        }
    }

    // 'N', 'n' and anything unrecognised.
    return 14;
}
65 
66 //------------------------------------------------------------------------------
67 CSDustMasker::CSDustMasker( Uint4 arg_window, Uint4 arg_level, Uint4 arg_linker )
68  : window( arg_window ), duster_( arg_level, arg_window, arg_linker )
69 {}
70 
71 //------------------------------------------------------------------------------
73 
74 //------------------------------------------------------------------------------
76  const objects::CSeqVector & data, const TMaskList & exclude_ranges )
77 {
78  TMaskList * res( new TMaskList );
79  TMaskList::const_iterator e_it = exclude_ranges.begin();
80  TMaskList::const_iterator e_end = exclude_ranges.end();
81  CSeqVector::const_iterator start_it = data.begin();
82  CSeqVector::const_iterator current_it = data.begin();
83  CSeqVector::const_iterator end_it = data.end();
84 
85  if( e_it != e_end && e_it->first == 0 && e_it->second + 1 > window )
86  {
87  current_it = start_it + e_it->second - window + 2;
88  ++e_it;
89  }
90 
91  do
92  {
93  Uint4 start_offset = current_it - start_it;
94 
95  while( e_it != e_end && e_it->second - e_it->first + 1 <= window )
96  ++e_it;
97 
98  end_it = (e_it == e_end) ? data.end()
99  : start_it + e_it->first + window;
100  Uint4 stop_offset = end_it - start_it;
101 
102  // Now dust.
103  std::unique_ptr< TMaskList > result = duster_(
104  data, start_offset, stop_offset );
105 
106  res->insert( res->end(), result->begin(), result->end() );
107 
108  if( e_it != e_end )
109  {
110  current_it = start_it + e_it->second - window + 2;
111  ++e_it;
112  }
113  } while( end_it != data.end() );
114 
115  return res;
116 }
117 
118 
CSDustMasker(Uint4 window, Uint4 level, Uint4 linker)
Object constructor.
~CSDustMasker()
Object destructor.
CSeqMasker::TMaskList TMaskList
Type representing a list of masked segments.
TMaskList * operator()(const objects::CSeqVector &data, const TMaskList &exclude_ranges)
Function performing the actual dusting.
CSymDustMasker duster_
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
else result
Definition: token2.c:20
USING_SCOPE(objects)
static char iupacna_to_blastna(char c)
Modified on Thu Feb 22 17:12:29 2024 by modify_doxy.py rev. 669887