NCBI C++ ToolKit
sequtil_shared.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: sequtil_shared.cpp 51894 2011-11-10 15:31:33Z ucko $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Mati Shomrat
27  *
28  * File Description:
29  * Shared utility functions for the various sequtil classes.
30  */
31 #include <ncbi_pch.hpp>
32 #include <corelib/ncbistd.hpp>
33 
34 #include <util/sequtil/sequtil.hpp>
35 #include "sequtil_shared.hpp"
36 
37 
39 
// Maps each source byte to a single destination byte via a lookup table.
//
// src     input buffer
// pos     offset (in bytes) of the first byte of src to convert
// length  number of bytes to convert
// dst     output buffer; receives exactly `length` bytes
// table   lookup table indexed by the unsigned value of the source byte
//
// Returns `length`.
SIZE_TYPE convert_1_to_1
(const char* src,
 TSeqPos pos,
 TSeqPos length,
 char* dst,
 const Uint1* table)
{
    const char* in = src + pos;

    for ( TSeqPos remaining = length;  remaining != 0;  --remaining ) {
        // Go through Uint1 so bytes >= 0x80 index the table correctly
        // on platforms where plain char is signed.
        *dst++ = table[static_cast<Uint1>(*in++)];
    }

    return length;
}
57 
58 
// Expands each source byte into two destination bytes via a lookup table.
//
// src     packed input buffer; each byte yields two output positions
// pos     index of the first output position wanted (position p lives in
//         source byte p / 2; odd p is the second half of that byte)
// length  number of output positions to produce
// dst     output buffer; receives exactly `length` bytes
// table   lookup table holding two consecutive entries per source byte
//         value b: table[2 * b] (first half) and table[2 * b + 1] (second)
//
// Returns the number of bytes written (`length`).
SIZE_TYPE convert_1_to_2
(const char* src,
 TSeqPos pos,
 TSeqPos length,
 char* dst,
 const Uint1* table)
{
    // Nothing to do.  Without this guard an odd `pos` with a zero `length`
    // would emit one byte and then wrap the remaining count below zero.
    if ( length == 0 ) {
        return 0;
    }

    size_t size = length;
    const char* iter = src + (pos / 2);

    // Leading partial byte: only its second half is requested.
    if ( pos % 2 != 0 ) {
        *dst++ = table[static_cast<Uint1>(*iter) * 2 + 1];
        ++iter;
        --size;
    }

    // Whole source bytes: copy both table entries.  The bytes are written
    // individually (rather than through a casted 16-bit pointer) so that no
    // alignment or aliasing assumptions are made about dst or table.
    for ( size_t i = size / 2;  i != 0;  --i, ++iter, dst += 2 ) {
        const Uint1* entry = table + static_cast<Uint1>(*iter) * 2;
        dst[0] = entry[0];
        dst[1] = entry[1];
    }

    // Trailing partial byte: only its first half is requested.
    if ( size % 2 != 0 ) {
        *dst = table[static_cast<Uint1>(*iter) * 2];
    }

    return length;
}
96 
97 
// Expands each source byte into four destination bytes via a lookup table.
//
// src     packed input buffer; each byte yields four output positions
// pos     index of the first output position wanted (position p lives in
//         source byte p / 4, at slot p % 4 within that byte)
// length  number of output positions to produce
// dst     output buffer; receives exactly `length` bytes
// table   lookup table holding four consecutive entries per source byte
//         value b: table[4 * b + 0] .. table[4 * b + 3]
//
// Returns `length`.
SIZE_TYPE convert_1_to_4
(const char* src,
 TSeqPos pos,
 TSeqPos length,
 char* dst,
 const Uint1* table)
{
    size_t size = length;
    const char* iter = src + (pos / 4);
    const size_t offset = pos % 4;

    // Leading partial byte: emit slots [offset, stop) of the first byte.
    // The bound is computed in size_t and without adding offset to the full
    // length, so it cannot wrap for very large lengths.
    if ( offset != 0 ) {
        const size_t stop = (size < 4 - offset) ? offset + size : 4;
        for ( size_t i = offset;  i < stop;  ++i ) {
            *dst++ = table[static_cast<Uint1>(*iter) * 4 + i];
        }
        ++iter;
        size -= stop - offset;
    }

    // Whole source bytes: copy all four table entries.  The bytes are
    // written individually (rather than through a casted 32-bit pointer) so
    // that no alignment or aliasing assumptions are made about dst or table.
    for ( size_t i = size / 4;  i != 0;  --i, ++iter, dst += 4 ) {
        const Uint1* entry = table + static_cast<Uint1>(*iter) * 4;
        dst[0] = entry[0];
        dst[1] = entry[1];
        dst[2] = entry[2];
        dst[3] = entry[3];
    }

    // Trailing partial byte: emit its first (size % 4) slots.
    const size_t tail = size % 4;
    for ( size_t i = 0;  i < tail;  ++i ) {
        *dst++ = table[static_cast<Uint1>(*iter) * 4 + i];
    }

    return length;
}
139 
140 
// Writes the table-mapped bytes of a source range to dst in reverse order.
//
// src     input buffer
// pos     offset (in bytes) of the first byte of the range
// length  number of bytes in the range
// dst     output buffer; receives exactly `length` bytes, the first being
//         the mapping of the last byte of the range
// table   lookup table indexed by the unsigned value of the source byte
//
// Returns `length`.
SIZE_TYPE copy_1_to_1_reverse
(const char* src,
 TSeqPos pos,
 TSeqPos length,
 char* dst,
 const Uint1* table)
{
    const char* const stop = src + pos;
    const char* cur = stop + length;

    // Walk the source backwards while filling the destination forwards.
    while ( cur != stop ) {
        --cur;
        *dst = table[static_cast<Uint1>(*cur)];
        ++dst;
    }

    return length;
}
158 
// Reverses a range of buf in place while mapping every byte through a
// lookup table, then moves the result to the start of the buffer.
//
// buf     buffer modified in place
// pos     offset (in bytes) of the first byte of the range
// length  number of bytes in the range
// table   lookup table indexed by the unsigned value of each byte
//
// On return buf[0 .. length) holds the mapped, reversed range.  When pos is
// non-zero the bytes at and after index `length` that belonged to the
// original range keep their in-place converted values.
//
// Returns `length`.
SIZE_TYPE revcmp
(char* buf,
 TSeqPos pos,
 TSeqPos length,
 const Uint1* table)
{
    // Half-open range [first, last): unlike an inclusive "last" pointer this
    // never forms an address before the buffer, even for length 0 or 1.
    char* first = buf + pos;
    char* last = first + length;

    while ( first < last ) {
        --last;
        // Swap the two ends, mapping each through the table.  For an odd
        // length the middle byte is reached with first == last and is simply
        // mapped in place.
        const char temp = table[static_cast<Uint1>(*first)];
        *first = table[static_cast<Uint1>(*last)];
        *last = temp;
        ++first;
    }

    // Shift the converted range down to the beginning of the buffer.  The
    // destination starts before the source, so a forward copy is safe even
    // when the two ranges overlap.
    if ( pos != 0 ) {
        std::copy(buf + pos, buf + pos + length, buf);
    }

    return length;
}
182 
184 {
185  if ( coding == CSeqUtil::e_Ncbi2na ) {
186  return 4;
187  } else if ( coding == CSeqUtil::e_Ncbi4na ) {
188  return 2;
189  }
190 
191  return 1;
192 }
193 
194 
196 {
197  switch (coding) {
198  case CSeqUtil::e_not_set: return 0;
199  case CSeqUtil::e_Ncbi2na: return (length + 3) / 4;
200  case CSeqUtil::e_Ncbi4na: return (length + 1) / 2;
201  default: return length;
202  }
203 }
204 
@ e_not_set
Definition: sequtil.hpp:44
@ e_Ncbi4na
Definition: sequtil.hpp:50
@ e_Ncbi2na
Definition: sequtil.hpp:48
Include a standard set of the NCBI C++ Toolkit most basic headers.
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:51
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
uint8_t Uint1
1-byte (8-bit) unsigned integer
Definition: ncbitype.h:99
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
uint16_t Uint2
2-byte (16-bit) unsigned integer
Definition: ncbitype.h:101
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
NCBI_NS_STD::string::size_type SIZE_TYPE
Definition: ncbistr.hpp:132
<!DOCTYPE HTML >< html > n< header > n< title > PubSeq Gateway Help Page</title > n< style > n table
char * buf
int i
const struct ncbi::grid::netcache::search::fields::SIZE size
T min(T x_, T y_)
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
Definition: njn_matrix.hpp:613
SIZE_TYPE convert_1_to_4(const char *src, TSeqPos pos, TSeqPos length, char *dst, const Uint1 *table)
SIZE_TYPE copy_1_to_1_reverse(const char *src, TSeqPos pos, TSeqPos length, char *dst, const Uint1 *table)
SIZE_TYPE convert_1_to_2(const char *src, TSeqPos pos, TSeqPos length, char *dst, const Uint1 *table)
size_t GetBasesPerByte(CSeqUtil::TCoding coding)
SIZE_TYPE GetBytesNeeded(CSeqUtil::TCoding coding, TSeqPos length)
SIZE_TYPE convert_1_to_1(const char *src, TSeqPos pos, TSeqPos length, char *dst, const Uint1 *table)
SIZE_TYPE revcmp(char *buf, TSeqPos pos, TSeqPos length, const Uint1 *table)
Modified on Fri Sep 20 14:58:15 2024 by modify_doxy.py rev. 669887