NCBI C++ ToolKit
sequtil_convert.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef UTIL_SEQUTIL___SEQUTIL_CONVERT__HPP
2 #define UTIL_SEQUTIL___SEQUTIL_CONVERT__HPP
3 
4 /* $Id: sequtil_convert.hpp 64228 2014-08-26 20:15:25Z ucko $
5  * ===========================================================================
6  *
7  * PUBLIC DOMAIN NOTICE
8  * National Center for Biotechnology Information
9  *
10  * This software/database is a "United States Government Work" under the
11  * terms of the United States Copyright Act. It was written as part of
12  * the author's official duties as a United States Government employee and
13  * thus cannot be copyrighted. This software/database is freely available
14  * to the public for use. The National Library of Medicine and the U.S.
15  * Government have not placed any restriction on its use or reproduction.
16  *
17  * Although all reasonable efforts have been taken to ensure the accuracy
18  * and reliability of the software and data, the NLM and the U.S.
19  * Government do not and cannot warrant the performance or results that
20  * may be obtained by using this software or data. The NLM and the U.S.
21  * Government disclaim all warranties, express or implied, including
22  * warranties of performance, merchantability or fitness for any particular
23  * purpose.
24  *
25  * Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Author: Mati Shomrat
30  *
31  * File Description:
32  * Sequence conversion utility.
33  */
34 #include <corelib/ncbiobj.hpp>
35 #include <corelib/ncbi_limits.hpp>
36 
37 #include <util/sequtil/sequtil.hpp>
38 
39 
41 
42 
43 /////////////////////////////////////////////////////////////////////////////
44 //
45 // Sequence Conversions
46 //
47 
49 {
50 public:
51 
52  // types
54 
55  // Conversion Methods:
56  // Convert sequence from one coding to another.
57  //
58  // PARAMETERS:
59  // src - input container containing the sequence.
60  // src_coding - the input's coding
61  // pos, length - specify the initial offset and the length
62  // dst - output container, or raw memory
63  // dst_coding - the desired output coding
64  //
65  // RETURN VALUE:
66  // All methods return the number of converted characters.
67  // Methods will throw an exception if the conversion can't be
68  // performed. For example when trying to convert NA coding to an AA one.
69 
70 
71  // string to string
72  static SIZE_TYPE Convert(const CTempString& src, TCoding src_coding,
73  TSeqPos pos, TSeqPos length,
74  string& dst, TCoding dst_coding);
75 
76  // string to vector
77  static SIZE_TYPE Convert(const string& src, TCoding src_coding,
78  TSeqPos pos, TSeqPos length,
79  vector< char >& dst, TCoding dst_coding);
80 
81  // vector to string
82  static SIZE_TYPE Convert(const vector< char >& src, TCoding src_coding,
83  TSeqPos pos, TSeqPos length,
84  string& dst, TCoding dst_coding);
85 
86  // vector to vector
87  static SIZE_TYPE Convert(const vector< char >& src, TCoding src_coding,
88  TSeqPos pos, TSeqPos length,
89  vector< char >& dst, TCoding dst_coding);
90 
91  // char[] to char[]
92  static SIZE_TYPE Convert(const char src[], TCoding src_coding,
93  TSeqPos pos, TSeqPos length,
94  char dst[], TCoding dst_coding);
95 
96  // Subseq
97  //
98  // The function stores in dst a copy of up to
99  // length elements of the src sequence beginning at position
100  // pos.
101  static SIZE_TYPE Subseq(const string& src, TCoding src_coding,
102  TSeqPos pos, TSeqPos length,
103  string& dst);
104  static SIZE_TYPE Subseq(const string& src, TCoding src_coding,
105  TSeqPos pos, TSeqPos length,
106  vector<char>& dst);
107  static SIZE_TYPE Subseq(const vector<char>& src, TCoding src_coding,
108  TSeqPos pos, TSeqPos length,
109  string& dst);
110  static SIZE_TYPE Subseq(const vector<char>& src, TCoding src_coding,
111  TSeqPos pos, TSeqPos length,
112  vector<char>& dst);
113  static SIZE_TYPE Subseq(const char* src, TCoding src_coding,
114  TSeqPos pos, TSeqPos length,
115  char* dst);
116 
117 
118  // Packing:
119  // Pack will convert a given sequence to its most condensed form
120  // without the loss of information. Hence, sequences containing
121  // ambiguities will be converted to ncbi4na, while those that are
122  // comprised of only A, C, G and T will be converted to ncbi2na.
123 
124  static SIZE_TYPE Pack(const string& src, TCoding src_coding,
125  vector<char>& dst, TCoding& dst_coding,
127  static SIZE_TYPE Pack(const vector<char>& src, TCoding src_coding,
128  vector<char>& dst, TCoding& dst_coding,
130  static SIZE_TYPE Pack(const char* src, TSeqPos length, TCoding src_coding,
131  char* dst, TCoding& dst_coding);
132 
133  // For packing piecemeal into a Delta-ext or the like, with ambiguities
134  // isolated when doing so is worth the overhead of having more segments
136  {
137  public:
140 
141  // Virtual destructor, if only as a matter of principle.
142  virtual ~IPackTarget() {}
143 
144  // Return the approximate memory overhead per segment, in bytes
145  virtual SIZE_TYPE GetOverhead(TCoding coding) const = 0;
146 
147  // Returning true signals that Pack may call NewSegment with
148  // coding = CSeqUtil::e_not_set as appropriate.
149  virtual bool GapsOK(TCodingType coding_type) const = 0;
150 
151  // Return a pointer to a character array suitable for storing
152  // the given number of residues in the given format.
153  virtual char* NewSegment(TCoding coding, TSeqPos length) = 0;
154  };
155 
156  static SIZE_TYPE Pack(const string& src, TCoding src_coding,
157  IPackTarget& dst,
159  static SIZE_TYPE Pack(const vector<char>& src, TCoding src_coding,
160  IPackTarget& dst,
162  static SIZE_TYPE Pack(const char* src, TSeqPos length, TCoding src_coding,
163  IPackTarget& dst);
164 };
165 
166 
168 
169 
170 #endif /* UTIL_SEQUTIL___SEQUTIL_CONVERT__HPP */
virtual bool GapsOK(TCodingType coding_type) const =0
CSeqUtil::ECodingType TCodingType
virtual char * NewSegment(TCoding coding, TSeqPos length)=0
virtual SIZE_TYPE GetOverhead(TCoding coding) const =0
CSeqUtil::ECoding TCoding
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
NCBI_NS_STD::string::size_type SIZE_TYPE
Definition: ncbistr.hpp:132
static string Pack(const string &s)
Portable reference counted smart and weak pointers using CWeakRef, CRef, CObject and CObjectEx.
T max(T x_, T y_)
NCBI_XUTIL_EXPORT
Parameter to control printing diagnostic message about conversion of static array data from a differe...
Definition: static_set.hpp:72
const value_slice::CValueConvert< value_slice::SRunTimeCP, FROM > Convert(const FROM &value)
Modified on Thu Feb 22 17:08:27 2024 by modify_doxy.py rev. 669887