NCBI C++ ToolKit
writedb_convert.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef OBJTOOLS_WRITERS_WRITEDB__WRITEDB_CONVERT_HPP
2 #define OBJTOOLS_WRITERS_WRITEDB__WRITEDB_CONVERT_HPP
3 
4 /* $Id: writedb_convert.hpp 56895 2013-01-16 14:19:52Z fongah2 $
5  * ===========================================================================
6  *
7  * PUBLIC DOMAIN NOTICE
8  * National Center for Biotechnology Information
9  *
10  * This software/database is a "United States Government Work" under the
11  * terms of the United States Copyright Act. It was written as part of
12  * the author's official duties as a United States Government employee and
13  * thus cannot be copyrighted. This software/database is freely available
14  * to the public for use. The National Library of Medicine and the U.S.
15  * Government have not placed any restriction on its use or reproduction.
16  *
17  * Although all reasonable efforts have been taken to ensure the accuracy
18  * and reliability of the software and data, the NLM and the U.S.
19  * Government do not and cannot warrant the performance or results that
20  * may be obtained by using this software or data. The NLM and the U.S.
21  * Government disclaim all warranties, express or implied, including
22  * warranties of performance, merchantability or fitness for any particular
23  * purpose.
24  *
25  * Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Author: Kevin Bealer
30  *
31  */
32 
33 /// @file writedb_convert.hpp
34 /// Data conversion tools for CWriteDB and associated code.
35 ///
36 /// Defines classes:
37 /// CAmbiguousRegion
38 ///
39 /// Implemented for: UNIX, MS-Windows
40 
41 #include <objects/seq/seq__.hpp>
43 
44 #include <objmgr/bioseq_handle.hpp>
45 #include <objmgr/seq_vector.hpp>
46 
48 
49 /// Import definitions from the objects namespace.
51 
52 /// Build blast db protein format from Stdaa protein Seq-inst.
53 ///
54 /// No conversion is actually done here, because this is already the
55 /// correct format for disk. Instead the sequence data is just copied
56 /// from the Seq-inst to the string.
57 ///
58 /// @param si Seq-inst containing data in NcbiStdaa format. [in]
59 /// @param seq Sequence in blast db disk format. [out]
60 void WriteDB_StdaaToBinary(const CSeq_inst & si, string & seq);
61 
62 /// Build blast db protein format from Eaa protein Seq-inst.
63 ///
64 /// The data is converted and returned in the string.
65 ///
66 /// @param si Seq-inst containing data in NcbiEaa format. [in]
67 /// @param seq Sequence in blast db disk format. [out]
68 void WriteDB_EaaToBinary(const CSeq_inst & si, string & seq);
69 
70 /// Build blast db protein format from Iupacaa protein Seq-inst.
71 ///
72 /// The data is converted and returned in the string.
73 ///
74 /// @param si Seq-inst containing data in Iupacaa format. [in]
75 /// @param seq Sequence in blast db disk format. [out]
76 void WriteDB_IupacaaToBinary(const CSeq_inst & si, string & seq);
77 
78 /// Build blast db nucleotide format from Ncbi2na Seq-inst.
79 ///
80 /// The data is in the correct format, and can be copied as-is, but
81 /// the length remainder must be coded into the last byte. It is not
82 /// necessary to deal with ambiguities - if there were any, ncbi2na
83 /// would not be the input format.
84 ///
85 /// @param si Seq-inst containing data in Ncbi2na format. [in]
86 /// @param seq Sequence in blast db disk format. [out]
87 void WriteDB_Ncbi2naToBinary(const CSeq_inst & si, string & seq);
88 
89 /// Build blast db nucleotide format from Ncbi4na Seq-inst.
90 ///
91 /// The data is compressed to ncbi2na, the length remainder is coded
92 /// into the last byte, and ambiguous region data is produced.
93 ///
94 /// @param seqinst Seq-inst containing data in Ncbi4na format. [in]
95 /// @param seq Sequence in blast db disk format. [out]
96 /// @param amb Ambiguities in blast db disk format. [out]
97 void WriteDB_Ncbi4naToBinary(const CSeq_inst & seqinst,
98  string & seq,
99  string & amb);
100 
101 /// Build binary blast2na + ambig encoding based on ncbi4na input.
102 ///
103 /// @param ncbi4na Input data with possible ambiguities.
104 /// @param byte_length Number of bytes in the input data.
105 /// @param base_length Valid nucleotide bases in the input data.
106 /// @param seq Sequence data in blast db format.
107 /// @param amb Ambiguity data in blast db format.
108 
109 
110 /// Build blast db nucleotide format from Ncbi4na data in memory.
111 ///
112 /// For a given sequence in ncbi4na format, the blast database format
113 /// data is constructed; this consists of ncbi2na format with values
114 /// in ambiguous locations selected randomly, plus the precise values
115 /// of the ambiguous regions encoded in a separate string.
116 ///
117 /// @param ncbi4na Pointer to Ncbi4na format sequence data. [in]
118 /// @param byte_length Length of ncbi4na data in bytes. [in]
119 /// @param base_length Number of letters of valid data. [in]
120 /// @param seq Sequence in blast db disk format. [out]
121 /// @param amb Ambiguities in blast db disk format. [out]
122 void WriteDB_Ncbi4naToBinary(const char * ncbi4na,
123  int byte_length,
124  int base_length,
125  string & seq,
126  string & amb);
127 
128 /// Build blast db nucleotide format from Iupacna Seq-inst.
129 ///
130 /// The data is compressed to ncbi2na, the length remainder is coded
131 /// into the last byte, and ambiguous region data is produced.
132 ///
133 /// @param si Seq-inst containing data in Iupacna format. [in]
134 /// @param seq Sequence in blast db disk format. [out]
135 /// @param amb Ambiguities in blast db disk format. [out]
137  string & seq,
138  string & amb);
139 
/// Append a value to a string as a 4 byte big-endian integer.
///
/// The value is reinterpreted as unsigned before any shifting, so a
/// negative input is emitted as its two's complement bit pattern
/// without right-shifting a negative signed value.
///
/// @param outp String to modify; exactly four bytes are appended.
/// @param x Value to append.
inline void s_AppendInt4(string & outp, int x)
{
    const unsigned int bits = static_cast<unsigned int>(x);

    // Most significant byte first (big-endian).
    const char buf[4] = {
        static_cast<char>((bits >> 24) & 0xFF),
        static_cast<char>((bits >> 16) & 0xFF),
        static_cast<char>((bits >>  8) & 0xFF),
        static_cast<char>( bits        & 0xFF)
    };

    outp.append(buf, 4);
}
153 
/// Write a four byte integer to a stream in big endian format.
///
/// The value is reinterpreted as unsigned before any shifting, so a
/// negative input is written as its two's complement bit pattern
/// without right-shifting a negative signed value.
///
/// @param str Stream to write to.
/// @param x Integer to write.
inline void s_WriteInt4(ostream & str, int x)
{
    const unsigned int bits = static_cast<unsigned int>(x);

    // Most significant byte first (big-endian).
    const char buf[4] = {
        static_cast<char>((bits >> 24) & 0xFF),
        static_cast<char>((bits >> 16) & 0xFF),
        static_cast<char>((bits >>  8) & 0xFF),
        static_cast<char>( bits        & 0xFF)
    };

    str.write(buf, 4);
}
167 
/// Write an eight byte integer to a stream in little-endian format.
///
/// The least significant byte of the value is written first and the
/// most significant byte last.
///
/// @param str Stream to write to.
/// @param x Integer to write.
inline void s_WriteInt8LE(ostream & str, Uint8 x)
{
    char bytes[8];

    // Byte i of the output holds bits [8*i, 8*i + 7] of the value.
    for (int i = 0; i < 8; ++i) {
        bytes[i] = (char)((x >> (8 * i)) & 0xFF);
    }

    str.write(bytes, 8);
}
185 
/// Write an eight byte integer to a stream in big-endian format.
///
/// The most significant byte of the value is written first and the
/// least significant byte last.
///
/// @param str Stream to write to.
/// @param x Integer to write.
inline void s_WriteInt8BE(ostream & str, Uint8 x)
{
    char bytes[8];
    Uint8 remaining = x;

    // Fill the buffer back to front, peeling off the low byte each time.
    for (int i = 7; i >= 0; --i) {
        bytes[i] = (char)(remaining & 0xFF);
        remaining >>= 8;
    }

    str.write(bytes, 8);
}
203 
204 /// Write a length-prefixed string to a stream.
205 ///
206 /// This method writes a string to a stream, prefixing the string with
207 /// it's length, written as a big-endian four byte integer.
208 ///
209 /// @param str Stream to write to.
210 /// @param s String to write.
211 inline void s_WriteString(ostream & str, const string & s)
212 {
213  s_WriteInt4(str, (int)s.length());
214  str.write(s.data(), s.length());
215 }
216 
218 
219 
220 #endif // OBJTOOLS_WRITERS_WRITEDB__WRITEDB_CONVERT_HPP
221 
static int base_length[29]
static const char si[8][64]
Definition: des.c:146
static const char * str(char *buf, int n)
Definition: stats.c:84
uint64_t Uint8
8-byte (64-bit) unsigned integer
Definition: ncbitype.h:105
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
char * buf
void s_WriteInt8BE(ostream &str, Uint8 x)
Write an eight byte integer to a stream in big-endian format.
USING_SCOPE(objects)
Import definitions from the objects namespace.
void s_WriteInt4(ostream &str, int x)
Write a four byte integer to a stream in big endian format.
void s_WriteInt8LE(ostream &str, Uint8 x)
Write an eight byte integer to a stream in little-endian format.
void WriteDB_Ncbi2naToBinary(const CSeq_inst &si, string &seq)
Build blast db nucleotide format from Ncbi2na Seq-inst.
void s_WriteString(ostream &str, const string &s)
Write a length-prefixed string to a stream.
void WriteDB_EaaToBinary(const CSeq_inst &si, string &seq)
Build blast db protein format from Eaa protein Seq-inst.
void WriteDB_IupacaaToBinary(const CSeq_inst &si, string &seq)
Build blast db protein format from Iupacaa protein Seq-inst.
void WriteDB_Ncbi4naToBinary(const CSeq_inst &seqinst, string &seq, string &amb)
Build blast db nucleotide format from Ncbi4na Seq-inst.
void s_AppendInt4(string &outp, int x)
Append a value to a string as a 4 byte big-endian integer.
void WriteDB_StdaaToBinary(const CSeq_inst &si, string &seq)
Build blast db protein format from Stdaa protein Seq-inst.
void WriteDB_IupacnaToBinary(const CSeq_inst &si, string &seq, string &amb)
Build blast db nucleotide format from Iupacna Seq-inst.
Modified on Fri Sep 20 14:58:08 2024 by modify_doxy.py rev. 669887