NCBI C++ ToolKit
utf8.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef UTIL_UTF8__H
2 #define UTIL_UTF8__H
3 
4 /* $Id: utf8.hpp 33815 2007-05-04 17:18:18Z kazimird $
5  * ===========================================================================
6  *
7  * PUBLIC DOMAIN NOTICE
8  * National Center for Biotechnology Information
9  *
10  * This software/database is a "United States Government Work" under the
11  * terms of the United States Copyright Act. It was written as part of
12  * the author's official duties as a United States Government employee and
13  * thus cannot be copyrighted. This software/database is freely available
14  * to the public for use. The National Library of Medicine and the U.S.
15  * Government have not placed any restriction on its use or reproduction.
16  *
17  * Although all reasonable efforts have been taken to ensure the accuracy
18  * and reliability of the software and data, the NLM and the U.S.
19  * Government do not and cannot warrant the performance or results that
20  * may be obtained by using this software or data. The NLM and the U.S.
21  * Government disclaim all warranties, express or implied, including
22  * warranties of performance, merchantability or fitness for any particular
23  * purpose.
24  *
25  * Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Author: Aleksey Vinokurov, Vladimir Ivanov
30  *
31  * File Description:
32  * UTF8 conversion functions
33  *
34  */
35 
36 #include <corelib/ncbistd.hpp>
37 #include <vector>
38 
39 
40 /** @addtogroup utf8
41  *
42  * @{
43  */
44 
45 
48 
49 
50 // Substitute for characters that could not be translated into a similar
51 // ASCII-7 or Unicode character because the ASCII-7 table has no
52 // graphically similar character for them.
53 //
54 const char kOutrangeChar = '?';
55 
56 
57 // 0xFF: this value means that the character should be skipped in
58 // translation to ASCII-7.
59 // For example, there are many characters whose meaning is to modify the
60 // character next to them.
61 const char kSkipChar = '\xFF';
62 
63 // Result (status) of converting a Unicode symbol to a character
65  eSuccess, // Success, result is good
66  eSkipChar, // Result conversion == kSkipChar
67  eOutrangeChar // Result conversion == kOutrangeChar
68 };
69 
70 
71 // Convert the first UTF-8 symbol of "src" into an ASCII-7 character.
72 // "ascii_table" specifies whether to use the ASCII-7 translation tables.
73 // The length of the retrieved UTF-8 symbol is stored in "*seq_len"
74 // (if "seq_len" is not NULL).
75 // Returns the resulting ASCII-7 character.
76 // NOTE: If the UTF-8 symbol has no ASCII-7 equivalent, kOutrangeChar or
77 // kSkipChar is returned ("*status" presumably reports which -- confirm).
78 //
80 extern char StringToChar(const string& src,
81  size_t* seq_len = 0,
82  bool ascii_table = true,
83  EConversionStatus* status = 0);
84 
85 // Convert the UTF-8 string "src" into an ASCII-7 string made of
86 // graphically similar characters -- using StringToChar().
87 // Returns the resulting ASCII-7 string.
88 //
90 extern string StringToAscii(const string& src,
91  bool ascii_table = true);
92 
93 
94 // Convert the first UTF-8 symbol of "src" into a Unicode symbol code.
95 // The length of the retrieved UTF-8 symbol is stored in "*seq_len"
96 // (if "seq_len" is not NULL).
97 // Returns the resulting Unicode symbol code.
98 // NOTE: If the UTF-8 symbol has no Unicode equivalent, kOutrangeChar or
99 // kSkipChar is returned.
100 //
102 extern long StringToCode(const string& src,
103  size_t* seq_len = 0,
104  EConversionStatus* status = 0);
105 
106 // Convert the UTF-8 string "src" into a vector of Unicode symbol codes
107 // using StringToCode().
108 // Returns the resulting vector.
109 //
111 extern vector<long> StringToVector(const string& src);
112 
113 
114 // Translate the Unicode symbol code "src" into a graphically similar
115 // ASCII-7 character.
116 // Returns the resulting ASCII-7 character.
117 // NOTE: If the Unicode symbol has no ASCII-7 equivalent, kOutrangeChar or
118 // kSkipChar is returned.
119 //
121 extern char CodeToChar(const long src, EConversionStatus* status = 0);
122 
123 
126 
127 
128 /* @} */
129 
130 #endif /* UTIL_UTF8__H */
Include a standard set of the NCBI C++ Toolkit most basic headers.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
vector< long > StringToVector(const string &src)
Definition: utf8.cpp:268
char StringToChar(const string &src, size_t *seq_len=0, bool ascii_table=true, EConversionStatus *status=0)
Definition: utf8.cpp:149
EConversionStatus
Definition: utf8.hpp:64
char CodeToChar(const long src, EConversionStatus *status=0)
Definition: utf8.cpp:295
const char kOutrangeChar
Definition: utf8.hpp:54
long StringToCode(const string &src, size_t *seq_len=0, EConversionStatus *status=0)
Definition: utf8.cpp:215
string StringToAscii(const string &src, bool ascii_table=true)
Definition: utf8.cpp:187
const char kSkipChar
Definition: utf8.hpp:61
@ eSkipChar
Definition: utf8.hpp:66
@ eSuccess
Definition: utf8.hpp:65
@ eOutrangeChar
Definition: utf8.hpp:67
static BOOL utf8
Definition: pcregrep.c:199
NCBI_XUTIL_EXPORT
Parameter to control printing diagnostic message about conversion of static array data from a differe...
Definition: static_set.hpp:72
#define const
Definition: zconf.h:232
Modified on Sat May 25 14:21:56 2024 by modify_doxy.py rev. 669887