NCBI C++ ToolKit
unicode.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: unicode.cpp 101236 2023-11-17 17:18:35Z gouriano $
2  * ==========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ==========================================================================
25  *
26  * Author: Aleksey Vinokurov
27  *
28  * File Description:
29  * Unicode transformation library
30  *
31  */
32 
33 #include <ncbi_pch.hpp>
34 #include <util/unicode.hpp>
35 #include <util/util_exception.hpp>
36 #include <util/util_misc.hpp>
37 #include <util/error_codes.hpp>
38 #include <corelib/ncbifile.hpp>
40 
41 #define NCBI_USE_ERRCODE_X Util_Unicode
42 
45 
46 #include "unicode_plans.inl"
47 
48 
50 {
51  &s_Plan_00h, &s_Plan_01h, &s_Plan_02h, &s_Plan_03h, &s_Plan_04h, 0, 0, 0, // Plan 00 - 07
52  0, 0, 0, 0, 0, 0, 0, 0, // Plan 08 - 0F
53 
54  0, 0, 0, 0, 0, 0, 0, 0, // Plan 10 - 17
55  0, 0, 0, 0, 0, 0, &s_Plan_1Eh, 0, // Plan 18 - 1F
56 
58  0, 0, 0, 0, 0, 0, 0, 0, // Plan 28 - 2F
59 
60  &s_Plan_30h, 0, 0, 0, 0, 0, 0, 0, // Plan 30 - 37
61  0, 0, 0, 0, 0, 0, 0, 0, // Plan 38 - 3F
62 
63  0, 0, 0, 0, 0, 0, 0, 0, // Plan 40 - 47
64  0, 0, 0, 0, 0, 0, 0, 0, // Plan 48 - 4F
65 
66  0, 0, 0, 0, 0, 0, 0, 0, // Plan 50 - 57
67  0, 0, 0, 0, 0, 0, 0, 0, // Plan 58 - 5F
68 
69  0, 0, 0, 0, 0, 0, 0, 0, // Plan 60 - 67
70  0, 0, 0, 0, 0, 0, 0, 0, // Plan 68 - 6F
71 
72  0, 0, 0, 0, 0, 0, 0, 0, // Plan 70 - 77
73  0, 0, 0, 0, 0, 0, 0, 0, // Plan 78 - 7F
74 
75  0, 0, 0, 0, 0, 0, 0, 0, // Plan 80 - 87
76  0, 0, 0, 0, 0, 0, 0, 0, // Plan 88 - 8F
77 
78  0, 0, 0, 0, 0, 0, 0, 0, // Plan 90 - 97
79  0, 0, 0, 0, 0, 0, 0, 0, // Plan 98 - 9F
80 
81  0, 0, 0, 0, 0, 0, 0, 0, // Plan A0 - A7
82  0, 0, 0, 0, 0, 0, 0, 0, // Plan A8 - AF
83 
84  0, 0, 0, 0, 0, 0, 0, 0, // Plan B0 - B7
85  0, 0, 0, 0, 0, 0, 0, 0, // Plan B8 - BF
86 
87  0, 0, 0, 0, 0, 0, 0, 0, // Plan C0 - C7
88  0, 0, 0, 0, 0, 0, 0, 0, // Plan C8 - CF
89 
90  0, 0, 0, 0, 0, 0, 0, 0, // Plan D0 - D7
91  0, 0, 0, 0, 0, 0, 0, 0, // Plan D8 - DF
92 
94  &s_Plan_E8h, 0, &s_Plan_EAh, &s_Plan_EBh, 0, 0, 0, 0, // Plan E8 - EF
95 
96  0, 0, 0, 0, 0, 0, 0, 0, // Plan F0 - F7
97  0, 0, 0, &s_Plan_FBh, 0, 0, &s_Plan_FEh, 0 // Plan F8 - FF
98 };
99 
100 /////////////////////////////////////////////////////////////////////////////
101 // Declare the parameter to get UnicodeToAscii translation table.
102 // Registry file:
103 // [NCBI]
104 // UnicodeToAscii = ...
105 // Environment variable:
106 // NCBI_CONFIG__NCBI__UNICODETOASCII
107 //
108 static string s_FindUnicodeToAscii(void)
109 {
110  return g_FindDataFile("unicode_to_ascii.txt");
111 }
114 
115 /////////////////////////////////////////////////////////////////////////////
116 // CUnicodeToAsciiTranslation helper class
117 
119 {
120 public:
122  virtual ~CUnicodeToAsciiTranslation(void);
123  bool IsInitialized(void) const
124  {
125  return m_initialized;
126  }
127  const SUnicodeTranslation* GetTranslation( TUnicode symbol) const;
128 private:
129  void x_Initialize(const string& name);
130  static int x_ParseLine(string& line, TUnicode& symbol, string& translation);
132  char *m_pool;
134 
135 };
136 
138  : m_initialized(false), m_pool(0)
139 {
140  string name( NCBI_PARAM_TYPE(NCBI, UnicodeToAscii)::GetDefault() );
141  if (!name.empty()) {
142  x_Initialize(name);
143  }
144 }
146 {
147  if (m_pool) {
148  free(m_pool);
149  }
150 }
151 
153 {
154 // clear existing data
155  if (m_pool) {
156  free(m_pool);
157  m_pool = 0;
159  }
160  m_initialized = false;
161 
162 // find file
163  CNcbiIfstream ifs(name.c_str(), IOS_BASE::in);
164  if (!ifs.is_open()) {
165  ERR_POST_X(1, "UnicodeToAscii table not found: " << name);
166  return;
167  }
168  LOG_POST_X(2, Info << "Loading UnicodeToAscii table at: " << name);
169 
170 // estimate memory pool size
171  size_t filelen = (size_t)CFile(name).GetLength();
172  size_t poolsize = filelen/2;
173  m_pool = (char*)malloc(poolsize);
174  if (!m_pool) {
175  ERR_POST_X(3, "UnicodeToAscii table failed to load: not enough memory");
176  return;
177  }
178  size_t poolpos=0;
179 
180 // parse file
181  TUnicode symbol;
182  string translation;
183  string line;
184  line.reserve(16);
185  map<TUnicode, size_t> symbolToOffset;
186  while ( NcbiGetlineEOL(ifs, line) ) {
187  if (x_ParseLine(line, symbol, translation) > 1) {
188 
189  if (poolpos + translation.size() + 1 > poolsize) {
190  m_pool = (char*)realloc( m_pool, poolsize += filelen/4);
191  if (!m_pool) {
192  ERR_POST_X(3, "UnicodeToAscii table failed to load: not enough memory");
193  return;
194  }
195  }
196 
197  symbolToOffset[symbol] = poolpos;
198  memcpy(m_pool+poolpos, translation.data(), translation.size());
199  poolpos += translation.size();
200  *(m_pool+poolpos) = '\0';
201  ++poolpos;
202  }
203  }
204  if (poolpos == 0) {
205  ERR_POST_X(1, "UnicodeToAscii table is empty: " << name);
206  free(m_pool);
207  m_pool = nullptr;
208  return;
209  } else {
210  m_pool = (char*)realloc( m_pool, poolpos);
211  }
212 
213 // create translation table
214  map<TUnicode, size_t>::const_iterator symend = symbolToOffset.end();
215  for( map<TUnicode, size_t>::const_iterator sym = symbolToOffset.begin();
216  sym != symend; ++sym) {
218  tr.Type = eString;
219  tr.Subst = m_pool + sym->second;
220  m_SymbolToTranslation[sym->first] = tr;
221  }
222 
223  m_initialized = true;
224 }
225 
227  string& line, TUnicode& symbol, string& translation)
228 {
229  int res = 0;
230  symbol = 0;
231  translation.clear();
232 
233 // symbol
234  string::size_type begin = line.find_first_not_of(" \t", 0);
235  if (begin == string::npos) {
236  return res;
237  }
238  string::size_type end = line.find_first_of(" \t,#",begin);
239  if (end == begin) {
240  return res;
241  }
242  if (end == string::npos) {
243  end = line.size();
244  }
245  if (NStr::StartsWith(CTempString( line.data()+begin, end-begin), "0x")) {
246  begin += 2;
247  }
248  symbol = NStr::StringToUInt( CTempString( line.data()+begin, end-begin), 0, 16);
249  ++res;
250  if ( end == line.size() || line[end] == '#') {
251  return res;
252  }
253 // translation
254  end = line.find(',',end);
255  if (end == string::npos) {
256  return res;
257  }
258  begin = line.find_first_not_of(" \t", ++end);
259  if (begin == string::npos) {
260  return res;
261  }
262  if (*(line.data()+begin) != '\"') {
263  return res;
264  }
265  const char* data = line.data()+begin;
266  const char* dataend = line.data()+line.size();
267  for (++data; data < dataend; ++data) {
268  char c = *data;
269  if (c == '"') {
270  break;
271  }
272  if (c == '\\') {
273  ++data;
274  if (data < dataend) {
275  c = *data;
276  switch (c) {
277  default: break;
278  case 'a': c = 0x7; break;
279  case 'b': c = 0x8; break;
280  case 't': c = 0x9; break;
281  case 'n': c = 0xA; break;
282  case 'v': c = 0xB; break;
283  case 'f': c = 0xC; break;
284  case 'r': c = 0xD; break;
285  case '0': c = 0x0; break;
286  case 'x':
287  if (data + 1 < dataend) {
288  begin = data + 1 - line.data();
289  end = line.find_first_not_of("0123456789abcdefABCDEF", begin);
290  if (end == string::npos) {
291  end = line.size();
292  }
293  c = (char)NStr::StringToUInt( CTempString( line.data()+begin, end-begin), 0, 16);
294  data = line.data() + end;
295  }
296  break;
297  }
298  }
299  if (data == dataend) {
300  break;
301  }
302  }
303  translation.append(1,c);
304  }
305  ++res;
306  return res;
307 }
308 
309 const SUnicodeTranslation*
311 {
313  m_SymbolToTranslation.find(symbol);
314  if (i == m_SymbolToTranslation.end()) {
315  return NULL;
316  }
317  return &(i->second);
318 }
319 
321 
322 /////////////////////////////////////////////////////////////////////////////
323 const SUnicodeTranslation*
325  const SUnicodeTranslation* default_translation)
326 {
327  if (!table) {
329  if (t.IsInitialized()) {
330  return t.GetTranslation(character);
331  }
333  }
334  const SUnicodeTranslation* translation=NULL;
335  if ((character & (~0xFFFF)) == 0) {
336  unsigned int thePlanNo = (character & 0xFF00) >> 8;
337  unsigned int theOffset = character & 0xFF;
338  const TUnicodePlan* thePlan = (*table)[thePlanNo];
339  if ( thePlan ) {
340  translation = &((*thePlan)[theOffset]);
341  }
342  }
343  if (!translation) {
344  if (!default_translation) {
345  return NULL;
346  }
347  if (default_translation->Type == eException) {
348  NCBI_THROW(CUtilException,eWrongData,
349  "UnicodeToAscii: unknown Unicode symbol");
350  }
351  translation = default_translation;
352  }
353  return translation;
354 }
355 
356 
357 TUnicode UTF8ToUnicode( const char* theUTF )
358 {
359  const char *p = theUTF;
360  Int1 counter = *p++;
361 
362  if ( ((*theUTF) & 0xC0) != 0xC0 ) {
363  TUnicode RC = 0;
364  RC |= (unsigned char)theUTF[0];
365  return RC;
366  }
367 
368  TUnicode acc = counter & 037;
369 
370  while ((counter = Int1(counter << 1)) < 0) {
371  unsigned char c = *p++;
372  if ((c & ~077) != 0200) { // Broken UTF-8 chain
373  return 0;
374  }
375  acc = (acc << 6) | (c & 077);
376  }
377 
378  return acc;
379 }
380 
381 
382 size_t UTF8ToUnicode( const char* theUTF, TUnicode* theUnicode )
383 {
384  const char *p = theUTF;
385  Int1 counter = *p++;
386 
387  if ( (unsigned char)theUTF[0] < 0x80 ) {
388  // This is one character UTF8. I.e. regular character.
389  *theUnicode = *theUTF;
390  return 1;
391  }
392 
393  if ( ((*theUTF) & 0xC0) != 0xC0 || ((*theUTF) & 0xFE) == 0xC0) {
394  // This is not UTF8
395  return 0;
396  }
397 
398  TUnicode acc = counter & 037;
399  if ( ((*theUTF) & 0xF8) == 0xF0 ) {
400  acc = counter & 07;
401  }
402 
403  while ((counter = Int1(counter << 1)) < 0) {
404  unsigned char c = *p++;
405  if ((c & ~077) != 0200) { // Broken UTF-8 chain
406  return 0;
407  }
408  acc = (acc << 6) | (c & 077);
409  } // while
410 
411  *theUnicode = acc;
412  return (size_t)(p - theUTF);
413 }
414 
415 
416 string UnicodeToUTF8( TUnicode theUnicode )
417 {
418  char theBuffer[10];
419  size_t theLength = UnicodeToUTF8( theUnicode, theBuffer, 10 );
420  return string( theBuffer, theLength );
421 }
422 
423 
424 size_t UnicodeToUTF8( TUnicode theUnicode, char *theBuffer,
425  size_t theBufLength )
426 {
427  size_t Length = 0;
428 
429  if (theUnicode < 0x80) {
430  Length = 1;
431  if ( Length > theBufLength ) return 0;
432  theBuffer[0] = char(theUnicode);
433  }
434  else if (theUnicode < 0x800) {
435  Length = 2;
436  if ( Length > theBufLength ) return 0;
437  theBuffer[0] = char( 0xC0 | (theUnicode>>6));
438  theBuffer[1] = char( 0x80 | (theUnicode & 0x3F));
439  }
440  else if (theUnicode < 0x10000) {
441  Length = 3;
442  if ( Length > theBufLength ) return 0;
443  theBuffer[0] = char( 0xE0 | (theUnicode>>12));
444  theBuffer[1] = char( 0x80 | ((theUnicode>>6) & 0x3F));
445  theBuffer[2] = char( 0x80 | (theUnicode & 0x3F));
446  }
447  else if (theUnicode < 0x200000) {
448  Length = 4;
449  if ( Length > theBufLength ) return 0;
450  theBuffer[0] = char( 0xF0 | (theUnicode>>18));
451  theBuffer[1] = char( 0x80 | ((theUnicode>>12) & 0x3F));
452  theBuffer[2] = char( 0x80 | ((theUnicode>>6) & 0x3F));
453  theBuffer[3] = char( 0x80 | (theUnicode & 0x3F));
454  }
455  return Length;
456 }
457 
458 ssize_t UTF8ToAscii( const char* src, char* dst, size_t dstLen,
459  const SUnicodeTranslation* default_translation,
460  const TUnicodeTable* table,
462 {
463  if (result) {
465  }
466  if ( !src || !dst || dstLen == 0 ) return 0;
467  size_t srcPos = 0;
468  size_t dstPos = 0;
469  size_t srcLen = strlen( src );
470 
471  for ( srcPos = 0; srcPos < srcLen; ) {
472  // Assign quck pointers
473  char* pDst = &(dst[dstPos]);
474  const char* pSrc = &(src[srcPos]);
475  TUnicode theUnicode;
476 
477  size_t utfLen = UTF8ToUnicode( pSrc, &theUnicode );
478 
479  if ( utfLen == 0 ) {
480  // Skip the error.
481  srcPos++;
482  continue;
483  }
484 
485  srcPos += utfLen;
486 
487  // Find the correct substitution.
488  const SUnicodeTranslation*
489  pSubst = UnicodeToAscii( theUnicode, table, default_translation );
490  if (result && pSubst == default_translation) {
492  }
493 
494  // Check if the unicode has a translation
495  if ( !pSubst ) {
496  continue;
497  }
498 
499  // Check if type is eSkip or substituting string is empty.
500  if ( (pSubst->Type == eSkip) ||
501  !(pSubst->Subst) ) {
502  continue;
503  }
504 
505 
506  // Check if type is eAsIs
507  if (pSubst->Type == eAsIs) {
508  memcpy( pDst, pSrc, utfLen );
509 // dstPos += utfLen;
510  continue;
511  }
512 
513  // Check the remaining length and put the result in there.
514  size_t substLen = strlen( pSubst->Subst );
515  if ( (dstPos + substLen) > dstLen ) {
516  return -1; // Unsufficient space
517  }
518 
519  // Copy the substituting value into the destignation string
520  memcpy( pDst, pSubst->Subst, substLen );
521  dstPos += substLen;
522  }
523  return (ssize_t) dstPos;
524 }
525 
526 string UTF8ToAsciiString( const char* src,
527  const SUnicodeTranslation* default_translation,
528  const TUnicodeTable* table,
530 {
531  if (result) {
533  }
534  if ( !src ) return kEmptyStr;
535  string dst;
536  size_t srcPos = 0;
537  size_t srcLen = strlen( src );
538 
539  for ( srcPos = 0; srcPos < srcLen; ) {
540  // Assign quck pointers
541  const char* pSrc = &(src[srcPos]);
542  TUnicode theUnicode;
543 
544  size_t utfLen = UTF8ToUnicode( pSrc, &theUnicode );
545 
546  if ( utfLen == 0 ) {
547  // Skip the error.
548  srcPos++;
549  continue;
550  }
551 
552  srcPos += utfLen;
553 
554  // Find the correct substitution.
555  const SUnicodeTranslation*
556  pSubst = UnicodeToAscii( theUnicode, table, default_translation );
557  if (result && pSubst == default_translation) {
559  }
560 
561  // Check if the unicode has a translation
562  if ( !pSubst ) {
563 // srcPos += utfLen;
564  continue;
565  }
566 
567  // Check if type is eSkip or substituting string is empty.
568  if ( (pSubst->Type == eSkip) ||
569  !(pSubst->Subst) ) {
570 // srcPos += utfLen;
571  continue;
572  }
573 
574 
575  // Check if type is eAsIs
576  if (pSubst->Type == eAsIs) {
577  dst += string( pSrc, utfLen );
578 // srcPos += utfLen;
579  continue;
580  }
581 
582  // Copy the substituting value into the destignation string
583  dst += pSubst->Subst;
584  }
585  return dst;
586 }
587 
588 
#define false
Definition: bool.h:36
CFile –.
Definition: ncbifile.hpp:1604
CObject –.
Definition: ncbiobj.hpp:180
CSafeStatic<>::
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
void x_Initialize(const string &name)
Definition: unicode.cpp:152
bool IsInitialized(void) const
Definition: unicode.cpp:123
static int x_ParseLine(string &line, TUnicode &symbol, string &translation)
Definition: unicode.cpp:226
virtual ~CUnicodeToAsciiTranslation(void)
Definition: unicode.cpp:145
map< TUnicode, SUnicodeTranslation > m_SymbolToTranslation
Definition: unicode.cpp:133
const SUnicodeTranslation * GetTranslation(TUnicode symbol) const
Definition: unicode.cpp:310
container_type::const_iterator const_iterator
Definition: map.hpp:53
const_iterator begin() const
Definition: map.hpp:151
const_iterator end() const
Definition: map.hpp:152
void clear()
Definition: map.hpp:169
const_iterator find(const key_type &key) const
Definition: map.hpp:153
string
Definition: cgiapp.hpp:687
#define NULL
Definition: ncbistd.hpp:225
#define LOG_POST_X(err_subcode, message)
Definition: ncbidiag.hpp:553
#define ERR_POST_X(err_subcode, message)
Error posting with default error code and given error subcode.
Definition: ncbidiag.hpp:550
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
void Info(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1185
Int8 GetLength(void) const
Get size of file.
Definition: ncbifile.cpp:3204
#define NCBI_PARAM_TYPE(section, name)
Generate typename for a parameter from its {section, name} attributes.
Definition: ncbi_param.hpp:149
int8_t Int1
1-byte (8-bit) signed integer
Definition: ncbitype.h:98
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
CNcbiIstream & NcbiGetlineEOL(CNcbiIstream &is, string &str, string::size_type *count=NULL)
Read from "is" to "str" the next line (taking into account platform specifics of End-of-Line)
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
Definition: ncbistre.hpp:439
#define kEmptyStr
Definition: ncbistr.hpp:123
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5411
static unsigned int StringToUInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to unsigned int.
Definition: ncbistr.cpp:642
ESubstType Type
Type of the substitutor.
Definition: unicode.hpp:72
string UTF8ToAsciiString(const char *src, const SUnicodeTranslation *default_translation, const TUnicodeTable *table, EConversionResult *result)
Convert UTF8 into ASCII string.
Definition: unicode.cpp:526
SUnicodeTranslation TUnicodePlan[256]
Definition: unicode.hpp:75
string UnicodeToUTF8(TUnicode theUnicode)
Convert Unicode character into UTF8.
Definition: unicode.cpp:416
const SUnicodeTranslation * UnicodeToAscii(TUnicode character, const TUnicodeTable *table, const SUnicodeTranslation *default_translation)
Convert Unicode character into ASCII string.
Definition: unicode.cpp:324
const char * Subst
Substitutor for unicode.
Definition: unicode.hpp:71
TUnicodePlan * TUnicodeTable[256]
Definition: unicode.hpp:76
unsigned int TUnicode
Definition: unicode.hpp:77
EConversionResult
Definition: unicode.hpp:63
ssize_t UTF8ToAscii(const char *src, char *dst, size_t dstLen, const SUnicodeTranslation *default_translation, const TUnicodeTable *table, EConversionResult *result)
Convert UTF8 into ASCII character buffer.
Definition: unicode.cpp:458
@ eSkip
Unicode to be skipped in translation. Usually it is combined mark.
Definition: unicode.hpp:52
@ eException
Throw exception (CUtilException, with type eWrongData)
Definition: unicode.hpp:55
@ eAsIs
Unicodes which should go into the text as is.
Definition: unicode.hpp:53
@ eDefaultTranslationUsed
Definition: unicode.hpp:65
@ eConvertedFine
Definition: unicode.hpp:64
Definition of all error codes used in util (xutil.lib).
<!DOCTYPE HTML >< html > n< header > n< title > PubSeq Gateway Help Page</title > n< style > n table
int i
EIPRangeType t
Definition: ncbi_localip.c:101
Static variables safety - create on demand, destroy on application termination.
int ssize_t
Definition: ncbiconf_msvc.h:92
Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...
std::istream & in(std::istream &in_, double &x_)
static BOOL utf8
Definition: pcregrep.c:199
NCBI
Definition: static_set.hpp:72
Structure to keep substitutions for the particular unicode character.
Definition: unicode.hpp:70
else result
Definition: token2.c:20
CSafeStatic< CUnicodeToAsciiTranslation > g_UnicodeTranslation
Definition: unicode.cpp:320
NCBI_PARAM_DECL(string, NCBI, UnicodeToAscii)
static string s_FindUnicodeToAscii(void)
Definition: unicode.cpp:108
static TUnicodeTable g_DefaultUnicodeTable
Definition: unicode.cpp:49
TUnicode UTF8ToUnicode(const char *theUTF)
Definition: unicode.cpp:357
NCBI_PARAM_DEF_WITH_INIT(string, NCBI, UnicodeToAscii, "", s_FindUnicodeToAscii)
static TUnicodePlan s_Plan_26h
static TUnicodePlan s_Plan_E4h
static TUnicodePlan s_Plan_30h
static TUnicodePlan s_Plan_04h
static TUnicodePlan s_Plan_FEh
static TUnicodePlan s_Plan_E2h
static TUnicodePlan s_Plan_27h
static TUnicodePlan s_Plan_21h
static TUnicodePlan s_Plan_E5h
static TUnicodePlan s_Plan_E8h
static TUnicodePlan s_Plan_00h
static TUnicodePlan s_Plan_E6h
static TUnicodePlan s_Plan_22h
static TUnicodePlan s_Plan_20h
static TUnicodePlan s_Plan_24h
static TUnicodePlan s_Plan_25h
static TUnicodePlan s_Plan_01h
static TUnicodePlan s_Plan_EAh
static TUnicodePlan s_Plan_EBh
static TUnicodePlan s_Plan_FBh
static TUnicodePlan s_Plan_23h
static TUnicodePlan s_Plan_E0h
static TUnicodePlan s_Plan_E3h
static TUnicodePlan s_Plan_1Eh
static TUnicodePlan s_Plan_02h
static TUnicodePlan s_Plan_03h
static TUnicodePlan s_Plan_E7h
string g_FindDataFile(const CTempString &name, CDirEntry::EType type=CDirEntry::eFile)
Look for an NCBI application data file or directory of the given name and type; in general,...
Definition: util_misc.cpp:139
void free(voidpf ptr)
voidp malloc(uInt size)
Modified on Sat Dec 02 09:24:02 2023 by modify_doxy.py rev. 669887