NCBI C++ ToolKit
ncbistr.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: ncbistr.cpp 102274 2024-04-15 14:13:11Z ivanov $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Eugene Vasilchenko, Denis Vakatov
27  *
28  * File Description:
29  * Some helper functions
30  *
31  */
32 
33 #include <ncbi_pch.hpp>
34 #include <common/ncbi_source_ver.h>
35 #include <corelib/ncbistr.hpp>
36 #include <corelib/tempstr.hpp>
37 #include <corelib/ncbistr_util.hpp>
38 #include <corelib/error_codes.hpp>
39 #include <corelib/ncbierror.hpp>
40 #include <corelib/ncbifloat.h>
41 #include <corelib/ncbi_base64.h>
42 #include <memory>
43 #include <functional>
44 #include <algorithm>
45 #include <iterator>
46 #include <stdio.h>
47 #include <locale.h>
48 #include <math.h>
49 
50 
51 #define NCBI_USE_ERRCODE_X Corelib_Util
52 
53 
55 
56 
57 // Digits (up to base 36)
58 static const char kDigitUpper[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
59 static const char kDigitLower[] = "0123456789abcdefghijklmnopqrstuvwxyz";
60 
61 
62 static inline
63 SIZE_TYPE s_DiffPtr(const char* end, const char* start)
64 {
65  return end ? (SIZE_TYPE)(end - start) : (SIZE_TYPE) 0;
66 }
67 
68 const char *const kEmptyCStr = "";
69 
70 #if defined(HAVE_WSTRING)
71 const wchar_t *const kEmptyWCStr = L"";
72 #endif
73 
74 
75 extern const char* const kNcbiDevelopmentVersionString;
77  = "NCBI_DEVELOPMENT_VER_" NCBI_AS_STRING(NCBI_DEVELOPMENT_VER);
78 
79 #ifdef NCBI_PRODUCTION_VER
80 extern const char* const kNcbiProductionVersionString;
81 const char* const kNcbiProductionVersionString
82  = "NCBI_PRODUCTION_VER_" NCBI_AS_STRING(NCBI_PRODUCTION_VER);
83 #endif
84 
85 
86 #if !defined(NCBI_OS_MSWIN) && \
87  !(defined(NCBI_OS_LINUX) && \
88  (defined(NCBI_COMPILER_GCC) || defined(NCBI_COMPILER_ANY_CLANG)))
89 const string* CNcbiEmptyString::m_Str = 0;
90 const string& CNcbiEmptyString::FirstGet(void) {
91  static const string s_Str = "";
92  m_Str = &s_Str;
93  return s_Str;
94 }
95 # ifdef HAVE_WSTRING
96 const wstring* CNcbiEmptyWString::m_Str = 0;
97 const wstring& CNcbiEmptyWString::FirstGet(void) {
98  static const wstring s_Str = L"";
99  m_Str = &s_Str;
100  return s_Str;
101 }
102 # endif
103 #endif
104 
105 
107 {
108  SIZE_TYPE len = str.length();
109  for (SIZE_TYPE idx = pos; idx < len; ++idx) {
110  if (!isspace((unsigned char) str[idx])) {
111  return false;
112  }
113  }
114  return true;
115 }
116 
117 
119 {
120  SIZE_TYPE n1 = s1.length();
121  SIZE_TYPE n2 = s2.length();
122  if ( !n1 ) {
123  return n2 ? -1 : 0;
124  }
125  if ( !n2 ) {
126  return 1;
127  }
128  if (int res = memcmp(s1.data(), s2.data(), min(n1, n2))) {
129  return res;
130  }
131  return (n1 == n2) ? 0 : (n1 > n2 ? 1 : -1);
132 }
133 
134 
136  const char* s2)
137 {
138  if (pos == NPOS || !n || s1.length() <= pos) {
139  return *s2 ? -1 : 0;
140  }
141  if ( !*s2 ) {
142  return 1;
143  }
144  if (n == NPOS || n > s1.length() - pos) {
145  n = s1.length() - pos;
146  }
147  const char* s = s1.data() + pos;
148  while (n && *s2 && *s == *s2) {
149  s++; s2++; n--;
150  }
151  if (n == 0) {
152  return *s2 ? -1 : 0;
153  }
154  return *s - *s2;
155 }
156 
157 
159  const CTempString s2)
160 {
161  if (pos == NPOS || !n || s1.length() <= pos) {
162  return s2.empty() ? 0 : -1;
163  }
164  if (s2.empty()) {
165  return 1;
166  }
167  if (n == NPOS || n > s1.length() - pos) {
168  n = s1.length() - pos;
169  }
170  SIZE_TYPE n_cmp = n;
171  if (n_cmp > s2.length()) {
172  n_cmp = s2.length();
173  }
174  const char* s = s1.data() + pos;
175  const char* p = s2.data();
176  while (n_cmp && *s == *p) {
177  s++; p++; n_cmp--;
178  }
179 
180  if (n_cmp == 0) {
181  if (n == s2.length())
182  return 0;
183  return n > s2.length() ? 1 : -1;
184  }
185 
186  return *s - *p;
187 }
188 
189 
191 {
192  SIZE_TYPE n1 = s1.length();
193  SIZE_TYPE n2 = s2.length();
194 
195  if ( !n1 ) {
196  return n2 ? -1 : 0;
197  }
198  if ( !n2 ) {
199  return 1;
200  }
201  SIZE_TYPE n = min(n1, n2);
202  const char* p1 = s1.data();
203  const char* p2 = s2.data();
204 
205  while (n && (*p1 == *p2 ||
206  tolower((unsigned char)(*p1)) == tolower((unsigned char)(*p2))) ) {
207  p1++; p2++; n--;
208  }
209  if ( !n ) {
210  return (n1 == n2) ? 0 : (n1 > n2 ? 1 : -1);
211  }
212  if (*p1 == *p2) {
213  return 0;
214  }
215  return tolower((unsigned char)(*p1)) - tolower((unsigned char)(*p2));
216 }
217 
218 
220  const char* s2)
221 {
222  if (pos == NPOS || !n || s1.length() <= pos) {
223  return *s2 ? -1 : 0;
224  }
225  if ( !*s2 ) {
226  return 1;
227  }
228 
229  if (n == NPOS || n > s1.length() - pos) {
230  n = s1.length() - pos;
231  }
232 
233  const char* s = s1.data() + pos;
234  while (n && *s2 && (*s == *s2 ||
235  tolower((unsigned char)(*s)) == tolower((unsigned char)(*s2))) ) {
236  s++; s2++; n--;
237  }
238  if (n == 0) {
239  return *s2 ? -1 : 0;
240  }
241  if (*s == *s2) {
242  return 0;
243  }
244  return tolower((unsigned char)(*s)) - tolower((unsigned char)(*s2));
245 }
246 
247 
249  const CTempString s2)
250 {
251  if (pos == NPOS || !n || s1.length() <= pos) {
252  return s2.empty() ? 0 : -1;
253  }
254  if (s2.empty()) {
255  return 1;
256  }
257  if (n == NPOS || n > s1.length() - pos) {
258  n = s1.length() - pos;
259  }
260 
261  SIZE_TYPE n_cmp = n;
262  if (n_cmp > s2.length()) {
263  n_cmp = s2.length();
264  }
265  const char* s = s1.data() + pos;
266  const char* p = s2.data();
267  while (n_cmp && (*s == *p ||
268  tolower((unsigned char)(*s)) == tolower((unsigned char)(*p))) ) {
269  s++; p++; n_cmp--;
270  }
271  if (n_cmp == 0) {
272  return (n == s2.length()) ? 0 : (n > s2.length() ? 1 : -1);
273  }
274  if (*s == *p) {
275  return 0;
276  }
277  return tolower((unsigned char)(*s)) - tolower((unsigned char)(*p));
278 }
279 
280 
281 // MatchesMask() tri-state result
283  eMatch = 1, // match
284  eNoMatch = 0, // no match
285  eMismatch = -1 // mismatch, stop search
286 };
287 
288 // Implements the same logic as UTIL_MatchesMask() from 'include/connect/ncbi_util.h',
289 // but for CTempString instead of char*.
290 
292 {
293  char s, m;
294  size_t str_pos = 0, mask_pos = 0;
295 
296  for ( ; (m = mask[mask_pos]); ++str_pos, ++mask_pos) {
297 
298  s = str[str_pos];
299 
300  if (!s && m != '*') {
301  return eMismatch;
302  }
303  // Analyze mask symbol
304  switch ( m ) {
305  case '?':
306  _ASSERT(s);
307  continue;
308  case '*':
309  // Collapse multiple stars
310  while ( (m = mask[mask_pos]) == '*' ) mask_pos++;
311  if ( !m ) {
312  // only stars left in the mask
313  return eMatch;
314  }
315  // General case, use recursion
316  while ( s ) {
317  EMatchesMaskResult res = s_MatchesMask(str.substr(str_pos), mask.substr(mask_pos), ignore_case);
318  if ( res != eNoMatch ) {
319  // match or mismatch
320  return res;
321  }
322  // continue search
323  s = str[str_pos++];
324  }
325  return eMismatch;
326 
327  case '[':
328  if (!(m = mask[++mask_pos]))
329  return eMismatch; // mismatch, pattern error
330  if (m == '!') {
331  m = 1 /*complement*/;
332  ++mask_pos;
333  } else
334  m = 0;
335  if (ignore_case)
336  s = (char) tolower((unsigned char) s);
337  _ASSERT(s);
338  char a, b; // range for [a-b]
339  do {
340  if (!(a = mask[mask_pos++]))
341  return eMismatch; // mismatch, pattern error
342  if (mask[mask_pos] == '-' && mask[mask_pos+1] != ']') {
343  ++mask_pos;
344  if (!(b = mask[mask_pos++]))
345  return eMismatch; // mismatch, pattern error
346  } else
347  b = a;
348  if (s) {
349  if (ignore_case) {
350  a = (char) tolower((unsigned char) a);
351  b = (char) tolower((unsigned char) b);
352  }
353  if (a <= s && s <= b)
354  s = 0 /*mark as found*/;
355  }
356  } while (mask[mask_pos] != ']');
357  if (m == !s)
358  return eNoMatch; // mismatch
359  continue;
360 
361  case '\\':
362  if (!(m = mask[++mask_pos]))
363  return eMismatch; // mismatch, pattern error
364  /*FALLTHRU*/
365 
366  default:
367  // Compare non pattern character in mask and name
368  _ASSERT(s && m);
369  if (ignore_case) {
370  if (s != m && tolower((unsigned char)s) != tolower((unsigned char)m))
371  return eNoMatch;
372  } else {
373  if (s != m)
374  return eNoMatch;
375  }
376  continue;
377  }
378  }
379  // Matches if we reach the end of the string and mask at the same time only
380  if ( str[str_pos] ) {
381  return eNoMatch;
382  }
383  return eMatch;
384 }
385 
386 
387 // NOTE: This code is also used in CDirEntry::MatchesMask().
388 //
390 {
391  return s_MatchesMask(str, mask, use_case == NStr::eNocase) == eMatch;
392 }
393 
394 
395 char* NStr::ToLower(char* str)
396 {
397  char* s;
398  for (s = str; *str; str++) {
399  *str = (char)tolower((unsigned char)(*str));
400  }
401  return s;
402 }
403 
404 
405 string& NStr::ToLower(string& str)
406 {
407  NON_CONST_ITERATE (string, it, str) {
408  *it = (char)tolower((unsigned char)(*it));
409  }
410  return str;
411 }
412 
413 
414 char* NStr::ToUpper(char* str)
415 {
416  char* s;
417  for (s = str; *str; str++) {
418  *str = (char)toupper((unsigned char)(*str));
419  }
420  return s;
421 }
422 
423 
424 string& NStr::ToUpper(string& str)
425 {
426  NON_CONST_ITERATE (string, it, str) {
427  *it = (char)toupper((unsigned char)(*it));
428  }
429  return str;
430 }
431 
432 
434 {
435  SIZE_TYPE len = str.length();
436  for (SIZE_TYPE i = 0; i < len; ++i) {
437  if (isalpha((unsigned char)str[i]) && !islower((unsigned char)str[i])) {
438  return false;
439  }
440  }
441  return true;
442 }
443 
444 
446 {
447  SIZE_TYPE len = str.length();
448  for (SIZE_TYPE i = 0; i < len; ++i) {
449  if (isalpha((unsigned char)str[i]) && !isupper((unsigned char)str[i])) {
450  return false;
451  }
452  }
453  return true;
454 }
455 
456 
458 {
459  int error = 0, ret = -1;
460  size_t len = str.size();
461 
462  if (!len) {
463  error = EINVAL;
464  } else {
465  size_t i = 0;
466  // skip leading '+' if any
467  if (str.data()[0] == '+' && len > 1) {
468  ++i;
469  }
470  unsigned v = 0;
471  for (; i < len; ++i) {
472  unsigned d = str.data()[i] - '0';
473  if (d > 9) {
474  error = EINVAL;
475  break;
476  }
477  unsigned nv = v * 10 + d;
478  const unsigned kOverflowLimit = (INT_MAX - 9) / 10 + 1;
479  if (v >= kOverflowLimit) {
480  // possible overflow
481  if (v > kOverflowLimit || nv > INT_MAX) {
482  error = ERANGE;
483  break;
484  }
485  }
486  v = nv;
487  }
488  if (!error) {
489  ret = static_cast<int>(v);
490  }
491  }
492 /*
493  if (flags & fConvErr_NoErrno) {
494  return ret;
495  }
496 */
497  errno = error;
498  if (error) {
501  } else {
503  }
504  }
505  return ret;
506 }
507 
508 
509 /// @internal
510 // Access to errno is slow on some platforms, because it uses TLS to store a value
511 // for each thread. This guard class can set an errno value in string-to-numeric
512 // conversion functions only once before exit, and only when necessary.
514 {
515 public:
516  CS2N_Guard(NStr::TStringToNumFlags, bool skip_if_zero) :
517  m_NoErrno(false), // m_NoErrno((flags & NStr::fConvErr_NoErrno) > 0),
518  m_SkipIfZero(skip_if_zero),
519  m_Errno(0)
520  { }
521  ~CS2N_Guard(void) {
522  if (!m_NoErrno) {
523  // Is the guard used against the code that already set an errno?
524  // If the error code is not defined here, do not even try to check/set it.
525  if (!m_SkipIfZero || m_Errno) {
526  errno = m_Errno;
527  }
528  }
529  }
530  void Set(int errcode) { m_Errno = errcode; }
531  int Errno(void) const { return m_Errno; }
532  // Says that we want to throw an exception, do not set errno in this case
533  void Throw(void) { m_SkipIfZero = true; m_Errno = 0; }
534  // Auxiliary function to create a message about conversion error
535  // to specified type. It doesn't have any relation to the guard itself,
536  // but can help to save on the amount of code in calling macro.
537  string Message(const CTempString str, const char* to_type, const CTempString msg);
538 
539 private:
540  bool m_NoErrno; // do not set errno at all
541  bool m_SkipIfZero; // do not set errno if TRUE and m_Errno == 0
542  int m_Errno; // errno value to set
543 };
544 
545 string CS2N_Guard::Message(const CTempString str, const char* to_type, const CTempString msg)
546 {
547  string s;
548  s.reserve(str.length() + msg.length() + 50);
549  s += "Cannot convert string '";
551  s += "' to ";
552  s += to_type;
553  if ( !msg.empty() ) {
554  s += ", ";
555  s += msg;
556  }
557  return s;
558 }
559 
560 /// Regular guard
561 #define S2N_CONVERT_GUARD(flags) \
562  CS2N_Guard err_guard(flags, false)
563 
564 // This guard can be used against code that has already set an errno.
565 // If the error code is not defined, the guard does not even try to check/set it (even to zero).
566 #define S2N_CONVERT_GUARD_EX(flags) \
567  CS2N_Guard err_guard(flags, true)
568 
569 #define S2N_CONVERT_ERROR(to_type, msg, errcode, pos) \
570  do { \
571  err_guard.Set(errcode); \
572  if ( !(flags & NStr::fConvErr_NoThrow) ) { \
573  err_guard.Throw(); \
574  NCBI_THROW2(CStringException, eConvert, \
575  err_guard.Message(str, #to_type, msg), pos); \
576  } else { \
577 /* \
578  if (flags & NStr::fConvErr_NoErrno) { \
579  / Error, but forced to return 0 / \
580  return 0; \
581  } \
582 */ \
583  if (flags & NStr::fConvErr_NoErrMessage) { \
584  CNcbiError::SetErrno(err_guard.Errno()); \
585  } else { \
586  CNcbiError::SetErrno(err_guard.Errno(), \
587  err_guard.Message(str, #to_type, msg)); \
588  } \
589  return 0; \
590  } \
591  } while (false)
592 
593 
594 #define S2N_CONVERT_ERROR_INVAL(to_type) \
595  S2N_CONVERT_ERROR(to_type, kEmptyStr, EINVAL, pos)
596 
597 #define S2N_CONVERT_ERROR_RADIX(to_type, msg) \
598  S2N_CONVERT_ERROR(to_type, msg, EINVAL, pos)
599 
600 #define S2N_CONVERT_ERROR_OVERFLOW(to_type) \
601  S2N_CONVERT_ERROR(to_type, "overflow", ERANGE, pos)
602 
603 #define CHECK_ENDPTR(to_type) \
604  if ( str[pos] ) { \
605  S2N_CONVERT_ERROR(to_type, kEmptyStr, EINVAL, pos); \
606  }
607 
608 #define CHECK_ENDPTR_SIZE(to_type) \
609  if ( pos < size ) { \
610  S2N_CONVERT_ERROR(to_type, kEmptyStr, EINVAL, pos); \
611  }
612 
613 #define CHECK_COMMAS \
614  /* Check on possible commas */ \
615  if (flags & NStr::fAllowCommas) { \
616  if (ch == ',') { \
617  if ((numpos == pos) || \
618  ((comma >= 0) && (comma != 3)) ) { \
619  /* Not first comma, sitting on incorrect place */ \
620  break; \
621  } \
622  /* Skip it */ \
623  comma = 0; \
624  pos++; \
625  continue; \
626  } else { \
627  if (comma >= 0) { \
628  /* Count symbols between commas */ \
629  comma++; \
630  } \
631  } \
632  }
633 
634 
636 {
638  Int8 value = StringToInt8(str, flags, base);
639  if ( value < kMin_Int || value > kMax_Int ) {
640  S2N_CONVERT_ERROR(int, "overflow", ERANGE, 0);
641  }
642  return (int) value;
643 }
644 
645 
646 unsigned int
647 NStr::StringToUInt(const CTempString str, TStringToNumFlags flags, int base)
648 {
650  Uint8 value = StringToUInt8(str, flags, base);
651  if ( value > kMax_UInt ) {
652  S2N_CONVERT_ERROR(unsigned int, "overflow", ERANGE, 0);
653  }
654  return (unsigned int) value;
655 }
656 
657 
658 long NStr::StringToLong(const CTempString str, TStringToNumFlags flags, int base)
659 {
661  Int8 value = StringToInt8(str, flags, base);
662  if ( value < kMin_Long || value > kMax_Long ) {
663  S2N_CONVERT_ERROR(long, "overflow", ERANGE, 0);
664  }
665  return (long) value;
666 }
667 
668 
669 unsigned long
670 NStr::StringToULong(const CTempString str, TStringToNumFlags flags, int base)
671 {
673  Uint8 value = StringToUInt8(str, flags, base);
674  if ( value > kMax_ULong ) {
675  S2N_CONVERT_ERROR(unsigned long, "overflow", ERANGE, 0);
676  }
677  return (unsigned long) value;
678 }
679 
680 
/// @internal
// Check that symbol 'ch' is a valid digit for a number with radix 'base'.
//
// @param ch     Character to classify.
// @param base   Radix. Values <= 10 accept only '0'..('0'+base-1); larger
//               values additionally accept ASCII letters ('a'/'A' == 10, ...).
// @param value  Optional out-parameter receiving the numeric value of 'ch'.
//               For base <= 10 it is written only on success; for base > 10
//               it is written for any ASCII digit or letter, even when that
//               value turns out to be >= base (callers must check the result).
// @return       true if 'ch' is a digit whose value is less than 'base'.
//
// Letters and digits are recognized by explicit ASCII ranges on purpose.
// The locale-dependent isalnum()/tolower() can classify high-bit bytes as
// alphabetic in a non-"C" locale; such a byte would then yield a negative
// "digit" value that wrongly passes the 'delta < base' test.
static inline
bool s_IsGoodCharForRadix(char ch, int base, int* value = 0)
{
    int delta;  // numeric value of 'ch'

    if ( base <= 10 ) {
        // Shortcut for the most frequent case: decimal digits only.
        // The unsigned comparison also rejects characters below '0'.
        delta = ch - '0';
        if ( unsigned(delta) >= unsigned(base) ) {
            return false;
        }
        if ( value ) {
            *value = delta;
        }
        return true;
    }

    if (ch >= '0'  &&  ch <= '9') {
        delta = ch - '0';
    } else if (ch >= 'a'  &&  ch <= 'z') {
        delta = ch - 'a' + 10;
    } else if (ch >= 'A'  &&  ch <= 'Z') {
        delta = ch - 'A' + 10;
    } else {
        // Not an ASCII alphanumeric: never a digit in any supported radix
        return false;
    }
    if ( value ) {
        *value = delta;
    }
    return delta < base;
}
713 
714 
715 // Skip all allowed chars (all except those used for digit composition).
716 // Update 'pos' to the current position in the string.
enum ESkipMode {
    eSkipAll        = 0,  // skip every symbol
    eSkipAllAllowed = 1,  // skip every symbol except digit/+/-/.
    eSkipSpacesOnly = 2   // skip spaces only
};
722 
723 static inline
724 bool s_IsDecimalPoint(unsigned char ch, NStr::TStringToNumFlags flags)
725 {
726  if ( ch != '.' && ch != ',') {
727  return false;
728  }
729  if (flags & NStr::fDecimalPosix) {
730  return ch == '.';
731  }
732  else if (flags & NStr::fDecimalPosixOrLocal) {
733  return ch == '.' || ch == ',';
734  }
735  struct lconv* conv = localeconv();
736  return ch == *(conv->decimal_point);
737 }
738 
739 static inline
741  SIZE_TYPE& pos,
742  ESkipMode skip_mode,
744 {
745  if (skip_mode == eSkipAll) {
746  pos = str.length();
747  return;
748  }
749 
750  for ( SIZE_TYPE len = str.length(); pos < len; ++pos ) {
751  unsigned char ch = str[pos];
752  if ( isdigit(ch) || ch == '+' || ch == '-' || s_IsDecimalPoint(ch,flags) ) {
753  break;
754  }
755  if ( (skip_mode == eSkipSpacesOnly) && !isspace(ch) ) {
756  break;
757  }
758  }
759 }
760 
761 
762 // Check radix base. If it is zero, determine base using first chars
763 // of the string. Update 'base' value.
764 // Update 'ptr' to current position in the string.
765 static inline
766 bool s_CheckRadix(const CTempString str, SIZE_TYPE& pos, int& base)
767 {
768  if ( base == 10 || base == 8 ) {
769  // shortcut for most frequent case
770  return true;
771  }
772  // Check base
773  if ( base < 0 || base == 1 || base > 36 ) {
774  return false;
775  }
776  // Try to determine base using first chars of the string
777  unsigned char ch = str[pos];
778  unsigned char next = str[pos+1];
779  if ( base == 0 ) {
780  if ( ch != '0' ) {
781  base = 10;
782  } else if (next == 'x' || next == 'X') {
783  base = 16;
784  } else {
785  base = 8;
786  }
787  }
788  // Remove leading '0x' for hex numbers
789  if ( base == 16 ) {
790  if (ch == '0' && (next == 'x' || next == 'X')) {
791  pos += 2;
792  }
793  }
794  return true;
795 }
796 
797 
798 Int8 NStr::StringToInt8(const CTempString str, TStringToNumFlags flags, int base)
799 {
801 
802  // Current position in the string
803  SIZE_TYPE pos = 0;
804 
805  // Skip allowed leading symbols
806  if (flags & fAllowLeadingSymbols) {
807  bool spaces = ((flags & fAllowLeadingSymbols) == fAllowLeadingSpaces);
810  }
811  // Determine sign
812  bool sign = false;
813  switch (str[pos]) {
814  case '-':
815  sign = true;
816  /*FALLTHRU*/
817  case '+':
818  pos++;
819  break;
820  default:
821  if (flags & fMandatorySign) {
823  }
824  break;
825  }
826  SIZE_TYPE pos0 = pos;
827  // Check radix base
828  if ( !s_CheckRadix(str, pos, base) ) {
829  S2N_CONVERT_ERROR_RADIX(Int8, "bad numeric base '" +
830  NStr::IntToString(base)+ "'");
831  }
832 
833  // Begin conversion
834  Int8 n = 0;
835  Int8 limdiv = base==10? kMax_I8 / 10: kMax_I8 / base;
836  Int8 limoff = (base==10? kMax_I8 % 10: kMax_I8 % base) + (sign ? 1 : 0);
837 
838  // Number of symbols between two commas. '-1' means -- no comma yet.
839  int comma = -1;
840  SIZE_TYPE numpos = pos;
841 
842  while (char ch = str[pos]) {
843  int delta; // corresponding numeric value of 'ch'
844 
845  // Check on possible commas
846  CHECK_COMMAS;
847  // Sanity check
848  if ( !s_IsGoodCharForRadix(ch, base, &delta) ) {
849  break;
850  }
851  // Overflow check
852  if ( n >= limdiv && (n > limdiv || delta > limoff) ) {
854  }
855  n *= base;
856  n += delta;
857  pos++;
858  }
859 
860  // Last checks
861  if ( pos == pos0 || ((comma >= 0) && (comma != 3)) ) {
863  }
864  // Skip allowed trailing symbols
866  bool spaces = ((flags & fAllowTrailingSymbols) ==
869  }
870  // Assign sign before the end pointer check
871  n = sign ? -n : n;
873 
874  return n;
875 }
876 
877 
879  TStringToNumFlags flags, int base)
880 {
882 
883  const TStringToNumFlags slow_flags =
885 
886  if ( base == 10 && (flags & slow_flags) == 0 ) {
887  // fast conversion
888 
889  // Current position in the string
890  CTempString::const_iterator ptr = str.begin(), end = str.end();
891 
892  // Determine sign
893  if ( ptr != end && *ptr == '+' ) {
894  ++ptr;
895  }
896  if ( ptr == end ) {
897  S2N_CONVERT_ERROR(Uint8, kEmptyStr, EINVAL, ptr-str.begin());
898  }
899 
900  // Begin conversion
901  Uint8 n = 0;
902 
903  const Uint8 limdiv = kMax_UI8/10;
904  const int limoff = int(kMax_UI8 % 10);
905 
906  do {
907  char ch = *ptr;
908  int delta = ch - '0';
909  if ( unsigned(delta) >= 10 ) {
910  S2N_CONVERT_ERROR(Uint8, kEmptyStr, EINVAL, ptr-str.begin());
911  }
912  // Overflow check
913  if ( n >= limdiv && (n > limdiv || delta > limoff) ) {
914  S2N_CONVERT_ERROR(Uint8, kEmptyStr, ERANGE, ptr-str.begin());
915  }
916  n = n*10+delta;
917  } while ( ++ptr != end );
918 
919  return n;
920  }
921 
922  // Current position in the string
923  SIZE_TYPE pos = 0, size = str.size();
924 
925  // Skip allowed leading symbols
926  if (flags & fAllowLeadingSymbols) {
927  bool spaces = ((flags & fAllowLeadingSymbols) == fAllowLeadingSpaces);
930  }
931  // Determine sign
932  if (str[pos] == '+') {
933  pos++;
934  } else {
935  if (flags & fMandatorySign) {
937  }
938  }
939  SIZE_TYPE pos0 = pos;
940 
941  // Begin conversion
942  Uint8 n = 0;
943  // Check radix base
944  if ( !s_CheckRadix(str, pos, base) ) {
945  S2N_CONVERT_ERROR_RADIX(Uint8, "bad numeric base '" +
946  NStr::IntToString(base) + "'");
947  }
948 
949  Uint8 limdiv = kMax_UI8 / base;
950  int limoff = int(kMax_UI8 % base);
951 
952  // Number of symbols between two commas. '-1' means -- no comma yet.
953  int comma = -1;
954  SIZE_TYPE numpos = pos;
955 
956  while (char ch = str[pos]) {
957  int delta; // corresponding numeric value of 'ch'
958 
959  // Check on possible commas
960  CHECK_COMMAS;
961  // Sanity check
962  if ( !s_IsGoodCharForRadix(ch, base, &delta) ) {
963  break;
964  }
965  // Overflow check
966  if ( n >= limdiv && (n > limdiv || delta > limoff) ) {
968  }
969  n *= base;
970  n += delta;
971  pos++;
972  }
973 
974  // Last checks
975  if ( pos == pos0 || ((comma >= 0) && (comma != 3)) ) {
977  }
978  // Skip allowed trailing symbols
980  bool spaces = ((flags & fAllowTrailingSymbols) ==
983  }
985  return n;
986 }
987 
988 
989 double NStr::StringToDoublePosix(const char* ptr, char** endptr, TStringToNumFlags flags)
990 {
992 
993  const char* start = ptr;
994  char c = *ptr++;
995 
996  // skip leading blanks
997  while ( isspace((unsigned char)c) ) {
998  c = *ptr++;
999  }
1000 
1001  int sign = 0;
1002  if ( c == '-' ) {
1003  sign = -1;
1004  c = *ptr++;
1005  }
1006  else if ( c == '+' ) {
1007  sign = +1;
1008  c = *ptr++;
1009  }
1010 
1011  if (c == 0) {
1012  if (endptr) {
1013  *endptr = (char*)start;
1014  }
1015  err_guard.Set(EINVAL);
1016  return 0.;
1017  }
1018 
1019  // short-cut - single digit
1020  if ( !*ptr && c >= '0' && c <= '9' ) {
1021  if (endptr) {
1022  *endptr = (char*)ptr;
1023  }
1024  double result = c-'0';
1025  // some compilers fail to negate zero
1026  return sign < 0 ? (c == '0' ? -0. : -result) : result;
1027  }
1028 
1029  bool dot = false, expn = false, anydigits = false;
1030  int digits = 0, dot_position = 0;
1031  unsigned int first=0, second=0, first_mul=1;
1032  long double second_mul = NCBI_CONST_LONGDOUBLE(1.),
1033  third = NCBI_CONST_LONGDOUBLE(0.);
1034 
1035  // up to exponent
1036  for ( ; ; c = *ptr++ ) {
1037  if (c >= '0' && c <= '9') {
1038  // digits: accumulate
1039  c = (char)(c - '0');
1040  anydigits = true;
1041  ++digits;
1042  if (first == 0) {
1043  first = c;
1044  if ( first == 0 ) {
1045  // omit leading zeros
1046  --digits;
1047  if (dot) {
1048  --dot_position;
1049  }
1050  }
1051  } else if (digits <= 9) {
1052  // first 9 digits come to 'first'
1053  first = first*10 + c;
1054  } else if (digits <= 18) {
1055  // next 9 digits come to 'second'
1056  first_mul *= 10;
1057  second = second*10 + c;
1058  } else {
1059  // other digits come to 'third'
1060  second_mul *= NCBI_CONST_LONGDOUBLE(10.);
1061  third = third * NCBI_CONST_LONGDOUBLE(10.) + c;
1062  }
1063  }
1064  else if (c == '.') {
1065  // dot
1066  // if second dot, stop
1067  if (dot) {
1068  --ptr;
1069  break;
1070  }
1071  dot_position = digits;
1072  dot = true;
1073  }
1074  else if (c == 'e' || c == 'E') {
1075  // if exponent, stop
1076  if (!anydigits) {
1077  --ptr;
1078  break;
1079  }
1080  expn = true;
1081  break;
1082  }
1083  else {
1084  --ptr;
1085  if (!anydigits) {
1086  if ( !dot && (c == 'n' || c == 'N') &&
1087  NStr::strncasecmp(ptr,"nan",3)==0) {
1088  if (endptr) {
1089  *endptr = (char*)(ptr+3);
1090  }
1091  return HUGE_VAL/HUGE_VAL; /* NCBI_FAKE_WARNING */
1092  }
1093  if ( (c == 'i' || c == 'I') ) {
1094  if ( NStr::strncasecmp(ptr,"inf",3)==0) {
1095  ptr += 3;
1096  if ( NStr::strncasecmp(ptr,"inity",5)==0) {
1097  ptr += 5;
1098  }
1099  if (endptr) {
1100  *endptr = (char*)ptr;
1101  }
1102  return sign < 0 ? -HUGE_VAL : HUGE_VAL;
1103  }
1104  }
1105  }
1106  break;
1107  }
1108  }
1109  // if no digits, stop now - error
1110  if (!anydigits) {
1111  if (endptr) {
1112  *endptr = (char*)start;
1113  }
1114  err_guard.Set(EINVAL);
1115  return 0.;
1116  }
1117  int exponent = dot ? dot_position - digits : 0;
1118 
1119  // read exponent
1120  if (expn && *ptr) {
1121  int expvalue = 0;
1122  bool expsign = false, expnegate= false;
1123  int expdigits= 0;
1124  for( ; ; ++ptr) {
1125  c = *ptr;
1126  // sign: should be no digits at this point
1127  if (c == '-' || c == '+') {
1128  // if there was sign or digits, stop
1129  if (expsign || expdigits) {
1130  break;
1131  }
1132  expsign = true;
1133  expnegate = c == '-';
1134  }
1135  // digits: accumulate
1136  else if (c >= '0' && c <= '9') {
1137  ++expdigits;
1138  int newexpvalue = expvalue*10 + (c-'0');
1139  if (newexpvalue > expvalue) {
1140  expvalue = newexpvalue;
1141  }
1142  }
1143  else {
1144  break;
1145  }
1146  }
1147  // if no digits, rollback
1148  if (!expdigits) {
1149  // rollback sign
1150  if (expsign) {
1151  --ptr;
1152  }
1153  // rollback exponent
1154  if (expn) {
1155  --ptr;
1156  }
1157  }
1158  else {
1159  exponent = expnegate ? exponent - expvalue : exponent + expvalue;
1160  }
1161  }
1162  long double ret;
1163  if ( first_mul > 1 ) {
1164  _ASSERT(first);
1165  ret = ((long double)first * first_mul + second)* second_mul + third;
1166  }
1167  else {
1168  _ASSERT(first_mul == 1);
1169  _ASSERT(second == 0);
1170  _ASSERT(second_mul == 1);
1171  _ASSERT(third == 0);
1172  ret = first;
1173  }
1174  // calculate exponent
1175  if ( first && exponent ) {
1176  // multiply by power of 10 only non-zero mantissa
1177  if (exponent > 2*DBL_MAX_10_EXP) {
1178  ret = (flags & fDecimalPosixFinite) ? DBL_MAX : HUGE_VAL;
1179  err_guard.Set(ERANGE);
1180  } else if (exponent < 2*DBL_MIN_10_EXP) {
1181  ret = (flags & fDecimalPosixFinite) ? DBL_MIN : 0.;
1182  err_guard.Set(ERANGE);
1183  } else {
1184  if ( exponent > 0 ) {
1185  static const double mul1[16] = {
1186  1, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7,
1187  1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15
1188  };
1189  ret *= mul1[exponent&15];
1190  if ( exponent >>= 4 ) {
1191  static const long double mul2[16] = {
1192  NCBI_CONST_LONGDOUBLE(1e0),
1193  NCBI_CONST_LONGDOUBLE(1e16),
1194  NCBI_CONST_LONGDOUBLE(1e32),
1195  NCBI_CONST_LONGDOUBLE(1e48),
1196  NCBI_CONST_LONGDOUBLE(1e64),
1197  NCBI_CONST_LONGDOUBLE(1e80),
1198  NCBI_CONST_LONGDOUBLE(1e96),
1199  NCBI_CONST_LONGDOUBLE(1e112),
1200  NCBI_CONST_LONGDOUBLE(1e128),
1201  NCBI_CONST_LONGDOUBLE(1e144),
1202  NCBI_CONST_LONGDOUBLE(1e160),
1203  NCBI_CONST_LONGDOUBLE(1e176),
1204  NCBI_CONST_LONGDOUBLE(1e192),
1205  NCBI_CONST_LONGDOUBLE(1e208),
1206  NCBI_CONST_LONGDOUBLE(1e224),
1207  NCBI_CONST_LONGDOUBLE(1e240)
1208  };
1209  ret *= mul2[exponent&15];
1210  for ( exponent >>= 4; exponent; --exponent ) {
1211  ret *= NCBI_CONST_LONGDOUBLE(1e256);
1212  }
1213  }
1214  if (!finite(double(ret))) {
1215  if (flags & fDecimalPosixFinite) {
1216  ret = DBL_MAX;
1217  }
1218  err_guard.Set(ERANGE);
1219  }
1220  }
1221  else {
1222  exponent = -exponent;
1223  static const long double mul1[16] = {
1224  NCBI_CONST_LONGDOUBLE(1e-0),
1225  NCBI_CONST_LONGDOUBLE(1e-1),
1226  NCBI_CONST_LONGDOUBLE(1e-2),
1227  NCBI_CONST_LONGDOUBLE(1e-3),
1228  NCBI_CONST_LONGDOUBLE(1e-4),
1229  NCBI_CONST_LONGDOUBLE(1e-5),
1230  NCBI_CONST_LONGDOUBLE(1e-6),
1231  NCBI_CONST_LONGDOUBLE(1e-7),
1232  NCBI_CONST_LONGDOUBLE(1e-8),
1233  NCBI_CONST_LONGDOUBLE(1e-9),
1234  NCBI_CONST_LONGDOUBLE(1e-10),
1235  NCBI_CONST_LONGDOUBLE(1e-11),
1236  NCBI_CONST_LONGDOUBLE(1e-12),
1237  NCBI_CONST_LONGDOUBLE(1e-13),
1238  NCBI_CONST_LONGDOUBLE(1e-14),
1239  NCBI_CONST_LONGDOUBLE(1e-15)
1240  };
1241  ret *= mul1[exponent&15];
1242  if ( exponent >>= 4 ) {
1243  static const long double mul2[16] = {
1244  NCBI_CONST_LONGDOUBLE(1e-0),
1245  NCBI_CONST_LONGDOUBLE(1e-16),
1246  NCBI_CONST_LONGDOUBLE(1e-32),
1247  NCBI_CONST_LONGDOUBLE(1e-48),
1248  NCBI_CONST_LONGDOUBLE(1e-64),
1249  NCBI_CONST_LONGDOUBLE(1e-80),
1250  NCBI_CONST_LONGDOUBLE(1e-96),
1251  NCBI_CONST_LONGDOUBLE(1e-112),
1252  NCBI_CONST_LONGDOUBLE(1e-128),
1253  NCBI_CONST_LONGDOUBLE(1e-144),
1254  NCBI_CONST_LONGDOUBLE(1e-160),
1255  NCBI_CONST_LONGDOUBLE(1e-176),
1256  NCBI_CONST_LONGDOUBLE(1e-192),
1257  NCBI_CONST_LONGDOUBLE(1e-208),
1258  NCBI_CONST_LONGDOUBLE(1e-224),
1259  NCBI_CONST_LONGDOUBLE(1e-240)
1260  };
1261  ret *= mul2[exponent&15];
1262  for ( exponent >>= 4; exponent; --exponent ) {
1263  ret *= NCBI_CONST_LONGDOUBLE(1e-256);
1264  }
1265  }
1266  if ( ret < DBL_MIN ) {
1267  if (flags & fDecimalPosixFinite) {
1268  ret = DBL_MIN;
1269  }
1270  err_guard.Set(ERANGE);
1271  }
1272  }
1273  }
1274  }
1275  if ( sign < 0 ) {
1276  ret = -ret;
1277  }
1278  // done
1279  if (endptr) {
1280  *endptr = (char*)ptr;
1281  }
1282  return (double)ret;
1283 }
1284 
1285 
1286 /// @internal
/// Parse a floating-point number from the NUL-terminated buffer "str".
///
/// "size" is the length of "str" without the terminator (asserted below).
/// The number is parsed either by NStr::StringToDoublePosix() or by strtod(),
/// depending on the fDecimalPosix flag. ERANGE reported through errno is
/// deliberately not treated as an error. With fDecimalPosixFinite a non-zero,
/// non-NaN result has its magnitude clamped into [DBL_MIN, DBL_MAX].
///
/// NOTE(review): this listing skips source lines 1288, 1291, 1295, 1301,
/// 1303-1304, 1339, 1375 and 1377-1378, so part of the signature and several
/// statements are not visible here -- consult the repository copy.
1287 static double s_StringToDouble(const char* str, size_t size,
      // NOTE(review): listing line 1288 is absent; it presumably completes the
      // parameter list with the "flags" argument used throughout the body.
1289 {
1290  _ASSERT(str[size] == '\0');
      // NOTE(review): listing line 1291 (the condition guarding this throw)
      // is absent.
1292  NCBI_THROW2(CStringException, eBadArgs,
1293  "NStr::StringToDouble(): mutually exclusive flags specified", 0);
1294  }
1296 
1297  // Current position in the string
1298  SIZE_TYPE pos = 0;
1299 
1300  // Skip allowed leading symbols
      // NOTE(review): listing lines 1301 and 1303-1304 (the enclosing "if" and
      // the start of the skipping call) are absent.
1302  bool spaces = ((flags & NStr::fAllowLeadingSymbols) ==
1305  spaces ? eSkipSpacesOnly : eSkipAllAllowed, flags);
1306  }
1307  // Check mandatory sign
1308  if (flags & NStr::fMandatorySign) {
1309  switch (str[pos]) {
1310  case '-':
1311  case '+':
1312  break;
1313  default:
1314  S2N_CONVERT_ERROR_INVAL(double);
1315  }
1316  }
1317  // For consistency make additional check on incorrect leading symbols.
1318  // Because strtod() may just skip such symbols.
1319  if (!(flags & NStr::fAllowLeadingSymbols)) {
1320  char c = str[pos];
1321  if ( !isdigit((unsigned char)c) && !s_IsDecimalPoint(c,flags) && c != '-' && c != '+') {
1322  S2N_CONVERT_ERROR_INVAL(double);
1323  }
1324  }
1325 
1326  // Conversion
      // errno is cleared first so that a non-zero value after parsing can be
      // attributed to the conversion itself.
1327  int& errno_ref = errno;
1328  errno_ref = 0;
1329 
1330  char* endptr = 0;
1331  const char* begptr = str + pos;
1332 
1333  double n;
1334  if (flags & NStr::fDecimalPosix) {
1335  n = NStr::StringToDoublePosix(begptr, &endptr, flags);
1336  } else {
1337  n = strtod(begptr, &endptr);
1338  }
      // NOTE(review): listing line 1339 (the condition opening this block) is
      // absent. The block parses the same text a second time with
      // StringToDoublePosix() and keeps whichever parse consumed more input.
1340  char* endptr2 = 0;
1341  double n2 = NStr::StringToDoublePosix(begptr, &endptr2, flags);
1342  if (!endptr || (endptr2 && endptr2 > endptr)) {
1343  n = n2;
1344  endptr = endptr2;
1345  }
1346  }
      // Nothing was consumed: report an invalid-argument conversion error.
1347  if ( !endptr || endptr == begptr ) {
1348  S2N_CONVERT_ERROR(double, kEmptyStr, EINVAL, s_DiffPtr(endptr, begptr) + pos);
1349  }
1350  // some libs set ERANGE, others do not
1351  // here, we do not consider ERANGE as error
1352  if ( errno_ref && errno_ref != ERANGE ) {
1353  S2N_CONVERT_ERROR(double, kEmptyStr, errno_ref, s_DiffPtr(endptr, begptr) + pos);
1354  }
1355  // special cases
      // With fDecimalPosixFinite the magnitude is forced into
      // [DBL_MIN, DBL_MAX]; the sign is removed first and restored afterwards.
1356  if ((flags & NStr::fDecimalPosixFinite) && n != 0. && !isnan(n))
1357  {
1358  bool is_negative = n < 0.;
1359  if (is_negative) {
1360  n = -n;
1361  }
1362  if ( n < DBL_MIN) {
1363  n = DBL_MIN;
1364  } else if (!finite(n)) {
1365  n = DBL_MAX;
1366  }
1367  if (is_negative) {
1368  n = -n;
1369  }
1370  }
1371 
      // Advance past the characters consumed by the conversion.
1372  pos += s_DiffPtr(endptr, begptr);
1373 
1374  // Skip allowed trailing symbols
      // NOTE(review): listing lines 1375 and 1377-1378 (the enclosing "if" and
      // the skipping call) are absent.
1376  bool spaces = ((flags & NStr::fAllowTrailingSymbols) ==
1379  }
1380  CHECK_ENDPTR(double);
1381  return n;
1382 }
1383 
1384 
1385 double NStr::StringToDoubleEx(const char* str, size_t size,
1386  TStringToNumFlags flags)
1388  return s_StringToDouble(str, size, flags);
1389 }
1390 
1391 
1392 double NStr::StringToDouble(const CTempStringEx str, TStringToNumFlags flags)
1393 {
1394  size_t size = str.size();
1395  if ( str.HasZeroAtEnd() ) {
1396  // string has zero at the end already
1397  return s_StringToDouble(str.data(), size, flags);
1398  }
1399  char buf[256]; // small temporary buffer on stack for appending zero char
1400  if ( size < sizeof(buf) ) {
1401  memcpy(buf, str.data(), size);
1402  buf[size] = '\0';
1403  return s_StringToDouble(buf, size, flags);
1404  }
1405  else {
1406  // use std::string() to allocate memory for appending zero char
1407  return s_StringToDouble(string(str).c_str(), size, flags);
1408  }
1409 }
1410 
1411 /// @internal
/// Apply an optional data-size qualifier found at str[pos] to "value".
///
/// The character at "pos" is upper-cased; 'K', 'M' and 'G' multiply the value
/// by 1024, 1024^2 and 1024^3 respectively, after checking against kMax_UI8
/// that the product fits. A 'B' directly after the qualifier is consumed too.
/// "pos" is advanced past every consumed character. When str[pos] is the
/// terminating zero, "value" is returned unchanged.
///
/// NOTE(review): this listing skips source lines 1412, 1415, 1417 and 1452,
/// so the function name line, one parameter, the first body statement and the
/// body of the "default" branch are not visible here. The name is taken from
/// the call site s_DataSizeConvertQual(str, pos, n, flags) further below.
1413  SIZE_TYPE& pos,
1414  Uint8 value,
1416 {
1418 
1419  unsigned char ch = str[pos];
1420  if ( !ch ) {
1421  return value;
1422  }
1423 
1424  ch = (unsigned char)toupper(ch);
1425  Uint8 v = value;
1426  bool err = false;
1427 
      // Each case checks for overflow before multiplying; the error itself is
      // raised once, after the switch.
1428  switch(ch) {
1429  case 'K':
1430  pos++;
1431  if ((kMax_UI8 / 1024) < v) {
1432  err = true;
1433  }
1434  v *= 1024;
1435  break;
1436  case 'M':
1437  pos++;
1438  if ((kMax_UI8 / 1024 / 1024) < v) {
1439  err = true;
1440  }
1441  v *= 1024 * 1024;
1442  break;
1443  case 'G':
1444  pos++;
1445  if ((kMax_UI8 / 1024 / 1024 / 1024) < v) {
1446  err = true;
1447  }
1448  v *= 1024 * 1024 * 1024;
1449  break;
1450  default:
1451  // error -- the "qual" points to the last unprocessed symbol
      // NOTE(review): listing line 1452 (the statement of this branch) is
      // absent from this extract.
1453  }
1454  if ( err ) {
1455  S2N_CONVERT_ERROR_OVERFLOW(DataSize);
1456  }
1457 
      // Accept an optional trailing 'B' (case-insensitive), as in "KB".
1458  ch = str[pos];
1459  if ( ch && toupper(ch) == 'B' ) {
1460  pos++;
1461  }
1462  return v;
1463 }
1464 
1465 
/// Convert a string such as "<number>[K|M|G][B]" to Uint8, in the given base.
///
/// Steps visible in this listing: validate "base" (2..16), optionally skip
/// leading symbols, handle a leading '+', check the radix prefix, locate the
/// end of the digit run (commas allowed with fAllowCommas), convert the digits
/// with StringToUInt8(), then apply the size qualifier through
/// s_DataSizeConvertQual().
///
/// NOTE(review): this listing skips source lines 1466, 1475, 1485-1486, 1496,
/// 1536-1537 and 1539, so the first signature line and several statements are
/// not visible here. The function name is not shown; it is presumably the
/// base-taking overload of NStr::StringToUInt8_DataSize -- confirm.
1467  TStringToNumFlags flags,
1468  int base)
1469 {
1470  // We have a limited base range here
1471  if ( base < 2 || base > 16 ) {
1472  NCBI_THROW2(CStringException, eConvert,
1473  "Bad numeric base '" + NStr::IntToString(base)+ "'", 0);
1474  }
1476 
1477  // Current position in the string
1478  SIZE_TYPE pos = 0;
1479 
1480  // Find end of number representation
1481  {{
1482  // Skip allowed leading symbols
1483  if (flags & fAllowLeadingSymbols) {
1484  bool spaces = ((flags & fAllowLeadingSymbols) ==
1487  spaces ? eSkipSpacesOnly : eSkipAllAllowed, flags);
1488  }
1489  // Determine sign
1490  if (str[pos] == '+') {
1491  pos++;
1492  // strip fMandatorySign flag
1493  flags &= ~fMandatorySign;
1494  } else {
1495  if (flags & fMandatorySign) {
      // NOTE(review): listing line 1496 (the statement executed when a
      // mandatory sign is missing) is absent from this extract.
1497  }
1498  }
1499  // Check radix base
1500  if ( !s_CheckRadix(str, pos, base) ) {
1501  S2N_CONVERT_ERROR_RADIX(Uint8, "bad numeric base '" +
1502  NStr::IntToString(base) + "'");
1503  }
1504  }}
1505 
      // Scan forward over characters valid for this radix (and commas, when
      // allowed); "ch" ends up holding the first character after the number.
1506  SIZE_TYPE numpos = pos;
1507  char ch = str[pos];
1508  while (ch) {
1509  if ( !s_IsGoodCharForRadix(ch, base) &&
1510  ((ch != ',') || !(flags & fAllowCommas)) ) {
1511  break;
1512  }
1513  ch = str[++pos];
1514  }
1515  // If string is empty, just use whole remaining string for conversion
1516  // (for correct error reporting)
1517  if (pos-numpos == 0) {
1518  pos = str.length();
1519  }
1520 
1521  // Convert to number
1522  Uint8 n = StringToUInt8(CTempString(str.data()+numpos, pos-numpos),
1523  flags, base);
1524  if ( !n && errno ) {
1525  // If exceptions are enabled, one has already been thrown by now.
1526  // The errno is also set, so just return a zero.
1527  return 0;
1528  }
1529  // Check trailer (KB, MB, ...)
1530  if ( ch ) {
1531  n = s_DataSizeConvertQual(str, pos, n, flags);
1532  }
1533  // Skip allowed trailing symbols
1534  if (flags & fAllowTrailingSymbols) {
1535  bool spaces = ((flags & fAllowTrailingSymbols) ==
      // NOTE(review): listing lines 1536-1537 (the rest of this statement and
      // the skipping call) and 1539 are absent from this extract.
1538  }
1540  return n;
1541 }
1542 
1543 
/// Convert a human-readable data size ("10", "1.5 KB", "3MiB", ...) to Uint8.
///
/// Outline of the visible code:
///  1. reject flags outside "allowed_flags";
///  2. optionally skip leading symbols and handle the sign ('-' is an error);
///  3. scan the number: digits, at most one '.', commas when fAllowCommas;
///  4. read an optional space and a suffix from K/M/G/T/P/E, optionally
///     followed by "iB" (forces binary multipliers) or "B"; a bare "B" is
///     accepted too;
///  5. optionally skip trailing symbols; anything left over is EINVAL;
///  6. apply the multiplier on the decimal digit array (long multiplication
///     for binary multipliers, digit shifting for decimal ones);
///  7. fold the digits into a Uint8 with an overflow check (ERANGE), rounding
///     up when the first dropped fractional digit is >= 5.
///
/// NOTE(review): this listing skips source lines 1544, 1550-1551, 1553-1554,
/// 1559 and 1634, so the first signature line, part of "allowed_flags" and
/// part of one condition are not visible here. The function name is not
/// shown; it is presumably NStr::StringToUInt8_DataSize -- confirm.
/// NOTE(review): isdigit()/isspace() are called below with a plain "char";
/// for negative char values that is undefined behaviour per the C library
/// contract -- consider casting to unsigned char.
1545  TStringToNumFlags flags /* = 0 */)
1546 {
1547  TStringToNumFlags allowed_flags = fConvErr_NoThrow +
1548  fMandatorySign +
1549  fAllowCommas +
1552  fDS_ForceBinary +
1555 
1556  if ((flags & allowed_flags) != flags) {
1557  NCBI_THROW2(CStringException, eConvert, "Wrong set of flags", 0);
1558  }
1560 
1561  const char* str_ptr = str.data();
1562  const char* str_end = str_ptr + str.size();
      // Skip leading symbols: stop at the first digit, or at a sign directly
      // followed by a digit when a sign is mandatory. Non-space characters are
      // skipped only when more than leading spaces is allowed.
1563  if (flags & fAllowLeadingSymbols) {
1564  bool allow_all = (flags & fAllowLeadingSymbols) != fAllowLeadingSpaces;
1565  for (; str_ptr < str_end; ++str_ptr) {
1566  char c = *str_ptr;
1567  if (isdigit(c))
1568  break;
1569  if (isspace(c))
1570  continue;
1571  if ((c == '+' || c == '-') && (flags & fMandatorySign)
1572  && str_ptr + 1 < str_end && isdigit(*(str_ptr + 1)))
1573  {
1574  break;
1575  }
1576  if (!allow_all)
1577  break;
1578  }
1579  }
1580 
      // A '+' is consumed; a '-' (or a missing mandatory sign) is an error.
1581  if (str_ptr < str_end && *str_ptr == '+') {
1582  ++str_ptr;
1583  }
1584  else if ((str_ptr < str_end && *str_ptr == '-')
1585  || (flags & fMandatorySign))
1586  {
1587  S2N_CONVERT_ERROR(Uint8, kEmptyStr, EINVAL, str_ptr - str.data());
1588  }
1589 
1590  const char* num_start = str_ptr;
1591  bool have_dot = false;
1592  bool allow_commas = (flags & fAllowCommas) != 0;
1593  bool allow_dot = (flags & fDS_ProhibitFractions) == 0;
1594  Uint4 digs_pre_dot = 0, digs_post_dot = 0;
1595 
      // Scan the numeric part, counting digits before and after the dot.
      // A second dot, a comma after the dot, or a separator at the very start
      // ends the number; a separator right after a comma backs up one char.
1596  for (; str_ptr < str_end; ++str_ptr) {
1597  char c = *str_ptr;
1598  if (isdigit(c)) {
1599  if (have_dot)
1600  ++digs_post_dot;
1601  else
1602  ++digs_pre_dot;
1603  }
1604  else if (c == '.' && allow_dot) {
1605  if (have_dot || str_ptr == num_start)
1606  break;
1607  if (*(str_ptr - 1) == ',') {
1608  --str_ptr;
1609  break;
1610  }
1611  have_dot = true;
1612  }
1613  else if (c == ',' && allow_commas) {
1614  if (have_dot || str_ptr == num_start)
1615  break;
1616  if (*(str_ptr - 1) == ',') {
1617  --str_ptr;
1618  break;
1619  }
1620  }
1621  else
1622  break;
1623  }
      // Do not leave a dangling '.' or ',' at the end of the number.
1624  if (have_dot && digs_post_dot == 0)
1625  --str_ptr;
1626  else if (str_ptr > num_start && *(str_ptr - 1) == ',')
1627  --str_ptr;
1628 
1629  const char* num_end = str_ptr;
1630  if (num_start == num_end) {
1631  S2N_CONVERT_ERROR(Uint8, kEmptyStr, EINVAL, str_ptr - str.data());
1632  }
      // NOTE(review): listing line 1634 (the rest of this condition) is absent
      // from this extract; the visible part accepts one space before a suffix.
1633  if (str_ptr < str_end && *str_ptr == ' '
1635  {
1636  ++str_ptr;
1637  }
1638  char suff_c = 0;
1639  if (str_ptr < str_end)
1640  suff_c = (char)toupper(*str_ptr);
1641 
      // s_BinCoefs[i] is 1024^(i+1) written in decimal, matching s_Suffixes[i].
1642  static const char s_Suffixes[] = {'K', 'M', 'G', 'T', 'P', 'E'};
1643  static const char* const s_BinCoefs[] = {"1024", "1048576", "1073741824",
1644  "1099511627776",
1645  "1125899906842624",
1646  "1152921504606846976"};
1647  static const Uint4 s_NumSuffixes = (Uint4)(sizeof(s_Suffixes) / sizeof(s_Suffixes[0]));
1648 
1649  bool binary_suff = (flags & fDS_ForceBinary) != 0;
1650  Uint4 suff_idx = 0;
1651  for (; suff_idx < s_NumSuffixes; ++suff_idx) {
1652  if (suff_c == s_Suffixes[suff_idx])
1653  break;
1654  }
      // suff_idx == s_NumSuffixes from here on means "no multiplier suffix".
1655  if (suff_idx < s_NumSuffixes) {
1656  ++str_ptr;
1657  if (str_ptr + 1 < str_end && toupper(*str_ptr) == 'I'
1658  && toupper(*(str_ptr + 1)) == 'B')
1659  {
1660  str_ptr += 2;
1661  binary_suff = true;
1662  }
1663  else if (str_ptr < str_end && toupper(*str_ptr) == 'B')
1664  ++str_ptr;
1665  }
1666  else if (suff_c == 'B') {
1667  ++str_ptr;
1668  }
      // No suffix followed the optional space, so give the space back.
1669  else if (*(str_ptr - 1) == ' ')
1670  --str_ptr;
1671 
1672  if (flags & fAllowTrailingSymbols) {
1673  bool allow_all = (flags & fAllowTrailingSymbols) != fAllowTrailingSpaces;
1674  for (; str_ptr < str_end; ++str_ptr) {
1675  char c = *str_ptr;
1676  if (isspace(c))
1677  continue;
1678  if (!allow_all)
1679  break;
1680  }
1681  }
1682  if (str_ptr != str_end) {
1683  S2N_CONVERT_ERROR(Uint8, kEmptyStr, EINVAL, str_ptr - str.data());
1684  }
1685 
      // Collect the digits (separators dropped) as values 0..9, most
      // significant first.
1686  Uint4 orig_digs = digs_pre_dot + digs_post_dot;
1687  AutoArray<Uint1> orig_num(orig_digs);
1688  str_ptr = num_start;
1689  for (Uint4 i = 0; str_ptr < num_end; ++str_ptr) {
1690  if (*str_ptr == ',' || *str_ptr == '.')
1691  continue;
1692  orig_num[i++] = Uint1(*str_ptr - '0');
1693  }
1694 
1695  Uint1* num_to_conv = orig_num.get();
1696  Uint4 digs_to_conv = digs_pre_dot;
1697  AutoArray<Uint1> mul_num;
1698  if (binary_suff && suff_idx < s_NumSuffixes) {
      // Binary multiplier: schoolbook long multiplication of the digit array
      // by the decimal digits of the coefficient, accumulated into mul_num.
1699  const char* coef = s_BinCoefs[suff_idx];
1700  Uint4 coef_size = Uint4(strlen(coef));
1701  mul_num = new Uint1[orig_digs + coef_size];
1702  memset(mul_num.get(), 0, orig_digs + coef_size);
1703  for (Uint4 coef_i = 0; coef_i < coef_size; ++coef_i) {
1704  Uint1 coef_d = Uint1(coef[coef_i] - '0');
1705  Uint1 carry = 0;
1706  Uint4 res_idx = orig_digs + coef_i;
1707  for (int orig_i = orig_digs - 1; orig_i >= 0; --orig_i, --res_idx) {
1708  Uint1 orig_d = orig_num[orig_i];
1709  Uint1 res_d = Uint1(coef_d * orig_d + carry + mul_num[res_idx]);
1710  carry = 0;
1711  while (res_d >= 10) {
1712  res_d = (Uint1)(res_d - 10); // res_d -= 10;
1713  ++carry;
1714  }
1715  mul_num[res_idx] = res_d;
1716  }
1717  _ASSERT(carry <= 9);
      // Propagate the remaining carry into the more significant digits.
1718  for (; carry != 0; --res_idx) {
1719  Uint1 res_d = Uint1(mul_num[res_idx] + carry);
1720  carry = 0;
1721  while (res_d >= 10) {
1722  res_d = (Uint1)(res_d - 10); // res_d -= 10;
1723  ++carry;
1724  }
1725  mul_num[res_idx] = res_d;
1726  }
1727  }
      // The last digs_post_dot digits of the product are fractional; leading
      // zeros of the integer part are skipped.
1728  digs_to_conv = orig_digs + coef_size - digs_post_dot;
1729  num_to_conv = mul_num.get();
1730  while (digs_to_conv > 1 && *num_to_conv == 0) {
1731  --digs_to_conv;
1732  ++num_to_conv;
1733  }
1734  }
1735  else if (suff_idx < s_NumSuffixes) {
      // Decimal multiplier 10^(3*(suff_idx+1)): move the decimal point right,
      // appending zeros when there are not enough fractional digits.
1736  Uint4 coef_size = (suff_idx + 1) * 3;
1737  if (coef_size <= digs_post_dot) {
1738  digs_to_conv += coef_size;
1739  digs_post_dot -= coef_size;
1740  }
1741  else {
1742  digs_to_conv += digs_post_dot;
1743  coef_size -= digs_post_dot;
1744  digs_post_dot = 0;
1745  mul_num = new Uint1[digs_to_conv + coef_size];
1746  memmove(mul_num.get(), num_to_conv, digs_to_conv);
1747  memset(mul_num.get() + digs_to_conv, 0, coef_size);
1748  num_to_conv = mul_num.get();
1749  digs_to_conv += coef_size;
1750  }
1751  }
1752 
      // Fold the integer digits into a Uint8, checking before each step that
      // n * 10 + d still fits into kMax_UI8.
1753  const Uint8 limdiv = kMax_UI8/10;
1754  const int limoff = int(kMax_UI8 % 10);
1755  Uint8 n = 0;
1756  for (Uint4 i = 0; i < digs_to_conv; ++i) {
1757  Uint1 d = num_to_conv[i];
1758  if (n >= limdiv && (n > limdiv || d > limoff)) {
1759  S2N_CONVERT_ERROR(Uint8, kEmptyStr, ERANGE, i);
1760  }
1761  n *= 10;
1762  n += d;
1763  }
      // Round up when the first remaining fractional digit is 5 or more.
1764  if (digs_post_dot != 0 && num_to_conv[digs_to_conv] >= 5) {
1765  if (n == kMax_UI8) {
1766  S2N_CONVERT_ERROR(Uint8, kEmptyStr, ERANGE, digs_to_conv);
1767  }
1768  ++n;
1769  }
1770  return n;
1771 }
1772 
1773 
1774 size_t NStr::StringToSizet(const CTempString str,
1775  TStringToNumFlags flags, int base)
1776 {
1777 #if (SIZEOF_SIZE_T > 4)
1778  return StringToUInt8(str, flags, base);
1779 #else
1780  return StringToUInt(str, flags, base);
1781 #endif
1783 
1784 
1785 /// @internal
/// Write an unsigned "value" into "out_str" using a radix other than 10.
///
/// Digits are produced from least to most significant into the tail of a
/// local buffer and appended to "out_str" afterwards. With fWithRadix, "0x" is
/// prepended for base 16 and "0" for base 8 (a zero value then yields just
/// "0"). fUseLowercase selects the lowercase digit table.
///
/// NOTE(review): this listing skips source line 1789, which presumably
/// declares the "flags" parameter used in the body.
1786 template <typename T>
1787 static void s_UnsignedOtherBaseToString(string& out_str,
1788  T value,
1790  int base)
1791 {
1792  _ASSERT(base != 10);
1793 
      // One char per bit of "value" is enough for any base >= 2.
1794  const SIZE_TYPE kBufSize = CHAR_BIT * sizeof(value);
1795  char buffer[kBufSize + 2]; // +2 for fWithRadix
1796  char* pos = buffer + kBufSize;
1797  const char* kDigit = (flags & NStr::fUseLowercase) ? kDigitLower : kDigitUpper;
1798 
1799  out_str.erase();
1800 
1801  if ( base == 16 ) {
1802  if ( flags & NStr::fWithRadix ) {
1803  out_str.append("0x");
1804  }
1805 
1806  do {
1807  *--pos = kDigit[value % 16];
1808  value /= 16;
1809  } while ( value );
1810  }
1811  else if ( base == 8 ) {
1812  if ( flags & NStr::fWithRadix ) {
1813  out_str.append("0");
1814  if ( value == 0 ) {
1815  // to prevent "00"
1816  return;
1817  }
1818  }
1819  do {
1820  *--pos = kDigit[value % 8];
1821  value /= 8;
1822  } while ( value );
1823  }
1824  else {
1825  do {
1826  *--pos = kDigit[value % base];
1827  value /= base;
1828  } while ( value );
1829  }
      // Append the digits written so far: [pos, buffer + kBufSize).
1830  out_str.append(pos, buffer + kBufSize - pos);
1831 }
1832 
1833 
1834 /// @internal
/// Write a signed integer in base 10 into "out_str".
///
/// "svalue" carries the sign. For a negative "svalue" the magnitude is
/// recomputed from it; otherwise the incoming "value" is used as the
/// magnitude. fWithCommas inserts a ',' between groups of three digits and
/// fWithSign prepends '+' to non-negative numbers.
///
/// NOTE(review): this listing skips source lines 1838 and 1872; line 1838
/// presumably declares the "flags" parameter used in the body.
/// NOTE(review): "-svalue" overflows when svalue == LONG_MIN, which is
/// undefined behaviour for a signed type -- confirm whether callers can pass
/// that value.
1835 static void s_SignedBase10ToString(string& out_str,
1836  unsigned long value,
1837  long svalue,
1839  int base)
1840 {
1841  _ASSERT(base == 10);
1842 
1843  const SIZE_TYPE kBufSize = CHAR_BIT * sizeof(value);
1844  char buffer[kBufSize+2];
1845  char* pos = buffer + kBufSize;
1846 
1847  if (svalue < 0) {
1848  value = static_cast<unsigned long>(-svalue);
1849  }
      // Digits are written backwards from the end of the buffer.
1850  if ((flags & NStr::fWithCommas)) {
      // cnt counts the digits written since the last separator.
1851  int cnt = -1;
1852  do {
1853  if (++cnt == 3) {
1854  *--pos = ',';
1855  cnt = 0;
1856  }
1857  *--pos = '0' + value % 10;
1858  value /= 10;
1859  } while (value);
1860  }
1861  else {
1862  do {
1863  *--pos = '0' + value % 10;
1864  value /= 10;
1865  } while (value);
1866  }
1867 
1868  if (svalue < 0)
1869  *--pos = '-';
1870  else if (flags & NStr::fWithSign)
1871  *--pos = '+';
1873  out_str.assign(pos, buffer + kBufSize - pos);
1874 }
1875 
1876 
1877 void NStr::IntToString(string& out_str, int svalue,
1878  TNumToStringFlags flags, int base)
1879 {
1880  if ( base < 2 || base > 36 ) {
1881  CNcbiError::SetErrno(errno = EINVAL);
1882  return;
1883  }
1884  unsigned int value = static_cast<unsigned int>(svalue);
1885  if ( base == 10 ) {
1886  s_SignedBase10ToString(out_str, value, svalue, flags, base);
1887  } else {
1888  s_UnsignedOtherBaseToString(out_str, value, flags, base);
1889  }
1890  errno = 0;
1891 }
1892 
1893 
1894 void NStr::LongToString(string& out_str, long svalue,
1895  TNumToStringFlags flags, int base)
1896 {
1897  if ( base < 2 || base > 36 ) {
1898  CNcbiError::SetErrno(errno = EINVAL);
1899  return;
1900  }
1901  unsigned long value = static_cast<unsigned long>(svalue);
1902  if ( base == 10 ) {
1903  s_SignedBase10ToString(out_str, value, svalue, flags, base);
1904  } else {
1905  s_UnsignedOtherBaseToString(out_str, value, flags, base);
1906  }
1907  errno = 0;
1908 }
1909 
1910 
1911 void NStr::ULongToString(string& out_str,
1912  unsigned long value,
1913  TNumToStringFlags flags,
1914  int base)
1915 {
1916  if ( base < 2 || base > 36 ) {
1917  CNcbiError::SetErrno(errno = EINVAL);
1918  return;
1919  }
1920  const SIZE_TYPE kBufSize = CHAR_BIT * sizeof(value);
1921  char buffer[kBufSize];
1922  char* pos = buffer + kBufSize;
1923  out_str.erase();
1924 
1925  if ( base == 10 ) {
1926  if ( (flags & fWithCommas) ) {
1927  int cnt = -1;
1928  do {
1929  if (++cnt == 3) {
1930  *--pos = ',';
1931  cnt = 0;
1932  }
1933  *--pos = '0' + value % 10;
1934  value /= 10;
1935  } while ( value );
1936  }
1937  else {
1938  do {
1939  *--pos = '0' + value % 10;
1940  value /= 10;
1941  } while ( value );
1942  }
1943 
1944  if ( (flags & fWithSign) ) {
1945  *--pos = '+';
1946  }
1947  out_str.assign(pos, buffer + kBufSize - pos);
1948  }
1949  else {
1950  s_UnsignedOtherBaseToString(out_str, value, flags, base);
1951  }
1952  errno = 0;
1953 }
1954 
1955 
1957 // On some platforms division of 64-bit integers is very slow,
1958 // so base-10 printing is optimized by processing the number in chunks.
1959 // This works only for radix base == 10.
     // PRINT_INT8_CHUNK is 10^9 and PRINT_INT8_CHUNK_SIZE is its number of
     // decimal digits; s_PrintBase10Uint8() below uses them.
1961 #define PRINT_INT8_CHUNK 1000000000
1962 #define PRINT_INT8_CHUNK_SIZE 9
1963 
1964 /// @internal
/// Write "value" in base 10 backwards, ending just before "pos".
///
/// "pos" points one past the last character to be written; the function
/// returns a pointer to the first character it wrote. With fWithCommas a ','
/// is inserted between groups of three digits. When PRINT_INT8_CHUNK is
/// defined, values that do not fit in 32 bits are emitted in 9-digit chunks
/// so that the per-digit divisions run on a Uint4.
///
/// NOTE(review): this listing skips source lines 1967, 1976 and 2015. Line
/// 1967 presumably declares the "flags" parameter; lines 1976 and 2015
/// presumably divide "value" by PRINT_INT8_CHUNK before the remainder is
/// computed -- confirm against the repository copy.
1965 static char* s_PrintBase10Uint8(char* pos,
1966  Uint8 value,
1968 {
1969  if ( (flags & NStr::fWithCommas) ) {
      // cnt counts the digits written since the last separator.
1970  int cnt = -1;
1971 #ifdef PRINT_INT8_CHUNK
1972  // while n doesn't fit in Uint4 process the number
1973  // by 9-digit chunks within 32-bit Uint4
1974  while ( value & ~Uint8(Uint4(~0)) ) {
1975  Uint4 chunk = Uint4(value);
1977  chunk -= PRINT_INT8_CHUNK*Uint4(value);
1978  char* end = pos - PRINT_INT8_CHUNK_SIZE - 2; // 9-digit chunk should have 2 commas
1979  do {
1980  if (++cnt == 3) {
1981  *--pos = ',';
1982  cnt = 0;
1983  }
1984  *--pos = '0' + chunk % 10;
1985  chunk /= 10;
1986  } while ( pos != end );
1987  }
1988  // process all remaining digits in 32-bit number
1989  Uint4 chunk = Uint4(value);
1990  do {
1991  if (++cnt == 3) {
1992  *--pos = ',';
1993  cnt = 0;
1994  }
1995  *--pos = '0' + chunk % 10;
1996  chunk /= 10;
1997  } while ( chunk );
1998 #else
1999  do {
2000  if (++cnt == 3) {
2001  *--pos = ',';
2002  cnt = 0;
2003  }
2004  *--pos = '0' + value % 10;
2005  value /= 10;
2006  } while ( value );
2007 #endif
2008  }
2009  else {
2010 #ifdef PRINT_INT8_CHUNK
2011  // while n doesn't fit in Uint4 process the number
2012  // by 9-digit chunks within 32-bit Uint4
2013  while ( value & ~Uint8(Uint4(~0)) ) {
2014  Uint4 chunk = Uint4(value);
2016  chunk -= PRINT_INT8_CHUNK*Uint4(value);
      // Exactly PRINT_INT8_CHUNK_SIZE digits are written per chunk, including
      // leading zeros of the chunk.
2017  char* end = pos - PRINT_INT8_CHUNK_SIZE;
2018  do {
2019  *--pos = '0' + chunk % 10;
2020  chunk /= 10;
2021  } while ( pos != end );
2022  }
2023  // process all remaining digits in 32-bit number
2024  Uint4 chunk = Uint4(value);
2025  do {
2026  *--pos = '0' + chunk % 10;
2027  chunk /= 10;
2028  } while ( chunk );
2029 #else
2030  do {
2031  *--pos = '0' + value % 10;
2032  value /= 10;
2033  } while ( value );
2034 #endif
2035  }
2036  return pos;
2037 }
2038 
2039 
2040 void NStr::Int8ToString(string& out_str, Int8 svalue,
2041  TNumToStringFlags flags, int base)
2042 {
2043  if ( base < 2 || base > 36 ) {
2044  CNcbiError::SetErrno(errno = EINVAL);
2045  return;
2046  }
2047  Uint8 value;
2048  if (base == 10) {
2049  const SIZE_TYPE kBufSize = CHAR_BIT * sizeof(value);
2050  char buffer[kBufSize];
2051 
2052  value = static_cast<Uint8>(svalue<0?-svalue:svalue);
2053  char* pos = s_PrintBase10Uint8(buffer + kBufSize, value, flags);
2054  if (svalue < 0)
2055  *--pos = '-';
2056  else if (flags & fWithSign)
2057  *--pos = '+';
2058  out_str.assign(pos, buffer + kBufSize - pos);
2059  } else {
2060  value = static_cast<Uint8>(svalue);
2061  s_UnsignedOtherBaseToString(out_str, value, flags, base);
2062  }
2063  errno = 0;
2064 }
2065 
2066 
2067 void NStr::UInt8ToString(string& out_str, Uint8 value,
2068  TNumToStringFlags flags, int base)
2069 {
2070  if ( base < 2 || base > 36 ) {
2071  CNcbiError::SetErrno(errno = EINVAL);
2072  return;
2073  }
2074  if (base == 10) {
2075  const SIZE_TYPE kBufSize = CHAR_BIT * sizeof(value);
2076  char buffer[kBufSize];
2077 
2078  char* pos = s_PrintBase10Uint8(buffer + kBufSize, value, flags);
2079  if ( flags & fWithSign ) {
2080  *--pos = '+';
2081  }
2082  out_str.assign(pos, buffer + kBufSize - pos);
2083  } else {
2084  s_UnsignedOtherBaseToString(out_str, value, flags, base);
2085  }
2086  errno = 0;
2087 }
2088 
2089 
/// Format "value" as a human-readable data size, e.g. "1.5 KB" or "3 MiB".
///
/// Outline of the visible code:
///  - flags outside "allowed_flags" raise CStringException(eConvert);
///  - "max_digits" is raised to 3 when smaller;
///  - without fDS_Binary the suffix is chosen among powers of 1000 and the
///    digits come straight from the decimal representation of "value";
///  - with fDS_Binary the suffix is chosen among powers of 1024 and the
///    fractional digits are produced by repeated multiply-by-10 / divide;
///  - in both cases the digit after the last kept one drives rounding, and a
///    carry out of the leading digit may move the result to the next suffix;
///  - the output is assembled as: optional '+', integer digits (optionally
///    comma-grouped), optional ".fraction", then the suffix.
///
/// NOTE(review): this listing skips source lines 2098-2099, 2101, 2294, 2301
/// and 2311, so part of "allowed_flags", two statements in the suffix output
/// and the function's closing brace are not visible here.
2090 void NStr::UInt8ToString_DataSize(string& out_str,
2091  Uint8 value,
2092  TNumToStringFlags flags /* = 0 */,
2093  unsigned int max_digits /* = 3 */)
2094 {
2095  TNumToStringFlags allowed_flags = fWithSign +
2096  fWithCommas +
2097  fDS_Binary +
2100  fDS_ShortSuffix +
2102 
2103  if ((flags & allowed_flags) != flags) {
2104  NCBI_THROW2(CStringException, eConvert, "Wrong set of flags", 0);
2105  }
2106 
2107  if (max_digits < 3)
2108  max_digits = 3;
2109 
2110  static const char s_Suffixes[] = {'K', 'M', 'G', 'T', 'P', 'E'};
2111  static const Uint4 s_NumSuffixes = Uint4(sizeof(s_Suffixes) / sizeof(s_Suffixes[0]));
2112 
      // Working area; [num_start, num_end) holds the digits to print and
      // dot_ptr marks where the fractional part begins.
2113  static const SIZE_TYPE kBufSize = 50;
2114  char buffer[kBufSize];
2115  char* num_start;
2116  char* dot_ptr;
2117  char* num_end;
2118  Uint4 digs_pre_dot, suff_idx;
2119 
2120  if (!(flags &fDS_Binary)) {
      // Decimal multipliers: s_Coefs[i] == 1000^(i+1).
2121  static const Uint8 s_Coefs[] = {1000, 1000000, 1000000000,
2122  NCBI_CONST_UINT8(1000000000000),
2123  NCBI_CONST_UINT8(1000000000000000),
2124  NCBI_CONST_UINT8(1000000000000000000)};
2125  suff_idx = 0;
2126  for (; suff_idx < s_NumSuffixes; ++suff_idx) {
2127  if (value < s_Coefs[suff_idx])
2128  break;
2129  }
      // Print all digits at the end of the buffer and keep a spare '0' in
      // front of them to receive a possible rounding carry.
2130  num_start = s_PrintBase10Uint8(buffer + kBufSize, value, 0);
2131  num_start[-1] = '0';
2132  dot_ptr = buffer + kBufSize - 3 * suff_idx;
2133  digs_pre_dot = Uint4(dot_ptr - num_start);
2134  if (!(flags & fDS_NoDecimalPoint)) {
2135  num_end = min(buffer + kBufSize, dot_ptr + (max_digits - digs_pre_dot));
2136  }
2137  else {
      // No fraction allowed: step down to smaller suffixes while the
      // digit budget permits.
2138  while (suff_idx > 0 && max_digits - digs_pre_dot >= 3) {
2139  --suff_idx;
2140  digs_pre_dot += 3;
2141  dot_ptr += 3;
2142  }
2143  num_end = dot_ptr;
2144  }
      // Round on the first dropped digit and propagate the carry leftwards.
2145  char* round_dig = num_end - 1;
2146  if (num_end < buffer + kBufSize && *num_end >= '5')
2147  ++(*round_dig);
2148  while (*round_dig == '0' + 10) {
2149  *round_dig = '0';
2150  --round_dig;
2151  ++(*round_dig);
2152  }
      // The carry reached the spare leading digit: one more integer digit.
2153  if (round_dig < num_start) {
2154  _ASSERT(num_start - round_dig == 1);
2155  num_start = round_dig;
2156  ++digs_pre_dot;
2157  if (!(flags & fDS_NoDecimalPoint)) {
2158  if (digs_pre_dot > 3) {
2159  ++suff_idx;
2160  digs_pre_dot -= 3;
2161  dot_ptr -= 3;
2162  }
2163  --num_end;
2164  }
2165  else {
2166  if (digs_pre_dot > max_digits) {
2167  ++suff_idx;
2168  digs_pre_dot -= 3;
2169  dot_ptr -= 3;
2170  num_end = dot_ptr;
2171  }
2172  }
2173  }
2174  }
2175  else {
      // Binary multipliers: s_Coefs[i] == 1024^i. In this branch suff_idx is
      // kept one higher than in the decimal branch until the final
      // "--suff_idx" at the end of the branch.
2176  static const Uint8 s_Coefs[] = {1, 1024, 1048576, 1073741824,
2177  NCBI_CONST_UINT8(1099511627776),
2178  NCBI_CONST_UINT8(1125899906842624),
2179  NCBI_CONST_UINT8(1152921504606846976)};
2180 
2181  suff_idx = 1;
2182  for (; suff_idx < s_NumSuffixes; ++suff_idx) {
2183  if (value < s_Coefs[suff_idx])
2184  break;
2185  }
2186  bool can_try_another = true;
2187 try_another_suffix:
2188  Uint8 mul_coef = s_Coefs[suff_idx - 1];
2189  Uint8 whole_num = value / mul_coef;
2190  if (max_digits == 3 && whole_num >= 1000) {
2191  ++suff_idx;
2192  goto try_another_suffix;
2193  }
2194  num_start = s_PrintBase10Uint8(buffer + kBufSize, whole_num, 0);
2195  num_start[-1] = '0';
2196  digs_pre_dot = Uint4(buffer + kBufSize - num_start);
      // Without a decimal point, try smaller suffixes while the integer part
      // still fits into max_digits.
2197  if (max_digits - digs_pre_dot >= 3 && (flags & fDS_NoDecimalPoint)
2198  && suff_idx != 1 && can_try_another)
2199  {
2200  Uint4 new_suff = suff_idx - 1;
2201 try_even_more_suffix:
2202  Uint8 new_num = value / s_Coefs[new_suff - 1];
2203  char* new_start = s_PrintBase10Uint8(buffer + kBufSize / 2, new_num, 0);
2204  Uint4 new_digs = Uint4(buffer + kBufSize / 2 - new_start);
2205  if (new_digs <= max_digits) {
2206  if (max_digits - digs_pre_dot >= 3 && new_suff != 1) {
2207  --new_suff;
2208  goto try_even_more_suffix;
2209  }
2210  suff_idx = new_suff;
2211  can_try_another = false;
2212  goto try_another_suffix;
2213  }
2214  if (new_suff != suff_idx - 1) {
2215  suff_idx = new_suff + 1;
2216  can_try_another = false;
2217  goto try_another_suffix;
2218  }
2219  }
      // Move the integer digits (with the spare leading '0') to the start of
      // the buffer so fractional digits can be appended after them.
2220  memcpy(buffer, num_start - 1, digs_pre_dot + 1);
2221  num_start = buffer + 1;
2222  dot_ptr = num_start + digs_pre_dot;
      // One extra digit is always generated; it is used only for rounding.
2223  Uint4 cnt_more_digs = 1;
2224  if (!(flags & fDS_NoDecimalPoint))
2225  cnt_more_digs += min(max_digits - digs_pre_dot, 3 * (suff_idx - 1));
2226  num_end = dot_ptr;
2227  Uint8 left_val = value - whole_num * mul_coef;
      // Long division: each step yields the next fractional decimal digit.
2228  do {
2229  left_val *= 10;
2230  Uint1 d = Uint1(left_val / mul_coef);
2231  *num_end = char(d + '0');
2232  ++num_end;
2233  left_val -= d * mul_coef;
2234  --cnt_more_digs;
2235  }
2236  while (cnt_more_digs != 0);
2237  --num_end;
2238 
2239  char* round_dig = num_end - 1;
2240  if (*num_end >= '5')
2241  ++(*round_dig);
2242  while (*round_dig == '0' + 10) {
2243  *round_dig = '0';
2244  --round_dig;
2245  ++(*round_dig);
2246  }
2247  if (round_dig < num_start) {
2248  _ASSERT(round_dig == buffer);
2249  num_start = round_dig;
2250  ++digs_pre_dot;
2251  if (digs_pre_dot > max_digits) {
2252  ++suff_idx;
2253  goto try_another_suffix;
2254  }
2255  if (num_end != dot_ptr)
2256  --num_end;
2257  }
      // Rounding produced exactly "1024": switch to the next suffix.
2258  if (!(flags & fDS_NoDecimalPoint) && digs_pre_dot == 4
2259  && num_start[0] == '1' && num_start[1] == '0'
2260  && num_start[2] == '2' && num_start[3] == '4')
2261  {
2262  ++suff_idx;
2263  goto try_another_suffix;
2264  }
2265 
2266  --suff_idx;
2267  }
2268 
      // Assemble the output string.
2269  out_str.erase();
2270  if (flags & fWithSign)
2271  out_str.append(1, '+');
2272  if (!(flags & fWithCommas) || digs_pre_dot <= 3) {
2273  out_str.append(num_start, digs_pre_dot);
2274  }
2275  else {
      // Leading group of 1-3 digits (possibly empty), then ",ddd" groups.
2276  Uint4 digs_first = digs_pre_dot % 3;
2277  out_str.append(num_start, digs_first);
2278  char* left_ptr = num_start + digs_first;
2279  Uint4 digs_left = digs_pre_dot - digs_first;
2280  while (digs_left != 0) {
2281  out_str.append(1, ',');
2282  out_str.append(left_ptr, 3);
2283  left_ptr += 3;
2284  digs_left -= 3;
2285  }
2286  }
2287  if (num_end != dot_ptr) {
2288  out_str.append(1, '.');
2289  out_str.append(dot_ptr, num_end - dot_ptr);
2290  }
2291 
      // suff_idx == 0 means no multiplier; otherwise s_Suffixes[suff_idx - 1].
2292  if (suff_idx == 0) {
2293  if (flags & fDS_PutBSuffixToo) {
      // NOTE(review): listing line 2294 is absent here.
2295  out_str.append(1, ' ');
2296  out_str.append(1, 'B');
2297  }
2298  }
2299  else {
2300  --suff_idx;
      // NOTE(review): listing line 2301 is absent here.
2302  out_str.append(1, ' ');
2303  out_str.append(1, s_Suffixes[suff_idx]);
2304  if (!(flags & fDS_ShortSuffix)) {
2305  if (flags & fDS_Binary)
2306  out_str.append(1, 'i');
2307  out_str.append(1, 'B');
2308  }
2309  }
2310  errno = 0;
2312 
2313 
// A maximal double precision used in the double to string conversion
#if defined(NCBI_OS_MSWIN)
    const int kMaxDoublePrecision = 200;
#else
    const int kMaxDoublePrecision = 308;
#endif
// A maximal size of a double value in a string form:
//   integer digits + sign + dot + ending '\0' + max.precision.
// The largest finite double is about 1.8e308, i.e. it has 309 digits before
// the decimal point in fixed notation (not 308), so 309 is budgeted for the
// integer part; otherwise "-DBL_MAX" at maximal precision does not fit.
const int kMaxDoubleStringSize = 309 + 3 + kMaxDoublePrecision;
2323 
2324 
/// Convert a double to a string and store it in "out_str".
///
/// Two paths are visible: one taken when "precision" is non-negative or when
/// fDoublePosix is set and the value is zero or not finite, and one that
/// formats with snprintf() using "%f", "%e" or "%g" as selected by "flags".
/// With fDoublePosix the locale's decimal point character is replaced by '.'
/// in the snprintf() result. errno is reset to 0 on return.
///
/// NOTE(review): this listing skips source lines 2326 and 2332. Line 2326
/// presumably declares the "precision" and "flags" parameters; line 2332
/// presumably fills "buffer" and sets "n" -- confirm against the repository.
2325 void NStr::DoubleToString(string& out_str, double value,
2327 {
2328  char buffer[kMaxDoubleStringSize]; // includes ending '\0'
2329  int n = 0;
2330  if (precision >= 0 ||
2331  ((flags & fDoublePosix) && (!finite(value) || value == 0.))) {
2333  buffer[n] = '\0';
2334  } else {
2335  const char* format;
2336  switch (flags & fDoubleGeneral) {
2337  case fDoubleFixed:
2338  format = "%f";
2339  break;
2340  case fDoubleScientific:
2341  format = "%e";
2342  break;
2343  case fDoubleGeneral: // default
2344  default:
2345  format = "%g";
2346  break;
2347  }
2348  n = ::snprintf(buffer, kMaxDoubleStringSize, format, value);
      // On a formatting failure fall back to an empty string.
2349  if (n < 0) {
2350  buffer[0] = '\0';
2351  }
2352  if (flags & fDoublePosix) {
      // Only the first occurrence of the locale's decimal point is replaced.
2353  struct lconv* conv = localeconv();
2354  if ('.' != *(conv->decimal_point)) {
2355  char* pos = strchr(buffer, *(conv->decimal_point));
2356  if (pos) {
2357  *pos = '.';
2358  }
2359  }
2360  }
2361  }
2362  out_str = buffer;
2363  errno = 0;
2364 }
2365 
2366 
2367 SIZE_TYPE NStr::DoubleToString(double value, unsigned int precision,
2368  char* buf, SIZE_TYPE buf_size,
2369  TNumToStringFlags flags)
2370 {
2371  char buffer[kMaxDoubleStringSize]; // inludes ending '\0'
2372  int n = 0;
2373  if ((flags & fDoublePosix) && (!finite(value) || value == 0.)) {
2374  if (value == 0.) {
2375  double zero = 0.;
2376  if (memcmp(&value, &zero, sizeof(double)) == 0) {
2377  strcpy(buffer, "0");
2378  n = 2;
2379  } else {
2380  strcpy(buffer, "-0");
2381  n = 3;
2382  }
2383  } else if (isnan(value)) {
2384  strcpy(buffer, "NaN");
2385  n = 4;
2386  } else if (value > 0.) {
2387  strcpy(buffer, "INF");
2388  n = 4;
2389  } else {
2390  strcpy(buffer, "-INF");
2391  n = 5;
2392  }
2393  } else {
2394  if (precision > (unsigned int)kMaxDoublePrecision) {
2395  precision = (unsigned int)kMaxDoublePrecision;
2396  }
2397  const char* format;
2398  switch (flags & fDoubleGeneral) {
2399  case fDoubleScientific:
2400  format = "%.*e";
2401  break;
2402  case fDoubleGeneral:
2403  format = "%.*g";
2404  break;
2405  case fDoubleFixed: // default
2406  default:
2407  format = "%.*f";
2408  break;
2409  }
2410  n = ::snprintf(buffer, kMaxDoubleStringSize, format, (int)precision, value);
2411  if (n < 0) {
2412  n = 0;
2413  }
2414  if (flags & fDoublePosix) {
2415  struct lconv* conv = localeconv();
2416  if ('.' != *(conv->decimal_point)) {
2417  char* pos = strchr(buffer, *(conv->decimal_point));
2418  if (pos) {
2419  *pos = '.';
2420  }
2421  }
2422  }
2423  }
2424  SIZE_TYPE n_copy = min((SIZE_TYPE) n, buf_size);
2425  memcpy(buf, buffer, n_copy);
2426  errno = 0;
2427  return n_copy;
2428 }
2429 
2430 
/// Write the decimal representation of "value" at "buffer".
///
/// With "zeros" set, exactly "digits" characters are written, left-padded
/// with '0' (more significant digits that do not fit are dropped). Without
/// it, only the significant digits are written, starting at "buffer"; the
/// field of "digits" characters is used as scratch space, so "digits" must be
/// at least the number of decimal digits in "value".
///
/// @param buffer
///   Destination; must have room for "digits" characters.
/// @param value
///   Number to print.
/// @param digits
///   Field width (must be non-zero).
/// @param zeros
///   Whether to pad with leading zeros up to "digits".
/// @return
///   Pointer just past the last character written. No '\0' is appended.
static char* s_ncbi_append_int2str(char* buffer, unsigned int value, size_t digits, bool zeros)
{
    char* buffer_start = buffer;
    char* buffer_end = (buffer += digits-1);
    if (zeros) {
        // Fill the whole field from right to left
        do {
            *buffer-- = (char)('0' + (value % 10));
            value /= 10;
        } while (--digits);
    } else {
        // Right-align the significant digits in the field...
        do {
            *buffer-- = (char)('0' + (value % 10));
        } while (value /= 10);

        // ...then shift them to the start of the field if they do not
        // already begin there.
        if (++buffer != buffer_start) {
            memmove(buffer_start, buffer, buffer_end-buffer+1);
            buffer_end -= buffer - buffer_start;
        }
    }
    return ++buffer_end;
}
2452 
2453 
// Shorthand for NCBI_CONST_LONGDOUBLE, used for the long double literals in
// NStr::DoubleToString_Ecvt() below.
// NOTE(review): identifiers containing a double underscore are reserved for
// the implementation in C++; consider renaming this macro.
2454 #define __NLG NCBI_CONST_LONGDOUBLE
2455 
/// Convert a double into a run of significant decimal digits (ecvt-like):
/// no decimal point, no exponent and no terminating NUL are written.
///
/// @param val        Value to convert.
/// @param precision  Requested number of significant digits; 0 produces no
///                   output, values above DBL_DIG are clamped to DBL_DIG.
/// @param buffer     Receives the digits (for a zero value: "0" or "-0").
/// @param bufsize    Size of 'buffer'. When it is 20 or less the digits are
///                   assembled in a local temporary and copied out at the end.
/// @param dec        Receives the signed decimal exponent of the first digit
///                   (the value reads as d.ddd * 10^dec).
/// @param sign       Receives -1 for negative values (including -0.), +1 for
///                   positive non-zero values, and 0 for +0. or precision 0.
/// @return Number of characters stored in 'buffer'.
/// @throw CStringException (eConvert) when a buffer of 20 characters or less
///        cannot hold the resulting digits.
2456 SIZE_TYPE NStr::DoubleToString_Ecvt(double val, unsigned int precision,
2457  char* buffer, SIZE_TYPE bufsize,
2458  int* dec, int* sign)
2459 {
2460  //errno = 0;
2461  *dec = *sign = 0;
2462  if (precision==0) {
2463  return 0;
2464  }
2465  if (precision > DBL_DIG) {
2466  precision = DBL_DIG;
2467  }
// Zero is handled separately; the byte-wise comparison tells +0. from -0.,
// which compare equal with operator==.
// NOTE(review): this branch stores 1-2 characters without consulting 'bufsize'.
2468  if (val == 0.) {
2469  double zero = 0.;
2470  if (memcmp(&val, &zero, sizeof(double)) == 0) {
2471  *buffer='0';
2472  return 1;
2473  }
2474  *buffer++='-';
2475  *buffer='0';
2476  *sign = -1;
2477  return 2;
2478  }
2479  *sign = val < 0. ? -1 : 1;
2480  if (*sign < 0) {
2481  val = -val;
2482  }
// More than 9 digits requested: a second group of 8 digits is extracted below.
2483  bool high_precision = precision > 9;
2484 
2485 // calculate exponent
// 'value' is scaled by powers of ten (first in steps of 256, then 16), and
// 'mult' is chosen so that value*mult has 9 integer digits; 'exp' accumulates
// the magnitude of the decimal exponent, 'exp_positive' holds its direction.
2486  unsigned int exp=0;
2487  bool exp_positive = val >= 1.;
2488  unsigned int first, second=0;
2489  long double mult = __NLG(1.);
2490  long double value = val;
2491 
2492  if (exp_positive) {
2493  while (value>=__NLG(1.e256))
2494  {value*=__NLG(1.e-256); exp+=256;}
2495  if (value >= __NLG(1.e16)) {
2496  if (value>=__NLG(1.e240)) {value*=__NLG(1.e-240); exp+=240;}
2497  else if (value>=__NLG(1.e224)) {value*=__NLG(1.e-224); exp+=224;}
2498  else if (value>=__NLG(1.e208)) {value*=__NLG(1.e-208); exp+=208;}
2499  else if (value>=__NLG(1.e192)) {value*=__NLG(1.e-192); exp+=192;}
2500  else if (value>=__NLG(1.e176)) {value*=__NLG(1.e-176); exp+=176;}
2501  else if (value>=__NLG(1.e160)) {value*=__NLG(1.e-160); exp+=160;}
2502  else if (value>=__NLG(1.e144)) {value*=__NLG(1.e-144); exp+=144;}
2503  else if (value>=__NLG(1.e128)) {value*=__NLG(1.e-128); exp+=128;}
2504  else if (value>=__NLG(1.e112)) {value*=__NLG(1.e-112); exp+=112;}
2505  else if (value>=__NLG(1.e96)) {value*=__NLG(1.e-96); exp+=96;}
2506  else if (value>=__NLG(1.e80)) {value*=__NLG(1.e-80); exp+=80;}
2507  else if (value>=__NLG(1.e64)) {value*=__NLG(1.e-64); exp+=64;}
2508  else if (value>=__NLG(1.e48)) {value*=__NLG(1.e-48); exp+=48;}
2509  else if (value>=__NLG(1.e32)) {value*=__NLG(1.e-32); exp+=32;}
2510  else if (value>=__NLG(1.e16)) {value*=__NLG(1.e-16); exp+=16;}
2511  }
2512  if (value< __NLG(1.)) {mult=__NLG(1.e+9); exp-= 1;}
2513  else if (value< __NLG(10.)) {mult=__NLG(1.e+8); }
2514  else if (value< __NLG(1.e2)) {mult=__NLG(1.e+7); exp+= 1;}
2515  else if (value< __NLG(1.e3)) {mult=__NLG(1.e+6); exp+= 2;}
2516  else if (value< __NLG(1.e4)) {mult=__NLG(1.e+5); exp+= 3;}
2517  else if (value< __NLG(1.e5)) {mult=__NLG(1.e+4); exp+= 4;}
2518  else if (value< __NLG(1.e6)) {mult=__NLG(1.e+3); exp+= 5;}
2519  else if (value< __NLG(1.e7)) {mult=__NLG(1.e+2); exp+= 6;}
2520  else if (value< __NLG(1.e8)) {mult= __NLG(10.); exp+= 7;}
2521  else if (value< __NLG(1.e9)) {mult= __NLG(1.); exp+= 8;}
2522  else if (value<__NLG(1.e10)) {mult= __NLG(0.1); exp+= 9;}
2523  else if (value<__NLG(1.e11)) {mult=__NLG(1.e-2); exp+=10;}
2524  else if (value<__NLG(1.e12)) {mult=__NLG(1.e-3); exp+=11;}
2525  else if (value<__NLG(1.e13)) {mult=__NLG(1.e-4); exp+=12;}
2526  else if (value<__NLG(1.e14)) {mult=__NLG(1.e-5); exp+=13;}
2527  else if (value<__NLG(1.e15)) {mult=__NLG(1.e-6); exp+=14;}
2528  else if (value<__NLG(1.e16)) {mult=__NLG(1.e-7); exp+=15;}
2529  else {mult=__NLG(1.e-8); exp+=16;}
2530  } else {
2531  while (value<=__NLG(1.e-256))
2532  {value*=__NLG(1.e256); exp+=256;}
2533  if (value <= __NLG(1.e-16)) {
2534  if (value<=__NLG(1.e-240)) {value*=__NLG(1.e240); exp+=240;}
2535  else if (value<=__NLG(1.e-224)) {value*=__NLG(1.e224); exp+=224;}
2536  else if (value<=__NLG(1.e-208)) {value*=__NLG(1.e208); exp+=208;}
2537  else if (value<=__NLG(1.e-192)) {value*=__NLG(1.e192); exp+=192;}
2538  else if (value<=__NLG(1.e-176)) {value*=__NLG(1.e176); exp+=176;}
2539  else if (value<=__NLG(1.e-160)) {value*=__NLG(1.e160); exp+=160;}
2540  else if (value<=__NLG(1.e-144)) {value*=__NLG(1.e144); exp+=144;}
2541  else if (value<=__NLG(1.e-128)) {value*=__NLG(1.e128); exp+=128;}
2542  else if (value<=__NLG(1.e-112)) {value*=__NLG(1.e112); exp+=112;}
2543  else if (value<=__NLG(1.e-96)) {value*=__NLG(1.e96); exp+=96;}
2544  else if (value<=__NLG(1.e-80)) {value*=__NLG(1.e80); exp+=80;}
2545  else if (value<=__NLG(1.e-64)) {value*=__NLG(1.e64); exp+=64;}
2546  else if (value<=__NLG(1.e-48)) {value*=__NLG(1.e48); exp+=48;}
2547  else if (value<=__NLG(1.e-32)) {value*=__NLG(1.e32); exp+=32;}
2548  else if (value<=__NLG(1.e-16)) {value*=__NLG(1.e16); exp+=16;}
2549  }
2550  if (value<__NLG(1.e-15)) {mult=__NLG(1.e24); exp+=16;}
2551  else if (value<__NLG(1.e-14)) {mult=__NLG(1.e23); exp+=15;}
2552  else if (value<__NLG(1.e-13)) {mult=__NLG(1.e22); exp+=14;}
2553  else if (value<__NLG(1.e-12)) {mult=__NLG(1.e21); exp+=13;}
2554  else if (value<__NLG(1.e-11)) {mult=__NLG(1.e20); exp+=12;}
2555  else if (value<__NLG(1.e-10)) {mult=__NLG(1.e19); exp+=11;}
2556  else if (value<__NLG(1.e-9)) {mult=__NLG(1.e18); exp+=10;}
2557  else if (value<__NLG(1.e-8)) {mult=__NLG(1.e17); exp+=9;}
2558  else if (value<__NLG(1.e-7)) {mult=__NLG(1.e16); exp+=8;}
2559  else if (value<__NLG(1.e-6)) {mult=__NLG(1.e15); exp+=7;}
2560  else if (value<__NLG(1.e-5)) {mult=__NLG(1.e14); exp+=6;}
2561  else if (value<__NLG(1.e-4)) {mult=__NLG(1.e13); exp+=5;}
2562  else if (value<__NLG(1.e-3)) {mult=__NLG(1.e12); exp+=4;}
2563  else if (value<__NLG(1.e-2)) {mult=__NLG(1.e11); exp+=3;}
2564  else if (value<__NLG(1.e-1)) {mult=__NLG(1.e10); exp+=2;}
2565  else if (value<__NLG(1.)) {mult=__NLG(1.e9); exp+=1;}
2566  else {mult=__NLG(1.e8); }
2567  }
2568 
2569 // get all digits
// 'first' is the leading 9-digit group; it is clamped into
// [100000000, 999999999] (presumably to absorb rounding in the scaling above).
2570  long double t1 = value * mult;
2571  if (t1 >= __NLG(1.e9)) {
2572  first = 999999999;
2573  } else if (t1 < __NLG(1.e8)) {
2574  first = 100000000;
2575  t1 = first;
2576  } else {
2577  first = (unsigned int)t1;
2578  }
// 'second' is the next group of 8 digits, taken from the fractional remainder.
2579  if (high_precision) {
2580  long double t2 = (t1-first) * __NLG(1.e8);
2581  if (t2 >= __NLG(1.e8)) {
2582  second = 99999999;
2583  } else {
2584  second = (unsigned int)t2;
2585  }
2586  }
2587 
2588 // convert them into string
// Up to 17 characters are generated here, so the caller's buffer is used
// directly only when it is larger than 20 characters.
2589  bool use_ext_buffer = bufsize > 20;
2590  char tmp[32];
2591  char *digits = use_ext_buffer ? buffer : tmp;
2592  char *digits_end = s_ncbi_append_int2str(digits,first,9,false);
2593  if (high_precision) {
2594  digits_end = s_ncbi_append_int2str(digits_end,second,8,true);
2595  }
2596  size_t digits_len = digits_end - digits;
2597  size_t digits_got = digits_len;
2598  size_t digits_expected = high_precision ? 17 : 9;
2599 
2600 // get significant digits according to requested precision
2601  size_t pos = precision;
2602  if (digits_len > precision) {
2603  digits_len = precision;
2604 
2605  // this is questionable, but in fact,
2606  // improves the result (on average)
2607 #if 1
// Heuristic: a '4' preceded by a long run of '9's is treated as '5', and a
// '5' preceded by a long run of '0's is treated as '4', before rounding.
2608  if (high_precision) {
2609  if (digits[pos] == '4') {
2610  size_t pt = pos-1;
2611  while (pt != 0 && digits[--pt] == '9')
2612  ;
2613  if (pt != 0 && (pos-pt) > precision/2)
2614  digits[pos]='5';
2615  } else if (digits[pos] == '5') {
2616  size_t pt = pos;
2617  while (pt != 0 && digits[--pt] == '0')
2618  ;
2619  if (pt != 0 && (pos-pt) > precision/2)
2620  digits[pos]='4';
2621  }
2622  }
2623 #endif
2624 
// Round half up: propagate the carry leftwards through trailing '9's.
2625  if (digits[pos] >= '5') {
2626  do {
2627  if (digits[--pos] < '9') {
2628  ++digits[pos++];
2629  break;
2630  }
2631  digits[pos]='0';
2632  } while (pos > 0);
// pos == 0 here means every kept digit was '9': the result collapses to a
// single '1' and the exponent moves by one.
2633  if (pos == 0) {
2634  if (digits_expected <= digits_got) {
2635  if (exp_positive) {
2636  ++exp;
2637  } else {
2638 // exp cannot be 0, by design
2639  exp_positive = --exp == 0;
2640  }
2641  }
2642  *digits = '1';
2643  digits_len = 1;
2644  }
2645  }
2646  }
2647 
2648 // truncate trailing zeros
2649  for (pos = digits_len; pos-- > 0 && digits[pos] == '0';)
2650  --digits_len;
2651 
2652  *dec = exp_positive ? int(exp) : -int(exp);
2653 
// Digits were built in 'tmp': copy them out, or fail if they do not fit.
2654  if (!use_ext_buffer) {
2655  if (digits_len <= bufsize) {
2656  memcpy(buffer, digits, digits_len);
2657  } else {
2658  NCBI_THROW2(CStringException, eConvert,
2659  "Destination buffer too small", 0);
2660  }
2661  }
2662  return digits_len;
2663 }
2664 #undef __NLG
2665 
2666 
2667 SIZE_TYPE NStr::DoubleToStringPosix(double val, unsigned int precision,
2668  char* buffer, SIZE_TYPE bufsize)
2669 {
2670  if (bufsize < precision+8) {
2671  NCBI_THROW2(CStringException, eConvert,
2672  "Destination buffer too small", 0);
2673  }
2674  int dec=0, sign=0;
2675  char digits[32];
2676  size_t digits_len = DoubleToString_Ecvt(
2677  val, precision, digits, sizeof(digits), &dec, &sign);
2678  if (digits_len == 0) {
2679  errno = 0;
2680  return 0;
2681  }
2682  if (val == 0.) {
2683  strncpy(buffer,digits, digits_len);
2684  return digits_len;
2685  }
2686  if (digits_len == 1 && dec == 0 && sign >=0) {
2687  *buffer = digits[0];
2688  errno = 0;
2689  return 1;
2690  }
2691  bool exp_positive = dec >= 0;
2692  unsigned int exp= (unsigned int)(exp_positive ? dec : (-dec));
2693 
2694  // assemble the result
2695  char *buffer_pos = buffer;
2696 // char *buffer_end = buffer + bufsize;
2697  char *digits_pos = digits;
2698 
2699  if (sign < 0) {
2700  *buffer_pos++ = '-';
2701  }
2702  // The 'e' format is used when the exponent of the value is less than -4
2703  // or greater than or equal to the precision argument
2704  if ((exp_positive && exp >= precision) || (!exp_positive && exp > 4)) {
2705  *buffer_pos++ = *digits_pos++;
2706  --digits_len;
2707  if (digits_len != 0) {
2708  *buffer_pos++ = '.';
2709  strncpy(buffer_pos,digits_pos,digits_len);
2710  buffer_pos += digits_len;
2711  }
2712  *buffer_pos++ = 'e';
2713  *buffer_pos++ = exp_positive ? '+' : '-';
2714 
2715 //#if defined(NCBI_OS_MSWIN)
2716 #if NCBI_COMPILER_MSVC && _MSC_VER < 1900
2717  bool need_zeros = true;
2718  size_t need_digits = 3;
2719 #else
2720  bool need_zeros = exp < 10 ? true : false;
2721  size_t need_digits = exp < 100 ? 2 : 3;
2722 #endif
2723  // assuming exp < 1000
2724  buffer_pos = s_ncbi_append_int2str(buffer_pos, exp, need_digits,need_zeros);
2725  } else if (exp_positive) {
2726  *buffer_pos++ = *digits_pos++;
2727  --digits_len;
2728  if (digits_len > exp) {
2729  strncpy(buffer_pos,digits_pos,exp);
2730  buffer_pos += exp;
2731  *buffer_pos++ = '.';
2732  strncpy(buffer_pos,digits_pos+exp,digits_len-exp);
2733  buffer_pos += digits_len-exp;
2734  } else {
2735  strncpy(buffer_pos,digits_pos,digits_len);
2736  buffer_pos += digits_len;
2737  exp -= (unsigned int)digits_len;
2738  while (exp--) {
2739  *buffer_pos++ = '0';
2740  }
2741  }
2742  } else {
2743  *buffer_pos++ = '0';
2744  *buffer_pos++ = '.';
2745  for (--exp; exp--;) {
2746  *buffer_pos++ = '0';
2747  }
2748  strncpy(buffer_pos,digits_pos, digits_len);
2749  buffer_pos += digits_len;
2750  }
2751  errno = 0;
2752  return buffer_pos - buffer;
2753 }
2754 
2755 
2756 string NStr::SizetToString(size_t value, TNumToStringFlags flags, int base)
2757 {
2758 #if (SIZEOF_SIZE_T > 4)
2759  return UInt8ToString(value, flags, base);
2760 #else
2761  return UIntToString(static_cast<unsigned int>(value), flags, base);
2762 #endif
2763 }
2764 
2765 
2766 string NStr::PtrToString(const void* value)
2767 {
2768  errno = 0;
2769  const int kBufSize = 64;
2770  char buffer[kBufSize];
2771  ::snprintf(buffer, kBufSize, "%p", value);
2772  return buffer;
2773 }
2774 
2775 
2776 void NStr::PtrToString(string& out_str, const void* value)
2777 {
2778  errno = 0;
2779  const int kBufSize = 64;
2780  char buffer[kBufSize];
2781  ::snprintf(buffer, kBufSize, "%p", value);
2782  out_str = buffer;
2783 }
2784 
2785 
2786 const void* NStr::StringToPtr(const CTempStringEx str, TStringToNumFlags flags)
2787 {
2788  errno = 0;
2789  void *ptr = NULL;
2790  int res;
2791  if ( str.HasZeroAtEnd() ) {
2792  res = ::sscanf(str.data(), "%p", &ptr);
2793  } else {
2794  res = ::sscanf(string(str).c_str(), "%p", &ptr);
2795  }
2796  if (res != 1) {
2797  if (flags & fConvErr_NoErrMessage) {
2798  CNcbiError::SetErrno(errno = EINVAL);
2799  } else {
2800  CNcbiError::SetErrno(errno = EINVAL, str);
2801  }
2802  return NULL;
2803  }
2804  return ptr;
// Textual spellings of boolean values, grouped as true/false pairs.
// BoolToString() emits s_kTrueString / s_kFalseString.
2808 static const char* s_kTrueString = "true";
2809 static const char* s_kFalseString = "false";
2810 static const char* s_kTString = "t";
2811 static const char* s_kFString = "f";
2812 static const char* s_kYesString = "yes";
2813 static const char* s_kNoString = "no";
2814 static const char* s_kYString = "y";
2815 static const char* s_kNString = "n";
2816 static const char* s_kOnString = "on";
2817 static const char* s_kOffString = "off";
2818 
2819 
2820 const string NStr::BoolToString(bool value)
2822  return value ? s_kTrueString : s_kFalseString;
2823 }
2824 
2825 
2827 {
2828  if ( str == "1" ||
2833  AStrEquiv(str, s_kOnString, PNocase()) ) {
2834  errno = 0;
2835  return true;
2836  }
2837  if ( str == "0" ||
2843  errno = 0;
2844  return false;
2845  }
2847  "String cannot be converted to bool", 0);
2848 }
2849 
2850 
2851 string NStr::FormatVarargs(const char* format, va_list args)
2852 {
2853 #ifdef HAVE_VASPRINTF
2854  char* s;
2855  int n = vasprintf(&s, format, args);
2856  if (n >= 0) {
2857  string str(s, n);
2858  free(s);
2859  return str;
2860  } else {
2861  return kEmptyStr;
2862  }
2863 
2864 #elif defined(HAVE_VSNPRINTF)
2865  // deal with implementation quirks
2866  SIZE_TYPE size = 1024;
2868  buf.get()[size-1] = buf.get()[size-2] = 0;
2869  SIZE_TYPE n = vsnprintf(buf.get(), size, format, args);
2870  while (n >= size || buf.get()[size-2]) {
2871  if (buf.get()[size-1]) {
2872  ERR_POST_X(1, Warning << "Buffer overrun by buggy vsnprintf");
2873  }
2874  size = max(size << 1, n);
2875  buf.reset(new char[size]);
2876  buf.get()[size-1] = buf.get()[size-2] = 0;
2877  n = vsnprintf(buf.get(), size, format, args);
2878  }
2879  return (n > 0) ? string(buf.get(), n) : kEmptyStr;
2880 
2881 #elif defined(HAVE_VPRINTF)
2882  char buf[1024];
2883  buf[sizeof(buf) - 1] = 0;
2884  vsprintf(buf, format, args);
2885  if (buf[sizeof(buf) - 1]) {
2886  ERR_POST_X(2, Warning << "Buffer overrun by vsprintf");
2887  }
2888  return buf;
2889 
2890 #else
2891 # error Please port this code to your system.
2892 #endif
2893 }
2894 
2895 
2897  const CTempString pattern,
2898  ECase use_case,
2899  EDirection direction,
2900  SIZE_TYPE occurence)
2901 {
2902  const SIZE_TYPE slen = str.length();
2903  const SIZE_TYPE plen = pattern.length();
2904  SIZE_TYPE current_occurence = 0;
2905  SIZE_TYPE pos = 0;
2906  SIZE_TYPE current_pos = 0; // saved position of last search
2907  SIZE_TYPE search_pos = 0; // next search position
2908 
2909  if (plen > slen) {
2910  return NPOS;
2911  }
2912 
2913  if (use_case == eCase) {
2914 
2915  if (direction == eForwardSearch) {
2916  do {
2917  pos = str.find(pattern, search_pos);
2918  if (pos == NPOS) {
2919  return NPOS;
2920  }
2921  current_pos = pos;
2922  search_pos = pos + plen;
2923  ++current_occurence;
2924  }
2925  while (current_occurence <= occurence);
2926 
2927  } else {
2928  _ASSERT(direction == eReverseSearch);
2929  search_pos = slen - plen;
2930  do {
2931  pos = str.rfind(pattern, search_pos);
2932  if (pos == NPOS) {
2933  return NPOS;
2934  }
2935  current_pos = pos;
2936  search_pos = (pos < plen) ? 0 : pos - plen;
2937  ++current_occurence;
2938  }
2939  while (current_occurence <= occurence);
2940  }
2941 
2942  } else {
2943  _ASSERT(use_case == eNocase);
2944 
2945  // A set of lower/upper characters for pattern[0].
2946  string x_first(pattern, 0, 1);
2947  if (isupper((unsigned char)x_first[0])) {
2948  x_first += (char)tolower((unsigned char)x_first[0]);
2949  } else if (islower((unsigned char)x_first[0])) {
2950  x_first += (char)toupper((unsigned char)x_first[0]);
2951  }
2952 
2953  if (direction == eForwardSearch) {
2954  do {
2955  pos = str.find_first_of(x_first, search_pos);
2956  while (pos != NPOS) {
2957  if ( (pos + plen) > slen ) {
2958  return NPOS;
2959  }
2960  if ( CompareNocase(str, pos, plen, pattern) == 0 ) {
2961  break;
2962  }
2963  pos = str.find_first_of(x_first, pos + 1);
2964  }
2965  if (pos > slen) {
2966  return NPOS;
2967  }
2968  current_pos = pos;
2969  search_pos = pos + plen;
2970  ++current_occurence;
2971  }
2972  while (current_occurence <= occurence);
2973 
2974  } else {
2975  _ASSERT(direction == eReverseSearch);
2976  search_pos = slen - plen;
2977  do {
2978  pos = str.find_last_of(x_first, search_pos);
2979  while (pos != NPOS && pos
2980  && CompareNocase(str, pos, plen, pattern) != 0) {
2981  if (pos == 0) {
2982  return NPOS;
2983  }
2984  pos = str.find_last_of(x_first, pos - 1);
2985  }
2986  current_pos = pos;
2987  search_pos = (pos < plen) ? 0 : pos - plen;
2988  ++current_occurence;
2989  }
2990  while (current_occurence <= occurence);
2991  }
2992  }
2993  return current_pos;
2994 }
2995 
2996 
2997 // @deprecated
2999  SIZE_TYPE start, SIZE_TYPE end, EOccurrence where)
3000 {
3001  string pat(pattern, 0, 1);
3002  SIZE_TYPE l = pattern.size();
3003  if (isupper((unsigned char) pat[0])) {
3004  pat += (char) tolower((unsigned char) pat[0]);
3005  } else if (islower((unsigned char) pat[0])) {
3006  pat += (char) toupper((unsigned char) pat[0]);
3007  }
3008 
3009  if (where == eFirst) {
3010  SIZE_TYPE pos = str.find_first_of(pat, start);
3011  while (pos != NPOS && (pos + l) <= end
3012  && CompareNocase(str, pos, l, pattern) != 0) {
3013  pos = str.find_first_of(pat, pos + 1);
3014  }
3015  return pos > end ? NPOS : pos;
3016 
3017  } else { // eLast
3018  SIZE_TYPE pos = str.find_last_of(pat, end);
3019  while (pos != NPOS && pos >= start
3020  && CompareNocase(str, pos, l, pattern) != 0) {
3021  if (pos == 0) {
3022  return NPOS;
3023  }
3024  pos = str.find_last_of(pat, pos - 1);
3025  }
3026  return pos < start ? NPOS : pos;
3027  }
3028 }
3029 
3030 
3031 const string* NStr::Find(const list <string>& lst, const CTempString val,
3032  ECase use_case)
3033 {
3034  if (lst.empty()) return NULL;
3035  ITERATE (list<string>, st_itr, lst) {
3036  if (Equal(*st_itr, val, use_case)) {
3037  return &*st_itr;
3038  }
3039  }
3040  return NULL;
3041 }
3042 
3043 const string* NStr::Find(const vector <string>& vec, const CTempString val,
3044  ECase use_case)
3045 {
3046  if (vec.empty()) return NULL;
3047  ITERATE (vector<string>, st_itr, vec) {
3048  if (Equal(*st_itr, val, use_case)) {
3049  return &*st_itr;
3050  }
3051  }
3052  return NULL;
3053 }
3055 
/// @internal
// Check whether 'ch' is a word boundary character,
// i.e. it does not match [a-zA-Z0-9_].
static inline
bool s_IsWordBoundaryChar(char ch)
{
    return !(ch == '_'  ||  isalnum((unsigned char)ch));
}
3063 
3064 
3066  const CTempString word,
3067  ECase use_case,
3068  EDirection direction)
3069 {
3070  const SIZE_TYPE slen = str.length();
3071  const SIZE_TYPE plen = word.length();
3072 
3073  SIZE_TYPE start = 0;
3074  SIZE_TYPE end = slen;
3075 
3076  SIZE_TYPE pos = Find(str, word, use_case, direction);
3077 
3078  while (pos != NPOS) {
3079  // Check word boundaries
3080  if ( ((pos == 0) || s_IsWordBoundaryChar(str[pos-1])) &&
3081  ((pos + plen == slen) || s_IsWordBoundaryChar(str[pos+plen])) ) {
3082  return pos;
3083  }
3084  // Find next occurrence
3085  if (direction == eForwardSearch) {
3086  if (pos + plen == slen) {
3087  return NPOS;
3088  }
3089  ++start;
3090  } else {
3091  if (pos == 0) {
3092  return NPOS;
3093  }
3094  --end;
3095  }
3096  pos = Find(CTempString(str, start, end - start), word, use_case, direction);
3097  if (pos != NPOS) {
3098  // update position: from start of the string "str"
3099  pos += start;
3100  }
3101  }
3102  return pos;
3103 }
3104 
3105 
3107 {
3108  const SIZE_TYPE len1 = s1.length();
3109  const SIZE_TYPE len2 = s2.length();
3110 
3111  // Eliminate the null case
3112  if (len1 == 0 || len2 == 0) {
3113  return 0;
3114  }
3115  SIZE_TYPE len = min(len1, len2);
3116 
3117  // Truncate the longer string
3118  CTempString t1, t2;
3119  if (len1 > len2) {
3120  t1 = s1.substr(len1-len, len);
3121  t2 = s2;
3122  } else {
3123  t1 = s1;
3124  t2 = s2.substr(0, len);
3125  }
3126  // Quick check for the worst case
3127  if (memcmp(t1.data(), t2.data(), len) == 0) {
3128  return len;
3129  }
3130 
3131  // Start by looking for a single character match
3132  // and increase length until no match is found.
3133  // Performance analysis: http://neil.fraser.name/news/2010/11/04/
3134  SIZE_TYPE best = 0;
3135  SIZE_TYPE n = 1;
3136  for (;;) {
3137  // Right 'n' symbols of 't1'
3138  CTempString pattern(t1.data() + len - n, n);
3139  SIZE_TYPE pos = t2.find(pattern);
3140  if (pos == NPOS) {
3141  return best;
3142  }
3143  n += pos;
3144  if (pos == 0 || memcmp(t1.data() + len - n, t2.data(), n) == 0) {
3145  best = n;
3146  n++;
3147  }
3148  }
3149  // Unreachable
3150  return best;
3151 }
3152 
3153 
3154 template <class TStr>
3155 TStr s_TruncateSpaces(const TStr& str, NStr::ETrunc where,
3156  const TStr& empty_str)
3157 {
3158  SIZE_TYPE length = str.length();
3159  if (length == 0) {
3160  return empty_str;
3161  }
3162  SIZE_TYPE beg = 0;
3163  if (where == NStr::eTrunc_Begin || where == NStr::eTrunc_Both) {
3164  _ASSERT(beg < length);
3165  while ( isspace((unsigned char) str[beg]) ) {
3166  if (++beg == length) {
3167  return empty_str;
3168  }
3169  }
3170  }
3171  SIZE_TYPE end = length;
3172  if ( where == NStr::eTrunc_End || where == NStr::eTrunc_Both ) {
3173  _ASSERT(beg < end);
3174  while (isspace((unsigned char) str[--end])) {
3175  if (beg == end) {
3176  return empty_str;
3177  }
3178  }
3179  _ASSERT(beg <= end && !isspace((unsigned char) str[end]));
3180  ++end;
3181  }
3182  _ASSERT(beg < end && end <= length);
3183  if ( beg | (end - length) ) { // if either beg != 0 or end != length
3184  return str.substr(beg, end - beg);
3185  }
3186  else {
3187  return str;
3188  }
3189 }
3190 
3191 string NStr::TruncateSpaces(const string& str, ETrunc where)
3192 {
3193  return s_TruncateSpaces(str, where, kEmptyStr);
3194 }
3195 
3197 {
3198  return s_TruncateSpaces(str, where, CTempString());
3199 }
3200 
3202 {
3203  str = s_TruncateSpaces(str, where, CTempString());
3204 }
3205 
3206 void NStr::TruncateSpacesInPlace(string& str, ETrunc where)
3207 {
3208  SIZE_TYPE length = str.length();
3209  if (length == 0) {
3210  return;
3211  }
3212  SIZE_TYPE beg = 0;
3213  if ( where == eTrunc_Begin || where == eTrunc_Both ) {
3214  // It's better to use str.data()[] to check string characters
3215  // to avoid implicit modification of the string by non-const operator[]
3216  _ASSERT(beg < length);
3217  while ( isspace((unsigned char) str.data()[beg]) ) {
3218  if (++beg == length) {
3219  str.erase();
3220  return;
3221  }
3222  }
3223  }
3224 
3225  SIZE_TYPE end = length;
3226  if ( where == eTrunc_End || where == eTrunc_Both ) {
3227  // It's better to use str.data()[] to check string characters
3228  // to avoid implicit modification of the string by non-const operator[]
3229  _ASSERT(beg < end);
3230  while (isspace((unsigned char) str.data()[--end])) {
3231  if (beg == end) {
3232  str.erase();
3233  return;
3234  }
3235  }
3236  _ASSERT(beg <= end && !isspace((unsigned char) str.data()[end]));
3237  ++end;
3238  }
3239  _ASSERT(beg < end && end <= length);
3240 
3241  if ( beg | (end - length) ) { // if either beg != 0 or end != length
3242  str.replace(0, length, str, beg, end - beg);
3243  }
3244 }
3245 
3246 
3247 void NStr::TrimPrefixInPlace(string& str, const CTempString prefix,
3248  ECase use_case)
3249 {
3250  if (!str.length() ||
3251  !prefix.length() ||
3252  !Equal(str, 0, prefix.length(), prefix, use_case)) {
3253  return;
3254  }
3255  str.erase(0, prefix.length());
3256 }
3257 
3258 
3260  ECase use_case)
3261 {
3262  if (!str.length() ||
3263  !prefix.length() ||
3264  !Equal(str, 0, prefix.length(), prefix, use_case)) {
3265  return;
3266  }
3267  str.assign(str.data() + prefix.length(), str.length() - prefix.length());
3268 }
3269 
3270 
3272  ECase use_case)
3273 {
3274  if (!str.length() ||
3275  !prefix.length() ||
3276  !Equal(str, 0, prefix.length(), prefix, use_case)) {
3277  return str;
3278  }
3279  return CTempString(str.data() + prefix.length(), str.length() - prefix.length());
3280 }
3281 
3282 
3283 void NStr::TrimSuffixInPlace(string& str, const CTempString suffix,
3284  ECase use_case)
3285 {
3286  if (!str.length() ||
3287  !suffix.length() ||
3288  !Equal(str, str.length() - suffix.length(), suffix.length(), suffix, use_case)) {
3289  return;
3290  }
3291  str.erase(str.length() - suffix.length());
3292 }
3293 
3294 
3296  ECase use_case)
3297 {
3298  if (!str.length() ||
3299  !suffix.length() ||
3300  !Equal(str, str.length() - suffix.length(), suffix.length(), suffix, use_case)) {
3301  return;
3302  }
3303  str.erase(str.length() - suffix.length());
3304 }
3305 
3306 
3308  ECase use_case)
3309 {
3310  if (!str.length() ||
3311  !suffix.length() ||
3312  !Equal(str, str.length() - suffix.length(), suffix.length(), suffix, use_case)) {
3313  return str;
3314  }
3315  return CTempString(str.data(), str.length() - suffix.length());
3316 }
3317 
3318 
3319 string& NStr::Replace(const string& src,
3320  const string& search, const string& replace,
3321  string& dst, SIZE_TYPE start_pos, SIZE_TYPE max_replace,
3322  SIZE_TYPE* num_replace)
3323 {
3324  // source and destination should not be the same
3325  if (&src == &dst) {
3326  NCBI_THROW2(CStringException, eBadArgs,
3327  "NStr::Replace(): source and destination are the same", 0);
3328  }
3329  if (num_replace)
3330  *num_replace = 0;
3331  if (start_pos + search.size() > src.size() || search == replace) {
3332  dst = src;
3333  return dst;
3334  }
3335 
3336  // Use different algorithms depending on size or 'search' and 'replace'
3337  // for better performance (and for big strings only! > 16KB).
3338 
3339  if (replace.size() > search.size() && src.size() > 16*1024) {
3340  // Replacing string is longer -- worst case.
3341  // Try to avoid memory reallocations inside std::string.
3342  // Count replacing strings first
3343  SIZE_TYPE n = 0;
3344  SIZE_TYPE start_orig = start_pos;
3345  for (SIZE_TYPE count = 0; !(max_replace && count >= max_replace); count++){
3346  start_pos = src.find(search, start_pos);
3347  if (start_pos == NPOS)
3348  break;
3349  n++;
3350  start_pos += search.size();
3351  }
3352  // Reallocate memory for destination string
3353  dst.resize(src.size() - n*search.size() + n*replace.size());
3354 
3355  // Use copy() to create destination string
3356  start_pos = start_orig;
3357  string::const_iterator src_start = src.begin();
3358  string::const_iterator src_end = src.begin();
3359  string::iterator dst_pos = dst.begin();
3360 
3361  for (SIZE_TYPE count = 0; !(max_replace && count >= max_replace); count++){
3362  start_pos = src.find(search, start_pos);
3363  if (start_pos == NPOS)
3364  break;
3365  // Copy from source string up to 'search'
3366  src_end = src.begin() + start_pos;
3367  copy(src_start, src_end, dst_pos);
3368  dst_pos += (src_end - src_start);
3369  // Append 'replace'
3370  copy(replace.begin(), replace.end(), dst_pos);
3371  dst_pos += replace.size();
3372  start_pos += search.size();
3373  src_start = src.begin() + start_pos;
3374  }
3375  // Copy source's string tail to the place
3376  copy(src_start, src.end(), dst_pos);
3377  if (num_replace)
3378  *num_replace = n;
3379 
3380  } else {
3381  // Replacing string is shorter or have the same length.
3382  // ReplaceInPlace() can be faster on some platform, but not much,
3383  // so we use regular algorithm even for equal lengths here.
3384  dst = src;
3385  for (SIZE_TYPE count = 0; !(max_replace && count >= max_replace); count++){
3386  start_pos = dst.find(search, start_pos);
3387  if (start_pos == NPOS)
3388  break;
3389  dst.replace(start_pos, search.size(), replace);
3390  start_pos += replace.size();
3391  if (num_replace)
3392  (*num_replace)++;
3393  }
3394  }
3395  return dst;
3396 }
3397 
3398 
3399 string NStr::Replace(const string& src,
3400  const string& search, const string& replace,
3401  SIZE_TYPE start_pos, SIZE_TYPE max_replace,
3402  SIZE_TYPE* num_replace)
3403 {
3404  string dst;
3405  Replace(src, search, replace, dst, start_pos, max_replace, num_replace);
3406  return dst;
3407 }
3408 
3409 
3410 string& NStr::ReplaceInPlace(string& src,
3411  const string& search, const string& replace,
3412  SIZE_TYPE start_pos, SIZE_TYPE max_replace,
3413  SIZE_TYPE* num_replace)
3414 {
3415  if ( num_replace )
3416  *num_replace = 0;
3417  if ( start_pos + search.size() > src.size() || search == replace )
3418  return src;
3419 
3420  bool equal_len = (search.size() == replace.size());
3421  for (SIZE_TYPE count = 0; !(max_replace && count >= max_replace); count++){
3422  start_pos = src.find(search, start_pos);
3423  if (start_pos == NPOS)
3424  break;
3425  // On some platforms string's replace() implementation
3426  // is not optimal if size of search and replace strings are equal
3427  if ( equal_len ) {
3428  copy(replace.begin(), replace.end(), src.begin() + start_pos);
3429  } else {
3430  src.replace(start_pos, search.size(), replace);
3431  }
3432  start_pos += replace.size();
3433  if (num_replace)
3434  (*num_replace)++;
3435  }
3436  return src;
3437 }
3438 
3439 
3440 template<typename TString, typename TContainer>
3441 TContainer& s_Split(const TString& str, const TString& delim,
3442  TContainer& arr, NStr::TSplitFlags flags,
3443  vector<SIZE_TYPE>* token_pos,
3444  CTempString_Storage* storage = NULL)
3445 {
3446  typedef CStrTokenPosAdapter<vector<SIZE_TYPE> > TPosArray;
3448  typedef CStrTokenize<TString, TContainer, TPosArray,
3449  CStrDummyTokenCount, TReserve> TSplitter;
3450 
3451  TPosArray token_pos_proxy(token_pos);
3452  TSplitter splitter(str, delim, flags, storage);
3453  splitter.Do(arr, token_pos_proxy, kEmptyStr);
3454  return arr;
3455 }
3456 
// Argument check for splitting functions: throws CStringException (eBadArgs)
// when 'flags' requests escape or quote processing (fSplit_CanEscape /
// fSplit_CanQuote) but 'storage' is NULL. Expects 'flags' and 'storage' to be
// in scope where the macro is used; 'where' is the function name that is
// pasted into the error message.
3457 #define CHECK_SPLIT_TEMPSTRING_FLAGS(where) \
3458  { \
3459  if ((flags & (NStr::fSplit_CanEscape | NStr::fSplit_CanQuote)) && !storage) { \
3460  NCBI_THROW2(CStringException, eBadArgs, \
3461  "NStr::" #where "(): the selected flags require non-NULL storage", 0); \
3462  } \
3463 }
3464 
3465 
3466 list<string>& NStr::Split(const CTempString str, const CTempString delim,
3467  list<string>& arr, TSplitFlags flags,
3468  vector<SIZE_TYPE>* token_pos)
3469 {
3470  return s_Split(str, delim, arr, flags, token_pos);
3471 }
3472 
3473 vector<string>& NStr::Split(const CTempString str, const CTempString delim,
3474  vector<string>& arr, TSplitFlags flags,
3475  vector<SIZE_TYPE>* token_pos)
3476 {
3477  return s_Split(str, delim, arr, flags, token_pos);
3478 }
3479 
3480 list<CTempString>& NStr::Split(const CTempString str, const CTempString delim,
3481  list<CTempString>& arr, TSplitFlags flags,
3482  vector<SIZE_TYPE>* token_pos, CTempString_Storage* storage)
3485  return s_Split(str, delim, arr, flags, token_pos, storage);
3486 }
3487 
3488 vector<CTempString>& NStr::Split(const CTempString str, const CTempString delim,
3489  vector<CTempString>& arr, TSplitFlags flags,
3490  vector<SIZE_TYPE>* token_pos, CTempString_Storage* storage)
3493  return s_Split(str, delim, arr, flags, token_pos, storage);
3494 }
3495 
3496 list<CTempStringEx>& NStr::Split(const CTempString str, const CTempString delim,
3497  list<CTempStringEx>& arr, TSplitFlags flags,
3498  vector<SIZE_TYPE>* token_pos, CTempString_Storage* storage)
3501  return s_Split(str, delim, arr, flags, token_pos, storage);
3502 }
3503 
3504 vector<CTempStringEx>& NStr::Split(const CTempString str, const CTempString delim,
3505  vector<CTempStringEx>& arr, TSplitFlags flags,
3506  vector<SIZE_TYPE>* token_pos, CTempString_Storage* storage)
3509  return s_Split(str, delim, arr, flags, token_pos, storage);
3510 }
3511 
3512 list<string>& NStr::SplitByPattern(const CTempString str, const CTempString delim,
3513  list<string>& arr, TSplitFlags flags,
3514  vector<SIZE_TYPE>* token_pos)
3515 {
3516  return s_Split(str, delim, arr, fSplit_ByPattern | flags, token_pos);
3517 }
3518 
3519 vector<string>& NStr::SplitByPattern(const CTempString str, const CTempString delim,
3520  vector<string>& arr, TSplitFlags flags,
3521  vector<SIZE_TYPE>* token_pos)
3522 {
3523  return s_Split(str, delim, arr, fSplit_ByPattern | flags, token_pos);
3524 }
3525 
3526 list<CTempString>& NStr::SplitByPattern(const CTempString str, const CTempString delim,
3527  list<CTempString>& arr, TSplitFlags flags,
3528  vector<SIZE_TYPE>* token_pos, CTempString_Storage* storage)
3531  return s_Split(str, delim, arr, fSplit_ByPattern | flags, token_pos, storage);
3532 }
3533 
3534 vector<CTempString>& NStr::SplitByPattern(const CTempString str, const CTempString delim,
3535  vector<CTempString>& arr, TSplitFlags flags,
3536  vector<SIZE_TYPE>* token_pos, CTempString_Storage* storage)
3539  return s_Split(str, delim, arr, fSplit_ByPattern | flags, token_pos, storage);
3540 }
3541 
3542 list<CTempStringEx>& NStr::SplitByPattern(const CTempString str, const CTempString delim,
3543  list<CTempStringEx>& arr, TSplitFlags flags,
3544  vector<SIZE_TYPE>* token_pos, CTempString_Storage* storage)
3547  return s_Split(str, delim, arr, fSplit_ByPattern | flags, token_pos, storage);
3548 }
3549 
3550 vector<CTempStringEx>& NStr::SplitByPattern(const CTempString str, const CTempString delim,
3551  vector<CTempStringEx>& arr, TSplitFlags flags,
3552  vector<SIZE_TYPE>* token_pos, CTempString_Storage* storage)
3553 {
3555  return s_Split(str, delim, arr, fSplit_ByPattern | flags, token_pos, storage);
3556 }
3557 
3558 
3559 bool NStr::SplitInTwo(const CTempString str, const CTempString delim,
3560  string& str1, string& str2, TSplitFlags flags)
3561 {
3562  CTempStringEx ts1, ts2;
3563  CTempString_Storage storage;
3564  bool result = SplitInTwo(str, delim, ts1, ts2, flags, &storage);
3565  str1 = ts1;
3566  str2 = ts2;
3567  return result;
3568 }
3569 
3570 
3571 bool NStr::SplitInTwo(const CTempString str, const CTempString delim,
3572  CTempString& str1, CTempString& str2, TSplitFlags flags,
3573  CTempString_Storage* storage)
3574 {
3575  CTempStringEx ts1, ts2;
3576  bool result = SplitInTwo(str, delim, ts1, ts2, flags, storage);
3577  str1 = ts1;
3578  str2 = ts2;
3579  return result;
3580 }
3581 
3582 
// Core SplitInTwo() implementation; the string and CTempString overloads
// forward here.
//   str1  - receives the joined pieces collected by the first Advance() call
//   str2  - receives the joined pieces collected by the second Advance() call
// Returns true if the first Advance() call reported a delimiter position
// (delim_pos != NPOS).
// NOTE(review): listing lines 3587-3590 (which include the declaration of
// TSplitter) and 3605 are absent from this excerpt.
3583 bool NStr::SplitInTwo(const CTempString str, const CTempString delim,
3584  CTempStringEx& str1, CTempStringEx& str2,
3585  TSplitFlags flags, CTempString_Storage* storage)
3586 {
3591 
3592  CTempStringList part_collector(storage);
3593  TSplitter splitter(str, delim, flags, storage);
// Stays NPOS unless the splitter stores a delimiter position into it
3594  SIZE_TYPE delim_pos = NPOS;
3595 
3596  // get first part
3597  splitter.Advance(&part_collector, NULL, &delim_pos);
3598  part_collector.Join(&str1);
// Reuse the same collector for the second half
3599  part_collector.Clear();
3600 
3601  // don't need further splitting, just quote and escape parsing
3602  splitter.SetDelim(kEmptyStr);
3603  splitter.Advance(&part_collector);
3604  part_collector.Join(&str2);
3606  return delim_pos != NPOS;
3607 }
3608 
3610 #define SS_ADD_CHAR(c) \
3611  out.push_back(c); \
3612  last = c;
3613 
3614 string NStr::Sanitize(CTempString str, CTempString allow_chars, CTempString reject_chars,
3615  char reject_replacement, TSS_Flags flags)
3616 {
3617  string out;
3618  out.reserve(str.size());
3619 
3620  // Use fSS_print by default if no any other filter, including custom
3621  bool have_class = (flags & (fSS_alpha | fSS_digit | fSS_alnum | fSS_print | fSS_cntrl | fSS_punct)) > 0;
3622  if ( allow_chars.empty() && reject_chars.empty() && !have_class ) {
3623  flags |= fSS_print;
3624  have_class = true;
3625  }
3626 
3627  bool have_allowed = false;
3628  char last = '\0';
3629 
3630  for (char c : str) {
3631 
3632  // Check against filters: character classes via flags, allowed chars, rejected chars.
3633  bool allowed = false;
3634  if ( have_class ) {
3635  allowed = ((flags & fSS_Reject) != 0);
3636  if (((flags & fSS_print) && isprint((unsigned char)c)) ||
3637  ((flags & fSS_alnum) && isalnum((unsigned char)c)) ||
3638  ((flags & fSS_alpha) && isalpha((unsigned char)c)) ||
3639  ((flags & fSS_digit) && isdigit((unsigned char)c)) ||
3640  ((flags & fSS_cntrl) && iscntrl((unsigned char)c)) ||
3641  ((flags & fSS_punct) && ispunct((unsigned char)c)) ) {
3642 
3643  // If matched and reverse logic -- treat char as rejected
3644  allowed = ((flags & fSS_Reject) == 0);
3645  }
3646  }
3647  else {
3648  // Special case: no any character class specified in flags
3649 
3650  // If <allow_chars> and fSS_Reject flag, then no any character allowed except <allow_chars>
3651  // -- "allow" already FALSE, no need to check this;
3652  // -- <allow_chars> will be checked below.
3653 
3654  // If <reject_chars> and no fSS_Reject flag, then all characters allowed except <reject_chars>.
3655  if (!reject_chars.empty() && ((flags & fSS_Reject) == 0)) {
3656  allowed = true;
3657  }
3658  // -- <reject_chars> will be checked below.
3659  }
3660  if (!allowed && !allow_chars.empty() && allow_chars.find(c) != NPOS ) {
3661  allowed = true;
3662  }
3663  if (allowed && !reject_chars.empty() && reject_chars.find(c) != NPOS ) {
3664  allowed = false;
3665  }
3666 
3667  // Good character?
3668  if ( allowed ) {
3669  // Special processing for allowed spaces.
3670  // Truncate leading spaces and merge if necessary
3671  if ( c == ' ' ) {
3672  if (!have_allowed && !(flags & fSS_NoTruncate_Begin)) {
3673  // Skip spaces at start of the string
3674  continue;
3675  }
3676  if (flags & fSS_NoMerge) {
3677  SS_ADD_CHAR(c);
3678  }
3679  else {
3680  // Merge spaces
3681  if (last != ' ') {
3682  SS_ADD_CHAR(c);
3683  }
3684  }
3685  }
3686  else {
3687  // Some other allowed character
3688  SS_ADD_CHAR(c);
3689  have_allowed = true;
3690  }
3691  continue;
3692  }
3693 
3694  // Rejected
3695  if ( flags & fSS_Remove ) {
3696  continue;
3697  }
3698  // Special check on leading spaces, if <reject_replacement> is a space
3699  if (reject_replacement == ' ') {
3700  if (!have_allowed && !(flags & fSS_NoTruncate_Begin)) {
3701  // Skip spaces at start of the string
3702  continue;
3703  }
3704  }
3705  // Replace rejected character
3706  if (flags & fSS_NoMerge) {
3707  SS_ADD_CHAR(reject_replacement);
3708  have_allowed = true;
3709  }
3710  else {
3711  // Merge rejected
3712  if (last != reject_replacement) {
3713  SS_ADD_CHAR(reject_replacement);
3714  have_allowed = true;
3715  }
3716  }
3717  }
3718 
3719  // Truncate trailing spaces if necessary
3720  if (last == ' ' && !(flags & fSS_NoTruncate_End)) {
3721  SIZE_TYPE pos = out.find_last_not_of(last);
3722  if (pos == NPOS) {
3723  out.clear();
3724  }
3725  else {
3726  out.resize(pos+1);
3727  }
3728  }
3729 
3730  return out;
3732 
3734 
// Language selector passed to s_PrintableString() as its "lang" argument;
// that function compares it against eLanguage_C and eLanguage_Javascript.
// NOTE(review): the enumerator lines (3736-3737) are absent from this excerpt.
3735 enum ELanguage {
3738 };
3739 
3740 
// Build a printable, backslash-escaped copy of "str".
//   str  - text to encode
//   mode - bit flags; NStr::fNewLine_Passthru, NStr::fNonAscii_Quote and
//          NStr::fPrintable_Full are tested below
//          (NOTE(review): the parameter's declaration line, 3742, is absent
//          from this excerpt)
//   lang - eLanguage_C or eLanguage_Javascript; affects '\a', '&' and '?'
// Returns the encoded text, or a copy of "str" when nothing had to be escaped.
3741 static string s_PrintableString(const CTempString str,
3743  ELanguage lang)
3744 {
// The output stream is created lazily, on the first character that needs escaping
3745  unique_ptr<CNcbiOstrstream> out;
// i scans the input; j is the start of the pending run of ordinary
// characters that has not been copied to "out" yet
3746  SIZE_TYPE i, j = 0;
3747 
3748  for (i = 0; i < str.size(); ++i) {
3749  bool octal = false;
3750  char c = str[i];
// In this switch "break" means "escape this character" (c may have been
// replaced by its escape letter); "continue" means "leave it as is".
3751  switch (c) {
3752  case '\a':
// The letter escape is used for C only; other languages get an octal escape
3753  if (lang == eLanguage_C)
3754  c = 'a';
3755  else
3756  octal = true;
3757  break;
3758  case '\b':
3759  c = 'b';
3760  break;
3761  case '\f':
3762  c = 'f';
3763  break;
3764  case '\r':
3765  c = 'r';
3766  break;
3767  case '\t':
3768  c = 't';
3769  break;
3770  case '\v':
3771  c = 'v';
3772  break;
3773  case '\n':
// With fNewLine_Passthru c stays '\n' and is handled specially when written
3774  if (!(mode & NStr::fNewLine_Passthru))
3775  c = 'n';
3776  /*FALLTHRU*/
3777  case '\\':
3778  case '\'':
3779  case '"':
3780  break;
3781  case '&':
// Escaped for JavaScript only
3782  if (lang == eLanguage_Javascript)
3783  break;
3784  continue;
3785  case '?':
// For C, escape a '?' that is adjacent to another '?'
// (presumably to keep trigraph-like "??x" sequences out of the output)
3786  if (lang == eLanguage_C) {
3787  if (i && str[i - 1] == '?')
3788  break;
3789  if (i < str.size() - 1 && str[i + 1] == '?')
3790  break;
3791  }
3792  continue;
3793  default:
// Non-ASCII bytes are octal-escaped only with fNonAscii_Quote; otherwise
// they fall through to the isprint() test below
3794  if (!isascii((unsigned char) c)) {
3795  if (mode & NStr::fNonAscii_Quote) {
3796  octal = true;
3797  break;
3798  }
3799  }
3800  if (!isprint((unsigned char) c)) {
3801  octal = true;
3802  break;
3803  }
3804  continue;
3805  }
3806  if (!out.get()) {
3807  out.reset(new CNcbiOstrstream);
3808  }
// Flush the run of ordinary characters preceding this one
3809  if (i > j) {
3810  out->write(str.data() + j, i - j);
3811  }
3812  out->put('\\');
3813  if (c == '\n') {
// Only reachable in fNewLine_Passthru mode: together with the backslash
// written above this emits "\n", then a backslash and a real newline
3814  out->write("n\\\n", 3);
3815  } else if (octal) {
// Leading zero digits may be dropped ("reduce") only when the next input
// character is not an octal digit, and never with fPrintable_Full
3816  bool reduce;
3817  if (!(mode & NStr::fPrintable_Full)) {
3818  reduce = (i == str.size() - 1 ||
3819  str[i + 1] < '0' || '7' < str[i + 1] ? true : false);
3820  } else {
3821  reduce = false;
3822  }
3823  unsigned char v;
3824  char val[3];
3825  int k = 0;
// Emit up to three octal digits, most significant first
3826  v = (unsigned char) c >> 6;
3827  if (v || !reduce) {
3828  val[k++] = char('0' + v);
// Once a digit has been written, the remaining ones must follow
3829  reduce = false;
3830  }
3831  v = ((unsigned char) c >> 3) & 7;
3832  if (v || !reduce) {
3833  val[k++] = char('0' + v);
3834  }
3835  v = (unsigned char) c & 7;
3836  val[k++] = char('0' + v);
3837  out->write(val, k);
3838  } else {
3839  out->put(c);
3840  }
3841  j = i + 1;
3842  }
// Flush the tail; j != 0 means at least one escape was written, so "out" exists
3843  if (j && i > j) {
3844  _ASSERT(out.get());
3845  out->write(str.data() + j, i - j);
3846  }
3847  if (out.get()) {
3848  // Return encoded string
3849  return CNcbiOstrstreamToString(*out);
3850  }
3851 
3852  // All characters are good - return (a copy of) the original string
3853  return str;
3854 }
3855 
3856 
3857 string NStr::Escape(const CTempString str, const CTempString metacharacters, char escape_char)
3858 {
3859  string out;
3860  if ( str.empty() ) {
3861  return out;
3862  }
3863  out.reserve(str.size() * 2); // maximum size for a new string (have all metacharacters)
3864 
3865  for (char c : str) {
3866  if (c == escape_char || metacharacters.find(c) != NPOS) {
3867  out += escape_char;
3868  }
3869  out += c;
3870  }
3871  return out;
3872 }
3873 
3874 
3875 string NStr::Unescape(const CTempString str, char escape_char)
3876 {
3877  string out;
3878  if ( str.empty() ) {
3879  return out;
3880  }
3881  out.reserve(str.size());
3882  bool escaped = false;
3883 
3884  for (char c : str) {
3885  if (escaped) {
3886  out += c;
3887  escaped = false;
3888  }
3889  else {
3890  if (c == escape_char) {
3891  escaped = true;
3892  }
3893  else {
3894  out += c;
3895  }
3896  }
3897  }
3898  return out;
3899 }
3900 
3901 
3902 string NStr::Quote(const CTempString str, char quote_char, char escape_char)
3903 {
3904  string out;
3905  if (str.empty()) {
3906  return out;
3907  }
3908  out.reserve(str.size() * 2); // maximum size for a new string
3909 
3910  out.push_back(quote_char);
3911  for (char c : str) {
3912  if (c == quote_char || c == escape_char) {
3913  out += escape_char;
3914  }
3915  out += c;
3916  }
3917  out.push_back(quote_char);
3919  return out;
3920 }
3921 
3922 
3923 string NStr::Unquote(const CTempString str, char escape_char)
3924 {
3925  string out;
3926  if (str.empty()) {
3927  return out;
3928  }
3929  out.reserve(str.size());
3930  bool escaped = false;
3931  char quote_char = str[0];
3932 
3933  if (str.length() < 2 || str[str.length()-1] != quote_char) {
3934  NCBI_THROW2(CStringException, eFormat,
3935  "The source string must start and finish with the same character", 0);
3936  }
3937  // Remove first and last characters ("quotes")
3938  CTempString s(str, 1, str.length() - 2);
3939 
3940  for (char c : s) {
3941  if (escaped) {
3942  out += c;
3943  escaped = false;
3944  }
3945  else {
3946  if (c == escape_char) {
3947  escaped = true;
3948  }
3949  else {
3950  out += c;
3951  }
3952  }
3953  }
3954  return out;
3955 }
3956 
3957 
3961 }
3962 
3963 
3965 {
3966  return s_PrintableString(str,
3969 }
3970 
3971 
3972 string NStr::CEncode(const CTempString str, EQuoted quoted)
3973 {
3974  switch (quoted) {
3975  case eNotQuoted:
3976  return PrintableString(str);
3977  case eQuoted:
3978  return '"' + PrintableString(str) + '"';
3979  }
3980  _TROUBLE;
3981  // Unreachable
3982  return str;
3983 }
3984 
3985 
3986 string NStr::CParse(const CTempString str, EQuoted quoted)
3987 {
3988  if (quoted == eNotQuoted) {
3989  return ParseEscapes(str);
3990  }
3991  _ASSERT(quoted == eQuoted);
3992 
3993  SIZE_TYPE pos;
3994  SIZE_TYPE len = str.length();
3995  const char quote_char = '"';
3996 
3997  if (len < 2 || str[0] != quote_char || str[len-1] != quote_char) {
3998  NCBI_THROW2(CStringException, eFormat,
3999  "The source string must start and finish with a double quote", 0);
4000  }
4001 
4002  // Flag that next char is escaped, ignore it
4003  bool escaped = false;
4004  // We have a quote mark, start collect string chars
4005  bool collect = true;
4006  // Position of last quote
4007  SIZE_TYPE last_quote = 0;
4008 
4009  string out;
4010  out.reserve(str.size());
4011 
4012  for (pos = 1; pos < len; ++pos) {
4013  unsigned char ch = str[pos];
4014  if (ch == quote_char && !escaped) {
4015  // Have a substring
4016  CTempString sub(str.data() + last_quote + 1, pos - last_quote - 1);
4017  if (collect) {
4018  // Parse escape sequences and add it to result
4019  out += ParseEscapes(sub);
4020  } else {
4021  // Possible we have adjacent strings ("A""B").
4022  if (pos != last_quote + 1) {
4023  NCBI_THROW2(CStringException, eFormat,
4024  "Quoted string format error", pos);
4025  }
4026  }
4027  last_quote = pos;
4028  collect = !collect;
4029  } else {
4030  escaped = ch == '\\' ? !escaped : false;
4031  }
4032  }
4033  if (escaped || last_quote != len-1) {
4034  NCBI_THROW2(CStringException, eFormat,
4035  "Unterminated quoted string", str.length());
4036  }
4037  return out;
4038 }
4039 
4040 
// Encode "str" for use in XML: the five predefined entities are replaced by
// their names, characters below 0x20 become "&#x..;" references, and with
// eXmlEnc_CommentSafe a hyphen is encoded where it ends the string or
// follows another hyphen. Unsafe characters are either skipped
// (eXmlEnc_Unsafe_Skip) or reported with a CStringException (eConvert).
// NOTE(review): listing line 4087, which opens the "unsafe character" check,
// is absent from this excerpt, so the exact condition enabling it is not shown.
4041 string NStr::XmlEncode(const CTempString str, TXmlEncode flags)
4042 // http://www.w3.org/TR/2000/REC-xml-20001006#sec-predefined-ent
4043 {
4044  string result;
4045  SIZE_TYPE i;
4046 
4047  // wild guess...
4048  result.reserve(str.size());
4049 
4050  for (i = 0; i < str.size(); i++) {
4051  char c = str[i];
4052  switch ( c ) {
4053  case '&':
4054  result.append("&amp;");
4055  break;
4056  case '<':
4057  result.append("&lt;");
4058  break;
4059  case '>':
4060  result.append("&gt;");
4061  break;
4062  case '\'':
4063  result.append("&apos;");
4064  break;
4065  case '"':
4066  result.append("&quot;");
4067  break;
4068  case '-':
4069  // translate double hyphen and ending hyphen
4070  // http://www.w3.org/TR/xml11/#sec-comments
4071  if (flags & eXmlEnc_CommentSafe) {
4072  if (i+1 == str.size()) {
4073  result.append("&#x2d;");
4074  break;
4075  } else if (str[i+1] == '-') {
// Consume both hyphens: the first is kept, the second is encoded
4076  ++i;
4077  result.append(1, c).append("&#x2d;");
4078  break;
4079  }
4080  }
4081  result.append(1, c);
4082  break;
4083 
4084  default:
// NOTE(review): "c" is a plain char. Where char is signed, bytes >= 0x80
// convert to very large unsigned values here, so the 0x7F-0x9F range tests
// below cannot match them; a cast through unsigned char may be intended.
4085  unsigned int uc = (unsigned int)(c);
4086 
4088  // Optional check on non-safe characters:
4089  // [#x1-#x8], [#xB-#xC], [#xE-#x1F], [#x7F-#x84], [#x86-#x9F]
4090  // https://www.w3.org/TR/xml11/#NT-Char
4091 
// NOTE(review): "uc < 0x8" accepts 0x0 and excludes 0x8, which differs from
// the [#x1-#x8] range quoted in the comment above -- confirm which is meant.
4092  if ((uc < 0x8) || (uc == 0xB) || (uc == 0xC) ||
4093  (uc >= 0x0E && uc <=0x1F) ||
4094  (uc >= 0x7F && uc <=0x84) ||
4095  (uc >= 0x86 && uc <=0x9F) )
4096  {
4097  // Skip unsafe characters
4098  if (flags & eXmlEnc_Unsafe_Skip) {
4099  continue;
4100  }
4101  // else, throw
4102  NCBI_THROW2(CStringException, eConvert,
4103  "NStr::XmlEncode -- Unsafe character '0x" + NStr::NumericToString(c, 0, 16) + "'", i);
4104  }
4105  }
4106  // Default behavior
4107  if (uc < 0x20) {
// Hexadecimal character reference without a leading zero digit
4108  const char* charmap = "0123456789abcdef";
4109  result.append("&#x");
4110  Uint1 ch = c;
4111  unsigned hi = ch >> 4;
4112  unsigned lo = ch & 0xF;
4113  if ( hi ) {
4114  result.append(1, charmap[hi]);
4115  }
4116  result.append(1, charmap[lo]).append(1, ';');
4117  } else {
4118  result.append(1, c);
4119  }
4120  break;
4121  }
4122  }
4123  return result;
4124 }
4125 
4126 
// Encode "str" for use in HTML. '<', '>', '\'' and '"' are replaced by named
// entities, symbols below 0x20 and above 0x7F by hexadecimal character
// references. '&' becomes "&amp;" unless fHtmlEnc_SkipEntities is set and the
// text starting at it already looks like an entity, in which case only "&"
// is written.
// NOTE(review): listing lines 4158, 4169 and 4182 are absent from this
// excerpt; judging by the braces, each opens an additional "if" block.
4127 string NStr::HtmlEncode(const CTempString str, THtmlEncode flags)
4128 {
4129  string result;
4130  SIZE_TYPE i;
// Cached position of the next ';' ; NPOS means there are no more ';' in the
// string, which disables any further entity checks
4131  SIZE_TYPE semicolon = 0;
4132 
4133  // wild guess...
4134  result.reserve(str.size());
4135 
4136  const char* begin = str.data();
4137  const char* end = begin + str.size();
4138  for ( const char* curr = begin; curr < end; ++curr ) {
4139  TUnicodeSymbol c = CUtf8::Decode(curr);
4140  switch ( c ) {
4141  case '&':
4142  {{
4143  i = curr - begin;
4144  result.append("&");
4145  // Check on HTML entity
4146  bool is_entity = false;
4147  if ((flags & fHtmlEnc_SkipEntities) &&
4148  (i+2 < str.size()) && (semicolon != NPOS)) {
4149 
// Search for a new ';' only if the cached one is not ahead of this '&'
4150  if ( i >= semicolon ) {
4151  semicolon = str.find(";", i+1);
4152  }
4153  if ( semicolon != NPOS ) {
4154  SIZE_TYPE len = semicolon - i;
4155  SIZE_TYPE p = i + 1;
4156  if (str[i+1] == '#') {
4157  // Check on numeric character reference encoding
4159  p++;
// NOTE(review): "len || len <= 4" is true for every value of len, so this
// length limit never applies; the literal-entity branch below uses
// "len && len <= 10", which suggests "&&" was intended here as well.
4160  if (len || len <= 4) {
4161  for (; p < semicolon; ++p) {
4162  if (!isdigit((unsigned char)(str[p])))
4163  break;
4164  }
4165  }
4166  }
4167  } else {
4168  // Check on literal entity
4170  if (len && len <= 10) {
4171  for (; p < semicolon; ++p) {
4172  if (!isalpha((unsigned char)(str[p])))
4173  break;
4174  }
4175  }
4176  }
4177  }
// The scan reached ';' only if every character in between was acceptable
4178  is_entity = (p == semicolon);
4179  }
4180  }
4181  if ( is_entity ) {
4183  ERR_POST_X_ONCE(5, Info << "string \"" << str <<
4184  "\" contains HTML encoded entities");
4185  }
4186  } else {
4187  result.append("amp;");
4188  }
4189  }}
4190  break;
4191  case '<':
4192  result.append("&lt;");
4193  break;
4194  case '>':
4195  result.append("&gt;");
4196  break;
4197  case '\'':
4198  result.append("&apos;");
4199  break;
4200  case '"':
4201  result.append("&quot;");
4202  break;
4203  default:
4204  if ((unsigned int)c < 0x20) {
// Hexadecimal character reference without a leading zero digit
4205  const char* charmap = "0123456789abcdef";
4206  result.append("&#x");
4207  Uint1 ch = c;
4208  unsigned hi = ch >> 4;
4209  unsigned lo = ch & 0xF;
4210  if ( hi ) {
4211  result.append(1, charmap[hi]);
4212  }
4213  result.append(1, charmap[lo]).append(1, ';');
4214  } else if (c > 0x7F) {
4215  result.append("&#x").append( NStr::NumericToString(c, 0, 16)).append(1, ';');;
4216  } else {
4217  result.append(1, c);
4218  }
4219  break;
4220  }
4221  }
4222  return result;
4223 }
4224 
4225 
4226 // Character entity references
4227 // http://www.w3.org/TR/html4/sgml/entities.html
4228 // http://www.w3.org/TR/1998/REC-html40-19980424/charset.html#h-5.3
4229 // only some entities from here were added (those shifted to right):
4230 // http://dev.w3.org/html5/html-author/charref
4231 
// Table mapping Unicode code points ("u") to HTML entity names ("s", stored
// without the leading '&' and the trailing ';').
// The array ends with a { 0, 0 } sentinel; the loops that scan it stop at
// the first entry whose "u" is 0.
// Entries are almost, but not strictly, ordered by code point
// (8472 "weierp" precedes 8465 "image"), so the table is only suitable for
// linear search.
4232 static struct tag_HtmlEntities
4233 {
4234  TUnicodeSymbol u;
4235  const char* s;
4236 }
4237 const s_HtmlEntities[] = {
4238  { 9, "Tab" },
4239  { 10, "NewLine" },
4240  { 33, "excl" },
4241  { 34, "quot" },
4242  { 35, "num" },
4243  { 36, "dollar" },
4244  { 37, "percnt" },
4245  { 38, "amp" },
4246  { 39, "apos" },
4247  { 40, "lpar" },
4248  { 41, "rpar" },
4249  { 42, "ast" },
4250  { 43, "plus" },
4251  { 44, "comma" },
4252  { 46, "period" },
4253  { 47, "sol" },
4254  { 58, "colon" },
4255  { 59, "semi" },
4256  { 60, "lt" },
4257  { 61, "equals" },
4258  { 62, "gt" },
4259  { 63, "quest" },
4260  { 64, "commat" },
4261  { 91, "lsqb" },
4262  { 92, "bsol" },
4263  { 93, "rsqb" },
4264  { 94, "Hat" },
4265  { 95, "lowbar" },
4266  { 96, "grave" },
4267  { 123, "lcub" },
4268  { 124, "verbar" },
4269  { 125, "rcub" },
4270  { 160, "nbsp" },
4271  { 161, "iexcl" },
4272  { 162, "cent" },
4273  { 163, "pound" },
4274  { 164, "curren" },
4275  { 165, "yen" },
4276  { 166, "brvbar" },
4277  { 167, "sect" },
4278  { 168, "uml" },
4279  { 169, "copy" },
4280  { 170, "ordf" },
4281  { 171, "laquo" },
4282  { 172, "not" },
4283  { 173, "shy" },
4284  { 174, "reg" },
4285  { 175, "macr" },
4286  { 176, "deg" },
4287  { 177, "plusmn" },
4288  { 178, "sup2" },
4289  { 179, "sup3" },
4290  { 180, "acute" },
4291  { 181, "micro" },
4292  { 182, "para" },
4293  { 183, "middot" },
4294  { 184, "cedil" },
4295  { 185, "sup1" },
4296  { 186, "ordm" },
4297  { 187, "raquo" },
4298  { 188, "frac14" },
4299  { 189, "frac12" },
4300  { 190, "frac34" },
4301  { 191, "iquest" },
4302  { 192, "Agrave" },
4303  { 193, "Aacute" },
4304  { 194, "Acirc" },
4305  { 195, "Atilde" },
4306  { 196, "Auml" },
4307  { 197, "Aring" },
4308  { 198, "AElig" },
4309  { 199, "Ccedil" },
4310  { 200, "Egrave" },
4311  { 201, "Eacute" },
4312  { 202, "Ecirc" },
4313  { 203, "Euml" },
4314  { 204, "Igrave" },
4315  { 205, "Iacute" },
4316  { 206, "Icirc" },
4317  { 207, "Iuml" },
4318  { 208, "ETH" },
4319  { 209, "Ntilde" },
4320  { 210, "Ograve" },
4321  { 211, "Oacute" },
4322  { 212, "Ocirc" },
4323  { 213, "Otilde" },
4324  { 214, "Ouml" },
4325  { 215, "times" },
4326  { 216, "Oslash" },
4327  { 217, "Ugrave" },
4328  { 218, "Uacute" },
4329  { 219, "Ucirc" },
4330  { 220, "Uuml" },
4331  { 221, "Yacute" },
4332  { 222, "THORN" },
4333  { 223, "szlig" },
4334  { 224, "agrave" },
4335  { 225, "aacute" },
4336  { 226, "acirc" },
4337  { 227, "atilde" },
4338  { 228, "auml" },
4339  { 229, "aring" },
4340  { 230, "aelig" },
4341  { 231, "ccedil" },
4342  { 232, "egrave" },
4343  { 233, "eacute" },
4344  { 234, "ecirc" },
4345  { 235, "euml" },
4346  { 236, "igrave" },
4347  { 237, "iacute" },
4348  { 238, "icirc" },
4349  { 239, "iuml" },
4350  { 240, "eth" },
4351  { 241, "ntilde" },
4352  { 242, "ograve" },
4353  { 243, "oacute" },
4354  { 244, "ocirc" },
4355  { 245, "otilde" },
4356  { 246, "ouml" },
4357  { 247, "divide" },
4358  { 248, "oslash" },
4359  { 249, "ugrave" },
4360  { 250, "uacute" },
4361  { 251, "ucirc" },
4362  { 252, "uuml" },
4363  { 253, "yacute" },
4364  { 254, "thorn" },
4365  { 255, "yuml" },
4366  { 338, "OElig" },
4367  { 339, "oelig" },
4368  { 352, "Scaron" },
4369  { 353, "scaron" },
4370  { 376, "Yuml" },
4371  { 402, "fnof" },
4372  { 710, "circ" },
4373  { 732, "tilde" },
4374  { 913, "Alpha" },
4375  { 914, "Beta" },
4376  { 915, "Gamma" },
4377  { 916, "Delta" },
4378  { 917, "Epsilon" },
4379  { 918, "Zeta" },
4380  { 919, "Eta" },
4381  { 920, "Theta" },
4382  { 921, "Iota" },
4383  { 922, "Kappa" },
4384  { 923, "Lambda" },
4385  { 924, "Mu" },
4386  { 925, "Nu" },
4387  { 926, "Xi" },
4388  { 927, "Omicron" },
4389  { 928, "Pi" },
4390  { 929, "Rho" },
4391  { 931, "Sigma" },
4392  { 932, "Tau" },
4393  { 933, "Upsilon" },
4394  { 934, "Phi" },
4395  { 935, "Chi" },
4396  { 936, "Psi" },
4397  { 937, "Omega" },
4398  { 945, "alpha" },
4399  { 946, "beta" },
4400  { 947, "gamma" },
4401  { 948, "delta" },
4402  { 949, "epsilon" },
4403  { 950, "zeta" },
4404  { 951, "eta" },
4405  { 952, "theta" },
4406  { 953, "iota" },
4407  { 954, "kappa" },
4408  { 955, "lambda" },
4409  { 956, "mu" },
4410  { 957, "nu" },
4411  { 958, "xi" },
4412  { 959, "omicron" },
4413  { 960, "pi" },
4414  { 961, "rho" },
4415  { 962, "sigmaf" },
4416  { 963, "sigma" },
4417  { 964, "tau" },
4418  { 965, "upsilon" },
4419  { 966, "phi" },
4420  { 967, "chi" },
4421  { 968, "psi" },
4422  { 969, "omega" },
4423  { 977, "thetasym" },
4424  { 978, "upsih" },
4425  { 982, "piv" },
4426  { 8194, "ensp" },
4427  { 8195, "emsp" },
4428  { 8201, "thinsp" },
4429  { 8204, "zwnj" },
4430  { 8205, "zwj" },
4431  { 8206, "lrm" },
4432  { 8207, "rlm" },
4433  { 8211, "ndash" },
4434  { 8212, "mdash" },
4435  { 8216, "lsquo" },
4436  { 8217, "rsquo" },
4437  { 8218, "sbquo" },
4438  { 8220, "ldquo" },
4439  { 8221, "rdquo" },
4440  { 8222, "bdquo" },
4441  { 8224, "dagger" },
4442  { 8225, "Dagger" },
4443  { 8226, "bull" },
4444  { 8230, "hellip" },
4445  { 8240, "permil" },
4446  { 8242, "prime" },
4447  { 8243, "Prime" },
4448  { 8249, "lsaquo" },
4449  { 8250, "rsaquo" },
4450  { 8254, "oline" },
4451  { 8260, "frasl" },
4452  { 8364, "euro" },
4453  { 8472, "weierp" },
4454  { 8465, "image" },
4455  { 8476, "real" },
4456  { 8482, "trade" },
4457  { 8501, "alefsym" },
4458  { 8592, "larr" },
4459  { 8593, "uarr" },
4460  { 8594, "rarr" },
4461  { 8595, "darr" },
4462  { 8596, "harr" },
4463  { 8629, "crarr" },
4464  { 8656, "lArr" },
4465  { 8657, "uArr" },
4466  { 8658, "rArr" },
4467  { 8659, "dArr" },
4468  { 8660, "hArr" },
4469  { 8704, "forall" },
4470  { 8706, "part" },
4471  { 8707, "exist" },
4472  { 8709, "empty" },
4473  { 8711, "nabla" },
4474  { 8712, "isin" },
4475  { 8713, "notin" },
4476  { 8715, "ni" },
4477  { 8719, "prod" },
4478  { 8721, "sum" },
4479  { 8722, "minus" },
4480  { 8727, "lowast" },
4481  { 8730, "radic" },
4482  { 8733, "prop" },
4483  { 8734, "infin" },
4484  { 8736, "ang" },
4485  { 8743, "and" },
4486  { 8744, "or" },
4487  { 8745, "cap" },
4488  { 8746, "cup" },
4489  { 8747, "int" },
4490  { 8756, "there4" },
4491  { 8764, "sim" },
4492  { 8773, "cong" },
4493  { 8776, "asymp" },
4494  { 8800, "ne" },
4495  { 8801, "equiv" },
4496  { 8804, "le" },
4497  { 8805, "ge" },
4498  { 8834, "sub" },
4499  { 8835, "sup" },
4500  { 8836, "nsub" },
4501  { 8838, "sube" },
4502  { 8839, "supe" },
4503  { 8853, "oplus" },
4504  { 8855, "otimes" },
4505  { 8869, "perp" },
4506  { 8901, "sdot" },
4507  { 8968, "lceil" },
4508  { 8969, "rceil" },
4509  { 8970, "lfloor" },
4510  { 8971, "rfloor" },
4511  { 9001, "lang" },
4512  { 9002, "rang" },
4513  { 9674, "loz" },
4514  { 9824, "spades" },
4515  { 9827, "clubs" },
4516  { 9829, "hearts" },
4517  { 9830, "diams" },
4518  { 0, 0 }
4519 };
4520 
4522 {
4523  const struct tag_HtmlEntities* p = s_HtmlEntities;
4524  for ( ; p->u != 0; ++p) {
4525  if (uch == p->u) {
4526  return p->s;
4527  }
4528  }
4529  return kEmptyStr;
4530 }
4531 
// Decode HTML named entities ("&name;") and numeric character references
// ("&#NNN;", "&#xHHH;") found in "str". Decoded symbols are appended as
// UTF-8; text that is not part of a recognized entity is appended as is for
// UTF-8/ASCII input and converted with CUtf8::AsUTF8() otherwise.
//   encoding     - source encoding; eEncoding_Unknown requests a guess
//   result_flags - if not NULL, receives the value of "result"
// Throws CStringException (eBadArgs) if the encoding cannot be guessed.
// NOTE(review): listing lines 4549, 4589, 4595, 4626 and 4634 are absent from
// this excerpt. The declaration of "uch" and the closing brace are among
// them; in the visible lines "result" is never changed from 0, so the
// missing lines presumably set its bits.
4532 string NStr::HtmlDecode(const CTempString str, EEncoding encoding, THtmlDecode* result_flags)
4533 {
4534  string ustr;
4535  THtmlDecode result = 0;
4536 
4537  if (encoding == eEncoding_Unknown) {
4538  encoding = CUtf8::GuessEncoding(str);
4539  if (encoding == eEncoding_Unknown) {
4540  NCBI_THROW2(CStringException, eBadArgs,
4541  "Unable to guess the source string encoding", 0);
4542  }
4543  }
4544  // wild guess...
4545  ustr.reserve(str.size());
4546 
4547  CTempString::const_iterator i, e = str.end();
4548  char ch;
4550 
4551  for (i = str.begin(); i != e;) {
4552  ch = *(i++);
4553  //check for HTML entities and character references
4554  if (i != e && ch == '&') {
4555  CTempString::const_iterator start_of_entity, end_of_entity, itmp;
// end_of_entity == i is used below as the "no terminating ';' found" marker
4556  end_of_entity = itmp = i;
// ent/dec/hex: candidate is a named entity / decimal / hexadecimal reference
4557  bool ent, dec, hex, parsed=false;
4558  ent = isalpha((unsigned char)(*itmp)) != 0;
4559  dec = !ent && *itmp == '#' && ++itmp != e &&
4560  isdigit((unsigned char)(*itmp)) != 0;
// NOTE(review): this test also runs when "ent" is true. For a name starting
// with 'x' or 'X' the "++itmp" is executed before isxdigit() fails, so
// start_of_entity ends up one character too far: "&xi;" and "&Xi;" are
// looked up as "i" and are never decoded although both are in the table.
4561  hex = !dec && itmp != e &&
4562  (*itmp == 'x' || *itmp == 'X') && ++itmp != e &&
4563  isxdigit((unsigned char)(*itmp)) != 0;
4564  start_of_entity = itmp;
4565 
4566  if (itmp != e && (ent || dec || hex)) {
4567  // do not look too far
4568  for (int len=0; len<16 && itmp != e; ++len, ++itmp) {
4569  if (*itmp == '&' || *itmp == '#') {
4570  break;
4571  }
4572  if (*itmp == ';') {
4573  end_of_entity = itmp;
4574  break;
4575  }
// Each candidate kind survives only while every character fits it
4576  ent = ent && isalnum( (unsigned char)(*itmp)) != 0;
4577  dec = dec && isdigit( (unsigned char)(*itmp)) != 0;
4578  hex = hex && isxdigit((unsigned char)(*itmp)) != 0;
4579  }
4580  if (end_of_entity != i && (ent || dec || hex)) {
4581  uch = 0;
4582  if (ent) {
// Named entity: case-sensitive linear search in s_HtmlEntities
4583  string entity(start_of_entity, end_of_entity);
4584  const struct tag_HtmlEntities* p = s_HtmlEntities;
4585  for ( ; p->u != 0; ++p) {
4586  if (entity.compare(p->s) == 0) {
4587  uch = p->u;
4588  parsed = true;
4590  break;
4591  }
4592  }
4593  } else {
// Numeric reference: accumulate the code point digit by digit
4594  parsed = true;
4596  for (itmp = start_of_entity; itmp != end_of_entity; ++itmp) {
4597  TUnicodeSymbol ud = *itmp;
4598  if (dec) {
4599  uch = 10 * uch + (ud - '0');
4600  } else if (hex) {
4601  if (ud >='0' && ud <= '9') {
4602  ud -= '0';
4603  } else if (ud >='a' && ud <= 'f') {
4604  ud -= 'a';
4605  ud += 10;
4606  } else if (ud >='A' && ud <= 'F') {
4607  ud -= 'A';
4608  ud += 10;
4609  }
4610  uch = 16 * uch + ud;
4611  }
4612  }
4613  }
4614  if (parsed) {
// Append the decoded symbol and resume scanning right after the ';'
4615  ustr += CUtf8::AsUTF8(&uch,1);
4616  i = ++end_of_entity;
4617  continue;
4618  }
4619  }
4620  }
4621  }
4622  // no entity - append as is
4623  if (encoding == eEncoding_UTF8 || encoding == eEncoding_Ascii) {
4624  ustr.append( 1, ch );
4625  } else {
4627  ustr += CUtf8::AsUTF8(CTempString(&ch,1), encoding);
4628  }
4629  }
4630  if (result_flags) {
4631  *result_flags = result;
4632  }
4633  return ustr;
4635 
4636 
4637 // http://www.json.org/
4638 
4639 string NStr::JsonEncode(const CTempString str, EJsonEncode encoding)
4640 {
4641  string result;
4642  // wild guess...
4643  result.reserve(str.size()+2);
4644 
4645  auto encode_char = [&](char c)
4646  {
4647  static const char* charmap = "0123456789abcdef";
4648  result.append("\\u00");
4649  Uint1 ch = c;
4650  unsigned hi = ch >> 4;
4651  unsigned lo = ch & 0xF;
4652  result.append(1, charmap[hi]);
4653  result.append(1, charmap[lo]);
4654  };
4655 
4656  for (auto c : str) {
4657  switch ( c ) {
4658  case '"':
4659  result.append("\\\"");
4660  break;
4661  case '\\':
4662  result.append("\\\\");
4663  break;
4664  default:
4665  if ((unsigned int)c < 0x20) {
4666  // Control characters U+0000 through U+001F
4667  encode_char(c);
4668  } else {
4669  if (encoding == eJsonEnc_UTF8 && (unsigned int)c >= 0x80) {
4670  encode_char(c);
4671  } else {
4672  result.append(1, c);
4673  }
4674  }
4675  break;
4676  }
4677  }
4678  if (encoding == eJsonEnc_Quoted) {
4679  return '"' + result + '"';
4680  }
4681  return result;
4682 }
4683 
4684 
4685 string NStr::ShellEncode(const string& str)
4686 {
4687  // 1. Special-case of non-printable characters. We have no choice and
4688  // must use BASH extensions if we want printable output.
4689  //
4690  // Aesthetic issue: Most people are not familiar with the BASH-only
4691  // quoting style. Avoid it as much as possible.
4692 
4693  ITERATE ( string, it, str ) {
4694  if ( !isprint(Uchar(*it)) ) {
4695  return "$'" + NStr::PrintableString(str) + "'";
4696  }
4697  }
4698 
4699  /////////////////////////////////////////////////////////////////////////
4700  // Bourne Shell quoting as IEEE-standard without special extensions.
4701  //
4702  // There are 3 basic ways to quote/escape in Bourne Shell:
4703  //
4704  // - Single-quotes. All characters (including non-printable
4705  // characters newlines, backslashes), are literal. There is no escape.
4706  // - Double-quotes. Need to escape some metacharacters, such as literal
4707  // escape (\‍), variable expansion ($) and command substitution (`).
4708  // - Escape without quotes. Use backslash.
4709  /////////////////////////////////////////////////////////////////////////
4710 
4711  // 2. Non-empty printable string without meta-characters.
4712  //
4713  // Shell special characters, according to IEEE Std 1003.1,
4714  // plus ! (Bourne shell exit status negation and Bash history expansion),
4715  // braces (Bourne enhanced expansion), space, tab, and newline.
4716  //
4717  // See http://www.opengroup.org/onlinepubs/009695399/toc.htm
4718  // See Bourne and Bash man pages.
4719 
4720  if (!str.empty() &&
4721  str.find_first_of("!{} \t\r\n[|&;<>()$`\"'*?#~=%\\") == NPOS) {
4722  return str;
4723  }
4724 
4725  // 3. Printable string, but either empty or some shell meta-characters.
4726  //
4727  // Aesthetics preference:
4728  // i) If the string includes literal single-quotes, then prefer
4729  // double-quoting provided there is no need to escape embedded
4730  // literal double-quotes, escapes (\‍), variable substitution ($),
4731  // or command substitution (`).
4732 
4733  if (str.find('\'') != NPOS &&
4734  str.find_first_of("\"\\$`") == NPOS) {
4735  return "\"" + str + "\"";
4736  }
4737 
4738  // Use single-quoting. The only special case for Bourne shell
4739  // single-quoting is a literal single-quote, which needs to
4740  // be pulled out of the quoted region.
4741  //
4742  // Single-quoting does not have any escape character, so close
4743  // the quoted string ('), then emit an escaped or quoted literal
4744  // single-quote (\' or "'"), and resume the quoted string (').
4745  //
4746  // Aesthetics preferences:
4747  // ii) Prefer single-quoting over escape characters, especially
4748  // escaped whitespace. However, this is in compromise to optimal
4749  // quoting: if there are many literal single-quotes and the
4750  // use of double-quotes would involve the need to escape embedded
4751  // characters, then it may be more pleasing to escape the
4752  // shell meta-characters, and avoid the need for single-quoting
4753  // in the presence of literal single-quotes.
4754  // iii) If there are no literal double-quotes, then all else being equal,
4755  // avoid double-quotes and prefer escaping. Double-quotes are
4756  // more commonly used by enclosing formats such as ASN.1 Text
4757  // and CVS, and would thus need to be escaped. If there are
4758  // literal double-quotes, then having them is in the output is
4759  // unavoidable, and this aesthetics rule becomes secondary to
4760  // the preference for avoiding escape characters. If there are
4761  // literal escape characters, then having them is unavoidable
4762  // and avoidance of double-quotes is once again recommended.
4763 
4764  // TODO: Should simplify runs of multiple quotes, for example:
4765  // '\'''\'''\'' -> '"'''"'
4766 
4767  bool avoid_double_quotes = (str.find('"') == NPOS ||
4768  str.find('\\') != NPOS);
4769  string s = "'" + NStr::Replace(str, "'",
4770  avoid_double_quotes ? "'\\''" : "'\"'\"'") + "'";
4771 
4772  // Aesthetic improvement: Remove paired single-quotes ('')
4773  // that aren't escaped, as these evaluate to an empty string.
4774  // Don't apply this simplification for the degenerate case when
4775  // the string is the empty string ''. (Non degenerate strings
4776  // must be length greater than 2). Implement the equivalent
4777  // of the Perl regexp:
4778  //
4779  // s/(?<!\\‍)''//g
4780  //
4781  if (s.size() > 2) {
4782  size_t pos = 0;
4783  while ( true ) {
4784  pos = s.find("''", pos);
4785  if (pos == NPOS) break;
4786  if (pos == 0 || s[pos-1] != '\\') {
4787  s.erase(pos, 2);
4788  } else {
4789  ++pos;
4790  }
4791  }
4792  }
4794  return s;
4795 }
4796 
4797 
4798 string NStr::ParseEscapes(const CTempString str, EEscSeqRange mode, char user_char)
4799 {
4800  string out;
4801  out.reserve(str.size()); // result string can only be smaller
4802  SIZE_TYPE pos = 0;
4803  bool is_error = false;
4804 
4805  while (pos < str.size() || !is_error) {
4806  SIZE_TYPE pos2 = str.find('\\', pos);
4807  if (pos2 == NPOS) {
4808  //~ out += str.substr(pos);
4809  CTempString sub(str, pos);
4810  out += sub;
4811  break;
4812  }
4813  //~ out += str.substr(pos, pos2 - pos);
4814  CTempString sub(str, pos, pos2-pos);
4815  out += sub;
4816  if (++pos2 == str.size()) {
4817  NCBI_THROW2(CStringException, eFormat,
4818  "Unterminated escape sequence", pos2);
4819  }
4820  switch (str[pos2]) {
4821  case 'a': out += '\a'; break;
4822  case 'b': out += '\b'; break;
4823  case 'f': out += '\f'; break;
4824  case 'n': out += '\n'; break;
4825  case 'r': out += '\r'; break;
4826  case 't': out += '\t'; break;
4827  case 'v': out += '\v'; break;
4828  case 'x':
4829  {{
4830  pos = ++pos2;
4831  while (pos < str.size()
4832  && isxdigit((unsigned char) str[pos])) {
4833  pos++;
4834  }
4835  if (pos > pos2) {
4836  SIZE_TYPE len = pos-pos2;
4837  if ((mode == eEscSeqRange_FirstByte) && (len > 2)) {
4838  // Take only 2 first hex-digits
4839  len = 2;
4840  pos = pos2 + 2;
4841  }
4842  unsigned int value =
4843  StringToUInt(CTempString(str, pos2, len), 0, 16);
4844  if ((mode != eEscSeqRange_Standard) && (value > 255)) {
4845  // eEscSeqRange_Standard -- by default
4846  switch (mode) {
4848  // Already have right value
4849  break;
4850  case eEscSeqRange_Throw:
4851  NCBI_THROW2(CStringException, eFormat,
4852  "Escape sequence '" + NStr::PrintableString(CTempString(str, pos2, len)) +
4853  "' is out of range [0-255]", pos2);
4854  break;
4855  case eEscSeqRange_Errno:
4856  CNcbiError::SetErrno(errno = ERANGE, str);
4857  is_error = true;
4858  continue;
4859  case eEscSeqRange_User:
4860  value = (unsigned)user_char;
4861  break;
4862  default:
4863  NCBI_THROW2(CStringException, eFormat, "Wrong set of flags", pos2);
4864  }
4865  }
4866  out += static_cast<char>(value);
4867  } else {
4868  NCBI_THROW2(CStringException, eFormat,
4869  "\\x followed by no hexadecimal digits", pos);
4870  }
4871  }}
4872  continue;
4873  case '0': case '1': case '2': case '3':
4874  case '4': case '5': case '6': case '7':
4875  {{
4876  pos = pos2;
4877  unsigned char c = (unsigned char)(str[pos++] - '0');
4878  while (pos < pos2 + 3 && pos < str.size()
4879  && str[pos] >= '0' && str[pos] <= '7') {
4880  c = (unsigned char)((c << 3) | (str[pos++] - '0'));
4881  }
4882  out += c;
4883  }}
4884  continue;
4885  case '\n':
4886  // quoted EOL means no EOL
4887  break;
4888  default:
4889  out += str[pos2];
4890  break;
4891  }
4892  pos = pos2 + 1;
4893  }
4894  if (mode == eEscSeqRange_Errno) {
4895  if (is_error) {
4896  return kEmptyStr;
4897  }
4898  errno = 0;
4899  }
4900  return out;
4901 }
4902 
4903 
4904 CTempString s_Unquote(const CTempString str, size_t* n_read)
4905 {
4906  const char* str_pos = str.data();
4907  char quote_char;
4908 
4909  if (str.empty() || ((quote_char = *str_pos) != '"' && quote_char != '\'')) {
4910  NCBI_THROW2(CStringException, eFormat,
4911  "The source string must start with a quote", 0);
4912  }
4913 
4914  const char* str_end = str_pos + str.length();
4915  bool escaped = false;
4916 
4917  while (++str_pos < str_end) {
4918  if (*str_pos == quote_char && !escaped) {
4919  size_t pos = str_pos - str.data();
4920  if (n_read != NULL)
4921  *n_read = pos + 1;
4922  return CTempString(str.data() + 1, pos - 1);
4923  } else {
4924  escaped = *str_pos == '\\' ? !escaped : false;
4925  }
4926  }
4928  "Unterminated quoted string", str.length());
4929 }
4930 
4931 
4932 string NStr::ParseQuoted(const CTempString str, size_t* n_read /*= NULL*/)
4933 {
4934  return ParseEscapes(s_Unquote(std::move(str), n_read));
4935 }
4936 
4937 
4938 // An adjusted copy-paste of NStr::ParseEscapes
4940 {
4941  string out;
4942  out.reserve(str.size()); // result string can only be smaller
4943  SIZE_TYPE pos = 0;
4944 
4945  while (pos < str.size()) {
4946  SIZE_TYPE pos2 = str.find('\\', pos);
4947  if (pos2 == NPOS) {
4948  //~ out += str.substr(pos);
4949  CTempString sub(str, pos);
4950  out += sub;
4951  break;
4952  }
4953  //~ out += str.substr(pos, pos2 - pos);
4954  CTempString sub(str, pos, pos2-pos);
4955  out += sub;
4956  if (++pos2 == str.size()) {
4957  NCBI_THROW2(CStringException, eFormat,
4958  "Unterminated escape sequence", pos2);
4959  }
4960  switch (str[pos2]) {
4961  case '"':
4962  case '\\':
4963  case '/': out += str[pos2]; break;
4964  case 'b': out += '\b'; break;
4965  case 'f': out += '\f'; break;
4966  case 'n': out += '\n'; break;
4967  case 'r': out += '\r'; break;
4968  case 't': out += '\t'; break;
4969  case 'u':
4970  pos = ++pos2;
4971  while (pos < str.size() && isxdigit((unsigned char) str[pos])) {
4972  pos++;
4973  }
4974  if (auto len = pos - pos2) {
4975  if (len < 4) {
4976  NCBI_THROW2(CStringException, eFormat, "Invalid JSON escape sequence", pos2);
4977  } else if (len > 4) {
4978  len = 4;
4979  pos = pos2 + 4;
4980  }
4981  unsigned int value = NStr::StringToUInt(CTempString(str, pos2, len), 0, 16);
4982  if (value > 0xff) {
4983  NCBI_THROW2(CStringException, eConvert,
4984  "Escaped UTF-8 characters after '\\u00ff' are not supported", pos2);
4985  }
4986  out += static_cast<char>(value);
4987  continue;
4988  } else {
4989  NCBI_THROW2(CStringException, eFormat, "\\u followed by no hexadecimal digits", pos);
4990  }
4991  default:
4992  NCBI_THROW2(CStringException, eFormat, "Invalid JSON escape sequence", pos2);
4993  }
4994  pos = pos2 + 1;
4995  }
4996  return out;
4997 }
4998 
4999 
5000 string NStr::JsonDecode(const CTempString str, size_t* n_read /*= NULL*/)
5001 {
5002  return s_ParseJsonEncodeEscapes(s_Unquote(std::move(str), n_read));
5004 
5005 
5006 // Determines the end of an HTML <...> tag, accounting for attributes
5007 // and comments (the latter allowed only within <!...>).
5008 static SIZE_TYPE s_EndOfTag(const string& str, SIZE_TYPE start)
5009 {
5010  _ASSERT(start < str.size() && str[start] == '<');
5011  bool comments_ok = (start + 1 < str.size() && str[start + 1] == '!');
5012  for (SIZE_TYPE pos = start + 1; pos < str.size(); ++pos) {
5013  switch (str[pos]) {
5014  case '>': // found the end
5015  return pos;
5016 
5017  case '\"': // start of "string"; advance to end
5018  pos = str.find('\"', pos + 1);
5019  if (pos == NPOS) {
5020  NCBI_THROW2(CStringException, eFormat,
5021  "Unclosed string in HTML tag", start);
5022  // return pos;
5023  }
5024  break;
5025 
5026  case '-': // possible start of -- comment --; advance to end
5027  if (comments_ok && pos + 1 < str.size()
5028  && str[pos + 1] == '-') {
5029  pos = str.find("--", pos + 2);
5030  if (pos == NPOS) {
5031  NCBI_THROW2(CStringException, eFormat,
5032  "Unclosed comment in HTML tag", start);
5033  // return pos;
5034  } else {
5035  ++pos;
5036  }
5037  }
5038  }
5039  }
5040  NCBI_THROW2(CStringException, eFormat, "Unclosed HTML tag", start);
5041  // return NPOS;
5043 
5044 
5045 // Determines the end of an HTML &foo; character/entity reference
5046 // (which might not actually end with a semicolon :-/ , but we ignore that case)
5047 static SIZE_TYPE s_EndOfReference(const string& str, SIZE_TYPE start)
5048 {
5049  _ASSERT(start < str.size() && str[start] == '&');
5050 
5051  SIZE_TYPE pos = str.find_first_not_of
5052  ("#0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
5053  start + 1);
5054  if (pos != NPOS && str[pos] == ';') {
5055  // found terminating semicolon, so it's valid, and we return that
5056  return pos;
5057  } else {
5058  // We consider it just a '&' by itself since it's invalid
5059  return start;
5060  }
5061 }
5062 
5063 
5064 static SIZE_TYPE s_VisibleHtmlWidth(const string& str)
5065 {
5066  SIZE_TYPE width = 0, pos = 0;
5067  for (;;) {
5068  SIZE_TYPE pos2 = str.find_first_of("<&", pos);
5069  if (pos2 == NPOS) {
5070  width += str.size() - pos;
5071  break;
5072  } else {
5073  width += pos2 - pos;
5074  if (str[pos2] == '&') {
5075  ++width;
5076  pos = s_EndOfReference(str, pos);
5077  } else {
5078  pos = s_EndOfTag(str, pos);
5079  }
5080  if (pos == NPOS) {
5081  break;
5082  } else {
5083  ++pos;
5084  }
5085  }
5086  }
5087  return width;
5088 }
5089 
// Locale-independent whitespace test: true for the bytes 0x09-0x0D
// (TAB, LF, VT, FF, CR) and 0x20 (space), false for everything else.
static
inline bool _isspace(unsigned char c)
{
    return ((c>=0x09 && c<=0x0D) || (c==0x20));
}
5095 
5096 template<typename _D>
5097 void NStr::WrapIt(const string& str, SIZE_TYPE width,
5098  _D& dest, TWrapFlags flags,
5099  const string* prefix,
5100  const string* prefix1)
5101 {
5102  if (prefix == 0) {
5103  prefix = &kEmptyStr;
5104  }
5105 
5106  if (prefix1 == 0)
5107  prefix1 = prefix;
5108 
5109  SIZE_TYPE pos = 0, len = str.size(), nl_pos = 0;
5110 
5111  const bool is_html = flags & fWrap_HTMLPre ? true : false;
5112  const bool do_flat = (flags & fWrap_FlatFile) != 0;
5113  string temp_back; temp_back.reserve(width);
5114 
5115  enum EScore { // worst to best
5116  eForced,
5117  ePunct,
5118  eComma,
5119  eSpace,
5120  eNewline
5121  };
5122 
5123  // To avoid copying parts of str when we need to store a
5124  // substr of str, we store the substr as a pair
5125  // representing start (inclusive) and end (exclusive).
5126  typedef pair<SIZE_TYPE, SIZE_TYPE> TWrapSubstr;
5127 
5128  // This variable is used for HTML links that cross line boundaries.
5129  // Since it's aesthetically displeasing for a link to cross a boundary, we
5130  // close it at the end of each line and re-open it after the next line's
5131  // prefix
5132  // (This is needed in, e.g. AE017351)
5133  TWrapSubstr best_link(0, 0); // last link found before current best_pos
5134  TWrapSubstr latest_link(0, 0); // last link found at all
5135 
5136  while (pos < len) {
5137  bool hyphen = false; // "-" or empty
5138  SIZE_TYPE column = is_html ? s_VisibleHtmlWidth(*prefix1) : prefix1->size();
5139  SIZE_TYPE column0 = column;
5140  // the next line will start at best_pos
5141  SIZE_TYPE best_pos = NPOS;
5142  EScore best_score = eForced;
5143 
5144  // certain logic can be skipped if this part has no backspace,
5145  // which is, by far, the most common case
5146  bool thisPartHasBackspace = false;
5147 
5148  temp_back = *prefix1;
5149 
5150  // append any still-open links from previous lines
5151  if (is_html && best_link.second != 0) {
5152  temp_back.append(
5153  str.begin() + best_link.first,
5154  str.begin() + best_link.second);
5155  }
5156 
5157  SIZE_TYPE pos0 = pos;
5158 
5159  // we can't do this in HTML mode because we might have to deal with
5160  // link tags that go across lines.
5161  if (!is_html) {
5162  if (nl_pos <= pos) {
5163  nl_pos = str.find('\n', pos);
5164  if (nl_pos == NPOS) {
5165  nl_pos = len;
5166  }
5167  }
5168  if (column + (nl_pos - pos) <= width) {
5169  pos0 = nl_pos;
5170  }
5171  }
5172 
5173  for (SIZE_TYPE pos2 = pos0; pos2 < len && column <= width;
5174  ++pos2, ++column) {
5175  EScore score = eForced;
5176  SIZE_TYPE score_pos = pos2;
5177  const char c = str[pos2];
5178 
5179  if (c == '\n') {
5180  best_pos = pos2;
5181  best_score = eNewline;
5182  best_link = latest_link;
5183  break;
5184  }
5185  else if (_isspace((unsigned char)c)) {
5186  if (!do_flat && pos2 > 0 &&
5187  _isspace((unsigned char)str[pos2 - 1])) {
5188  if (pos2 < len - 1 && str[pos2 + 1] == '\b') {
5189  thisPartHasBackspace = true;
5190  }
5191  continue; // take the first space of a group
5192  }
5193  score = eSpace;
5194  }
5195  else if (is_html && c == '<') {
5196  // treat tags as zero-width...
5197  SIZE_TYPE start_of_tag = pos2;
5198  pos2 = s_EndOfTag(str, pos2);
5199  --column;
5200  if (pos2 == NPOS) {
5201  break;
5202  }
5203 
5204  if ((pos2 - start_of_tag) >= 6 &&
5205  str[start_of_tag + 1] == 'a' &&
5206  str[start_of_tag + 2] == ' ' &&
5207  str[start_of_tag + 3] == 'h' &&
5208  str[start_of_tag + 4] == 'r' &&
5209  str[start_of_tag + 5] == 'e' &&
5210  str[start_of_tag + 6] == 'f')
5211  {
5212  // remember current link in case of line wrap
5213  latest_link.first = start_of_tag;
5214  latest_link.second = pos2 + 1;
5215  }
5216  if ((pos2 - start_of_tag) >= 3 &&
5217  str[start_of_tag + 1] == '/' &&
5218  str[start_of_tag + 2] == 'a' &&
5219  str[start_of_tag + 3] == '>')
5220  {
5221  // link is closed
5222  latest_link.first = 0;
5223  latest_link.second = 0;
5224  }
5225  }
5226  else if (is_html && c == '&') {
5227  // ...and references as single characters
5228  pos2 = s_EndOfReference(str, pos2);
5229  if (pos2 == NPOS) {
5230  break;
5231  }
5232  }
5233  else if (c == ',' && column < width && score_pos < len - 1) {
5234  score = eComma;
5235  ++score_pos;
5236  }
5237  else if (do_flat ? c == '-' : ispunct((unsigned char)c)) {
5238  // For flat files, only whitespace, hyphens and commas
5239  // are special.
5240  switch (c) {
5241  case '(': case '[': case '{': case '<': case '`':
5242  score = ePunct;
5243  break;
5244  default:
5245  if (score_pos < len - 1 && column < width) {
5246  score = ePunct;
5247  ++score_pos;
5248  }
5249  break;
5250  }
5251  }
5252 
5253  if (score >= best_score && score_pos > pos0) {
5254  best_pos = score_pos;
5255  best_score = score;
5256  best_link = latest_link;
5257  }
5258 
5259  while (pos2 < len - 1 && str[pos2 + 1] == '\b') {
5260  // Account for backspaces
5261  ++pos2;
5262  if (column > column0) {
5263  --column;
5264  }
5265  thisPartHasBackspace = true;
5266  }
5267  }
5268 
5269  if (best_score != eNewline && column <= width) {
5270  if (best_pos != len) {
5271  // If the whole remaining text can fit, don't split it...
5272  best_pos = len;
5273  best_link = latest_link;
5274  // Force backspace checking, to play it safe
5275  thisPartHasBackspace = true;
5276  }
5277  }
5278  else if (best_score == eForced && (flags & fWrap_Hyphenate)) {
5279  hyphen = true;
5280  --best_pos;
5281  }
5282 
5283  {{
5284  string::const_iterator begin = str.begin() + pos;
5285  string::const_iterator end = str.begin() + best_pos;
5286  if (thisPartHasBackspace) {
5287  // eat backspaces and the characters (if any) that precede them
5288 
5289  string::const_iterator bs; // position of next backspace
5290  while ((bs = find(begin, end, '\b')) != end) {
5291  if (bs != begin) {
5292  // add all except the last one
5293  temp_back.append(begin, bs - 1);
5294  }
5295  else {
5296  // The backspace is at the beginning of next substring,
5297  // so we should remove previously added symbol if any.
5298  SIZE_TYPE size = temp_back.size();
5299  if (size > prefix1->size()) { // current size > prefix size
5300  temp_back.resize(size - 1);
5301  }
5302  }
5303  // skip over backspace
5304  begin = bs + 1;
5305  }
5306  }
5307  if (begin != end) {
5308  // add remaining characters
5309  temp_back.append(begin, end);
5310  }
5311  }}
5312 
5313  // if we didn't close the link on this line, we
5314  // close it here
5315  if (is_html && best_link.second != 0) {
5316  temp_back += "</a>";
5317  }
5318 
5319  if (hyphen) {
5320  temp_back += '-';
5321  }
5322  pos = best_pos;
5323  prefix1 = prefix;
5324 
5325  if (do_flat) {
5326  if (best_score == eSpace) {
5327  while (str[pos] == ' ') {
5328  ++pos;
5329  }
5330  if (str[pos] == '\n') {
5331  ++pos;
5332  }
5333  }
5334  if (best_score == eNewline) {
5335  ++pos;
5336  }
5337  }
5338  else {
5339  if (best_score == eSpace || best_score == eNewline) {
5340  ++pos;
5341  }
5342  }
5343  while (pos < len && str[pos] == '\b') {
5344  ++pos;
5345  }
5346 
5347  dest.Append(temp_back);
5348  }
5349 }
5350 
5351 
5352 void NStr::Wrap(const string& str, SIZE_TYPE width,
5353  IWrapDest& dest, TWrapFlags flags,
5354  const string* prefix,
5355  const string* prefix1)
5357  WrapIt(str, width, dest, flags, prefix, prefix1);
5358 }
5359 
5360 
5361 list<string>& NStr::Wrap(const string& str, SIZE_TYPE width,
5362  list<string>& arr2, NStr::TWrapFlags flags,
5363  const string* prefix, const string* prefix1)
5364 {
5365  CWrapDestStringList d(arr2);
5366  WrapIt(str, width, d, flags, prefix, prefix1);
5367  return arr2;
5368 }
5369 
5370 
5371 list<string>& NStr::WrapList(const list<string>& l, SIZE_TYPE width,
5372  const string& delim, list<string>& arr,
5374  const string* prefix,
5375  const string* prefix1)
5376 {
5377  if (l.empty()) {
5378  return arr;
5379  }
5380 
5381  const string* pfx = prefix1 ? prefix1 : prefix;
5382  string s = *pfx;
5383  bool is_html = flags & fWrap_HTMLPre ? true : false;
5384  SIZE_TYPE column = is_html? s_VisibleHtmlWidth(s) : s.size();
5385  SIZE_TYPE delwidth = is_html? s_VisibleHtmlWidth(delim) : delim.size();
5386  bool at_start = true;
5387 
5388  ITERATE (list<string>, it, l) {
5389  SIZE_TYPE term_width = is_html ? s_VisibleHtmlWidth(*it) : it->size();
5390  if ( at_start ) {
5391  if (column + term_width <= width) {
5392  s += *it;
5393  column += term_width;
5394  at_start = false;
5395  } else {
5396  // Can't fit, even on its own line; break separately.
5397  Wrap(*it, width, arr, flags, prefix, pfx);
5398  pfx = prefix;
5399  s = *prefix;
5400  column = is_html ? s_VisibleHtmlWidth(s) : s.size();
5401  at_start = true;
5402  }
5403  } else if (column + delwidth + term_width <= width) {
5404  s += delim;
5405  s += *it;
5406  column += delwidth + term_width;
5407  at_start = false;
5408  } else {
5409  // Can't fit on this line; break here and try again.
5410  arr.push_back(s);
5411  pfx = prefix;
5412  s = *prefix;
5413  column = is_html ? s_VisibleHtmlWidth(s) : s.size();
5414  at_start = true;
5415  --it;
5416  }
5417  }
5418  arr.push_back(s);
5419  return arr;
5420 }
5421 
5422 
5423 list<string>& NStr::Justify(const CTempString str,
5424  SIZE_TYPE width,
5425  list<string>& par,
5426  const CTempString* pfx,
5427  const CTempString* pfx1)
5428 {
5429  static const CTempString kNothing;
5430  if (!pfx)
5431  pfx = &kNothing;
5432  const CTempString* p = pfx1 ? pfx1 : pfx;
5433 
5434  SIZE_TYPE pos = 0;
5435  for (SIZE_TYPE len = p->size(); pos < str.size(); len = p->size()) {
5436  list<CTempString> words;
5437  unsigned int nw = 0; // How many words are there in the line
5438  bool big = false;
5439  do {
5440  while (pos < str.size()) {
5441  if (!isspace((unsigned char) str[pos]))
5442  break;
5443  ++pos;
5444  }
5445  SIZE_TYPE start = pos;
5446  while (pos < str.size()) {
5447  if ( isspace((unsigned char) str[pos]))
5448  break;
5449  ++pos;
5450  }
5451  SIZE_TYPE wlen = pos - start;
5452  if (!wlen)
5453  break;
5454  if (width < len + nw + wlen) {
5455  if (nw) {
5456  if (width < wlen && len < width - len)
5457  big = true; // Big word is coming, no space stretch
5458  pos = start; // Will have to rescan this word again
5459  break;
5460  }
5461  big = true; // Long line with a long lonely word :-/
5462  }
5463  words.push_back(CTempString(str, start, wlen));
5464  len += wlen;
5465  ++nw;
5466  if (str[pos - 1] == '.' ||
5467  str[pos - 1] == '!' ||
5468  str[pos - 1] == '?') {
5469  if (len + 1 >= width)
5470  break;
5471  words.push_back(CTempString("", 0));
5472  _ASSERT(!big);
5473  nw++;
5474  }
5475  } while (!big);
5476  if (!nw)
5477  break;
5478  if (words.back().empty()) {
5479  words.pop_back();
5480  _ASSERT(nw > 1);
5481  nw--;
5482  }
5483  SIZE_TYPE space;
5484  if (nw > 1) {
5485  if (pos < str.size() && len < width && !big) {
5486  space = (width - len) / (nw - 1);
5487  nw = (unsigned int)((width - len) % (nw - 1));
5488  } else {
5489  space = 1;
5490  nw = 0;
5491  }
5492  } else
5493  space = 0;
5494  par.push_back(*p);
5495  unsigned int n = 0;
5496  ITERATE(list<CTempString>, w, words) {
5497  if (n)
5498  par.back().append(space + (n <= nw ? 1 : 0) , ' ');
5499  par.back().append(w->data(), w->size());
5500  ++n;
5501  }
5502  p = pfx;
5503  }
5504  return par;
5505 }
5506 
5507 
5508 string NStr::Dedent(const CTempString str, TDedentFlags flags)
5509 {
5510  if (str.empty()) {
5511  return str;
5512  }
5513 #if !defined(NCBI_OS_MSWIN)
5514 #endif
5515  vector<CTempString> lines;
5516  NStr::Split(str, "\n", lines);
5517 
5518  // Find common whitespace prefix
5519 
5520  CTempString prefix; // common prefix
5521 
5522  for (SIZE_TYPE i = 0; i < lines.size(); i++) {
5523  auto& line = lines[i];
5524  SIZE_TYPE len = line.size();
5525  if (i == 0 && (flags & fDedent_SkipFirstLine) ) {
5526  // Skip first line
5527  continue;
5528  }
5529  if (!len) {
5530  // Skip empty lines
5531  continue;
5532  }
5533  SIZE_TYPE pos = 0;
5534  while (isspace((unsigned char)line[pos])) {
5535  if (++pos == len) {
5536  break;
5537  }
5538  }
5539  if (!pos) {
5540  // No whitespaces on the current line, no common empty prefix
5541  break;
5542  }
5543  if (pos == len && (flags & fDedent_NormalizeEmptyLines)) {
5544  // Line have whitespaces only -- exclude this line from a common prefix computing
5545  continue;
5546  }
5547  // Update length of the common prefix
5548  if (prefix.empty() || prefix.size() > pos) {
5549  prefix.assign(line, 0, pos);
5550  }
5551  }
5552 
5553  // Trim common prefix (if any), do necessary processing requested by flags
5554 
5555  string result;
5556  result.reserve(str.size());
5557 
5558  for (SIZE_TYPE i = 0; i < lines.size(); i++) {
5559  auto& line = lines[i];
5560  SIZE_TYPE len = line.size();
5561  bool last_line = (i == lines.size()-1);
5562 
5563  if (i == 0) {
5564  if ((flags & fDedent_SkipFirstLine) ||
5565  ((flags & fDedent_SkipEmptyFirstLine) && !len) ) {
5566  // Skip first line from result
5567  continue;
5568  }
5569  }
5570  if (!len) {
5571  // Skip empty lines
5572  if (!last_line) {
5573  result += '\n';
5574  }
5575  continue;
5576  }
5578  SIZE_TYPE pos = 0;
5579  while (isspace((unsigned char)line[pos])) {
5580  if (++pos == len) {
5581  break;
5582  }
5583  }
5584  if (pos == len) {
5585  // Normalize whitespace only lines
5586  if (!last_line) {
5587  result += '\n';
5588  }
5589  continue;
5590  }
5591  }
5592  // Trim common prefix, if any
5593  if ( prefix.size() ) {
5595  }
5596  result += line;
5597  if (!last_line) {
5598  result += '\n';
5599  }
5600  }
5601  return result;
5603 
5604 
5605 
#if !defined(HAVE_STRDUP)
// Fallback for platforms without strdup(): return a malloc()-allocated
// copy of 'str', terminating NUL included. Returns 0 if 'str' is NULL or
// if the allocation fails.
extern char* strdup(const char* str)
{
    if (str == 0) {
        return 0;
    }
    const size_t n_bytes = strlen(str) + 1;
    char* copy = (char*) malloc(n_bytes);
    if (copy != 0) {
        memcpy(copy, str, n_bytes);
    }
    return copy;
}
#endif
5617 
5618 
// Encoding table indexed by byte value. ASCII letters, digits and the
// characters ! $ ' ( ) * , - . _ stand for themselves, the space is
// written as "+", and every other byte as "%XX" (upper-case hex).
static const char s_Encode[256][4] = {
    // 0x00 - 0x1F
    "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07",
    "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F",
    "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17",
    "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F",
    // 0x20 - 0x3F
    "+",   "!",   "%22", "%23", "$",   "%25", "%26", "'",
    "(",   ")",   "*",   "%2B", ",",   "-",   ".",   "%2F",
    "0",   "1",   "2",   "3",   "4",   "5",   "6",   "7",
    "8",   "9",   "%3A", "%3B", "%3C", "%3D", "%3E", "%3F",
    // 0x40 - 0x5F
    "%40", "A",   "B",   "C",   "D",   "E",   "F",   "G",
    "H",   "I",   "J",   "K",   "L",   "M",   "N",   "O",
    "P",   "Q",   "R",   "S",   "T",   "U",   "V",   "W",
    "X",   "Y",   "Z",   "%5B", "%5C", "%5D", "%5E", "_",
    // 0x60 - 0x7F
    "%60", "a",   "b",   "c",   "d",   "e",   "f",   "g",
    "h",   "i",   "j",   "k",   "l",   "m",   "n",   "o",
    "p",   "q",   "r",   "s",   "t",   "u",   "v",   "w",
    "x",   "y",   "z",   "%7B", "%7C", "%7D", "%7E", "%7F",
    // 0x80 - 0xFF
    "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87",
    "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F",
    "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97",
    "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F",
    "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7",
    "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF",
    "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7",
    "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF",
    "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7",
    "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF",
    "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7",
    "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF",
    "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7",
    "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF",
    "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7",
    "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF"
};
5653 
// Encoding table indexed by byte value. Only ASCII letters and digits
// stand for themselves; the space is written as "+", and every other byte,
// punctuation marks included, as "%XX" (upper-case hex).
static const char s_EncodeMarkChars[256][4] = {
    // 0x00 - 0x1F
    "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07",
    "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F",
    "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17",
    "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F",
    // 0x20 - 0x3F
    "+",   "%21", "%22", "%23", "%24", "%25", "%26", "%27",
    "%28", "%29", "%2A", "%2B", "%2C", "%2D", "%2E", "%2F",
    "0",   "1",   "2",   "3",   "4",   "5",   "6",   "7",
    "8",   "9",   "%3A", "%3B", "%3C", "%3D", "%3E", "%3F",
    // 0x40 - 0x5F
    "%40", "A",   "B",   "C",   "D",   "E",   "F",   "G",
    "H",   "I",   "J",   "K",   "L",   "M",   "N",   "O",
    "P",   "Q",   "R",   "S",   "T",   "U",   "V",   "W",
    "X",   "Y",   "Z",   "%5B", "%5C", "%5D", "%5E", "%5F",
    // 0x60 - 0x7F
    "%60", "a",   "b",   "c",   "d",   "e",   "f",   "g",
    "h",   "i",   "j",   "k",   "l",   "m",   "n",   "o",
    "p",   "q",   "r",   "s",   "t",   "u",   "v",   "w",
    "x",   "y",   "z",   "%7B", "%7C", "%7D", "%7E", "%7F",
    // 0x80 - 0xFF
    "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87",
    "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F",
    "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97",
    "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F",
    "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7",
    "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF",
    "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7",
    "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF",
    "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7",
    "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF",
    "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7",
    "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF",
    "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7",
    "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF",
    "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7",
    "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF"
};
5688 
// Encoding table indexed by byte value. Only ASCII letters and digits
// stand for themselves; every other byte, the space included, is written
// as "%XX" (upper-case hex).
static const char s_EncodePercentOnly[256][4] = {
    // 0x00 - 0x1F
    "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07",
    "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F",
    "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17",
    "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F",
    // 0x20 - 0x3F
    "%20", "%21", "%22", "%23", "%24", "%25", "%26", "%27",
    "%28", "%29", "%2A", "%2B", "%2C", "%2D", "%2E", "%2F",
    "0",   "1",   "2",   "3",   "4",   "5",   "6",   "7",
    "8",   "9",   "%3A", "%3B", "%3C", "%3D", "%3E", "%3F",
    // 0x40 - 0x5F
    "%40", "A",   "B",   "C",   "D",   "E",   "F",   "G",
    "H",   "I",   "J",   "K",   "L",   "M",   "N",   "O",
    "P",   "Q",   "R",   "S",   "T",   "U",   "V",   "W",
    "X",   "Y",   "Z",   "%5B", "%5C", "%5D", "%5E", "%5F",
    // 0x60 - 0x7F
    "%60", "a",   "b",   "c",   "d",   "e",   "f",   "g",
    "h",   "i",   "j",   "k",   "l",   "m",   "n",   "o",
    "p",   "q",   "r",   "s",   "t",   "u",   "v",   "w",
    "x",   "y",   "z",   "%7B", "%7C", "%7D", "%7E", "%7F",
    // 0x80 - 0xFF
    "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87",
    "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F",
    "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97",
    "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F",
    "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7",
    "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF",
    "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7",
    "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF",
    "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7",
    "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF",
    "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7",
    "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF",
    "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7",
    "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF",
    "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7",
    "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF"
};
5723 
// Encoding table indexed by byte value. ASCII letters, digits and the
// characters . / _ stand for themselves, the space is written as "+", and
// every other byte as "%XX" (upper-case hex).
static const char s_EncodePath[256][4] = {
    // 0x00 - 0x1F
    "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07",
    "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F",
    "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17",
    "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F",
    // 0x20 - 0x3F
    "+",   "%21", "%22", "%23", "%24", "%25", "%26", "%27",
    "%28", "%29", "%2A", "%2B", "%2C", "%2D", ".",   "/",
    "0",   "1",   "2",   "3",   "4",   "5",   "6",   "7",
    "8",   "9",   "%3A", "%3B", "%3C", "%3D", "%3E", "%3F",
    // 0x40 - 0x5F
    "%40", "A",   "B",   "C",   "D",   "E",   "F",   "G",
    "H",   "I",   "J",   "K",   "L",   "M",   "N",   "O",
    "P",   "Q",   "R",   "S",   "T",   "U",   "V",   "W",
    "X",   "Y",   "Z",   "%5B", "%5C", "%5D", "%5E", "_",
    // 0x60 - 0x7F
    "%60", "a",   "b",   "c",   "d",   "e",   "f",   "g",
    "h",   "i",   "j",   "k",   "l",   "m",   "n",   "o",
    "p",   "q",   "r",   "s",   "t",   "u",   "v",   "w",
    "x",   "y",   "z",   "%7B", "%7C", "%7D", "%7E", "%7F",
    // 0x80 - 0xFF
    "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87",
    "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F",
    "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97",
    "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F",
    "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7",
    "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF",
    "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7",
    "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF",
    "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7",
    "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF",
    "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7",
    "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF",
    "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7",
    "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF",
    "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7",
    "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF"
};
5758 
// RFC-2396:
// scheme = alpha *( alpha | digit | "+" | "-" | "." )
//
// Encoding table indexed by byte value. ASCII letters, digits and the
// characters + - . stand for themselves; every other byte, the space
// included, is written as "%XX" (upper-case hex).
static const char s_EncodeURIScheme[256][4] = {
    // 0x00 - 0x1F
    "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07",
    "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F",
    "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17",
    "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F",
    // 0x20 - 0x3F
    "%20", "%21", "%22", "%23", "%24", "%25", "%26", "%27",
    "%28", "%29", "%2A", "+",   "%2C", "-",   ".",   "%2F",
    "0",   "1",   "2",   "3",   "4",   "5",   "6",   "7",
    "8",   "9",   "%3A", "%3B", "%3C", "%3D", "%3E", "%3F",
    // 0x40 - 0x5F
    "%40", "A",   "B",   "C",   "D",   "E",   "F",   "G",
    "H",   "I",   "J",   "K",   "L",   "M",   "N",   "O",
    "P",   "Q",   "R",   "S",   "T",   "U",   "V",   "W",
    "X",   "Y",   "Z",   "%5B", "%5C", "%5D", "%5E", "%5F",
    // 0x60 - 0x7F
    "%60", "a",   "b",   "c",   "d",   "e",   "f",   "g",
    "h",   "i",   "j",   "k",   "l",   "m",   "n",   "o",
    "p",   "q",   "r",   "s",   "t",   "u",   "v",   "w",
    "x",   "y",   "z",   "%7B", "%7C", "%7D", "%7E", "%7F",
    // 0x80 - 0xFF
    "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87",
    "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F",
    "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97",
    "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F",
    "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7",
    "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF",
    "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7",
    "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF",
    "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7",
    "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF",
    "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7",
    "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF",
    "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7",
    "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF",
    "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7",
    "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF"
};
5795 
5796 // RFC-2396:
5797 // userinfo = *( unreserved | escaped |
5798 // ";" | ":" | "&" | "=" | "+" | "$" | "," )
5799 // unreserved = alphanum | mark
5800 // mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
5801 // Note: ":" is name/password separator, so it must be encoded in each of them.
// Substitution table selected by NStr::URLEncode() for eUrlEnc_URIUserinfo.
// Indexed by the unsigned byte value (0..255). An entry is either the
// character itself (copied unchanged) or its three-character "%XX" escape.
// Left unescaped here: letters, digits and ! $ & ' ( ) * + , - . ; = _ ~
// (':' is escaped as "%3A").
5802 static const char s_EncodeURIUserinfo[256][4] = {
5803  "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07",
5804  "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F",
5805  "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17",
5806  "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F",
5807  "%20", "!", "%22", "%23", "$", "%25", "&", "'",
5808  "(", ")", "*", "+", ",", "-", ".", "%2F",
5809  "0", "1", "2", "3", "4", "5", "6", "7",
5810  "8", "9", "%3A", ";", "%3C", "=", "%3E", "%3F",
5811  "%40", "A", "B", "C", "D", "E", "F", "G",
5812  "H", "I", "J", "K", "L", "M", "N", "O",
5813  "P", "Q", "R", "S", "T", "U", "V", "W",
5814  "X", "Y", "Z", "%5B", "%5C", "%5D", "%5E", "_",
5815  "%60", "a", "b", "c", "d", "e", "f", "g",
5816  "h", "i", "j", "k", "l", "m", "n", "o",
5817  "p", "q", "r", "s", "t", "u", "v", "w",
5818  "x", "y", "z", "%7B", "%7C", "%7D", "~", "%7F",
5819  "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87",
5820  "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F",
5821  "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97",
5822  "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F",
5823  "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7",
5824  "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF",
5825  "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7",
5826  "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF",
5827  "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7",
5828  "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF",
5829  "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7",
5830  "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF",
5831  "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7",
5832  "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF",
5833  "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7",
5834  "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF"
5835 };
5836 
5837 // RFC-2396:
5838 // host = hostname | IPv4address
5839 // hostname = *( domainlabel "." ) toplabel [ "." ]
5840 // domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum
5841 // toplabel = alpha | alpha *( alphanum | "-" ) alphanum
5842 // IPv4address = 1*digit "." 1*digit "." 1*digit "." 1*digit
// Substitution table selected by NStr::URLEncode() for eUrlEnc_URIHost.
// Indexed by the unsigned byte value (0..255). An entry is either the
// character itself (copied unchanged) or its three-character "%XX" escape.
// Left unescaped here: letters, digits, '-' and '.'.
5843 static const char s_EncodeURIHost[256][4] = {
5844  "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07",
5845  "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F",
5846  "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17",
5847  "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F",
5848  "%20", "%21", "%22", "%23", "%24", "%25", "%26", "%27",
5849  "%28", "%29", "%2A", "%2B", "%2C", "-", ".", "%2F",
5850  "0", "1", "2", "3", "4", "5", "6", "7",
5851  "8", "9", "%3A", "%3B", "%3C", "%3D", "%3E", "%3F",
5852  "%40", "A", "B", "C", "D", "E", "F", "G",
5853  "H", "I", "J", "K", "L", "M", "N", "O",
5854  "P", "Q", "R", "S", "T", "U", "V", "W",
5855  "X", "Y", "Z", "%5B", "%5C", "%5D", "%5E", "%5F",
5856  "%60", "a", "b", "c", "d", "e", "f", "g",
5857  "h", "i", "j", "k", "l", "m", "n", "o",
5858  "p", "q", "r", "s", "t", "u", "v", "w",
5859  "x", "y", "z", "%7B", "%7C", "%7D", "%7E", "%7F",
5860  "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87",
5861  "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F",
5862  "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97",
5863  "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F",
5864  "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7",
5865  "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF",
5866  "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7",
5867  "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF",
5868  "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7",
5869  "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF",
5870  "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7",
5871  "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF",
5872  "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7",
5873  "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF",
5874  "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7",
5875  "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF"
5876 };
5877 
5878 // RFC-2396:
5879 // path_segments = segment *( "/" segment )
5880 // segment = *pchar *( ";" param )
5881 // param = *pchar
5882 // pchar = unreserved | escaped |
5883 // ":" | "@" | "&" | "=" | "+" | "$" | ","
5884 // unreserved = alphanum | mark
5885 // mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
// Substitution table selected by NStr::URLEncode() for eUrlEnc_URIPath.
// Indexed by the unsigned byte value (0..255). An entry is either the
// character itself (copied unchanged) or its three-character "%XX" escape.
// Left unescaped here: letters, digits and
// ! $ & ' ( ) * + , - . / : ; = @ _ ~
5886 static const char s_EncodeURIPath[256][4] = {
5887  "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07",
5888  "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F",
5889  "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17",
5890  "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F",
5891  "%20", "!", "%22", "%23", "$", "%25", "&", "'",
5892  "(", ")", "*", "+", ",", "-", ".", "/",
5893  "0", "1", "2", "3", "4", "5", "6", "7",
5894  "8", "9", ":", ";", "%3C", "=", "%3E", "%3F",
5895  "@", "A", "B", "C", "D", "E", "F", "G",
5896  "H", "I", "J", "K", "L", "M", "N", "O",
5897  "P", "Q", "R", "S", "T", "U", "V", "W",
5898  "X", "Y", "Z", "%5B", "%5C", "%5D", "%5E", "_",
5899  "%60", "a", "b", "c", "d", "e", "f", "g",
5900  "h", "i", "j", "k", "l", "m", "n", "o",
5901  "p", "q", "r", "s", "t", "u", "v", "w",
5902  "x", "y", "z", "%7B", "%7C", "%7D", "~", "%7F",
5903  "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87",
5904  "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F",
5905  "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97",
5906  "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F",
5907  "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7",
5908  "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF",
5909  "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7",
5910  "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF",
5911  "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7",
5912  "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF",
5913  "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7",
5914  "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF",
5915  "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7",
5916  "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF",
5917  "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7",
5918  "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF"
5919 };
5920 
// Substitution table selected by NStr::URLEncode() for eUrlEnc_URIQueryName.
// Indexed by the unsigned byte value (0..255). An entry is either the
// character itself (copied unchanged) or its three-character "%XX" escape.
// Left unescaped here: letters, digits and ! $ ' ( ) - . / : ? @ _ ~
// Note that '&', '=', ';', '+', ',' and '*' are escaped.
5921 static const char s_EncodeURIQueryName[256][4] = {
5922  "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07",
5923  "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F",
5924  "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17",
5925  "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F",
5926  "%20", "!", "%22", "%23", "$", "%25", "%26", "'",
5927  "(", ")", "%2A", "%2B", "%2C", "-", ".", "/",
5928  "0", "1", "2", "3", "4", "5", "6", "7",
5929  "8", "9", ":", "%3B", "%3C", "%3D", "%3E", "?",
5930  "@", "A", "B", "C", "D", "E", "F", "G",
5931  "H", "I", "J", "K", "L", "M", "N", "O",
5932  "P", "Q", "R", "S", "T", "U", "V", "W",
5933  "X", "Y", "Z", "%5B", "%5C", "%5D", "%5E", "_",
5934  "%60", "a", "b", "c", "d", "e", "f", "g",
5935  "h", "i", "j", "k", "l", "m", "n", "o",
5936  "p", "q", "r", "s", "t", "u", "v", "w",
5937  "x", "y", "z", "%7B", "%7C", "%7D", "~", "%7F",
5938  "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87",
5939  "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F",
5940  "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97",
5941  "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F",
5942  "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7",
5943  "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF",
5944  "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7",
5945  "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF",
5946  "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7",
5947  "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF",
5948  "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7",
5949  "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF",
5950  "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7",
5951  "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF",
5952  "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7",
5953  "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF"
5954 };
5955 
// Substitution table selected by NStr::URLEncode() for eUrlEnc_URIQueryValue.
// Indexed by the unsigned byte value (0..255). An entry is either the
// character itself (copied unchanged) or its three-character "%XX" escape.
// Left unescaped here: letters, digits and ! $ ' ( ) - . / : ? @ _ ~
// Note that '&', '=', ';', '+', ',' and '*' are escaped.
5956 static const char s_EncodeURIQueryValue[256][4] = {
5957  "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07",
5958  "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F",
5959  "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17",
5960  "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F",
5961  "%20", "!", "%22", "%23", "$", "%25", "%26", "'",
5962  "(", ")", "%2A", "%2B", "%2C", "-", ".", "/",
5963  "0", "1", "2", "3", "4", "5", "6", "7",
5964  "8", "9", ":", "%3B", "%3C", "%3D", "%3E", "?",
5965  "@", "A", "B", "C", "D", "E", "F", "G",
5966  "H", "I", "J", "K", "L", "M", "N", "O",
5967  "P", "Q", "R", "S", "T", "U", "V", "W",
5968  "X", "Y", "Z", "%5B", "%5C", "%5D", "%5E", "_",
5969  "%60", "a", "b", "c", "d", "e", "f", "g",
5970  "h", "i", "j", "k", "l", "m", "n", "o",
5971  "p", "q", "r", "s", "t", "u", "v", "w",
5972  "x", "y", "z", "%7B", "%7C", "%7D", "~", "%7F",
5973  "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87",
5974  "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F",
5975  "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97",
5976  "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F",
5977  "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7",
5978  "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF",
5979  "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7",
5980  "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF",
5981  "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7",
5982  "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF",
5983  "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7",
5984  "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF",
5985  "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7",
5986  "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF",
5987  "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7",
5988  "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF"
5989 };
5990 
5991 // RFC-2396:
5992 // fragment = *uric
5993 // uric = reserved | unreserved | escaped
5994 // reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | ","
5995 // unreserved = alphanum | mark
5996 // mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
// Substitution table selected by NStr::URLEncode() for eUrlEnc_URIFragment.
// Indexed by the unsigned byte value (0..255). An entry is either the
// character itself (copied unchanged) or its three-character "%XX" escape.
// Left unescaped here: letters, digits and
// ! $ & ' ( ) * + , - . / : ; = ? @ _ ~
5997 static const char s_EncodeURIFragment[256][4] = {
5998  "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07",
5999  "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F",
6000  "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17",
6001  "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F",
6002  "%20", "!", "%22", "%23", "$", "%25", "&", "'",
6003  "(", ")", "*", "+", ",", "-", ".", "/",
6004  "0", "1", "2", "3", "4", "5", "6", "7",
6005  "8", "9", ":", ";", "%3C", "=", "%3E", "?",
6006  "@", "A", "B", "C", "D", "E", "F", "G",
6007  "H", "I", "J", "K", "L", "M", "N", "O",
6008  "P", "Q", "R", "S", "T", "U", "V", "W",
6009  "X", "Y", "Z", "%5B", "%5C", "%5D", "%5E", "_",
6010  "%60", "a", "b", "c", "d", "e", "f", "g",
6011  "h", "i", "j", "k", "l", "m", "n", "o",
6012  "p", "q", "r", "s", "t", "u", "v", "w",
6013  "x", "y", "z", "%7B", "%7C", "%7D", "~", "%7F",
6014  "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87",
6015  "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F",
6016  "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97",
6017  "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F",
6018  "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7",
6019  "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF",
6020  "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7",
6021  "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF",
6022  "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7",
6023  "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF",
6024  "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7",
6025  "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF",
6026  "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7",
6027  "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF",
6028  "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7",
6029  "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF"
6030 };
6031 
// Substitution table selected by NStr::URLEncode() and
// NStr::NeedsURLEncoding() for eUrlEnc_Cookie.
// Indexed by the unsigned byte value (0..255). An entry is either a single
// replacement character or a three-character "%XX" escape.
// The space character is replaced by '+'; letters, digits and
// ! $ ' ( ) * - . _ are copied unchanged; everything else is escaped
// (including '+' itself, as "%2B").
6032 static const char s_EncodeCookie[256][4] = {
6033  "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07",
6034  "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F",
6035  "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17",
6036  "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F",
6037  "+", "!", "%22", "%23", "$", "%25", "%26", "'",
6038  "(", ")", "*", "%2B", "%2C", "-", ".", "%2F",
6039  "0", "1", "2", "3", "4", "5", "6", "7",
6040  "8", "9", "%3A", "%3B", "%3C", "%3D", "%3E", "%3F",
6041  "%40", "A", "B", "C", "D", "E", "F", "G",
6042  "H", "I", "J", "K", "L", "M", "N", "O",
6043  "P", "Q", "R", "S", "T", "U", "V", "W",
6044  "X", "Y", "Z", "%5B", "%5C", "%5D", "%5E", "_",
6045  "%60", "a", "b", "c", "d", "e", "f", "g",
6046  "h", "i", "j", "k", "l", "m", "n", "o",
6047  "p", "q", "r", "s", "t", "u", "v", "w",
6048  "x", "y", "z", "%7B", "%7C", "%7D", "%7E", "%7F",
6049  "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87",
6050  "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F",
6051  "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97",
6052  "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F",
6053  "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7",
6054  "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF",
6055  "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7",
6056  "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF",
6057  "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7",
6058  "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF",
6059  "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7",
6060  "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF",
6061  "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7",
6062  "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF",
6063  "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7",
6064  "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF"
6065 };
6066 
6067 string NStr::URLEncode(const CTempString str, EUrlEncode flag)
6068 {
6069  SIZE_TYPE len = str.length();
6070  if ( !len ) {
6071  return kEmptyStr;
6072  }
6073  const char (*encode_table)[4];
6074  switch (flag) {
6075  case eUrlEnc_SkipMarkChars:
6076  encode_table = s_Encode;
6077  break;
6079  encode_table = s_EncodeMarkChars;
6080  break;
6081  case eUrlEnc_PercentOnly:
6082  encode_table = s_EncodePercentOnly;
6083  break;
6084  case eUrlEnc_Path:
6085  encode_table = s_EncodePath;
6086  break;
6087  case eUrlEnc_URIScheme:
6088  encode_table = s_EncodeURIScheme;
6089  break;
6090  case eUrlEnc_URIUserinfo:
6091  encode_table = s_EncodeURIUserinfo;
6092  break;
6093  case eUrlEnc_URIHost:
6094  encode_table = s_EncodeURIHost;
6095  break;
6096  case eUrlEnc_URIPath:
6097  encode_table = s_EncodeURIPath;
6098  break;
6099  case eUrlEnc_URIQueryName:
6100  encode_table = s_EncodeURIQueryName;
6101  break;
6102  case eUrlEnc_URIQueryValue:
6103  encode_table = s_EncodeURIQueryValue;
6104  break;
6105  case eUrlEnc_URIFragment:
6106  encode_table = s_EncodeURIFragment;
6107  break;
6108  case eUrlEnc_Cookie:
6109  encode_table = s_EncodeCookie;
6110  break;
6111  case eUrlEnc_None:
6112  return str;
6113  default:
6114  _TROUBLE;
6115  // To keep off compiler warning
6116  encode_table = 0;
6117  }
6118 
6119  string dst;
6120  SIZE_TYPE pos;
6121  SIZE_TYPE dst_len = len;
6122  const unsigned char* cstr = (const unsigned char*)str.data();
6123  for (pos = 0; pos < len; pos++) {
6124  if (encode_table[cstr[pos]][0] == '%')
6125  dst_len += 2;
6126  }
6127  dst.resize(dst_len);
6128 
6129  SIZE_TYPE p = 0;
6130  for (pos = 0; pos < len; pos++, p++) {
6131  const char* subst = encode_table[cstr[pos]];
6132  if (*subst != '%') {
6133  dst[p] = *subst;
6134  } else {
6135  dst[p] = '%';
6136  dst[++p] = *(++subst);
6137  dst[++p] = *(++subst);
6138  }
6139  }
6140  _ASSERT( p == dst_len );
6141  return dst;
6142 }
6143 
6144 
6145 CStringUTF8 NStr::SQLEncode(const CStringUTF8& str, ESqlEncode flag)
6146 {
6147  SIZE_TYPE stringSize = str.size(), offset = 0;
6149 
6150  result.reserve(stringSize + 7);
6151  if (flag == eSqlEnc_TagNonASCII) {
6152  result.append(1, 'N');
6153  offset = 1;
6154  }
6155  result.append(1, '\'');
6156  for (SIZE_TYPE i = 0; i < stringSize; i++) {
6157  char c = str[i];
6158  if (c == '\'') {
6159  result.append(1, '\'');
6160  } else if (offset > 0 && (c & 0x80) != 0) {
6161  offset = 0;
6162  }
6163  result.append(1, c);
6164  }
6165  result.append(1, '\'');
6166 
6167  return result.substr(offset);
6168 }
6169 
6170 
6171 static
6172 void s_URLDecode(const CTempString src, string& dst, NStr::EUrlDecode flag)
6173 {
6174  SIZE_TYPE len = src.length();
6175  if ( !len ) {
6176  dst.erase();
6177  return;
6178  }
6179  if (dst.length() < src.length()) {
6180  dst.resize(len);
6181  }
6182 
6183  SIZE_TYPE pdst = 0;
6184  for (SIZE_TYPE psrc = 0; psrc < len; pdst++) {
6185  switch ( src[psrc] ) {
6186  case '%': {
6187  // Accordingly RFC 1738 the '%' character is unsafe
6188  // and should be always encoded, but sometimes it is
6189  // not really encoded...
6190  if (psrc + 2 > len) {
6191  dst[pdst] = src[psrc++];
6192  } else {
6193  int n1 = NStr::HexChar(src[psrc+1]);
6194  int n2 = NStr::HexChar(src[psrc+2]);
6195  if (n1 < 0 || n1 > 15 || n2 < 0 || n2 > 15) {
6196  dst[pdst] = src[psrc++];
6197  } else {
6198  dst[pdst] = char((n1 << 4) | n2);
6199  psrc += 3;
6200  }
6201  }
6202  break;
6203  }
6204  case '+': {
6205  dst[pdst] = (flag == NStr::eUrlDec_All) ? ' ' : '+';
6206  psrc++;
6207  break;
6208  }
6209  default:
6210  dst[pdst] = src[psrc++];
6211  }
6212  }
6213  if (pdst < len) {
6214  dst.resize(pdst);
6215  }
6216 }
6217 
6218 
6219 string NStr::URLDecode(const CTempString str, EUrlDecode flag)
6220 {
6221  string dst;
6222  s_URLDecode(str, dst, flag);
6223  return dst;
6224 }
6225 
6226 
6227 void NStr::URLDecodeInPlace(string& str, EUrlDecode flag)
6229  s_URLDecode(str, str, flag);
6230 }
6231 
6232 
6234 {
6235  SIZE_TYPE len = str.length();
6236  if ( !len ) {
6237  return false;
6238  }
6239  const char (*encode_table)[4];
6240  switch (flag) {
6241  case eUrlEnc_SkipMarkChars:
6242  encode_table = s_Encode;
6243  break;
6245  encode_table = s_EncodeMarkChars;
6246  break;
6247  case eUrlEnc_PercentOnly:
6248  encode_table = s_EncodePercentOnly;
6249  break;
6250  case eUrlEnc_Path:
6251  encode_table = s_EncodePath;
6252  break;
6253  case eUrlEnc_Cookie:
6254  encode_table = s_EncodeCookie;
6255  break;
6256  case eUrlEnc_None:
6257  return false;
6258  default:
6259  _TROUBLE;
6260  // To keep off compiler warning and static analizer
6261  encode_table = s_Encode;
6262  }
6263  const unsigned char* cstr = (const unsigned char*)str.data();
6264 
6265  for (SIZE_TYPE pos = 0; pos < len; pos++) {
6266  const char* subst = encode_table[cstr[pos]];
6267  if (*subst != cstr[pos]) {
6268  return true;
6269  }
6270  }
6271  return false;
6272 }
6273 
6274 
6275 string NStr::Base64Encode(const CTempString str, size_t line_len)
6276 {
6277  size_t n = str.size();
6278  string dst;
6279  char dst_buf[128];
6280  size_t pos = 0, n_read, n_written;
6281 
6282  while ( n ) {
6283  BASE64_Encode(str.data() + pos, n, &n_read, dst_buf, sizeof(dst_buf), &n_written, &line_len);
6284  pos += n_read;
6285  n -= n_read;
6286  dst.append(dst_buf, n_written);
6287  }
6288  return dst;
6289 }
6290 
6291 string NStr::Base64Decode(const CTempString str)
6292 {
6293  size_t n = str.size();
6294  string dst;
6295  char dst_buf[128];
6296  size_t pos = 0, n_read, n_written;
6297 
6298  while ( n ) {
6299  if (!BASE64_Decode(str.data() + pos, n, &n_read, dst_buf, sizeof(dst_buf), &n_written)) {
6300  return string();
6301  }
6302  pos += n_read;
6303  n -= n_read;
6304  dst.append(dst_buf, n_written);
6305  }
6306  return dst;
6307 }
6309 
6310 
6311 /// @internal
6312 static
6313 bool s_IsIPAddress(const char* str, size_t size)
6314 {
6315  _ASSERT(str[size] == '\0');
6316 
6317  const char* c = str;
6318 
6319  // IPv6?
6320  if ( strchr(str, ':') ) {
6321  if (NStr::CompareNocase(str, 0, 7, "::ffff:") == 0) {
6322  // Mapped IPv4 address
6323  return size > 7 && s_IsIPAddress(str + 7, size - 7);
6324  }
6325 
6326  int colons = 0;
6327  bool have_group = false;
6328  const char* prev_colon = NULL;
6329  int digits = 0;
6330  // Continue until
6331  for (; c && c - str < (int)size && *c != '%'; c++) {
6332  if (*c == ':') {
6333  colons++;
6334  if (colons > 7) {
6335  // Too many separators
6336  return false;
6337  }
6338  if (prev_colon && c - prev_colon == 1) {
6339  // A group of zeros found
6340  if (have_group) {
6341  // Only one group is allowed
6342  return false;
6343  }
6344  have_group = true;
6345  }
6346  prev_colon = c;
6347  digits = 0;
6348  continue;
6349  }
6350  digits++;
6351  if (digits > 4) {
6352  // Too many digits between colons
6353  return false;
6354  }
6355  char d = (char)toupper((unsigned char)(*c));
6356  if (d < '0' || d > 'F') {
6357  // Invalid digit
6358  return false;
6359  }
6360  }
6361  // Check if zone index is present
6362  if (*c == '%') {
6363  // It's not clear yet what zone index may look like.
6364  // Ignore it.
6365  }
6366  // Make sure there was at least one colon.
6367  return colons > 1;
6368  }
6369 
6370  unsigned long val;
6371  int dots = 0;
6372 
6373  int& errno_ref = errno;
6374  for (;;) {
6375  char* e;
6376  if ( !isdigit((unsigned char)(*c)) )
6377  return false;
6378  errno_ref = 0;
6379  val = strtoul(c, &e, 10);
6380  if (c == e || errno_ref)
6381  return false;
6382  c = e;
6383  if (*c != '.')
6384  break;
6385  if (++dots > 3)
6386  return false;
6387  if (val > 255)
6388  return false;
6389  c++;
6390  }
6391 
6392  // Make sure the whole string was checked (it is possible to have \0 chars
6393  // in the middle of the string).
6394  if ((size_t)(c - str) != size) {
6395  return false;
6396  }
6397  return !*c && dots == 3 && val < 256;
6398 }
6399 
6400 
6402 {
6403  size_t size = str.size();
6404  if ( str.HasZeroAtEnd() ) {
6405  // string has zero at the end already
6406  return s_IsIPAddress(str.data(), size);
6407  }
6408  char buf[256]; // small temporary buffer on stack for appending zero char
6409  if ( size < sizeof(buf) ) {
6410  memcpy(buf, str.data(), size);
6411  buf[size] = '\0';
6412  return s_IsIPAddress(buf, size);
6413  }
6414  else {
6415  // use std::string() to allocate memory for appending zero char
6416  return s_IsIPAddress(string(str).c_str(), size);
6417  }
6418 }
6419 
6420 
6421 namespace {
6422  // Comparator to decide if a symbol is a delimiter
6423  template <typename TDelimiter>
6424  class PDelimiter
6425  {
6426  private:
6427  const TDelimiter& delimiter;
6428 
6429  public:
6430  PDelimiter(const TDelimiter& delim)
6431  : delimiter(delim)
6432  {}
6433 
6434  bool operator()(char tested_symbol) const;
6435  };
6436 
6437 
6438  // Template search for a field
6439  // @param str
6440  // C or C++ string to search in.
6441  // @param field_no
6442  // Zero-based field number.
6443  // @param delimiter
6444  // Functor to decide if a symbol is a delimiter
6445  // @param merge
6446  // Whether to merge or not adjacent delimiters.
6447  // @return
6448  // Found field; or empty string if the required field is not found.
6449  template <typename TComparator, typename TResult>
6450  TResult s_GetField(const CTempString str,
6451  size_t field_no,
6452  const TComparator& delimiter,
6453  NStr::EMergeDelims merge)
6454  {
6455  const char* current_ptr = str.data();
6456  const char* end_ptr = current_ptr + str.length();
6457  size_t current_field = 0;
6458 
6459  // Search for the beginning of the required field
6460  for ( ; current_field != field_no; current_field++) {
6461  while (current_ptr < end_ptr && !delimiter(*current_ptr))
6462  current_ptr++;
6463 
6464  if (merge == NStr::eMergeDelims) {
6465  while (current_ptr < end_ptr && delimiter(*current_ptr))
6466  current_ptr++;
6467  }
6468  else
6469  current_ptr++;
6470 
6471  if (current_ptr >= end_ptr)
6472  return TResult();
6473  }
6474 
6475  if (current_field != field_no)
6476  return TResult();
6477 
6478  // Here: current_ptr points to the first character after the delimiter.
6479  const char* field_start = current_ptr;
6480  while (current_ptr < end_ptr && !delimiter(*current_ptr))
6481  current_ptr++;
6482 
6483  return TResult(field_start, current_ptr - field_start);
6484  }
6485 
6486 
6487 
6488  template <>
6489  bool PDelimiter<char>::operator() (char c) const
6490  {
6491  return delimiter == c;
6492  }
6493 
6494  template <>
6495  bool PDelimiter<CTempString>::operator() (char c) const
6496  {
6497  return delimiter.find(c) != NPOS;
6498  }
6499 }
6500 
6501 
6502 string NStr::GetField(const CTempString str,
6503  size_t field_no,
6504  const CTempString delimiters,
6505  EMergeDelims merge)
6506 {
6507  return s_GetField<PDelimiter<CTempString>, string>
6508  (str,
6509  field_no,
6510  PDelimiter<CTempString>(delimiters),
6511  merge);
6512 }
6513 
6514 
6515 string NStr::GetField(const CTempString str,
6516  size_t field_no,
6517  char delimiter,
6518  EMergeDelims merge)
6519 {
6520  return s_GetField<PDelimiter<char>, string>
6521  (str,
6522  field_no,
6523  PDelimiter<char>(delimiter),
6524  merge);
6525 }
6526 
6527 
6529  size_t field_no,
6530  const CTempString delimiters,
6531  EMergeDelims merge)
6532 {
6533  return s_GetField<PDelimiter<CTempString>, CTempString>
6534  (str,
6535  field_no,
6536  PDelimiter<CTempString>(delimiters),
6537  merge);
6538 }
6539 
6540 
6542  size_t field_no,
6543  char delimiter,
6544  EMergeDelims merge)
6545 {
6546  return s_GetField<PDelimiter<char>, CTempString>
6547  (str,
6548  field_no,
6549  PDelimiter<char>(delimiter),
6550  merge);
6551 }
6552 
6553 bool NStr::x_ReportLimitsError(const CTempString str, TStringToNumFlags flags)
6554 {
6555  if (flags & NStr::fConvErr_NoThrow) {
6556 // if ((flags & fConvErr_NoErrno) == 0) {
6557  if (flags & fConvErr_NoErrMessage) {
6558  CNcbiError::SetErrno(errno = ERANGE);
6559  } else {
6560  CNcbiError::SetErrno(errno = ERANGE, str);
6561  }
6562 // }
6563  return false;
6564  } else {
6565  NCBI_THROW2(CStringException, eConvert,
6566  "NStr::StringToNumeric overflow", 0);
6567  }
6568 }
6569 
6570 
6571 /////////////////////////////////////////////////////////////////////////////
6572 // CStringUTF8 / CUtf8
6573 
// Definitions compiled only when __EXPORT_CTOR_STRINGUTF8__ is defined.
// Every body visible below stores the result of a CUtf8::AsUTF8() call
// via assign().
// NOTE(review): in this listing the heads of these definitions (and the
// bodies of several of them) are missing, as are some of the conditional
// directives that should pair with the #endif lines - the text below is
// not compilable as shown. Restore the missing lines from the original
// source before editing this section.
6574 #if defined(__EXPORT_CTOR_STRINGUTF8__)
6575 
6578 }
6581 }
6584 }
6585 
6586 
// Variants taking an explicit source encoding and validation mode
// (bodies not shown in this listing).
6588  const CTempString src, EEncoding encoding,EValidate validate) {
6590 }
6592  const char* src, EEncoding encoding, EValidate validate) {
6594 }
6596  const string& src, EEncoding encoding, EValidate validate) {
6598 }
// Variants converting the whole 'src' argument with CUtf8::AsUTF8(src).
6600  assign( CUtf8::AsUTF8(src));
6601 }
6602 #if NCBITOOLKIT_USE_LONG_UCS4
6604  assign( CUtf8::AsUTF8(src));
6605 }
6606 #endif
6608  assign( CUtf8::AsUTF8(src));
6609 }
6610 #if defined(HAVE_WSTRING)
6612  assign( CUtf8::AsUTF8(src));
6613 }
6614 #endif
6616  assign( CUtf8::AsUTF8(src));
6617 }
6618 #if NCBITOOLKIT_USE_LONG_UCS4
6620  assign( CUtf8::AsUTF8(src));
6621 }
6622 #endif
6624  assign( CUtf8::AsUTF8(src));
6625 }
6626 #if defined(HAVE_WSTRING)
6628  assign( CUtf8::AsUTF8(src));
6629 }
6630 #endif
6631 
// Variants taking a character buffer: 'char_count' is passed on only when
// 'type' is eCharBuffer; otherwise NPOS is passed instead.
6633  ECharBufferType type, const TUnicodeSymbol* src, SIZE_TYPE char_count) {
6634  assign( CUtf8::AsUTF8(src, type == eCharBuffer ? char_count : NPOS));
6635 }
6636 #if NCBITOOLKIT_USE_LONG_UCS4
6638  ECharBufferType type, const TCharUCS4* src, SIZE_TYPE char_count) {
6639  assign( CUtf8::AsUTF8(src, type == eCharBuffer ? char_count : NPOS));
6640 }
6641 #endif
6643  ECharBufferType type, const TCharUCS2* src, SIZE_TYPE char_count) {
6644  assign( CUtf8::AsUTF8(src, type == eCharBuffer ? char_count : NPOS));
6645 }
6647  ECharBufferType type, const wchar_t* src, SIZE_TYPE char_count) {
6648  assign( CUtf8::AsUTF8(src, type == eCharBuffer ? char_count : NPOS));
6649 }
6650 #endif // __EXPORT_CTOR_STRINGUTF8__
6651 
6654 {
6655  SIZE_TYPE count = 0;
6656  src = str.begin();
6657  CTempString::const_iterator to = str.end();
6658  for (; src != to; ++src, ++count) {
6659  SIZE_TYPE more = 0;
6660  bool good = x_EvalFirst(*src, more);
6661  while (more-- && good) {
6662  good = (++src != to) && x_EvalNext(*src);
6663  }
6664  if ( !good ) {
6665  return count;
6666  }
6667  }
6668  return count;
6669 }
6670 
6672 {
6674  x_GetValidSymbolCount(src,err);
6675  if (err == src.end()) {
6676  return CTempString();
6677  }
6678  CTempString::const_iterator from = max(err - 32, src.begin());
6679  CTempString::const_iterator to = min(err + 16, src.end());
6680  return CTempString(from, to - from);
6681 }
6682 
6684 {
6686  SIZE_TYPE count = x_GetValidSymbolCount(str,err);
6687  if (err != str.end()) {
6688  NCBI_THROW2(CStringException, eFormat,
6689  string("Source string is not in UTF8 format: ") +
6691  (err - str.begin()));
6692  }
6693  return count;
6694 }
6695 
6697 {
6698  SIZE_TYPE more = 0;
6701  bool cp1252, iso1, ascii, utf8, cesu8;
6702  for (cp1252 = iso1 = ascii = utf8 = true, cesu8=false; i != end; ++i) {
6703  Uint1 ch = *i;
6704  bool skip = false;
6705  if (more != 0) {
6706  if (x_EvalNext(ch)) {
6707  --more;
6708  if (more == 0) {
6709  ascii = false;
6710  }
6711  skip = true;
6712  } else {
6713  more = 0;
6714  utf8 = false;
6715  }
6716  }
6717  if (ch > 0x7F) {
6718  ascii = false;
6719 // http://en.wikipedia.org/wiki/ISO/IEC_8859-1
6720 // Note: From the point of view of the C++ Toolkit, the ISO 8859-1
6721 // character set includes symbols 0x00 through 0xFF except 0x80 through 0x9F.
6722  if (ch < 0xA0) {
6723  iso1 = false;
6724 // http://en.wikipedia.org/wiki/Windows-1252
6725  if (ch == 0x81 || ch == 0x8D || ch == 0x8F ||
6726  ch == 0x90 || ch == 0x9D) {
6727  cp1252 = false;
6728  }
6729  }
6730  if (!skip && utf8 && !x_EvalFirst(ch, more)) {
6731  utf8 = false;
6732  }
6733  if (utf8 && !cesu8 && ch == 0xED && (end - i) > 5) {
6734  uint8_t c1 = *(i+1);
6735  uint8_t c3 = *(i+3);
6736  uint8_t c4 = *(i+4);
6737  if ( ((c1 & 0xA0) == 0xA0) && (c3 == (uint8_t)0xED) && ((c4 & 0xB0) == 0xB0) ) {
6738  cesu8 = true;
6739  }
6740  }
6741  }
6742  }
6743  if (more != 0) {
6744  utf8 = false;
6745  }
6746  if (ascii) {
6747  return eEncoding_Ascii;
6748  } else if (utf8) {
6749  return cesu8 ? eEncoding_CESU8 : eEncoding_UTF8;
6750  } else if (cp1252) {
6752  }
6753  return eEncoding_Unknown;
6754 }
6755 
6756 
6757 bool CUtf8::MatchEncoding(const CTempString& src, EEncoding encoding)
6758 {
6759  bool matches = false;
6760  EEncoding enc_src = GuessEncoding(src);
6761  switch ( enc_src ) {
6762  default:
6763  case eEncoding_Unknown:
6764  matches = false;
6765  break;
6766  case eEncoding_Ascii:
6767  matches = true;
6768  break;
6769  case eEncoding_CESU8:
6770  matches = (encoding == enc_src || encoding == eEncoding_UTF8);
6771  break;
6772  case eEncoding_UTF8:
6774  matches = (encoding == enc_src);
6775  break;
6776  case eEncoding_ISO8859_1:
6777  matches = (encoding == enc_src || encoding == eEncoding_Windows_1252);
6778  break;
6779  }
6780  return matches;
6781 }
6782 
// Return the canonical character-set name for an EEncoding value.
//
// eEncoding_UTF8 leaves the switch and is answered by the final
// return ("UTF-8"); the other named encodings return their name directly.
// Any value without its own case label (the default branch) raises
// CStringException with code eBadArgs.
6783 string CUtf8::EncodingToString(EEncoding encoding)
6784 {
6785  switch (encoding) {
6786  case eEncoding_UTF8: break;
6787  case eEncoding_CESU8: return "CESU-8";
6788  case eEncoding_Ascii: return "US-ASCII";
6789  case eEncoding_ISO8859_1: return "ISO-8859-1";
6790  case eEncoding_Windows_1252: return "windows-1252";
6791  default:
6792  NCBI_THROW2(CStringException, eBadArgs,
6793  "Cannot convert encoding to string", 0);
     // Not reached: the macro above throws.
6794  break;
6795  }
6796  return "UTF-8";
6797 }
6798 
// Translate a character-set name into an EEncoding value.
//
// The name in `str` is compared case-insensitively, first against "UTF-8"
// and "windows-1252", then against the alias lists for US-ASCII,
// ISO-8859-1 and CESU-8 below. Each alias list is NULL-terminated so the
// loops can stop on the sentinel.
// NOTE(review): the result for a name that matches nothing is produced
// after the last loop; that statement is not visible in this listing.
6799 // see http://www.iana.org/assignments/character-sets
6801 {
6802  if (NStr::CompareNocase(str,"UTF-8")==0) {
6803  return eEncoding_UTF8;
6804  }
6805  if (NStr::CompareNocase(str,"windows-1252")==0) {
6806  return eEncoding_Windows_1252;
6807  }
6808  int i;
6809  const char* ascii[] = {
6810  "ANSI_X3.4-1968","iso-ir-6","ANSI_X3.4-1986","ISO_646.irv:1991",
6811  "ASCII","ISO646-US","US-ASCII","us","IBM367","cp367","csASCII", NULL};
6812  for (i=0; ascii[i]; ++i) {
6813  if (NStr::CompareNocase(str,ascii[i])==0) {
6814  return eEncoding_Ascii;
6815  }
6816  }
6817  const char* iso8859_1[] = {
6818  "ISO_8859-1:1987","iso-ir-100","ISO_8859-1","ISO-8859-1",
6819  "latin1","l1","IBM819","CP819","csISOLatin1", NULL};
6820  for (i=0; iso8859_1[i]; ++i) {
6821  if (NStr::CompareNocase(str,iso8859_1[i])==0) {
6822  return eEncoding_ISO8859_1;
6823  }
6824  }
6825  const char* cesu[] = {
6826  "CESU-8","csCESU8","csCESU-8",NULL};
6827  for (i=0; cesu[i]; ++i) {
     // Compare against the CESU-8 aliases themselves. Indexing the
     // ISO-8859-1 list here would re-test names the previous loop has
     // already rejected, so no CESU-8 name could ever be recognized.
6828  if (NStr::CompareNocase(str,cesu[i])==0) {
6829  return eEncoding_CESU8;
6830  }
6831  }
6833 }
6834 
6835 
6836 // cp1252: Unicode code points for the 32 bytes 0x80 through 0x9F.
// The table is indexed with (byte - 0x80).
// The entries for bytes 0x81, 0x8D, 0x8F, 0x90 and 0x9D hold 0x003F ('?');
// these are the same five bytes that the encoding-guessing code in this
// file treats as ruling out Windows-1252.
6837 static const TUnicodeSymbol s_cp1252_table[] = {
6838  0x20AC, 0x003F, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
6839  0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0x003F, 0x017D, 0x003F,
6840  0x003F, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
6841  0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x003F, 0x017E, 0x0178
6842 };
6843 
6845 {
6846  Uint1 ch = c;
6847  switch (encoding)
6848  {
6849  case eEncoding_Unknown:
6850  case eEncoding_UTF8:
6851  case eEncoding_CESU8:
6852  NCBI_THROW2(CStringException, eBadArgs,
6853  "Unacceptable character encoding", 0);
6854  case eEncoding_Ascii:
6855  case eEncoding_ISO8859_1:
6856  break;
6858  if (ch > 0x7F && ch < 0xA0) {
6859  return s_cp1252_table[ ch - 0x80 ];
6860  }
6861  break;
6862  default:
6863  NCBI_THROW2(CStringException, eBadArgs,
6864  "Unsupported character encoding", 0);
6865  }
6866  return (TUnicodeSymbol)ch;
6867 }
6868 
6869 char CUtf8::SymbolToChar(TUnicodeSymbol cp, EEncoding encoding)
6870 {
6871  if( encoding == eEncoding_UTF8 || encoding == eEncoding_CESU8 || encoding == eEncoding_Unknown) {
6872  NCBI_THROW2(CStringException, eBadArgs,
6873  "Unacceptable character encoding", 0);
6874  }
6875  if ( cp <= 0xFF) {
6876  return (char)cp;
6877  }
6878  if ( encoding == eEncoding_Windows_1252 ) {
6879  for (Uint1 ch = 0x80; ch <= 0x9F; ++ch) {
6880  if (s_cp1252_table[ ch - 0x80 ] == cp) {
6881  return (char)ch;
6882  }
6883  }
6884  }
6885  if (cp > 0xFF) {
6886  NCBI_THROW2(CStringException, eConvert,
6887  "Failed to convert symbol to requested encoding", 0);
6888  }
6889  return (char)cp;
6891 
6892 struct SCharEncoder
6894  virtual TUnicodeSymbol ToUnicode(char ch) const = 0;
6895  virtual char ToChar(TUnicodeSymbol sym) const = 0;
6896 };
6897 
6898 struct SEncEncoder : public SCharEncoder
6900  SEncEncoder( EEncoding encoding) : m_Encoding(encoding) {}
6901  virtual TUnicodeSymbol ToUnicode(char ch) const {
6903  }
6904  virtual char ToChar(TUnicodeSymbol sym) const {
6905  return CUtf8::SymbolToChar(sym, m_Encoding);
6906  }
6908 };
6909 
6910 #if defined(HAVE_WSTRING)
6911 struct SLocaleEncoder : public SCharEncoder
6913  SLocaleEncoder( const locale& lcl)
6914  : m_Lcl(lcl)
6915  , m_Facet(use_facet< ctype<wchar_t> >(lcl)) {
6916  }
6917  virtual TUnicodeSymbol ToUnicode(char ch) const {
6918  wchar_t w = m_Facet.widen(ch);
6919  if (w == (wchar_t)-1) {
6920  string msg("Failed to convert to Unicode char ");
6921  msg += NStr::NumericToString(ch) + ", locale " + m_Lcl.name();
6922  NCBI_THROW2(CStringException, eConvert, msg, 0);
6923  }
6924  return w;
6925  }
6926  virtual char ToChar(TUnicodeSymbol sym) const {
6927  char ch = m_Facet.narrow(sym,0);
6928  if (ch == 0 && sym != 0) {
6929  string msg("Failed to convert Unicode symbol ");
6930  msg += NStr::NumericToString(sym) + " to requested locale " + m_Lcl.name();
6931  NCBI_THROW2(CStringException, eConvert, msg, 0);
6932  }
6933  return ch;
6934  }
6935  const locale& m_Lcl;
6936  const ctype<wchar_t>& m_Facet;
6937 };
6938 
6939 
6941 {
6942  return SLocaleEncoder(lcl).ToUnicode(ch);
6943 }
6944 
6946 {
6947  return SLocaleEncoder(lcl).ToChar(sym);
6948 }
6949 
// Append the single-byte string `src` to `self` as UTF-8, converting each
// byte to a Unicode symbol through the ctype<wchar_t> facet of `lcl`.
//
// The source is walked twice: the first pass adds up the UTF-8 bytes each
// converted symbol needs so that `self` can be reserved once, the second
// pass performs the actual append. Returns `self`.
// SLocaleEncoder::ToUnicode throws CStringException (eConvert) when the
// facet's widen() yields (wchar_t)-1; that exception is not caught here.
6950 CStringUTF8& CUtf8::x_Append(CStringUTF8& self, const CTempString& src, const locale& lcl)
6951 {
6952  SLocaleEncoder enc(lcl);
     // Start from the current length so the reservation covers old + new data.
6953  SIZE_TYPE needed = self.length();
6954  for (char ch : src) {
6955  needed += x_BytesNeeded( enc.ToUnicode(ch) );
6956  }
6957  self.reserve(needed+1);
6958  for (char ch : src) {
6959  x_AppendChar( self, enc.ToUnicode(ch));
6960  }
6961  return self;
6962 }
6963 #endif
6964 
// Convert the UTF-8 string `str` into a single-byte string, one output
// character per decoded symbol, using `enc` to map each symbol to a char.
//
// substitute_on_error:
//   non-NULL - a CStringException thrown by enc.ToChar() is swallowed and
//              this text is appended in place of the failing symbol;
//   NULL     - the exception from enc.ToChar() propagates to the caller.
// CUtf8::Decode() is called outside the try block, so anything it throws
// propagates in both modes.
6965 string x_AsSingleByteString(const CTempString& str,
6966  const SCharEncoder& enc, const char* substitute_on_error)
6967 {
6968  string result;
     // One output char per symbol, so the symbol count sizes the result.
6969  result.reserve( CUtf8::GetSymbolCount(str)+1 );
6970  CTempString::const_iterator src = str.begin();
6971  CTempString::const_iterator to = str.end();
6972  for ( ; src != to; ++src ) {
     // NOTE(review): Decode() presumably advances `src` over the bytes of a
     // multi-byte symbol, leaving the loop's ++src to step to the next
     // symbol - confirm against CUtf8::Decode.
6973  TUnicodeSymbol sym = CUtf8::Decode( src );
6974  if (substitute_on_error) {
6975  try {
6976  result.append(1, enc.ToChar(sym));
6977  }
6978  catch (CStringException&) {
6979  result.append(substitute_on_error);
6980  }
6981  } else {
6982  result.append(1, enc.ToChar(sym));
6983  }
6984  }
6985  return result;
6986 }
6987 
6989  EEncoding encoding, const char* substitute_on_error, EValidate validate)
6990 {
6991  if (validate == CUtf8::eValidate) {
6993  }
6994  if( encoding == eEncoding_UTF8) {
6995  return str;
6996  }
6997  if( encoding == eEncoding_CESU8) {
6998  NCBI_THROW2(CStringException, eConvert,
6999  "Conversion into CESU-8 encoding is not supported", 0);
7000  }
7001  return x_AsSingleByteString(str, SEncEncoder(encoding), substitute_on_error);
7002 }
7003 
7004 #if defined(HAVE_WSTRING)
7006  const locale& lcl, const char* substitute_on_error, EValidate validate)
7007 {
7008  if (validate == CUtf8::eValidate) {
7010  }
7011  return x_AsSingleByteString(str, SLocaleEncoder(lcl), substitute_on_error);
7012 }
7013 #endif
7014 
7015 void CUtf8::x_Validate(const CTempString& str)
7016 {
7017  if ( !MatchEncoding( str,eEncoding_UTF8 ) ) {
7018  NCBI_THROW2(CStringException, eBadArgs,
7019  string("Source string is not in UTF8 format: ") +
7022  }
7023 }
7024 
7026 {
7027  Uint4 ch = c;
7028  if (ch < 0x80) {
7029  self.append(1, Uint1(ch));
7030  }
7031  else if (ch < 0x800) {
7032  self.append(1, Uint1( (ch >> 6) | 0xC0));
7033  self.append(1, Uint1( (ch & 0x3F) | 0x80));
7034  } else if (ch < 0x10000) {
7035  self.append(1, Uint1( (ch >> 12) | 0xE0));
7036  self.append(1, Uint1(((ch >> 6) & 0x3F) | 0x80));
7037  self.append(1, Uint1(( ch & 0x3F) | 0x80));
7038  } else {
7039  self.append(1, Uint1( (ch >> 18) | 0xF0));
7040  self.append(1, Uint1(((ch >> 12) & 0x3F) | 0x80));
7041  self.append(1, Uint1(((ch >> 6) & 0x3F) | 0x80));
7042  self.append(1, Uint1( (ch & 0x3F) | 0x80));
7043  }
7044  return self;
7045 }
7046 
7048  EEncoding encoding, EValidate validate)
7049 {
7050  if (encoding == eEncoding_Unknown) {
7051  encoding = GuessEncoding(src);
7052  if (encoding == eEncoding_Unknown) {
7053  NCBI_THROW2(CStringException, eBadArgs,
7054  "Unable to guess the source string encoding", 0);
7055  }
7056  } else if (validate == eValidate) {
7057  if ( !MatchEncoding( src,encoding ) ) {
7058  NCBI_THROW2(CStringException, eBadArgs,
7059  "Source string does not match the declared encoding", 0);
7060  }
7061  }
7062  if (encoding == eEncoding_UTF8 || encoding == eEncoding_Ascii) {
7063  self.append(src);
7064  return self;
7065  }
7066  if (encoding == eEncoding_CESU8) {
7067  self.reserve(max(self.capacity(),self.length()+src.length()));
7068  const char* i = src.data();
7069  const char* end = i + src.length();
7070  for (; i != end; ++i) {
7071  Uint1 ch = *i;
7072  if (ch == 0xED && (end - i) > 5) {
7073  uint8_t c1 = *(i+1);
7074  uint8_t c3 = *(i+3);
7075  uint8_t c4 = *(i+4);
7076  if ( ((c1 & 0xA0) == 0xA0) && (c3 == (uint8_t)0xED) && ((c4 & 0xB0) == 0xB0) ) {
7077  CUtf8::AppendAsUTF8(self, CUtf8::AsBasicString<TCharUCS2>(CTempString(i,6), 0));
7078  i += 5;
7079  continue;
7080  }
7081  }
7082  self.append(1, ch);
7083  }
7084  return self;
7085  }
7086 
7087  SIZE_TYPE needed = 0;
7090  for (i = src.begin(); i != end; ++i) {
7091  needed += x_BytesNeeded( CharToSymbol( *i,encoding ) );
7092  }
7093  if ( !needed ) {
7094  return self;
7095  }
7096  self.reserve(max(self.capacity(),self.length()+needed+1));
7097  for (i = src.begin(); i != end; ++i) {
7098  x_AppendChar( self, CharToSymbol( *i, encoding ) );
7099  }
7100  return self;
7101 }
7102 
7104 {
7105  Uint4 ch = c;
7106  if (ch < 0x80) {
7107  return 1;
7108  } else if (ch < 0x800) {
7109  return 2;
7110  } else if (ch < 0x10000) {
7111  return 3;
7112  }
7113  return 4;
7114 }
7115 
7117 {
7118  CTempString::const_iterator src = str.begin();
7119  CTempString::const_iterator to = str.end();
7120  SIZE_TYPE more = 0;
7121  bool good = x_EvalFirst(*src, more);
7122  while (more-- && good) {
7123  good = (++src != to) && x_EvalNext(*src);
7124  }
7125  return good ? (src - str.begin() + 1) : 0;
7126 }
7127 
// Classify `ch` as the first byte of a UTF-8 sequence.
//
// On return `more` holds the number of continuation bytes that must
// follow: 0 for a 7-bit byte, 1 for 110xxxxx, 2 for 1110xxxx and 3 for
// 11110xxx. Returns false (with `more` left at 0) for bytes that cannot
// start a sequence: 0xC0, 0xC1, anything above 0xF4, and bytes of the
// form 10xxxxxx.
7128 bool CUtf8::x_EvalFirst(char ch, SIZE_TYPE& more)
7129 {
7130  more = 0;
7131  if ((ch & 0x80) != 0) {
7132  if ((ch & 0xE0) == 0xC0) {
7133  if ((ch & 0xFE) == 0xC0) {
7134  // C0 and C1 are not valid UTF-8 chars
7135  return false;
7136  }
7137  more = 1;
7138  } else if ((ch & 0xF0) == 0xE0) {
7139  more = 2;
7140  } else if ((ch & 0xF8) == 0xF0) {
     // Compare as unsigned char: plain char may be signed, in which case
     // bytes >= 0x80 are negative and an ordering test would misfire.
7141  if ((unsigned char)ch > (unsigned char)0xF4) {
7142  // F5-FF are not valid UTF-8 chars
7143  return false;
7144  }
7145  more = 3;
7146  } else {
     // 10xxxxxx (a continuation byte) or 0xF8-0xFF.
7147  return false;
7148  }
7149  }
7150  return true;
7151 }
7152 
7153 
// Return true when `ch` has the bit pattern 10xxxxxx, i.e. it is a valid
// UTF-8 continuation (non-leading) byte.
7154 bool CUtf8::x_EvalNext(char ch)
7155 {
7156  return (ch & 0xC0) == 0x80;
7157 }
7158 
7160 {
7161  TUnicodeSymbol chRes = 0;
7162  more = 0;
7163  if ((ch & 0x80) == 0) {
7164  chRes = ch;
7165  } else if ((ch & 0xE0) == 0xC0) {
7166  chRes = (ch & 0x1F);
7167  more = 1;
7168  } else if ((ch & 0xF0) == 0xE0) {
7169  chRes = (ch & 0x0F);
7170  more = 2;
7171  } else if ((ch & 0xF8) == 0xF0) {
7172  chRes = (ch & 0x07);
7173  more = 3;
7174  } else {
7175  NCBI_THROW2(CStringException, eBadArgs,
7176  "Source string is not in UTF8 format", 0);
7177  }
7178  return chRes;
7179 }
7180 
7181 
7183 {
7184  if ((ch & 0xC0) == 0x80) {
7185  return (chU << 6) | (ch & 0x3F);
7186  } else {
7187  NCBI_THROW2(CStringException, eBadArgs,
7188  "Source string is not in UTF8 format", 0);
7189  }
7190  return 0;
7191 }
7192 
7194 {
7195 /*
7196  {0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x20, 0x85, 0xA0, 0x1680, 0x180E,
7197  0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A,
7198  0x2028, 0x2029, 0x202F, 0x205F, 0x3000 };
7199 */
7200  if (chU >= 0x85) {
7201  if (chU < 0x2000) {
7202  return chU == 0x85 || chU == 0xA0 || chU == 0x1680 || chU == 0x180E;
7203  } else if (chU >= 0x3000) {
7204  return chU == 0x3000;
7205  }
7206  return chU <=0x200A || chU == 0x2028 || chU == 0x2029 || chU == 0x202F || chU == 0x205F;
7207  }
7208  return iswspace(chU)!=0;
7209 }
7210 
7212 {
7213  if (!str.empty()) {
7215  if (t.empty()) {
7216  str.erase();
7217  } else {
7218  str.replace(0,str.length(),t.data(),t.length());
7219  }
7220  }
7221  return str;
7222 }
7223 
7225  const CTempString& str, NStr::ETrunc side)
7226 {
7227  if (str.empty()) {
7228  return str;
7229  }
7230  CTempString::const_iterator beg = str.begin();
7232  if (side == NStr::eTrunc_Begin || side == NStr::eTrunc_Both) {
7233  for (CTempString::const_iterator next = beg; beg != end; beg = ++next) {
7234  if (!IsWhiteSpace( CUtf8::Decode( next ) )) {
7235  break;
7236  }
7237  }
7238  }
7239  if (side == NStr::eTrunc_End || side == NStr::eTrunc_Both) {
7240  while (end != beg) {
7241  while (end != beg) {
7242  char ch = *(--end);
7243  if ((ch & 0x80) == 0 || (ch & 0xC0) == 0xC0) {
7244  break;
7245  }
7246  }
7248  if (!IsWhiteSpace( CUtf8::Decode( next ) )) {
7249  end = ++next;
7250  break;
7251  }
7252  }
7253  }
7254  CTempString res;
7255  if (beg != end) {
7256  res.assign(beg,end-beg);
7257  }
7258  return res;
7259 }
7260 
// Return the symbolic name of this exception's error code.
// eConvert, eBadArgs and eFormat are named here; every other code is
// delegated to CException::GetErrCodeString().
7261 const char* CStringException::GetErrCodeString(void) const
7262 {
7263  switch (GetErrCode()) {
7264  case eConvert: return "eConvert";
7265  case eBadArgs: return "eBadArgs";
7266  case eFormat: return "eFormat";
7267  default: return CException::GetErrCodeString();
7268  }
7269 }
7270 
7272 /////////////////////////////////////////////////////////////////////////////
7273 // CStringPairsParser decoders and encoders
7274 
7275 
7277  : m_Flag(flag)
7278 {
7279 }
7280 
7281 
7282 string CStringDecoder_Url::Decode(const CTempString src,
7283  EStringType ) const
7285  return NStr::URLDecode(src, m_Flag);
7286 }
7287 
7288 
7290  : m_Flag(flag)
7291 {
7292 }
7293 
7294 
// URL-encode `src` by forwarding to NStr::URLEncode() with the flag stored
// in m_Flag. The EStringType argument is unnamed and not used.
7295 string CStringEncoder_Url::Encode(const CTempString src,
7296  EStringType ) const
7297 {
7298  return NStr::URLEncode(src, m_Flag);
7299 }
7301 
7302 /////////////////////////////////////////////////////////////////////////////
7303 // CEncodedString --
7304 
7306  NStr::EUrlEncode flag)
7308  SetString(s, flag);
7309 }
7310 
7311 
7313  NStr::EUrlEncode flag)
7314 {
7315  m_Original = s;
7316  if ( NStr::NeedsURLEncoding(s, flag) ) {
7317  if ( m_Encoded.get() ) {
7318  // Do not re-allocate string object
7319  *m_Encoded = NStr::URLEncode(s, flag);
7320  }
7321  else {
7322  m_Encoded.reset(new string(NStr::URLEncode(s, flag)));
7323  }
7324  }
7325  else {
7326  m_Encoded.reset();
7327  }
7328 }
7329 
7330 
7331 /////////////////////////////////////////////////////////////////////////////
7332 // CTempString (deprecated constructors, defined out of line to cut down
7333 // on spurious warnings when building with compilers that warn on
7334 // definition rather than merely, and arguably more sensibly, on usage).
7335 
7336 
7338  : m_String(str+pos), m_Length(len)
7339 {
7340 } // NCBI_FAKE_WARNING
7341 
7342 
7343 CTempString::CTempString(const string& str, size_type len)
7344  : m_String(str.data()), m_Length(min(len, str.size()))
7346 } // NCBI_FAKE_WARNING
7347 
7348 
7349 
// Concatenate all fragments of the list into `*s`, replacing its previous
// content. `s` must not be NULL (it is dereferenced unconditionally).
7350 void CTempStringList::Join(string* s) const
7351 {
     // Reserve the combined length up front so the appends below do not
     // need to grow the string repeatedly.
7352  s->reserve(GetSize());
     // The first node is stored in the list object itself; the rest are
     // reached through the `next` chain.
7353  *s = m_FirstNode.str;
7354  for (const SNode* node = m_FirstNode.next.get(); node != NULL;
7355  node = node->next.get()) {
7356  s->append(node->str.data(), node->str.size());
7357  }
7358 }
7359 
7360 
7361 void CTempStringList::Join(CTempString* s) const
7362 {
7365  *s = str;
7366 }
7367 
7368 
// Produce the joined fragments as a CTempStringEx in `*s`.
//
// With a single fragment, `*s` is assigned that fragment directly and no
// storage is allocated. With several fragments they are copied back to
// back into a buffer obtained from m_Storage, a terminating '\0' is
// written after them, and `*s` is pointed at that buffer with the combined
// length (the terminator is not counted).
// Throws CStringException (eBadArgs) when several fragments must be joined
// but m_Storage is NULL.
7369 void CTempStringList::Join(CTempStringEx* s) const
7370 {
7371  if (m_FirstNode.next.get() == NULL) {
7372  *s = m_FirstNode.str;
7373  } else {
7374  if ( !m_Storage ) {
7375  NCBI_THROW2(CStringException, eBadArgs,
7376  "CTempStringList::Join(): non-NULL storage required", 0);
7377  }
7378  SIZE_TYPE n = GetSize();
     // One extra byte for the '\0' written after the last fragment.
7379  char* buf = m_Storage->Allocate(n + 1);
7380  char* p = buf;
7381  for (const SNode* node = &m_FirstNode; node != NULL;
7382  node = node->next.get()) {
7383  memcpy(p, node->str.data(), node->str.size());
7384  p += node->str.size();
7385  }
7386  *p = '\0';
7387  s->assign(buf, n);
7388  }
7389 }
7390 
7391 
7393 {
7394  SIZE_TYPE total = m_FirstNode.str.size();
7395  for (const SNode* node = m_FirstNode.next.get(); node != NULL;
7396  node = node->next.get()) {
7397  total += node->str.size();
7398  }
7399  return total;
7400 }
7401 
7402 
7404 {
7405 }
7406 
7407 
7409 {
7411  delete[] (*it);
7412  *it = 0;
7413  }
7414 }
7415 
7416 
7418 {
7419  m_Data.push_back(new char[len]);
7420  return m_Data.back();
7421 }
7422 
7423 
// Extract the next token of m_Str, starting at m_Pos.
//
// part_collector  - if non-NULL, receives the token as a sequence of
//                   fragments; escape and quote characters split a token
//                   into several fragments and are themselves left out.
// ptr_part_start  - if non-NULL, receives the position the token starts at.
// ptr_delim_pos   - if non-NULL, receives the position of the delimiter
//                   that ended the token, or NPOS if none was found.
//
// Returns true if a non-empty fragment was collected or the scan reached
// the end of the string without finding a terminating delimiter; returns
// false otherwise (including when m_Pos was already NPOS or past the end).
// On exit m_Pos is NPOS when the string is exhausted, otherwise the
// position following the delimiter, after which MergeDelims() is called.
//
// Throws CStringException (eFormat) for a backslash with nothing after it
// (when escaping is enabled) and for a quote that is never closed.
7424 bool CStrTokenizeBase::Advance(CTempStringList* part_collector, SIZE_TYPE* ptr_part_start, SIZE_TYPE* ptr_delim_pos)
7425 {
7426  SIZE_TYPE pos, part_start, delim_pos = 0, quote_pos = 0;
7427  bool found_text = false, done;
7428  char active_quote = '\0';
7429 
7430  // Skip leading delimiters.
7431  // NOTE: We cannot process
7432  if (!m_Pos && (m_Flags & NStr::fSplit_Truncate_Begin) != 0) {
7433  SkipDelims();
7434  }
7435  pos = part_start = m_Pos;
7436  done = (pos == NPOS);
7437  // save part start position
7438  if (ptr_part_start) {
7439  *ptr_part_start = part_start;
7440  }
7441 
7442  // Checks
7443  if (pos >= m_Str.size()) {
7444  pos = NPOS;
7445  done = true;
7446  }
7447  if (ptr_delim_pos) {
7448  *ptr_delim_pos = NPOS;
7449  }
7450 
7451  // Each chunk covers the half-open interval [part_start, delim_pos).
7452 
     // Each iteration stops at the next character of interest: a delimiter
     // or, depending on the flags, an escape or quote character.
7453  while ( !done &&
7454  ((delim_pos = m_Str.find_first_of(m_InternalDelim, pos)) != NPOS)) {
7455 
7456  SIZE_TYPE next_start = pos = delim_pos + 1;
7457  bool handled = false;
7458  char c = m_Str[delim_pos];
7459 
7460  if ((m_Flags & NStr::fSplit_CanEscape) != 0 && c == '\\') {
7461  // treat the following character literally
     // pos now points past the escaped character, so the next search will
     // not look at it; next_start still points at it, so it is kept in the
     // following fragment while the backslash itself is dropped.
7462  if (++pos > m_Str.size()) {
7463  NCBI_THROW2(CStringException, eFormat, "Unescaped trailing \\", delim_pos);
7464  }
7465  handled = true;
7466 
7467  } else if ((m_Flags & NStr::fSplit_CanQuote) != 0) {
7468  if (active_quote != '\0') {
7469  if (c == active_quote) {
7470  if (pos < m_Str.size() && m_Str[pos] == active_quote) {
7471  // count a doubled quote as one literal occurrence
     // The second quote character stays in the text: next_start points at
     // it, and pos is moved beyond it so it is not re-examined.
7472  ++pos;
7473  } else {
7474  active_quote = '\0';
7475  }
7476  } else {
     // Inside quotes any other stop character is ordinary text: keep
     // scanning without closing the current fragment.
7477  continue; // not actually a boundary
7478  }
7479  handled = true;
7480  } else if (((m_Flags & NStr::fSplit_CanSingleQuote) != 0 && c == '\'') ||
7481  ((m_Flags & NStr::fSplit_CanDoubleQuote) != 0 && c == '"')) {
     // Opening quote: remember which character closes it and where it
     // started (the position is reported if it is never closed).
7482  active_quote = c;
7483  quote_pos = delim_pos;
7484  handled = true;
7485  }
7486  }
7487 
7488  if ( !handled ) {
     // A real delimiter candidate.
7489  if ((m_Flags & NStr::fSplit_ByPattern) != 0) {
     // Pattern mode: the character at delim_pos has already matched; check
     // that the remaining m_Delim.size()-1 characters of the pattern follow.
     // NOTE(review): presumably m_InternalDelim holds the pattern's first
     // character in this mode - confirm where m_InternalDelim is set up.
7490  if (delim_pos + m_Delim.size() <= m_Str.size()
7491  && (memcmp(m_Delim.data() + 1, m_Str.data() + pos,
7492  m_Delim.size() - 1) == 0)) {
7493  done = true;
7494  next_start = pos = delim_pos + m_Delim.size();
7495  } else {
7496  continue;
7497  }
7498  } else {
7499  done = true;
7500  }
7501  // save delimiter position
7502  if (ptr_delim_pos) {
7503  *ptr_delim_pos = delim_pos;
7504  }
7505  }
7506 
     // Emit the text accumulated before the stop character (if any) and
     // start the next fragment right after it.
7507  if (delim_pos > part_start) {
7508  found_text = true;
7509  if (part_collector != NULL) {
7510  part_collector->Add(m_Str.substr(part_start, delim_pos - part_start));
7511  }
7512  }
7513  part_start = next_start;
7514  }
7515 
7516  if (active_quote != '\0') {
7517  NCBI_THROW2(CStringException, eFormat, string("Unbalanced ") + active_quote, quote_pos);
7518  }
7519 
     // delim_pos is NPOS only when the search in the loop condition ran off
     // the end of the string: the remaining tail (possibly empty) is the
     // last token.
7520  if (delim_pos == NPOS) {
7521  found_text = true;
7522  if (part_collector != NULL) {
7523  part_collector->Add(m_Str.substr(part_start));
7524  }
7525  m_Pos = NPOS;
7526  } else {
7527  m_Pos = pos;
7528  MergeDelims();
7529  }
7530  return found_text;
7531 }
7532 
7533 
7534 void CStrTokenizeBase::x_SkipDelims(bool force_skip)
7535 {
7537 
7538  if ( !force_skip && (m_Flags & NStr::fSplit_MergeDelimiters) == 0 ) {
7539  return;
7540  }
7541  // skip all delimiters, starting from the current position
7542  if ((m_Flags & NStr::fSplit_ByPattern) == 0) {
7544  } else {
7545  while (m_Pos != NPOS
7546  && m_Pos + m_Delim.size() <= m_Str.size()
7547  && (memcmp(m_Delim.data(), m_Str.data() + m_Pos,
7548  m_Delim.size()) == 0)) {
7549  m_Pos += m_Delim.size();
7550  }
7551  }
7552 }
7553 
7554 
7556 {
7558  return; // Nothing to do
7559  }
7561  char* buf = m_DelimStorage.Allocate(n + 3);
7562  char *s = buf;
7563  memcpy(s, m_InternalDelim.data(), n);
7564  if ((m_Flags & NStr::fSplit_CanEscape) != 0) {
7565  s[n++] = '\\';
7566  }
7567  if ((m_Flags & NStr::fSplit_CanSingleQuote) != 0) {
7568  s[n++] = '\'';
7569  }
7570  if ((m_Flags & NStr::fSplit_CanDoubleQuote) != 0) {
7571  s[n++] = '"';
7572  }
7574 }
7575 
7576 
ncbi::TMaskedQueryRegions mask
AutoArray –.
Definition: ncbimisc.hpp:527
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
Definition: ncbistre.hpp:802
int m_Errno
Definition: ncbistr.cpp:542
int Errno(void) const
Definition: ncbistr.cpp:531
~CS2N_Guard(void)
Definition: ncbistr.cpp:521
CS2N_Guard(NStr::TStringToNumFlags, bool skip_if_zero)
Definition: ncbistr.cpp:516
string Message(const CTempString str, const char *to_type, const CTempString msg)
Definition: ncbistr.cpp:545
void Set(int errcode)
Definition: ncbistr.cpp:530
bool m_SkipIfZero
Definition: ncbistr.cpp:541
void Throw(void)
Definition: ncbistr.cpp:533
bool m_NoErrno
Definition: ncbistr.cpp:540
Adapter for token position container pointer(NULL legal) Makes pointer to a container look as a legal...
Main tokenization algorithm.
CStringException –.
Definition: ncbistr.hpp:4506
@ eCharBuffer
Zeros are part of the data.
Singly-linked list of substrings that will constitute a single Split/Tokenize piece,...
Helper class to allocate memory for CTempString[Ex] on demand in the functions which need to modify t...
Definition: tempstr.hpp:1051
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
The NCBI C++ standard methods for dealing with std::string.
static uch flags
constexpr auto begin(const ct_const_array< T, N > &in) noexcept
constexpr auto end(const ct_const_array< T, N > &in) noexcept
#define T(s)
Definition: common.h:230
std::ofstream out("events_result.xml")
main entry point for tests
#define true
Definition: bool.h:35
#define false
Definition: bool.h:36
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:51
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:56
static char precision
Definition: genparams.c:28
static const char * str(char *buf, int n)
Definition: stats.c:84
static const char * column
Definition: stats.c:23
static char tmp[3200]
Definition: utf8.c:42
static const char * validate(DSNINFO *di)
Go looking for trouble.
Definition: winsetup.c:179
#define vasprintf
Definition: replacements.h:60
int offset
Definition: replacements.h:160
static TDSICONV * conv
Definition: charconv.c:168
char data[12]
Definition: iconv.c:80
unsigned char uint8_t
char * strdup(const char *str)
Supply string duplicate function, if one is not defined.
Definition: ncbistr.cpp:5602
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
Definition: ncbimisc.hpp:822
element_type * get(void) const
Get pointer.
Definition: ncbimisc.hpp:581
string
Definition: cgiapp.hpp:687
EUrlEncode
Definition: cgi_util.hpp:56
EUrlDecode
Definition: cgi_util.hpp:65
#define NULL
Definition: ncbistd.hpp:225
#define ERR_POST_X_ONCE(err_subcode, message)
Error posting only once during program execution with default error code and given error subcode.
Definition: ncbidiag.hpp:621
#define ERR_POST_X(err_subcode, message)
Error posting with default error code and given error subcode.
Definition: ncbidiag.hpp:550
#define isnan
Definition: ncbifloat.h:89
void Warning(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1191
#define NCBI_THROW2(exception_class, err_code, message, extra)
Throw exception with extra parameter.
Definition: ncbiexpt.hpp:1754
virtual const char * GetErrCodeString(void) const
Get error code interpreted as text.
Definition: ncbiexpt.cpp:444
static void SetErrno(int errno_code)
Set last error using errno code.
Definition: ncbierror.cpp:190
void Info(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1185
#define finite
Define value of finite (Is Finite).
Definition: ncbifloat.h:109
EStringType
String type.
Definition: serialdef.hpp:185
#define kMax_UI8
Definition: ncbi_limits.h:222
#define NCBI_CONST_LONGDOUBLE(v)
Definition: ncbitype.h:245
uint8_t Uint1
1-byte (8-bit) unsigned integer
Definition: ncbitype.h:99
unsigned char Uchar
Alias for unsigned char.
Definition: ncbitype.h:95
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
#define kMax_Int
Definition: ncbi_limits.h:184
#define kMax_I8
Definition: ncbi_limits.h:221
int64_t Int8
8-byte (64-bit) signed integer
Definition: ncbitype.h:104
uint64_t Uint8
8-byte (64-bit) unsigned integer
Definition: ncbitype.h:105
#define kMax_UInt
Definition: ncbi_limits.h:185
#define CHAR_BIT
std::string CStringUTF8
Definition: ncbistl.hpp:254
#define NCBI_AS_STRING(value)
Convert some value to string even if this value is macro itself.
Definition: ncbistl.hpp:146
char * Allocate(CTempString::size_type len)
Definition: ncbistr.cpp:7412
static EEncoding StringToEncoding(const CTempString &encoding_name)
Convert encoding name into EEncoding enum, taking into account synonyms as per http://www....
Definition: ncbistr.cpp:6795
static list< string > & SplitByPattern(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Variation of Split() with fSplit_ByPattern flag applied by default.
Definition: ncbistr.cpp:3507
static bool IsWhiteSpace(TUnicodeSymbol sym)
Determines if a symbol is whitespace per http://unicode.org/charts/uca/chart_Whitespace....
Definition: ncbistr.cpp:7188
CTempString m_Delim
static bool StringToBool(const CTempString str)
Convert string to bool.
Definition: ncbistr.cpp:2821
NStr::EUrlDecode m_Flag
Definition: ncbistr.hpp:4572
CTempStringEx m_InternalDelim
static string HtmlDecode(const CTempString str, EEncoding encoding=eEncoding_Unknown, THtmlDecode *result_flags=NULL)
Decode HTML entities and character references.
Definition: ncbistr.cpp:4527
static void x_Validate(const CTempString &str)
Definition: ncbistr.cpp:7010
NCBI_NS_STD::string::size_type SIZE_TYPE
Definition: ncbistr.hpp:132
EEncoding
Definition: ncbistr.hpp:199
static bool x_EvalFirst(char ch, SIZE_TYPE &more)
Definition: ncbistr.cpp:7123
static string SizetToString(size_t value, TNumToStringFlags flags=0, int base=10)
Convert size_t to string.
Definition: ncbistr.cpp:2751
virtual string Encode(const CTempString src, EStringType stype) const
Definition: ncbistr.cpp:7290
const_iterator end() const
Return an iterator to the string's ending position (one past the end of the represented sequence)
Definition: tempstr.hpp:306
static string Int8ToString(Int8 value, TNumToStringFlags flags=0, int base=10)
Convert Int8 to string.
Definition: ncbistr.hpp:5159
static string PrintableString(const CTempString str, TPrintableMode mode=fNewLine_Quote|fNonAscii_Passthru)
Get a printable version of the specified string.
Definition: ncbistr.cpp:3953
CStringDecoder_Url(NStr::EUrlDecode flag=NStr::eUrlDec_All)
Definition: ncbistr.cpp:7271
static CStringUTF8 & x_Append(CStringUTF8 &u8str, const CTempString &src, EEncoding encoding, EValidate validate)
Definition: ncbistr.cpp:7042
static CTempString TruncateSpaces_Unsafe(const CTempString str, ETrunc where=eTrunc_Both)
Truncate spaces in a string.
Definition: ncbistr.cpp:3191
static string DoubleToString(double value, int precision=-1, TNumToStringFlags flags=0)
Convert double to string.
Definition: ncbistr.hpp:5187
static int StringToNonNegativeInt(const CTempString str, TStringToNumFlags flags=0)
Convert string to non-negative integer value.
Definition: ncbistr.cpp:457
#define kEmptyStr
Definition: ncbistr.hpp:123
~CTempString_Storage(void)
Definition: ncbistr.cpp:7403
static CTempString GetField_Unsafe(const CTempString str, size_t field_no, const CTempString delimiters, EMergeDelims merge=eNoMergeDelims)
Search for a field.
Definition: ncbistr.cpp:6523
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
Definition: ncbistr.cpp:219
EUrlEncode
URL-encode flags.
Definition: ncbistr.hpp:3141
EMergeDelims
Whether to merge adjacent delimiters.
Definition: ncbistr.hpp:2514
static string AsSingleByteString(const CTempString &src, EEncoding encoding, const char *substitute_on_error=0, EValidate validate=eNoValidate)
Convert UTF8 string into a single-byte character representation.
Definition: ncbistr.cpp:6983
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
Definition: ncbistr.cpp:630
static SIZE_TYPE CommonOverlapSize(const CTempString s1, const CTempString s2)
Determine if the suffix of one string is the prefix of another.
Definition: ncbistr.cpp:3101
const CTempString & m_Str
static bool x_EvalNext(char ch)
Definition: ncbistr.cpp:7149
int TPrintableMode
Bitwise OR of EPrintableMode flags.
Definition: ncbistr.hpp:2736
static bool MatchEncoding(const CTempString &src, EEncoding encoding)
Check the encoding of the C/C++ string.
Definition: ncbistr.cpp:6752
static Int8 StringToInt8(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to Int8.
Definition: ncbistr.cpp:793
int TNumToStringFlags
Bitwise OR of "ENumToStringFlags".
Definition: ncbistr.hpp:266
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
Definition: ncbistr.cpp:3461
NStr::EUrlEncode m_Flag
Definition: ncbistr.hpp:4585
static string Base64Encode(const CTempString str, size_t line_len=0)
Base64-encode string.
Definition: ncbistr.cpp:6270
static string GetField(const CTempString str, size_t field_no, const CTempString delimiters, EMergeDelims merge=eNoMergeDelims)
Search for a field.
Definition: ncbistr.cpp:6497
EUrlDecode
URL decode flags.
Definition: ncbistr.hpp:3157
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
Definition: ncbistr.cpp:2993
static string Escape(const CTempString str, const CTempString metacharacters, char escape_char='\\')
Escape string (generic version).
Definition: ncbistr.cpp:3852
CEncodedString(void)
Definition: ncbistr.hpp:4825
static bool IsLower(const CTempString str)
Checks if all letters in the given string have a lower case.
Definition: ncbistr.cpp:433
static CTempString TruncateSpaces_Unsafe(const CTempString &str, NStr::ETrunc side=NStr::eTrunc_Both)
Truncate spaces in the string.
Definition: ncbistr.cpp:7219
static string HtmlEntity(TUnicodeSymbol uch)
Returns HTML entity name for this symbol if one exists (without leading ampersand and trailing semico...
Definition: ncbistr.cpp:4516
static bool MatchesMask(CTempString str, CTempString mask, ECase use_case=eCase)
Match "str" against the "mask".
Definition: ncbistr.cpp:389
virtual const char * GetErrCodeString(void) const override
Translate from the error code value to its string representation.
Definition: ncbistr.cpp:7256
static SIZE_TYPE x_BytesNeeded(TUnicodeSymbol ch)
Definition: ncbistr.cpp:7098
static CStringUTF8 SQLEncode(const CStringUTF8 &str, ESqlEncode flag)
SQL-encode string.
Definition: ncbistr.cpp:6140
static string LongToString(long value, TNumToStringFlags flags=0, int base=10)
Convert Int to string.
Definition: ncbistr.hpp:5141
unique_ptr< string > m_Encoded
Definition: ncbistr.hpp:4846
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
Definition: ncbistr.cpp:106
static double StringToDoubleEx(const char *str, size_t size, TStringToNumFlags flags=0)
This version accepts zero-terminated string.
Definition: ncbistr.cpp:1380
void x_ExtendInternalDelim()
Definition: ncbistr.cpp:7550
static double StringToDouble(const CTempStringEx str, TStringToNumFlags flags=0)
Convert string to double.
Definition: ncbistr.cpp:1387
static string HtmlEncode(const CTempString str, THtmlEncode flags=fHtmlEnc_EncodeAll)
Encode a string for HTML.
Definition: ncbistr.cpp:4122
static Uint8 StringToUInt8_DataSize(const CTempString str, TStringToNumFlags flags=0)
Convert string that can contain "software" qualifiers to Uint8.
Definition: ncbistr.cpp:1539
#define NPOS
Definition: ncbistr.hpp:133
static SIZE_TYPE GetSymbolCount(const CTempString &src)
Get the number of symbols (code points) in UTF8 string.
Definition: ncbistr.cpp:6678
static int strncasecmp(const char *s1, const char *s2, size_t n)
Case-insensitive comparison of two zero-terminated strings, narrowed to the specified number of chara...
Definition: ncbistr.hpp:5247
static size_t StringToSizet(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to size_t.
Definition: ncbistr.cpp:1769
Uint4 TUnicodeSymbol
Unicode character.
Definition: ncbistr.hpp:141
CTempString & assign(const char *src_str, size_type len)
Assign new values to the content of the string.
Definition: tempstr.hpp:733
static CTempString x_GetErrorFragment(const CTempString &src)
Definition: ncbistr.cpp:6666
static string URLDecode(const CTempString str, EUrlDecode flag=eUrlDec_All)
URL-decode string.
Definition: ncbistr.cpp:6214
static CTempString TrimSuffix_Unsafe(const CTempString str, const CTempString suffix, ECase use_case=eCase)
Trim suffix from a string.
Definition: ncbistr.cpp:3302
static const string BoolToString(bool value)
Convert bool to string.
Definition: ncbistr.cpp:2815
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string (in-place)
Definition: ncbistr.cpp:3201
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5084
static SIZE_TYPE DoubleToStringPosix(double value, unsigned int precision, char *buf, SIZE_TYPE buf_size)
Convert double to string with specified precision and put the result into a character buffer,...
Definition: ncbistr.cpp:2662
void x_SkipDelims(bool force_skip)
Definition: ncbistr.cpp:7529
static EEncoding GuessEncoding(const CTempString &src)
Guess the encoding of the C/C++ string.
Definition: ncbistr.cpp:6691
static string Dedent(const CTempString str, TDedentFlags flags=0)
Dedent multi-line string, removing common whitespace prefix for each line.
Definition: ncbistr.cpp:5503
virtual string Decode(const CTempString src, EStringType stype) const
Definition: ncbistr.cpp:7277
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
Definition: ncbistr.cpp:2891
PNocase_Generic< string > PNocase
Definition: ncbistr.hpp:4908
static unsigned long StringToULong(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to unsigned long.
Definition: ncbistr.cpp:665
const char * const_iterator
Definition: tempstr.hpp:71
TUnicodeSymbol TCharUCS4
Definition: ncbistr.hpp:3842
static int HexChar(char ch)
Convert character to integer.
Definition: ncbistr.hpp:5196
size_t GetSize(void) const
Definition: ncbistr.cpp:7387
static string EncodingToString(EEncoding encoding)
Give Encoding name as string.
Definition: ncbistr.cpp:6778
int TSplitFlags
Bitwise OR of ESplitFlags.
Definition: ncbistr.hpp:2510
ETrunc
Which end to truncate a string.
Definition: ncbistr.hpp:2239
static string ParseEscapes(const CTempString str, EEscSeqRange mode=eEscSeqRange_Standard, char user_char='?')
Parse C-style escape sequences in the specified string.
Definition: ncbistr.cpp:4793
static void PtrToString(string &out_str, const void *ptr)
Convert pointer to string.
Definition: ncbistr.cpp:2771
static string Quote(const CTempString str, char quote_char='"', char escape_char = '\\')
Quote string (generic version).
Definition: ncbistr.cpp:3897
const char * data(void) const
Return a pointer to the array represented.
Definition: tempstr.hpp:313
static string UInt8ToString_DataSize(Uint8 value, TNumToStringFlags flags=0, unsigned int max_digits=3)
Convert UInt8 to string using "software" qualifiers.
Definition: ncbistr.hpp:5177
basic_string< TUnicodeSymbol > TStringUnicode
Unicode string.
Definition: ncbistr.hpp:143
static double StringToDoublePosix(const char *str, char **endptr=0, TStringToNumFlags flags=0)
Convert string to double-precision value (analog of strtod function)
Definition: ncbistr.cpp:984
const wchar_t *const kEmptyWCStr
Definition: ncbistr.cpp:71
static SIZE_TYPE FindWord(const CTempString str, const CTempString word, ECase use_case=eCase, EDirection direction=eForwardSearch)
Find given word in the string.
Definition: ncbistr.cpp:3060
static TUnicodeSymbol DecodeFirst(char ch, SIZE_TYPE &more)
Begin converting first character of UTF8 sequence into Unicode.
Definition: ncbistr.cpp:7154
bool empty(void) const
Return true if the represented string is empty (i.e., the length is zero)
Definition: tempstr.hpp:334
static CTempString TrimPrefix_Unsafe(const CTempString str, const CTempString prefix, ECase use_case=eCase)
Trim prefix from a string.
Definition: ncbistr.cpp:3266
TStringUnicode TStringUCS4
Definition: ncbistr.hpp:3843
static string JsonDecode(const CTempString str, size_t *n_read=NULL)
Decode a string encoded by JsonEncode.
Definition: ncbistr.cpp:4995
bool AStrEquiv(const Arg1 &x, const Arg2 &y, Pred pr)
Check equivalence of arguments using predicate.
Definition: ncbistr.hpp:5037
static SIZE_TYPE EvaluateSymbolLength(const CTempString &src)
Check buffer for presence of UTF-8 byte sequence and return length of first symbol.
Definition: ncbistr.cpp:7111
static string & Replace(const string &src, const string &search, const string &replace, string &dst, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3314
Uint2 TCharUCS2
Type for character in UCS-2 encoding.
Definition: ncbistr.hpp:3847
static char SymbolToChar(TUnicodeSymbol sym, EEncoding encoding)
Convert Unicode code point into encoded character.
Definition: ncbistr.cpp:6864
static CStringUTF8 & x_AppendChar(CStringUTF8 &u8str, TUnicodeSymbol ch)
Definition: ncbistr.cpp:7020
static long StringToLong(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to long.
Definition: ncbistr.cpp:653
static CStringUTF8 AsUTF8(const CTempString &src, EEncoding encoding, EValidate validate=eNoValidate)
Convert into UTF8 from a C/C++ string.
Definition: ncbistr.hpp:3889
basic_string< TCharUCS2 > TStringUCS2
Type for string in UCS-2 encoding.
Definition: ncbistr.hpp:3849
static string Unescape(const CTempString str, char escape_char='\\')
Unescape string (generic version).
Definition: ncbistr.cpp:3870
static list< string > & WrapList(const list< string > &l, SIZE_TYPE width, const string &delim, list< string > &arr, TWrapFlags flags=0, const string *prefix=0, const string *prefix1=0)
Wrap the list using the specified criteria.
Definition: ncbistr.cpp:5366
static CStringUTF8 & TruncateSpacesInPlace(CStringUTF8 &str, NStr::ETrunc side=NStr::eTrunc_Both)
Truncate spaces in the string (in-place)
Definition: ncbistr.cpp:7206
int THtmlDecode
Definition: ncbistr.hpp:3071
bool Advance(CTempStringList *part_collector)
Return TRUE if it found some text and put it into collector.
static string CParse(const CTempString str, EQuoted quoted=eQuoted)
Discard C-style backslash escapes.
Definition: ncbistr.cpp:3981
static string XmlEncode(const CTempString str, TXmlEncode flags=eXmlEnc_Contents)
Encode a string for XML.
Definition: ncbistr.cpp:4036
static string Unquote(const CTempString str, char escape_char='\\')
Unquote string (generic version).
Definition: ncbistr.cpp:3918
static SIZE_TYPE x_GetValidSymbolCount(const CTempString &src, CTempString::const_iterator &err)
Definition: ncbistr.cpp:6647
static string UIntToString(unsigned int value, TNumToStringFlags flags=0, int base=10)
Convert UInt to string.
Definition: ncbistr.hpp:5109
static string JavaScriptEncode(const CTempString str)
Encode a string for JavaScript.
Definition: ncbistr.cpp:3959
static void WrapIt(const string &str, SIZE_TYPE width, _D &dest, TWrapFlags flags=0, const string *prefix=0, const string *prefix1=0)
Wrap the specified string into lines of a specified width.
Definition: ncbistr.cpp:5092
static TUnicodeSymbol DecodeNext(TUnicodeSymbol chU, char ch)
Convert next character of UTF8 sequence into Unicode.
Definition: ncbistr.cpp:7177
string m_Original
Definition: ncbistr.hpp:4845
void MergeDelims(void)
Assumes that we already have a delimiter on the previous position, so just skip all subsequent,...
static void TrimSuffixInPlace(string &str, const CTempString suffix, ECase use_case=eCase)
Trim suffix from a string (in-place)
Definition: ncbistr.cpp:3278
size_t size_type
Definition: tempstr.hpp:70
CTempString(void)
Definition: tempstr.hpp:412
static Uint8 StringToUInt8(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to Uint8.
Definition: ncbistr.cpp:873
static bool SplitInTwo(const CTempString str, const CTempString delim, string &str1, string &str2, TSplitFlags flags=0)
Split a string into two pieces using the specified delimiters.
Definition: ncbistr.cpp:3554
size_type length(void) const
Return the length of the represented array.
Definition: tempstr.hpp:320
static bool x_ReportLimitsError(const CTempString str, TStringToNumFlags flags)
Definition: ncbistr.cpp:6548
static string ULongToString(unsigned long value, TNumToStringFlags flags=0, int base=10)
Convert unsigned long to string.
Definition: ncbistr.hpp:5150
static unsigned int StringToUInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to unsigned int.
Definition: ncbistr.cpp:642
int TSS_Flags
Bitwise OR of ESS_Flags.
Definition: ncbistr.hpp:2853
static TUnicodeSymbol CharToSymbol(char ch, EEncoding encoding)
Convert encoded character into Unicode.
Definition: ncbistr.cpp:6839
static string Base64Decode(const CTempString str)
Base64-decode string.
Definition: ncbistr.cpp:6286
static list< string > & Justify(const CTempString str, SIZE_TYPE width, list< string > &par, const CTempString *pfx=0, const CTempString *pfx1=0)
Justify the specified string into a series of lines of the same width.
Definition: ncbistr.cpp:5418
CTempString_Storage * m_Storage
void Clear(void)
static const void * StringToPtr(const CTempStringEx str, TStringToNumFlags flags=0)
Convert string to pointer.
Definition: ncbistr.cpp:2781
void Join(string *s) const
Definition: ncbistr.cpp:7345
static string Sanitize(CTempString str, TSS_Flags flags=fSS_print)
Sanitize a string, allowing only specified classes of characters.
Definition: ncbistr.hpp:2876
CTempString substr(size_type pos) const
Obtain a substring from this string, beginning at a given offset.
Definition: tempstr.hpp:776
void Add(const CTempString &s)
static bool NeedsURLEncoding(const CTempString str, EUrlEncode flag=eUrlEnc_SkipMarkChars)
Check if the string needs the requested URL-encoding.
Definition: ncbistr.cpp:6228
static string JsonEncode(const CTempString str, EJsonEncode encoding=eJsonEnc_UTF8)
Encode a string for JSON.
Definition: ncbistr.cpp:4634
static bool IsIPAddress(const CTempStringEx str)
Check if the string contains a valid IP address.
Definition: ncbistr.cpp:6396
static TUnicodeSymbol Decode(const char *&src)
Convert sequence of UTF8 code units into Unicode code point.
Definition: ncbistr.hpp:5662
size_type find_first_not_of(const CTempString match, size_type pos=0) const
Find the first occurrence of any character not in the matching string within the current string,...
Definition: tempstr.hpp:553
static void TrimPrefixInPlace(string &str, const CTempString prefix, ECase use_case=eCase)
Trim prefix from a string (in-place)
Definition: ncbistr.cpp:3242
ECase
Which type of string comparison.
Definition: ncbistr.hpp:1204
TErrCode GetErrCode(void) const
Get error code.
Definition: ncbistr.hpp:4455
void SetString(const CTempString s, NStr::EUrlEncode flag=NStr::eUrlEnc_SkipMarkChars)
Set new original string.
Definition: ncbistr.cpp:7307
int TWrapFlags
Bitwise OR of "EWrapFlags".
Definition: ncbistr.hpp:3232
int TStringToNumFlags
Bitwise OR of "EStringToNumFlags".
Definition: ncbistr.hpp:311
static bool IsUpper(const CTempString str)
Checks if all letters in the given string are upper case.
Definition: ncbistr.cpp:445
list< TBuffer > TData
Definition: tempstr.hpp:1062
size_type find_first_of(const CTempString match, size_type pos=0) const
Find the first occurrence of any character in the matching string within the current string,...
Definition: tempstr.hpp:538
static SIZE_TYPE GetValidBytesCount(const CTempString &src)
Get the number of valid UTF-8 bytes (code units) in buffer.
Definition: ncbistr.hpp:5657
static string FormatVarargs(const char *format, va_list args)
Handle an arbitrary printf-style format string.
Definition: ncbistr.cpp:2846
static void Wrap(const string &str, SIZE_TYPE width, IWrapDest &dest, TWrapFlags flags, const string *prefix, const string *prefix1)
Definition: ncbistr.cpp:5347
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
Definition: ncbistr.hpp:673
size_type find(const CTempString match, size_type pos=0) const
Find the first instance of the entire matching string within the current string, beginning at an opti...
Definition: tempstr.hpp:655
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
Definition: ncbistr.hpp:5384
static void URLDecodeInPlace(string &str, EUrlDecode flag=eUrlDec_All)
URL-decode string to itself.
Definition: ncbistr.cpp:6222
static string CEncode(const CTempString str, EQuoted quoted=eQuoted)
Encode a string for C/C++.
Definition: ncbistr.cpp:3967
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
Definition: ncbistr.cpp:3405
static string & ToUpper(string &str)
Convert string to upper case – string& version.
Definition: ncbistr.cpp:424
void SkipDelims(void)
Skip all delimiters starting from current position.
static string URLEncode(const CTempString str, EUrlEncode flag=eUrlEnc_SkipMarkChars)
URL-encode string.
Definition: ncbistr.cpp:6062
static enable_if< is_integral< TChar >::value &&(1< sizeof(TChar)), CStringUTF8 & >::type AppendAsUTF8(CStringUTF8 &dest, const basic_string< TChar > &src)
Convert Unicode C++ string into UTF8 and append it to existing string.
Definition: ncbistr.hpp:3950
static string ShellEncode(const string &str)
Quotes a string in Bourne Again Shell (BASH) syntax, in a way that disallows non-printable characters...
Definition: ncbistr.cpp:4680
CTempStringEx & assign(const char *str, size_type len)
Assign new values to the content of the string.
Definition: tempstr.hpp:970
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string.
Definition: ncbistr.cpp:3186
CTempString_Storage m_DelimStorage
const char *const kEmptyCStr
Empty "C" string (points to a '\0').
Definition: ncbistr.cpp:68
CStringEncoder_Url(NStr::EUrlEncode flag=NStr::eUrlEnc_SkipMarkChars)
Definition: ncbistr.cpp:7284
size_type size(void) const
Return the length of the represented array.
Definition: tempstr.hpp:327
static SIZE_TYPE DoubleToString_Ecvt(double value, unsigned int precision, char *buf, SIZE_TYPE buf_size, int *dec, int *sign)
Convert double to string with specified precision.
Definition: ncbistr.cpp:2451
static string & ToLower(string &str)
Convert string to lower case – string& version.
Definition: ncbistr.cpp:405
static int CompareCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-sensitive compare of a substring with another string.
Definition: ncbistr.cpp:135
static string UInt8ToString(Uint8 value, TNumToStringFlags flags=0, int base=10)
Convert UInt8 to string.
Definition: ncbistr.hpp:5168
const_iterator begin() const
Return an iterator to the string's starting position.
Definition: tempstr.hpp:299
unique_ptr< SNode > next
static string ParseQuoted(const CTempString str, size_t *n_read=NULL)
Discard C-style backslash escapes and extract a quoted string.
Definition: ncbistr.cpp:4927
@ eJsonEnc_UTF8
Encode all characters above 0x80 to \uXXXX form.
Definition: ncbistr.hpp:3092
@ eJsonEnc_Quoted
Quote resulting string.
Definition: ncbistr.hpp:3094
@ eEncoding_Windows_1252
Definition: ncbistr.hpp:207
@ eEncoding_Ascii
Definition: ncbistr.hpp:202
@ eEncoding_ISO8859_1
Note: From the point of view of the C++.
Definition: ncbistr.hpp:203
@ eEncoding_CESU8
Definition: ncbistr.hpp:208
@ eEncoding_UTF8
Definition: ncbistr.hpp:201
@ eEncoding_Unknown
Definition: ncbistr.hpp:200
@ fNonAscii_Passthru
Allow non-ASCII but printable characters.
Definition: ncbistr.hpp:2732
@ fPrintable_Full
Show all octal digits at all times.
Definition: ncbistr.hpp:2734
@ fNonAscii_Quote
Octal for all non-ASCII characters.
Definition: ncbistr.hpp:2733
@ fNewLine_Passthru
Break the line at every "\n" occurrence.
Definition: ncbistr.hpp:2730
@ fNewLine_Quote
Display "\n" instead of actual linebreak.
Definition: ncbistr.hpp:2728
@ fDedent_NormalizeEmptyLines
Each line containing only whitespaces will be normalized to a single newline character in the output.
Definition: ncbistr.hpp:3408
@ fDedent_SkipEmptyFirstLine
Skip the first line from the result, but only if it is empty.
Definition: ncbistr.hpp:3413
@ fDedent_SkipFirstLine
Ignore first line and skip it from the result.
Definition: ncbistr.hpp:3412
@ fSS_NoMerge
Do not merge adjacent spaces (rejected chars)
Definition: ncbistr.hpp:2848
@ fSS_print
Check on isprint()
Definition: ncbistr.hpp:2838
@ fSS_NoTruncate_Begin
Do not truncate leading spaces.
Definition: ncbistr.hpp:2849
@ fSS_NoTruncate_End
Do not truncate trailing spaces.
Definition: ncbistr.hpp:2850
@ fSS_cntrl
Check on iscntrl()
Definition: ncbistr.hpp:2839
@ fSS_Reject
Reject specified characters, allow all other.
Definition: ncbistr.hpp:2843
@ fSS_punct
Check on ispunct()
Definition: ncbistr.hpp:2840
@ fSS_alpha
Check on isalpha()
Definition: ncbistr.hpp:2835
@ fSS_digit
Check on isdigit()
Definition: ncbistr.hpp:2836
@ fSS_alnum
Check on isalnum()
Definition: ncbistr.hpp:2837
@ fSS_Remove
Remove (rather than replace) rejected chars.
Definition: ncbistr.hpp:2847
@ fDecimalPosixFinite
StringToDouble*(): Keep result finite and normalized: if DBL_MAX < result < INF, result becomes DBL_M...
Definition: ncbistr.hpp:302
@ fAllowTrailingSpaces
Ignore trailing space characters.
Definition: ncbistr.hpp:297
@ fDS_ProhibitFractions
StringToUInt8_DataSize(): Ignore any fraction part of a value, "1.2K" ~ "1K".
Definition: ncbistr.hpp:307
@ fConvErr_NoErrMessage
Set errno, but do not set CNcbiError message on error.
Definition: ncbistr.hpp:291
@ fConvErr_NoThrow
Do not throw an exception on error.
Definition: ncbistr.hpp:285
@ fAllowLeadingSymbols
Ignore leading non-numeric characters.
Definition: ncbistr.hpp:295
@ fDS_ForceBinary
StringToUInt8_DataSize(): Use 1024 as a kilobyte factor regardless of suffix, like "KB" or "KiB".
Definition: ncbistr.hpp:306
@ fAllowCommas
Allow commas. See 'ENumToStringFlags::fWithCommas'.
Definition: ncbistr.hpp:293
@ fAllowTrailingSymbols
Ignore trailing non-numeric characters.
Definition: ncbistr.hpp:298
@ fDS_ProhibitSpaceBeforeSuffix
StringToUInt8_DataSize(): Do not allow spaces between value and suffix, like "10 K".
Definition: ncbistr.hpp:308
@ fMandatorySign
Check on mandatory sign. See 'ENumToStringFlags::fWithSign'.
Definition: ncbistr.hpp:292
@ fAllowLeadingSpaces
Ignore leading spaces in converted string.
Definition: ncbistr.hpp:294
@ fDecimalPosix
StringToDouble*(): For decimal point, use C locale.
Definition: ncbistr.hpp:300
@ fDecimalPosixOrLocal
StringToDouble*(): For decimal point, try both C and current locale.
Definition: ncbistr.hpp:301
@ eSqlEnc_TagNonASCII
Produce N'...' when input's not pure ASCII.
Definition: ncbistr.hpp:3168
@ fSplit_CanEscape
Allow \... escaping.
Definition: ncbistr.hpp:2503
@ fSplit_CanQuote
Definition: ncbistr.hpp:2506
@ fSplit_MergeDelimiters
Merge adjacent delimiters.
Definition: ncbistr.hpp:2498
@ fSplit_CanDoubleQuote
Allow "..." quoting.
Definition: ncbistr.hpp:2505
@ fSplit_Truncate_Begin
Truncate leading delimiters.
Definition: ncbistr.hpp:2499
@ fSplit_ByPattern
Require full delimiter strings.
Definition: ncbistr.hpp:2502
@ fSplit_CanSingleQuote
Allow '...' quoting.
Definition: ncbistr.hpp:2504
@ fWrap_Hyphenate
Add a hyphen when breaking words?
Definition: ncbistr.hpp:3228
@ fWrap_HTMLPre
Wrap as pre-formatted HTML?
Definition: ncbistr.hpp:3229
@ fWrap_FlatFile
Wrap for flat file use.
Definition: ncbistr.hpp:3230
@ eUrlEnc_ProcessMarkChars
Convert all non-alphanumeric chars, spaces are converted to '+'.
Definition: ncbistr.hpp:3143
@ eUrlEnc_None
Do not encode.
Definition: ncbistr.hpp:3154
@ eUrlEnc_URIQueryValue
Encode query part of an URI, arg value.
Definition: ncbistr.hpp:3151
@ eUrlEnc_PercentOnly
Convert all non-alphanumeric chars including space and '%' to %## format.
Definition: ncbistr.hpp:3144
@ eUrlEnc_URIHost
Encode host part of an URI.
Definition: ncbistr.hpp:3148
@ eUrlEnc_URIQueryName
Encode query part of an URI, arg name.
Definition: ncbistr.hpp:3150
@ eUrlEnc_URIPath
Encode path part of an URI.
Definition: ncbistr.hpp:3149
@ eUrlEnc_Path
Same as ProcessMarkChars but preserves valid path characters ('/', '.')
Definition: ncbistr.hpp:3145
@ eUrlEnc_URIScheme
Encode scheme part of an URI.
Definition: ncbistr.hpp:3146
@ eUrlEnc_URIUserinfo
Encode userinfo part of an URI.
Definition: ncbistr.hpp:3147
@ eUrlEnc_SkipMarkChars
Do not convert chars like '!', '(' etc.
Definition: ncbistr.hpp:3142
@ eUrlEnc_Cookie
Same as SkipMarkChars with encoded ','.
Definition: ncbistr.hpp:3153
@ eUrlEnc_URIFragment
Encode fragment part of an URI.
Definition: ncbistr.hpp:3152
@ eMergeDelims
Definition: ncbistr.hpp:2515
@ eFirst
First occurrence.
Definition: ncbistr.hpp:1940
@ eUrlDec_All
Decode '+' to space.
Definition: ncbistr.hpp:3158
@ eReverseSearch
Search in a backward direction.
Definition: ncbistr.hpp:1947
@ eForwardSearch
Search in a forward direction.
Definition: ncbistr.hpp:1946
@ eXmlEnc_CommentSafe
Encode double hyphen and ending hyphen, making the result safe to put into XML comments.
Definition: ncbistr.hpp:3030
@ eXmlEnc_Unsafe_Throw
Definition: ncbistr.hpp:3035
@ eXmlEnc_Unsafe_Skip
Check each character to conform XML 1.1 standards, skip any not allowed character or throw an CString...
Definition: ncbistr.hpp:3034
@ fDoubleGeneral
Definition: ncbistr.hpp:258
@ fDoubleScientific
DoubleToString*(): Use scientific format for double conversions.
Definition: ncbistr.hpp:256
@ fDoublePosix
DoubleToString*(): Use C locale for double conversions.
Definition: ncbistr.hpp:257
@ fWithRadix
Prefix the output value with radix for "well-known" bases like 8 ("0") and 16 ("0x")
Definition: ncbistr.hpp:252
@ fDoubleFixed
DoubleToString*(): Use n.nnnn format for double conversions.
Definition: ncbistr.hpp:255
@ fDS_PutBSuffixToo
UInt8ToString_DataSize(): Use "B" suffix for small bytes values.
Definition: ncbistr.hpp:264
@ fWithCommas
Use commas as thousands separator.
Definition: ncbistr.hpp:254
@ fUseLowercase
Use lowercase letters for string representation for bases above 10.
Definition: ncbistr.hpp:251
@ fDS_ShortSuffix
UInt8ToString_DataSize(): Use short suffix, like "10.0K".
Definition: ncbistr.hpp:263
@ fWithSign
Prefix the output value with a sign ('+'/'-')
Definition: ncbistr.hpp:253
@ fDS_PutSpaceBeforeSuffix
UInt8ToString_DataSize(): Add space between value and qualifiers, like "10.0 KB".
Definition: ncbistr.hpp:262
@ fDS_Binary
UInt8ToString_DataSize(): Use 1024 as a kilobyte factor, not 1000.
Definition: ncbistr.hpp:260
@ fDS_NoDecimalPoint
UInt8ToString_DataSize(): Do not add a decimal point ("10KB" vs "10.0KB")
Definition: ncbistr.hpp:261
@ eConvert
Failure to convert string.
Definition: ncbistr.hpp:4510
@ eFormat
Wrong format for any input to string methods.
Definition: ncbistr.hpp:4512
@ eBadArgs
Bad arguments to string methods.
Definition: ncbistr.hpp:4511
@ eTrunc_Both
Truncate spaces at both begin and end of string.
Definition: ncbistr.hpp:2242
@ eTrunc_End
Truncate trailing spaces only.
Definition: ncbistr.hpp:2241
@ eTrunc_Begin
Truncate leading spaces only.
Definition: ncbistr.hpp:2240
@ eQuoted
String is quoted.
Definition: ncbistr.hpp:2988
@ eNotQuoted
String is not quoted.
Definition: ncbistr.hpp:2989
@ fHtmlEnc_CheckPreencoded
Print warning if some pre-encoded entity found in the string.
Definition: ncbistr.hpp:3052
@ fHtmlEnc_SkipLiteralEntities
Skip "&entity;".
Definition: ncbistr.hpp:3049
@ fHtmlEnc_SkipNumericEntities
Skip "&#NNNN;".
Definition: ncbistr.hpp:3050
@ fHtmlEnc_SkipEntities
Definition: ncbistr.hpp:3051
@ fHtmlDec_Encoding_Changed
Character encoding changed.
Definition: ncbistr.hpp:3069
@ fHtmlDec_CharRef_Entity
Character entity reference(s) was found.
Definition: ncbistr.hpp:3067
@ fHtmlDec_CharRef_Numeric
Numeric character reference(s) was found.
Definition: ncbistr.hpp:3068
@ eNocase
Case insensitive compare.
Definition: ncbistr.hpp:1206
@ eCase
Case sensitive compare.
Definition: ncbistr.hpp:1205
@ eNoValidate
Definition: ncbistr.hpp:3877
@ eValidate
Definition: ncbistr.hpp:3878
@ eEscSeqRange_User
Set char to the user value passed in another parameter.
Definition: ncbistr.hpp:2945
@ eEscSeqRange_Standard
Set char to the last (least significant byte) of the escape sequence (default).
Definition: ncbistr.hpp:2939
@ eEscSeqRange_Throw
Throw an exception.
Definition: ncbistr.hpp:2943
@ eEscSeqRange_Errno
Set errno to ERANGE, return empty string.
Definition: ncbistr.hpp:2944
@ eEscSeqRange_FirstByte
Set char to the first byte of the escape sequence.
Definition: ncbistr.hpp:2941
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
#define NCBI_DEVELOPMENT_VER
Definition of all error codes used in corelib (xncbi.lib).
char * buf
int i
yy_size_t n
int len
static void hex(unsigned char c)
Definition: mdb_dump.c:56
mdb_mode_t mode
Definition: lmdb++.h:38
const struct ncbi::grid::netcache::search::fields::SIZE size
pair< CTempString, CTempString > s_GetField(const string &data, size_t &pos)
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
static unsigned int ud(time_t one, time_t two)
unsigned int a
Definition: ncbi_localip.c:102
EIPRangeType t
Definition: ncbi_localip.c:101
#define NCBI_CONST_UINT8(v)
Definition: ncbi_std.h:196
#define vsnprintf
Definition: ncbiconf_msvc.h:66
int isalpha(Uchar c)
Definition: ncbictype.hpp:61
int isspace(Uchar c)
Definition: ncbictype.hpp:69
int isalnum(Uchar c)
Definition: ncbictype.hpp:62
int tolower(Uchar c)
Definition: ncbictype.hpp:72
int isxdigit(Uchar c)
Definition: ncbictype.hpp:71
int isdigit(Uchar c)
Definition: ncbictype.hpp:64
int toupper(Uchar c)
Definition: ncbictype.hpp:73
int isprint(Uchar c)
Definition: ncbictype.hpp:67
int ispunct(Uchar c)
Definition: ncbictype.hpp:68
int islower(Uchar c)
Definition: ncbictype.hpp:66
int isupper(Uchar c)
Definition: ncbictype.hpp:70
int iscntrl(Uchar c)
Definition: ncbictype.hpp:63
Defines NCBI C++ Toolkit portable error codes.
Floating-point support routines.
static const char * s_kFString
Definition: ncbistr.cpp:2806
static bool _isspace(unsigned char c)
Definition: ncbistr.cpp:5086
static const char * s_kYString
Definition: ncbistr.cpp:2809
static const char s_EncodeURIScheme[256][4]
Definition: ncbistr.cpp:5756
#define CHECK_ENDPTR_SIZE(to_type)
Definition: ncbistr.cpp:603
#define S2N_CONVERT_GUARD_EX(flags)
Definition: ncbistr.cpp:566
static const char * s_kOnString
Definition: ncbistr.cpp:2811
static double s_StringToDouble(const char *str, size_t size, NStr::TStringToNumFlags flags)
Definition: ncbistr.cpp:1282
static void s_UnsignedOtherBaseToString(string &out_str, T value, NStr::TNumToStringFlags flags, int base)
Definition: ncbistr.cpp:1782
static const char * s_kNoString
Definition: ncbistr.cpp:2808
static const char s_EncodeURIPath[256][4]
Definition: ncbistr.cpp:5881
static const char s_EncodePath[256][4]
Definition: ncbistr.cpp:5719
static const char s_EncodeMarkChars[256][4]
Definition: ncbistr.cpp:5649
#define CHECK_SPLIT_TEMPSTRING_FLAGS(where)
Definition: ncbistr.cpp:3452
const int kMaxDoublePrecision
Definition: ncbistr.cpp:2311
#define PRINT_INT8_CHUNK_SIZE
Definition: ncbistr.cpp:1957
static const char kDigitUpper[]
Definition: ncbistr.cpp:58
string x_AsSingleByteString(const CTempString &str, const SCharEncoder &enc, const char *substitute_on_error)
Definition: ncbistr.cpp:6960
static const char s_EncodePercentOnly[256][4]
Definition: ncbistr.cpp:5684
const char *const kNcbiDevelopmentVersionString
Definition: ncbistr.cpp:77
static bool s_IsWordBoundaryChar(char ch)
Definition: ncbistr.cpp:3054
static const char * s_kFalseString
Definition: ncbistr.cpp:2804
static const char kDigitLower[]
Definition: ncbistr.cpp:59
#define __NLG
Definition: ncbistr.cpp:2449
static const char s_EncodeURIQueryValue[256][4]
Definition: ncbistr.cpp:5951
static const char * s_kOffString
Definition: ncbistr.cpp:2812
TStr s_TruncateSpaces(const TStr &str, NStr::ETrunc where, const TStr &empty_str)
Definition: ncbistr.cpp:3150
EMatchesMaskResult
Definition: ncbistr.cpp:282
@ eNoMatch
Definition: ncbistr.cpp:284
@ eMismatch
Definition: ncbistr.cpp:285
@ eMatch
Definition: ncbistr.cpp:283
static SIZE_TYPE s_DiffPtr(const char *end, const char *start)
Definition: ncbistr.cpp:63
#define S2N_CONVERT_ERROR(to_type, msg, errcode, pos)
Definition: ncbistr.cpp:569
static const TUnicodeSymbol s_cp1252_table[]
Definition: ncbistr.cpp:6832
#define S2N_CONVERT_ERROR_RADIX(to_type, msg)
Definition: ncbistr.cpp:592
static const char * s_kYesString
Definition: ncbistr.cpp:2807
static const char * s_kNString
Definition: ncbistr.cpp:2810
static bool s_IsDecimalPoint(unsigned char ch, NStr::TStringToNumFlags flags)
Definition: ncbistr.cpp:719
#define S2N_CONVERT_GUARD(flags)
Regular guard.
Definition: ncbistr.cpp:561
static struct tag_HtmlEntities s_HtmlEntities[]
static bool s_IsIPAddress(const char *str, size_t size)
Definition: ncbistr.cpp:6308
static void s_SkipAllowedSymbols(const CTempString str, SIZE_TYPE &pos, ESkipMode skip_mode, NStr::TStringToNumFlags flags)
Definition: ncbistr.cpp:735
static const char s_EncodeURIFragment[256][4]
Definition: ncbistr.cpp:5992
static Uint8 s_DataSizeConvertQual(const CTempString str, SIZE_TYPE &pos, Uint8 value, NStr::TStringToNumFlags flags)
Definition: ncbistr.cpp:1407
static bool s_IsGoodCharForRadix(char ch, int base, int *value=0)
Definition: ncbistr.cpp:679
static const char * s_kTrueString
Definition: ncbistr.cpp:2803
static bool s_CheckRadix(const CTempString str, SIZE_TYPE &pos, int &base)
Definition: ncbistr.cpp:761
#define PRINT_INT8_CHUNK
Definition: ncbistr.cpp:1956
static const char s_EncodeURIQueryName[256][4]
Definition: ncbistr.cpp:5916
static EMatchesMaskResult s_MatchesMask(CTempString str, CTempString mask, bool ignore_case)
Definition: ncbistr.cpp:291
string s_ParseJsonEncodeEscapes(const CTempString str)
Definition: ncbistr.cpp:4934
static const char s_EncodeURIUserinfo[256][4]
Definition: ncbistr.cpp:5797
END_NCBI_NAMESPACE
Definition: ncbistr.cpp:7572
static SIZE_TYPE s_VisibleHtmlWidth(const string &str)
Definition: ncbistr.cpp:5059
ELanguage
Definition: ncbistr.cpp:3730
@ eLanguage_Javascript
Definition: ncbistr.cpp:3732
@ eLanguage_C
Definition: ncbistr.cpp:3731
static SIZE_TYPE s_EndOfReference(const string &str, SIZE_TYPE start)
Definition: ncbistr.cpp:5042
static void s_SignedBase10ToString(string &out_str, unsigned long value, long svalue, NStr::TNumToStringFlags flags, int base)
Definition: ncbistr.cpp:1830
static char * s_ncbi_append_int2str(char *buffer, unsigned int value, size_t digits, bool zeros)
Definition: ncbistr.cpp:2426
TContainer & s_Split(const TString &str, const TString &delim, TContainer &arr, NStr::TSplitFlags flags, vector< SIZE_TYPE > *token_pos, CTempString_Storage *storage=NULL)
Definition: ncbistr.cpp:3436
static string s_PrintableString(const CTempString str, NStr::TPrintableMode mode, ELanguage lang)
Definition: ncbistr.cpp:3736
static const char s_EncodeURIHost[256][4]
Definition: ncbistr.cpp:5838
CTempString s_Unquote(const CTempString str, size_t *n_read)
Definition: ncbistr.cpp:4899
static const char s_Encode[256][4]
Definition: ncbistr.cpp:5614
BEGIN_NCBI_NAMESPACE
Definition: ncbistr.cpp:54
ESkipMode
Definition: ncbistr.cpp:712
@ eSkipSpacesOnly
Definition: ncbistr.cpp:715
@ eSkipAll
Definition: ncbistr.cpp:713
@ eSkipAllAllowed
Definition: ncbistr.cpp:714
const int kMaxDoubleStringSize
Definition: ncbistr.cpp:2317
static const char s_EncodeCookie[256][4]
Definition: ncbistr.cpp:6027
static char * s_PrintBase10Uint8(char *pos, Uint8 value, NStr::TNumToStringFlags flags)
Definition: ncbistr.cpp:1960
static void s_URLDecode(const CTempString src, string &dst, NStr::EUrlDecode flag)
Definition: ncbistr.cpp:6167
static const char * s_kTString
Definition: ncbistr.cpp:2805
static SIZE_TYPE s_EndOfTag(const string &str, SIZE_TYPE start)
Definition: ncbistr.cpp:5003
#define SS_ADD_CHAR(c)
Definition: ncbistr.cpp:3605
#define S2N_CONVERT_ERROR_INVAL(to_type)
Definition: ncbistr.cpp:589
#define CHECK_ENDPTR(to_type)
Definition: ncbistr.cpp:598
#define S2N_CONVERT_ERROR_OVERFLOW(to_type)
Definition: ncbistr.cpp:595
#define CHECK_COMMAS
Definition: ncbistr.cpp:608
Algorithms for string processing.
T max(T x_, T y_)
T min(T x_, T y_)
static Format format
Definition: njn_ioutil.cpp:53
Int4 delta(size_t dimension_, const Int4 *score_)
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
Definition: njn_matrix.hpp:613
static unsigned cnt[256]
#define memmove(a, b, c)
static BOOL utf8
Definition: pcregrep.c:199
static int bufsize
Definition: pcregrep.c:162
static const char * suffix[]
Definition: pcregrep.c:408
static const char * prefix[]
Definition: pcregrep.c:405
static char * locale
Definition: pcregrep.c:149
static pcre_uint8 * buffer
Definition: pcretest.c:1051
static const streamsize kBufSize
static const char delimiter[]
#define BASE64_Decode
Definition: ncbi_base64.h:42
#define BASE64_Encode
Definition: ncbi_base64.h:41
Do nothing target reservation trait.
Do nothing token counter.
Do-nothing token position container.
virtual char ToChar(TUnicodeSymbol sym) const =0
virtual TUnicodeSymbol ToUnicode(char ch) const =0
virtual TUnicodeSymbol ToUnicode(char ch) const
Definition: ncbistr.cpp:6896
SEncEncoder(EEncoding encoding)
Definition: ncbistr.cpp:6895
EEncoding m_Encoding
Definition: ncbistr.cpp:6902
virtual char ToChar(TUnicodeSymbol sym) const
Definition: ncbistr.cpp:6899
const locale & m_Lcl
Definition: ncbistr.cpp:6930
const ctype< wchar_t > & m_Facet
Definition: ncbistr.cpp:6931
virtual TUnicodeSymbol ToUnicode(char ch) const
Definition: ncbistr.cpp:6912
virtual char ToChar(TUnicodeSymbol sym) const
Definition: ncbistr.cpp:6921
SLocaleEncoder(const locale &lcl)
Definition: ncbistr.cpp:6908
TUnicodeSymbol u
Definition: ncbistr.cpp:4229
const char * s
Definition: ncbistr.cpp:4230
Definition: type.c:6
#define uch
#define _TROUBLE
#define _ASSERT
@ eSpace
Definition: text_util.cpp:56
done
Definition: token1.c:1
else result
Definition: token2.c:20
void free(voidpf ptr)
voidp malloc(uInt size)
unsigned char uch
Definition: zutil.h:39
Modified on Wed Apr 17 13:09:36 2024 by modify_doxy.py rev. 669887