NCBI C++ ToolKit
ncbistr.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef CORELIB___NCBISTR__HPP
2 #define CORELIB___NCBISTR__HPP
3 
4 /* $Id: ncbistr.hpp 102749 2024-07-05 14:32:19Z ivanov $
5  * ===========================================================================
6  *
7  * PUBLIC DOMAIN NOTICE
8  * National Center for Biotechnology Information
9  *
10  * This software/database is a "United States Government Work" under the
11  * terms of the United States Copyright Act. It was written as part of
12  * the author's official duties as a United States Government employee and
13  * thus cannot be copyrighted. This software/database is freely available
14  * to the public for use. The National Library of Medicine and the U.S.
15  * Government have not placed any restriction on its use or reproduction.
16  *
17  * Although all reasonable efforts have been taken to ensure the accuracy
18  * and reliability of the software and data, the NLM and the U.S.
19  * Government do not and cannot warrant the performance or results that
20  * may be obtained by using this software or data. The NLM and the U.S.
21  * Government disclaim all warranties, express or implied, including
22  * warranties of performance, merchantability or fitness for any particular
23  * purpose.
24  *
25  * Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Authors: Eugene Vasilchenko, Denis Vakatov
30  *
31  *
32  */
33 
34 /// @file ncbistr.hpp
35 /// The NCBI C++ standard methods for dealing with std::string
36 
37 
38 #include <corelib/ncbi_limits.hpp>
39 #include <corelib/tempstr.hpp>
40 #include <corelib/ncbierror.hpp>
41 #ifdef NCBI_OS_OSF1
42 # include <strings.h>
43 #endif
44 #include <stdarg.h>
45 #include <time.h>
46 #include <set>
47 #include <functional>
48 
49 
51 
52 /** @addtogroup String
53  *
54  * @{
55  */
56 
57 /// Empty "C" string (points to a '\0').
58 NCBI_XNCBI_EXPORT extern const char *const kEmptyCStr;
59 #define NcbiEmptyCStr NCBI_NS_NCBI::kEmptyCStr
60 
61 #if defined(HAVE_WSTRING)
62 NCBI_XNCBI_EXPORT extern const wchar_t *const kEmptyWCStr;
63 #define NcbiEmptyWCStr NCBI_NS_NCBI::kEmptyWCStr
64 #endif
65 
66 /// Empty "C++" string.
67 #if defined(NCBI_OS_MSWIN) \
68  || (defined(NCBI_OS_LINUX) \
69  && (defined(NCBI_COMPILER_GCC) || defined(NCBI_COMPILER_ANY_CLANG)))
// NOTE(review): the class-head line was missing from this listing; the
// name is restored from the NcbiEmptyString macro, which expands to
// CNcbiEmptyString::Get(). Confirm against the upstream header.
class CNcbiEmptyString
{
public:
    /// Get string.
    ///
    /// @return
    ///   Reference to a function-local static string that is
    ///   default-constructed (hence empty) and shared by all callers.
    inline
    static const string& Get(void)
    {
        static string empty_str;
        return empty_str;
    }
};
81 #if defined(HAVE_WSTRING)
// NOTE(review): the class-head line was missing from this listing; the
// name is restored from the NcbiEmptyWString macro, which expands to
// CNcbiEmptyWString::Get(). Confirm against the upstream header.
class CNcbiEmptyWString
{
public:
    /// Get string.
    ///
    /// @return
    ///   Reference to a function-local static wstring that is
    ///   default-constructed (hence empty) and shared by all callers.
    static const wstring& Get(void)
    {
        static wstring empty_str;
        return empty_str;
    }
};
92 #endif
93 #else
// NOTE(review): the class-head line was missing from this listing; the
// name is restored from the NcbiEmptyString macro. The lost line may also
// have carried an export specifier (other exported symbols in this header
// use NCBI_XNCBI_EXPORT) - confirm against the upstream header.
class CNcbiEmptyString
{
public:
    /// Get string.
    /// Only declared here; the definition is not part of this class body.
    static const string& Get(void);
private:
    /// Helper method to initialize private data member and return
    /// null string.
    static const string& FirstGet(void);
    static const string* m_Str;     ///< Null string pointer.
};
105 
106 # if defined(HAVE_WSTRING)
// NOTE(review): the class-head line was missing from this listing; the
// name is restored from the NcbiEmptyWString macro. The lost line may also
// have carried an export specifier (other exported symbols in this header
// use NCBI_XNCBI_EXPORT) - confirm against the upstream header.
class CNcbiEmptyWString
{
public:
    /// Get string.
    /// Only declared here; the definition is not part of this class body.
    static const wstring& Get(void);
private:
    /// Helper method to initialize private data member and return
    /// null string.
    static const wstring& FirstGet(void);
    static const wstring* m_Str;    ///< Null string pointer.
};
118 # endif
119 #endif // NCBI_OS_MSWIN....
120 
121 
122 #define NcbiEmptyString NCBI_NS_NCBI::CNcbiEmptyString::Get()
123 #define kEmptyStr NcbiEmptyString
124 
125 #if defined(HAVE_WSTRING)
126 # define NcbiEmptyWString NCBI_NS_NCBI::CNcbiEmptyWString::Get()
127 # define kEmptyWStr NcbiEmptyWString
128 #endif
129 
130 // SIZE_TYPE and NPOS
131 
132 typedef NCBI_NS_STD::string::size_type SIZE_TYPE;
133 #define NPOS NCBI_NS_STD::string::npos
134 
135 
136 
137 /////////////////////////////////////////////////////////////////////////////
138 // Unicode-related definitions and conversions
139 
140 /// Unicode character
142 /// Unicode string
143 typedef basic_string<TUnicodeSymbol> TStringUnicode;
144 
// TXChar / TXString select the character and string types used for
// "X" (platform-native) strings: wide on MS Windows Unicode builds,
// plain char everywhere else. The _T_* macros convert between the
// "X" representation and ordinary std strings / C strings.
#if defined(NCBI_OS_MSWIN)  &&  defined(_UNICODE)

typedef wchar_t TXChar;
typedef wstring TXString;

#  if !defined(_TX)
#    define _TX(x) L ## x
#  endif

// Debug builds request validation of the input during conversion;
// other builds skip it.
#  if defined(_DEBUG)
#    define _T_XSTRING(x) \
    ncbi::CUtf8::AsBasicString<ncbi::TXChar>(x, NULL, ncbi::CUtf8::eValidate)
#  else
#    define _T_XSTRING(x) \
    ncbi::CUtf8::AsBasicString<ncbi::TXChar>(x, NULL, ncbi::CUtf8::eNoValidate)
#  endif
#  define _T_STDSTRING(x)     ncbi::CUtf8::AsUTF8(x)
#  define _T_XCSTRING(x)      _T_XSTRING(x).c_str()
#  define _T_CSTRING(x)       _T_STDSTRING(x).c_str()

#  define NcbiEmptyXCStr   NcbiEmptyWCStr
#  define NcbiEmptyXString NcbiEmptyWString
#  define kEmptyXStr       kEmptyWStr
#  define kEmptyXCStr      kEmptyWCStr

#else

typedef char   TXChar;
typedef string TXString;

#  if !defined(_TX)
#    define _TX(x) x
#  endif

// With char-based "X" strings the conversions are identities, except
// _T_XCSTRING, which must yield a C string from either input kind.
#  define _T_XSTRING(x)       (x)
#  define _T_STDSTRING(x)     (x)
#  define _T_XCSTRING(x)      ncbi::impl_ToCString(x)
#  define _T_CSTRING(x)       (x)

#  define NcbiEmptyXCStr   NcbiEmptyCStr
#  define NcbiEmptyXString NcbiEmptyString
#  define kEmptyXStr       kEmptyStr
#  define kEmptyXCStr      kEmptyCStr

/// Return the C string unchanged.
inline const char* impl_ToCString(const char*   s) { return s; }
/// Return the C string held by @a s; valid only while @a s is alive
/// and unmodified.
inline const char* impl_ToCString(const string& s) { return s.c_str(); }

#endif
193 
194 
195 /////////////////////////////////////////////////////////////////////////////
196 ///
197 
#if defined(NCBI_CUTF8_ENCODING_CLASSIC)  ||  !defined(HAVE_ENUM_CLASS)
// NOTE(review): five enumerators of this classic enum were missing from
// the listing (three before and two after eEncoding_ISO8859_1). They are
// restored from the eEncoding_* names and the enumerator order of the
// "enum class" variant below - confirm against the upstream header.
enum EEncoding {
    eEncoding_Unknown,
    eEncoding_UTF8,
    eEncoding_Ascii,
    eEncoding_ISO8859_1,    ///< Note:  From the point of view of the C++
                            ///< Toolkit, the ISO 8859-1 character set includes
                            ///< symbols 0x00 through 0xFF except 0x80 through
                            ///< 0x9F.
    eEncoding_Windows_1252,
    eEncoding_CESU8
};
#else
// Temporary safeguard to protect against implicit conversion of EEncoding
// to size_t, etc
// @attention  Do not use "EEncoding::Xxx" values directly, as they will go
//             away eventually! Use the "eEncoding_Xxx" values instead.
enum class EEncoding {
    Unknown,      ///< Do not use this directly!  It will go away eventually!
    UTF8,         ///< Do not use this directly!  It will go away eventually!
    Ascii,        ///< Do not use this directly!  It will go away eventually!
    ISO8859_1,    ///< Do not use this directly!  It will go away eventually!
    Windows_1252, ///< Do not use this directly!  It will go away eventually!
    // CESU-8 spec https://www.unicode.org/reports/tr26/tr26-4.html
    // It is not intended nor recommended for open information exchange,
    // but since it may appear in incoming data, we want to be able to
    // detect it and convert into 'normal' UTF-8
    CESU8
};
#define eEncoding_Unknown      EEncoding::Unknown
#define eEncoding_UTF8         EEncoding::UTF8
#define eEncoding_Ascii        EEncoding::Ascii
#define eEncoding_ISO8859_1    EEncoding::ISO8859_1
#define eEncoding_Windows_1252 EEncoding::Windows_1252
#define eEncoding_CESU8        EEncoding::CESU8
#endif
234 
235 
236 /////////////////////////////////////////////////////////////////////////////
237 ///
238 /// NStr --
239 ///
240 /// Encapsulates class-wide string processing functions.
241 
243 {
244 public:
/// Number to string conversion flags.
///
/// NOTE:
///   If the base specified in the *ToString() methods is not the
///   default 10, then some flags like fWithSign and fWithCommas
///   will be ignored.
enum ENumToStringFlags {
    fUseLowercase            = (1 <<  4), ///< Use lowercase letters for string representation for bases above 10
    fWithRadix               = (1 <<  5), ///< Prefix the output value with radix for "well-known" bases like 8 ("0") and 16 ("0x")
    fWithSign                = (1 <<  6), ///< Prefix the output value with a sign ('+'/'-')
    fWithCommas              = (1 <<  7), ///< Use commas as thousands separator
    fDoubleFixed             = (1 <<  8), ///< DoubleToString*(): Use n.nnnn format for double conversions
    fDoubleScientific        = (1 <<  9), ///< DoubleToString*(): Use scientific format for double conversions
    fDoublePosix             = (1 << 10), ///< DoubleToString*(): Use C locale for double conversions
    fDoubleGeneral           = fDoubleFixed | fDoubleScientific,
    // Additional flags to convert "software" qualifiers (see UInt8ToString_DataSize)
    fDS_Binary               = (1 << 11), ///< UInt8ToString_DataSize(): Use 1024 as a kilobyte factor, not 1000.
    fDS_NoDecimalPoint       = (1 << 12), ///< UInt8ToString_DataSize(): Do not add a decimal point ("10KB" vs "10.0KB")
    fDS_PutSpaceBeforeSuffix = (1 << 13), ///< UInt8ToString_DataSize(): Add space between value and qualifiers, like "10.0 KB"
    fDS_ShortSuffix          = (1 << 14), ///< UInt8ToString_DataSize(): Use short suffix, like "10.0K"
    fDS_PutBSuffixToo        = (1 << 15)  ///< UInt8ToString_DataSize(): Use "B" suffix for small bytes values.
};
typedef int TNumToStringFlags;    ///< Bitwise OR of "ENumToStringFlags"
267 
/// String to number conversion flags.
enum EStringToNumFlags {
    /// Do not throw an exception on error.
    /// Could be used with methods throwing an exception by default, ignored otherwise.
    /// Just return zero and set errno to non-zero instead of throwing an exception.
    /// We recommend the following technique to check against errors
    /// with minimum overhead when this flag is used:
    ///  @code
    ///     if (!retval  &&  errno != 0)
    ///        ERROR;
    ///  @endcode
    /// And for StringToDouble*() variants:
    ///  @code
    ///     if (retval == HUGE_VAL  ||  retval == -HUGE_VAL  ||
    ///        !retval  &&  errno != 0)
    ///        ERROR;
    ///  @endcode
    fConvErr_NoThrow              = (1 <<  0),
    /*
    fConvErr_NoErrno              = (1 <<  1),   ///< Do not set errno at all.
                                                 ///< If used together with fConvErr_NoThrow flag
                                                 ///< returns 0 on error (-1 for StringToNonNegativeInt).
    */
    fConvErr_NoErrMessage         = (1 <<  2),   ///< Set errno, but do not set CNcbiError message on error
    fMandatorySign                = (1 << 17),   ///< Check on mandatory sign. See 'ENumToStringFlags::fWithSign'.
    fAllowCommas                  = (1 << 18),   ///< Allow commas. See 'ENumToStringFlags::fWithCommas'.
    fAllowLeadingSpaces           = (1 << 19),   ///< Ignore leading whitespace characters in converted string.
    fAllowLeadingSymbols          = (1 << 20) | fAllowLeadingSpaces,
                                                 ///< Ignore leading non-numeric characters.
    fAllowTrailingSpaces          = (1 << 21),   ///< Ignore trailing whitespace characters.
    fAllowTrailingSymbols         = (1 << 22) | fAllowTrailingSpaces,
                                                 ///< Ignore trailing non-numeric characters.
    fDecimalPosix                 = (1 << 23),   ///< StringToDouble*(): For decimal point, use C locale.
    fDecimalPosixOrLocal          = (1 << 24),   ///< StringToDouble*(): For decimal point, try both C and current locale.
    fDecimalPosixFinite           = (1 << 25),   ///< StringToDouble*(): Keep result finite and normalized:
                                                 ///< if DBL_MAX < result < INF,  result becomes DBL_MAX
                                                 ///< if 0 < result < DBL_MIN, result becomes DBL_MIN
    // Additional flags to convert "software" qualifiers (see StringToUInt8_DataSize)
    fDS_ForceBinary               = (1 << 26),   ///< StringToUInt8_DataSize(): Use 1024 as a kilobyte factor regardless of suffix, like "KB" or "KiB".
    fDS_ProhibitFractions         = (1 << 27),   ///< StringToUInt8_DataSize(): Ignore any fraction part of a value, "1.2K" ~ "1K"
    fDS_ProhibitSpaceBeforeSuffix = (1 << 28)    ///< StringToUInt8_DataSize(): Do not allow spaces between value and suffix, like "10 K".
};
typedef EStringToNumFlags EConvErrFlags;    ///< Formerly split out
typedef int               TStringToNumFlags; ///< Bitwise OR of "EStringToNumFlags"
typedef TStringToNumFlags TConvErrFlags;    ///< Formerly split out
313 
314  /// Convert string to a numeric value.
315  ///
316  /// @param str
317  /// String to be converted.
318  /// @param flags
319  /// Optional flags to tune up how the string is converted to value.
320  /// @param base
321  /// Radix base. Allowed values are 0, 2..36. Zero means to use the
322  /// first characters to determine the base - a leading "0x" or "0X"
323  /// means base 16; otherwise a leading 0 means base 8; otherwise base 10.
324  /// @return
325  /// - If conversion succeeds, set errno to zero and return the
326  /// converted value.
327  /// - Otherwise, if fConvErr_NoThrow is not set, throw an exception.
328  /// - Otherwise, set errno to non-zero and return zero.
329  template <typename TNumeric>
330  static TNumeric StringToNumeric(const CTempString str,
332  int base = 10)
333  {
334  return x_StringToNumeric<TNumeric>(str, flags, base);
335  }
336 
337  /// Convert string to a numeric value.
338  ///
339  /// @param str [in]
340  /// String to be converted.
341  /// @param value [out]
342  /// The numeric value represented by "str". Zero on any error.
343  /// @param flags [in]
344  /// Optional flags to tune up how the string is converted to value.
345  /// @param base [in]
346  /// Radix base. Allowed values are 0, 2..36. Zero means to use the
347  /// first characters to determine the base - a leading "0x" or "0X"
348  /// means base 16; otherwise a leading 0 means base 8; otherwise base 10.
349  /// @return
350  /// - If conversion succeeds, set errno to zero, set the value, and
351  /// return true.
352  /// - Otherwise, if fConvErr_NoThrow is not set, throw an exception.
353  /// - Otherwise, set errno to non-zero, set the value to zero, and
354  /// return false.
355  template <typename TNumeric>
356  static bool StringToNumeric(const CTempString str,
357  TNumeric* value, /*[out]*/
359  int base = 10)
360  {
361  return x_StringToNumeric(str, value, flags, base);
362  }
363 
364  /// Convert string to non-negative integer value.
365  ///
366  /// @param str
367  /// String containing only digits, representing non-negative
368  /// decimal value in the int range: [0..kMax_Int].
369  /// @param flags
370  /// How to convert string to value.
371  /// Only fConvErr_NoErrMessage flag is supported here.
372  /// @return
373  /// - If conversion succeeds, set errno to zero and return the converted value.
374  /// - Otherwise, set errno to non-zero and return -1.
375  static int StringToNonNegativeInt(const CTempString str, TStringToNumFlags flags = 0);
376 
377  /// @deprecated
378  /// Use template-based StringToNumeric<> or StringToNonNegativeInt() instead.
380  static int StringToNumeric(const string& str)
381  {
382  return StringToNonNegativeInt(str);
383  }
384 
385  /// Convert string to int.
386  ///
387  /// @param str
388  /// String to be converted.
389  /// @param flags
390  /// How to convert string to value.
391  /// @param base
392  /// Radix base. Allowed values are 0, 2..36. Zero means to use the
393  /// first characters to determine the base - a leading "0x" or "0X"
394  /// means base 16; otherwise a leading 0 means base 8; otherwise base 10.
395  /// @return
396  /// - If conversion succeeds, set errno to zero and return the
397  /// converted value.
398  /// - Otherwise, if fConvErr_NoThrow is not set, throw an exception.
399  /// - Otherwise, set errno to non-zero and return zero.
400  static int StringToInt(const CTempString str,
401  TStringToNumFlags flags = 0,
402  int base = 10);
403 
404  /// Convert string to unsigned int.
405  ///
406  /// @param str
407  /// String to be converted.
408  /// @param flags
409  /// How to convert string to value.
410  /// @param base
411  /// Radix base. Allowed values are 0, 2..36. Zero means to use the
412  /// first characters to determine the base - a leading "0x" or "0X"
413  /// means base 16; otherwise a leading 0 means base 8; otherwise base 10.
414  /// @return
415  /// - If conversion succeeds, set errno to zero and return the
416  /// converted value.
417  /// - Otherwise, if fConvErr_NoThrow is not set, throw an exception.
418  /// - Otherwise, set errno to non-zero and return zero.
419  static unsigned int StringToUInt(const CTempString str,
420  TStringToNumFlags flags = 0,
421  int base = 10);
422 
423  /// Convert string to long.
424  ///
425  /// @param str
426  /// String to be converted.
427  /// @param flags
428  /// How to convert string to value.
429  /// @param base
430  /// Radix base. Allowed values are 0, 2..36. Zero means to use the
431  /// first characters to determine the base - a leading "0x" or "0X"
432  /// means base 16; otherwise a leading 0 means base 8; otherwise base 10.
433  /// @return
434  /// - If conversion succeeds, set errno to zero and return the
435  /// converted value.
436  /// - Otherwise, if fConvErr_NoThrow is not set, throw an exception.
437  /// - Otherwise, set errno to non-zero and return zero.
438  static long StringToLong(const CTempString str,
439  TStringToNumFlags flags = 0,
440  int base = 10);
441 
442  /// Convert string to unsigned long.
443  ///
444  /// @param str
445  /// String to be converted.
446  /// @param flags
447  /// How to convert string to value.
448  /// @param base
449  /// Radix base. Allowed values are 0, 2..36. Zero means to use the
450  /// first characters to determine the base - a leading "0x" or "0X"
451  /// means base 16; otherwise a leading 0 means base 8; otherwise base 10.
452  /// @return
453  /// - If conversion succeeds, set errno to zero and return the
454  /// converted value.
455  /// - Otherwise, if fConvErr_NoThrow is not set, throw an exception.
456  /// - Otherwise, set errno to non-zero and return zero.
457  static unsigned long StringToULong(const CTempString str,
458  TStringToNumFlags flags = 0,
459  int base = 10);
460 
461  /// Convert string to double-precision value (analog of strtod function)
462  ///
463  /// @param str
464  /// String to be converted.
465  /// @param endptr
466  /// Pointer to character that stops scan.
467  /// @return
468  /// Double-precision value.
469  /// This function always uses dot as decimal separator.
470  /// - on overflow, it returns HUGE_VAL and sets errno to ERANGE;
471  /// - on underflow, it returns 0 and sets errno to ERANGE;
472  /// - if conversion was impossible, it returns 0 and sets errno.
473  /// Also, when input string equals (case-insensitive) to
474  /// - "NAN", the function returns NaN;
475  /// - "INF" or "INFINITY", the function returns HUGE_VAL;
476  /// - "-INF" or "-INFINITY", the function returns -HUGE_VAL;
477  /// @note
478  /// - If conversion succeeds, set errno to zero and return the
479  /// converted value.
480  /// - Otherwise, set errno to non-zero and return zero.
481  /// - Denormal or infinite results are considered successful conversion.
482  /// - To enforce finite and normalized result, use fDecimalPosixFinite flag.
483  /// - This function is meant to be more "low-level" than other
484  /// StringToXxx functions - for example, it allows trailing characters
485  /// (and doesn't include a flags parameter for tweaking such behavior).
486  /// This could result in strings like "nanosecond" being converted to
487  /// NaN, "-inf=input_file" being converted to -INF, or other unexpected
488  /// behavior. Therefore, please consider using StringToDouble unless
489  /// you specifically need this functionality.
490  static double StringToDoublePosix(const char* str, char** endptr=0,
491  TStringToNumFlags flags=0);
492 
493 
494  /// Convert string to double.
495  ///
496  /// @param str
497  /// String to be converted.
498  /// @param flags
499  /// How to convert string to value.
500  /// Do not support fAllowCommas flag.
501  /// @return
502  /// - If invalid flags are passed, throw an exception.
503  /// - If conversion succeeds, set errno to zero and return the
504  /// converted value.
505  /// - Otherwise, if fConvErr_NoThrow is not set, throw an exception.
506  /// - Otherwise, set errno to non-zero and return zero.
507  /// @note
508  /// - Denormal or infinite results are considered successful conversion.
509  /// - To enforce finite and normalized result, use fDecimalPosixFinite flag.
510  static double StringToDouble(const CTempStringEx str,
511  TStringToNumFlags flags = 0);
512 
513  /// This version accepts zero-terminated string
514  /// @deprecated
515  /// It is unsafe to use this method directly, please use StringToDouble()
516  /// instead.
518  static double StringToDoubleEx(const char* str, size_t size,
519  TStringToNumFlags flags = 0);
520 
521  /// Convert string to Int8.
522  ///
523  /// @param str
524  /// String to be converted.
525  /// @param flags
526  /// How to convert string to value.
527  /// @param base
528  /// Radix base. Allowed values are 0, 2..36. Zero means to use the
529  /// first characters to determine the base - a leading "0x" or "0X"
530  /// means base 16; otherwise a leading 0 means base 8; otherwise base 10.
531  /// @return
532  /// - If conversion succeeds, set errno to zero and return the
533  /// converted value.
534  /// - Otherwise, if fConvErr_NoThrow is not set, throw an exception.
535  /// - Otherwise, set errno to non-zero and return zero.
536  static Int8 StringToInt8(const CTempString str,
537  TStringToNumFlags flags = 0,
538  int base = 10);
539 
540  /// Convert string to Uint8.
541  ///
542  /// @param str
543  /// String to be converted.
544  /// @param flags
545  /// How to convert string to value.
546  /// @param base
547  /// Radix base. Allowed values are 0, 2..36. Zero means to use the
548  /// first characters to determine the base - a leading "0x" or "0X"
549  /// means base 16; otherwise a leading 0 means base 8; otherwise base 10.
550  /// @return
551  /// - If conversion succeeds, set errno to zero and return the
552  /// converted value.
553  /// - Otherwise, if fConvErr_NoThrow is not set, throw an exception.
554  /// - Otherwise, set errno to non-zero and return zero.
555  static Uint8 StringToUInt8(const CTempString str,
556  TStringToNumFlags flags = 0,
557  int base = 10);
558 
559  /// Convert string that can contain "software" qualifiers to Uint8.
560  ///
561  /// String can contain "software" qualifiers: G(giga-), MB(mega-),
562  /// KiB (kibi-) etc.
563  /// Example: 100MB, 1024KiB, 5.7G.
564  /// Meaning of qualifiers depends on flags and by default is 1000-based
565  /// (i.e. K=1000, M=10^6 etc.) except in cases when qualifiers with "iB"
566  /// are used, i.e. KiB=1024, MiB=1024^2 etc. When flags parameter contains
567  /// fDS_ForceBinary then qualifiers without "iB" (i.e. "K" or "MB") will
568  /// also be 1024-based.
569  /// String can contain a decimal fraction (except when fDS_ProhibitFractions
570  /// flag is used), in this case the resultant Uint8 number will be rounded
571  /// to fit into integer value.
572  ///
573  /// @param str
574  /// String to be converted.
575  /// @param flags
576  /// How to convert string to value.
577  /// @return
578  /// - If invalid flags are passed, throw an exception.
579  /// - If conversion succeeds, return the converted value.
580  /// - Otherwise, if fConvErr_NoThrow is not set, throw an exception.
581  /// - Otherwise, set errno to non-zero and return zero.
582  static Uint8 StringToUInt8_DataSize(const CTempString str,
583  TStringToNumFlags flags = 0);
584 
585  /// Convert string to number of bytes.
586  ///
587  /// String can contain "software" qualifiers: MB(megabyte), KB (kilobyte).
588  /// Example: 100MB, 1024KB
589  /// Note the qualifiers are power-of-2 based, aka kibi-, mebi- etc, so that
590  /// 1KB = 1024B (not 1000B), 1MB = 1024KB = 1048576B, etc.
591  ///
592  /// @param str
593  /// String to be converted.
594  /// @param flags
595  /// How to convert string to value.
596  /// @param base
597  /// Numeric base of the number (before the qualifier). Allowed values
598  /// are 0, 2..20. Zero means to use the first characters to determine
599  /// the base - a leading "0x" or "0X" means base 16; otherwise a
600  /// leading 0 means base 8; otherwise base 10.
601  /// The base is limited to 20 to prevent 'K' from being interpreted as
602  /// a digit in the number.
603  /// @return
604  /// - If conversion succeeds, set errno to zero and return the
605  /// converted value.
606  /// - Otherwise, if fConvErr_NoThrow is not set, throw an exception.
607  /// - Otherwise, set errno to non-zero and return zero.
608  /// @deprecated Use StringToUInt8_DataSize(str, flags) instead.
610  static Uint8 StringToUInt8_DataSize(const CTempString str,
611  TStringToNumFlags flags,
612  int base);
613 
614  /// Convert string to size_t.
615  ///
616  /// @param str
617  /// String to be converted.
618  /// @param flags
619  /// How to convert string to value.
620  /// @param base
621  /// Radix base. Allowed values are 0, 2..36. Zero means to use the
622  /// first characters to determine the base - a leading "0x" or "0X"
623  /// means base 16; otherwise a leading 0 means base 8; otherwise base 10.
624  /// @return
625  /// - If conversion succeeds, set errno to zero and return the
626  /// converted value.
627  /// - Otherwise, if fConvErr_NoThrow is not set, throw an exception.
628  /// - Otherwise, set errno to non-zero and return zero.
629  static size_t StringToSizet(const CTempString str,
630  TStringToNumFlags flags = 0,
631  int base = 10);
632 
633  /// Convert string to pointer.
634  ///
635  /// @param str
636  /// String to be converted.
637  /// @param flags
638  /// How to convert string to value.
639  /// Only fConvErr_NoErrMessage flag is supported here.
640  /// @return
641  /// Pointer value corresponding to its string representation.
642  /// - If conversion succeeds, set errno to zero and return the
643  /// converted value.
644  /// - Otherwise, set errno to non-zero and return NULL.
645  static const void* StringToPtr(const CTempStringEx str, TStringToNumFlags flags = 0);
646 
/// Convert character to integer.
///
/// @param ch
///   Character to be converted.
/// @return
///   Integer (0..15) corresponding to the "ch" as a hex digit.
///   Return -1 on error.
static int HexChar(char ch);
655 
656  /// Convert numeric value to string.
657  ///
658  /// @param value
659  /// Numeric value to be converted.
660  /// @param flags
661  /// How to convert value to string.
662  /// @param base
663  /// Radix base. Default is 10. Allowed values are 2..36.
664  /// Bases 8 and 16 do not add leading '0' and '0x' accordingly.
665  /// If necessary you should add it yourself.
666  /// If value is float or double type, the parameter is ignored.
667  /// @return
668  /// - If conversion succeeds, set errno to zero and return the
669  /// converted string value.
670  /// - Otherwise, set errno to non-zero and return empty string.
671  template<typename TNumeric>
673  NumericToString(TNumeric value, TNumToStringFlags flags = 0, int base = 10)
674  {
675  string ret;
676  x_NumericToString(ret, value, flags, base);
677  return ret;
678  }
679  template <typename TStrictId>
680  static typename enable_if< is_integral<typename TStrictId::TId>::value && is_member_function_pointer<decltype(&TStrictId::Get)>::value, string>::type
681  NumericToString(TStrictId value, TNumToStringFlags flags = 0, int base = 10)
682  {
683  return NumericToString(value.Get(), flags, base);
684  }
685 
686  /// Convert numeric value to string.
687  ///
688  /// @param out_str
689  /// Output string variable.
690  /// @param value
691  /// Numeric value to be converted.
692  /// @param flags
693  /// How to convert value to string.
694  /// @param base
695  /// Radix base. Default is 10. Allowed values are 2..36.
696  /// Bases 8 and 16 do not add leading '0' and '0x' accordingly.
697  /// If necessary you should add it yourself.
698  /// If value is float or double type, the parameter is ignored.
699  /// @note
700  /// - If conversion succeeds, set errno to zero and return the
701  /// converted string value in 'out_str'.
702  /// - Otherwise, set errno to non-zero, value of 'out_str' is undefined.
703  template<typename TNumeric>
704  static void NumericToString(string& out_str, TNumeric value,
705  TNumToStringFlags flags = 0, int base = 10)
706  {
707  x_NumericToString(out_str, value, flags, base);
708  }
709 
710  /// Convert int to string.
711  ///
712  /// @param value
713  /// Integer value to be converted.
714  /// @param flags
715  /// How to convert value to string.
716  /// @param base
717  /// Radix base. Default is 10. Allowed values are 2..36.
718  /// Bases 8 and 16 do not add leading '0' and '0x' accordingly.
719  /// If necessary you should add it yourself.
720  /// @return
721  /// - If conversion succeeds, set errno to zero and return the
722  /// converted string value.
723  /// - Otherwise, set errno to non-zero and return empty string.
724  static string IntToString(int value, TNumToStringFlags flags = 0,
725  int base = 10);
726 
727  static string IntToString(unsigned int value, TNumToStringFlags flags = 0,
728  int base = 10);
729 
730  /// Convert int to string.
731  ///
732  /// @param out_str
733  /// Output string variable.
734  /// @param value
735  /// Integer value to be converted.
736  /// @param flags
737  /// How to convert value to string.
738  /// @param base
739  /// Radix base. Default is 10. Allowed values are 2..36.
740  /// Bases 8 and 16 do not add leading '0' and '0x' accordingly.
741  /// If necessary you should add it yourself.
742  /// @note
743  /// - If conversion succeeds, set errno to zero and return the
744  /// converted string value in 'out_str'.
745  /// - Otherwise, set errno to non-zero, value of 'out_str' is undefined.
746  static void IntToString(string& out_str, int value,
747  TNumToStringFlags flags = 0,
748  int base = 10);
749 
750  static void IntToString(string& out_str, unsigned int value,
751  TNumToStringFlags flags = 0,
752  int base = 10);
753 
754  /// Convert UInt to string.
755  ///
756  /// @param value
757  /// Integer value (unsigned long) to be converted.
758  /// @param flags
759  /// How to convert value to string.
760  /// @param base
761  /// Radix base. Default is 10. Allowed values are 2..36.
762  /// Bases 8 and 16 do not add leading '0' and '0x' accordingly.
763  /// If necessary you should add it yourself.
764  /// @return
765  /// - If conversion succeeds, set errno to zero and return the
766  /// converted string value.
767  /// - Otherwise, set errno to non-zero and return empty string.
768  static string UIntToString(unsigned int value,
769  TNumToStringFlags flags = 0,
770  int base = 10);
771 
772  static string UIntToString(int value,
773  TNumToStringFlags flags = 0,
774  int base = 10);
775 
776  /// Convert UInt to string.
777  ///
778  /// @param out_str
779  /// Output string variable
780  /// @param value
781  /// Integer value (unsigned long) to be converted.
782  /// @param flags
783  /// How to convert value to string.
784  /// @param base
785  /// Radix base. Default is 10. Allowed values are 2..36.
786  /// Bases 8 and 16 do not add leading '0' and '0x' accordingly.
787  /// If necessary you should add it yourself.
788  /// @note
789  /// - If conversion succeeds, set errno to zero and return the
790  /// converted string value in 'out_str'.
791  /// - Otherwise, set errno to non-zero, value of 'out_str' is undefined.
792  static void UIntToString(string& out_str, unsigned int value,
793  TNumToStringFlags flags = 0,
794  int base = 10);
795 
796  static void UIntToString(string& out_str, int value,
797  TNumToStringFlags flags = 0,
798  int base = 10);
799 
800  /// Convert long to string.
801  ///
802  /// @param value
803  /// Integer value (long) to be converted.
804  /// @param flags
805  /// How to convert value to string.
806  /// @param base
807  /// Radix base. Default is 10. Allowed values are 2..36.
808  /// Bases 8 and 16 do not add leading '0' and '0x' accordingly.
809  /// If necessary you should add it yourself.
810  /// @return
811  /// - If conversion succeeds, set errno to zero and return the
812  /// converted string value.
813  /// - Otherwise, set errno to non-zero and return empty string.
814  static string LongToString(long value, TNumToStringFlags flags = 0,
815  int base = 10);
816 
817  /// Convert long to string.
818  ///
819  /// @param out_str
820  /// Output string variable.
821  /// @param value
822  /// Integer value (long) to be converted.
823  /// @param flags
824  /// How to convert value to string.
825  /// @param base
826  /// Radix base. Default is 10. Allowed values are 2..36.
827  /// Bases 8 and 16 do not add leading '0' and '0x' accordingly.
828  /// If necessary you should add it yourself.
829  /// @note
830  /// - If conversion succeeds, set errno to zero and return the
831  /// converted string value in 'out_str'.
832  /// - Otherwise, set errno to non-zero, value of 'out_str' is undefined.
833  static void LongToString(string& out_str, long value,
834  TNumToStringFlags flags = 0,
835  int base = 10);
836 
837  /// Convert unsigned long to string.
838  ///
839  /// @param value
840  /// Integer value (unsigned long) to be converted.
841  /// @param flags
842  /// How to convert value to string.
843  /// @param base
844  /// Radix base. Default is 10. Allowed values are 2..36.
845  /// Bases 8 and 16 do not add leading '0' and '0x' accordingly.
846  /// If necessary you should add it yourself.
847  /// @return
848  /// - If conversion succeeds, set errno to zero and return the
849  /// converted string value.
850  /// - Otherwise, set errno to non-zero and return empty string.
851  static string ULongToString(unsigned long value,
852  TNumToStringFlags flags = 0,
853  int base = 10);
854 
855  /// Convert unsigned long to string.
856  ///
857  /// @param out_str
858  /// Output string variable
859  /// @param value
860  /// Integer value (unsigned long) to be converted.
861  /// @param flags
862  /// How to convert value to string.
863  /// @param base
864  /// Radix base. Default is 10. Allowed values are 2..36.
865  /// Bases 8 and 16 do not add leading '0' and '0x' accordingly.
866  /// If necessary you should add it yourself.
867  /// @note
868  /// - If conversion succeeds, set errno to zero and return the
869  /// converted string value in 'out_str'.
870  /// - Otherwise, set errno to non-zero, value of 'out_str' is undefined.
871  static void ULongToString(string& out_str, unsigned long value,
872  TNumToStringFlags flags = 0,
873  int base = 10);
874 
875  /// Convert Int8 to string.
876  ///
877  /// @param value
878  /// Integer value (Int8) to be converted.
879  /// @param flags
880  /// How to convert value to string.
881  /// @param base
882  /// Radix base. Default is 10. Allowed values are 2..36.
883  /// Bases 8 and 16 do not add leading '0' and '0x' accordingly.
884  /// If necessary you should add it yourself.
885  /// @return
886  /// - If conversion succeeds, set errno to zero and return the
887  /// converted string value.
888  /// - Otherwise, set errno to non-zero and return empty string.
889  static string Int8ToString(Int8 value,
890  TNumToStringFlags flags = 0,
891  int base = 10);
892 
893  /// Convert Int8 to string.
894  ///
895  /// @param out_str
896  /// Output string variable
897  /// @param value
898  /// Integer value (Int8) to be converted.
899  /// @param flags
900  /// How to convert value to string.
901  /// @param base
902  /// Radix base. Default is 10. Allowed values are 2..36.
903  /// Bases 8 and 16 do not add leading '0' and '0x' accordingly.
904  /// If necessary you should add it yourself.
905  /// @note
906  /// - If conversion succeeds, set errno to zero and return the
907  /// converted string value in 'out_str'.
908  /// - Otherwise, set errno to non-zero, value of 'out_str' is undefined.
909  static void Int8ToString(string& out_str, Int8 value,
910  TNumToStringFlags flags = 0,
911  int base = 10);
912 
913  /// Convert UInt8 to string.
914  ///
915  /// @param value
916  /// Integer value (UInt8) to be converted.
917  /// @param flags
918  /// How to convert value to string.
919  /// @param base
920  /// Radix base. Default is 10. Allowed values are 2..36.
921  /// Bases 8 and 16 do not add leading '0' and '0x' accordingly.
922  /// If necessary you should add it yourself.
923  /// @return
924  /// - If conversion succeeds, set errno to zero and return the
925  /// converted string value.
926  /// - Otherwise, set errno to non-zero and return empty string.
927  static string UInt8ToString(Uint8 value,
928  TNumToStringFlags flags = 0,
929  int base = 10);
930 
931  /// Convert UInt8 to string.
932  ///
933  /// @param out_str
934  /// Output string variable
935  /// @param value
936  /// Integer value (UInt8) to be converted.
937  /// @param flags
938  /// How to convert value to string.
939  /// @param base
940  /// Radix base. Default is 10. Allowed values are 2..36.
941  /// Bases 8 and 16 do not add leading '0' and '0x' accordingly.
942  /// If necessary you should add it yourself.
943  /// @note
944  /// - If conversion succeeds, set errno to zero and return the
945  /// converted string value in 'out_str'.
946  /// - Otherwise, set errno to non-zero, value of 'out_str' is undefined.
947  static void UInt8ToString(string& out_str, Uint8 value,
948  TNumToStringFlags flags = 0,
949  int base = 10);
950 
951  /// Convert UInt8 to string using "software" qualifiers.
952  ///
953  /// Result of conversion will be limited to max_digits digits so that e.g.
954  /// 1024 will be converted to 1.02KB. Conversion will be made using
955  /// rounding so that 1025 will be converted to 1.03KB. By default function
956  /// uses 1000-based qualifiers (as in examples above) but with fDS_Binary
957  /// flag it will use 1024-based qualifiers, e.g. 1100 will be converted to
958  /// 1.07KiB. With fDS_ShortSuffix flag function will omit "B" in 1000-based
959  /// and "iB" in 1024-based qualifiers. When the result of conversion doesn't
960  /// need any qualifiers then the result of this function will be equivalent
961  /// to result of UInt8ToString() above except if fDS_PutBSuffixToo flag
962  /// is passed. In the latter case "B" will be added to the number.
963  ///
964  /// Function will always try to use a maximum possible qualifier and
965  /// a number with decimal point except if fDS_NoDecimalPoint flag is passed.
966  /// In that case function will return only whole number and try to use a
967  /// minimum possible qualifier (which makes difference only if
968  /// max_digits > 3).
969  ///
970  /// @param value
971  /// Integer value (UInt8) to be converted.
972  /// @param flags
973  /// How to convert value to string.
974  /// @param max_digits
975  /// Maximum number of digits to use (cannot be less than 3)
976  /// @return
977  /// - If invalid flags are passed, throw an exception.
978  /// - If conversion succeeds, return the converted value.
979  static string UInt8ToString_DataSize(Uint8 value,
980  TNumToStringFlags flags = 0,
981  unsigned int max_digits = 3);
982 
983  /// Convert UInt8 to string using "software" qualifiers.
984  ///
985  /// See notes and details of how function works in the comments to
986  /// UInt8ToString_DataSize() above.
987  ///
988  /// @param out_str
989  /// Output string variable
990  /// @param value
991  /// Integer value (UInt8) to be converted.
992  /// @param flags
993  /// How to convert value to string.
994  /// @param max_digits
995  /// Maximum number of digits to use (cannot be less than 3)
996  static void UInt8ToString_DataSize(string& out_str,
997  Uint8 value,
998  TNumToStringFlags flags = 0,
999  unsigned int max_digits = 3);
1000  /// Convert double to string.
1001  ///
1002  /// @param value
1003  /// Double value to be converted.
1004  /// @param precision
1005  /// Precision value for conversion. If precision is greater than the maximum
1006  /// for the current platform, it will be truncated to this maximum.
1007  /// If it is negative, the double will be converted to a number in
1008  /// scientific notation.
1009  /// @param flags
1010  /// How to convert value to string.
1011  /// If double format flags are not specified, the following output format
1012  /// will be used by default:
1013  /// - fDoubleFixed, if 'precision' >= 0.
1014  /// - fDoubleGeneral, if 'precision' < 0.
1015  /// @return
1016  /// - If conversion succeeds, set errno to zero and return the
1017  /// converted string value.
1018  /// - Otherwise, set errno to non-zero and return empty string.
1019  static string DoubleToString(double value, int precision = -1,
1020  TNumToStringFlags flags = 0);
1021 
1022  /// Convert double to string.
1023  ///
1024  /// @param out_str
1025  /// Output string variable.
1026  /// @param value
1027  /// Double value to be converted.
1028  /// @param precision
1029  /// Precision value for conversion. If precision is greater than the maximum
1030  /// for the current platform, it will be truncated to this maximum.
1031  /// If it is negative, the double will be converted to a number in
1032  /// scientific notation.
1033  /// @param flags
1034  /// How to convert value to string.
1035  /// If double format flags are not specified, the following output format
1036  /// will be used by default:
1037  /// - fDoubleFixed, if 'precision' >= 0.
1038  /// - fDoubleGeneral, if 'precision' < 0.
1039  /// @note
1040  /// - If conversion succeeds, set errno to zero and return the
1041  /// converted string value in 'out_str'.
1042  /// - Otherwise, set errno to non-zero, value of 'out_str' is undefined.
1043  static void DoubleToString(string& out_str, double value,
1044  int precision = -1,
1045  TNumToStringFlags flags = 0);
1046 
1047  /// Convert double to string with specified precision and place the result
1048  /// in the specified buffer.
1049  ///
1050  /// @param value
1051  /// Double value to be converted.
1052  /// @param precision
1053  /// Precision value for conversion. If precision is greater than the maximum
1054  /// for the current platform, it will be truncated to this maximum.
1055  /// @param buf
1056  /// Put result of the conversion into this buffer.
1057  /// @param buf_size
1058  /// Size of buffer, "buf".
1059  /// @param flags
1060  /// How to convert value to string.
1061  /// Default output format is fDoubleFixed.
1062  /// @return
1063  /// - If conversion succeeds, set errno to zero and return the
1064  /// number of bytes stored in "buf", not counting the
1065  /// terminating '\0'.
1066  /// - Otherwise, set errno to non-zero, contents of "buf" are undefined.
1067  static SIZE_TYPE DoubleToString(double value, unsigned int precision,
1068  char* buf, SIZE_TYPE buf_size,
1069  TNumToStringFlags flags = 0);
1070 
1071  /// Convert double to string with specified precision and put the result
1072  /// into a character buffer, in scientific format.
1073  ///
1074  /// NOTE:
1075  /// The output character buffer is NOT zero-terminated.
1076  /// The decimal separator is dot, always.
1077  /// This function DOES NOT check whether 'value' is finite; if it is
1078  /// infinite or not-a-number, the result is unpredictable.
1079  /// This function is less precise for a small fraction of values
1080  /// (the difference is in the last significant digit) than its
1081  /// 'DoubleToString' siblings, but it is much faster.
1082  ///
1083  /// @param value
1084  /// Double value to be converted.
1085  /// @param precision
1086  /// Maximum number of significant digits to preserve. If precision is greater than
1087  /// maximum for the current platform, it will be truncated to this maximum.
1088  /// @param buf
1089  /// Put result of the conversion into this buffer.
1090  /// @param buf_size
1091  /// Size of buffer, "buf".
1092  /// @return
1093  /// The number of bytes written into "buf".
1094  static SIZE_TYPE DoubleToStringPosix(double value, unsigned int precision,
1095  char* buf, SIZE_TYPE buf_size);
1096 
1097 
1098  /// Convert double to string with specified precision.
1099  ///
1100  /// The result consists of three parts: significant digits, exponent and sign.
1101  /// For example, input value -12345.67 will produce
1102  /// buf = "1234567", *dec = 4, and *sign = -1.
1103  /// NOTE:
1104  /// The output character buffer is NOT zero-terminated.
1105  /// The buffer is NOT padded with zeros.
1106  /// This function DOES NOT check whether 'value' is finite; if it is
1107  /// infinite or not-a-number, the result is unpredictable.
1108  /// This function is less precise for a small fraction of values
1109  /// (the difference is in the last significant digit) than its
1110  /// 'DoubleToString' siblings, but it is much faster.
1111  ///
1112  /// @param value
1113  /// Double value to be converted.
1114  /// @param precision
1115  /// Maximum number of significant digits to preserve. If precision is greater than
1116  /// maximum for the current platform, it will be truncated to this maximum.
1117  /// @param buf
1118  /// Put result of the conversion into this buffer.
1119  /// @param buf_size
1120  /// Size of buffer, "buf".
1121  /// @param dec
1122  /// Exponent
1123  /// @param sign
1124  /// Sign of the value
1125  /// @return
1126  /// The number of bytes written into "buf".
1127  static SIZE_TYPE DoubleToString_Ecvt(double value, unsigned int precision,
1128  char* buf, SIZE_TYPE buf_size,
1129  int* dec, int* sign);
1130 
1131  /// Convert size_t to string.
1132  ///
1133  /// @param value
1134  /// Value to be converted.
1135  /// @param flags
1136  /// How to convert value to string.
1137  /// @param base
1138  /// Radix base. Default is 10. Allowed values are 2..36.
1139  /// Bases 8 and 16 do not add leading '0' and '0x' accordingly.
1140  /// If necessary you should add it yourself.
1141  /// @return
1142  /// - If conversion succeeds, set errno to zero and return the
1143  /// converted string value.
1144  /// - Otherwise, set errno to non-zero and return empty string.
1145  static string SizetToString(size_t value,
1146  TNumToStringFlags flags = 0,
1147  int base = 10);
1148 
1149  /// Convert pointer to string.
1150  ///
1151  /// @param out_str
1152  /// Output string variable.
1153  /// @param ptr
1154  /// Pointer to be converted.
1155  /// @note
1156  /// - If conversion succeeds, set errno to zero and return the
1157  /// converted string value in 'out_str'.
1158  /// - Otherwise, set errno to non-zero, value of 'out_str' is undefined.
1159  static void PtrToString(string& out_str, const void* ptr);
1160 
1161  /// Convert pointer to string.
1162  ///
1163  /// @param ptr
1164  /// Pointer to be converted.
1165  /// @return
1166  /// - If conversion succeeds, set errno to zero and return the
1167  /// converted string value representing the pointer.
1168  /// - Otherwise, set errno to non-zero and return empty string.
1169  static string PtrToString(const void* ptr);
1170 
1171  /// Convert bool to string.
1172  ///
1173  /// @param value
1174  /// Boolean value to be converted.
1175  /// @return
1176  /// One of: 'true', 'false'.
1177  /// @note
1178  /// Does not change errno.
1179  static const string BoolToString(bool value);
1180 
1181  /// Convert string to bool.
1182  ///
1183  /// @param str
1184  /// Boolean string value to be converted. Can recognize
1185  /// case-insensitive version as one of:
1186  /// TRUE - 'true', 't', 'yes', 'y', '1', 'on';
1187  /// FALSE - 'false', 'f', 'no', 'n', '0', 'off'.
1188  /// @return
1189  /// - If conversion succeeds, set errno to zero and return TRUE or FALSE.
1190  /// - Otherwise, set errno to non-zero and throw an exception.
1191  static bool StringToBool(const CTempString str);
1192 
1193 
1194  /// Handle an arbitrary printf-style format string.
1195  ///
1196  /// This method exists only to support third-party code that insists on
1197  /// representing messages in this format; please stick to type-checked
1198  /// means of formatting such as the above ToString methods and I/O
1199  /// streams whenever possible.
///
/// @param format
/// printf-style format string.
/// @param args
/// Argument list (va_list) supplying the values for 'format'.
/// @return
/// The resulting string (presumably the formatted text -- confirm
/// against the implementation).
1200  static string FormatVarargs(const char* format, va_list args);
1201 
1202 
1203  /// Which type of string comparison.
1204  enum ECase {
1205  eCase, ///< Case sensitive compare
1206  eNocase ///< Case insensitive compare
1207  };
1208 
1209  // ATTENTION. Be aware that:
1210  //
1211  // 1) "Compare***(..., SIZE_TYPE pos, SIZE_TYPE n, ...)" functions
1212  // follow the ANSI C++ comparison rules a la "basic_string::compare()":
1213  // s1[pos:pos+n) == s2 --> return 0
1214  // s1[pos:pos+n) < s2 --> return negative value
1215  // s1[pos:pos+n) > s2 --> return positive value
1216  //
1217  // 2) "strn[case]cmp()" functions follow the ANSI C comparison rules:
1218  // s1[0:n) == s2[0:n) --> return 0
1219  // s1[0:n) < s2[0:n) --> return negative value
1220  // s1[0:n) > s2[0:n) --> return positive value
1221 
1222 
1223  /// Case-sensitive compare of a substring with another string.
1224  ///
1225  /// @param s1
1226  /// String containing the substring to be compared.
1227  /// @param pos
1228  /// Start position of substring to be compared.
1229  /// @param n
1230  /// Number of characters in substring to be compared.
1231  /// @param s2
1232  /// String (char*) to be compared with substring.
1233  /// @return
1234  /// - 0, if s1[pos:pos+n) == s2;
1235  /// - Negative integer, if s1[pos:pos+n) < s2;
1236  /// - Positive integer, if s1[pos:pos+n) > s2.
1237  /// @sa
1238  /// Other forms of overloaded CompareCase() with differences in argument
1239  /// types: char* vs. CTempString[Ex]
1240  static int CompareCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n,
1241  const char* s2);
1242 
1243  /// Case-sensitive compare of a substring with another string.
1244  ///
1245  /// @param s1
1246  /// String containing the substring to be compared.
1247  /// @param pos
1248  /// Start position of substring to be compared.
1249  /// @param n
1250  /// Number of characters in substring to be compared.
1251  /// @param s2
1252  /// String to be compared with substring.
1253  /// @return
1254  /// - 0, if s1[pos:pos+n) == s2;
1255  /// - Negative integer, if s1[pos:pos+n) < s2;
1256  /// - Positive integer, if s1[pos:pos+n) > s2.
1257  /// @sa
1258  /// Other forms of overloaded CompareCase() with differences in argument
1259  /// types: char* vs. CTempString[Ex]
1260  static int CompareCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n,
1261  const CTempString s2);
1262 
1263  /// Case-sensitive compare of two strings -- char* version.
1264  ///
1265  /// @param s1
1266  /// String to be compared -- operand 1.
1267  /// @param s2
1268  /// String to be compared -- operand 2.
1269  /// @return
1270  /// - 0, if s1 == s2;
1271  /// - Negative integer, if s1 < s2;
1272  /// - Positive integer, if s1 > s2.
1273  /// @sa
1274  /// CompareNocase(), Compare() versions with same argument types.
1275  static int CompareCase(const char* s1, const char* s2);
1276 
1277  /// Case-sensitive compare of two strings -- CTempStringEx version.
1278  ///
1279  /// @param s1
1280  /// String to be compared -- operand 1.
1281  /// @param s2
1282  /// String to be compared -- operand 2.
1283  /// @return
1284  /// - 0, if s1 == s2;
1285  /// - Negative integer, if s1 < s2;
1286  /// - Positive integer, if s1 > s2.
1287  /// @sa
1288  /// CompareNocase(), Compare() versions with same argument types.
1289  static int CompareCase(const CTempStringEx s1, const CTempStringEx s2);
1290 
1291  /// Case-insensitive compare of a substring with another string.
1292  ///
1293  /// @param s1
1294  /// String containing the substring to be compared.
1295  /// @param pos
1296  /// Start position of substring to be compared.
1297  /// @param n
1298  /// Number of characters in substring to be compared.
1299  /// @param s2
1300  /// String (char*) to be compared with substring.
1301  /// @return
1302  /// - 0, if s1[pos:pos+n) == s2 (case-insensitive compare);
1303  /// - Negative integer, if s1[pos:pos+n) < s2 (case-insensitive compare);
1304  /// - Positive integer, if s1[pos:pos+n) > s2 (case-insensitive compare).
1305  /// @sa
1306  /// Other forms of overloaded CompareNocase() with differences in
1307  /// argument types: char* vs. CTempString[Ex]
1308  static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n,
1309  const char* s2);
1310 
1311  /// Case-insensitive compare of a substring with another string.
1312  ///
1313  /// @param s1
1314  /// String containing the substring to be compared.
1315  /// @param pos
1316  /// Start position of substring to be compared.
1317  /// @param n
1318  /// Number of characters in substring to be compared.
1319  /// @param s2
1320  /// String to be compared with substring.
1321  /// @return
1322  /// - 0, if s1[pos:pos+n) == s2 (case-insensitive compare);
1323  /// - Negative integer, if s1[pos:pos+n) < s2 (case-insensitive compare);
1324  /// - Positive integer, if s1[pos:pos+n) > s2 (case-insensitive compare).
1325  /// @sa
1326  /// Other forms of overloaded CompareNocase() with differences in
1327  /// argument types: char* vs. CTempString[Ex]
1328  static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n,
1329  const CTempString s2);
1330 
1331  /// Case-insensitive compare of two strings -- char* version.
1332  ///
1333  /// @param s1
1334  /// String to be compared -- operand 1.
1335  /// @param s2
1336  /// String to be compared -- operand 2.
1337  /// @return
1338  /// - 0, if s1 == s2 (case-insensitive compare);
1339  /// - Negative integer, if s1 < s2 (case-insensitive compare);
1340  /// - Positive integer, if s1 > s2 (case-insensitive compare).
1341  /// @sa
1342  /// CompareCase(), Compare() versions with same argument types.
1343  static int CompareNocase(const char* s1, const char* s2);
1344 
1345  /// Case-insensitive compare of two strings -- CTempStringEx version.
1346  ///
1347  /// @param s1
1348  /// String to be compared -- operand 1.
1349  /// @param s2
1350  /// String to be compared -- operand 2.
1351  /// @return
1352  /// - 0, if s1 == s2 (case-insensitive compare);
1353  /// - Negative integer, if s1 < s2 (case-insensitive compare);
1354  /// - Positive integer, if s1 > s2 (case-insensitive compare).
1355  /// @sa
1356  /// CompareCase(), Compare() versions with same argument types.
1357  static int CompareNocase(const CTempStringEx s1, const CTempStringEx s2);
1358 
1359  /// Compare of a substring with another string.
1360  ///
1361  /// @param s1
1362  /// String containing the substring to be compared.
1363  /// @param pos
1364  /// Start position of substring to be compared.
1365  /// @param n
1366  /// Number of characters in substring to be compared.
1367  /// @param s2
1368  /// String (char*) to be compared with substring.
1369  /// @param use_case
1370  /// Whether to do a case sensitive compare(eCase -- default), or a
1371  /// case-insensitive compare (eNocase).
1372  /// @return
1373  /// - 0, if s1[pos:pos+n) == s2;
1374  /// - Negative integer, if s1[pos:pos+n) < s2;
1375  /// - Positive integer, if s1[pos:pos+n) > s2.
1376  /// @sa
1377  /// Other forms of overloaded Compare() with differences in argument
1378  /// types: char* vs. CTempString[Ex]
1379  static int Compare(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n,
1380  const char* s2, ECase use_case = eCase);
1381 
1382  /// Compare of a substring with another string.
1383  ///
1384  /// @param s1
1385  /// String containing the substring to be compared.
1386  /// @param pos
1387  /// Start position of substring to be compared.
1388  /// @param n
1389  /// Number of characters in substring to be compared.
1390  /// @param s2
1391  /// String to be compared with substring.
1392  /// @param use_case
1393  /// Whether to do a case sensitive compare (default is eCase), or a
1394  /// case-insensitive compare (eNocase).
1395  /// @return
1396  /// - 0, if s1[pos:pos+n) == s2;
1397  /// - Negative integer, if s1[pos:pos+n) < s2;
1398  /// - Positive integer, if s1[pos:pos+n) > s2.
1399  /// @sa
1400  /// Other forms of overloaded Compare() with differences in argument
1401  /// types: char* vs. CTempString[Ex]
1402  static int Compare(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n,
1403  const CTempString s2, ECase use_case = eCase);
1404 
1405  /// Compare two strings -- char* version.
1406  ///
1407  /// @param s1
1408  /// String to be compared -- operand 1.
1409  /// @param s2
1410  /// String to be compared -- operand 2.
1411  /// @param use_case
1412  /// Whether to do a case sensitive compare(default is eCase), or a
1413  /// case-insensitive compare (eNocase).
1414  /// @return
1415  /// - 0, if s1 == s2.
1416  /// - Negative integer, if s1 < s2.
1417  /// - Positive integer, if s1 > s2.
1418  /// @sa
1419  /// Other forms of overloaded Compare() with differences in argument
1420  /// types: char* vs. CTempString[Ex]
1421  static int Compare(const char* s1, const char* s2,
1422  ECase use_case = eCase);
1423 
1424  /// Compare two strings -- CTempStringEx version.
1425  ///
1426  /// @param s1
1427  /// String to be compared -- operand 1.
1428  /// @param s2
1429  /// String to be compared -- operand 2.
1430  /// @param use_case
1431  /// Whether to do a case sensitive compare(default is eCase), or a
1432  /// case-insensitive compare (eNocase).
1433  /// @return
1434  /// - 0, if s1 == s2;
1435  /// - Negative integer, if s1 < s2;
1436  /// - Positive integer, if s1 > s2.
1437  /// @sa
1438  /// Other forms of overloaded Compare() with differences in argument
1439  /// types: char* vs. CTempString[Ex]
1440  static int Compare(const CTempStringEx s1, const CTempStringEx s2,
1441  ECase use_case = eCase);
1442 
1443  /// Case-sensitive equality of a substring with another string.
1444  ///
1445  /// @param s1
1446  /// String containing the substring to be compared.
1447  /// @param pos
1448  /// Start position of substring to be compared.
1449  /// @param n
1450  /// Number of characters in substring to be compared.
1451  /// @param s2
1452  /// String (char*) to be compared with substring.
1453  /// @return
1454  /// - true, if s1[pos:pos+n) equals s2;
1455  /// - false, otherwise
1456  /// @sa
1457  /// Other forms of overloaded EqualCase() with differences in argument
1458  /// types: char* vs. CTempString[Ex]
1459  static bool EqualCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n,
1460  const char* s2);
1461 
1462  /// Case-sensitive equality of a substring with another string.
1463  ///
1464  /// @param s1
1465  /// String containing the substring to be compared.
1466  /// @param pos
1467  /// Start position of substring to be compared.
1468  /// @param n
1469  /// Number of characters in substring to be compared.
1470  /// @param s2
1471  /// String to be compared with substring.
1472  /// @return
1473  /// - true, if s1[pos:pos+n) equals s2;
1474  /// - false, otherwise
1475  /// @sa
1476  /// Other forms of overloaded EqualCase() with differences in argument
1477  /// types: char* vs. CTempString[Ex]
1478  static bool EqualCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n,
1479  const CTempString s2);
1480 
1481  /// Case-sensitive equality of two strings -- char* version.
1482  ///
1483  /// @param s1
1484  /// String to be compared -- operand 1.
1485  /// @param s2
1486  /// String to be compared -- operand 2.
1487  /// @return
1488  /// - true, if s1 equals s2;
1489  /// - false, otherwise.
1490  /// @sa
1491  /// EqualNocase(), Equal() versions with same argument types.
1492  static bool EqualCase(const char* s1, const char* s2);
1493 
1494  /// Case-sensitive equality of two strings -- CTempStringEx version.
1495  ///
1496  /// @param s1
1497  /// String to be compared -- operand 1.
1498  /// @param s2
1499  /// String to be compared -- operand 2.
1500  /// @return
1501  /// - true, if s1 equals s2;
1502  /// - false, otherwise.
1503  /// @sa
1504  /// EqualNocase(), Equal() versions with same argument types.
1505  static bool EqualCase(const CTempStringEx s1, const CTempStringEx s2);
1506 
1507  /// Case-insensitive equality of a substring with another string.
1508  ///
1509  /// @param s1
1510  /// String containing the substring to be compared.
1511  /// @param pos
1512  /// Start position of substring to be compared.
1513  /// @param n
1514  /// Number of characters in substring to be compared.
1515  /// @param s2
1516  /// String (char*) to be compared with substring.
1517  /// @return
1518  /// - true, if s1[pos:pos+n) equals s2 (case-insensitive compare);
1519  /// - false, otherwise.
1520  /// @sa
1521  /// Other forms of overloaded EqualNocase() with differences in
1522  /// argument types: char* vs. CTempString[Ex]
1523  static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n,
1524  const char* s2);
1525 
1526  /// Case-insensitive equality of a substring with another string.
1527  ///
1528  /// @param s1
1529  /// String containing the substring to be compared.
1530  /// @param pos
1531  /// Start position of substring to be compared.
1532  /// @param n
1533  /// Number of characters in substring to be compared.
1534  /// @param s2
1535  /// String to be compared with substring.
1536  /// @return
1537  /// - true, if s1[pos:pos+n) equals s2 (case-insensitive compare);
1538  /// - false, otherwise.
1539  /// @sa
1540  /// Other forms of overloaded EqualNocase() with differences in
1541  /// argument types: char* vs. CTempString[Ex]
1542  static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n,
1543  const CTempString s2);
1544 
1545  /// Case-insensitive equality of two strings -- char* version.
1546  ///
1547  /// @param s1
1548  /// String to be compared -- operand 1.
1549  /// @param s2
1550  /// String to be compared -- operand 2.
1551  /// @return
1552  /// - true, if s1 equals s2 (case-insensitive compare);
1553  /// - false, otherwise.
1554  /// @sa
1555  /// EqualCase(), Equal() versions with same argument types.
1556  static bool EqualNocase(const char* s1, const char* s2);
1557 
1558  /// Case-insensitive equality of two strings.
1559  ///
1560  /// @param s1
1561  /// String to be compared -- operand 1.
1562  /// @param s2
1563  /// String to be compared -- operand 2.
1564  /// @return
1565  /// - true, if s1 equals s2 (case-insensitive compare);
1566  /// - false, otherwise.
1567  /// @sa
1568  /// EqualCase(), Equal() versions with same argument types.
1569  static bool EqualNocase(const CTempStringEx s1, const CTempStringEx s2);
1570 
1571  /// Test for equality of a substring with another string.
1572  ///
1573  /// @param s1
1574  /// String containing the substring to be compared.
1575  /// @param pos
1576  /// Start position of substring to be compared.
1577  /// @param n
1578  /// Number of characters in substring to be compared.
1579  /// @param s2
1580  /// String (char*) to be compared with substring.
1581  /// @param use_case
1582  /// Whether to do a case sensitive compare(eCase -- default), or a
1583  /// case-insensitive compare (eNocase).
1584  /// @return
1585  /// - true, if s1[pos:pos+n) equals s2;
1586  /// - false, otherwise.
1587  /// @sa
1588  /// Other forms of overloaded Equal() with differences in argument
1589  /// types: char* vs. CTempString[Ex]
1590  static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n,
1591  const char* s2, ECase use_case = eCase);
1592 
1593  /// Test for equality of a substring with another string.
1594  ///
1595  /// @param s1
1596  /// String containing the substring to be compared.
1597  /// @param pos
1598  /// Start position of substring to be compared.
1599  /// @param n
1600  /// Number of characters in substring to be compared.
1601  /// @param s2
1602  /// String to be compared with substring.
1603  /// @param use_case
1604  /// Whether to do a case sensitive compare (default is eCase), or a
1605  /// case-insensitive compare (eNocase).
1606  /// @return
1607  /// - true, if s1[pos:pos+n) equals s2 (compared according to
1608  /// 'use_case');
1609  /// - false, otherwise.
1610  /// @sa
1611  /// Other forms of overloaded Equal() with differences in argument
1612  /// types: char* vs. CTempString[Ex]
1613  static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n,
1614  const CTempString s2, ECase use_case = eCase);
1615 
1616  /// Test for equality of two strings -- char* version.
1617  ///
1618  /// @param s1
1619  /// String to be compared -- operand 1.
1620  /// @param s2
1621  /// String to be compared -- operand 2.
1622  /// @param use_case
1623  /// Whether to do a case sensitive compare (default is eCase), or a
1624  /// case-insensitive compare (eNocase).
1625  /// @return
1626  /// - true, if s1 equals s2
1627  /// (compared according to "use_case");
1628  /// - false, otherwise.
1629  /// @sa
1630  /// EqualNocase(), Equal() versions with similar argument types.
1631  static bool Equal(const char* s1, const char* s2,
1632  ECase use_case = eCase);
1633 
1634  /// Test for equality of two strings.
1635  ///
1636  /// @param s1
1637  /// String to be compared -- operand 1.
1638  /// @param s2
1639  /// String to be compared -- operand 2.
1640  /// @param use_case
1641  /// Whether to do a case sensitive compare (default is eCase), or a
1642  /// case-insensitive compare (eNocase).
1643  /// @return
1644  /// - true, if s1 equals s2;
1645  /// - false, otherwise.
1646  /// @sa
1647  /// EqualNocase(), Equal() versions with similar argument types.
1648  static bool Equal(const CTempStringEx s1, const CTempStringEx s2,
1649  ECase use_case = eCase);
1650 
1651  // NOTE. On some platforms, "strn[case]cmp()" can work faster than their
1652  // "Compare***()" counterparts.
1653 
1654  /// String compare.
1655  ///
1656  /// @param s1
1657  /// String to be compared -- operand 1.
1658  /// @param s2
1659  /// String to be compared -- operand 2.
1660  /// @return
1661  /// - 0, if s1 == s2;
1662  /// - Negative integer, if s1 < s2;
1663  /// - Positive integer, if s1 > s2.
1664  /// @sa
1665  /// strncmp(), strcasecmp(), strncasecmp()
1666  static int strcmp(const char* s1, const char* s2);
1667 
1668  /// String compare up to specified number of characters.
1669  ///
1670  /// @param s1
1671  /// String to be compared -- operand 1.
1672  /// @param s2
1673  /// String to be compared -- operand 2.
1674  /// @param n
1675  /// Maximum number of characters to compare.
1676  /// @return
1677  /// - 0, if s1 == s2;
1678  /// - Negative integer, if s1 < s2;
1679  /// - Positive integer, if s1 > s2.
1680  /// @sa
1681  /// strcmp(), strcasecmp(), strncasecmp()
1682  static int strncmp(const char* s1, const char* s2, size_t n);
1683 
1684  /// Case-insensitive comparison of two zero-terminated strings.
1685  ///
1686  /// @param s1
1687  /// String to be compared -- operand 1.
1688  /// @param s2
1689  /// String to be compared -- operand 2.
1690  /// @return
1691  /// - 0, if s1 == s2;
1692  /// - Negative integer, if s1 < s2;
1693  /// - Positive integer, if s1 > s2.
1694  /// @sa
1695  /// strcmp(), strncmp(), strncasecmp()
1696  static int strcasecmp(const char* s1, const char* s2);
1697 
1698  /// Case-insensitive comparison of two zero-terminated strings,
1699  /// narrowed to the specified number of characters.
1700  ///
1701  /// @param s1
1702  /// String to be compared -- operand 1.
1703  /// @param s2 String to be compared -- operand 2.
1704  /// @param n Maximum number of characters to compare.
1705  /// @return
1706  /// - 0, if s1 == s2;
1707  /// - Negative integer, if s1 < s2;
1708  /// - Positive integer, if s1 > s2.
1709  /// @sa
1710  /// strcmp(), strncmp(), strcasecmp()
1711  static int strncasecmp(const char* s1, const char* s2, size_t n);
1712 
1713  /// Wrapper for the function strftime() that corrects handling %D and %T
1714  /// time formats on MS Windows.
1715  static size_t strftime(char* s, size_t maxsize, const char* format,
1716  const struct tm* timeptr);
1717 
1718  /// Match "str" against the "mask".
1719  ///
1720  /// This function does not use regular expressions.
1721  /// Very similar to fnmatch(3), but there are differences (see also glob(7)).
1722  /// There's no special treatment for a slash character '/' in this call.
1723  ///
1724  /// @param str
1725  /// String to match.
1726  /// @param mask
1727  /// Mask used to match string "str".
1728  /// This is a text pattern, which, along with ordinary characters that must match
1729  /// literally the corresponding symbols in the string "str", can also contain
1730  /// the following wildcard characters: \n
1731  /// ? - matches to any single character in the string. \n
1732  /// * - matches to any number of characters in the string (including none). \n
1733  ///
1734  /// Mask also supports POSIX character classes in the forms of "[...]" or "[!...]"
1735  /// that must MATCH or NOT MATCH, respectively, a single character in "str".
1736  /// To cancel the special meaning of '*', '?' or '[', they can be prepended with
1737  /// a backslash '\\' (the backslash in front of other characters does not change
1738  /// their meaning, so "\\\\" matches one graphical backslash in the "str").
1739  /// Within a character class, to have its literal meaning a closing square bracket ']'
1740  /// must be used at the first position, whereas '?', '*', '[', and '\\' stand
1741  /// just for themselves. Two characters separated by a minus sign '-' denote
1742  /// a range that can be used for contraction to include all characters in
1743  /// between: "[A-F]" is equivalent to "[ABCDEF]".
1744  /// For its literal meaning, the minus sign '-' can be used either at the very
1745  /// first position, or the last position before the closing bracket ']'.
1746  /// To have a range that begins with an exclamation point, one has to use
1747  /// a dummy empty range followed by that range with '!'.
1748  ///
1749  /// Examples:
1750  /// "!" matches a single '!' (note that just "[!]" is invalid);
1751  /// "[!!]" matches any character, which is not an exclamation point '!';
1752  /// "[][!]" matches ']', '[', and '!';
1753  /// "[!][-]" matches any character except for ']', '[', and '-';
1754  /// "[-]" matches a minus sign '-' (same as '-' just by itself);
1755  /// "[?*\\]" matches either '?', or '*', or a backslash '\\';
1756  /// "[]-\\]" matches nothing as it defines an empty range (from ']' to '\\');
1757  /// "\\[a]\\*" matches a literal substring "[a]*";
1758  /// "[![a-]" matches any char but '[', 'a' or '-' (same as "[!-[a]"; but not
1759  /// "[![-a]", which defines an empty range, thus matches any char!);
1760  /// "[]A]" matches either ']' or 'A' (NB: "[A]]" matches a substring "A]");
1761  /// "[0-9-]" matches any decimal digit or a minus sign '-' (same: "[-0-9]");
1762  /// "[9-0!-$]" matches '!', '"', '#', and '$' (as first range matches nothing).
1763  ///
1764  /// @note
1765  /// In the above, each double backslash denotes a single graphical backslash
1766  /// character (C string notation is used).
1767  /// @note
1768  /// Unlike shell globbing, "[--0]" *does* match the slash character '/'
1769  /// (along with '-', '.', and '0' that all fall within the range).
1770  /// @param use_case
1771  /// Whether to do a case sensitive compare for letters (eCase -- default),
1772  /// or a case-insensitive compare (eNocase).
1773  /// @return
1774  /// Return TRUE if "str" matches "mask", and FALSE otherwise
1775  /// (including pattern errors).
1776  /// @sa
1777  /// CRegexp, CRegexpUtil
1778  ///
1779  static bool MatchesMask(CTempString str, CTempString mask, ECase use_case = eCase);
1780 
1781  /// Check if a string is blank (has no text).
1782  ///
1783  /// @param str
1784  /// String to check.
1785  /// @param pos
1786  /// starting position (default 0)
1787  static bool IsBlank(const CTempString str, SIZE_TYPE pos = 0);
1788 
1789  /// Checks if all letters in the given string are lowercase.
1790  ///
1791  /// @param str
1792  /// String to be checked.
1793  /// @return
1794  /// TRUE if all letter characters in the string are lowercase
1795  /// according to the current C locale (std::islower()).
1796  /// All non-letter characters will be ignored.
1797  /// TRUE if empty or no letters.
1798  static bool IsLower(const CTempString str);
1799 
1800  /// Checks if all letters in the given string are uppercase.
1801  ///
1802  /// @param str
1803  /// String to be checked.
1804  /// @return
1805  /// TRUE if all letter characters in the string are uppercase
1806  /// according to the current C locale (std::isupper()).
1807  /// All non-letter characters will be skipped.
1808  /// TRUE if empty or no letters.
1809  static bool IsUpper(const CTempString str);
1810 
1811 
1812  // The following 4 methods change the passed string, then return it
1813 
1814  /// Convert string to lower case -- string& version.
1815  ///
1816  /// @param str
1817  /// String to be converted.
1818  /// @return
1819  /// Lower cased string.
1820  static string& ToLower(string& str);
1821 
1822  /// Convert string to lower case -- char* version.
1823  ///
1824  /// @param str
1825  /// String to be converted.
1826  /// @return
1827  /// Lower cased string.
1828  static char* ToLower(char* str);
1829 
1830  /// Convert string to upper case -- string& version.
1831  ///
1832  /// @param str
1833  /// String to be converted.
1834  /// @return
1835  /// Upper cased string.
1836  static string& ToUpper(string& str);
1837 
1838  /// Convert string to upper case -- char* version.
1839  ///
1840  /// @param str
1841  /// String to be converted.
1842  /// @return
1843  /// Upper cased string.
1844  static char* ToUpper(char* str);
1845 
1846 private:
1847  /// Privatized ToLower() with const char* parameter to prevent passing of
1848  /// constant strings.
1849  static void/*dummy*/ ToLower(const char* /*dummy*/);
1850 
1851  /// Privatized ToUpper() with const char* parameter to prevent passing of
1852  /// constant strings.
1853  static void/*dummy*/ ToUpper(const char* /*dummy*/);
1854 
1855 public:
1856 
1857  /// Check if a string starts with a specified prefix value.
1858  ///
1859  /// @param str
1860  /// String to check.
1861  /// @param start
1862  /// Prefix value to check for.
1863  /// @param use_case
1864  /// Whether to do a case sensitive compare(default is eCase), or a
1865  /// case-insensitive compare (eNocase) while checking.
1866  static bool StartsWith(const CTempString str, const CTempString start,
1867  ECase use_case = eCase);
1868 
1869  /// Check if a string starts with a specified character value.
1870  ///
1871  /// @param str
1872  /// String to check.
1873  /// @param start
1874  /// Character value to check for.
1875  /// @param use_case
1876  /// Whether to do a case sensitive compare(default is eCase), or a
1877  /// case-insensitive compare (eNocase) while checking.
1878  static bool StartsWith(const CTempString str, char start,
1879  ECase use_case = eCase);
1880 
1881  /// Check if a string ends with a specified suffix value.
1882  ///
1883  /// @param str
1884  /// String to check.
1885  /// @param end
1886  /// Suffix value to check for.
1887  /// @param use_case
1888  /// Whether to do a case sensitive compare(default is eCase), or a
1889  /// case-insensitive compare (eNocase) while checking.
1890  static bool EndsWith(const CTempString str, const CTempString end,
1891  ECase use_case = eCase);
1892 
1893  /// Check if a string ends with a specified character value.
1894  ///
1895  /// @param str
1896  /// String to check.
1897  /// @param end
1898  /// Character value to check for.
1899  /// @param use_case
1900  /// Whether to do a case sensitive compare(default is eCase), or a
1901  /// case-insensitive compare (eNocase) while checking.
1902  static bool EndsWith(const CTempString str, char end,
1903  ECase use_case = eCase);
1904 
1905  /// Determine the common prefix of two strings.
1906  ///
1907  /// @param s1
1908  /// String to be compared -- operand 1.
1909  /// @param s2
1910  /// String to be compared -- operand 2.
1911  /// @return
1912  /// The number of characters common to the start of each string.
1913  static SIZE_TYPE CommonPrefixSize(const CTempString s1, const CTempString s2);
1914 
1915  /// Determine the common suffix of two strings.
1916  ///
1917  /// @param s1
1918  /// String to be compared -- operand 1.
1919  /// @param s2
1920  /// String to be compared -- operand 2.
1921  /// @return
1922  /// The number of characters common to the end of each string.
1923  static SIZE_TYPE CommonSuffixSize(const CTempString s1, const CTempString s2);
1924 
1925  /// Determine if the suffix of one string is the prefix of another.
1926  ///
1927  /// @param s1
1928  /// String to be compared -- operand 1.
1929  /// @param s2
1930  /// String to be compared -- operand 2.
1931  /// @return
1932  /// The number of characters common to the end of the first string
1933  /// and the start of the second string.
1934  static SIZE_TYPE CommonOverlapSize(const CTempString s1, const CTempString s2);
1935 
1936 
1937  /// Whether it is the first or last occurrence.
1938  /// @deprecated Used only by the deprecated Find*()/FindWord() overloads; the newer overloads take EDirection.
1939  enum EOccurrence {
1940  eFirst, ///< First occurrence
1941  eLast ///< Last occurrence
1942  };
1943 
1944  /// Search direction for Find() methods.
1945  enum EDirection {
1946  eForwardSearch = 0, ///< Search in a forward direction
1947  eReverseSearch ///< Search in a backward direction
1948  };
1949 
1950 
1951  /// Find the pattern in the string.
1952  ///
1953  /// @param str
1954  /// String to search.
1955  /// @param pattern
1956  /// Pattern to search for in "str".
1957  /// @param use_case
1958  /// Whether to do a case sensitive compare (default is eCase), or a
1959  /// case-insensitive compare (eNocase) while searching for the pattern.
1960  /// @param direction
1961  /// Define a search direction of the requested "occurrence"
1962  /// of "pattern" in "str".
1963  /// @param occurrence
1964  /// Which occurrence of the pattern in the string to use (zero-based).
1965  /// NOTE: When an occurrence is found the next occurrence will be
1966  /// searched for starting right *after* the found pattern.
1967  /// @return
1968  /// Start of the found pattern in the string.
1969  /// Or NPOS if there is no occurrence of the pattern in the string.
1970  static SIZE_TYPE Find(const CTempString str,
1971  const CTempString pattern,
1972  ECase use_case = eCase,
1973  EDirection direction = eForwardSearch,
1974  SIZE_TYPE occurrence = 0);
1975 
1976  /// Find the pattern in the specified range of a string.
1977  ///
1978  /// @param str
1979  /// String to search.
1980  /// @param pattern
1981  /// Pattern to search for in "str".
1982  /// @param start
1983  /// Position in "str" to start search from.
1984  /// 0 means start the search from the beginning of the string.
1985  /// @param end
1986  /// Position in "str" to perform search up to.
1987  /// NPOS means to search to the end of the string.
1988  /// @param which
1989  /// When set to eFirst, this means to find the first occurrence of
1990  /// "pattern" in "str". When set to eLast, this means to find the last
1991  /// occurrence of "pattern" in "str".
1992  /// @param use_case
1993  /// Whether to do a case sensitive compare (default is eCase), or a
1994  /// case-insensitive compare (eNocase) while searching for the pattern.
1995  /// @return
1996  /// - The start of the first or last (depending on "which" parameter)
1997  /// occurrence of "pattern" in "str", within the string interval
1998  /// ["start", "end"], or
1999  /// - NPOS if there is no occurrence of the pattern.
2000  /// @sa FindCase, FindNoCase, FindWord
2001  ///
2002  /// @deprecated
2003  /// Use
2004  /// @code
2005  /// Find(str, pattern, [use_case], [direction], [occurrence])
2006  /// @endcode
2007  /// method instead.
2008  /// For example:
2009  /// @code
2010  /// Find(str, pattern, 0, NPOS, eLast, eCase)
2011  /// @endcode
2012  /// can be replaced by
2013  /// @code
2014  /// Find(str, pattern, eCase, eReverseSearch /*, 0 */)
2015  /// @endcode
2016  /// If you are searching within a substring of 'str', i.e. the ["start", "end"] search
2017  /// interval is not the default [0, NPOS] (which means the whole 'str' string), you may
2018  /// need to pass a substring instead of 'str', like
2019  /// @code
2020  /// Find(CTempString(str, start, len), pattern, ....)
2021  /// @endcode
2022  /// and after checking search result on NPOS, adjust it by 'start' yourself.
2024  static SIZE_TYPE Find(const CTempString str,
2025  const CTempString pattern,
2026  SIZE_TYPE start, SIZE_TYPE end,
2027  EOccurrence which = eFirst,
2028  ECase use_case = eCase);
2029 
2030  /// Wrapper for backward-compatibility: forwards to FindCase(str, pattern, start).
2031  inline
2032  static SIZE_TYPE Find(const CTempString str, const CTempString pattern, SIZE_TYPE start)
2033  { return FindCase(str, pattern, start); }
2034 
2035 
2036  /// Find the pattern in the specified range of a string using a case
2037  /// sensitive search.
2038  ///
2039  /// @param str
2040  /// String to search.
2041  /// @param pattern
2042  /// Pattern to search for in "str".
2043  /// @param start
2044  /// Position in "str" to start search from -- default of 0 means start
2045  /// the search from the beginning of the string.
2046  /// @param end
2047  /// Position in "str" to perform search up to -- default of NPOS means
2048  /// to search to the end of the string.
2049  /// @param which
2050  /// When set to eFirst, this means to find the first occurrence of
2051  /// "pattern" in "str". When set to eLast, this means to find the last
2052  /// occurrence of "pattern" in "str".
2053  /// @return
2054  /// - The start of the first or last (depending on "which" parameter)
2055  /// occurrence of "pattern" in "str", within the string interval
2056  /// ["start", "end"], or
2057  /// - NPOS if there is no occurrence of the pattern.
2058  /// @sa Find
2059  ///
2060  /// @deprecated
2061  /// Use Find() method without [start:end] range.
2062  /// @deprecated
2063  /// Use one of the next methods instead:
2064  /// @code
2065  /// Find(str, pattern, [use_case], [direction], [occurrence])
2066  /// FindCase(str, pattern, [start])
2067  /// @endcode
2068  /// For example:
2069  /// @code
2070  /// FindCase(str, pattern, 0, NPOS, eLast)
2071  /// @endcode
2072  /// can be replaced by
2073  /// @code
2074  /// Find(str, pattern, eCase, eReverseSearch /*, 0 */)
2075  /// @endcode
2076  /// For simpler cases without range, or with default [0, NPOS] please use
2077  /// @code
2078  /// FindCase(str, pattern, [start])
2079  /// @endcode
2080  /// But if you are searching within a substring of 'str', i.e. the ["start", "end"] search
2081  /// interval is not the default [0, NPOS] (which means the whole 'str' string), you may
2082  /// need to pass a substring instead of 'str', like
2083  /// @code
2084  /// FindCase(CTempString(str, start, len), pattern, ....)
2085  /// @endcode
2086  /// and after checking search result on NPOS, adjust it by 'start' yourself.
2088  static SIZE_TYPE FindCase(const CTempString str,
2089  const CTempString pattern,
2090  SIZE_TYPE start, SIZE_TYPE end,
2091  EOccurrence which = eFirst);
2092 
2093  /// Wrappers for backward-compatibility
2094  static SIZE_TYPE FindCase(const CTempString str, const CTempString pattern);
2095  static SIZE_TYPE FindCase(const CTempString str, const CTempString pattern, SIZE_TYPE start);
2096 
2097  /// Find the pattern in the specified range of a string using a case
2098  /// insensitive search.
2099  ///
2100  /// @param str
2101  /// String to search.
2102  /// @param pattern
2103  /// Pattern to search for in "str".
2104  /// @param start
2105  /// Position in "str" to start search from -- default of 0 means start
2106  /// the search from the beginning of the string.
2107  /// @param end
2108  /// Position in "str" to perform search up to -- default of NPOS means
2109  /// to search to the end of the string.
2110  /// @param which
2111  /// When set to eFirst, this means to find the first occurrence of
2112  /// "pattern" in "str". When set to eLast, this means to find the last
2113  /// occurrence of "pattern" in "str".
2114  /// @return
2115  /// - The start of the first or last (depending on "which" parameter)
2116  /// occurrence of "pattern" in "str", within the string interval
2117  /// ["start", "end"], or
2118  /// - NPOS if there is no occurrence of the pattern.
2119  /// @sa Find
2120  ///
2121  /// @deprecated
2122  /// Use one of the next methods instead:
2123  /// @code
2124  /// Find(str, pattern, [use_case], [direction], [occurrence])
2125  /// FindNoCase(str, pattern, [start])
2126  /// @endcode
2127  /// For example:
2128  /// @code
2129  /// FindNoCase(str, pattern, 0, NPOS, eLast)
2130  /// @endcode
2131  /// can be replaced by
2132  /// @code
2133  /// Find(str, pattern, eNocase, eReverseSearch /*, 0 */)
2134  /// @endcode
2135  /// For simpler cases without range, or with default [0, NPOS] please use
2136  /// @code
2137  /// FindNoCase(str, pattern, [start])
2138  /// @endcode
2139  /// But if you are searching within a substring of 'str', i.e. the ["start", "end"] search
2140  /// interval is not the default [0, NPOS] (which means the whole 'str' string), you may
2141  /// need to pass a substring instead of 'str', like
2142  /// @code
2143  /// FindNoCase(CTempString(str, start, len), pattern, ....)
2144  /// @endcode
2145  /// and after checking search result on NPOS, adjust it by 'start' yourself.
2147  static SIZE_TYPE FindNoCase(const CTempString str,
2148  const CTempString pattern,
2149  SIZE_TYPE start, SIZE_TYPE end,
2150  EOccurrence which = eFirst);
2151 
2152  /// Wrappers for backward-compatibility
2153  static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern);
2154  static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start);
2155 
2156  /// Test for presence of a given string in a list or vector of strings
2157 
2158  static const string* Find (const list<string>& lst,
2159  const CTempString val,
2160  ECase use_case = eCase);
2161 
2162  static const string* FindCase (const list<string>& lst,
2163  const CTempString val);
2164 
2165  static const string* FindNoCase(const list<string>& lst,
2166  const CTempString val);
2167 
2168  static const string* Find (const vector<string>& vec,
2169  const CTempString val,
2170  ECase use_case = eCase);
2171 
2172  static const string* FindCase (const vector<string>& vec,
2173  const CTempString val);
2174 
2175  static const string* FindNoCase(const vector<string>& vec,
2176  const CTempString val);
2177 
2178  /// Find given word in the string.
2179  ///
2180  /// @param str
2181  /// String to search.
2182  /// @param word
2183  /// Word to search for in "str". The "word" can contain any symbols,
2184  /// not only letters. The function treats it as a pattern, even if it
2185  /// contains non-word characters.
2186  /// @param use_case
2187  /// Whether to do a case sensitive compare (default is eCase), or a
2188  /// case-insensitive compare (eNocase) while searching for the word.
2189  /// @param direction
2190  /// Define a search direction of the occurrence of "word" in "str".
2191  /// @return
2192  /// - Start of the found word in the string.
2193  /// - NPOS if there is no occurrence of the word in the string.
2194  static SIZE_TYPE FindWord(const CTempString str,
2195  const CTempString word,
2196  ECase use_case = eCase,
2197  EDirection direction = eForwardSearch);
2198 
2199  /// Find given word in the string.
2200  ///
2201  /// This function honors word boundaries:
2202  /// - starting or ending of the string,
2203  /// - any non-word character, all except [a-zA-Z0-9_].
2204  ///
2205  /// @param str
2206  /// String to search.
2207  /// @param word
2208  /// Word to search for in "str". The "word" can contain any symbols,
2209  /// not only letters. The function treats it as a pattern, even if it
2210  /// contains non-word characters.
2211  /// @param which
2212  /// When set to eFirst, this means to find the first occurrence of
2213  /// "word" in "str" (forward search). Any other value (eLast) means to find
2214  /// the last occurrence of "word" in "str" (reverse search).
2215  /// @param use_case
2216  /// Whether to do a case sensitive compare (default is eCase), or a
2217  /// case-insensitive compare (eNocase) while searching for the word.
2218  /// @return
2219  /// - The start of the first or last (depending on "which" parameter)
2220  /// occurrence of "word" in "str", or
2221  /// - NPOS if there is no occurrence of the word.
2222  /// @sa Find
2223  /// @deprecated
2224  /// Use FindWord() variant with EDirection parameter:
2225  /// @code
2226  /// FindWord(str, word, [use_case], [direction])
2227  /// @endcode
2228  inline
2230  static SIZE_TYPE FindWord(const CTempString str,
2231  const CTempString word,
2232  EOccurrence which,
2233  ECase use_case = eCase) {
2234  return FindWord(str, word, use_case, which == eFirst ? eForwardSearch : eReverseSearch);
2235  }
2236 
2237 
2238  /// Which end to truncate a string.
2239  enum ETrunc {
2240  eTrunc_Begin, ///< Truncate leading whitespace only
2241  eTrunc_End, ///< Truncate trailing whitespace only
2242  eTrunc_Both ///< Truncate whitespace at both begin and end of string
2243  };
2244 
2245  /// Truncate whitespace in a string.
2246  ///
2247  /// @param str
2248  /// String to truncate white-spaces from.
2249  /// @param where
2250  /// Which end of the string to truncate whitespace from.
2251  /// Default is to truncate whitespace from both ends (eTrunc_Both).
2252  /// @sa
2253  /// TruncateSpaces_Unsafe
2254  static string TruncateSpaces(const string& str,
2255  ETrunc where = eTrunc_Both);
2256 
2257  /// Truncate whitespace in a string.
2258  /// It can be faster but it is also more dangerous than TruncateSpaces()
2259  ///
2260  /// @param str
2261  /// String to truncate whitespace from.
2262  /// @param where
2263  /// Which end of the string to truncate whitespace from.
2264  /// Default is to truncate whitespace from both ends (eTrunc_Both).
2265  /// @attention
2266  /// The lifespan of the result string is the same as one of the source.
2267  /// So, for example, if the source is temporary string, or it changes somehow,
2268  /// then the result will be invalid right away (will point to already released
2269  /// or wrong range in the memory).
2270  /// @sa
2271  /// TruncateSpaces
2272  static CTempString TruncateSpaces_Unsafe(const CTempString str,
2273  ETrunc where = eTrunc_Both);
2274 
2275  /// Forwards to TruncateSpaces_Unsafe(str, where).
2276  /// @deprecated Use TruncateSpaces_Unsafe() instead -- AND, do make sure that you indeed use that in a safe manner!
2277  inline
2279  static CTempString TruncateSpaces(const CTempString str,
2280  ETrunc where = eTrunc_Both) {
2281  return TruncateSpaces_Unsafe(str, where);
2282  }
2283 
2284  /// Overload for a C string (const char*); forwards to TruncateSpaces_Unsafe(str, where).
2285  /// @deprecated Use TruncateSpaces_Unsafe() instead -- AND, do make sure that you indeed use that in a safe manner!
2286  inline
2288  static CTempString TruncateSpaces(const char* str,
2289  ETrunc where = eTrunc_Both) {
2290  return TruncateSpaces_Unsafe(str, where);
2291  }
2292 
2293  /// Truncate whitespace in a string (in-place)
2294  ///
2295  /// @param str
2296  /// String to truncate whitespace from.
2297  /// @param where
2298  /// Which end of the string to truncate whitespace from.
2299  /// Default is to truncate whitespace from both ends (eTrunc_Both).
2300  ///
2301  static void TruncateSpacesInPlace(string& str, ETrunc where = eTrunc_Both);
2302  static void TruncateSpacesInPlace(CTempString&, ETrunc where = eTrunc_Both);
2303 
2304 
2305  /// Trim prefix from a string (in-place)
2306  ///
2307  /// @param str
2308  /// String to trim from.
2309  /// @param prefix
2310  /// Prefix to remove.
2311  /// If the string doesn't have the specified prefix, nothing is trimmed.
2312  /// @param use_case
2313  /// Whether to do a case sensitive compare (default is eCase), or a
2314  /// case-insensitive compare (eNocase) while checking for a prefix.
2315  ///
2316  static void TrimPrefixInPlace(string& str, const CTempString prefix,
2317  ECase use_case = eCase);
2318  static void TrimPrefixInPlace(CTempString& str, const CTempString prefix,
2319  ECase use_case = eCase);
2320 
2321  /// Trim prefix from a string.
2322  ///
2323  /// "Unsafe" counterpart to TrimPrefixInPlace().
2324  /// @param str
2325  /// String to trim from.
2326  /// @param prefix
2327  /// Prefix to remove.
2328  /// If the string doesn't have the specified prefix, nothing is trimmed.
2329  /// @param use_case
2330  /// Whether to do a case sensitive compare (default is eCase), or a
2331  /// case-insensitive compare (eNocase) while checking for a prefix.
2332  /// @attention
2333  /// The lifespan of the result string is the same as one of the source.
2334  /// So, for example, if the source is temporary string, or it changes somehow,
2335  /// then the result will be invalid right away (will point to already released
2336  /// or wrong range in the memory).
2337  /// @sa
2338  /// TrimPrefixInPlace
2339  static CTempString TrimPrefix_Unsafe(const CTempString str,
2340  const CTempString prefix,
2341  ECase use_case = eCase);
2342 
2343  /// Trim suffix from a string (in-place)
2344  ///
2345  /// @param str
2346  /// String to trim from.
2347  /// @param suffix
2348  /// Suffix to remove.
2349  /// If the string doesn't have the specified suffix, nothing is trimmed.
2350  /// @param use_case
2351  /// Whether to do a case sensitive compare (default is eCase), or a
2352  /// case-insensitive compare (eNocase) while checking for a suffix.
2353  static void TrimSuffixInPlace(string& str, const CTempString suffix,
2354  ECase use_case = eCase);
2355  static void TrimSuffixInPlace(CTempString& str, const CTempString suffix,
2356  ECase use_case = eCase);
2357 
2358  /// Trim suffix from a string.
2359  ///
2360  /// "Unsafe" counterpart to TrimSuffixInPlace().
2361  /// @param str
2362  /// String to trim from.
2363  /// @param suffix
2364  /// Suffix to remove.
2365  /// If the string doesn't have the specified suffix, nothing is trimmed.
2366  /// @param use_case
2367  /// Whether to do a case sensitive compare (default is eCase), or a
2368  /// case-insensitive compare (eNocase) while checking for a suffix.
2369  /// @attention
2370  /// The lifespan of the result string is the same as one of the source.
2371  /// So, for example, if the source is temporary string, or it changes somehow,
2372  /// then the result will be invalid right away (will point to already released
2373  /// or wrong range in the memory).
2374  /// @sa
2375  /// TrimSuffixInPlace
2376  static CTempString TrimSuffix_Unsafe(const CTempString str,
2377  const CTempString suffix,
2378  ECase use_case = eCase);
2379 
2380  /// Replace occurrences of a substring within a string.
2381  ///
2382  /// @param src
2383  /// Source string from which specified substring occurrences are replaced.
2384  /// @param search
2385  /// Substring value in "src" that is replaced.
2386  /// @param replace
2387  /// Replace "search" substring with this value.
2388  /// @param dst
2389  /// Result of replacing the "search" string with "replace" in "src".
2390  /// This value is also returned by the function.
2391  /// @param start_pos
2392  /// Position to start search from.
2393  /// @param max_replace
2394  /// Replace no more than "max_replace" occurrences of substring "search"
2395  /// If "max_replace" is zero(default), then replace all occurrences with
2396  /// "replace".
2397  /// @param num_replace
2398  /// Optional pointer to a value which receives number of replacements occurred.
2399  /// @return
2400  /// Result of replacing the "search" string with "replace" in "src". This
2401  /// value is placed in "dst" as well.
2402  /// @note
2403  /// After replacing each occurrence of the "search" string with "replace",
2404  /// this function moves the current search position past the new replacement
2405  /// in the resulting string, so it doesn't search over any part of the
2406  /// inserted "replace".
2407  /// @sa
2408  /// Version of Replace() that returns a new string.
2409  static string& Replace(const string& src,
2410  const string& search,
2411  const string& replace,
2412  string& dst,
2413  SIZE_TYPE start_pos = 0,
2414  SIZE_TYPE max_replace = 0,
2415  SIZE_TYPE* num_replace = 0);
2416 
2417  /// Replace occurrences of a substring within a string and returns the
2418  /// result as a new string.
2419  ///
2420  /// @param src
2421  /// Source string from which specified substring occurrences are
2422  /// replaced.
2423  /// @param search
2424  /// Substring value in "src" that is replaced.
2425  /// @param replace
2426  /// Replace "search" substring with this value.
2427  /// @param start_pos
2428  /// Position to start search from.
2429  /// @param max_replace
2430  /// Replace no more than "max_replace" occurrences of substring "search"
2431  /// If "max_replace" is zero(default), then replace all occurrences with
2432  /// "replace".
2433  /// @param num_replace
2434  /// Optional pointer to a value which receives number of replacements occurred.
2435  /// @return
2436  /// A new string containing the result of replacing the "search" string
2437  /// with "replace" in "src"
2438  /// @note
2439  /// After replacing each occurence of the "search" string with "replace"
2440  /// this function move current search position behind a new replacement
2441  /// in the resulting string, so it doesn't search over any part of the
2442  /// inserted "replace".
2443  /// @sa
2444  /// Version of Replace() that has a destination parameter to accept
2445  /// result.
2446  static string Replace(const string& src,
2447  const string& search,
2448  const string& replace,
2449  SIZE_TYPE start_pos = 0,
2450  SIZE_TYPE max_replace = 0,
2451  SIZE_TYPE* num_replace = 0);
2452 
2453  /// Replace occurrences of a substring within a string.
2454  ///
2455  /// On some platforms this function is much faster than Replace()
2456  /// if sizes of "search" and "replace" strings are equal.
2457  /// Otherwise, the performance is mainly the same.
2458  /// @param src
2459  /// String where the specified substring occurrences are replaced.
2460  /// This value is also returned by the function.
2461  /// @param search
2462  /// Substring value in "src" that is replaced.
2463  /// @param replace
2464  /// Replace "search" substring with this value.
2465  /// @param start_pos
2466  /// Position to start search from.
2467  /// @param max_replace
2468  /// Replace no more than "max_replace" occurrences of substring "search"
2469  /// If "max_replace" is zero(default), then replace all occurrences with
2470  /// "replace".
2471  /// @note
2472  /// After replacing each occurence of the "search" string with "replace"
2473  /// this function move current search position behind a new replacement
2474  /// in the resulting string, so it doesn't search over any part of the
2475  /// inserted "replace".
2476  /// @param num_replace
2477  /// Optional pointer to a value which receives number of replacements occurred.
2478  /// @return
2479  /// Result of replacing the "search" string with "replace" in "src".
2480  /// @sa
2481  /// Replace
2482  static string& ReplaceInPlace(string& src,
2483  const string& search,
2484  const string& replace,
2485  SIZE_TYPE start_pos = 0,
2486  SIZE_TYPE max_replace = 0,
2487  SIZE_TYPE* num_replace = 0);
2488 
/// Flags for Split*() methods.
///
/// @note
///   With quote support enabled, doubling a quote character suppresses
///   its special meaning, as does escaping it if that's enabled too;
///   unescaped trailing backslashes and unbalanced quotes result in
///   exceptions.
/// @note
///   All escape symbols and single or double quotes are removed
///   if the corresponding fSplit_Can* flag is used.
enum ESplitFlags {
    fSplit_MergeDelimiters = 1 << 0,  ///< Merge adjacent delimiters
    fSplit_Truncate_Begin  = 1 << 1,  ///< Truncate leading delimiters
    fSplit_Truncate_End    = 1 << 2,  ///< Truncate trailing delimiters
    fSplit_Truncate        = fSplit_Truncate_Begin | fSplit_Truncate_End,
    fSplit_ByPattern       = 1 << 3,  ///< Require full delimiter strings
    fSplit_CanEscape       = 1 << 4,  ///< Allow \\... escaping
    fSplit_CanSingleQuote  = 1 << 5,  ///< Allow '...' quoting
    fSplit_CanDoubleQuote  = 1 << 6,  ///< Allow "..." quoting
    fSplit_CanQuote        = fSplit_CanSingleQuote | fSplit_CanDoubleQuote,
    /// All delimiters are merged and trimmed, to get non-empty tokens only
    fSplit_Tokenize        = fSplit_MergeDelimiters | fSplit_Truncate
};
typedef int TSplitFlags;  ///< Bitwise OR of ESplitFlags
2513 
2514  /// Whether to merge adjacent delimiters.
2515  /// Used by some methods that don't need full functionality of ESplitFlags.
2517  eMergeDelims = fSplit_MergeDelimiters | fSplit_Truncate,
2518  eNoMergeDelims = 0
2519  };
2520 
2521  /// Split a string using specified delimiters.
2522  ///
2523  /// @param str
2524  /// String to be split.
2525  /// @param delim
2526  /// Delimiter(s) used to split string "str". The interpretation of
2527  /// multi-character values depends on flags: by default, any of those
2528  /// characters marks a split point (when unquoted), but with
2529  /// fSplit_ByPattern, the entire string must occur. (Meanwhile,
2530  /// an empty value disables splitting.)
2531  /// @param arr
2532  /// The split tokens are added to the list "arr" and also returned
2533  /// by the function.
2534  /// @param flags
2535  /// Flags directing splitting, characterized under ESplitFlags.
2536  /// @param token_pos
2537  /// Optional array for the tokens' positions in "str".
2538  /// @attention
2539  /// Modifying source CTempString object or destroying it,
2540  /// will invalidate results.
2541  /// @return
2542  /// The list "arr" is also returned.
2543  /// @sa
2544  /// ESplitFlags, SplitInTwo, SplitByPattern
2545  static list<string>& Split( const CTempString str,
2546  const CTempString delim,
2547  list<string>& arr,
2548  TSplitFlags flags = 0,
2549  vector<SIZE_TYPE>* token_pos = NULL);
2550 
2551  static vector<string>& Split(
2552  const CTempString str,
2553  const CTempString delim,
2554  vector<string>& arr,
2555  TSplitFlags flags = 0,
2556  vector<SIZE_TYPE>* token_pos = NULL);
2557 
2558  static list<CTempString>& Split(
2559  const CTempString str,
2560  const CTempString delim,
2561  list<CTempString>& arr,
2562  TSplitFlags flags = 0,
2563  vector<SIZE_TYPE>* token_pos = NULL,
2564  CTempString_Storage* storage = NULL);
2565 
2566  static vector<CTempString>& Split(
2567  const CTempString str,
2568  const CTempString delim,
2569  vector<CTempString>& arr,
2570  TSplitFlags flags = 0,
2571  vector<SIZE_TYPE>* token_pos = NULL,
2572  CTempString_Storage* storage = NULL);
2573 
2574  static list<CTempStringEx>& Split(
2575  const CTempString str,
2576  const CTempString delim,
2577  list<CTempStringEx>& arr,
2578  TSplitFlags flags = 0,
2579  vector<SIZE_TYPE>* token_pos = NULL,
2580  CTempString_Storage* storage = NULL);
2581 
2582  static vector<CTempStringEx>& Split(
2583  const CTempString str,
2584  const CTempString delim,
2585  vector<CTempStringEx>& arr,
2586  TSplitFlags flags = 0,
2587  vector<SIZE_TYPE>* token_pos = NULL,
2588  CTempString_Storage* storage = NULL);
2589 
2590  /// Split a string into two pieces using the specified delimiters
2591  ///
2592  /// @param str
2593  /// String to be split.
2594  /// @param delim
2595  /// Delimiters used to split string "str".
2596  /// @param str1
2597  /// The sub-string of "str" before the first character of "delim".
2598  /// It will not contain any characters in "delim".
2599  /// Will be empty if "str" begin with a delimiter.
2600  /// @param str2
2601  /// The sub-string of "str" after the first character of "delim" found.
2602  /// May contain "delim" characters.
2603  /// Will be empty if "str" had no "delim" characters or ended
2604  /// with the "delim" character.
2605  /// @param flags
2606  /// Flags directing splitting, characterized under ESplitFlags.
2607  /// Note, that fSplit_Truncate_End don't have any effect due nature
2608  /// of this method.
2609  /// @attention
2610  /// Modifying source CTempString object or destroying it,
2611  /// will invalidate results.
2612  /// @return
2613  /// true if a symbol from "delim" was found in "str", false if not.
2614  /// This lets you distinguish when there were no delimiters and when
2615  /// the very last character was the first delimiter.
2616  /// @sa
2617  /// ESplitFlags, Split
2618  static bool SplitInTwo(const CTempString str,
2619  const CTempString delim,
2620  string& str1,
2621  string& str2,
2622  TSplitFlags flags = 0);
2623 
2624  static bool SplitInTwo(const CTempString str,
2625  const CTempString delim,
2626  CTempString& str1,
2627  CTempString& str2,
2628  TSplitFlags flags = 0,
2629  CTempString_Storage* storage = NULL);
2630 
2631  static bool SplitInTwo(const CTempString str,
2632  const CTempString delim,
2633  CTempStringEx& str1,
2634  CTempStringEx& str2,
2635  TSplitFlags flags = 0,
2636  CTempString_Storage* storage = NULL);
2637 
2638 
2639  /// Variation of Split() with fSplit_ByPattern flag applied by default
2640 
2641  static list<string>& SplitByPattern(
2642  const CTempString str,
2643  const CTempString delim,
2644  list<string>& arr,
2645  TSplitFlags flags = 0,
2646  vector<SIZE_TYPE>* token_pos = NULL);
2647 
2648  static vector<string>& SplitByPattern(
2649  const CTempString str,
2650  const CTempString delim,
2651  vector<string>& arr,
2652  TSplitFlags flags = 0,
2653  vector<SIZE_TYPE>* token_pos = NULL);
2654 
2655  static list<CTempString>& SplitByPattern(
2656  const CTempString str,
2657  const CTempString delim,
2658  list<CTempString>& arr,
2659  TSplitFlags flags = 0,
2660  vector<SIZE_TYPE>* token_pos = NULL,
2661  CTempString_Storage* storage = NULL);
2662 
2663  static vector<CTempString>& SplitByPattern(
2664  const CTempString str,
2665  const CTempString delim,
2666  vector<CTempString>& arr,
2667  TSplitFlags flags = 0,
2668  vector<SIZE_TYPE>* token_pos = NULL,
2669  CTempString_Storage* storage = NULL);
2670 
2671  static list<CTempStringEx>& SplitByPattern(
2672  const CTempString str,
2673  const CTempString delim,
2674  list<CTempStringEx>& arr,
2675  TSplitFlags flags = 0,
2676  vector<SIZE_TYPE>* token_pos = NULL,
2677  CTempString_Storage* storage = NULL);
2678 
2679  static vector<CTempStringEx>& SplitByPattern(
2680  const CTempString str,
2681  const CTempString delim,
2682  vector<CTempStringEx>& arr,
2683  TSplitFlags flags = 0,
2684  vector<SIZE_TYPE>* token_pos = NULL,
2685  CTempString_Storage* storage = NULL);
2686 
2687  /// Join strings using the specified delimiter.
2688  ///
2689  /// @param arr
2690  /// Array of strings to be joined.
2691  /// @param delim
2692  /// Delimiter used to join the string.
2693  /// @return
2694  /// The strings in "arr" are joined into a single string, separated
2695  /// with "delim".
2696  /// @sa Split
2697  template<typename TContainer>
2698  static string
2699  Join(const TContainer& arr, const CTempString& delim)
2700  {
2701  return x_Join(begin(arr), end(arr), delim);
2702  }
2703  template<typename TValue>
2704  static string
2705  Join(const initializer_list<TValue>& arr, const CTempString& delim)
2706  {
2707  return x_Join(begin(arr), end(arr), delim);
2708  }
2709  template<typename TInputIterator>
2710  static string
2711  Join( TInputIterator from, TInputIterator to, const CTempString& delim)
2712  {
2713  return x_Join(from, to, delim);
2714  }
2715  template<typename TInputIterator>
2716  static string
2717  JoinNumeric( TInputIterator from, TInputIterator to, const CTempString& delim)
2718  {
2719  return x_Join( from, to, delim);
2720  }
2721  template<typename TIterator, typename FTransform>
2722  static string
2723  TransformJoin( TIterator from, TIterator to, const CTempString& delim, FTransform fnTransform);
2724 
2725 
/// How to display printable strings.
///
/// Assists in making a printable version of "str".
enum EPrintableMode {
    fNewLine_Quote     = 0,   ///< Display "\n" instead of actual linebreak
    eNewLine_Quote     = fNewLine_Quote,
    fNewLine_Passthru  = 1,   ///< Break the line at every "\n" occurrence
    eNewLine_Passthru  = fNewLine_Passthru,
    fNonAscii_Passthru = 0,   ///< Allow non-ASCII but printable characters
    fNonAscii_Quote    = 2,   ///< Octal for all non-ASCII characters
    fPrintable_Full    = 64   ///< Show all octal digits at all times
};
typedef int TPrintableMode;  ///< Bitwise OR of EPrintableMode flags
2739 
2740  /// Get a printable version of the specified string.
2741  ///
2742  /// All non-printable characters will be represented as "\a", "\b", "\f",
2743  /// "\n", "\r", "\t", "\v", "\'", "\"", "\\\\", etc. or "\\ooo" where 'ooo'
2744  /// is an octal code of the character. The resultant string is a well-
2745  /// formed C string literal, which, without alterations, can be compiled by
2746  /// a C/C++ compiler. Potential tri-graphs are taken care of, too.
2747  /// In many instances, octal representations of non-printable characters
2748  /// can be reduced to take less than all 3 digits, if there is no ambiguity
2749  /// in the interpretation. fPrintable_Full cancels the reduction, and
2750  /// forces to produce the full 3-digit octal codes throughout.
2751  ///
2752  /// @param str
2753  /// The string whose printable version is wanted.
2754  /// @param mode
2755  /// How to display the string. The default setting of fNewLine_Quote
2756  /// displays the new lines as "\n", and uses the octal code reduction.
2757  /// When set to fNewLine_Passthru, the line breaks are actually produced
2758  /// after each "\n" but preceded with trailing backslashes.
2759  /// @return
2760  /// Return a printable version of "str".
2761  /// @sa
2762  /// ParseEscapes, Escape, CEncode, CParse, Sanitize
2763  static string PrintableString(const CTempString str,
2764  TPrintableMode mode = fNewLine_Quote | fNonAscii_Passthru);
2765 
2766  /// Escape string (generic version).
2767  ///
2768  /// Prefix any occurrences of the metacharacters with the escape character.
2769  /// @param str
2770  /// The string to be escaped.
2771  /// @metacharacters
2772  /// List of characters that need to be escaped.
2773  /// Use NStr::Join() if you have metacharacters in list<>, vector<> or set<>.
2774  /// @param escape_char
2775  /// Character used for escaping metacharacters.
2776  /// Each metacharacter will be replaced with pair "escape_char + metacharacter".
2777  /// Each escape character will be replaced with pair "escape_char + escape_char".
2778  /// @return
2779  /// Escaped string.
2780  /// @sa
2781  /// Unescape, PrintableString, Join
2782  static string Escape(const CTempString str, const CTempString metacharacters,
2783  char escape_char = '\\');
2784 
2785  /// Unescape string (generic version).
2786  ///
2787  /// Remove escape characters added by Escape().
2788  /// @param str
2789  /// The string to be processed.
2790  /// @param escape_char
2791  /// Character used for escaping.
2792  /// @return
2793  /// Unescaped string.
2794  /// @sa
2795  /// Escape
2796  static string Unescape(const CTempString str, char escape_char = '\\');
2797 
2798 
2799  /// Quote string (generic version).
2800  ///
2801  /// Prepend and append a specified quote character, but escaping any occurrence
2802  /// of the quote character using either a specified escape character (default '\')
2803  /// or as option, by doubling the quoting character if escape character is the same
2804  /// (e.g. like the single quote in SQL, double-quote in CSV).
2805  ///
2806  /// @param str
2807  /// The string to be quoted.
2808  /// @param quote_char
2809  /// Character used for quoting, default to double quote '"'.
2810  /// @param escape_char
2811  /// Character used for escaping other quote characters inside string (default '\').
2812  /// Each <quote_char> in the string will be replaced with pair "escape_char + quote_char".
2813  /// Each <escape_char> in the string will be replaced with pair "escape_char + escape_char".
2814  /// @return
2815  /// Quoted string.
2816  /// @sa
2817  /// Unquote, ParseQuoted, CEncode
2818  static string Quote(const CTempString str, char quote_char = '"', char escape_char = '\\');
2819 
2820  /// Unquote string (generic version).
2821  ///
2822  /// Remove quotation added by Quote(). Uses first character as quoting character.
2823  /// @param str
2824  /// The string to be processed.
2825  /// @param escape_char
2826  /// Character used for escaping.
2827  /// @return
2828  /// Unquoted string.
2829  /// @sa
2830  /// Quote, ParseQuoted, CEncode
2831  static string Unquote(const CTempString str, char escape_char = '\\');
2832 
2833 
/// Flags for Sanitize().
enum ESS_Flags {
    // Character filters
    fSS_alpha            = 1 <<  0,  ///< Check on ::isalpha()
    fSS_digit            = 1 <<  1,  ///< Check on ::isdigit()
    fSS_alnum            = 1 <<  2,  ///< Check on ::isalnum()
    fSS_print            = 1 <<  3,  ///< Check on ::isprint()
    fSS_cntrl            = 1 <<  4,  ///< Check on ::iscntrl()
    fSS_punct            = 1 <<  5,  ///< Check on ::ispunct()

    // Filter: in or out?
    fSS_Reject           = 1 << 11,  ///< Reject specified characters, allow all others.
                                     ///< Reverts the default behavior, which allows the
                                     ///< specified characters and rejects all others.
    // Utility flags
    fSS_Remove           = 1 << 12,  ///< Remove (rather than replace) rejected chars
    fSS_NoMerge          = 1 << 13,  ///< Do not merge adjacent spaces (rejected chars)
    fSS_NoTruncate_Begin = 1 << 14,  ///< Do not truncate leading spaces
    fSS_NoTruncate_End   = 1 << 15,  ///< Do not truncate trailing spaces
    fSS_NoTruncate       = fSS_NoTruncate_Begin | fSS_NoTruncate_End
};
typedef int TSS_Flags;  ///< Bitwise OR of ESS_Flags
2856 
2857  /// Sanitize a string, allowing only specified classes of characters.
2858  ///
2859  /// By default:
2860  /// - replace all non-printable characters with spaces;
2861  /// - merge coalescent spaces;
2862  /// - truncate leading and trailing spaces.
2863  /// @note
2864  /// - All coalescent leading/trailing spaces also will be merged
2865  /// by default if fSS_NoMerge has not specified.
2866  /// - The truncation of leading/trailing spaces is doing after
2867  /// allowing/rejecting characters. Depending on the specified flags,
2868  /// all rejected characters adjacent to it can be treat as part
2869  /// of leading/trailing spaces.
2870  /// @param str
2871  /// String to sanitize
2872  /// @param flags
2873  /// Alternative sanitation options
2874  /// @return
2875  /// Sanitized string
2876  /// @sa
2877  /// PrintableString
2878  static string Sanitize(CTempString str, TSS_Flags flags = fSS_print)
2879  {
2880  return Sanitize(str, CTempString(), CTempString(), ' ', flags);
2881  }
2882 
2883 
2884  /// Sanitize a string, allowing only specified characters or character classes.
2885  ///
2886  /// More customizable version of Sanitize():
2887  /// - allow to specify custom sets of allowed and rejected characters,
2888  /// in addition to predefined classes if specified, see TSS_Flags;
2889  /// - allow to specify replacement character for rejected symbols;
2890  /// By default:
2891  /// - replace all rejected characters with <reject_replacement>;
2892  /// - merge coalescent spaces and <reject_replacement>s (separately if differ);
2893  /// - truncate leading and trailing spaces.
2894  /// Filters check order:
2895  /// - character classes via flags.
2896  /// Note, that if no character classes are set, and no custom <allow_chars>
2897  /// or <reject_chars>, fSS_print will be used;
2898  /// - <allow_chars> if not empty, have priority over flags.
2899  /// - <reject_chars> if not empty, have priority over flags and <allow_chars> if have intersections.
2900  /// @note
2901  /// - All coalescent leading/trailing spaces also will be merged
2902  /// by default if fSS_NoMerge has not specified.
2903  /// - The truncation of leading/trailing spaces is doing after
2904  /// allowing/rejecting characters.
2905  /// @note
2906  /// Spaces processes after checks on allowance, so if it isn't allowed
2907  /// it will be threatened as regular rejected character.
2908  /// @param str
2909  /// String to sanitize.
2910  /// @param allow_chars
2911  /// Additional list of allowed characters, in addition to character classes in <flags>.
2912  /// Have priority over character classes.
2913  /// Use NStr::Join() if you have it in list<>, vector<> or set<>.
2914  /// @param reject_chars
2915  /// Additional list of rejected characters, in addition to character classes in <flags>.
2916  /// Have priority over character classes and <allow_chars>.
2917  /// Use NStr::Join() if you have it in list<>, vector<> or set<>.
2918  /// @param reject_replacement
2919  /// Replacement character for all rejected characters.
2920  /// @param flags
2921  /// Alternative sanitation options.
2922  /// If no custom <allow_chars> or <reject_chars>, and no character classes are set, then use fSS_print by default.
2923  /// If <reject_chars>, no class, and no fSS_Reject flag, then all characters allowed except <reject_chars>.
2924  /// If <allow_chars>, no class, and fSS_Reject flag, then no any character allowed except <allow_chars>.
2925  /// @return
2926  /// Sanitized string
2927  /// @sa
2928  /// PrintableString, Join
2929  static string Sanitize(CTempString str,
2930  CTempString allow_chars,
2931  CTempString reject_chars,
2932  char reject_replacement = ' ',
2933  TSS_Flags flags = 0);
2934 
/// C-style escape sequences parsing mode.
/// For escape sequences with a value outside the range of [0-255]
/// the behavior of ParseEscapes() depends on this mode.
/// By default, all out-of-range escape sequences are converted to
/// their least significant byte, with no warning.
enum EEscSeqRange {
    eEscSeqRange_Standard,   ///< Set char to the last (least significant
                             ///< byte) of the escape sequence (default).
    eEscSeqRange_FirstByte,  ///< Set char to the first byte of the escape
                             ///< sequence.
    eEscSeqRange_Throw,      ///< Throw an exception.
    eEscSeqRange_Errno,      ///< Set errno to ERANGE, return empty string.
    eEscSeqRange_User        ///< Set char to the user value
                             ///< passed in another parameter.
};
2950 
2951  /// Parse C-style escape sequences in the specified string.
2952  ///
2953  /// Parse escape sequences including all those produced by PrintableString.
2954  /// @param str
2955  /// The string to be parsed.
2956  /// @param mode
2957  /// Parsing mode.
2958  /// By default all escape sequences with a value outside the range of [0-255]
2959  /// will be converted to the least significant byte, with no warning.
2960  /// @param user_char
2961  /// If 'mode' have eEscSeqRange_User, replace all out of range
2962  /// escape sequences with this char.
2963  /// @return
2964  /// String with parsed C-style escape sequences.
2965  /// - If string have wrong format throw an CStringException exception.
2966  /// - If parsing succeeds, return the converted value.
2967  /// Set errno to zero only if eEscSeqRange_Errno is set.
2968  /// - Otherwise, if escape sequence is out of range [0-255],
2969  /// see eEscSeqRange* modes for behavior.
2970  /// @sa
2971  /// EEscSeqFlags, PrintableString, CEncode, CParse
2972  static string ParseEscapes(const CTempString str,
2973  EEscSeqRange mode = eEscSeqRange_Standard,
2974  char user_char = '?');
2975 
2976  /// Discard C-style backslash escapes and extract a quoted string.
2977  ///
2978  /// @param[in] str
2979  /// The original string to extract a quoted string from.
2980  /// It must start with a double quote.
2981  /// @param[out] n_read
2982  /// How many symbols the quoted string occupied in the original string.
2983  /// @return
2984  /// The extracted string, un-escaped and with the quotes removed.
2985  /// Throw an exception on format error.
2986  static string ParseQuoted(const CTempString str, size_t* n_read = NULL);
2987 
/// Defines whether a string is quoted or not.
enum EQuoted {
    eQuoted,     ///< String is quoted
    eNotQuoted   ///< String is not quoted
};
2993 
2994  /// Encode a string for C/C++.
2995  ///
2996  /// @param str
2997  /// The string to be parsed.
2998  /// @param quoted
2999  /// Define, to
3000  /// @sa
3001  /// CParse, PrintableString
3002  static string CEncode(const CTempString str, EQuoted quoted = eQuoted);
3003 
3004  /// Discard C-style backslash escapes.
3005  ///
3006  /// @param str
3007  /// The original string to parse.
3008  /// @param quoted
3009  /// Define that parsing string is quoted or not.
3010  /// If parameter "quoted" equal eQuoted and string is not started and
3011  /// finished with a double-quote, the exception will be thrown,
3012  /// otherwise quotes will be removed in result.
3013  /// @return
3014  /// String with parsed C-style escape sequences.
3015  /// @sa
3016  /// CEncode
3017  static string CParse(const CTempString str, EQuoted quoted = eQuoted);
3018 
3019  /// Encode a string for JavaScript.
3020  ///
3021  /// Replace relevant characters by predefined entities.
3022  /// Like to PrintableString(), but process some symbols in different way.
3023  /// @sa PrintableString
3024  static string JavaScriptEncode(const CTempString str);
3025 
/// XML-encode flags
enum EXmlEncode {
    /// Encode predefined entities only
    eXmlEnc_Contents     = 0,
    /// Encode double hyphen and ending hyphen,
    /// making the result safe to put into XML comments.
    eXmlEnc_CommentSafe  = 1 << 0,
    /// Check each character for conformance to the XML 1.1 standard,
    /// skip any character that is not allowed or throw a CStringException.
    /// https://www.w3.org/TR/xml11/#NT-Char
    eXmlEnc_Unsafe_Skip  = 1 << 1,
    eXmlEnc_Unsafe_Throw = 1 << 2
};
typedef int TXmlEncode;  ///< Bitwise OR of "EXmlEncode"
3040 
3041  /// Encode a string for XML.
3042  ///
3043  /// Replace relevant characters by predefined entities.
3044  static string XmlEncode(const CTempString str,
3045  TXmlEncode flags = eXmlEnc_Contents);
3046 
3047 
/// HTML-encode flags
enum EHtmlEncode {
    fHtmlEnc_EncodeAll           = 0,       ///< Encode all symbols
    fHtmlEnc_SkipLiteralEntities = 1 << 1,  ///< Skip "&entity;"
    fHtmlEnc_SkipNumericEntities = 1 << 2,  ///< Skip "&#NNNN;"
    fHtmlEnc_SkipEntities        = fHtmlEnc_SkipLiteralEntities | fHtmlEnc_SkipNumericEntities,
    fHtmlEnc_CheckPreencoded     = 1 << 3   ///< Print warning if some pre-encoded
                                            ///< entity found in the string
};
typedef int THtmlEncode;  ///< Bitwise OR of "EHtmlEncode"
3058 
3059  /// Encode a string for HTML.
3060  ///
3061  /// Replace relevant characters by predefined entities.
3062  /// @param str
3063  /// Original string in UTF8 encoding.
3064  static string HtmlEncode(const CTempString str,
3065  THtmlEncode flags = fHtmlEnc_EncodeAll);
3066 
/// HTML-decode flags
enum EHtmlDecode {
    fHtmlDec_CharRef_Entity   = 1,       ///< Character entity reference(s) was found
    fHtmlDec_CharRef_Numeric  = 1 << 1,  ///< Numeric character reference(s) was found
    fHtmlDec_Encoding_Changed = 1 << 2   ///< Character encoding changed
};
typedef int THtmlDecode;  ///< Bitwise OR of "EHtmlDecode"
3074 
3075  /// Decode HTML entities and character references.
3076  ///
3077  /// @param str
3078  /// String to be decoded, which contains characters or numeric HTML entities
3079  /// @param encoding
3080  /// Encoding of the input string
3081  /// @return
3082  /// UTF8 encoded string
3083  static string HtmlDecode(const CTempString str,
3084  EEncoding encoding = eEncoding_Unknown,
3085  THtmlDecode* result_flags = NULL);
3086 
3087  /// Returns HTML entity name for this symbol if one exists
3088  /// (without leading ampersand and trailing semicolon);
3089  /// or empty string if suitable HTML entity was not found
3090  static string HtmlEntity(TUnicodeSymbol uch);
3091 
/// Json-encode flags
enum EJsonEncode {
    eJsonEnc_UTF8,    ///< Encode all characters above 0x80 to \uXXXX form.
                      ///< https://tools.ietf.org/html/rfc7159#section-8.1
    eJsonEnc_Quoted   ///< Quote resulting string. Keep all Unicode symbols as is.
                      ///< https://tools.ietf.org/html/rfc7159#section-7
};
3099  /// Encode a string for JSON.
3100  ///
3101  /// @param str
3102  /// The string to encode.
3103  /// @param encoding
3104  /// Specifies how to encode string.
3105  /// There are 2 approaches, with representing whole string as UTF-8 encoded string,
3106  /// or leave all Unicode symbols "as is", but the resulting string will be put in double quotes.
3107  /// @warning
3108  /// This method is not intended to work with strings that already have UTF-8 encoding,
3109  /// except simple eJsonEnc_Quoted mode, that just quote a string, without any real encoding.
3110  /// Passed string have no information on encoding and JsonEncode() cannot detect it.
3111  /// So, with default eJsonEnc_UTF8 mode such strings will be re-encoded again, so you will
3112  /// have double UTF-8 encoded string as result, that is not what you may expect. Be aware.
3113  /// @return
3114  /// JSON encoded string
3115  static string JsonEncode(const CTempString str, EJsonEncode encoding = eJsonEnc_UTF8);
3116 
3117  /// Decode a string encoded by JsonEncode.
3118  ///
3119  /// @param str
3120  /// The string to encode.
3121  /// It must be in double quotes.
3122  /// @param[out] n_read
3123  /// How many symbols the quoted string occupied in the original string.
3124  /// @sa
3125  /// JsonEncode
3126  /// @warning
3127  /// This method only supports strings encoded by JsonEncode-specific encodings.
3128  static string JsonDecode(const CTempString str, size_t* n_read = NULL);
3129 
3130  /// Quotes a string in Bourne Again Shell (BASH) syntax, in a way
3131  /// that disallows non-printable characters in the result.
3132  /// This function does NOT implement aesthetically optimal quoting,
3133  /// but does try to avoid redundant quoting in simpler cases.
3134  /// Also, since it implements BASH syntax, the result may be
3135  /// incompatible with Bourne syntax, and may be non-obvious to
3136  /// people who are not familiar with the extended quoting syntax.
3137  /// @note The BASH shell has extensions beyond Bourne Shell quoting.
3138  /// Also, this is very different from C Shell quoting, and
3139  /// MS Windows Command Prompt quoting rules.
3140  static string ShellEncode(const string& str);
3141 
/// URL-encode flags
enum EUrlEncode {
    eUrlEnc_SkipMarkChars,     ///< Do not convert chars like '!', '(' etc.
    eUrlEnc_ProcessMarkChars,  ///< Convert all non-alphanumeric chars, spaces are converted to '+'
    eUrlEnc_PercentOnly,       ///< Convert all non-alphanumeric chars including space and '%' to %## format
    eUrlEnc_Path,              ///< Same as ProcessMarkChars but preserves valid path characters ('/', '.')
    eUrlEnc_URIScheme,         ///< Encode scheme part of a URI.
    eUrlEnc_URIUserinfo,       ///< Encode userinfo part of a URI.
    eUrlEnc_URIHost,           ///< Encode host part of a URI.
    eUrlEnc_URIPath,           ///< Encode path part of a URI.
    eUrlEnc_URIQueryName,      ///< Encode query part of a URI, arg name.
    eUrlEnc_URIQueryValue,     ///< Encode query part of a URI, arg value.
    eUrlEnc_URIFragment,       ///< Encode fragment part of a URI.
    eUrlEnc_Cookie,            ///< Same as SkipMarkChars with encoded ','
    eUrlEnc_None               ///< Do not encode
};
/// URL-decode flags
enum EUrlDecode {
    eUrlDec_All,      ///< Decode '+' to space
    eUrlDec_Percent   ///< Decode only %XX
};
3163  /// URL-encode string
3164  static string URLEncode(const CTempString str,
3165  EUrlEncode flag = eUrlEnc_SkipMarkChars);
3166 
/// SQL-encode flags
enum ESqlEncode {
    eSqlEnc_Plain,        ///< Always produce '...', with no tag.
    eSqlEnc_TagNonASCII   ///< Produce N'...' when input's not pure ASCII.
};
3172  /// SQL-encode string
3173  ///
3174  /// There are some assumptions/notes about the function:
3175  /// 1. Only for MS SQL and Sybase.
3176  /// 2. Only for string values in WHERE and LIKE clauses.
3177  /// 3. The ' symbol must not be used as an escape symbol in LIKE clause.
3178  /// 4. It must not be used for non-string values.
3179  /// 5. It expects a string without any outer quotes, and
3180  /// it adds single quotes to the returned string.
3181  /// 6. It expects UTF-8 (including its subsets, ASCII and Latin1) or
3182  /// Win1252 string, and the input encoding is preserved.
3183  /// @param str
3184  /// The string to encode
3185  /// @param flag
3186  /// Whether to tag the result with an N prefix if it contains any
3187  /// non-ASCII characters. Such tagging is generally advisable,
3188  /// but off by default per historical practice, since there are
3189  /// corner cases in which it may be inappropriate.
3190  /// @return
3191  /// Encoded string with added outer single quotes
3192  static CStringUTF8 SQLEncode(const CStringUTF8& str, ESqlEncode flag);
3193 
3195  { return SQLEncode(str, eSqlEnc_Plain); }
3196 
3197  /// URL-decode string
3198  static string URLDecode(const CTempString str, EUrlDecode flag = eUrlDec_All);
3199  /// URL-decode string to itself
3200  static void URLDecodeInPlace(string& str, EUrlDecode flag = eUrlDec_All);
3201  /// Check if the string needs the requested URL-encoding
3202  static bool NeedsURLEncoding(const CTempString str, EUrlEncode flag = eUrlEnc_SkipMarkChars);
3203 
3204  /// Base64-encode string.
3205  ///
3206  /// @param str
3207  /// The string to encode.
3208  /// @param line_len
3209  /// Specify a length for Base64-encoded lines. The default, 0, means no line breaks at all.
3210  /// @return
3211  /// Encoded string.
3212  /// @sa Base64Decode, BASE64_Encode, BASE64_Decode
3213  static string Base64Encode(const CTempString str, size_t line_len = 0);
3214 
3215  /// Base64-decode string
3216  ///
3217  /// @param str
3218  /// The string to decode.
3219  /// @return
3220  /// Decoded string, or empty string on decoding error.
3221  /// @sa Base64Encode, BASE64_Encode, BASE64_Decode
3222  static string Base64Decode(const CTempString str);
3223 
3224  /// Check if the string contains a valid IP address
3225  static bool IsIPAddress(const CTempStringEx str);
3226 
3227 
3228  /// How to wrap the words in a string to a new line.
3229  enum EWrapFlags {
3230  fWrap_Hyphenate = 0x1, ///< Add a hyphen when breaking words?
3231  fWrap_HTMLPre = 0x2, ///< Wrap as pre-formatted HTML?
3232  fWrap_FlatFile = 0x4 ///< Wrap for flat file use.
3233  };
3234  typedef int TWrapFlags; ///< Bitwise OR of "EWrapFlags"
3235 
3236  /// Wrap the specified string into lines of a specified width.
3237  ///
3238  /// Split string "str" into lines of width "width" and add the
3239  /// resulting lines to the destination ("dest" here, the list "arr" in Wrap()). Normally, all
3240  /// lines will begin with "prefix" (counted against "width"),
3241  /// but the first line will instead begin with "prefix1" if
3242  /// you supply it.
3243  ///
3244  /// @param str
3245  /// String to be split into wrapped lines.
3246  /// @param width
3247  /// Width of each wrapped line.
3248  /// @param dest
3249  /// Destination for the wrapped lines (the list of strings "arr" in the Wrap() overloads).
3250  /// @param flags
3251  /// How to wrap the words to a new line. See EWrapFlags documentation.
3252  /// @param prefix
3253  /// The prefix string added to each wrapped line (the first line gets
3254  /// "prefix1" instead, if that is set).
3255  /// If "prefix" is set to 0 (default), do not add a prefix string to the
3256  /// wrapped lines.
3257  /// @param prefix1
3258  /// The prefix string for the first line. Use this for the first line
3259  /// instead of "prefix".
3260  /// If "prefix1" is set to 0 (default), do not add a prefix string to the
3261  /// first line.
3262  /// @return
3263  /// WrapIt() returns nothing; the list-based Wrap() overloads return "arr", the list of wrapped lines.
3264  template<typename _D>
3265  static void WrapIt(const string& str, SIZE_TYPE width,
3266  _D& dest, TWrapFlags flags = 0,
3267  const string* prefix = 0,
3268  const string* prefix1 = 0);
3269 
3271  {
3272  public:
3273  virtual ~IWrapDest() {}
3274  virtual void Append(const string& s) = 0;
3275  virtual void Append(const CTempString& s) = 0;
3276  };
3277 
3279  {
3280  protected:
3281  list<string>& m_list;
3282  public:
3283  CWrapDestStringList(list<string>& l) : m_list(l) {};
3284  virtual void Append(const string& s)
3285  {
3286  m_list.push_back(s);
3287  }
3288  virtual void Append(const CTempString& s)
3289  {
3290  m_list.push_back(NcbiEmptyString);
3291  m_list.back().assign(s.data(), s.length());
3292  }
3293  };
3294 
3295  static void Wrap(const string& str, SIZE_TYPE width, // overload that takes an IWrapDest sink instead of a list; returns nothing
3296  IWrapDest& dest, TWrapFlags flags, // "flags" has no default value in this overload
3297  const string* prefix, // passed by pointer; no default value in this overload
3298  const string* prefix1); // passed by pointer; no default value in this overload
3299 
3300  static list<string>& Wrap(const string& str, SIZE_TYPE width, // list-based overload; both prefixes are optional pointers
3301  list<string>& arr, TWrapFlags flags = 0, // "flags" defaults to 0
3302  const string* prefix = 0, // defaults to a null pointer
3303  const string* prefix1 = 0); // defaults to a null pointer
3304 
3305  static list<string>& Wrap(const string& str, SIZE_TYPE width, // list-based overload taking "prefix" by reference
3306  list<string>& arr, TWrapFlags flags, // "flags" must be given explicitly here
3307  const string& prefix, // prefix passed as a string reference rather than a pointer
3308  const string* prefix1 = 0); // first-line prefix stays an optional pointer (null by default)
3309 
3310  static list<string>& Wrap(const string& str, SIZE_TYPE width, // list-based overload taking both prefixes by reference
3311  list<string>& arr, TWrapFlags flags, // "flags" must be given explicitly here
3312  const string& prefix, // prefix passed as a string reference
3313  const string& prefix1); // first-line prefix passed as a string reference
3314 
3315 
3316  /// Wrap the list using the specified criteria.
3317  ///
3318  /// WrapList() is similar to Wrap(), but tries to avoid splitting any
3319  /// elements of the list to be wrapped. Also, the "delim" only applies
3320  /// between elements on the same line; if you want everything to end with
3321  /// commas or such, you should add them first.
3322  ///
3323  /// @param l
3324  /// The list to be wrapped.
3325  /// @param width
3326  /// Width of each wrapped line.
3327  /// @param delim
3328  /// Delimiters used to split elements on the same line.
3329  /// @param arr
3330  /// List containing the wrapped list result.
3331  /// @param flags
3332  /// How to wrap the words to a new line. See EWrapFlags documentation.
3333  /// @param prefix
3334  /// The prefix string added to each wrapped line (the first line gets
3335  /// "prefix1" instead, if that is set).
3336  /// If "prefix" is set to 0 (default), do not add a prefix string to the
3337  /// wrapped lines.
3338  /// @param prefix1
3339  /// The prefix string for the first line. Use this for the first line
3340  /// instead of "prefix".
3341  /// If "prefix1" is set to 0 (default), do not add a prefix string to the
3342  /// first line.
3343  /// @return
3344  /// Return "arr", the wrapped list.
3345  static list<string>& WrapList(const list<string>& l, SIZE_TYPE width,
3346  const string& delim, list<string>& arr,
3347  TWrapFlags flags = 0,
3348  const string* prefix = 0,
3349  const string* prefix1 = 0);
3350 
3351  static list<string>& WrapList(const list<string>& l, SIZE_TYPE width, // overload taking "prefix" by reference
3352  const string& delim, list<string>& arr,
3353  TWrapFlags flags, // "flags" must be given explicitly here
3354  const string& prefix, // prefix passed as a string reference rather than a pointer
3355  const string* prefix1 = 0); // first-line prefix stays an optional pointer (null by default)
3356 
3357  static list<string>& WrapList(const list<string>& l, SIZE_TYPE width, // overload taking both prefixes by reference
3358  const string& delim, list<string>& arr,
3359  TWrapFlags flags, // "flags" must be given explicitly here
3360  const string& prefix, // prefix passed as a string reference
3361  const string& prefix1); // first-line prefix passed as a string reference
3362 
3363 
3364  /// Justify the specified string into a series of lines of the same width.
3365  ///
3366  /// Split string "str" into a series of lines, all of which (except for the
3367  /// final one) are to be exactly "width" characters wide (by adding extra
3368  /// inner spaces between words when necessary), and store the resultant
3369  /// lines in the list "par". Normally, all lines in "par" will begin with
3370  /// "pfx" (counted against "width"), but the first line can instead begin
3371  /// with "pfx1" if provided.
3372  ///
3373  /// @note Words exceeding the specified "width" will not be split between
3374  /// lines but occupy individual lines (which will be wider than "width").
3375  ///
3376  /// @param str
3377  /// String to be split into justified lines.
3378  /// @param width
3379  /// Width of every line (except for the last one).
3380  /// @param par
3381  /// Resultant list of justified lines.
3382  /// @param pfx
3383  /// The prefix string added to each line, except for the first line
3384  /// if non-NULL "pfx1" is also set. Empty (or NULL) "pfx" causes no
3385  /// additions.
3386  /// @param pfx1
3387  /// The prefix string for the first line, if non-NULL.
3388  /// @return
3389  /// Return "par", the list of justified lines (a paragraph).
3390  static list<string>& Justify(const CTempString str,
3391  SIZE_TYPE width,
3392  list<string>& par,
3393  const CTempString* pfx = 0,
3394  const CTempString* pfx1 = 0);
3395 
3396  static list<string>& Justify(const CTempString str, // overload taking "pfx" by value
3397  SIZE_TYPE width,
3398  list<string>& par,
3399  const CTempString pfx, // prefix passed as a CTempString value rather than a pointer
3400  const CTempString* pfx1 = 0); // first-line prefix stays an optional pointer (null by default)
3401 
3402  static list<string>& Justify(const CTempString str, // overload taking both prefixes by value
3403  SIZE_TYPE width,
3404  list<string>& par,
3405  const CTempString pfx, // prefix passed as a CTempString value
3406  const CTempString pfx1); // first-line prefix passed as a CTempString value
3407 
3408  /// Flags for Dedent() method
3410  fDedent_NormalizeEmptyLines = 1 << 0, ///< Each line containing only whitespace will be normalized
3411  ///< to a single newline character in the output. Such lines
3412  ///< are excluded from detecting common whitespace prefix.
3413  // The following flags are useful when processing raw multi-line string literals, R"(...)"
3414  fDedent_SkipFirstLine = 1 << 1, ///< Ignore first line and skip it from the result.
3415  fDedent_SkipEmptyFirstLine = 1 << 2, ///< Ignore first line and skip it from the result, if it is empty only.
3416  };
3417  typedef int TDedentFlags; ///< Bitwise OR of EDedentFlags
3418 
3419  /// Dedent multi-line string, removing common whitespace prefix for each line.
3420  ///
3421  /// @param str
3422  /// String to be dedented.
3423  /// @param flags
3424  /// Optional flags to tune up how to dedent string.
3425  /// @return
3426  /// String with removed common whitespace indentation.
3427  /// @note
3428  /// Empty lines, and lines containing whitespace only with fDedent_NormalizeEmptyLines flag,
3429  /// are not used in computing common whitespace prefix.
3430  /// @note
3431  /// Assumes that whitespace prefixes are the same on each line; in other words,
3432  /// if the common prefix has a mix of spaces and tab characters, that mix must
3433  /// be the same on each line. Otherwise this method may work incorrectly.
3434  /// You can also use Replace() first to replace tabs and make whitespace
3435  /// consistent across lines.
3436  /// @sa
3437  /// Replace, DedentR
3438  static string Dedent(const CTempString str, TDedentFlags flags = 0);
3439 
3440  /// Dedent multi-line string, removing common whitespace prefix for each line.
3441  ///
3442  /// Version for raw multi-line string literals, R"(...)", embedded in the C++ code.
3443  /// @example
3444  ///
3445  /// make_request(NStr::DedentR(R"(
3446  /// {
3447  /// "param1": some_val,
3448  /// "param2": another_val,
3449  /// "param3": "These lines are easy-to-read ",
3450  /// "param4": "and don't interrupt the flow of indentation."
3451  /// }
3452  /// )"));
3453  /// @sa
3454  /// Dedent
3455  static string DedentR(const CTempString str);
3456 
3457  /// Search for a field.
3458  ///
3459  /// @param str
3460  /// C or C++ string to search in.
3461  /// @param field_no
3462  /// Zero-based field number.
3463  /// @param delimiters
3464  /// A set of single-character delimiters.
3465  /// @param merge
3466  /// Whether or not to merge adjacent delimiters. Default: do not merge.
3467  /// @return
3468  /// Found field; or empty string if the required field is not found.
3469  /// @note
3470  /// Field 0 spans up to the first-found delimiter or the end-of-string.
3471  static string GetField(const CTempString str,
3472  size_t field_no,
3473  const CTempString delimiters,
3474  EMergeDelims merge = eNoMergeDelims);
3475 
3476  /// Search for a field.
3477  ///
3478  /// @param str
3479  /// C or C++ string to search in.
3480  /// @param field_no
3481  /// Zero-based field number.
3482  /// @param delimiter
3483  /// A single-character delimiter.
3484  /// @param merge
3485  /// Whether or not to merge adjacent delimiters. Default: do not merge.
3486  /// @return
3487  /// Found field; or empty string if the required field is not found.
3488  /// @note
3489  /// Field 0 spans up to the delimiter or the end-of-string.
3490  static string GetField(const CTempString str,
3491  size_t field_no,
3492  char delimiter,
3493  EMergeDelims merge = eNoMergeDelims);
3494 
3495  /// Search for a field.
3496  /// Avoid memory allocation at the expense of some usage safety.
3497  ///
3498  /// @param str
3499  /// C or C++ string to search in.
3500  /// @param field_no
3501  /// Zero-based field number.
3502  /// @param delimiters
3503  /// A set of single-character delimiters.
3504  /// @param merge
3505  /// Whether or not to merge adjacent delimiters. Default: do not merge.
3506  /// @return
3507  /// Found field; or empty string if the required field is not found.
3508  /// @note
3509  /// Field 0 spans up to the first-found delimiter or the end-of-string.
3510  /// @warning
3511  /// The returned value stores a pointer to the input string 'str', so
3512  /// the returned object is valid only for the lifetime of the input 'str'.
3513  static
3514  CTempString GetField_Unsafe(const CTempString str,
3515  size_t field_no,
3516  const CTempString delimiters,
3517  EMergeDelims merge = eNoMergeDelims);
3518 
3519  /// Search for a field.
3520  /// Avoid memory allocation at the expense of some usage safety.
3521  ///
3522  /// @param str
3523  /// C or C++ string to search in.
3524  /// @param field_no
3525  /// Zero-based field number.
3526  /// @param delimiter
3527  /// A single-character delimiter.
3528  /// @param merge
3529  /// Whether or not to merge adjacent delimiters. Default: do not merge.
3530  /// @return
3531  /// Found field; or empty string if the required field is not found.
3532  /// @note
3533  /// Field 0 spans up to the delimiter or the end-of-string.
3534  /// @warning
3535  /// The returned value stores a pointer to the input string 'str', so
3536  /// the returned object is valid only for the lifetime of the input 'str'.
3537  static
3538  CTempString GetField_Unsafe(const CTempString str,
3539  size_t field_no,
3540  char delimiter,
3541  EMergeDelims merge = eNoMergeDelims);
3542 
3543 private:
3544 // implementations
3545 
3546 // StringToNumeric
3547  static bool x_ReportLimitsError(const CTempString str, TStringToNumFlags flags);
3548 
3549  template< typename TNumeric, typename TSource>
3551  {
3553  return x_ReportLimitsError(str, flags);
3554  }
3555  return true;
3556  }
3557  template< typename TNumeric, typename TSource>
3559  {
3560  // dont use ::min() for float types, it returns positive value
3562  return x_ReportLimitsError(str, flags);
3563  }
3564  return true;
3565  }
3566 
3567  template <typename TNumeric>
3568  static typename enable_if< is_integral<TNumeric>::value && is_signed<TNumeric>::value && (sizeof(TNumeric) < sizeof(int)), TNumeric>::type
3570  {
3571  int n = StringToInt(str, flags, base);
3572  return x_VerifyIntLimits<TNumeric>(n, str, flags) ? (TNumeric)n : 0;
3573  }
3574  template <typename TNumeric>
3575  static typename enable_if< is_integral<TNumeric>::value && is_unsigned<TNumeric>::value && (sizeof(TNumeric) < sizeof(unsigned int)), TNumeric>::type
3577  {
3578  unsigned int n = StringToUInt(str, flags, base);
3579  return x_VerifyIntLimits<TNumeric>(n, str, flags) ? (TNumeric)n : 0;
3580  }
3581 
3582  template <typename TNumeric>
3583  static typename enable_if< is_integral<TNumeric>::value && is_signed<TNumeric>::value && (sizeof(TNumeric) == sizeof(int) && !is_same<TNumeric, long>::value), TNumeric>::type
3585  {
3586  return StringToInt(str, flags, base);
3587  }
3588  template <typename TNumeric>
3589  static typename enable_if< is_integral<TNumeric>::value && is_unsigned<TNumeric>::value && (sizeof(TNumeric) == sizeof(unsigned int) && !is_same<TNumeric, unsigned long>::value), TNumeric>::type
3591  {
3592  return StringToUInt(str, flags, base);
3593  }
3594  template <typename TNumeric>
3595  static typename enable_if< is_same<TNumeric, long>::value, TNumeric>::type
3597  {
3598  return StringToLong(str, flags, base);
3599  }
3600  template <typename TNumeric>
3603  {
3604  return StringToULong(str, flags, base);
3605  }
3606  template <typename TNumeric>
3607  static typename enable_if< is_integral<TNumeric>::value && is_signed<TNumeric>::value && (sizeof(TNumeric) == sizeof(Int8) && !is_same<TNumeric, long>::value), TNumeric>::type
3609  {
3610  return StringToInt8(str, flags, base);
3611  }
3612  template <typename TNumeric>
3613  static typename enable_if< is_integral<TNumeric>::value && is_unsigned<TNumeric>::value && (sizeof(TNumeric) == sizeof(Uint8) && !is_same<TNumeric, unsigned long>::value), TNumeric>::type
3615  {
3616  return StringToUInt8(str, flags, base);
3617  }
3618  template <typename TStrictId>
3619  static typename enable_if< is_integral<typename TStrictId::TId>::value && is_member_function_pointer<decltype(&TStrictId::Get)>::value, TStrictId>::type
3621  {
3622  return TStrictId(StringToNumeric<typename TStrictId::TId>(str, flags, base));
3623  }
3624 
3625  template <typename TNumeric>
3626  static typename enable_if< is_same<TNumeric, float>::value, TNumeric>::type
3628  {
3629  double n = StringToDouble(str, flags);
3630  return x_VerifyFloatLimits<TNumeric>(n, str, flags) ? (TNumeric)n : 0;
3631  }
3632  template <typename TNumeric>
3633  static typename enable_if< is_same<TNumeric, double>::value, TNumeric>::type
3635  {
3636  return StringToDouble(str, flags);
3637  }
3638 
3639  template <typename TNumeric>
3640  static typename enable_if< is_integral<TNumeric>::value && is_signed<TNumeric>::value && (sizeof(TNumeric) < sizeof(int)), bool>::type
3642  {
3643  int n = StringToInt(str, flags, base);
3644  *value = 0;
3645  if (( !n && errno ) || !x_VerifyIntLimits<TNumeric>(n, str, flags)) {
3646  return false;
3647  }
3648  *value = (TNumeric) n;
3649  return true;
3650  }
3651  template <typename TNumeric>
3652  static typename enable_if< is_integral<TNumeric>::value && is_unsigned<TNumeric>::value && (sizeof(TNumeric) < sizeof(unsigned int)), bool>::type
3654  {
3655  unsigned int n = StringToUInt(str, flags, base);
3656  *value = 0;
3657  if (( !n && errno ) || !x_VerifyIntLimits<TNumeric>(n, str, flags)) {
3658  return false;
3659  }
3660  *value = (TNumeric) n;
3661  return true;
3662  }
3663  template <typename TNumeric>
3664  static typename enable_if< is_integral<TNumeric>::value && is_signed<TNumeric>::value && (sizeof(TNumeric) == sizeof(int) && !is_same<TNumeric, long>::value), bool>::type
3666  {
3667  *value = StringToInt(str, flags, base);
3668  return (*value || !errno);
3669  }
3670  template <typename TNumeric>
3671  static typename enable_if< is_integral<TNumeric>::value && is_unsigned<TNumeric>::value && (sizeof(TNumeric) == sizeof(unsigned int) && !is_same<TNumeric, unsigned long>::value), bool>::type
3673  {
3674  *value = StringToUInt(str, flags, base);
3675  return (*value || !errno);
3676  }
3677  static bool
3679  {
3680  *value = StringToLong(str, flags, base);
3681  return (*value || !errno);
3682  }
3683  static bool // unsigned long overload: parse "str" and report success as a bool
3684  x_StringToNumeric(const CTempString str, unsigned long* value, TStringToNumFlags flags, int base)
3685  {
3686  *value = StringToULong(str, flags, base); // store the result of StringToULong() in *value
3687  return (*value || !errno); // a zero result counts as success only when errno is also zero
3688  }
3689  template <typename TNumeric>
3690  static typename enable_if< is_integral<TNumeric>::value && is_signed<TNumeric>::value && (sizeof(TNumeric) == sizeof(Int8) && !is_same<TNumeric, long>::value), bool>::type
3692  {
3693  *value = StringToInt8(str, flags, base);
3694  return (*value || !errno);
3695  }
3696  template <typename TNumeric>
3699  {
3700  *value = StringToUInt8(str, flags, base);
3701  return (*value || !errno);
3702  }
3703  static bool
3705  {
3706  double n = StringToDouble(str, flags);
3707  *value = 0;
3708  if (( !n && errno ) || !x_VerifyFloatLimits<float>(n, str, flags)) {
3709  return false;
3710  }
3711  *value = (float) n;
3712  return true;
3713  }
3714  static bool
3716  {
3717  *value = StringToDouble(str, flags);
3718  return (*value || !errno);
3719  }
3720  template <typename TStrictId>
3721  static typename enable_if< is_integral<typename TStrictId::TId>::value && is_member_function_pointer<decltype(&TStrictId::Get)>::value, bool>::type
3723  {
3724  return x_StringToNumeric(str, &value->Set(), flags, base);
3725  }
3726 
3727 // NumericToString
3728  template<typename TNumeric> // overload for signed integral types no larger than int (long itself is excluded)
3729  static typename enable_if< is_integral<TNumeric>::value && is_signed<TNumeric>::value && (sizeof(TNumeric) <= sizeof(int) && !is_same<TNumeric, long>::value), void>::type
3730  x_NumericToString(string& out_str, TNumeric value, TNumToStringFlags flags, int base)
3731  {
3732  IntToString(out_str, value, flags, base); // delegate to IntToString(), which writes into out_str
3733  }
3734  template<typename TNumeric> // overload for unsigned integral types no larger than unsigned int (unsigned long itself is excluded)
3735  static typename enable_if< is_integral<TNumeric>::value && is_unsigned<TNumeric>::value && (sizeof(TNumeric) <= sizeof(unsigned int) && !is_same<TNumeric, unsigned long>::value), void>::type
3736  x_NumericToString(string& out_str, TNumeric value, TNumToStringFlags flags, int base)
3737  {
3738  UIntToString(out_str, value, flags, base); // delegate to UIntToString(), which writes into out_str
3739  }
3740  static void // non-template overload for exactly "long"
3741  x_NumericToString(string& out_str, long value, TNumToStringFlags flags, int base)
3742  {
3743  LongToString(out_str, value, flags, base); // delegate to LongToString(), which writes into out_str
3744  }
3745  static void // non-template overload for exactly "unsigned long"
3746  x_NumericToString(string& out_str, unsigned long value, TNumToStringFlags flags, int base)
3747  {
3748  ULongToString(out_str, value, flags, base); // delegate to ULongToString(), which writes into out_str
3749  }
3750 #if NCBI_COMPILER_MSVC && (_MSC_VER < 1900) // compiled only for MSVC with _MSC_VER below 1900; NOTE(review): presumably a workaround for older MSVC - confirm
3751  static void // non-template overload for exactly "Int8"
3752  x_NumericToString(string& out_str, Int8 value, TNumToStringFlags flags, int base)
3753  {
3754  Int8ToString(out_str, value, flags, base); // delegate to Int8ToString(), which writes into out_str
3755  }
3756  static void // non-template overload for exactly "Uint8"
3757  x_NumericToString(string& out_str, Uint8 value, TNumToStringFlags flags, int base)
3758  {
3759  UInt8ToString(out_str, value, flags, base); // delegate to UInt8ToString(), which writes into out_str
3760  }
3761 #endif
3762  template<typename TNumeric> // overload for signed integral types with the same size as Int8 (long itself is excluded)
3763  static typename enable_if< is_integral<TNumeric>::value && is_signed<TNumeric>::value && (sizeof(TNumeric) == sizeof(Int8) && !is_same<TNumeric, long>::value), void>::type
3764  x_NumericToString(string& out_str, TNumeric value, TNumToStringFlags flags, int base)
3765  {
3766  Int8ToString(out_str, value, flags, base); // delegate to Int8ToString(), which writes into out_str
3767  }
3768  template<typename TNumeric>
3770  x_NumericToString(string& out_str, TNumeric value, TNumToStringFlags flags, int base)
3771  {
3772  UInt8ToString(out_str, value, flags, base);
3773  }
3774  template<typename TNumeric>
3776  x_NumericToString(string& out_str, TNumeric value, TNumToStringFlags flags, int /*base*/)
3777  {
3778  DoubleToString(out_str, value, -1, flags);
3779  }
3780  template <typename TStrictId> // overload for wrapper types that expose an integral TId and a Get() member function
3781  static typename enable_if< is_integral<typename TStrictId::TId>::value && is_member_function_pointer<decltype(&TStrictId::Get)>::value, void>::type
3782  x_NumericToString(string& out_str, TStrictId value, TNumToStringFlags flags, int base)
3783  {
3784  return x_NumericToString(out_str, value.Get(), flags, base); // unwrap with Get() and forward to the overload matching its result; returns void
3785  }
3786 
3787 
3788 // Join
3789  template<typename TIterator>
3790  static string xx_Join( TIterator from, TIterator to, const CTempString& delim);
3791 
3792  template<typename TIterator>
3795  x_Join( TIterator from, TIterator to, const CTempString& delim)
3796  {
3797  return TransformJoin(from, to, delim, [](const typename TIterator::value_type& i){ return i;});
3798  }
3799 
3800  template<typename TIterator>
3803  x_Join( TIterator from, TIterator to, const CTempString& delim)
3804  {
3805  return xx_Join(from, to, delim);
3806  }
3807 
3808  template<typename TValue>
3810  x_Join( TValue* from, TValue* to, const CTempString& delim)
3811  {
3812  return xx_Join(from, to, delim);
3813  }
3814 
3815  template<typename TIterator>
3818  x_Join( TIterator from, TIterator to, const CTempString& delim)
3819  {
3820  return TransformJoin( from, to, delim, [](const typename TIterator::value_type& i){ return NumericToString(i);});
3821  }
3822 
3823  template<typename TValue> // overload for a [from, to) range of arithmetic values given as raw pointers
3824  static typename enable_if<is_arithmetic<TValue>::value, string>::type
3825  x_Join( TValue* from, TValue* to, const CTempString& delim)
3826  {
3827  return TransformJoin( from, to, delim, [](const TValue& i){ return NumericToString(i);}); // each element goes through NumericToString() in the transform passed to TransformJoin()
3828  }
3829 }; // class NStr
3830 
3831 
3832 
3833 /////////////////////////////////////////////////////////////////////////////
3834 ///
3835 
3836 
3837 #define NCBITOOLKIT_USE_LONG_UCS4 (SIZEOF_LONG == 4)
3838 #if NCBITOOLKIT_USE_LONG_UCS4
3839 /// UCS-4 character
3840 typedef unsigned long TCharUCS4;
3841 /// UCS-4 string
3842 typedef basic_string<TCharUCS4> TStringUCS4;
3843 #else
3846 #endif
3847 
3848 /// Type for character in UCS-2 encoding
3850 /// Type for string in UCS-2 encoding
3851 typedef basic_string<TCharUCS2> TStringUCS2;
3852 
3853 
3854 /// Operator for writing TStringUCS2 to stream.
3855 /// Operator is needed for using in SDBAPI.
3857 {
3858  os.write((const char*)str.data(), str.size() * sizeof(TCharUCS2));
3859  return os;
3860 }
3861 
3862 
3863 
3864 /////////////////////////////////////////////////////////////////////////////
3865 ///
3866 /// CUtf8 --
3867 ///
3868 /// Utility class to handle strings in UTF8 encoding.
3869 /// Can convert data to and from the following encodings:
3870 /// ISO 8859-1 (Latin1)
3871 /// Microsoft Windows code page 1252
3872 /// UCS-2, UCS-4
3873 
3875 {
3876 public:
3877  /// How to verify character encoding of the source data
3878  enum EValidate {
3880  eValidate
3881  };
3882 
3883  /// Convert a C/C++ string in the given encoding into a new UTF8 string
3884  ///
3885  /// @param src
3886  /// Source string
3887  /// @param encoding
3888  /// Character encoding of the source string
3889  /// @param validate
3890  /// Whether to verify the character encoding of the source (default: eNoValidate)
3891  static CStringUTF8 AsUTF8(const CTempString& src,
3892  EEncoding encoding,
3893  EValidate validate = eNoValidate)
3894  {
3895  CStringUTF8 u8; // default-constructed destination string
3896  return x_Append(u8,src,encoding,validate); // let x_Append() do the conversion into u8; the result is returned by value
3897  }
3898 
3899 #if defined(HAVE_WSTRING)
3900  /// Convert a C/C++ string into a new UTF8 string, using a locale
3901  ///
3902  /// @param src
3903  /// Source string
3904  /// @param lcl
3905  /// Locale of the source string
3906  static CStringUTF8 AsUTF8(const CTempString& src, const locale& lcl)
3907  {
3908  CStringUTF8 u8; // default-constructed destination string
3909  return x_Append(u8,src,lcl); // let x_Append() do the conversion into u8; the result is returned by value
3910  }
3911 #endif
3912 
3913  /// Convert a Unicode C++ string into a new UTF8 string
3914  ///
3915  /// @param src
3916  /// Source string
3917  /// @attention
3918  /// Only for TStringUnicode, TStringUCS4, TStringUCS2, wstring types
3919  template <typename TChar> // enabled only for integral character types larger than one byte
3920  static typename enable_if< is_integral<TChar>::value && (1 < sizeof(TChar)), CStringUTF8>::type
3921  AsUTF8(const basic_string<TChar>& src)
3922  {
3923  CStringUTF8 u8; // default-constructed destination string
3924  return x_Append(u8, src.data(), src.size()); // hand over the string's buffer and its length in TChar units
3925  }
3926 
3927  /// Convert a Unicode character buffer into a new UTF8 string
3928  ///
3929  /// @param src
3930  /// Source character buffer
3931  /// @param tchar_count
3932  /// Number of characters in the buffer;
3933  /// if it equals NPOS (the default), the buffer is assumed to be zero-terminated
3934  template <typename TChar> // enabled only for integral character types larger than one byte
3935  static typename enable_if< is_integral<TChar>::value && (1 < sizeof(TChar)), CStringUTF8>::type
3936  AsUTF8(const TChar* src, SIZE_TYPE tchar_count = NPOS)
3937  {
3938  CStringUTF8 u8; // default-constructed destination string
3939  return x_Append(u8, src, tchar_count); // let x_Append() do the conversion into u8; the result is returned by value
3940  }
3941 
3942  /// Convert Unicode C++ string into UTF8 and append it to existing string
3943  ///
3944  /// @param dest
3945  /// Existing UTF8 string
3946  /// @param src
3947  /// Source Unicode string
3948  /// @return
3949  /// Reference to the modified dest string
3950  template <typename TChar> // enabled only for integral character types larger than one byte
3951  static typename enable_if< is_integral<TChar>::value && (1 < sizeof(TChar)), CStringUTF8& >::type
3952  AppendAsUTF8(CStringUTF8& dest, const basic_string<TChar>& src)
3953  {
3954  return x_Append(dest, src.data(), src.size()); // hand over the string's buffer and its length in TChar units
3955  }
3956 
3957  /// Convert Unicode character buffer into UTF8 and append it to existing string
3958  ///
3959  /// @param dest
3960  /// Existing UTF8 string
3961  /// @param src
3962  /// Source Unicode character buffer
3963  /// @param tchar_count
3964  /// Number of characters in the buffer;
3965  /// if it equals NPOS (the default), the buffer is assumed to be zero-terminated
3966  /// @return
3967  /// Reference to the modified dest string
3968  template <typename TChar> // enabled only for integral character types larger than one byte
3969  static typename enable_if< is_integral<TChar>::value && (1 < sizeof(TChar)), CStringUTF8& >::type
3970  AppendAsUTF8(CStringUTF8& dest, const TChar* src, SIZE_TYPE tchar_count = NPOS)
3971  {
3972  return x_Append(dest, src, tchar_count); // forward to x_Append(), which works on dest directly
3973  }
3974 
3975  /// Convert Unicode symbol into UTF8 and append it to existing string
3976  ///
3977  /// @param dest
3978  /// Existing UTF8 string
3979  /// @param ch
3980  /// Unicode symbol
3981  /// @return
3982  /// reference to modified dest string
3983  template <typename TChar>
3984  static typename enable_if< is_integral<TChar>::value && (1 < sizeof(TChar)), CStringUTF8& >::type
3986  {
3987  return x_Append(dest, &ch, 1);
3988  }
3989 
3990  /// Convert non-Unicode C++ string into UTF8 and append it to existing string
3991  ///
3992  /// @param dest
3993  /// Existing UTF8 string
3994  /// @param src
3995  /// Source string
3996  /// @param encoding
3997  /// Character encoding of the source string
3998  /// @param validate
3999  /// Verify the character encoding of the source
4000  /// @return
4001  /// reference to modified dest string
4003  const CTempString& src,
4004  EEncoding encoding,
4005  EValidate validate = eNoValidate)
4006  {
4007  return x_Append(dest,src,encoding,validate);
4008  }
4009 
4010 #if defined(HAVE_WSTRING)
4011  /// Convert non-Unicode C++ string into UTF8 and append it to existing string
4012  ///
4013  /// @param dest
4014  /// Existing UTF8 string
4015  /// @param src
4016  /// Source string
4017  /// @param lcl
4018  /// Source string locale
4019  /// @return
4020  /// reference to modified dest string
4022  const CTempString& src,
4023  const locale& lcl)
4024  {
4025  return x_Append(dest,src,lcl);
4026  }
4027 #endif
4028 
4029  /// Convert non-Unicode character into UTF8 and append it to existing string
4030  ///
4031  /// @param dest
4032  /// Existing UTF8 string
4033  /// @param ch
4034  /// Character
4035  /// @param encoding
4036  /// Character encoding
4037  /// @param validate
4038  /// Verify the character encoding of the source
4039  /// @return
4040  /// reference to modified dest string
4042  char ch,
4043  EEncoding encoding,
4044  EValidate validate = eNoValidate)
4045  {
4046  return x_Append(dest,CTempString(&ch,1),encoding,validate);
4047  }
4048 
4049 #if defined(HAVE_WSTRING)
4050  /// Convert non-Unicode character into UTF8 and append it to existing string
4051  ///
4052  /// @param dest
4053  /// Existing UTF8 string
4054  /// @param ch
4055  /// Character
4056  /// @param lcl
4057  /// Character locale
4058  /// @return
4059  /// reference to modified dest string
4061  char ch,
4062  const locale& lcl)
4063  {
4064  return x_Append(dest,CTempString(&ch,1),lcl);
4065  }
4066 #endif
4067 
4068  /// Convert UTF8 string into a single-byte character representation
4069  ///
4070  /// Can throw a CStringException if the conversion is impossible
4071  /// or the string has invalid UTF-8 encoding.
4072  ///
4073  /// @param src
4074  /// Source UTF8 string
4075  /// @param encoding
4076  /// Encoding of the result
4077  /// @param substitute_on_error
4078  /// If the conversion is impossible, append the provided string
4079  /// or, if substitute_on_error equals 0, throw an exception
4080  /// @param validate
4081  /// Verify UTF8 character encoding of the source
4082  /// @return
4083  /// C++ string
4084  static string AsSingleByteString
4085  (const CTempString& src, EEncoding encoding,
4086  const char* substitute_on_error = 0, EValidate validate = eNoValidate);
4087 
4088 #if defined(HAVE_WSTRING)
4089  static string AsSingleByteString
4090  (const CTempString& src, const locale& lcl,
4091  const char* substitute_on_error = 0, EValidate validate = eNoValidate);
4092 #endif
4093 
4094  /// Convert UTF8 string into a Unicode basic_string<TChar>
4095  ///
4096  /// Can throw a CStringException if the conversion is impossible
4097  /// or the string has invalid UTF-8 encoding.
4098  ///
4099  /// @param src
4100  /// Source UTF8 string
4101  /// @param substitute_on_error
4102  /// If the conversion is impossible, append the provided string
4103  /// or, if substitute_on_error equals 0, throw an exception
4104  /// @param validate
4105  /// Whether to verify UTF8 character encoding of the source (default: eNoValidate)
4106  /// @attention
4107  /// Only for TStringUnicode, TStringUCS4, TStringUCS2, wstring types
4108  template <typename TChar> // enabled only for integral character types larger than one byte
4109  static typename enable_if< is_integral<TChar>::value && (1 < sizeof(TChar)), basic_string<TChar> >::type
4110  AsBasicString(const CTempString& src, const TChar* substitute_on_error, EValidate validate = eNoValidate)
4111  {
4112  return x_AsBasicString(src,substitute_on_error,validate); // no explicit template argument: TChar is deduced from substitute_on_error
4113  }
4114 
4115  template <typename TChar>
4116  static typename enable_if< is_integral<TChar>::value && (1 < sizeof(TChar)), basic_string<TChar> >::type
4118  {
4119  return x_AsBasicString<TChar>(src,nullptr,eNoValidate);
4120  }
4121 
4122  /// Get the number of symbols (code points) in UTF8 string
4123  ///
4124  /// @param src
4125  /// Source UTF8 string
4126  /// @return
4127  /// Number of symbols (code points)
4128  static SIZE_TYPE GetSymbolCount(const CTempString& src);
4129 
4130  /// Get the number of valid UTF-8 symbols (code points) in buffer
4131  ///
4132  /// @param src
4133  /// Character buffer
4134  /// @return
4135  /// Number of valid symbols (no exception thrown)
4136  static SIZE_TYPE GetValidSymbolCount(const CTempString& src);
4137 
4138  /// Get the number of valid UTF-8 bytes (code units) in buffer
4139  ///
4140  /// @param src
4141  /// Character buffer
4142  /// @return
4143  /// Number of valid bytes (no exception thrown)
4144  static SIZE_TYPE GetValidBytesCount(const CTempString& src);
4145 
4146  /// Check buffer for presence of UTF-8 byte sequence and return length of first symbol
4147  ///
4148  /// @param src
4149  /// Character buffer
4150  /// @return
4151  /// Number of bytes
4152  static SIZE_TYPE EvaluateSymbolLength(const CTempString& src);
4153 
4154  /// Check that the character is valid first byte of an UTF8 byte sequence
4155  ///
4156  /// @param ch
4157  /// Character
4158  /// @param more
4159  /// Number of additional bytes to expect
4160  /// @return
4161  /// true, if this is a valid first byte
4162  static bool EvaluateFirst(char ch, SIZE_TYPE& more) {
4163  return x_EvalFirst(ch, more);
4164  }
4165 
4166  /// Check that the character is valid continuation byte of an UTF8 byte sequence
4167  ///
4168  /// @param ch
4169  /// Character
4170  /// @return
4171  /// true, if this is a valid byte
4172  static bool EvaluateNext(char ch) {
4173  return x_EvalNext(ch);
4174  }
4175 
4176  /// Check the encoding of the C/C++ string
4177  ///
4178  /// Check that the encoding of the source is the same, or
4179  /// is compatible with the specified one
4180  /// @param src
4181  /// Source string
4182  /// @param encoding
4183  /// Character encoding form to check against
4184  /// @return
4185  /// Boolean result: encoding is same or compatible
4186  static bool MatchEncoding(const CTempString& src, EEncoding encoding);
4187 
4188  /// Guess the encoding of the C/C++ string
4189  ///
4190  /// It can distinguish between UTF-8, Latin1, and Win1252 only
4191  /// @param src
4192  /// Character buffer
4193  /// @return
4194  /// Encoding as guessed; eEncoding_Unknown if cannot guess
4195  static EEncoding GuessEncoding(const CTempString& src);
4196 
4197  /// Give Encoding name as string
4198  ///
4199  /// @param encoding
4200  /// EEncoding enum. (Throw CStringException if passed eEncoding_Unknown.)
4201  /// @return
4202  /// Encoding name
4203  static string EncodingToString(EEncoding encoding);
4204 
4205  /// Convert encoding name into EEncoding enum, taking into account synonyms
4206  /// as per http://www.iana.org/assignments/character-sets
4207  ///
4208  /// @param encoding_name
4209  /// Name of the encoding
4210  /// @return
4211  /// EEncoding enum; eEncoding_Unknown for unsupported encodings
4212  static EEncoding StringToEncoding(const CTempString& encoding_name);
4213 
4214  /// Convert encoded character into Unicode
4215  ///
4216  /// @param ch
4217  /// Encoded character
4218  /// @param encoding
4219  /// Character encoding
4220  /// @return
4221  /// Unicode code point (symbol)
4222  static TUnicodeSymbol CharToSymbol(char ch, EEncoding encoding);
4223 
4224 #if defined(HAVE_WSTRING)
4225  /// Convert encoded character into Unicode
4226  ///
4227  /// @param ch
4228  /// Encoded character
4229  /// @param lcl
4230  /// Character locale
4231  /// @return
4232  /// Unicode code point (symbol)
4233  static TUnicodeSymbol CharToSymbol(char ch, const locale& lcl);
4234 #endif
4235 
4236  /// Convert Unicode code point into encoded character
4237  ///
4238  /// @param sym
4239  /// Unicode code point (symbol)
4240  /// @param encoding
4241  /// Character encoding
4242  /// @return
4243  /// Encoded character
4244  static char SymbolToChar(TUnicodeSymbol sym, EEncoding encoding);
4245 
4246 #if defined(HAVE_WSTRING)
4247  /// Convert Unicode code point into encoded character
4248  ///
4249  /// @param sym
4250  /// Unicode code point (symbol)
4251  /// @param lcl
4252  /// Character locale
4253  /// @return
4254  /// Encoded character
4255  static char SymbolToChar(TUnicodeSymbol sym, const locale& lcl);
4256 #endif
4257 
4258  /// Determines if a symbol is whitespace
4259  /// per http://unicode.org/charts/uca/chart_Whitespace.html
4260  ///
4261  /// @param sym
4262  /// Unicode code point (symbol)
4263  /// @sa
4264  /// TruncateSpacesInPlace, TruncateSpaces_Unsafe, TruncateSpaces
4265  static bool IsWhiteSpace(TUnicodeSymbol sym);
4266 
4267  /// Truncate whitespace in the string (in-place)
4268  ///
4269  /// @param src
4270  /// UTF8 string
4271  /// @param side
4272  /// Which end of the string to truncate whitespace from.
4273  /// Default is to truncate whitespace from both ends.
4274  /// @return
4275  /// Reference to src
4276  /// @sa
4277  /// IsWhiteSpace, TruncateSpaces_Unsafe, TruncateSpaces
4278  static CStringUTF8& TruncateSpacesInPlace
4280 
4281  /// Truncate whitespace in the string
4282  ///
4283  /// @param str
4284  /// Source string, in UTF8 encoding
4285  /// @param side
4286  /// Which end of the string to truncate whitespace from.
4287  /// Default is to truncate whitespace from both ends.
4288  /// @sa
4289  /// IsWhiteSpace, TruncateSpacesInPlace, TruncateSpaces_Unsafe
4290  static CStringUTF8 TruncateSpaces
4291  (const CTempString& str, NStr::ETrunc side = NStr::eTrunc_Both);
4292 
4293  /// Truncate whitespace in the string
4294  ///
4295  /// @param str
4296  /// Source string, in UTF8 encoding
4297  /// @param side
4298  /// Which end of the string to truncate whitespace from.
4299  /// Default is to truncate whitespace from both ends.
4300  /// @attention
4301  /// The lifespan of the result string is the same as one of the source.
4302  /// So, for example, if the source is temporary string, then the result
4303  /// will be invalid right away (will point to already released memory).
4304  /// @sa
4305  /// IsWhiteSpace, TruncateSpacesInPlace, TruncateSpaces
4306  static CTempString TruncateSpaces_Unsafe
4307  (const CTempString& str, NStr::ETrunc side = NStr::eTrunc_Both);
4308 
4309  /// Convert sequence of UTF8 code units into Unicode code point
4310  ///
4311  /// @param src
4312  /// Zero-terminated buffer, in UTF8 encoding
4313  /// @return
4314  /// Unicode code point
4315  static TUnicodeSymbol Decode(const char*& src);
4316 
4317 #ifndef NCBI_COMPILER_WORKSHOP
4318  /// Convert sequence of UTF8 code units into Unicode code point
4319  ///
4320  /// @param src
4321  /// C++ string iterator
4322  /// @return
4323  /// Unicode code point
4324  static TUnicodeSymbol Decode(string::const_iterator& src);
4325 #endif
4326 
4327  /// Begin converting first character of UTF8 sequence into Unicode
4328  ///
4329  /// @param ch
4330  /// Character
4331  /// @param more
4332  /// If the character is valid, - how many more characters to expect
4333  /// @return
4334  /// Part of Unicode code point. Zero if the character is invalid.
4335  static TUnicodeSymbol DecodeFirst(char ch, SIZE_TYPE& more);
4336 
4337  /// Convert next character of UTF8 sequence into Unicode
4338  ///
4339  /// @param ch
4340  /// Character
4341  /// @param chU
4342  /// Incomplete Unicode code point
4343  /// @return
4344  /// Accumulated Unicode code point. Zero if the character is invalid.
4345  static TUnicodeSymbol DecodeNext(TUnicodeSymbol chU, char ch);
4346 
4347 private:
4348  static void x_Validate(const CTempString& str);
4349 
4350  static SIZE_TYPE x_GetValidSymbolCount
4351  (const CTempString& src, CTempString::const_iterator& err);
4352 
4353  static CStringUTF8& x_AppendChar(CStringUTF8& u8str, TUnicodeSymbol ch);
4354 
4355  static CStringUTF8& x_Append(CStringUTF8& u8str, const CTempString& src,
4356  EEncoding encoding, EValidate validate);
4357 #if defined(HAVE_WSTRING)
4358  static CStringUTF8& x_Append(CStringUTF8& u8str, const CTempString& src, const locale& lcl);
4359 #endif
4360  template <typename TChar>
4361  static bool x_TCharToUnicodeSymbol(TUnicodeSymbol& u, const TChar* src);
4362  template <typename TChar>
4363  static CStringUTF8& x_Append(CStringUTF8& u8str, const TChar* src, SIZE_TYPE tchar_count);
4364 
4365  template <typename TChar>
4366  static basic_string<TChar> x_AsBasicString
4367  (const CTempString& src,
4368  const TChar* substitute_on_error, EValidate validate);
4369 
4370  template <typename TIterator>
4371  static TUnicodeSymbol x_Decode(TIterator& src);
4372 
4373  static SIZE_TYPE x_BytesNeeded(TUnicodeSymbol ch);
4374  static bool x_EvalFirst(char ch, SIZE_TYPE& more);
4375  static bool x_EvalNext(char ch);
4376 
4377  // returns part of the string around an error in Utf8 encoding
4378  static CTempString x_GetErrorFragment(const CTempString& src);
4379 
4381 };
4382 
4383 // deprecated CStringUTF8 is there
4385 
4386 
4387 
4388 /////////////////////////////////////////////////////////////////////////////
4389 ///
4390 /// CParseTemplException --
4391 ///
4392 /// Define template class for parsing exception. This class is used to define
4393 /// exceptions for complex parsing tasks and includes an additional m_Pos
4394 /// data member. The constructor requires that an additional positional
4395 /// parameter be supplied along with the description message.
4396 
4397 template <class TBase>
4399 {
4400 public:
4401  /// Error types for this exception class.
4402  enum EErrCode {
4403  eErr ///< Generic error
4404  };
4405 
4406  /// Translate from the error code value to its string representation.
4407  virtual const char* GetErrCodeString(void) const override
4408  {
4409  switch (GetErrCode()) {
4410  case eErr: return "eErr";
4411  default: return CException::GetErrCodeString();
4412  }
4413  }
4414 
4415  /// Constructor.
4416  ///
4417  /// Report "pos" along with "what".
4419  const CException* prev_exception,
4420  EErrCode err_code,const string& message,
4421  string::size_type pos, EDiagSev severity = eDiag_Error)
4422  : TBase(info, prev_exception, message, severity, 0), m_Pos(pos)
4423  {
4424  this->x_Init(info,
4425  string("{") + NStr::SizetToString(m_Pos) +
4426  "} " + message,
4427  prev_exception,
4428  severity);
4429  this->x_InitErrCode((CException::EErrCode) err_code);
4430  }
4431 
4432  /// Copy constructor.
4434  : TBase(other)
4435  {
4436  m_Pos = other.m_Pos;
4437  this->x_Assign(other);
4438  }
4439 
4440  /// Destructor.
4441  virtual ~CParseTemplException(void) noexcept {}
4442 
4443  /// Report error position.
4444  virtual void ReportExtra(ostream& out) const override
4445  {
4446  out << "m_Pos = " << (unsigned long)m_Pos;
4447  }
4448 
4449  // Attributes.
4450 
4451  /// Get exception class type.
4452  virtual const char* GetType(void) const override
4453  { return "CParseTemplException"; }
4454 
4455  typedef int TErrCode;
4456  /// Get error code.
4457  TErrCode GetErrCode(void) const
4458  {
4459  return typeid(*this) == typeid(CParseTemplException<TBase>) ?
4460  (TErrCode) this->x_GetErrCode() :
4462  }
4463 
4464  /// Get error position.
4465  string::size_type GetPos(void) const noexcept { return m_Pos; }
4466 
4467 protected:
4469  const CException* prev_exception,
4470  const string& message,
4471  string::size_type pos, EDiagSev severity, CException::TFlags flags)
4472  : TBase(info, prev_exception, message, severity, flags), m_Pos(pos)
4473  {
4474  this->x_Init(info,
4475  string("{") + NStr::SizetToString(m_Pos) +
4476  "} " + message,
4477  prev_exception,
4478  severity);
4479  }
4480  /// Constructor.
4482  {
4483  m_Pos = 0;
4484  }
4485 
4486  /// Helper clone method.
4487  virtual const CException* x_Clone(void) const override
4488  {
4489  return new CParseTemplException<TBase>(*this);
4490  }
4491 
4492 private:
4493  string::size_type m_Pos; ///< Error position
4494 };
4495 
4496 
4497 /////////////////////////////////////////////////////////////////////////////
4498 ///
4499 /// CStringException --
4500 ///
4501 /// Define exceptions generated by string classes.
4502 ///
4503 /// CStringException inherits its basic functionality from
4504 /// CParseTemplException<CCoreException> and defines additional error codes
4505 /// for string parsing.
4506 
4508 {
4509 public:
4510  /// Error types that string classes can generate.
4511  enum EErrCode {
4512  eConvert, ///< Failure to convert string
4513  eBadArgs, ///< Bad arguments to string methods
4514  eFormat ///< Wrong format for any input to string methods
4515  };
4516 
4517  /// Translate from the error code value to its string representation.
4518  virtual const char* GetErrCodeString(void) const override;
4519 
4520  // Standard exception boilerplate code.
4522  CParseTemplException<CCoreException>, std::string::size_type);
4523 };
4524 
4525 
4526 
4527 /////////////////////////////////////////////////////////////////////////////
4528 ///
4529 /// CStringPairsParser --
4530 ///
4531 /// Base class for parsing a string to a set of name-value pairs.
4532 
4533 
4534 /// Decoder interface. Names and values can be decoded with different rules.
4536 {
4537 public:
4538  /// Type of string to be decoded
4541  eValue
4542  };
4543  /// Decode the string. Must throw CStringException if the source string
4544  /// is not valid.
4545  virtual string Decode(const CTempString src, EStringType stype) const = 0;
4546  virtual ~IStringDecoder(void) {}
4547 };
4548 
4549 
4550 /// Encoder interface. Names and values can be encoded with different rules.
4552 {
4553 public:
4554  /// Type of string to be encoded
4557  eValue
4558  };
4559  /// Encode the string.
4560  virtual string Encode(const CTempString src, EStringType stype) const = 0;
4561  virtual ~IStringEncoder(void) {}
4562 };
4563 
4564 
4565 /// URL-decoder for string pairs parser
4567 {
4568 public:
4570 
4571  virtual string Decode(const CTempString src, EStringType stype) const;
4572 
4573 private:
4575 };
4576 
4577 
4578 /// URL-encoder for string pairs parser
4580 {
4581 public:
4583 
4584  virtual string Encode(const CTempString src, EStringType stype) const;
4585 
4586 private:
4588 };
4589 
4590 
4591 /// Template for parsing string into pairs of name and value or merging
4592 /// them back into a single string.
4593 /// The container class must hold pairs of strings (pair<string, string>).
4594 template<class TContainer>
4596 {
4597 public:
4598  typedef TContainer TStrPairs;
4599  /// The container's value type must be pair<string, string>
4600  /// or a compatible type.
4602 
4603  /// Create parser with the specified decoder/encoder and default separators.
4604  ///
4605  /// @param decoder
4606  /// String decoder (Url, Xml etc.)
4607  /// @param own_decoder
4608  /// Decoder ownership flag
4609  /// @param encoder
4610  /// String encoder (Url, Xml etc.), optional
4611  /// @param own_encoder
4612  /// Encoder ownership flag, optional
4614  EOwnership own_decoder = eTakeOwnership,
4615  IStringEncoder* encoder = NULL,
4616  EOwnership own_encoder = eTakeOwnership)
4617  : m_ArgSep("&"),
4618  m_ValSep("="),
4619  m_Decoder(decoder, own_decoder),
4620  m_Encoder(encoder, own_encoder)
4621  {
4622  }
4623 
4624  /// Create parser with the specified parameters.
4625  ///
4626  /// @param arg_sep
4627  /// Separator between name+value pairs
4628  /// @param val_sep
4629  /// Separator between name and value
4630  /// @param decoder
4631  /// String decoder (Url, Xml etc.)
4632  /// @param own_decoder
4633  /// Decoder ownership flag
4634  /// @param encoder
4635  /// String encoder (Url, Xml etc.)
4636  /// @param own_encoder
4637  /// Encoder ownership flag
4638  CStringPairs(const CTempString arg_sep,
4639  const CTempString val_sep,
4640  IStringDecoder* decoder = NULL,
4641  EOwnership own_decoder = eTakeOwnership,
4642  IStringEncoder* encoder = NULL,
4643  EOwnership own_encoder = eTakeOwnership)
4644  : m_ArgSep(arg_sep),
4645  m_ValSep(val_sep),
4646  m_Decoder(decoder, own_decoder),
4647  m_Encoder(encoder, own_encoder)
4648  {
4649  }
4650 
4651  /// Create parser with the selected URL-encoding/decoding options
4652  /// and default separators.
4653  ///
4654  /// @param decode_flag
4655  /// URL-decoding flag
4656  /// @param encode_flag
4657  /// URL-encoding flag
4659  NStr::EUrlEncode encode_flag)
4660  : m_ArgSep("&"),
4661  m_ValSep("="),
4662  m_Decoder(new CStringDecoder_Url(decode_flag), eTakeOwnership),
4663  m_Encoder(new CStringEncoder_Url(encode_flag), eTakeOwnership)
4664  {
4665  }
4666 
4667  virtual ~CStringPairs(void) {}
4668 
4669  /// Set string decoder.
4670  ///
4671  /// @param decoder
4672  /// String decoder (Url, Xml etc.)
4673  /// @param own
4674  /// Decoder ownership flag
4676  { m_Decoder.reset(decoder, own); }
4677  /// Get decoder or NULL. Does not affect decoder ownership.
4679 
4680  /// Set string encoder.
4681  ///
4682  /// @param encoder
4683  /// String encoder (Url, Xml etc.)
4684  /// @param own
4685  /// Encoder ownership flag
4687  { m_Encoder.reset(encoder, own); }
4688  /// Get encoder or NULL. Does not affect encoder ownership.
4690 
4691  /// Parse the string.
4692  ///
4693  /// @param str
4694  /// String to parse. The parser assumes the string is formatted like
4695  /// "name1<valsep>value1<argsep>name2<valsep>value2...". Each name and
4696  /// value is passed to the decoder (if not NULL) before storing the pair.
4697  /// @param merge_argsep
4698  /// Flag for merging separators between pairs. By default the separators
4699  /// are merged to prevent pairs where both name and value are empty.
4700  void Parse(const CTempString str,
4701  NStr::EMergeDelims merge_argsep = NStr::eMergeDelims)
4702  {
4704  m_Decoder.get(), eNoOwnership, merge_argsep);
4705  }
4706 
4707  /// Parse the string using the provided decoder, put data into the
4708  /// container.
4709  ///
4710  /// @param pairs
4711  /// Container to be filled with the parsed name/value pairs
4712  /// @param str
4713  /// String to parse. The parser assumes the string is formatted like
4714  /// "name1<valsep>value1<argsep>name2<valsep>value2...". Each name and
4715  /// value is passed to the decoder (if not NULL) before storing the pair.
4716  /// @param decoder
4717  /// String decoder (Url, Xml etc.)
4718  /// @param own
4719  /// Flag indicating if the decoder must be deleted by the function.
4720  /// @param merge_argsep
4721  /// Flag for merging separators between pairs. By default the separators
4722  /// are merged to prevent pairs where both name and value are empty.
4723  static void Parse(TStrPairs& pairs,
4724  const CTempString str,
4725  const CTempString arg_sep,
4726  const CTempString val_sep,
4727  IStringDecoder* decoder = NULL,
4728  EOwnership own = eTakeOwnership,
4729  NStr::EMergeDelims merge_argsep = NStr::eMergeDelims)
4730  {
4731  AutoPtr<IStringDecoder> decoder_guard(decoder, own);
4732  list<string> lst;
4733  NStr::Split(str, arg_sep, lst, (NStr::TSplitFlags)merge_argsep);
4734  pairs.clear();
4735  ITERATE(list<string>, it, lst) {
4736  string name, val;
4737  NStr::SplitInTwo(*it, val_sep, name, val);
4738  if ( decoder ) {
4739  try {
4740  name = decoder->Decode(name, IStringDecoder::eName);
4741  val = decoder->Decode(val, IStringDecoder::eValue);
4742  }
4743  catch (const CStringException&) {
4744  // Discard all data
4745  pairs.clear();
4746  throw;
4747  }
4748  }
4749  pairs.insert(pairs.end(), TStrPair(name, val));
4750  }
4751  }
4752 
4753  /// Merge name-value pairs into a single string using the currently set
4754  /// separators and the provided encoder if any.
4755  string Merge(void) const
4756  {
4757  return Merge(m_Data, m_ArgSep, m_ValSep,
4759  }
4760 
4761  /// Merge name-value pairs from the provided container, separators
4762  /// and encoder. Delete the encoder if the ownership flag allows.
4763  ///
4764  /// @param pairs
4765  /// Container with the name/value pairs to be merged.
4766  /// @param arg_sep
4767  /// Separator to be inserted between pairs.
4768  /// @param val_sep
4769  /// Separator to be inserted between name and value.
4770  /// @param encoder
4771  /// String encoder (Url, Xml etc.)
4772  /// @param own
4773  /// Flag indicating if the encoder must be deleted by the function.
4774  static string Merge(const TStrPairs& pairs,
4775  const string& arg_sep,
4776  const string& val_sep,
4777  IStringEncoder* encoder = NULL,
4778  EOwnership own = eTakeOwnership)
4779  {
4780  AutoPtr<IStringEncoder> encoder_guard(encoder, own);
4781  string ret;
4782  ITERATE(typename TStrPairs, it, pairs) {
4783  if ( !ret.empty() ) {
4784  ret += arg_sep;
4785  }
4786  if ( encoder ) {
4787  ret += encoder->Encode(it->first, IStringEncoder::eName) +
4788  val_sep +
4789  encoder->Encode(it->second, IStringEncoder::eValue);
4790  }
4791  else {
4792  ret += it->first + val_sep + it->second;
4793  }
4794  }
4795  return ret;
4796  }
4797 
4798  /// Read data
4799  const TStrPairs& GetPairs(void) const { return m_Data; }
4800  /// Get non-const data
4801  TStrPairs& GetPairs(void) { return m_Data; }
4802 
4803 private:
4804  string m_ArgSep; // Separator between name+value pairs ("&")
4805  string m_ValSep; // Separator between name and value ("=")
4806  AutoPtr<IStringDecoder> m_Decoder; // String decoder (Url, Xml etc.)
4807  AutoPtr<IStringEncoder> m_Encoder; // String encoder (Url, Xml etc.)
4808  TStrPairs m_Data; // Parsed data
4809 };
4810 
4811 
4812 typedef vector<pair<string, string> > TStringPairsVector;
4814 
4815 
4816 /////////////////////////////////////////////////////////////////////////////
4817 ///
4818 /// CEncodedString --
4819 ///
4820 /// Class to detect if a string needs to be URL-encoded and hold both
4821 /// encoded and original versions.
4822 ///
4823 
4825 {
4826 public:
4828  CEncodedString(const CTempString s,
4830 
4831  /// Set new original string
4832  void SetString(const CTempString s,
4834 
4835  /// Check if the original string was encoded.
4836  bool IsEncoded(void) const { return m_Encoded.get() != 0; }
4837  /// Get the original unencoded string
4838  const string& GetOriginalString(void) const { return m_Original; }
4839  /// Get encoded string
4840  const string& GetEncodedString(void) const
4841  { return IsEncoded() ? *m_Encoded : m_Original; }
4842 
4843  /// Check if the string is empty
4844  bool IsEmpty(void) const { return m_Original.empty(); }
4845 
4846 private:
4847  string m_Original;
4848  unique_ptr<string> m_Encoded;
4849 };
4850 
4851 
4852 /////////////////////////////////////////////////////////////////////////////
4853 // Predicates
4854 //
4855 
4856 
4857 /////////////////////////////////////////////////////////////////////////////
4858 ///
4859 /// Define Case-sensitive string comparison methods.
4860 ///
4861 /// Used as arguments to template functions for specifying the type of
4862 /// comparison.
4863 
4864 template <typename T>
4866 {
4867  /// Return difference between "s1" and "s2".
4868  int Compare(const T& s1, const T& s2) const;
4869 
4870  /// Return TRUE if s1 < s2.
4871  bool Less(const T& s1, const T& s2) const;
4872 
4873  /// Return TRUE if s1 == s2.
4874  bool Equals(const T& s1, const T& s2) const;
4875 
4876  /// Return TRUE if s1 < s2.
4877  bool operator()(const T& s1, const T& s2) const;
4878 };
4879 
4882 
4883 
4884 
4885 /////////////////////////////////////////////////////////////////////////////
4886 ///
4887 /// Define Case-insensitive string comparison methods.
4888 ///
4889 /// Used as arguments to template functions for specifying the type of
4890 /// comparison.
4891 ///
4892 /// @sa PNocase_Conditional_Generic
4893 
4894 template <typename T>
4896 {
4897  /// Return difference between "s1" and "s2".
4898  int Compare(const T& s1, const T& s2) const;
4899 
4900  /// Return TRUE if s1 < s2.
4901  bool Less(const T& s1, const T& s2) const;
4902 
4903  /// Return TRUE if s1 == s2.
4904  bool Equals(const T& s1, const T& s2) const;
4905 
4906  /// Return TRUE if s1 < s2 ignoring case.
4907  bool operator()(const T& s1, const T& s2) const;
4908 };
4909 
4912 
4913 
4914 /////////////////////////////////////////////////////////////////////////////
4915 ///
4916 /// Define Case-insensitive string comparison methods.
4917 /// Case sensitivity can be turned on and off at runtime.
4918 ///
4919 /// Used as arguments to template functions for specifying the type of
4920 /// comparison.
4921 ///
4922 /// @sa PNocase_Generic
4923 
4924 template <typename T>
4926 {
4927 public:
4928  /// Construction
4930 
4931  /// Get comparison type
4933 
4934  /// Set comparison type
4935  void SetCase(NStr::ECase case_sens) { m_CaseSensitive = case_sens; }
4936 
4937  /// Return difference between "s1" and "s2".
4938  int Compare(const T& s1, const T& s2) const;
4939 
4940  /// Return TRUE if s1 < s2.
4941  bool Less(const T& s1, const T& s2) const;
4942 
4943  /// Return TRUE if s1 == s2.
4944  bool Equals(const T& s1, const T& s2) const;
4945 
4946  /// Return TRUE if s1 < s2 ignoring case.
4947  bool operator()(const T& s1, const T& s2) const;
4948 private:
4949  NStr::ECase m_CaseSensitive; ///< case sensitive when TRUE
4950 };
4951 
4954 
4955 
4956 /////////////////////////////////////////////////////////////////////////////
4957 ///
4958 /// Define Case-insensitive string equality (not less-than) comparison method
4959 ///
4960 /// Used as arguments to template functions for specifying the type of
4961 /// equality comparison
4962 ///
4963 /// @sa PEqualNocase_Conditional_Generic
4964 
4965 template <typename T>
4967  : public PNocase_Generic<T>
4968 {
4969 public:
4970  /// Return TRUE if s1 == s2 ignoring case.
4971  bool operator()(const T& s1, const T& s2) const
4972  {
4973  return this->Equals(s1, s2);
4974  }
4975 };
4976 
4979 
4980 
4981 /////////////////////////////////////////////////////////////////////////////
4982 ///
4983 /// Define Case-insensitive string equality (not less-than) comparison method
4984 /// Case sensitivity can be turned on and off at runtime.
4985 ///
4986 /// Used as arguments to template functions for specifying the type of
4987 /// comparison.
4988 ///
4989 /// @sa PEqualNocase_Generic
4990 
4991 template <typename T>
4993  : public PNocase_Conditional_Generic<T>
4994 {
4995 public:
4996  /// Construction
4998  : PNocase_Conditional_Generic<T>(case_sens)
4999  {
5000  }
5001 
5002  /// Return TRUE if s1 == s2, using the configured case sensitivity.
5003  bool operator()(const T& s1, const T& s2) const
5004  {
5005  return this->Equals(s1, s2);
5006  }
5007 };
5008 
5011 
5012 /////////////////////////////////////////////////////////////////////////////
5013 ///
5014 /// PQuickStringLess implements an ordering of strings,
5015 /// that is more efficient than usual lexicographical order.
5016 /// It can be used in cases when no specific order is required,
5017 /// e.g. only simple key lookup is needed.
5018 /// Current implementation first compares lengths of strings,
5019 /// and will compare string data only when lengths are the same.
5020 ///
5022 {
5023  bool operator()(const CTempString s1, const CTempString s2) const {
5024  size_t len1 = s1.size(), len2 = s2.size();
5025  return len1 < len2 ||
5026  (len1 == len2 && ::memcmp(s1.data(), s2.data(), len1) < 0);
5027  }
5028 };
5029 
5030 
5031 /////////////////////////////////////////////////////////////////////////////
5032 // Algorithms
5033 //
5034 
5035 
/// Check equivalence of arguments using predicate.
///
/// @param x
///   First argument handed to the predicate
/// @param y
///   Second argument handed to the predicate
/// @param pr
///   Predicate object; must provide Equals(x, y)
/// @return
///   Result of pr.Equals(x, y)
template<class Arg1, class Arg2, class Pred>
inline
bool AStrEquiv(const Arg1& x, const Arg2& y, Pred pr)
{
    const bool equivalent = pr.Equals(x, y);
    return equivalent;
}
5043 
5044 
5045 /* @} */
5046 
5047 
5048 
5049 /////////////////////////////////////////////////////////////////////////////
5050 //
5051 // IMPLEMENTATION of INLINE functions
5052 //
5053 /////////////////////////////////////////////////////////////////////////////
5054 
5055 
5056 /////////////////////////////////////////////////////////////////////////////
5057 // CNcbiEmptyString::
5058 //
5059 #if !defined(NCBI_OS_MSWIN) && \
5060  !(defined(NCBI_OS_LINUX) && \
5061  (defined(NCBI_COMPILER_GCC) || defined(NCBI_COMPILER_ANY_CLANG)))
5062 inline
5063 const string& CNcbiEmptyString::Get(void)
5064 {
5065  const string* str = m_Str;
5066  return str ? *str: FirstGet();
5067 }
5068 
5069 # ifdef HAVE_WSTRING
5070 inline
5071 const wstring& CNcbiEmptyWString::Get(void)
5072 {
5073  const wstring* str = m_Str;
5074  return str ? *str: FirstGet();
5075 }
5076 # endif
5077 #endif
5078 
5079 
5080 
5081 /////////////////////////////////////////////////////////////////////////////
5082 // NStr::
5083 //
5084 
5085 inline
5087  TNumToStringFlags flags, int base)
5088 {
5089  string ret;
5090  IntToString(ret, value, flags, base);
5091  return ret;
5092 }
5093 
5094 inline
5095 string NStr::IntToString(unsigned int value,
5096  TNumToStringFlags flags, int base)
5097 {
5098  string ret;
5099  IntToString(ret, (int)value, flags, base);
5100  return ret;
5101 }
5102 
5103 inline
5104 void NStr::IntToString(string& out_str, unsigned int value,
5105  TNumToStringFlags flags, int base)
5106 {
5107  IntToString(out_str, (int)value, flags, base);
5108 }
5109 
5110 inline
5111 string NStr::UIntToString(unsigned int value,
5112  TNumToStringFlags flags, int base)
5113 {
5114  string ret;
5115  ULongToString(ret, value, flags, base);
5116  return ret;
5117 }
5118 
5119 inline
5121  TNumToStringFlags flags, int base)
5122 {
5123  string ret;
5124  UIntToString(ret, (unsigned int)value, flags, base);
5125  return ret;
5126 }
5127 
5128 inline
5129 void NStr::UIntToString(string& out_str, unsigned int value,
5130  TNumToStringFlags flags, int base)
5131 {
5132  ULongToString(out_str, value, flags, base);
5133 }
5134 
5135 inline
5136 void NStr::UIntToString(string& out_str, int value,
5137  TNumToStringFlags flags, int base)
5138 {
5139  UIntToString(out_str, (unsigned int)value, flags, base);
5140 }
5141 
5142 inline
5144  TNumToStringFlags flags, int base)
5145 {
5146  string ret;
5147  LongToString(ret, value, flags, base);
5148  return ret;
5149 }
5150 
5151 inline
5152 string NStr::ULongToString(unsigned long value,
5153  TNumToStringFlags flags, int base)
5154 {
5155  string ret;
5156  ULongToString(ret, value, flags, base);
5157  return ret;
5158 }
5159 
5160 inline
5162  TNumToStringFlags flags, int base)
5163 {
5164  string ret;
5165  NStr::Int8ToString(ret, value, flags, base);
5166  return ret;
5167 }
5168 
5169 inline
5171  TNumToStringFlags flags, int base)
5172 {
5173  string ret;
5174  NStr::UInt8ToString(ret, value, flags, base);
5175  return ret;
5176 }
5177 
5178 inline
5180  TNumToStringFlags flags /* = 0 */,
5181  unsigned int max_digits /* = 3 */)
5182 {
5183  string ret;
5184  NStr::UInt8ToString_DataSize(ret, value, flags, max_digits);
5185  return ret;
5186 }
5187 
// Inline helper that returns a locally built string by value.
// NOTE(review): this extract is missing the whole declarator (listing
// lines 5189-5190) and one body statement (listing line 5193, presumably
// the call that fills `str`). The function's name and parameters cannot be
// determined from the visible lines -- restore them from the repository
// before editing this definition.
5188 inline
5191 {
5192  string str;
5194  return str;
5195 }
5196 
5197 inline
5198 int NStr::HexChar(char ch)
5199 {
5200  unsigned int rc = ch - '0';
5201  if (rc <= 9) {
5202  return rc;
5203  } else {
5204  rc = (ch | ' ') - 'a';
5205  return rc <= 5 ? int(rc + 10) : -1;
5206  }
5207 }
5208 
5209 inline
5210 int NStr::strcmp(const char* s1, const char* s2)
5211 {
5212  return ::strcmp(s1, s2);
5213 }
5214 
5215 inline
5216 int NStr::strncmp(const char* s1, const char* s2, size_t n)
5217 {
5218  return ::strncmp(s1, s2, n);
5219 }
5220 
5221 inline
5222 int NStr::strcasecmp(const char* s1, const char* s2)
5223 {
5224 #if defined(HAVE_STRICMP)
5225 #if NCBI_COMPILER_MSVC && (_MSC_VER >= 1400)
5226  return ::_stricmp(s1, s2);
5227 #else
5228  return ::stricmp(s1, s2);
5229 #endif
5230 
5231 #elif defined(HAVE_STRCASECMP_LC)
5232  return ::strcasecmp(s1, s2);
5233 
5234 #else
5235  int diff = 0;
5236  for ( ;; ++s1, ++s2) {
5237  char c1 = *s1;
5238  // calculate difference
5239  diff = tolower((unsigned char) c1) - tolower((unsigned char)(*s2));
5240  // if end of string or different
5241  if (!c1 || diff)
5242  break; // return difference
5243  }
5244  return diff;
5245 #endif
5246 }
5247 
5248 inline
5249 int NStr::strncasecmp(const char* s1, const char* s2, size_t n)
5250 {
5251 #if defined(HAVE_STRICMP)
5252 #if NCBI_COMPILER_MSVC && (_MSC_VER >= 1400)
5253  return ::_strnicmp(s1, s2, n);
5254 #else
5255  return ::strnicmp(s1, s2, n);
5256 #endif
5257 
5258 #elif defined(HAVE_STRCASECMP_LC)
5259  return ::strncasecmp(s1, s2, n);
5260 
5261 #else
5262  int diff = 0;
5263  for ( ; ; ++s1, ++s2, --n) {
5264  if (n == 0)
5265  return 0;
5266  char c1 = *s1;
5267  // calculate difference
5268  diff = tolower((unsigned char) c1) - tolower((unsigned char)(*s2));
5269  // if end of string or different
5270  if (!c1 || diff)
5271  break; // return difference
5272  }
5273  return diff;
5274 #endif
5275 }
5276 
5277 inline
5278 size_t NStr::strftime(char* s, size_t maxsize, const char* format,
5279  const struct tm* timeptr)
5280 {
5281  string x_format = Replace(format, "%T", "%H:%M:%S");
5282  ReplaceInPlace(x_format, "%D", "%m/%d/%y");
5283  return ::strftime(s, maxsize, x_format.c_str(), timeptr);
5284 }
5285 
5286 inline
5287 int NStr::CompareCase(const char* s1, const char* s2)
5288 {
5289  return NStr::strcmp(s1, s2);
5290 }
5291 
5292 inline
5293 int NStr::CompareNocase(const char* s1, const char* s2)
5294 {
5295  return NStr::strcasecmp(s1, s2);
5296 }
5297 
// Compares the sub-range s1.substr(pos, n) with the C string `s2`:
// through CompareCase() when `use_case` is eCase, through CompareNocase()
// otherwise. The result of the selected call is returned unchanged.
// NOTE(review): the line holding the return type, function name and the
// leading parameters (listing line 5299) is missing from this extract;
// presumably `int NStr::Compare(const CTempString s1, SIZE_TYPE pos,
// SIZE_TYPE n,` -- confirm against the repository.
5298 inline
5300  const char* s2, ECase use_case)
5301 {
5302  return use_case == eCase ? CompareCase(s1.substr(pos, n), s2)
5303  : CompareNocase(s1.substr(pos, n), s2);
5304 }
5305 
// Compares the sub-range s1.substr(pos, n) with the CTempString `s2`:
// through CompareCase() when `use_case` is eCase, through CompareNocase()
// otherwise. The result of the selected call is returned unchanged.
// NOTE(review): the line holding the return type, function name and the
// leading parameters (listing line 5307) is missing from this extract;
// presumably `int NStr::Compare(const CTempString s1, SIZE_TYPE pos,
// SIZE_TYPE n,` -- confirm against the repository.
5306 inline
5308  const CTempString s2, ECase use_case)
5309 {
5310  return use_case == eCase ? CompareCase(s1.substr(pos, n), s2)
5311  : CompareNocase(s1.substr(pos, n), s2);
5312 }
5313 
5314 inline
5315 int NStr::Compare(const char* s1, const char* s2, ECase use_case)
5316 {
5317  return use_case == eCase ? CompareCase(s1, s2) : CompareNocase(s1, s2);
5318 }
5319 
5320 inline
5321 int NStr::Compare(const CTempStringEx s1, const CTempStringEx s2, ECase use_case)
5322 {
5323  return use_case == eCase ? CompareCase(s1, s2) : CompareNocase(s1, s2);
5324 }
5325 
5326 inline
5327 bool NStr::EqualCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char* s2)
5328 {
5329  return s1.substr(pos, n) == s2;
5330 }
5331 
// Case-sensitive equality test between the sub-range s1.substr(pos, n)
// and `s2`, using operator==.
// NOTE(review): the whole declarator (listing line 5333) is missing from
// this extract; presumably `bool NStr::EqualCase(const CTempString s1,
// SIZE_TYPE pos, SIZE_TYPE n, const CTempString s2)` -- confirm against
// the repository.
5332 inline
5334 {
5335  return s1.substr(pos, n) == s2;
5336 }
5337 
5338 inline
5339 bool NStr::EqualCase(const char* s1, const char* s2)
5340 {
5341  size_t n = strlen(s1);
5342  if (n != strlen(s2)) {
5343  return false;
5344  }
5345  return NStr::strncmp(s1, s2, n) == 0;
5346 }
5347 
// Case-sensitive equality test: returns the result of `s1 == s2`.
// NOTE(review): the whole declarator (listing line 5349) is missing from
// this extract; presumably `bool NStr::EqualCase(const CTempStringEx s1,
// const CTempStringEx s2)` -- confirm against the repository.
5348 inline
5350 {
5351  return s1 == s2;
5352 }
5353 
5354 inline
5355 bool NStr::EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char*