NCBI C++ ToolKit
ncbistre.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: ncbistre.cpp 99870 2023-05-18 14:01:14Z lavr $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Denis Vakatov
27  *
28  * File Description:
29  * NCBI C++ stream class wrappers
30  * Triggering between "new" and "old" C++ stream libraries
31  *
32  */
33 
34 #include <ncbi_pch.hpp>
35 #include <corelib/ncbistd.hpp>
36 #include <corelib/ncbisys.hpp>
37 #include <corelib/ncbistre.hpp>
38 #include <corelib/stream_utils.hpp>
39 #if defined(NCBI_OS_UNIX)
40 # include <unistd.h>
41 #endif
42 
43 
45 
46 
47 #if defined(NCBI_OS_MSWIN) && defined(_UNICODE)
48 wstring ncbi_Utf8ToWstring(const char *utf8)
49 {
50  return _T_XSTRING(utf8);
51 }
52 #endif
53 
54 
55 CNcbiIstream& NcbiGetline(CNcbiIstream& is, string& str, const string& delims,
56  SIZE_TYPE* count)
57 {
58  str.erase();
59 
60  IOS_BASE::fmtflags f = is.flags();
61  is.unsetf(IOS_BASE::skipws);
62 #ifdef NO_PUBSYNC
63  if ( !is.ipfx(1) ) {
64  is.flags(f);
65  is.setstate(NcbiFailbit);
66  return is;
67  }
68 #else
69  CNcbiIstream::sentry s(is);
70  if ( !s ) {
71  is.flags(f);
72  is.setstate(NcbiFailbit);
73  return is;
74  }
75 #endif //NO_PUBSYNC
76  _ASSERT( is.good() );
77 
78  char buf[1024];
79  SIZE_TYPE pos = 0;
80  SIZE_TYPE size = 0;
81  SIZE_TYPE max_size = str.max_size();
82  SIZE_TYPE delim_count = 0;
83  IOS_BASE::iostate iostate = NcbiGoodbit/*0*/;
84  for (;;) {
85  CT_INT_TYPE ch = is.rdbuf()->sbumpc();
86  if ( CT_EQ_INT_TYPE(ch, CT_EOF) ) {
87  iostate = NcbiEofbit;
88  break;
89  }
90  SIZE_TYPE delim_pos = delims.find(CT_TO_CHAR_TYPE(ch));
91  if (delim_pos != NPOS) {
92  // Special case -- if two different delimiters are back to
93  // back and in the same order as in delims, treat them as
94  // a single delimiter (necessary for correct handling of
95  // DOS/MAC-style CR/LF endings).
96  ch = is.rdbuf()->sgetc();
97  if (!CT_EQ_INT_TYPE(ch, CT_EOF)
98  && delims.find(CT_TO_CHAR_TYPE(ch), delim_pos + 1) != NPOS) {
99  is.rdbuf()->sbumpc();
100  delim_count = 2;
101  } else {
102  delim_count = 1;
103  }
104  break;
105  }
106  if (size == max_size) {
107  CT_INT_TYPE bk = is.rdbuf()->sungetc();
108  iostate = CT_EQ_INT_TYPE(bk, ch) ? NcbiFailbit : NcbiBadbit;
109  break;
110  }
111 
112  buf[pos++] = CT_TO_CHAR_TYPE(ch);
113  if (pos == sizeof(buf)) {
114  str.append(buf, pos);
115  pos = 0;
116  }
117  size++;
118  }
119  if (pos > 0)
120  str.append(buf, pos);
121  if (count != NULL)
122  *count = size + delim_count;
123 
124 #ifdef NO_PUBSYNC
125  is.isfx();
126 #endif //NO_PUBSYNC
127  is.flags(f);
128  if (iostate) {
129  if (iostate == NcbiEofbit && str.empty())
130  iostate |= NcbiFailbit;
131  is.clear(iostate);
132  }
133  return is;
134 }
135 
136 
137 extern CNcbiIstream& NcbiGetline(CNcbiIstream& is, string& str, char delim,
138  SIZE_TYPE* count)
139 {
140 #if defined(NCBI_USE_OLD_IOSTREAM)
141  return NcbiGetline(is, str, string(1, delim), count);
142 #else
143  str.erase();
144 
145  if ( !is.good() ) {
146  is.setstate(NcbiFailbit);
147  return is;
148  }
149 
150  char buf[1024];
151  SIZE_TYPE size = 0;
152  SIZE_TYPE max_size = str.max_size();
153  do {
154  CT_INT_TYPE nextc = is.get();
155  if (CT_EQ_INT_TYPE(nextc, CT_EOF)
156  || CT_EQ_INT_TYPE(nextc, CT_TO_INT_TYPE(delim))) {
157  ++size;
158  break;
159  }
160  if ( !is.unget() )
161  break;
162  if (size == max_size) {
163  is.clear(NcbiFailbit);
164  break;
165  }
166  SIZE_TYPE n = max_size - size;
167  is.get(buf, n < sizeof(buf) ? n : sizeof(buf), delim);
168  n = (size_t) is.gcount();
169  str.append(buf, n);
170  size += n;
171  _ASSERT(size == str.length());
172  } while ( is.good() );
173 #endif
174 
175  if (is.rdstate() == NcbiEofbit && str.empty())
176  is.setstate(NcbiFailbit);
177  if (count != NULL)
178  *count = size;
179  return is;
180 }
181 
182 
/// Return the end-of-line sequence of the platform the code is built for:
/// "\r\n" when NCBI_OS_MSWIN is defined, "\n" otherwise.
/// The returned pointer refers to storage with static duration.
const char* Endl(void)
{
    static const char kEol[] =
#if defined(NCBI_OS_MSWIN)
        "\r\n";
#else /* assume UNIX-like EOLs */
        "\n";
#endif
    return kEol;
}
193 
194 
195 // Get a line taking into account platform-specific of End-Of-Line
197 {
198 #if defined(NCBI_OS_MSWIN)
199  NcbiGetline(is, str, '\n', count);
200  if (!str.empty() && str[str.length() - 1] == '\r')
201  str.resize(str.length() - 1);
202 #elif defined(NCBI_OS_DARWIN)
203  NcbiGetline(is, str, "\r\n", count);
204 #else /* assume UNIX-like EOLs */
205  NcbiGetline(is, str, '\n', count);
206 #endif //NCBI_OS_...
207  return is;
208 }
209 
210 
212 {
213  if (!os.good() || is.bad())
214  return false;
215  if (CT_EQ_INT_TYPE(is.peek(), CT_EOF)) {
216  // NB: C++ Std says nothing about eofbit (27.6.1.3.27)
217  _ASSERT(!is.good());
218  return !is.bad();
219  }
220  os << is.rdbuf();
221  return os.good() && os.flush() ? true : false;
222 }
223 
224 
226 {
227  bool success = false;
228  try {
229  success = NcbiStreamCopy(os, is);
230  }
231  NCBI_CATCH_ALL("NcbiStreamCopy()");
232  if (!success) {
233  NCBI_THROW(CCoreException, eCore, "NcbiStreamCopy() failed");
234  }
235 }
236 
237 
239 {
240  if ( !is.good() ) {
241  is.setstate(NcbiFailbit);
242  NCBI_THROW(CCoreException, eCore, "Input stream already in bad state");
243  }
244  if ( os.bad() ) {
245  os.setstate(NcbiFailbit);
246  NCBI_THROW(CCoreException, eCore,
247  "Output stream already in bad state");
248  }
249  if (CT_EQ_INT_TYPE(is.peek(), CT_EOF)) {
250  // NB: C++ Std says nothing about eofbit (27.6.1.3.27)
251  _ASSERT( !is.good() );
252  if (is.bad()) {
253  NCBI_THROW(CCoreException, eCore,
254  "Input stream already in bad state (at EOF)");
255  }
256  }
257  char buffer[16384];
258  SIZE_TYPE ninstream = count, ninbuffer = 0;
259  auto outbuf = os.rdbuf();
260  while (ninstream > 0 || ninbuffer > 0) {
261  _ASSERT(ninbuffer < sizeof(buffer));
262  auto nwanted = min(sizeof(buffer) - ninbuffer, ninstream);
263  streamsize nread = 0;
264  if (nwanted > 0) {
265  is.read(buffer + ninbuffer, nwanted);
266  nread = is.gcount();
267  if ( !is.good() ) {
268  is.setstate(NcbiFailbit);
269  ninstream = nread;
270  }
271  }
272  auto nwritten = outbuf->sputn(buffer, nread + ninbuffer);
273  if ( os.bad() || nwritten == 0) {
274  os.setstate(NcbiFailbit);
275  NCBI_THROW(CCoreException, eCore, "Write error");
276  }
277  _ASSERT(static_cast<SIZE_TYPE>(nwritten) <= nread + ninbuffer);
278  ninbuffer = nread + ninbuffer - nwritten;
279  if (ninbuffer > 0) {
280  memmove(buffer, buffer + nwritten, ninbuffer);
281  }
282  _ASSERT(static_cast<SIZE_TYPE>(nread) <= ninstream);
283  ninstream -= nread;
284  }
285  if ( !os.flush() ) {
286  NCBI_THROW(CCoreException, eCore, "Flush error");
287  }
288  // Deferred to ensure writing as much as possible when reading fails
289  // and writing sometimes yields leftovers.
290  if ( is.bad() ) {
291  NCBI_THROW(CCoreException, eCore, "Read error");
292  }
293 }
294 
295 
296 size_t NcbiStreamToString(string* str, CNcbiIstream& is, size_t pos)
297 {
298  if (!is.good()) {
299  // Can't extract anything
300  if (str)
301  str->resize(pos);
302  is.setstate(NcbiFailbit);
303  return 0;
304  }
305 
306  char buf[5120];
307  size_t buf_size = sizeof(buf);
308  size_t str_size;
309 
310  if (str) {
311  str_size = pos;
312  if (str->size() < str_size + buf_size)
313  str->resize(str_size + buf_size);
314  } else
315  str_size = pos = 0;
316 
317  do {
318  try {
319  is.read(str ? &(*str)[str_size] : buf, buf_size);
320  } catch (...) {
321  if (str)
322  str->resize(str_size);
323  throw;
324  }
325  streamsize count = is.gcount();
326  str_size += (size_t) count;
327  if (str) {
328  if ((size_t) count == buf_size) {
329  if (buf_size < (1UL << 20))
330  buf_size <<= 1;
331  str->resize(str_size + buf_size);
332  } else
333  _ASSERT(!is.good());
334  }
335  } while (is.good());
336 
337  _ASSERT(str_size >= pos);
338  if (str)
339  str->resize(str_size);
340 
341  if (!(str_size -= pos)) {
342  // Nothing extracted
343  is.setstate(NcbiFailbit);
344  return 0;
345  }
346 
347  // NB: istream::read() sets both bits at EOF (27.6.1.3.28)
348  IOS_BASE::iostate iostate = is.rdstate();
349  if (iostate != (NcbiFailbit | NcbiEofbit))
350  return 0;
351  is.clear(iostate & ~NcbiFailbit);
352  return str_size;
353 }
354 
355 
357 {
358  while (is1 && is2) {
359  char c1 = (char)is1.get();
360  char c2 = (char)is2.get();
361  if (c1 != c2) {
362  return false;
363  }
364  }
365  return is1.eof() && is2.eof();
366 }
367 
368 
369 static inline
371  char* buf, size_t buf_size, char*& pos, size_t& sizeleft)
372 {
373  char c;
374  do {
375  if ( !sizeleft ) {
376  is.read(buf, buf_size);
377  sizeleft = (size_t) is.gcount();
378  pos = buf;
379  }
380  if (sizeleft > 0) {
381  c = *pos++;
382  --sizeleft;
383  } else {
384  return '\0';
385  }
386  } while ( (mode == eCompareText_IgnoreEol
387  && (c == '\n' || c == '\r')) ||
389  && isspace((unsigned char) c)) );
390  return c;
391 }
392 
393 
395  ECompareTextMode mode, size_t buf_size)
396 {
397  if ( !buf_size ) {
398  buf_size = 4 * 1024;
399  }
400  char* buf1 = new char[buf_size];
401  char* buf2 = new char[buf_size];
402  size_t size1 = 0, size2 = 0;
403  char *pos1 = 0, *pos2 = 0;
404  bool equal = true;
405  do {
406  char c1 = x_GetChar(is1, mode, buf1, buf_size, pos1, size1);
407  char c2 = x_GetChar(is2, mode, buf2, buf_size, pos2, size2);
408  equal = (c1 == c2);
409  if (!c1 || !c2) {
410  break;
411  }
412  } while ( equal );
413  delete[] buf1;
414  delete[] buf2;
415  return equal && is1.eof() && is2.eof();
416 }
417 
418 
419 bool NcbiStreamCompareText(CNcbiIstream& is, const string& str,
420  ECompareTextMode mode, size_t buf_size)
421 {
422  CNcbiIstrstream istr(str);
423  return NcbiStreamCompareText(is, istr, mode, buf_size);
424 }
425 
426 
427 CNcbiOstrstreamToString::operator string(void) const
428 {
429 #ifdef NCBI_SHUN_OSTRSTREAM
430  return m_Out.str();
431 #else
432  SIZE_TYPE len = (SIZE_TYPE) m_Out.pcount();
433  if ( !len ) {
434  return string();
435  }
436  const char* str = m_Out.str();
437  m_Out.freeze(false);
438  return string(str, len);
439 #endif
440 }
441 
442 
444 {
445 #ifdef NCBI_SHUN_OSTRSTREAM
446  out << s.m_Out.str();
447 #else
448  SIZE_TYPE len = (SIZE_TYPE) s.m_Out.pcount();
449  if ( len ) {
450  const char* str = s.m_Out.str();
451  s.m_Out.freeze(false);
452  out.write(str, len);
453  }
454 #endif
455  return out;
456 }
457 
458 
460 {
461  ITERATE ( string, c, s.m_String ) {
462  out.put(char(toupper((unsigned char)(*c))));
463  }
464  return out;
465 }
466 
467 
469 {
470  ITERATE ( string, c, s.m_String ) {
471  out.put(char(tolower((unsigned char)(*c))));
472  }
473  return out;
474 }
475 
476 
478 {
479  for ( const char* c = s.m_String; *c; ++c ) {
480  out.put(char(toupper((unsigned char)(*c))));
481  }
482  return out;
483 }
484 
485 
487 {
488  for ( const char* c = s.m_String; *c; ++c ) {
489  out.put(char(tolower((unsigned char)(*c))));
490  }
491  return out;
492 }
493 
494 
495 #ifdef NCBI_COMPILER_MSVC
496 # if _MSC_VER >= 1200 && _MSC_VER < 1300
498 {
499  return (out << NStr::Int8ToString(val));
500 }
501 # endif
502 #endif
503 
504 
/// Convert a single character to its "printable" form.
///
/// Characters with a dedicated escape (NUL, \a \b \f \n \r \t \v, backslash,
/// single and double quote) are returned as that two-character escape; other
/// non-printable characters are returned as "\xHH" with two uppercase hex
/// digits; everything else is returned unchanged.
string Printable(char c)
{
    switch ( c ) {
    case '\0':  return "\\0";
    case '\a':  return "\\a";
    case '\b':  return "\\b";
    case '\f':  return "\\f";
    case '\n':  return "\\n";
    case '\r':  return "\\r";
    case '\t':  return "\\t";
    case '\v':  return "\\v";
    case '\\':  return "\\\\";
    case '\'':  return "\\'";
    case '"':   return "\\\"";
    default:    break;
    }

    if ( isprint((unsigned char) c) ) {
        return string(1, c);
    }

    static const char kHexDigits[] = "0123456789ABCDEF";
    const unsigned char byte = (unsigned char) c;
    string escaped("\\x");
    escaped += kHexDigits[byte >> 4];
    escaped += kHexDigits[byte & 0x0F];
    return escaped;
}
533 
534 
535 static inline
536 void s_WritePrintable(CNcbiOstream& out, char p, char c, char n)
537 {
538  switch ( c ) {
539  case '\a': out.write("\\a", 2); return;
540  case '\b': out.write("\\b", 2); return;
541  case '\f': out.write("\\f", 2); return;
542  case '\n': out.write("\\n", 2); return;
543  case '\r': out.write("\\r", 2); return;
544  case '\t': out.write("\\t", 2); return;
545  case '\v': out.write("\\v", 2); return;
546  case '\\': out.write("\\\\", 2); return;
547  case '\'': out.write("\\'", 2); return;
548  case '"': out.write("\\\"", 2); return;
549  case '?':
550  if (p == '?' || n == '?')
551  out.write("\\?", 2);
552  else
553  out.put(c);
554  return;
555  default:
556  if ( isprint((unsigned char) c) ) {
557  out.put(c);
558  return;
559  }
560  break;
561  }
562 
563  bool full = '0' <= n && n <= '7' ? true : false;
564  unsigned char v;
565  char octal[4];
566  int k = 1;
567 
568  *octal = '\\';
569  v = (unsigned char) c >> 6;
570  if (v || full) {
571  octal[k++] = char('0' + v);
572  full = true;
573  }
574  v = ((unsigned char) c >> 3) & 7;
575  if (v || full) {
576  octal[k++] = char('0' + v);
577  }
578  v = (unsigned char) c & 7;
579  octal[k++] = char('0' + v);
580  out.write(octal, k);
581 }
582 
583 
585 {
586  size_t size = s.m_String.size();
587  if (size) {
588  char prev = '\0';
589  const char* data = s.m_String.data();
590  for (size_t i = 0; i < size - 1; ++i) {
591  s_WritePrintable(out, prev, data[i], data[i + 1]);
592  prev = data[i];
593  }
594  s_WritePrintable(out, prev, data[size - 1], '\0');
595  }
596  return out;
597 }
598 
599 
601 {
602  const char* t = s.m_String;
603  char p = '\0';
604  char c = *t;
605  while (c) {
606  char n = *++t;
607  s_WritePrintable(out, p, c, n);
608  p = c;
609  c = n;
610  }
611  return out;
612 }
613 
614 
615 #if defined(NCBI_COMPILER_WORKSHOP)
616 // We have to use two #if's here because KAI C++ cannot handle #if foo == bar
617 # if (NCBI_COMPILER_VERSION == 530)
618 // The version that ships with the compiler is buggy.
619 // Here's a working (and simpler!) one.
// Replacement specialization of istream::read(), compiled only when
// NCBI_COMPILER_WORKSHOP is defined and NCBI_COMPILER_VERSION == 530
// (see the enclosing #if's).  Reads up to "n" characters into "s" and
// records the number actually read in __chcount.
620 template<>
621 istream& istream::read(char *s, streamsize n)
622 {
623  sentry ipfx(*this, 1);
624 
625  try {
     // Probe for end of input first, without calling sgetn() at all
626  if (rdbuf()->sgetc() == traits_type::eof()) {
627  // Workaround for bug in sgetn. *SIGH*.
628  __chcount = 0;
629  setstate(eofbit);
630  return *this;
631  }
632  __chcount = rdbuf()->sgetn(s, n);
     // Nothing read -> eofbit;  short read -> eofbit and failbit;
     // full read but the sentry had failed -> failbit
633  if (__chcount == 0) {
634  setstate(eofbit);
635  } else if (__chcount < n) {
636  setstate(eofbit | failbit);
637  } else if (!ipfx) {
638  setstate(failbit);
639  }
640  } catch (...) {
     // Any exception from the stream buffer is turned into a stream error
641  setstate(badbit | failbit);
642  }
643 
644  return *this;
645 }
646 # endif /* NCBI_COMPILER_VERSION == 530 */
647 #endif /* NCBI_COMPILER_WORKSHOP */
648 
649 
653  EEncodingForm ef /* = eEncodingForm_Unknown */,
654  EReadUnknownNoBOM what_if_no_bom /* = eNoBOM_GuessEncoding */
655 )
656 {
658  result->erase();
659  if (!input.good()) {
660  return ef_bom;
661  }
662 
663  const int buf_size = 4096;//2048;//256;
664  char tmp[buf_size+2];
665  Uint2* us = reinterpret_cast<Uint2*>(tmp);
666 
667  // check for Byte Order Mark
668  const int bom_max = 4;
669  memset(tmp,0,bom_max);
670  input.read(tmp,bom_max);
671  int n = (int)input.gcount();
672  {
673  int bom_len=0;
674  Uchar* uc = reinterpret_cast<Uchar*>(tmp);
675  if (n >= 3 && uc[0] == 0xEF && uc[1] == 0xBB && uc[2] == 0xBF) {
676  ef_bom = eEncodingForm_Utf8;
677  uc[0] = uc[3];
678  bom_len=3;
679  }
680  else if (n >= 2 && (us[0] == 0xFEFF || us[0] == 0xFFFE)) {
681  if (us[0] == 0xFEFF) {
682  ef_bom = eEncodingForm_Utf16Native;
683  } else {
685  }
686  us[0] = us[1];
687  bom_len=2;
688  }
689  if (ef == eEncodingForm_Unknown || ef == ef_bom) {
690  ef = ef_bom;
691  n -= bom_len;
692  }
693  // else proceed at user's risk
694  }
695 
696  // keep reading
697  while (n != 0 || (input.good() && !input.eof())) {
698 
699  if (n == 0) {
700  input.read(tmp, buf_size);
701  n = (int) input.gcount();
702  result->reserve(max(result->capacity(), result->size() + n));
703  }
704  tmp[n] = '\0';
705 
706  switch (ef) {
708  {
709  char buf[buf_size];
710  NcbiSys_swab(tmp, buf, n);
711  memcpy(tmp, buf, n);
712  }
713  // no break here
715  {
716  Uint2* u = us;
717 #if 0
718  for (n = n/2; n--; ++u) {
719  result->Append(*u);
720  }
721 #else
722  *result += CUtf8::AsUTF8(u, n/2);
723 #endif
724  }
725  break;
727  //result->Append(tmp,eEncoding_ISO8859_1);
729  break;
731  //result->Append(tmp,eEncoding_Windows_1252);
733  break;
734  case eEncodingForm_Utf8:
735  //result->Append(tmp,eEncoding_UTF8);
736  result->append(tmp,n);
737  break;
738  default:
739  if (what_if_no_bom == eNoBOM_GuessEncoding) {
740  if (n == bom_max) {
741  input.read(tmp + n, buf_size - n);
742  n += (int) input.gcount();
743  result->reserve(max(result->capacity(), result->size() + n));
744  }
745  tmp[n] = '\0';
747  switch (enc) {
748  default:
749  case eEncoding_Unknown:
751  ef = eEncodingForm_Utf8;
752  //result->Append(tmp, enc);
753  *result += CUtf8::AsUTF8(tmp, enc);
754  }
755  else {
756  NCBI_THROW(CCoreException, eCore,
757  "ReadIntoUtf8: cannot guess text encoding");
758  }
759  break;
760  case eEncoding_UTF8:
761  ef = eEncodingForm_Utf8;
762  // no break here
763  case eEncoding_Ascii:
764  case eEncoding_ISO8859_1:
766  //result->Append(tmp, enc);
767  *result += CUtf8::AsUTF8(tmp,enc);
768  break;
769  }
770  } else {
771  //result->Append(tmp, eEncoding_UTF8);
772  result->append(tmp, n);
773  }
774  break;
775  }
776  n = 0;
777  }
778  return ef_bom;
779 }
780 
781 
783  EBOMDiscard discard_bom)
784 {
786  if (input.good()) {
787  const int bom_max = 4;
788  char tmp[bom_max];
789  memset(tmp, 0, bom_max);
790  Uint2* us = reinterpret_cast<Uint2*>(tmp);
791  Uchar* uc = reinterpret_cast<Uchar*>(tmp);
792  input.get(tmp[0]);
793  int n = (int) input.gcount();
794  if (n == 1 && (uc[0] == 0xEF || uc[0] == 0xFE || uc[0] == 0xFF)){
795  input.get(tmp[1]);
796  if (input.gcount() == 1) {
797  ++n;
798  if (us[0] == 0xFEFF) {
800  } else if (us[0] == 0xFFFE) {
802  } else if (uc[1] == 0xBB) {
803  input.get(tmp[2]);
804  if (input.gcount() == 1) {
805  ++n;
806  if (uc[2] == 0xBF) {
807  ef = eEncodingForm_Utf8;
808  }
809  }
810  }
811  }
812  }
813  if (ef == eEncodingForm_Unknown) {
814  if (n > 1) {
816  } else if (n == 1) {
817  input.unget();
818  }
819  } else {
820  if (discard_bom == eBOM_Keep) {
822  }
823  }
824  }
825  return ef;
826 }
827 
829 {
830  switch (bom.GetEncodingForm()) {
831  /// Stream has no BOM.
832  default:
836  break;
837  case eEncodingForm_Utf8:
838  str << Uint1(0xEF) << Uint1(0xBB) << Uint1(0xBF);
839  break;
841 #ifdef WORDS_BIGENDIAN
842  str << Uint1(0xFE) << Uint1(0xFF);
843 #else
844  str << Uint1(0xFF) << Uint1(0xFE);
845 #endif
846  break;
848 #ifdef WORDS_BIGENDIAN
849  str << Uint1(0xFF) << Uint1(0xFE);
850 #else
851  str << Uint1(0xFE) << Uint1(0xFF);
852 #endif
853  break;
854  }
855  return str;
856 }
857 
858 
859 #include "ncbi_base64.c"
860 
861 
863 
864 
865 // See in the header why it is outside of NCBI scope (SunPro bug workaround...)
866 
867 #if defined(NCBI_USE_OLD_IOSTREAM)
868 
870  const NCBI_NS_STD::string& str)
871 {
872  return str.empty() ? os : os << str.c_str();
873 }
874 
875 
878 {
879  int ch;
880  if ( !is.ipfx() )
881  return is;
882 
883  str.erase();
884 
885  SIZE_TYPE end = str.max_size();
886  if ( is.width() )
887  end = (streamsize) end < is.width() ? end : is.width();
888 
889  SIZE_TYPE i = 0;
890  for (ch = is.rdbuf()->sbumpc();
891  ch != EOF && !isspace((unsigned char) ch);
892  ch = is.rdbuf()->sbumpc()) {
893  str.append(1, (char) ch);
894  if (++i == end)
895  break;
896  }
897  if (ch == EOF)
898  is.clear(NcbiEofbit | is.rdstate());
899  if ( !i )
900  is.clear(NcbiFailbit | is.rdstate());
901 
902  is.width(0);
903  return is;
904 }
905 
906 #endif /* NCBI_USE_OLD_IOSTREAM */
#define true
Definition: bool.h:35
Byte Order Mark helper class to use in serialization.
Definition: ncbistre.hpp:1080
CCoreException –.
Definition: ncbiexpt.hpp:1476
Utility class for automatic conversion of strings to all lowercase letters.
Definition: ncbistre.hpp:862
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
Definition: ncbistre.hpp:802
Utility class for automatic conversion of strings (that may contain non-graphical characters) to a sa...
Definition: ncbistre.hpp:887
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
Utility class for automatic conversion of strings to all uppercase letters.
Definition: ncbistre.hpp:842
Include a standard set of the NCBI C++ Toolkit most basic headers.
static Uint4 bk
constexpr auto end(const ct_const_array< T, N > &in) noexcept
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:61
std::ofstream out("events_result.xml")
main entry point for tests
CNcbiIstream & operator>>(CNcbiIstream &s, const getcontig &c)
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
string
Definition: cgiapp.hpp:687
#define NULL
Definition: ncbistd.hpp:225
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
#define NCBI_CATCH_ALL(message)
This macro is deprecated - use *_X or *_XX variant instead of it.
Definition: ncbiexpt.hpp:587
uint8_t Uint1
1-byte (8-bit) unsigned integer
Definition: ncbitype.h:99
unsigned char Uchar
Alias for unsigned char.
Definition: ncbitype.h:95
uint16_t Uint2
2-byte (16-bit) unsigned integer
Definition: ncbitype.h:101
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
std::string CStringUTF8
Definition: ncbistl.hpp:254
void NcbiStreamCopyThrow(CNcbiOstream &os, CNcbiIstream &is)
Same as NcbiStreamCopy() but throws an CCoreException when copy fails.
Definition: ncbistre.cpp:225
EEncodingForm
Helper functions to read plain-text data streams.
Definition: ncbistre.hpp:994
CNcbiOstream & operator<<(CNcbiOstream &out, const CNcbiOstrstreamToString &s)
Definition: ncbistre.cpp:443
string Printable(char c)
Convert one single character to a "printable" form.
Definition: ncbistre.cpp:505
EEncodingForm GetTextEncodingForm(CNcbiIstream &input, EBOMDiscard discard_bom)
Detect if the stream has BOM.
Definition: ncbistre.cpp:782
const string & m_String
Definition: ncbistre.hpp:845
#define CT_TO_CHAR_TYPE
Definition: ncbistre.hpp:735
#define NcbiEofbit
Definition: ncbistre.hpp:569
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
const char * m_String
Definition: ncbistre.hpp:852
EBOMDiscard
Whether to discard BOM or to keep it in the input stream.
Definition: ncbistre.hpp:1056
const string & m_String
Definition: ncbistre.hpp:865
#define CT_EOF
Definition: ncbistre.hpp:732
static void Pushback(CNcbiIstream &is, CT_CHAR_TYPE *buf, streamsize buf_size, void *del_ptr)
bool NcbiStreamCompare(CNcbiIstream &is1, CNcbiIstream &is2)
Compare stream contents in binary form.
Definition: ncbistre.cpp:356
const string & m_String
Definition: ncbistre.hpp:890
const char * Endl(void)
Platform-specific EndOfLine.
Definition: ncbistre.cpp:184
#define CT_TO_INT_TYPE
Definition: ncbistre.hpp:734
#define CT_INT_TYPE
Definition: ncbistre.hpp:728
#define NcbiFailbit
Definition: ncbistre.hpp:570
ECompareTextMode
Mode to compare streams in text form.
Definition: ncbistre.hpp:686
#define NcbiBadbit
Definition: ncbistre.hpp:571
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
#define CT_EQ_INT_TYPE
Definition: ncbistre.hpp:736
EEncodingForm ReadIntoUtf8(CNcbiIstream &input, CStringUTF8 *result, EEncodingForm ef, EReadUnknownNoBOM what_if_no_bom)
Read all input data from stream and try convert it into UTF8 string.
Definition: ncbistre.cpp:650
#define NcbiGoodbit
Definition: ncbistre.hpp:568
CNcbiOstrstream & m_Out
Definition: ncbistre.hpp:814
bool NcbiStreamCopy(CNcbiOstream &os, CNcbiIstream &is)
Copy the entire contents of stream "is" to stream "os".
Definition: ncbistre.cpp:211
EReadUnknownNoBOM
How to read the text if the encoding form is not known (i.e.
Definition: ncbistre.hpp:1014
const char * m_String
Definition: ncbistre.hpp:872
bool NcbiStreamCompareText(CNcbiIstream &is1, CNcbiIstream &is2, ECompareTextMode mode, size_t buf_size)
Compare stream contents in text form.
Definition: ncbistre.cpp:394
size_t NcbiStreamToString(string *str, CNcbiIstream &is, size_t pos)
Input the entire contents of an istream into a string (NULL causes drain).
Definition: ncbistre.cpp:296
EEncodingForm GetEncodingForm(void) const
Definition: ncbistre.hpp:1090
@ eEncodingForm_Utf16Foreign
Stream has UTF16 BOM. Byte order is nonnative for this OS.
Definition: ncbistre.hpp:1006
@ eEncodingForm_Utf8
Stream has UTF8 BOM.
Definition: ncbistre.hpp:1002
@ eEncodingForm_ISO8859_1
Stream has no BOM.
Definition: ncbistre.hpp:998
@ eEncodingForm_Windows_1252
Stream has no BOM.
Definition: ncbistre.hpp:1000
@ eEncodingForm_Unknown
Stream has no BOM.
Definition: ncbistre.hpp:996
@ eEncodingForm_Utf16Native
Stream has UTF16 BOM. Byte order is native for this OS.
Definition: ncbistre.hpp:1004
@ eBOM_Keep
Push the read BOM bytes back into the input stream.
Definition: ncbistre.hpp:1058
@ eCompareText_IgnoreEol
Skip end-of-line characters ('\r' and ' ')
Definition: ncbistre.hpp:688
@ eCompareText_IgnoreWhiteSpace
Definition: ncbistre.hpp:690
@ eNoBOM_GuessEncoding
Try to guess the text's encoding form.
Definition: ncbistre.hpp:1028
NCBI_NS_STD::string::size_type SIZE_TYPE
Definition: ncbistr.hpp:132
EEncoding
Definition: ncbistr.hpp:199
static string Int8ToString(Int8 value, TNumToStringFlags flags=0, int base=10)
Convert Int8 to string.
Definition: ncbistr.hpp:5158
#define NPOS
Definition: ncbistr.hpp:133
static EEncoding GuessEncoding(const CTempString &src)
Guess the encoding of the C/C++ string.
Definition: ncbistr.cpp:6687
static CStringUTF8 AsUTF8(const CTempString &src, EEncoding encoding, EValidate validate=eNoValidate)
Convert into UTF8 from a C/C++ string.
Definition: ncbistr.hpp:3888
#define _T_XSTRING(x)
Definition: ncbistr.hpp:179
static SIZE_TYPE GetValidBytesCount(const CTempString &src)
Get the number of valid UTF-8 bytes (code units) in buffer.
Definition: ncbistr.hpp:5656
@ eEncoding_Windows_1252
Definition: ncbistr.hpp:207
@ eEncoding_Ascii
Definition: ncbistr.hpp:202
@ eEncoding_ISO8859_1
Note: From the point of view of the C++.
Definition: ncbistr.hpp:203
@ eEncoding_UTF8
Definition: ncbistr.hpp:201
@ eEncoding_Unknown
Definition: ncbistr.hpp:200
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
char * buf
static int input()
int i
yy_size_t n
int len
mdb_mode_t mode
Definition: lmdb++.h:38
const struct ncbi::grid::netcache::search::fields::SIZE size
EIPRangeType t
Definition: ncbi_localip.c:101
int isspace(Uchar c)
Definition: ncbictype.hpp:69
int tolower(Uchar c)
Definition: ncbictype.hpp:72
int toupper(Uchar c)
Definition: ncbictype.hpp:73
int isprint(Uchar c)
Definition: ncbictype.hpp:67
void NcbiStreamCopyHead(CNcbiOstream &os, CNcbiIstream &is, SIZE_TYPE count)
Definition: ncbistre.cpp:238
CNcbiIstream & NcbiGetline(CNcbiIstream &is, string &str, const string &delims, SIZE_TYPE *count)
Definition: ncbistre.cpp:55
static void s_WritePrintable(CNcbiOstream &out, char p, char c, char n)
Definition: ncbistre.cpp:536
CNcbiIstream & NcbiGetlineEOL(CNcbiIstream &is, string &str, SIZE_TYPE *count)
Definition: ncbistre.cpp:196
static char x_GetChar(CNcbiIstream &is, ECompareTextMode mode, char *buf, size_t buf_size, char *&pos, size_t &sizeleft)
Definition: ncbistre.cpp:370
NCBI C++ stream class wrappers for triggering between "new" and "old" C++ stream libraries.
T max(T x_, T y_)
T min(T x_, T y_)
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
static char tmp[2048]
Definition: utf8.c:42
#define memmove(a, b, c)
static BOOL utf8
Definition: pcregrep.c:199
static pcre_uint8 * buffer
Definition: pcretest.c:1051
#define NcbiSys_swab
Definition: ncbisys.hpp:48
#define __int64
Definition: sse2neon.h:208
static const char * str(char *buf, int n)
Definition: stats.c:84
#define _ASSERT
else result
Definition: token2.c:20
uchar outbuf[(1000000+1000000)]
Definition: unzcrash.c:41
Modified on Fri Dec 08 08:23:43 2023 by modify_doxy.py rev. 669887