NCBI C++ ToolKit
iconv.c
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* FreeTDS - Library of routines accessing Sybase and Microsoft databases
2  * Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005 Brian Bruns
3  * Copyright (C) 2010 Frediano Ziglio
4  *
5  * This library is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Library General Public
7  * License as published by the Free Software Foundation; either
8  * version 2 of the License, or (at your option) any later version.
9  *
10  * This library is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * Library General Public License for more details.
14  *
15  * You should have received a copy of the GNU Library General Public
16  * License along with this library; if not, write to the
17  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18  * Boston, MA 02111-1307, USA.
19  */
20 
21 /**
22  * \file
23  * \brief Handle character conversions to/from server
24  */
25 
26 #include <config.h>
27 
28 #include <stdarg.h>
29 #include <stdio.h>
30 #include <assert.h>
31 
32 #if HAVE_STRING_H
33 #include <string.h>
34 #endif /* HAVE_STRING_H */
35 #if HAVE_ERRNO_H
36 #include <errno.h>
37 #endif
38 
39 #include <freetds/tds.h>
40 #include <freetds/iconv.h>
41 #include <freetds/bool.h>
42 #include <freetds/bytes.h>
43 #if HAVE_ICONV
44 #include <iconv.h>
45 #endif
46 
/*
 * Fixed width, in bytes, of the characters of an encoding.
 * Evaluates to min_bytes_per_char when it equals max_bytes_per_char and to 0
 * otherwise (i.e. 0 means "variable width").  The argument is evaluated more
 * than once.
 */
47 #define CHARSIZE(charset) ( ((charset)->min_bytes_per_char == (charset)->max_bytes_per_char )? \
48  (charset)->min_bytes_per_char : 0 )
49 
50 
51 static int collate2charset(TDSCONNECTION * conn, TDS_UCHAR collate[5]);
52 static size_t skip_one_input_sequence(iconv_t cd, const TDS_ENCODING * charset, const char **input, size_t * input_size);
53 static int tds_iconv_info_init(TDSICONV * char_conv, int client_canonic, int server_canonic);
54 static bool tds_iconv_init(void);
55 static void _iconv_close(iconv_t * cd);
56 static void tds_iconv_info_close(TDSICONV * char_conv);
57 
58 
59 /**
60  * \ingroup libtds
61  * \defgroup conv Charset conversion
62  * Convert between different charsets.
63  */
64 
65 #define TDS_ICONV_ENCODING_TABLES
66 #include <freetds/encodings.h>
67 
68 /* this will contain real iconv names */
69 static const char *iconv_names[TDS_VECTOR_SIZE(canonic_charsets)];
70 static bool iconv_initialized = false;
71 static const char *ucs2name;
73 
74 enum
76 
/*
 * Reference data for the conversion self-test in tds_iconv_init().
 * Each entry is a byte length followed by the bytes of the same short text
 * in one encoding.  The init code indexes this table with the same 0..3
 * positions it uses for iconv_names[] (ISO-8859-1, UTF-8, UCS-2LE, UCS-2BE
 * according to the asserts there).
 */
77 static const struct {
79  /* this field must be aligned at least to 2 bytes */
80  char data[12];
81 } test_strings[4] = {
82  /* same string in required charsets */
83  { 4, "Ao\xD3\xE5" },
84  { 6, "Ao\xC3\x93\xC3\xA5" },
85  { 8, "A\x00o\x000\xD3\x00\xE5\x00" },
86  { 8, "\x00" "A\x00o\x000\xD3\x00\xE5" },
87 };
88 
89 /**
90  * Initialize charset searching for UTF-8, UCS-2 and ISO8859-1
 *
 * Discovers the names the local iconv implementation uses for the base
 * charsets and stores them in iconv_names[].  Afterwards every pair of
 * discovered base charsets is checked by converting the matching
 * test_strings[] entry and comparing the result byte for byte.
 *
 * \return true when the required names were found and every checked
 *         conversion produced the expected bytes, false otherwise.
91  */
92 static bool
94 {
95  int i;
96  iconv_t cd;
97 
98  /* first entries should be constants */
99  assert(strcmp(canonic_charsets[POS_ISO1].name, "ISO-8859-1") == 0);
100  assert(strcmp(canonic_charsets[POS_UTF8].name, "UTF-8") == 0);
101  assert(strcmp(canonic_charsets[POS_UCS2LE].name, "UCS-2LE") == 0);
102  assert(strcmp(canonic_charsets[POS_UCS2BE].name, "UCS-2BE") == 0);
103 
104  /* fast tests for GNU-iconv */
105  cd = tds_sys_iconv_open("ISO-8859-1", "UTF-8");
106  if (cd != (iconv_t) -1) {
107  iconv_names[POS_ISO1] = "ISO-8859-1";
108  iconv_names[POS_UTF8] = "UTF-8";
110  } else {
111 
112  /* search names for ISO8859-1 and UTF-8 */
 /* try every (ISO-8859-1 alias, UTF-8 alias) pair until one opens */
113  for (i = 0; iconv_aliases[i].alias; ++i) {
114  int j;
115 
116  if (iconv_aliases[i].canonic != POS_ISO1)
117  continue;
118  for (j = 0; iconv_aliases[j].alias; ++j) {
119  if (iconv_aliases[j].canonic != POS_UTF8)
120  continue;
121 
122  cd = tds_sys_iconv_open(iconv_aliases[i].alias, iconv_aliases[j].alias);
123  if (cd != (iconv_t) -1) {
124  iconv_names[POS_ISO1] = iconv_aliases[i].alias;
125  iconv_names[POS_UTF8] = iconv_aliases[j].alias;
127  break;
128  }
129  }
 /* a name was stored by the inner loop: stop the outer search too */
130  if (iconv_names[POS_ISO1])
131  break;
132  }
133  /* required characters not found !!! */
134  if (!iconv_names[POS_ISO1]) {
135  tdsdump_log(TDS_DBG_ERROR, "iconv name for ISO-8859-1 not found\n");
136  return false;
137  }
138  }
139 
140  /* now search for UCS-2 */
141  cd = tds_sys_iconv_open(iconv_names[POS_ISO1], "UCS-2LE");
142  if (cd != (iconv_t) -1) {
143  iconv_names[POS_UCS2LE] = "UCS-2LE";
145  }
146  cd = tds_sys_iconv_open(iconv_names[POS_ISO1], "UCS-2BE");
147  if (cd != (iconv_t) -1) {
148  iconv_names[POS_UCS2BE] = "UCS-2BE";
150  }
151 
152  /* long search needed ?? */
154  for (i = 0; iconv_aliases[i].alias; ++i) {
 /* only aliases whose canonical name starts with "UCS-2" are probed */
155  if (strncmp(canonic_charsets[iconv_aliases[i].canonic].name, "UCS-2", 5) != 0)
156  continue;
157 
158  cd = tds_sys_iconv_open(iconv_aliases[i].alias, iconv_names[POS_ISO1]);
159  if (cd != (iconv_t) -1) {
160  char ib[1];
161  char ob[4];
162  size_t il, ol;
163  ICONV_CONST char *pib;
164  char *pob;
165  int byte_sequence = 0;
166 
167  /* try to convert 'A' and check result */
168  ib[0] = 0x41;
169  pib = ib;
170  pob = ob;
171  il = 1;
172  ol = 4;
173  ob[0] = ob[1] = 0;
174  if (tds_sys_iconv(cd, &pib, &il, &pob, &ol) != (size_t) - 1) {
175  /* byte order sequence ?? */
 /*
  * All 4 output bytes were used for a single input byte, so
  * presumably 2 bytes of byte-order mark came first and the
  * character itself starts at ob[2].
  */
176  if (ol == 0) {
177  ob[0] = ob[2];
178  byte_sequence = 1;
179  /* TODO save somewhere */
180  }
181 
182  /* save name without sequence (if present) */
 /* a non-zero first byte means 'A' was stored low byte first */
183  if (ob[0])
184  il = POS_UCS2LE;
185  else
186  il = POS_UCS2BE;
 /* an alias that emits no byte-order sequence overrides an earlier choice */
187  if (!iconv_names[il] || !byte_sequence)
188  iconv_names[il] = iconv_aliases[i].alias;
189  }
191  }
192  }
193  }
194  /* we need a UCS-2 (big endian or little endian) */
196  tdsdump_log(TDS_DBG_ERROR, "iconv name for UCS-2 not found\n");
197  return false;
198  }
199 
201 
202  for (i = 0; i < 4; ++i)
203  tdsdump_log(TDS_DBG_INFO1, "local name for %s is %s\n", canonic_charsets[i].name,
204  iconv_names[i] ? iconv_names[i] : "(null)");
205 
206  /* base conversions checks */
 /* i enumerates every ordered (from, to) pair of the 4 base charsets, from == to included */
207  for (i = 0; i < 4 * 4; ++i) {
208  const int from = i / 4;
209  const int to = i % 4;
210  char ob[16];
211  size_t il, ol;
212  ICONV_CONST char *pib;
213  char *pob;
214  size_t res;
215 
 /* a base charset whose name was not discovered is simply not checked */
216  if (!iconv_names[from] || !iconv_names[to])
217  continue;
218  cd = tds_sys_iconv_open(iconv_names[to], iconv_names[from]);
219  if (cd == (iconv_t) -1) {
220  tdsdump_log(TDS_DBG_ERROR, "iconv_open(%s, %s) failed\n", iconv_names[to], iconv_names[from]);
221  return false;
222  }
223 
224  pib = (ICONV_CONST char *) test_strings[from].data;
225  il = test_strings[from].len;
226  pob = ob;
227  ol = sizeof(ob);
228  res = tds_sys_iconv(cd, &pib, &il, &pob, &ol);
230 
 /* the output must match the reference string both in length and in content */
231  if (res != 0
232  || sizeof(ob) - ol != test_strings[to].len
233  || memcmp(ob, test_strings[to].data, test_strings[to].len) != 0) {
234  tdsdump_log(TDS_DBG_ERROR, "iconv(%s, %s) failed res %d\n", iconv_names[to], iconv_names[from], (int) res);
235  tdsdump_log(TDS_DBG_ERROR, "len %d\n", (int) (sizeof(ob) - ol));
236  return false;
237  }
238  }
239 
240  /* success (this should always happen) */
241  return true;
242 }
243 
244 /**
245  * Get iconv name given canonic
 *
 * Looks for a name the local iconv accepts for the canonical charset
 * \a charset, trying the canonical name first and then every alias listed
 * for it in iconv_aliases.  The result is cached in iconv_names[charset].
 *
 * \param charset index into canonic_charsets
 * \return the accepted name, or NULL when none was accepted; in that case
 *         iconv_names[charset] is set to the canonical ISO-8859-1 name.
246  */
247 static const char *
248 tds_set_iconv_name(int charset)
249 {
250  int i;
251  iconv_t cd;
252  const char *name;
253 
256 
257  /* try using canonic name and UTF-8 and UCS2 */
258  name = canonic_charsets[charset].name;
260  if (cd != (iconv_t) -1)
261  goto found;
262  cd = tds_sys_iconv_open(ucs2name, name);
263  if (cd != (iconv_t) -1)
264  goto found;
265 
266  /* try all alternatives */
267  for (i = 0; iconv_aliases[i].alias; ++i) {
268  if (iconv_aliases[i].canonic != charset)
269  continue;
270 
271  name = iconv_aliases[i].alias;
273  if (cd != (iconv_t) -1)
274  goto found;
275  cd = tds_sys_iconv_open(ucs2name, name);
276  if (cd != (iconv_t) -1)
277  goto found;
278  }
279 
280  /* charset not found, pretend it's ISO 8859-1 */
281  iconv_names[charset] = canonic_charsets[POS_ISO1].name;
283  return NULL;
284 
285 found:
 /* remember the working name so later lookups skip the search */
286  iconv_names[charset] = name;
289  return name;
290 }
291 
/*
 * Put a conversion structure into a neutral state: empty charset names,
 * canonical index 0 and both iconv descriptors marked as not open
 * ((iconv_t) -1).
 */
292 static void
294 {
295  /*
296  * (min|max)_bytes_per_char can be used to divide
297  * so init to safe values
298  */
299  conv->to.charset.min_bytes_per_char = 1;
300  conv->to.charset.max_bytes_per_char = 1;
303 
304  conv->to.charset.name = conv->from.charset.name = "";
305  conv->to.charset.canonic = conv->from.charset.canonic = 0;
306  conv->to.cd = (iconv_t) -1;
307  conv->from.cd = (iconv_t) -1;
308 }
309 
310 /**
311  * Allocate iconv stuff
 *
 * Creates the pointer array conn->char_convs with initial_char_conv_count + 1
 * slots.  The first initial_char_conv_count slots point at freshly reset
 * conversion structures; the extra last slot is an alias of the
 * client2server_chardata entry.  conn->char_convs must be NULL on entry.
 *
312  * \return 0 for success, 1 if an allocation failed
313  */
314 int
316 {
317  int i;
318  TDSICONV *char_conv;
319 
320  assert(!conn->char_convs);
321  if (!(conn->char_convs = tds_new(TDSICONV *, initial_char_conv_count + 1)))
322  return 1;
324  if (!char_conv) {
 /* undo the pointer array allocation so conn is left without converters */
325  TDS_ZERO_FREE(conn->char_convs);
326  return 1;
327  }
328  conn->char_conv_count = initial_char_conv_count + 1;
329 
330  for (i = 0; i < initial_char_conv_count; ++i) {
331  conn->char_convs[i] = &char_conv[i];
332  tds_iconv_reset(&char_conv[i]);
333  }
334 
335  /* chardata is just a pointer to another iconv info */
336  conn->char_convs[initial_char_conv_count] = conn->char_convs[client2server_chardata];
337 
338  return 0;
339 }
340 
341 /**
342  * \addtogroup conv
343  * @{
344  * Set up the initial iconv conversion descriptors.
345  * When the socket is allocated, three TDSICONV structures are attached to iconv.
346  * They have fixed meanings:
347  * \li 0. Client <-> UCS-2 (client2ucs2)
348  * \li 1. Client <-> server single-byte charset (client2server_chardata)
349  *
 * NOTE(review): the text above and below speaks of three structures but only
 * two are listed; confirm against initial_char_conv_count.
 *
350  * Other designs that use less data are possible, but these three conversion needs are
351  * very often needed. By reserving them, we avoid searching the array for our most common purposes.
352  *
353  * To solve different iconv names and portability problems FreeTDS maintains
354  * a list of aliases for each charset.
355  *
356  * First we discover the names of our minimum required charsets (UTF-8, ISO8859-1 and UCS2).
357  * Later, as and when it's needed, we try to discover others.
358  *
359  * There is one list of canonic names (GNU iconv names) and two sets of aliases
360  * (one for other iconv implementations and another for Sybase). For every
361  * canonic charset name we cache the iconv name found during discovery.
 *
 * \param conn      connection whose char_convs entries are set up
 * \param charset   client charset name
 * \param use_utf16 when non-zero UTF-16LE is tried first for the wide-character
 *                  conversion and UCS-2LE is the fallback; forced on for TDS 5.0
 * \return TDS_SUCCESS, or TDS_FAIL when the one-time initialization or one of
 *         the conversion set-ups fails
362  */
363 TDSRET
364 tds_iconv_open(TDSCONNECTION * conn, const char *charset, int use_utf16)
365 {
366  static const char UCS_2LE[] = "UCS-2LE";
367  int canonic;
368  int canonic_charset = tds_canonical_charset(charset);
 /* -1 stands for "no environment charset known" */
369  int canonic_env_charset = conn->env.charset ? tds_canonical_charset(conn->env.charset) : -1;
370  int fOK;
371 
372  TDS_ENCODING *client = &conn->char_convs[client2ucs2]->from.charset;
373  TDS_ENCODING *server = &conn->char_convs[client2ucs2]->to.charset;
374 
375  tdsdump_log(TDS_DBG_FUNC, "tds_iconv_open(%p, %s, %d)\n", conn, charset, use_utf16);
376 
377  /* TDS 5.0 support only UTF-16 encodings */
378  if (IS_TDS50(conn))
379  use_utf16 = true;
380 
381  /* initialize */
383  if (!iconv_initialized) {
384  if (!tds_iconv_init()) {
385  tdsdump_log(TDS_DBG_ERROR, "error: tds_iconv_init() failed; "
386  "try using GNU libiconv library\n");
388  return TDS_FAIL;
389  }
390  iconv_initialized = true;
391  }
393 
394  /*
395  * Client <-> UCS-2 (client2ucs2)
396  */
397  tdsdump_log(TDS_DBG_FUNC, "setting up conversions for client charset \"%s\"\n", charset);
398 
399  tdsdump_log(TDS_DBG_FUNC, "preparing iconv for \"%s\" <-> \"%s\" conversion\n", charset, UCS_2LE);
400 
401  fOK = 0;
402  if (use_utf16) {
403  canonic = TDS_CHARSET_UTF_16LE;
404  fOK = tds_iconv_info_init(conn->char_convs[client2ucs2], canonic_charset, canonic);
405  }
 /* UTF-16 not requested or not available: fall back to UCS-2LE */
406  if (!fOK) {
407  canonic = TDS_CHARSET_UCS_2LE;
408  fOK = tds_iconv_info_init(conn->char_convs[client2ucs2], canonic_charset, canonic);
409  }
410  if (!fOK)
411  return TDS_FAIL;
412 
413  /*
414  * How many UTF-8 bytes we need is a function of what the input character set is.
415  * TODO This could definitely be more sophisticated, but it deals with the common case.
416  */
417  if (client->min_bytes_per_char == 1 && client->max_bytes_per_char == 4 && server->max_bytes_per_char == 1) {
418  /* ie client is UTF-8 and server is ISO-8859-1 or variant. */
419  client->max_bytes_per_char = 3;
420  }
421 
422  /*
423  * Client <-> server single-byte charset
424  * TODO: the server hasn't reported its charset yet, so this logic can't work here.
425  * not sure what to do about that yet.
426  */
427  conn->char_convs[client2server_chardata]->flags = TDS_ENCODING_MEMCPY;
428  if (canonic_env_charset >= 0) {
429  tdsdump_log(TDS_DBG_FUNC, "preparing iconv for \"%s\" <-> \"%s\" conversion\n", charset, conn->env.charset);
430  fOK = tds_iconv_info_init(conn->char_convs[client2server_chardata], canonic_charset, canonic_env_charset);
431  if (!fOK)
432  return TDS_FAIL;
433  } else {
 /* no server charset yet: both sides use the client charset and data is copied as is */
434  conn->char_convs[client2server_chardata]->from.charset = canonic_charsets[canonic_charset];
435  conn->char_convs[client2server_chardata]->to.charset = canonic_charsets[canonic_charset];
436  }
437 
438  tdsdump_log(TDS_DBG_FUNC, "tds_iconv_open: done\n");
439  return TDS_SUCCESS;
440 }
441 
442 /**
443  * Open iconv descriptors to convert between character sets (both directions).
444  * 1. Look up the canonical names of the character sets.
445  * 2. Look up their widths.
446  * 3. Ask iconv to open a conversion descriptor.
447  * 4. Fail if any of the above offer any resistance.
448  * \remarks The charset names written to \a iconv will be the canonical names,
449  * not necessarily the names passed in.
450  */
451 static int
452 tds_iconv_info_init(TDSICONV * char_conv, int client_canonical, int server_canonical)
453 {
454  TDS_ENCODING *client = &char_conv->from.charset;
455  TDS_ENCODING *server = &char_conv->to.charset;
456 
457  assert(char_conv->to.cd == (iconv_t) -1);
458  assert(char_conv->from.cd == (iconv_t) -1);
459 
460  if (client_canonical < 0) {
461  tdsdump_log(TDS_DBG_FUNC, "tds_iconv_info_init: client charset name \"%d\" invalid\n", client_canonical);
462  return 0;
463  }
464 
465  if (server_canonical < 0) {
466  tdsdump_log(TDS_DBG_FUNC, "tds_iconv_info_init: server charset name \"%d\" invalid\n", server_canonical);
467  return 0;
468  }
469 
470  *client = canonic_charsets[client_canonical];
471  *server = canonic_charsets[server_canonical];
472 
473  /* special case, same charset, no conversion */
474  if (client_canonical == server_canonical) {
475  char_conv->to.cd = (iconv_t) -1;
476  char_conv->from.cd = (iconv_t) -1;
477  char_conv->flags = TDS_ENCODING_MEMCPY;
478  return 1;
479  }
480 
481  char_conv->flags = 0;
482 
483  /* get iconv names */
484  if (!iconv_names[client_canonical]) {
485  if (!tds_set_iconv_name(client_canonical)) {
486  tdsdump_log(TDS_DBG_FUNC, "Charset %d not supported by iconv, using \"%s\" instead\n",
487  client_canonical, iconv_names[client_canonical]);
488  }
489  }
490 
491  if (!iconv_names[server_canonical]) {
492  if (!tds_set_iconv_name(server_canonical)) {
493  tdsdump_log(TDS_DBG_FUNC, "Charset %d not supported by iconv, using \"%s\" instead\n",
494  server_canonical, iconv_names[server_canonical]);
495  }
496  }
497 
498  char_conv->to.cd = tds_sys_iconv_open(iconv_names[server_canonical], iconv_names[client_canonical]);
499  if (char_conv->to.cd == (iconv_t) -1) {
500  tdsdump_log(TDS_DBG_FUNC, "tds_iconv_info_init: cannot convert \"%s\"->\"%s\"\n", client->name, server->name);
501  }
502 
503  char_conv->from.cd = tds_sys_iconv_open(iconv_names[client_canonical], iconv_names[server_canonical]);
504  if (char_conv->from.cd == (iconv_t) -1) {
505  tdsdump_log(TDS_DBG_FUNC, "tds_iconv_info_init: cannot convert \"%s\"->\"%s\"\n", server->name, client->name);
506  }
507 
508  /* TODO, do some optimizations like UCS2 -> UTF8 min,max = 2,2 (UCS2) and 1,4 (UTF8) */
509 
510  /* tdsdump_log(TDS_DBG_FUNC, "tds_iconv_info_init: converting \"%s\"->\"%s\"\n", client->name, server->name); */
511 
512  return 1;
513 }
514 
515 
/*
 * Close the iconv descriptor stored at *cd unless it is already
 * (iconv_t) -1, then store (iconv_t) -1 there so that a second call
 * does nothing.
 */
516 static void
518 {
519  static const iconv_t invalid = (iconv_t) -1;
520 
521  if (*cd != invalid) {
522  tds_sys_iconv_close(*cd);
523  *cd = invalid;
524  }
525 }
526 
/* Close both descriptors (to and from) of one conversion structure. */
527 static void
529 {
530  _iconv_close(&char_conv->to.cd);
531  _iconv_close(&char_conv->from.cd);
532 }
533 
/*
 * Close the iconv descriptors of every entry of conn->char_convs
 * (indexes 0 to char_conv_count - 1).  Nothing is freed here.
 */
534 void
536 {
537  int i;
538 
539  for (i = 0; i < conn->char_conv_count; ++i)
540  tds_iconv_info_close(conn->char_convs[i]);
541 }
542 
543 #define CHUNK_ALLOC 4
544 
/*
 * Release the memory of the conversion structures of a connection and the
 * pointer array itself, then reset char_conv_count to 0.
 * Does nothing when conn->char_convs is NULL.
 */
545 void
547 {
548  int i;
549 
550  if (!conn->char_convs)
551  return;
553 
 /* char_convs[0] is the start of the block holding the initial structures */
554  free(conn->char_convs[0]);
 /* later structures are allocated CHUNK_ALLOC at a time: free the first pointer of each chunk */
555  for (i = initial_char_conv_count + 1; i < conn->char_conv_count; i += CHUNK_ALLOC)
556  free(conn->char_convs[i]);
557  TDS_ZERO_FREE(conn->char_convs);
558  conn->char_conv_count = 0;
559 }
560 
/*
 * Report error number err through tdserror() using the context of tds.
 * Does nothing when tds is NULL.
 */
561 static void
563 {
564  if (tds)
565  tdserror(tds_get_ctx(tds), tds, err, 0);
566 }
567 
568 /**
569  * Wrapper around iconv(3). Same parameters, with slightly different behavior.
570  * \param tds state information for the socket and the TDS protocol
571  * \param io Enumerated value indicating whether the data are being sent to or received from the server.
572  * \param conv information about the encodings involved, including the iconv(3) conversion descriptors.
573  * \param inbuf address of pointer to the input buffer of data to be converted.
574  * \param inbytesleft address of count of bytes in \a inbuf.
575  * \param outbuf address of pointer to the output buffer.
576  * \param outbytesleft address of count of bytes in \a outbuf.
577  * \retval number of irreversible conversions performed. -1 on error, see iconv(3) documentation for
578  * a description of the possible values of \e errno.
579  * \remarks Unlike iconv(3), none of the arguments can be nor point to NULL. Like iconv(3), all pointers will
580  * be updated. Success is signified by a nonnegative return code and \a *inbytesleft == 0.
581  * If the conversion descriptor in \a iconv is -1 or NULL, \a inbuf is copied to \a outbuf,
582  * and all parameters updated accordingly.
583  *
584  * If a character in \a inbuf cannot be converted because no such character exists in the
585  * \a outbuf character set, we emit messages similar to the ones Sybase emits when it fails such a conversion.
586  * The message varies depending on the direction of the data.
587  * On a read error, we emit Msg 2403, Severity 16 (EX_INFO):
588  * "WARNING! Some character(s) could not be converted into client's character set.
589  * Unconverted bytes were changed to question marks ('?')."
590  * On a write error we emit Msg 2402, Severity 16 (EX_USER):
591  * "Error converting client characters into server's character set. Some character(s) could not be converted."
592  * and return an error code. Client libraries relying on this routine should reflect an error back to the application.
593  *
594  * \todo Check for variable multibyte non-UTF-8 input character set.
595  * \todo Use more robust error message generation.
596  * \todo For reads, cope with \a outbuf encodings that don't have the equivalent of an ASCII '?'.
597  * \todo Support alternative to '?' for the replacement character.
598  */
599 size_t
601  const char **inbuf, size_t * inbytesleft, char **outbuf, size_t * outbytesleft)
602 {
603  static const iconv_t invalid = (iconv_t) -1;
604  TDSICONVDIR *from = NULL;
605  TDSICONVDIR *to = NULL;
606 
 /* descriptor used to produce the replacement '?'; closed before returning if it was opened */
607  iconv_t error_cd = invalid;
608 
609  char quest_mark[] = "?"; /* best to leave non-const; implementations vary */
610  ICONV_CONST char *pquest_mark;
611  size_t lquest_mark;
612  size_t irreversible;
613  size_t one_character;
614  bool eilseq_raised = false;
615  int conv_errno;
616  /* cast away const-ness */
618 
619  assert(inbuf && inbytesleft && outbuf && outbytesleft);
620 
621  /* if empty there's nothing to return.
622  * This fix case with some iconv implementation that does
623  * not handle *inbuf == NULL and *inbytesleft == 0 as
624  * empty strings
625  */
626  if (*inbytesleft == 0)
627  return 0;
628 
 /* pick source and destination sides according to the direction of the data */
629  switch (io) {
630  case to_server:
631  from = &conv->from;
632  to = &conv->to;
633  break;
634  case to_client:
635  from = &conv->to;
636  to = &conv->from;
637  break;
638  default:
639  tdsdump_log(TDS_DBG_FUNC, "tds_iconv: unable to determine if %d means in or out. \n", io);
640  assert(io == to_server || io == to_client);
641  break;
642  }
643 
644  /* silly case, memcpy */
645  if (conv->flags & TDS_ENCODING_MEMCPY || to->cd == invalid) {
 /* copy as much as fits; E2BIG is reported when the input is larger than the output space */
646  size_t len = *inbytesleft < *outbytesleft ? *inbytesleft : *outbytesleft;
647 
648  memcpy(*outbuf, *inbuf, len);
649  conv_errno = *inbytesleft > *outbytesleft ? E2BIG : 0;
650  *inbytesleft -= len;
651  *outbytesleft -= len;
652  *inbuf += len;
653  *outbuf += len;
654  errno = conv_errno;
655  return conv_errno ? (size_t) -1 : 0;
656  }
657 
658  /*
659  * Call iconv() as many times as necessary, until we reach the end of input or exhaust output.
660  */
661  for (;;) {
662  conv_errno = 0;
663  irreversible = tds_sys_iconv(to->cd, (ICONV_CONST char **) inbuf, inbytesleft, outbuf, outbytesleft);
664 
665  /* iconv success, return */
666  if (irreversible != (size_t) - 1) {
 /* irreversible conversions are treated like an invalid sequence for reporting purposes */
667  if (irreversible > 0) {
668  eilseq_raised = true;
669  }
670  /* here we detect end of conversion and try to reset shift state */
671  if (inbuf) {
672  /*
673  * if inbuf or *inbuf is NULL iconv reset the shift state.
674  * Note that setting inbytesleft to NULL can cause core so don't do it!
675  */
676  inbuf = NULL;
677  continue;
678  }
679  break;
680  }
681 
682  /* save errno, other function could change its value */
683  conv_errno = errno;
684 
685  if (conv_errno == EILSEQ)
686  eilseq_raised = true;
687 
 /* replacement with '?' is attempted only for data read from the server */
688  if (!eilseq_raised || io != to_client || !inbuf)
689  break;
690  /*
691  * Invalid input sequence encountered reading from server.
692  * Skip one input sequence, adjusting pointers.
693  */
694  one_character = skip_one_input_sequence(to->cd, &from->charset, inbuf, inbytesleft);
695 
696  if (!one_character)
697  break;
698 
699  /*
700  * To replace invalid input with '?', we have to convert a UTF-8 '?' into the output character set.
701  * In unimaginably weird circumstances, this might be impossible.
702  * We use UTF-8 instead of ASCII because some implementations
703  * do not convert singlebyte <-> singlebyte.
704  */
705  if (error_cd == invalid) {
707  if (error_cd == invalid) {
708  break; /* what to do? */
709  }
710  }
711 
712  lquest_mark = 1;
713  pquest_mark = quest_mark;
714 
715  irreversible = tds_sys_iconv(error_cd, &pquest_mark, &lquest_mark, outbuf, outbytesleft);
716 
717  if (irreversible == (size_t) - 1)
718  break;
719 
 /* the skipped sequence was the last of the input */
720  if (!*inbytesleft)
721  break;
722  }
723 
 /* each kind of problem is flagged in suppress once it has been handled */
724  if (eilseq_raised && !suppress->eilseq) {
725  /* invalid multibyte input sequence encountered */
726  if (io == to_client) {
727  if (irreversible == (size_t) - 1) {
729  } else {
731  conv_errno = 0;
732  }
733  } else {
735  }
736  suppress->eilseq = 1;
737  }
738 
739  switch (conv_errno) {
740  case EINVAL: /* incomplete multibyte sequence is encountered */
741  if (suppress->einval)
742  break;
743  /* in chunk conversion this can mean we end a chunk inside a character */
745  suppress->einval = 1;
746  break;
747  case E2BIG: /* output buffer has no more room */
748  if (suppress->e2big)
749  break;
751  suppress->e2big = 1;
752  break;
753  default:
754  break;
755  }
756 
757  if (error_cd != invalid) {
758  tds_sys_iconv_close(error_cd);
759  }
760 
 /* restore the conversion error: the calls above may have changed errno */
761  errno = conv_errno;
762  return irreversible;
763 }
764 
765 /**
766  * Get a iconv info structure, allocate and initialize if needed
 *
 * Entries beyond the reserved initial ones are searched for a matching
 * client/server pair first.  When none matches, the next free structure is
 * taken (the pointer array grows by CHUNK_ALLOC structures at a time) and
 * initialized with tds_iconv_info_init().
 *
 * \param conn           connection owning the conversion table
 * \param canonic_client canonical index of the client charset
 * \param canonic_server canonical index of the server charset
 * \return the structure, or NULL when memory is exhausted or the
 *         initialization fails
767  */
768 TDSICONV *
769 tds_iconv_get_info(TDSCONNECTION * conn, int canonic_client, int canonic_server)
770 {
771  TDSICONV *info;
772  int i;
773 
774  /* search a charset from already allocated charsets */
775  for (i = conn->char_conv_count; --i >= initial_char_conv_count;)
776  if (canonic_client == conn->char_convs[i]->from.charset.canonic
777  && canonic_server == conn->char_convs[i]->to.charset.canonic)
778  return conn->char_convs[i];
779 
780  /* allocate a new iconv structure */
 /* true when every structure of the chunks allocated so far is in use */
781  if (conn->char_conv_count % CHUNK_ALLOC == ((initial_char_conv_count + 1) % CHUNK_ALLOC)) {
782  TDSICONV **p;
783  TDSICONV *infos;
784 
785  infos = tds_new(TDSICONV, CHUNK_ALLOC);
786  if (!infos)
787  return NULL;
 /* grow through a temporary so the old array stays valid if realloc fails */
788  p = (TDSICONV **) realloc(conn->char_convs, sizeof(TDSICONV *) * (conn->char_conv_count + CHUNK_ALLOC));
789  if (!p) {
790  free(infos);
791  return NULL;
792  }
793  conn->char_convs = p;
794  memset(infos, 0, sizeof(TDSICONV) * CHUNK_ALLOC);
795  for (i = 0; i < CHUNK_ALLOC; ++i) {
796  conn->char_convs[i + conn->char_conv_count] = &infos[i];
797  tds_iconv_reset(&infos[i]);
798  }
799  }
800  info = conn->char_convs[conn->char_conv_count++];
801 
802  /* init */
803  if (tds_iconv_info_init(info, canonic_client, canonic_server))
804  return info;
805 
 /* initialization failed: give the slot back */
807  --conn->char_conv_count;
808  return NULL;
809 }
810 
811 TDSICONV *
812 tds_iconv_get(TDSCONNECTION * conn, const char *client_charset, const char *server_charset)
813 {
814  int canonic_client_charset_num = tds_canonical_charset(client_charset);
815  int canonic_server_charset_num = tds_canonical_charset(server_charset);
816 
817  if (canonic_client_charset_num < 0) {
818  tdsdump_log(TDS_DBG_FUNC, "tds_iconv_get: what is charset \"%s\"?\n", client_charset);
819  return NULL;
820  }
821  if (canonic_server_charset_num < 0) {
822  tdsdump_log(TDS_DBG_FUNC, "tds_iconv_get: what is charset \"%s\"?\n", server_charset);
823  return NULL;
824  }
825 
826  return tds_iconv_get_info(conn, canonic_client_charset_num, canonic_server_charset_num);
827 }
828 
829 /* change singlebyte conversions according to server */
830 static void
831 tds_srv_charset_changed_num(TDSCONNECTION * conn, int canonic_charset_num)
832 {
833  TDSICONV *char_conv = conn->char_convs[client2server_chardata];
834 
835  if (IS_TDS7_PLUS(conn) && canonic_charset_num == TDS_CHARSET_ISO_8859_1)
836  canonic_charset_num = TDS_CHARSET_CP1252;
837 
838  tdsdump_log(TDS_DBG_FUNC, "setting server single-byte charset to \"%s\"\n", canonic_charsets[canonic_charset_num].name);
839 
840  if (canonic_charset_num == char_conv->to.charset.canonic)
841  return;
842 
843  /* find and set conversion */
844  char_conv = tds_iconv_get_info(conn, conn->char_convs[client2ucs2]->from.charset.canonic, canonic_charset_num);
845  if (char_conv)
846  conn->char_convs[client2server_chardata] = char_conv;
847 }
848 
/*
 * React to a server charset given by name.
 * The name is translated with tds_canonical_charset(); an unknown name is
 * logged and otherwise ignored.
 */
849 void
851 {
852  int n = tds_canonical_charset(charset);
853 
854  /* ignore request to change to unknown charset */
855  if (n < 0) {
856  tdsdump_log(TDS_DBG_FUNC, "tds_srv_charset_changed: what is charset \"%s\"?\n", charset);
857  return;
858  }
859 
861 }
862 
863 /* change singlebyte conversions according to server */
864 void
866 {
868 }
869 
870 /**
871  * Move the input sequence pointer to the next valid position.
872  * Used when an input character cannot be converted.
873  * \returns number of bytes to skip.
874  */
875 /* FIXME possible buffer reading overflow ?? */
876 static size_t
877 skip_one_input_sequence(iconv_t cd, const TDS_ENCODING * charset, const char **input, size_t * input_size)
878 {
879  unsigned charsize = CHARSIZE(charset);
880  char ib[16];
881  char ob[16];
882  ICONV_CONST char *pib;
883  char *pob;
884  size_t il, ol, l;
885  iconv_t cd2;
886 
887 
888  /* usually fixed size and UTF-8 do not have state, so do not reset it */
889  if (charsize)
890  goto skip_charsize;
891 
892  if (0 == strcmp(charset->name, "UTF-8")) {
893  /*
894  * Deal with UTF-8.
895  * bytes | bits | representation
896  * 1 | 7 | 0vvvvvvv
897  * 2 | 11 | 110vvvvv 10vvvvvv
898  * 3 | 16 | 1110vvvv 10vvvvvv 10vvvvvv
899  * 4 | 21 | 11110vvv 10vvvvvv 10vvvvvv 10vvvvvv
900  */
901  int c = **input;
902 
903  c = c & (c >> 1);
904  do {
905  ++charsize;
906  } while ((c <<= 1) & 0x80);
907  goto skip_charsize;
908  }
909 
910  /* handle state encoding */
911 
912  /* extract state from iconv */
913  pob = ib;
914  ol = sizeof(ib);
915  tds_sys_iconv(cd, NULL, NULL, &pob, &ol);
916 
917  /* init destination conversion */
918  /* TODO use largest fixed size for this platform */
919  cd2 = tds_sys_iconv_open("UCS-4", charset->name);
920  if (cd2 == (iconv_t) -1)
921  return 0;
922 
923  /* add part of input */
924  il = ol;
925  if (il > *input_size)
926  il = *input_size;
927  l = sizeof(ib) - ol;
928  memcpy(ib + l, *input, il);
929  il += l;
930 
931  /* translate a single character */
932  pib = ib;
933  pob = ob;
934  /* TODO use size of largest fixed charset */
935  ol = 4;
936  tds_sys_iconv(cd2, &pib, &il, &pob, &ol);
937 
938  /* adjust input */
939  l = (pib - ib) - l;
940  *input += l;
941  *input_size -= l;
942 
943  /* extract state */
944  pob = ib;
945  ol = sizeof(ib);
946  tds_sys_iconv(cd, NULL, NULL, &pob, &ol);
947 
948  /* set input state */
949  pib = ib;
950  il = sizeof(ib) - ol;
951  pob = ob;
952  ol = sizeof(ob);
953  tds_sys_iconv(cd, &pib, &il, &pob, &ol);
954 
955  tds_sys_iconv_close(cd2);
956 
957  if (l != 0)
958  return l;
959 
960  /* last blindly attempt, skip minimum bytes */
961  charsize = charset->min_bytes_per_char;
962 
963  /* fall through */
964 
965 skip_charsize:
966  if (charsize > *input_size)
967  return 0;
968  *input += charsize;
969  *input_size -= charsize;
970  return charsize;
971 }
972 
973 #include <freetds/charset_lookup.h>
974 
975 /**
976  * Determine canonical iconv character set.
977  * \returns canonical position, or -1 if lookup failed.
978  * \remarks Returned name can be used in bytes_per_char(), above.
979  */
980 int
981 tds_canonical_charset(const char *charset_name)
982 {
983  const struct charset_alias *c = charset_lookup(charset_name, strlen(charset_name));
984  return c ? c->canonic : -1;
985 }
986 
987 /**
988  * Determine canonical iconv character set name.
989  * \returns canonical name, or NULL if lookup failed.
990  * \remarks Returned name can be used in bytes_per_char(), above.
991  */
992 const char *
993 tds_canonical_charset_name(const char *charset_name)
994 {
995  int res;
996 
997  /* get numeric pos */
998  res = tds_canonical_charset(charset_name);
999  if (res >= 0)
1000  return canonic_charsets[res].name;
1001 
1002  return charset_name; /* hope for the best */
1003 }
1004 
1005 static int
1007 {
1008  int cp = 0;
1009  const int sql_collate = collate[4];
1010  /* extract 16 bit of LCID (it's 20 bits but higher 4 are just variations) */
1011  const int lcid = TDS_GET_UA2LE(collate);
1012 
1013  /* starting with bit 20 (little endian, so 3rd byte bit 4) there are 8 bits:
1014  * fIgnoreCase fIgnoreAccent fIgnoreKana fIgnoreWidth fBinary fBinary2 fUTF8 FRESERVEDBIT
1015  * so fUTF8 is on the 4th byte bit 2 */
1016  if ((collate[3] & 0x4) != 0 && IS_TDS74_PLUS(conn))
1017  return TDS_CHARSET_UTF_8;
1018 
1019  /*
1020  * The table from the MSQLServer reference "Windows Collation Designators"
1021  * and from " NLS Information for Microsoft Windows XP".
1022  *
1023  * See also https://go.microsoft.com/fwlink/?LinkId=119987 [MSDN-SQLCollation]
1024  */
1025 
1026  switch (sql_collate) {
1027  case 30: /* SQL_Latin1_General_CP437_BIN */
1028  case 31: /* SQL_Latin1_General_CP437_CS_AS */
1029  case 32: /* SQL_Latin1_General_CP437_CI_AS */
1030  case 33: /* SQL_Latin1_General_Pref_CP437_CI_AS */
1031  case 34: /* SQL_Latin1_General_CP437_CI_AI */
1032  return TDS_CHARSET_CP437;
1033  case 40: /* SQL_Latin1_General_CP850_BIN */
1034  case 41: /* SQL_Latin1_General_CP850_CS_AS */
1035  case 42: /* SQL_Latin1_General_CP850_CI_AS */
1036  case 43: /* SQL_Latin1_General_Pref_CP850_CI_AS */
1037  case 44: /* SQL_Latin1_General_CP850_CI_AI */
1038  case 49: /* SQL_1xCompat_CP850_CI_AS */
1039  case 55: /* SQL_AltDiction_CP850_CS_AS */
1040  case 56: /* SQL_AltDiction_Pref_CP850_CI_AS */
1041  case 57: /* SQL_AltDiction_CP850_CI_AI */
1042  case 58: /* SQL_Scandinavian_Pref_CP850_CI_AS */
1043  case 59: /* SQL_Scandinavian_CP850_CS_AS */
1044  case 60: /* SQL_Scandinavian_CP850_CI_AS */
1045  case 61: /* SQL_AltDiction_CP850_CI_AS */
1046  return TDS_CHARSET_CP850;
1047  case 80: /* SQL_Latin1_General_1250_BIN */
1048  case 81: /* SQL_Latin1_General_CP1250_CS_AS */
1049  case 82: /* SQL_Latin1_General_CP1250_CI_AS */
1050  return TDS_CHARSET_CP1250;
1051  case 105: /* SQL_Latin1_General_CP1251_CS_AS */
1052  case 106: /* SQL_Latin1_General_CP1251_CI_AS */
1053  return TDS_CHARSET_CP1251;
1054  case 113: /* SQL_Latin1_General_CP1253_CS_AS */
1055  case 114: /* SQL_Latin1_General_CP1253_CI_AS */
1056  case 120: /* SQL_MixDiction_CP1253_CS_AS */
1057  case 121: /* SQL_AltDiction_CP1253_CS_AS */
1058  case 122: /* SQL_AltDiction2_CP1253_CS_AS */
1059  case 124: /* SQL_Latin1_General_CP1253_CI_AI */
1060  return TDS_CHARSET_CP1253;
1061  case 137: /* SQL_Latin1_General_CP1255_CS_AS */
1062  case 138: /* SQL_Latin1_General_CP1255_CI_AS */
1063  return TDS_CHARSET_CP1255;
1064  case 145: /* SQL_Latin1_General_CP1256_CS_AS */
1065  case 146: /* SQL_Latin1_General_CP1256_CI_AS */
1066  return TDS_CHARSET_CP1256;
1067  case 153: /* SQL_Latin1_General_CP1257_CS_AS */
1068  case 154: /* SQL_Latin1_General_CP1257_CI_AS */
1069  return TDS_CHARSET_CP1257;
1070  }
1071 
1072  switch (lcid) {
1073  case 0x405:
1074  case 0x40e: /* 0x1040e */
1075  case 0x415:
1076  case 0x418:
1077  case 0x41a:
1078  case 0x41b:
1079  case 0x41c:
1080  case 0x424:
1081  case 0x442:
1082  case 0x81a:
1083  case 0x104e: /* ?? */
1084  case 0x141a:
1085  cp = TDS_CHARSET_CP1250;
1086  break;
1087  case 0x402:
1088  case 0x419:
1089  case 0x422:
1090  case 0x423:
1091  case 0x42f:
1092  case 0x43f:
1093  case 0x440:
1094  case 0x444:
1095  case 0x450:
1096  case 0x82c:
1097  case 0x843:
1098  case 0xc1a:
1099  case 0x46d:
1100  case 0x201a:
1101  case 0x485:
1102  cp = TDS_CHARSET_CP1251;
1103  break;
1104  case 0x1007:
1105  case 0x1009:
1106  case 0x100a:
1107  case 0x100c:
1108  case 0x1407:
1109  case 0x1409:
1110  case 0x140a:
1111  case 0x140c:
1112  case 0x1809:
1113  case 0x180a:
1114  case 0x180c:
1115  case 0x1c09:
1116  case 0x1c0a:
1117  case 0x2009:
1118  case 0x200a:
1119  case 0x2409:
1120  case 0x240a:
1121  case 0x2809:
1122  case 0x280a:
1123  case 0x2c09:
1124  case 0x2c0a:
1125  case 0x3009:
1126  case 0x300a:
1127  case 0x3409:
1128  case 0x340a:
1129  case 0x380a:
1130  case 0x3c0a:
1131  case 0x400a:
1132  case 0x403:
1133  case 0x406:
1134  case 0x417:
1135  case 0x42e:
1136  case 0x43b:
1137  case 0x452:
1138  case 0x462:
1139  case 0x47a:
1140  case 0x47c:
1141  case 0x47e:
1142  case 0x483:
1143  case 0x407: /* 0x10407 */
1144  case 0x409:
1145  case 0x40a:
1146  case 0x40b:
1147  case 0x40c:
1148  case 0x40f:
1149  case 0x410:
1150  case 0x413:
1151  case 0x414:
1152  case 0x416:
1153  case 0x41d:
1154  case 0x421:
1155  case 0x42d:
1156  case 0x436:
1157  case 0x437: /* 0x10437 */
1158  case 0x438:
1159  /*case 0x439: ??? Unicode only */
1160  case 0x43e:
1161  case 0x440a:
1162  case 0x441:
1163  case 0x456:
1164  case 0x480a:
1165  case 0x4c0a:
1166  case 0x500a:
1167  case 0x807:
1168  case 0x809:
1169  case 0x80a:
1170  case 0x80c:
1171  case 0x810:
1172  case 0x813:
1173  case 0x814:
1174  case 0x816:
1175  case 0x81d:
1176  case 0x83b:
1177  case 0x83e:
1178  case 0x85f:
1179  case 0xc07:
1180  case 0xc09:
1181  case 0xc0a:
1182  case 0xc0c:
1183  cp = TDS_CHARSET_CP1252;
1184  break;
1185  case 0x408:
1186  cp = TDS_CHARSET_CP1253;
1187  break;
1188  case 0x41f:
1189  case 0x42c:
1190  case 0x443:
1191  cp = TDS_CHARSET_CP1254;
1192  break;
1193  case 0x40d:
1194  cp = TDS_CHARSET_CP1255;
1195  break;
1196  case 0x1001:
1197  case 0x1401:
1198  case 0x1801:
1199  case 0x1c01:
1200  case 0x2001:
1201  case 0x2401:
1202  case 0x2801:
1203  case 0x2c01:
1204  case 0x3001:
1205  case 0x3401:
1206  case 0x3801:
1207  case 0x3c01:
1208  case 0x4001:
1209  case 0x401:
1210  case 0x480:
1211  case 0x420:
1212  case 0x429:
1213  case 0x48c:
1214  case 0x801:
1215  case 0xc01:
1216  cp = TDS_CHARSET_CP1256;
1217  break;
1218  case 0x425:
1219  case 0x426:
1220  case 0x427:
1221  case 0x827: /* ?? */
1222  cp = TDS_CHARSET_CP1257;
1223  break;
1224  case 0x42a:
1225  cp = TDS_CHARSET_CP1258;
1226  break;
1227  case 0x41e:
1228  cp = TDS_CHARSET_CP874;
1229  break;
1230  case 0x411: /* 0x10411 */
1231  cp = TDS_CHARSET_CP932;
1232  break;
1233  case 0x1004:
1234  case 0x804: /* 0x20804 */
1235  cp = TDS_CHARSET_GB18030;
1236  break;
1237  case 0x412: /* 0x10412 */
1238  cp = TDS_CHARSET_CP949;
1239  break;
1240  case 0x1404:
1241  case 0x404: /* 0x30404 */
1242  case 0xc04:
1243  cp = TDS_CHARSET_CP950;
1244  break;
1245  default:
1246  cp = TDS_CHARSET_CP1252;
1247  }
1248 
1249  return cp;
1250 }
1251 
1252 /**
1253  * Get iconv information from a LCID (to support different column encoding under MSSQL2K)
1254  */
1255 TDSICONV *
1257 {
1258  int canonic_charset = collate2charset(conn, collate);
1259 
1260  /* same as client (usually this is true, so this improve performance) ? */
1261  if (conn->char_convs[client2server_chardata]->to.charset.canonic == canonic_charset)
1262  return conn->char_convs[client2server_chardata];
1263 
1264  return tds_iconv_get_info(conn, conn->char_convs[client2ucs2]->from.charset.canonic, canonic_charset);
1265 }
1266 
1267 /** @} */
static const struct charset_alias * charset_lookup(register const char *str, register size_t len)
static CS_CONNECTION * conn
Definition: ct_dynamic.c:25
#define TDS_GET_UA2LE(ptr)
Definition: bytes.h:56
TDS_ICONV_DIRECTION
Definition: iconv.h:70
@ to_client
Definition: iconv.h:70
@ to_server
Definition: iconv.h:70
void * iconv_t
Definition: iconv.h:28
#define TDS_ENCODING_MEMCPY
Definition: iconv.h:95
#define EILSEQ
Definition: iconv.h:44
#define TDS_FAIL
Definition: tds.h:204
#define tds_new(type, n)
Definition: tds.h:1392
@ client2server_chardata
Definition: tds.h:1110
@ client2ucs2
Definition: tds.h:1109
@ initial_char_conv_count
Definition: tds.h:1111
#define tdsdump_log
Definition: tds.h:1561
#define TDS_DBG_INFO1
Definition: tds.h:900
#define IS_TDS50(x)
Definition: tds.h:1701
unsigned char TDS_UCHAR
Definition: tds.h:145
@ TDSEICONVO
Definition: tds.h:299
@ TDSEICONVI
Definition: tds.h:300
@ TDSEICONV2BIG
Definition: tds.h:301
@ TDSEICONVAVAIL
Definition: tds.h:298
@ TDSEICONVIU
Definition: tds.h:297
#define TDS_VECTOR_SIZE(x)
Definition: tds.h:360
#define IS_TDS7_PLUS(x)
Definition: tds.h:1708
#define tds_new0(type, n)
Definition: tds.h:1393
int TDSRET
Definition: tds.h:201
#define TDS_DBG_ERROR
Definition: tds.h:903
#define TDS_SUCCESS
Definition: tds.h:203
#define TDS_ZERO_FREE(x)
Definition: tds.h:359
#define tds_get_ctx(tds)
Definition: tds.h:1294
#define IS_TDS74_PLUS(x)
Definition: tds.h:1712
#define TDS_DBG_FUNC
Definition: tds.h:898
#define tds_mutex_lock(x)
Definition: thread.h:421
#define tds_mutex_unlock(x)
Definition: thread.h:423
int tds_iconv_alloc(TDSCONNECTION *conn)
Allocate iconv stuff.
Definition: iconv.c:286
@ POS_ISO1
Definition: iconv.c:76
@ POS_UTF8
Definition: iconv.c:76
@ POS_UCS2LE
Definition: iconv.c:76
@ POS_UCS2BE
Definition: iconv.c:76
static TDSICONV * conv
Definition: charconv.c:168
static TDSSOCKET * tds
Definition: collations.c:37
#define tdserror
int tds_canonical_charset(const char *charset_name)
Determine canonical iconv character set.
Definition: iconv.c:963
TDSICONV * tds_iconv_get_info(TDSCONNECTION *conn, int canonic_client, int canonic_server)
Get a iconv info structure, allocate and initialize if needed.
Definition: iconv.c:769
#define TDS_MUTEX_INITIALIZER
Definition: thread.h:335
#define tds_mutex
Definition: thread.h:336
static const char * tds_set_iconv_name(int charset)
Get iconv name given canonic.
Definition: iconv.c:248
static const char * iconv_names[TDS_VECTOR_SIZE(canonic_charsets)]
Definition: iconv.c:69
static bool iconv_initialized
Definition: iconv.c:70
static void tds_iconv_reset(TDSICONV *conv)
Definition: iconv.c:293
static bool tds_iconv_init(void)
Initialize charset searching for UTF-8, UCS-2 and ISO8859-1.
Definition: iconv.c:93
static const char * ucs2name
Definition: iconv.c:71
static const struct @1014 test_strings[4]
uint32_t len
Definition: iconv.c:78
static tds_mutex iconv_mtx
Definition: iconv.c:72
#define CHARSIZE(charset)
Definition: iconv.c:47
char data[12]
Definition: iconv.c:80
static size_t skip_one_input_sequence(iconv_t cd, const TDS_ENCODING *charset, const char **input, size_t *input_size)
Move the input sequence pointer to the next valid position.
Definition: iconv.c:877
static int tds_iconv_info_init(TDSICONV *char_conv, int client_canonic, int server_canonic)
Open iconv descriptors to convert between character sets (both directions).
Definition: iconv.c:452
static void tds_srv_charset_changed_num(TDSCONNECTION *conn, int canonic_charset_num)
Definition: iconv.c:831
static void _iconv_close(iconv_t *cd)
Definition: iconv.c:517
static void tds_iconv_info_close(TDSICONV *char_conv)
Definition: iconv.c:528
static void tds_iconv_err(TDSSOCKET *tds, int err)
Definition: iconv.c:562
Uint4 uint32_t
#define NULL
Definition: ncbistd.hpp:225
TDSRET tds_iconv_open(TDSCONNECTION *conn, const char *charset, int use_utf16)
Definition: iconv.c:335
void tds_iconv_close(TDSCONNECTION *conn)
Definition: iconv.c:511
void tds_srv_charset_changed(TDSCONNECTION *conn, const char *charset)
Definition: iconv.c:825
const char * tds_canonical_charset_name(const char *charset_name)
Determine canonical iconv character set name.
Definition: iconv.c:982
size_t tds_sys_iconv(iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft)
Definition: iconv.c:396
#define CHUNK_ALLOC
Definition: iconv.c:543
size_t tds_iconv(TDSSOCKET *tds, TDSICONV *conv, TDS_ICONV_DIRECTION io, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft)
Wrapper around iconv(3).
Definition: iconv.c:576
static int collate2charset(TDSCONNECTION *conn, TDS_UCHAR collate[5])
Definition: iconv.c:1006
iconv_t tds_sys_iconv_open(const char *tocode, const char *fromcode)
Inputs are FreeTDS canonical names, no other.
Definition: iconv.c:337
TDSICONV * tds_iconv_get(TDSCONNECTION *conn, const char *client_charset, const char *server_charset)
Definition: iconv.c:787
void tds7_srv_charset_changed(TDSCONNECTION *conn, int sql_collate, int lcid)
Definition: iconv.c:840
void tds_iconv_free(TDSCONNECTION *conn)
Definition: iconv.c:522
int tds_sys_iconv_close(iconv_t cd)
Definition: iconv.c:390
TDSICONV * tds_iconv_from_collate(TDSCONNECTION *conn, TDS_UCHAR collate[5])
Get iconv information from a LCID (to support different column encoding under MSSQL2K)
Definition: iconv.c:1235
@ TDS_CHARSET_CP850
Definition: encodings.h:498
@ TDS_CHARSET_CP950
Definition: encodings.h:505
@ TDS_CHARSET_CP1252
Definition: encodings.h:489
@ TDS_CHARSET_UTF_8
Definition: encodings.h:469
@ TDS_CHARSET_CP932
Definition: encodings.h:502
@ TDS_CHARSET_CP1250
Definition: encodings.h:487
@ TDS_CHARSET_CP949
Definition: encodings.h:504
@ TDS_CHARSET_CP1256
Definition: encodings.h:493
@ TDS_CHARSET_GB18030
Definition: encodings.h:509
@ TDS_CHARSET_CP1251
Definition: encodings.h:488
@ TDS_CHARSET_CP874
Definition: encodings.h:501
@ TDS_CHARSET_CP1255
Definition: encodings.h:492
@ TDS_CHARSET_CP437
Definition: encodings.h:497
@ TDS_CHARSET_UCS_2LE
Definition: encodings.h:470
@ TDS_CHARSET_CP1254
Definition: encodings.h:491
@ TDS_CHARSET_UTF_16LE
Definition: encodings.h:472
@ TDS_CHARSET_CP1253
Definition: encodings.h:490
@ TDS_CHARSET_CP1258
Definition: encodings.h:495
@ TDS_CHARSET_ISO_8859_1
Definition: encodings.h:468
@ TDS_CHARSET_CP1257
Definition: encodings.h:494
static int input()
int i
yy_size_t n
static MDB_envinfo info
Definition: mdb_load.c:37
int strncmp(const char *str1, const char *str2, size_t count)
Definition: odbc_utils.hpp:133
int strcmp(const char *str1, const char *str2)
Definition: odbc_utils.hpp:160
#define ICONV_CONST
static CNamedPipeClient * client
#define assert(x)
Definition: srv_diag.hpp:58
short int canonic
Information relevant to libiconv.
Definition: tds.h:683
unsigned char max_bytes_per_char
Definition: tds.h:687
const char * name
name of the encoding (ie UTF-8)
Definition: tds.h:685
unsigned char min_bytes_per_char
Definition: tds.h:686
unsigned char canonic
internal numeric index into array of all encodings
Definition: tds.h:689
unsigned int einval
Definition: iconv.h:81
unsigned int eilseq
Definition: iconv.h:80
unsigned int e2big
Definition: iconv.h:79
Information for a server connection.
Definition: tds.h:1211
TDS_ENCODING charset
Definition: iconv.h:86
iconv_t cd
Definition: iconv.h:88
unsigned int flags
Definition: iconv.h:96
struct tdsiconvdir to from
Definition: iconv.h:93
TDS_ERRNO_MESSAGE_FLAGS suppress
Definition: iconv.h:106
uchar inbuf[1000000]
Definition: unzcrash.c:40
uchar outbuf[(1000000+1000000)]
Definition: unzcrash.c:41
void free(voidpf ptr)
Modified on Tue May 28 05:51:48 2024 by modify_doxy.py rev. 669887