123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174 |
- /* Character set conversion.
- Copyright (C) 2007, 2009-2023 Free Software Foundation, Inc.
- This file is free software: you can redistribute it and/or modify
- it under the terms of the GNU Lesser General Public License as
- published by the Free Software Foundation; either version 2.1 of the
- License, or (at your option) any later version.
- This file is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Lesser General Public License for more details.
- You should have received a copy of the GNU Lesser General Public License
- along with this program. If not, see <https://www.gnu.org/licenses/>. */
- #include <config.h>
- /* Specification. */
- #include <iconv.h>
- #include <errno.h>
- #include <string.h>
- #include "c-ctype.h"
- #include "c-strcase.h"
- #define SIZEOF(a) (sizeof(a) / sizeof(a[0]))
- /* Namespace cleanliness. */
- #define mapping_lookup rpl_iconv_open_mapping_lookup
- /* The macro ICONV_FLAVOR is defined to one of these or undefined. */
- #define ICONV_FLAVOR_AIX "iconv_open-aix.h"
- #define ICONV_FLAVOR_HPUX "iconv_open-hpux.h"
- #define ICONV_FLAVOR_IRIX "iconv_open-irix.h"
- #define ICONV_FLAVOR_OSF "iconv_open-osf.h"
- #define ICONV_FLAVOR_SOLARIS "iconv_open-solaris.h"
- #define ICONV_FLAVOR_ZOS "iconv_open-zos.h"
- #ifdef ICONV_FLAVOR
- # include ICONV_FLAVOR
- #endif
- iconv_t
- rpl_iconv_open (const char *tocode, const char *fromcode)
- #undef iconv_open
- {
- char fromcode_upper[32];
- char tocode_upper[32];
- char *fromcode_upper_end;
- char *tocode_upper_end;
- #if REPLACE_ICONV_UTF
- /* Special handling of conversion between UTF-8 and UTF-{16,32}{BE,LE}.
- Do this here, before calling the real iconv_open(), because OSF/1 5.1
- iconv() to these encoding inserts a BOM, which is wrong.
- We do not need to handle conversion between arbitrary encodings and
- UTF-{16,32}{BE,LE}, because the 'striconveh' module implements two-step
- conversion through UTF-8.
- The _ICONV_* constants are chosen to be disjoint from any iconv_t
- returned by the system's iconv_open() functions. Recall that iconv_t
- is a scalar type. */
- if (c_toupper (fromcode[0]) == 'U'
- && c_toupper (fromcode[1]) == 'T'
- && c_toupper (fromcode[2]) == 'F'
- && fromcode[3] == '-')
- {
- if (c_toupper (tocode[0]) == 'U'
- && c_toupper (tocode[1]) == 'T'
- && c_toupper (tocode[2]) == 'F'
- && tocode[3] == '-')
- {
- if (strcmp (fromcode + 4, "8") == 0)
- {
- if (c_strcasecmp (tocode + 4, "16BE") == 0)
- return _ICONV_UTF8_UTF16BE;
- if (c_strcasecmp (tocode + 4, "16LE") == 0)
- return _ICONV_UTF8_UTF16LE;
- if (c_strcasecmp (tocode + 4, "32BE") == 0)
- return _ICONV_UTF8_UTF32BE;
- if (c_strcasecmp (tocode + 4, "32LE") == 0)
- return _ICONV_UTF8_UTF32LE;
- }
- else if (strcmp (tocode + 4, "8") == 0)
- {
- if (c_strcasecmp (fromcode + 4, "16BE") == 0)
- return _ICONV_UTF16BE_UTF8;
- if (c_strcasecmp (fromcode + 4, "16LE") == 0)
- return _ICONV_UTF16LE_UTF8;
- if (c_strcasecmp (fromcode + 4, "32BE") == 0)
- return _ICONV_UTF32BE_UTF8;
- if (c_strcasecmp (fromcode + 4, "32LE") == 0)
- return _ICONV_UTF32LE_UTF8;
- }
- }
- }
- #endif
- /* Do *not* add special support for 8-bit encodings like ASCII or ISO-8859-1
- here. This would lead to programs that work in some locales (such as the
- "C" or "en_US" locales) but do not work in East Asian locales. It is
- better if programmers make their programs depend on GNU libiconv (except
- on glibc systems), e.g. by using the AM_ICONV macro and documenting the
- dependency in an INSTALL or DEPENDENCIES file. */
- /* Try with the original names first.
- This covers the case when fromcode or tocode is a lowercase encoding name
- that is understood by the system's iconv_open but not listed in our
- mappings table. */
- {
- iconv_t cd = iconv_open (tocode, fromcode);
- if (cd != (iconv_t)(-1))
- return cd;
- }
- /* Convert the encodings to upper case, because
- 1. in the arguments of iconv_open() on AIX, HP-UX, and OSF/1 the case
- matters,
- 2. it makes searching in the table faster. */
- {
- const char *p = fromcode;
- char *q = fromcode_upper;
- while ((*q = c_toupper (*p)) != '\0')
- {
- p++;
- q++;
- if (q == &fromcode_upper[SIZEOF (fromcode_upper)])
- {
- errno = EINVAL;
- return (iconv_t)(-1);
- }
- }
- fromcode_upper_end = q;
- }
- {
- const char *p = tocode;
- char *q = tocode_upper;
- while ((*q = c_toupper (*p)) != '\0')
- {
- p++;
- q++;
- if (q == &tocode_upper[SIZEOF (tocode_upper)])
- {
- errno = EINVAL;
- return (iconv_t)(-1);
- }
- }
- tocode_upper_end = q;
- }
- #ifdef ICONV_FLAVOR
- /* Apply the mappings. */
- {
- const struct mapping *m =
- mapping_lookup (fromcode_upper, fromcode_upper_end - fromcode_upper);
- fromcode = (m != NULL ? m->vendor_name : fromcode_upper);
- }
- {
- const struct mapping *m =
- mapping_lookup (tocode_upper, tocode_upper_end - tocode_upper);
- tocode = (m != NULL ? m->vendor_name : tocode_upper);
- }
- #else
- fromcode = fromcode_upper;
- tocode = tocode_upper;
- #endif
- return iconv_open (tocode, fromcode);
- }
|