123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312 |
- // natIconv.cc -- Java side of iconv() reader.
- /* Copyright (C) 2000, 2001, 2003, 2006, 2011 Free Software Foundation
- This file is part of libgcj.
- This software is copyrighted work licensed under the terms of the
- Libgcj License. Please consult the file "LIBGCJ_LICENSE" for
- details. */
- /* Author: Tom Tromey <tromey@redhat.com>. */
- #include <config.h>
- #include <gcj/cni.h>
- #include <jvm.h>
- #include <gnu/gcj/convert/Input_iconv.h>
- #include <gnu/gcj/convert/Output_iconv.h>
- #include <java/io/CharConversionException.h>
- #include <java/io/UnsupportedEncodingException.h>
- #include <errno.h>
- #ifdef HAVE_ICONV
- #include <iconv.h>
// Thin forwarding wrapper around an iconv()-shaped function.
//
// T is deduced from the second parameter of ICONV_F, and INBUF is cast
// to that type before the call.  Presumably this lets the same call
// sites compile whether the platform declares the input buffer as
// `char **' or `const char **' -- confirm against the target headers.
//
// Returns whatever ICONV_F returns; all other arguments are passed
// through unchanged.
template<typename T>
static inline size_t
iconv_adapter (size_t (*iconv_f) (iconv_t, T, size_t *, char **, size_t *),
               iconv_t handle, char **inbuf, size_t *inavail,
               char **outbuf, size_t *outavail)
{
  // Adapt the input pointer to the exact type ICONV_F expects.
  T adapted_in = (T) inbuf;
  return iconv_f (handle, adapted_in, inavail, outbuf, outavail);
}
- #endif
- void
- gnu::gcj::convert::Input_iconv::init (jstring encoding)
- {
- #ifdef HAVE_ICONV
- jsize len = _Jv_GetStringUTFLength (encoding);
- char buffer[len + 1];
- _Jv_GetStringUTFRegion (encoding, 0, encoding->length(), buffer);
- buffer[len] = '\0';
- iconv_t h = iconv_open ("UCS-2", buffer);
- if (h == (iconv_t) -1)
- throw new ::java::io::UnsupportedEncodingException (encoding);
- JvAssert (h != NULL);
- handle = reinterpret_cast<gnu::gcj::RawData *> (h);
- #else /* HAVE_ICONV */
- // If no iconv, just throw an exception.
- throw new ::java::io::UnsupportedEncodingException (encoding);
- #endif /* HAVE_ICONV */
- }
- void
- gnu::gcj::convert::Input_iconv::finalize (void)
- {
- #ifdef HAVE_ICONV
- if (handle != NULL)
- {
- iconv_close ((iconv_t) handle);
- handle = NULL;
- }
- #endif /* HAVE_ICONV */
- }
- jint
- gnu::gcj::convert::Input_iconv::read (jcharArray outbuffer,
- jint outpos, jint count)
- {
- #ifdef HAVE_ICONV
- jbyte *bytes = elements (inbuffer);
- jchar *out = elements (outbuffer);
- size_t inavail = inlength - inpos;
- size_t old_in = inavail;
- size_t outavail = count * sizeof (jchar);
- size_t old_out = outavail;
- char *inbuf = (char *) &bytes[inpos];
- char *outbuf = (char *) &out[outpos];
- size_t r = iconv_adapter (iconv, (iconv_t) handle,
- &inbuf, &inavail,
- &outbuf, &outavail);
- if (r == (size_t) -1)
- {
- // If we see EINVAL then there is an incomplete sequence at the
- // end of the input buffer. If we see E2BIG then we ran out of
- // space in the output buffer. However, in both these cases
- // some conversion might have taken place. So we fall through
- // to the normal case.
- if (errno != EINVAL && errno != E2BIG)
- throw new ::java::io::CharConversionException ();
- }
- if (iconv_byte_swap)
- {
- size_t max = (old_out - outavail) / sizeof (jchar);
- for (size_t i = 0; i < max; ++i)
- {
- // Byte swap.
- jchar c = (((out[outpos + i] & 0xff) << 8)
- | ((out[outpos + i] >> 8) & 0xff));
- outbuf[i] = c;
- }
- }
- inpos += old_in - inavail;
- return (old_out - outavail) / sizeof (jchar);
- #else /* HAVE_ICONV */
- return -1;
- #endif /* HAVE_ICONV */
- }
- void
- gnu::gcj::convert::Input_iconv::done ()
- {
- #ifdef HAVE_ICONV
- // 50 bytes should be enough for any reset sequence.
- size_t avail = 50;
- char tmp[avail];
- char *p = tmp;
- // Calling iconv() with a NULL INBUF pointer will cause iconv() to
- // switch to its initial state. We don't care about the output that
- // might be generated in that situation.
- iconv_adapter (iconv, (iconv_t) handle, NULL, NULL, &p, &avail);
- BytesToUnicode::done ();
- #else /* HAVE_ICONV */
- // If no iconv, do nothing
- #endif /* HAVE_ICONV */
- }
- void
- gnu::gcj::convert::Output_iconv::init (jstring encoding)
- {
- #ifdef HAVE_ICONV
- jsize len = _Jv_GetStringUTFLength (encoding);
- char buffer[len + 1];
- _Jv_GetStringUTFRegion (encoding, 0, encoding->length(), buffer);
- buffer[len] = '\0';
- iconv_t h = iconv_open (buffer, "UCS-2");
- if (h == (iconv_t) -1)
- throw new ::java::io::UnsupportedEncodingException (encoding);
- JvAssert (h != NULL);
- handle = reinterpret_cast<gnu::gcj::RawData *> (h);
- #else /* HAVE_ICONV */
- // If no iconv, just throw an exception.
- throw new ::java::io::UnsupportedEncodingException (encoding);
- #endif /* HAVE_ICONV */
- }
- void
- gnu::gcj::convert::Output_iconv::finalize (void)
- {
- #ifdef HAVE_ICONV
- if (handle != NULL)
- {
- iconv_close ((iconv_t) handle);
- handle = NULL;
- }
- #endif /* HAVE_ICONV */
- }
- jint
- gnu::gcj::convert::Output_iconv::write (jcharArray inbuffer,
- jint inpos, jint inlength)
- {
- #ifdef HAVE_ICONV
- jchar *chars = elements (inbuffer);
- jbyte *out = elements (buf);
- jchar *temp_buffer = NULL;
- size_t inavail = inlength * sizeof (jchar);
- size_t old_in = inavail;
- size_t outavail = buf->length - count;
- size_t old_out = outavail;
- char *inbuf = (char *) &chars[inpos];
- char *outbuf = (char *) &out[count];
- if (iconv_byte_swap)
- {
- // Ugly performance penalty -- don't use losing systems!
- temp_buffer = (jchar *) _Jv_Malloc (inlength * sizeof (jchar));
- for (int i = 0; i < inlength; ++i)
- {
- // Byte swap.
- jchar c = (((chars[inpos + i] & 0xff) << 8)
- | ((chars[inpos + i] >> 8) & 0xff));
- temp_buffer[i] = c;
- }
- inbuf = (char *) temp_buffer;
- }
- size_t loop_old_in = old_in;
- while (1)
- {
- size_t r = iconv_adapter (iconv, (iconv_t) handle,
- &inbuf, &inavail,
- &outbuf, &outavail);
- if (r == (size_t) -1)
- {
- if (errno == EINVAL)
- {
- // Incomplete byte sequence at the end of the input
- // buffer. This shouldn't be able to happen here.
- break;
- }
- else if (errno == E2BIG)
- {
- // Output buffer is too small.
- break;
- }
- else if (errno == EILSEQ || inavail == loop_old_in)
- {
- // Untranslatable sequence. Since glibc 2.1.3 doesn't
- // properly set errno, we also assume that this is what
- // is happening if no conversions took place. (This can
- // be a bogus assumption if in fact the output buffer is
- // too small.) We skip the first character and try
- // again.
- inavail -= 2;
- if (inavail == 0)
- break;
- loop_old_in -= 2;
- inbuf += 2;
- }
- }
- else
- break;
- }
- if (temp_buffer != NULL)
- _Jv_Free (temp_buffer);
- count += old_out - outavail;
- return (old_in - inavail) / sizeof (jchar);
- #else /* HAVE_ICONV */
- return -1;
- #endif /* HAVE_ICONV */
- }
- jboolean
- gnu::gcj::convert::IOConverter::iconv_init (void)
- {
- // Some versions of iconv() always return their UCS-2 results in
- // big-endian order, and they also require UCS-2 inputs to be in
- // big-endian order. For instance, glibc 2.1.3 does this. If the
- // UTF-8=>UCS-2 iconv converter has this feature, then we assume
- // that all UCS-2 converters do. (This might not be the best
- // heuristic, but is is all we've got.)
- jboolean result = false;
- #ifdef HAVE_ICONV
- iconv_t handle = iconv_open ("UCS-2", "UTF-8");
- if (handle != (iconv_t) -1)
- {
- jchar c;
- unsigned char in[4];
- char *inp, *outp;
- size_t inc, outc, r;
- // This is the UTF-8 encoding of \ufeff. At least Tru64 UNIX libiconv
- // needs the trailing NUL byte, otherwise iconv fails with EINVAL.
- in[0] = 0xef;
- in[1] = 0xbb;
- in[2] = 0xbf;
- in[3] = 0x00;
- inp = (char *) in;
- inc = 4;
- outp = (char *) &c;
- outc = 2;
- r = iconv_adapter (iconv, handle, &inp, &inc, &outp, &outc);
- // Conversion must be complete for us to use the result.
- if (r != (size_t) -1 && inc == 0 && outc == 0)
- result = (c != 0xfeff);
- // Release iconv handle.
- iconv_close (handle);
- }
- #endif /* HAVE_ICONV */
- return result;
- }
- void
- gnu::gcj::convert::Output_iconv::done ()
- {
- #ifdef HAVE_ICONV
- // 50 bytes should be enough for any reset sequence.
- size_t avail = 50;
- char tmp[avail];
- char *p = tmp;
- // Calling iconv() with a NULL INBUF pointer will cause iconv() to
- // switch to its initial state. We don't care about the output that
- // might be generated in that situation.
- iconv_adapter (iconv, (iconv_t) handle, NULL, NULL, &p, &avail);
- UnicodeToBytes::done ();
- #else /* HAVE_ICONV */
- // If no iconv, do nothing
- #endif /* HAVE_ICONV */
- }
|