natIconv.cc 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312
  1. // natIconv.cc -- Java side of iconv() reader.
  2. /* Copyright (C) 2000, 2001, 2003, 2006, 2011 Free Software Foundation
  3. This file is part of libgcj.
  4. This software is copyrighted work licensed under the terms of the
  5. Libgcj License. Please consult the file "LIBGCJ_LICENSE" for
  6. details. */
  7. /* Author: Tom Tromey <tromey@redhat.com>. */
  8. #include <config.h>
  9. #include <gcj/cni.h>
  10. #include <jvm.h>
  11. #include <gnu/gcj/convert/Input_iconv.h>
  12. #include <gnu/gcj/convert/Output_iconv.h>
  13. #include <java/io/CharConversionException.h>
  14. #include <java/io/UnsupportedEncodingException.h>
  15. #include <errno.h>
  16. #ifdef HAVE_ICONV
  17. #include <iconv.h>
  // Call-through shim for iconv().  Depending on the platform, the
  // second parameter of iconv() is declared either as `char **' or as
  // `const char **'.  Deducing that parameter type (InPtr) from the
  // function pointer we are handed lets the same call sites compile
  // against both declarations.
  //
  // ICONV_F is the conversion function to invoke; the remaining
  // arguments are forwarded to it unchanged, apart from INBUF being
  // cast to whatever pointer type ICONV_F expects.  The return value
  // is whatever ICONV_F returns.
  template<typename InPtr>
  static inline size_t
  iconv_adapter (size_t (*iconv_f) (iconv_t, InPtr, size_t *, char **, size_t *),
                 iconv_t handle, char **inbuf, size_t *inavail,
                 char **outbuf, size_t *outavail)
  {
    // A C-style cast is deliberate: it has to cover both the identity
    // conversion and the `char **' -> `const char **' conversion.
    InPtr adapted_in = (InPtr) inbuf;
    return iconv_f (handle, adapted_in, inavail, outbuf, outavail);
  }
  26. #endif
  27. void
  28. gnu::gcj::convert::Input_iconv::init (jstring encoding)
  29. {
  30. #ifdef HAVE_ICONV
  31. jsize len = _Jv_GetStringUTFLength (encoding);
  32. char buffer[len + 1];
  33. _Jv_GetStringUTFRegion (encoding, 0, encoding->length(), buffer);
  34. buffer[len] = '\0';
  35. iconv_t h = iconv_open ("UCS-2", buffer);
  36. if (h == (iconv_t) -1)
  37. throw new ::java::io::UnsupportedEncodingException (encoding);
  38. JvAssert (h != NULL);
  39. handle = reinterpret_cast<gnu::gcj::RawData *> (h);
  40. #else /* HAVE_ICONV */
  41. // If no iconv, just throw an exception.
  42. throw new ::java::io::UnsupportedEncodingException (encoding);
  43. #endif /* HAVE_ICONV */
  44. }
  45. void
  46. gnu::gcj::convert::Input_iconv::finalize (void)
  47. {
  48. #ifdef HAVE_ICONV
  49. if (handle != NULL)
  50. {
  51. iconv_close ((iconv_t) handle);
  52. handle = NULL;
  53. }
  54. #endif /* HAVE_ICONV */
  55. }
  56. jint
  57. gnu::gcj::convert::Input_iconv::read (jcharArray outbuffer,
  58. jint outpos, jint count)
  59. {
  60. #ifdef HAVE_ICONV
  61. jbyte *bytes = elements (inbuffer);
  62. jchar *out = elements (outbuffer);
  63. size_t inavail = inlength - inpos;
  64. size_t old_in = inavail;
  65. size_t outavail = count * sizeof (jchar);
  66. size_t old_out = outavail;
  67. char *inbuf = (char *) &bytes[inpos];
  68. char *outbuf = (char *) &out[outpos];
  69. size_t r = iconv_adapter (iconv, (iconv_t) handle,
  70. &inbuf, &inavail,
  71. &outbuf, &outavail);
  72. if (r == (size_t) -1)
  73. {
  74. // If we see EINVAL then there is an incomplete sequence at the
  75. // end of the input buffer. If we see E2BIG then we ran out of
  76. // space in the output buffer. However, in both these cases
  77. // some conversion might have taken place. So we fall through
  78. // to the normal case.
  79. if (errno != EINVAL && errno != E2BIG)
  80. throw new ::java::io::CharConversionException ();
  81. }
  82. if (iconv_byte_swap)
  83. {
  84. size_t max = (old_out - outavail) / sizeof (jchar);
  85. for (size_t i = 0; i < max; ++i)
  86. {
  87. // Byte swap.
  88. jchar c = (((out[outpos + i] & 0xff) << 8)
  89. | ((out[outpos + i] >> 8) & 0xff));
  90. outbuf[i] = c;
  91. }
  92. }
  93. inpos += old_in - inavail;
  94. return (old_out - outavail) / sizeof (jchar);
  95. #else /* HAVE_ICONV */
  96. return -1;
  97. #endif /* HAVE_ICONV */
  98. }
  99. void
  100. gnu::gcj::convert::Input_iconv::done ()
  101. {
  102. #ifdef HAVE_ICONV
  103. // 50 bytes should be enough for any reset sequence.
  104. size_t avail = 50;
  105. char tmp[avail];
  106. char *p = tmp;
  107. // Calling iconv() with a NULL INBUF pointer will cause iconv() to
  108. // switch to its initial state. We don't care about the output that
  109. // might be generated in that situation.
  110. iconv_adapter (iconv, (iconv_t) handle, NULL, NULL, &p, &avail);
  111. BytesToUnicode::done ();
  112. #else /* HAVE_ICONV */
  113. // If no iconv, do nothing
  114. #endif /* HAVE_ICONV */
  115. }
  116. void
  117. gnu::gcj::convert::Output_iconv::init (jstring encoding)
  118. {
  119. #ifdef HAVE_ICONV
  120. jsize len = _Jv_GetStringUTFLength (encoding);
  121. char buffer[len + 1];
  122. _Jv_GetStringUTFRegion (encoding, 0, encoding->length(), buffer);
  123. buffer[len] = '\0';
  124. iconv_t h = iconv_open (buffer, "UCS-2");
  125. if (h == (iconv_t) -1)
  126. throw new ::java::io::UnsupportedEncodingException (encoding);
  127. JvAssert (h != NULL);
  128. handle = reinterpret_cast<gnu::gcj::RawData *> (h);
  129. #else /* HAVE_ICONV */
  130. // If no iconv, just throw an exception.
  131. throw new ::java::io::UnsupportedEncodingException (encoding);
  132. #endif /* HAVE_ICONV */
  133. }
  134. void
  135. gnu::gcj::convert::Output_iconv::finalize (void)
  136. {
  137. #ifdef HAVE_ICONV
  138. if (handle != NULL)
  139. {
  140. iconv_close ((iconv_t) handle);
  141. handle = NULL;
  142. }
  143. #endif /* HAVE_ICONV */
  144. }
  145. jint
  146. gnu::gcj::convert::Output_iconv::write (jcharArray inbuffer,
  147. jint inpos, jint inlength)
  148. {
  149. #ifdef HAVE_ICONV
  150. jchar *chars = elements (inbuffer);
  151. jbyte *out = elements (buf);
  152. jchar *temp_buffer = NULL;
  153. size_t inavail = inlength * sizeof (jchar);
  154. size_t old_in = inavail;
  155. size_t outavail = buf->length - count;
  156. size_t old_out = outavail;
  157. char *inbuf = (char *) &chars[inpos];
  158. char *outbuf = (char *) &out[count];
  159. if (iconv_byte_swap)
  160. {
  161. // Ugly performance penalty -- don't use losing systems!
  162. temp_buffer = (jchar *) _Jv_Malloc (inlength * sizeof (jchar));
  163. for (int i = 0; i < inlength; ++i)
  164. {
  165. // Byte swap.
  166. jchar c = (((chars[inpos + i] & 0xff) << 8)
  167. | ((chars[inpos + i] >> 8) & 0xff));
  168. temp_buffer[i] = c;
  169. }
  170. inbuf = (char *) temp_buffer;
  171. }
  172. size_t loop_old_in = old_in;
  173. while (1)
  174. {
  175. size_t r = iconv_adapter (iconv, (iconv_t) handle,
  176. &inbuf, &inavail,
  177. &outbuf, &outavail);
  178. if (r == (size_t) -1)
  179. {
  180. if (errno == EINVAL)
  181. {
  182. // Incomplete byte sequence at the end of the input
  183. // buffer. This shouldn't be able to happen here.
  184. break;
  185. }
  186. else if (errno == E2BIG)
  187. {
  188. // Output buffer is too small.
  189. break;
  190. }
  191. else if (errno == EILSEQ || inavail == loop_old_in)
  192. {
  193. // Untranslatable sequence. Since glibc 2.1.3 doesn't
  194. // properly set errno, we also assume that this is what
  195. // is happening if no conversions took place. (This can
  196. // be a bogus assumption if in fact the output buffer is
  197. // too small.) We skip the first character and try
  198. // again.
  199. inavail -= 2;
  200. if (inavail == 0)
  201. break;
  202. loop_old_in -= 2;
  203. inbuf += 2;
  204. }
  205. }
  206. else
  207. break;
  208. }
  209. if (temp_buffer != NULL)
  210. _Jv_Free (temp_buffer);
  211. count += old_out - outavail;
  212. return (old_in - inavail) / sizeof (jchar);
  213. #else /* HAVE_ICONV */
  214. return -1;
  215. #endif /* HAVE_ICONV */
  216. }
  217. jboolean
  218. gnu::gcj::convert::IOConverter::iconv_init (void)
  219. {
  220. // Some versions of iconv() always return their UCS-2 results in
  221. // big-endian order, and they also require UCS-2 inputs to be in
  222. // big-endian order. For instance, glibc 2.1.3 does this. If the
  223. // UTF-8=>UCS-2 iconv converter has this feature, then we assume
  224. // that all UCS-2 converters do. (This might not be the best
  225. // heuristic, but is is all we've got.)
  226. jboolean result = false;
  227. #ifdef HAVE_ICONV
  228. iconv_t handle = iconv_open ("UCS-2", "UTF-8");
  229. if (handle != (iconv_t) -1)
  230. {
  231. jchar c;
  232. unsigned char in[4];
  233. char *inp, *outp;
  234. size_t inc, outc, r;
  235. // This is the UTF-8 encoding of \ufeff. At least Tru64 UNIX libiconv
  236. // needs the trailing NUL byte, otherwise iconv fails with EINVAL.
  237. in[0] = 0xef;
  238. in[1] = 0xbb;
  239. in[2] = 0xbf;
  240. in[3] = 0x00;
  241. inp = (char *) in;
  242. inc = 4;
  243. outp = (char *) &c;
  244. outc = 2;
  245. r = iconv_adapter (iconv, handle, &inp, &inc, &outp, &outc);
  246. // Conversion must be complete for us to use the result.
  247. if (r != (size_t) -1 && inc == 0 && outc == 0)
  248. result = (c != 0xfeff);
  249. // Release iconv handle.
  250. iconv_close (handle);
  251. }
  252. #endif /* HAVE_ICONV */
  253. return result;
  254. }
  255. void
  256. gnu::gcj::convert::Output_iconv::done ()
  257. {
  258. #ifdef HAVE_ICONV
  259. // 50 bytes should be enough for any reset sequence.
  260. size_t avail = 50;
  261. char tmp[avail];
  262. char *p = tmp;
  263. // Calling iconv() with a NULL INBUF pointer will cause iconv() to
  264. // switch to its initial state. We don't care about the output that
  265. // might be generated in that situation.
  266. iconv_adapter (iconv, (iconv_t) handle, NULL, NULL, &p, &avail);
  267. UnicodeToBytes::done ();
  268. #else /* HAVE_ICONV */
  269. // If no iconv, do nothing
  270. #endif /* HAVE_ICONV */
  271. }