charset.c 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275
  1. /*
  2. * GRUB -- GRand Unified Bootloader
  3. * Copyright (C) 1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009 Free Software Foundation, Inc.
  4. *
  5. * GRUB is free software: you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation, either version 3 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * GRUB is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License
  16. * along with GRUB. If not, see <http://www.gnu.org/licenses/>.
  17. */
  18. /* Convert a (possibly null-terminated) UTF-8 string of at most SRCSIZE
  19. bytes (if SRCSIZE is -1, it is ignored) in length to a UTF-16 string.
  20. Return the number of characters converted. DEST must be able to hold
  21. at least DESTSIZE characters. If an invalid sequence is found, return -1.
  22. If SRCEND is not NULL, then *SRCEND is set to the next byte after the
  23. last byte used in SRC. */
  24. #include <grub/symbol.h>
  25. #include <grub/charset.h>
  26. #include <grub/mm.h>
  27. #include <grub/misc.h>
  28. GRUB_EXPORT(grub_utf8_to_utf16);
  29. GRUB_EXPORT(grub_ucs4_to_utf8_alloc);
  30. GRUB_EXPORT(grub_utf8_to_ucs4_alloc);
  31. grub_ssize_t
  32. grub_utf8_to_utf16 (grub_uint16_t *dest, grub_size_t destsize,
  33. const grub_uint8_t *src, grub_size_t srcsize,
  34. const grub_uint8_t **srcend)
  35. {
  36. grub_uint16_t *p = dest;
  37. int count = 0;
  38. grub_uint32_t code = 0;
  39. if (srcend)
  40. *srcend = src;
  41. while (srcsize && destsize)
  42. {
  43. grub_uint32_t c = *src++;
  44. if (srcsize != (grub_size_t)-1)
  45. srcsize--;
  46. if (count)
  47. {
  48. if ((c & GRUB_UINT8_2_LEADINGBITS) != GRUB_UINT8_1_LEADINGBIT)
  49. {
  50. /* invalid */
  51. return -1;
  52. }
  53. else
  54. {
  55. code <<= 6;
  56. code |= (c & GRUB_UINT8_6_TRAILINGBITS);
  57. count--;
  58. }
  59. }
  60. else
  61. {
  62. if (c == 0)
  63. break;
  64. if ((c & GRUB_UINT8_1_LEADINGBIT) == 0)
  65. code = c;
  66. else if ((c & GRUB_UINT8_3_LEADINGBITS) == GRUB_UINT8_2_LEADINGBITS)
  67. {
  68. count = 1;
  69. code = c & GRUB_UINT8_5_TRAILINGBITS;
  70. }
  71. else if ((c & GRUB_UINT8_4_LEADINGBITS) == GRUB_UINT8_3_LEADINGBITS)
  72. {
  73. count = 2;
  74. code = c & GRUB_UINT8_4_TRAILINGBITS;
  75. }
  76. else if ((c & GRUB_UINT8_5_LEADINGBITS) == GRUB_UINT8_4_LEADINGBITS)
  77. {
  78. count = 3;
  79. code = c & GRUB_UINT8_3_TRAILINGBITS;
  80. }
  81. else if ((c & GRUB_UINT8_6_LEADINGBITS) == GRUB_UINT8_5_LEADINGBITS)
  82. {
  83. count = 4;
  84. code = c & GRUB_UINT8_2_TRAILINGBITS;
  85. }
  86. else if ((c & GRUB_UINT8_7_LEADINGBITS) == GRUB_UINT8_6_LEADINGBITS)
  87. {
  88. count = 5;
  89. code = c & GRUB_UINT8_1_TRAILINGBIT;
  90. }
  91. else
  92. return -1;
  93. }
  94. if (count == 0)
  95. {
  96. if (destsize < 2 && code >= GRUB_UCS2_LIMIT)
  97. break;
  98. if (code >= GRUB_UCS2_LIMIT)
  99. {
  100. *p++ = GRUB_UTF16_UPPER_SURROGATE (code);
  101. *p++ = GRUB_UTF16_LOWER_SURROGATE (code);
  102. destsize -= 2;
  103. }
  104. else
  105. {
  106. *p++ = code;
  107. destsize--;
  108. }
  109. }
  110. }
  111. if (srcend)
  112. *srcend = src;
  113. return p - dest;
  114. }
  115. /* Convert UCS-4 to UTF-8. */
  116. char *
  117. grub_ucs4_to_utf8_alloc (grub_uint32_t *src, grub_size_t size)
  118. {
  119. grub_size_t remaining;
  120. grub_uint32_t *ptr;
  121. grub_size_t cnt = 0;
  122. grub_uint8_t *ret, *dest;
  123. remaining = size;
  124. ptr = src;
  125. while (remaining--)
  126. {
  127. grub_uint32_t code = *ptr++;
  128. if (code <= 0x007F)
  129. cnt++;
  130. else if (code <= 0x07FF)
  131. cnt += 2;
  132. else if ((code >= 0xDC00 && code <= 0xDFFF)
  133. || (code >= 0xD800 && code <= 0xDBFF))
  134. /* No surrogates in UCS-4... */
  135. cnt++;
  136. else
  137. cnt += 3;
  138. }
  139. cnt++;
  140. ret = grub_malloc (cnt);
  141. if (!ret)
  142. return 0;
  143. dest = ret;
  144. remaining = size;
  145. ptr = src;
  146. while (remaining--)
  147. {
  148. grub_uint32_t code = *ptr++;
  149. if (code <= 0x007F)
  150. *dest++ = code;
  151. else if (code <= 0x07FF)
  152. {
  153. *dest++ = (code >> 6) | 0xC0;
  154. *dest++ = (code & 0x3F) | 0x80;
  155. }
  156. else if ((code >= 0xDC00 && code <= 0xDFFF)
  157. || (code >= 0xD800 && code <= 0xDBFF))
  158. {
  159. /* No surrogates in UCS-4... */
  160. *dest++ = '?';
  161. }
  162. else
  163. {
  164. *dest++ = (code >> 12) | 0xE0;
  165. *dest++ = ((code >> 6) & 0x3F) | 0x80;
  166. *dest++ = (code & 0x3F) | 0x80;
  167. }
  168. }
  169. *dest = 0;
  170. return (char *) ret;
  171. }
  172. int
  173. grub_is_valid_utf8 (const grub_uint8_t *src, grub_size_t srcsize)
  174. {
  175. grub_uint32_t code = 0;
  176. int count = 0;
  177. while (srcsize)
  178. {
  179. grub_uint32_t c = *src++;
  180. if (srcsize != (grub_size_t)-1)
  181. srcsize--;
  182. if (count)
  183. {
  184. if ((c & 0xc0) != 0x80)
  185. {
  186. /* invalid */
  187. return 0;
  188. }
  189. else
  190. {
  191. code <<= 6;
  192. code |= (c & 0x3f);
  193. count--;
  194. }
  195. }
  196. else
  197. {
  198. if (c == 0)
  199. break;
  200. if ((c & 0x80) == 0x00)
  201. code = c;
  202. else if ((c & 0xe0) == 0xc0)
  203. {
  204. count = 1;
  205. code = c & 0x1f;
  206. }
  207. else if ((c & 0xf0) == 0xe0)
  208. {
  209. count = 2;
  210. code = c & 0x0f;
  211. }
  212. else if ((c & 0xf8) == 0xf0)
  213. {
  214. count = 3;
  215. code = c & 0x07;
  216. }
  217. else if ((c & 0xfc) == 0xf8)
  218. {
  219. count = 4;
  220. code = c & 0x03;
  221. }
  222. else if ((c & 0xfe) == 0xfc)
  223. {
  224. count = 5;
  225. code = c & 0x01;
  226. }
  227. else
  228. return 0;
  229. }
  230. }
  231. return 1;
  232. }
  233. int
  234. grub_utf8_to_ucs4_alloc (const char *msg, grub_uint32_t **unicode_msg,
  235. grub_uint32_t **last_position)
  236. {
  237. grub_size_t msg_len = grub_strlen (msg);
  238. *unicode_msg = grub_malloc (grub_strlen (msg) * sizeof (grub_uint32_t));
  239. if (!*unicode_msg)
  240. {
  241. grub_printf ("utf8_to_ucs4 ERROR1: %s", msg);
  242. return -1;
  243. }
  244. msg_len = grub_utf8_to_ucs4 (*unicode_msg, msg_len,
  245. (grub_uint8_t *) msg, -1, 0);
  246. *last_position = *unicode_msg + msg_len;
  247. return msg_len;
  248. }