123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275 |
- /*
- * GRUB -- GRand Unified Bootloader
- * Copyright (C) 1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009 Free Software Foundation, Inc.
- *
- * GRUB is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * GRUB is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GRUB. If not, see <http://www.gnu.org/licenses/>.
- */
- /* Convert a (possibly null-terminated) UTF-8 string of at most SRCSIZE
- bytes (if SRCSIZE is -1, it is ignored) in length to a UTF-16 string.
- Return the number of characters converted. DEST must be able to hold
- at least DESTSIZE characters. If an invalid sequence is found, return -1.
- If SRCEND is not NULL, then *SRCEND is set to the next byte after the
- last byte used in SRC. */
- #include <grub/symbol.h>
- #include <grub/charset.h>
- #include <grub/mm.h>
- #include <grub/misc.h>
- GRUB_EXPORT(grub_utf8_to_utf16);
- GRUB_EXPORT(grub_ucs4_to_utf8_alloc);
- GRUB_EXPORT(grub_utf8_to_ucs4_alloc);
- grub_ssize_t
- grub_utf8_to_utf16 (grub_uint16_t *dest, grub_size_t destsize,
- const grub_uint8_t *src, grub_size_t srcsize,
- const grub_uint8_t **srcend)
- {
- grub_uint16_t *p = dest;
- int count = 0;
- grub_uint32_t code = 0;
- if (srcend)
- *srcend = src;
- while (srcsize && destsize)
- {
- grub_uint32_t c = *src++;
- if (srcsize != (grub_size_t)-1)
- srcsize--;
- if (count)
- {
- if ((c & GRUB_UINT8_2_LEADINGBITS) != GRUB_UINT8_1_LEADINGBIT)
- {
- /* invalid */
- return -1;
- }
- else
- {
- code <<= 6;
- code |= (c & GRUB_UINT8_6_TRAILINGBITS);
- count--;
- }
- }
- else
- {
- if (c == 0)
- break;
- if ((c & GRUB_UINT8_1_LEADINGBIT) == 0)
- code = c;
- else if ((c & GRUB_UINT8_3_LEADINGBITS) == GRUB_UINT8_2_LEADINGBITS)
- {
- count = 1;
- code = c & GRUB_UINT8_5_TRAILINGBITS;
- }
- else if ((c & GRUB_UINT8_4_LEADINGBITS) == GRUB_UINT8_3_LEADINGBITS)
- {
- count = 2;
- code = c & GRUB_UINT8_4_TRAILINGBITS;
- }
- else if ((c & GRUB_UINT8_5_LEADINGBITS) == GRUB_UINT8_4_LEADINGBITS)
- {
- count = 3;
- code = c & GRUB_UINT8_3_TRAILINGBITS;
- }
- else if ((c & GRUB_UINT8_6_LEADINGBITS) == GRUB_UINT8_5_LEADINGBITS)
- {
- count = 4;
- code = c & GRUB_UINT8_2_TRAILINGBITS;
- }
- else if ((c & GRUB_UINT8_7_LEADINGBITS) == GRUB_UINT8_6_LEADINGBITS)
- {
- count = 5;
- code = c & GRUB_UINT8_1_TRAILINGBIT;
- }
- else
- return -1;
- }
- if (count == 0)
- {
- if (destsize < 2 && code >= GRUB_UCS2_LIMIT)
- break;
- if (code >= GRUB_UCS2_LIMIT)
- {
- *p++ = GRUB_UTF16_UPPER_SURROGATE (code);
- *p++ = GRUB_UTF16_LOWER_SURROGATE (code);
- destsize -= 2;
- }
- else
- {
- *p++ = code;
- destsize--;
- }
- }
- }
- if (srcend)
- *srcend = src;
- return p - dest;
- }
- /* Convert UCS-4 to UTF-8. */
- char *
- grub_ucs4_to_utf8_alloc (grub_uint32_t *src, grub_size_t size)
- {
- grub_size_t remaining;
- grub_uint32_t *ptr;
- grub_size_t cnt = 0;
- grub_uint8_t *ret, *dest;
- remaining = size;
- ptr = src;
- while (remaining--)
- {
- grub_uint32_t code = *ptr++;
- if (code <= 0x007F)
- cnt++;
- else if (code <= 0x07FF)
- cnt += 2;
- else if ((code >= 0xDC00 && code <= 0xDFFF)
- || (code >= 0xD800 && code <= 0xDBFF))
- /* No surrogates in UCS-4... */
- cnt++;
- else
- cnt += 3;
- }
- cnt++;
- ret = grub_malloc (cnt);
- if (!ret)
- return 0;
- dest = ret;
- remaining = size;
- ptr = src;
- while (remaining--)
- {
- grub_uint32_t code = *ptr++;
- if (code <= 0x007F)
- *dest++ = code;
- else if (code <= 0x07FF)
- {
- *dest++ = (code >> 6) | 0xC0;
- *dest++ = (code & 0x3F) | 0x80;
- }
- else if ((code >= 0xDC00 && code <= 0xDFFF)
- || (code >= 0xD800 && code <= 0xDBFF))
- {
- /* No surrogates in UCS-4... */
- *dest++ = '?';
- }
- else
- {
- *dest++ = (code >> 12) | 0xE0;
- *dest++ = ((code >> 6) & 0x3F) | 0x80;
- *dest++ = (code & 0x3F) | 0x80;
- }
- }
- *dest = 0;
- return (char *) ret;
- }
- int
- grub_is_valid_utf8 (const grub_uint8_t *src, grub_size_t srcsize)
- {
- grub_uint32_t code = 0;
- int count = 0;
- while (srcsize)
- {
- grub_uint32_t c = *src++;
- if (srcsize != (grub_size_t)-1)
- srcsize--;
- if (count)
- {
- if ((c & 0xc0) != 0x80)
- {
- /* invalid */
- return 0;
- }
- else
- {
- code <<= 6;
- code |= (c & 0x3f);
- count--;
- }
- }
- else
- {
- if (c == 0)
- break;
- if ((c & 0x80) == 0x00)
- code = c;
- else if ((c & 0xe0) == 0xc0)
- {
- count = 1;
- code = c & 0x1f;
- }
- else if ((c & 0xf0) == 0xe0)
- {
- count = 2;
- code = c & 0x0f;
- }
- else if ((c & 0xf8) == 0xf0)
- {
- count = 3;
- code = c & 0x07;
- }
- else if ((c & 0xfc) == 0xf8)
- {
- count = 4;
- code = c & 0x03;
- }
- else if ((c & 0xfe) == 0xfc)
- {
- count = 5;
- code = c & 0x01;
- }
- else
- return 0;
- }
- }
- return 1;
- }
- int
- grub_utf8_to_ucs4_alloc (const char *msg, grub_uint32_t **unicode_msg,
- grub_uint32_t **last_position)
- {
- grub_size_t msg_len = grub_strlen (msg);
- *unicode_msg = grub_malloc (grub_strlen (msg) * sizeof (grub_uint32_t));
- if (!*unicode_msg)
- {
- grub_printf ("utf8_to_ucs4 ERROR1: %s", msg);
- return -1;
- }
- msg_len = grub_utf8_to_ucs4 (*unicode_msg, msg_len,
- (grub_uint8_t *) msg, -1, 0);
- *last_position = *unicode_msg + msg_len;
- return msg_len;
- }
|