mbtowc.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320
  1. /*
  2. * MultiByteToWideChar implementation
  3. *
  4. * Copyright 2000 Alexandre Julliard
  5. *
  6. * This library is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * This library is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with this library; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
  19. */
  20. #include <string.h>
  21. #include "wine/asm.h"
  22. #ifdef __ASM_OBSOLETE
  23. #include "wine/unicode.h"
  /* Lookup table consulted by get_decomposition(); only declared here, the definition is outside this section. */
  /* get_decomposition() uses it both as a multi-level index (keyed on bit groups of a WCHAR) */
  /* and as the storage of the sequences it returns pointers into. */
  /* NOTE(review): "nfd" presumably stands for Unicode Normalization Form D -- confirm against the table's generator. */
  24. extern const unsigned short nfd_table[] DECLSPEC_HIDDEN;
  25. static const WCHAR *get_decomposition( WCHAR ch, unsigned int *len )
  26. {
  27. unsigned short offset = nfd_table[nfd_table[ch >> 8] + ((ch >> 4) & 0xf)] + (ch & 0xf);
  28. unsigned short start = nfd_table[offset];
  29. unsigned short end = nfd_table[offset + 1];
  30. if ((*len = end - start)) return nfd_table + start;
  31. *len = 1;
  32. return NULL;
  33. }
  34. /* check the code whether it is in Unicode Private Use Area (PUA). */
  35. /* MB_ERR_INVALID_CHARS raises an error converting from 1-byte character to PUA. */
  36. static inline int is_private_use_area_char(WCHAR code)
  37. {
  38. return (code >= 0xe000 && code <= 0xf8ff);
  39. }
  40. /* check src string for invalid chars; return non-zero if invalid char found */
  41. static inline int check_invalid_chars_sbcs( const struct sbcs_table *table, int flags,
  42. const unsigned char *src, unsigned int srclen )
  43. {
  44. const WCHAR * const cp2uni = (flags & MB_USEGLYPHCHARS) ? table->cp2uni_glyphs : table->cp2uni;
  45. const WCHAR def_unicode_char = table->info.def_unicode_char;
  46. const unsigned char def_char = table->uni2cp_low[table->uni2cp_high[def_unicode_char >> 8]
  47. + (def_unicode_char & 0xff)];
  48. while (srclen)
  49. {
  50. if ((cp2uni[*src] == def_unicode_char && *src != def_char) ||
  51. is_private_use_area_char(cp2uni[*src])) break;
  52. src++;
  53. srclen--;
  54. }
  55. return srclen;
  56. }
  57. /* mbstowcs for single-byte code page */
  58. /* all lengths are in characters, not bytes */
  59. static inline int mbstowcs_sbcs( const struct sbcs_table *table, int flags,
  60. const unsigned char *src, unsigned int srclen,
  61. WCHAR *dst, unsigned int dstlen )
  62. {
  63. const WCHAR * const cp2uni = (flags & MB_USEGLYPHCHARS) ? table->cp2uni_glyphs : table->cp2uni;
  64. int ret = srclen;
  65. if (dstlen < srclen)
  66. {
  67. /* buffer too small: fill it up to dstlen and return error */
  68. srclen = dstlen;
  69. ret = -1;
  70. }
  71. while (srclen >= 16)
  72. {
  73. dst[0] = cp2uni[src[0]];
  74. dst[1] = cp2uni[src[1]];
  75. dst[2] = cp2uni[src[2]];
  76. dst[3] = cp2uni[src[3]];
  77. dst[4] = cp2uni[src[4]];
  78. dst[5] = cp2uni[src[5]];
  79. dst[6] = cp2uni[src[6]];
  80. dst[7] = cp2uni[src[7]];
  81. dst[8] = cp2uni[src[8]];
  82. dst[9] = cp2uni[src[9]];
  83. dst[10] = cp2uni[src[10]];
  84. dst[11] = cp2uni[src[11]];
  85. dst[12] = cp2uni[src[12]];
  86. dst[13] = cp2uni[src[13]];
  87. dst[14] = cp2uni[src[14]];
  88. dst[15] = cp2uni[src[15]];
  89. src += 16;
  90. dst += 16;
  91. srclen -= 16;
  92. }
  93. /* now handle the remaining characters */
  94. src += srclen;
  95. dst += srclen;
  96. switch (srclen)
  97. {
  98. case 15: dst[-15] = cp2uni[src[-15]];
  99. case 14: dst[-14] = cp2uni[src[-14]];
  100. case 13: dst[-13] = cp2uni[src[-13]];
  101. case 12: dst[-12] = cp2uni[src[-12]];
  102. case 11: dst[-11] = cp2uni[src[-11]];
  103. case 10: dst[-10] = cp2uni[src[-10]];
  104. case 9: dst[-9] = cp2uni[src[-9]];
  105. case 8: dst[-8] = cp2uni[src[-8]];
  106. case 7: dst[-7] = cp2uni[src[-7]];
  107. case 6: dst[-6] = cp2uni[src[-6]];
  108. case 5: dst[-5] = cp2uni[src[-5]];
  109. case 4: dst[-4] = cp2uni[src[-4]];
  110. case 3: dst[-3] = cp2uni[src[-3]];
  111. case 2: dst[-2] = cp2uni[src[-2]];
  112. case 1: dst[-1] = cp2uni[src[-1]];
  113. case 0: break;
  114. }
  115. return ret;
  116. }
  117. /* mbstowcs for single-byte code page with char decomposition */
  118. static int mbstowcs_sbcs_decompose( const struct sbcs_table *table, int flags,
  119. const unsigned char *src, unsigned int srclen,
  120. WCHAR *dst, unsigned int dstlen )
  121. {
  122. const WCHAR * const cp2uni = (flags & MB_USEGLYPHCHARS) ? table->cp2uni_glyphs : table->cp2uni;
  123. const WCHAR *decomp;
  124. unsigned int len, decomp_len;
  125. if (!dstlen) /* compute length */
  126. {
  127. for (len = 0; srclen; srclen--, src++, len += decomp_len)
  128. get_decomposition( cp2uni[*src], &decomp_len );
  129. return len;
  130. }
  131. for (len = dstlen; srclen && len; srclen--, src++, dst += decomp_len, len -= decomp_len)
  132. {
  133. if ((decomp = get_decomposition( cp2uni[*src], &decomp_len )))
  134. {
  135. if (len < decomp_len) break;
  136. memcpy( dst, decomp, decomp_len * sizeof(WCHAR) );
  137. }
  138. else *dst = cp2uni[*src];
  139. }
  140. if (srclen) return -1; /* overflow */
  141. return dstlen - len;
  142. }
  143. /* query necessary dst length for src string */
  144. static inline int get_length_dbcs( const struct dbcs_table *table,
  145. const unsigned char *src, unsigned int srclen )
  146. {
  147. const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
  148. int len;
  149. for (len = 0; srclen; srclen--, src++, len++)
  150. {
  151. if (cp2uni_lb[*src] && srclen > 1 && src[1])
  152. {
  153. src++;
  154. srclen--;
  155. }
  156. }
  157. return len;
  158. }
  159. /* check src string for invalid chars; return non-zero if invalid char found */
  160. static inline int check_invalid_chars_dbcs( const struct dbcs_table *table,
  161. const unsigned char *src, unsigned int srclen )
  162. {
  163. const WCHAR * const cp2uni = table->cp2uni;
  164. const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
  165. const WCHAR def_unicode_char = table->info.def_unicode_char;
  166. const unsigned short def_char = table->uni2cp_low[table->uni2cp_high[def_unicode_char >> 8]
  167. + (def_unicode_char & 0xff)];
  168. while (srclen)
  169. {
  170. unsigned char off = cp2uni_lb[*src];
  171. if (off) /* multi-byte char */
  172. {
  173. if (srclen == 1) break; /* partial char, error */
  174. if (cp2uni[(off << 8) + src[1]] == def_unicode_char &&
  175. ((src[0] << 8) | src[1]) != def_char) break;
  176. src++;
  177. srclen--;
  178. }
  179. else if ((cp2uni[*src] == def_unicode_char && *src != def_char) ||
  180. is_private_use_area_char(cp2uni[*src])) break;
  181. src++;
  182. srclen--;
  183. }
  184. return srclen;
  185. }
  186. /* mbstowcs for double-byte code page */
  187. /* all lengths are in characters, not bytes */
  188. static inline int mbstowcs_dbcs( const struct dbcs_table *table,
  189. const unsigned char *src, unsigned int srclen,
  190. WCHAR *dst, unsigned int dstlen )
  191. {
  192. const WCHAR * const cp2uni = table->cp2uni;
  193. const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
  194. unsigned int len;
  195. if (!dstlen) return get_length_dbcs( table, src, srclen );
  196. for (len = dstlen; srclen && len; len--, srclen--, src++, dst++)
  197. {
  198. unsigned char off = cp2uni_lb[*src];
  199. if (off && srclen > 1 && src[1])
  200. {
  201. src++;
  202. srclen--;
  203. *dst = cp2uni[(off << 8) + *src];
  204. }
  205. else *dst = cp2uni[*src];
  206. }
  207. if (srclen) return -1; /* overflow */
  208. return dstlen - len;
  209. }
  210. /* mbstowcs for double-byte code page with character decomposition */
  211. static int mbstowcs_dbcs_decompose( const struct dbcs_table *table,
  212. const unsigned char *src, unsigned int srclen,
  213. WCHAR *dst, unsigned int dstlen )
  214. {
  215. const WCHAR * const cp2uni = table->cp2uni;
  216. const unsigned char * const cp2uni_lb = table->cp2uni_leadbytes;
  217. const WCHAR *decomp;
  218. unsigned int len, decomp_len;
  219. WCHAR ch;
  220. if (!dstlen) /* compute length */
  221. {
  222. for (len = 0; srclen; srclen--, src++, len += decomp_len)
  223. {
  224. unsigned char off = cp2uni_lb[*src];
  225. if (off && srclen > 1 && src[1])
  226. {
  227. src++;
  228. srclen--;
  229. ch = cp2uni[(off << 8) + *src];
  230. }
  231. else ch = cp2uni[*src];
  232. get_decomposition( ch, &decomp_len );
  233. }
  234. return len;
  235. }
  236. for (len = dstlen; srclen && len; srclen--, src++, dst += decomp_len, len -= decomp_len)
  237. {
  238. unsigned char off = cp2uni_lb[*src];
  239. if (off && srclen > 1 && src[1])
  240. {
  241. src++;
  242. srclen--;
  243. ch = cp2uni[(off << 8) + *src];
  244. }
  245. else ch = cp2uni[*src];
  246. if ((decomp = get_decomposition( ch, &decomp_len )))
  247. {
  248. if (len < decomp_len) break;
  249. memcpy( dst, decomp, decomp_len * sizeof(WCHAR) );
  250. }
  251. else *dst = ch;
  252. }
  253. if (srclen) return -1; /* overflow */
  254. return dstlen - len;
  255. }
  256. /* return -1 on dst buffer overflow, -2 on invalid input char */
  257. int wine_cp_mbstowcs_obsolete( const union cptable *table, int flags,
  258. const char *s, int srclen, WCHAR *dst, int dstlen )
  259. {
  260. const unsigned char *src = (const unsigned char*) s;
  261. if (table->info.char_size == 1)
  262. {
  263. if (flags & MB_ERR_INVALID_CHARS)
  264. {
  265. if (check_invalid_chars_sbcs( &table->sbcs, flags, src, srclen )) return -2;
  266. }
  267. if (!(flags & MB_COMPOSITE))
  268. {
  269. if (!dstlen) return srclen;
  270. return mbstowcs_sbcs( &table->sbcs, flags, src, srclen, dst, dstlen );
  271. }
  272. return mbstowcs_sbcs_decompose( &table->sbcs, flags, src, srclen, dst, dstlen );
  273. }
  274. else /* mbcs */
  275. {
  276. if (flags & MB_ERR_INVALID_CHARS)
  277. {
  278. if (check_invalid_chars_dbcs( &table->dbcs, src, srclen )) return -2;
  279. }
  280. if (!(flags & MB_COMPOSITE))
  281. return mbstowcs_dbcs( &table->dbcs, src, srclen, dst, dstlen );
  282. else
  283. return mbstowcs_dbcs_decompose( &table->dbcs, src, srclen, dst, dstlen );
  284. }
  285. }
  286. __ASM_OBSOLETE(wine_cp_mbstowcs);
  287. #endif /* __ASM_OBSOLETE */