wctomb.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443
  1. /*
  2. * WideCharToMultiByte implementation
  3. *
  4. * Copyright 2000 Alexandre Julliard
  5. *
  6. * This library is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * This library is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with this library; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
  19. */
  20. #include <string.h>
  21. #include "wine/asm.h"
  22. #ifdef __ASM_OBSOLETE
  23. #include "unicode.h"
  24. extern WCHAR wine_compose( const WCHAR *str ) DECLSPEC_HIDDEN;
  /****************************************************************/
  /* sbcs (single-byte code page) support */
  27. /* check if 'ch' is an acceptable sbcs mapping for 'wch' */
  28. static inline int is_valid_sbcs_mapping( const struct sbcs_table *table, int flags,
  29. WCHAR wch, unsigned char ch )
  30. {
  31. if ((flags & WC_NO_BEST_FIT_CHARS) || ch == (unsigned char)table->info.def_char)
  32. return (table->cp2uni[ch] == wch);
  33. return 1;
  34. }
  35. /* query necessary dst length for src string */
  36. static int get_length_sbcs( const struct sbcs_table *table, int flags,
  37. const WCHAR *src, unsigned int srclen, int *used )
  38. {
  39. const unsigned char * const uni2cp_low = table->uni2cp_low;
  40. const unsigned short * const uni2cp_high = table->uni2cp_high;
  41. int ret, tmp;
  42. WCHAR composed;
  43. if (!used) used = &tmp; /* avoid checking on every char */
  44. *used = 0;
  45. for (ret = 0; srclen; ret++, src++, srclen--)
  46. {
  47. WCHAR wch = *src;
  48. unsigned char ch;
  49. if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = wine_compose(src)))
  50. {
  51. /* now check if we can use the composed char */
  52. ch = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
  53. if (is_valid_sbcs_mapping( table, flags, composed, ch ))
  54. {
  55. /* we have a good mapping, use it */
  56. src++;
  57. srclen--;
  58. continue;
  59. }
  60. /* no mapping for the composed char, check the other flags */
  61. if (flags & WC_DEFAULTCHAR) /* use the default char instead */
  62. {
  63. *used = 1;
  64. src++; /* skip the non-spacing char */
  65. srclen--;
  66. continue;
  67. }
  68. if (flags & WC_DISCARDNS) /* skip the second char of the composition */
  69. {
  70. src++;
  71. srclen--;
  72. }
  73. /* WC_SEPCHARS is the default */
  74. }
  75. if (!*used)
  76. {
  77. ch = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)];
  78. *used = !is_valid_sbcs_mapping( table, flags, wch, ch );
  79. }
  80. }
  81. return ret;
  82. }
  83. /* wcstombs for single-byte code page */
  84. static inline int wcstombs_sbcs( const struct sbcs_table *table,
  85. const WCHAR *src, unsigned int srclen,
  86. char *dst, unsigned int dstlen )
  87. {
  88. const unsigned char * const uni2cp_low = table->uni2cp_low;
  89. const unsigned short * const uni2cp_high = table->uni2cp_high;
  90. int ret = srclen;
  91. if (dstlen < srclen)
  92. {
  93. /* buffer too small: fill it up to dstlen and return error */
  94. srclen = dstlen;
  95. ret = -1;
  96. }
  97. while (srclen >= 16)
  98. {
  99. dst[0] = uni2cp_low[uni2cp_high[src[0] >> 8] + (src[0] & 0xff)];
  100. dst[1] = uni2cp_low[uni2cp_high[src[1] >> 8] + (src[1] & 0xff)];
  101. dst[2] = uni2cp_low[uni2cp_high[src[2] >> 8] + (src[2] & 0xff)];
  102. dst[3] = uni2cp_low[uni2cp_high[src[3] >> 8] + (src[3] & 0xff)];
  103. dst[4] = uni2cp_low[uni2cp_high[src[4] >> 8] + (src[4] & 0xff)];
  104. dst[5] = uni2cp_low[uni2cp_high[src[5] >> 8] + (src[5] & 0xff)];
  105. dst[6] = uni2cp_low[uni2cp_high[src[6] >> 8] + (src[6] & 0xff)];
  106. dst[7] = uni2cp_low[uni2cp_high[src[7] >> 8] + (src[7] & 0xff)];
  107. dst[8] = uni2cp_low[uni2cp_high[src[8] >> 8] + (src[8] & 0xff)];
  108. dst[9] = uni2cp_low[uni2cp_high[src[9] >> 8] + (src[9] & 0xff)];
  109. dst[10] = uni2cp_low[uni2cp_high[src[10] >> 8] + (src[10] & 0xff)];
  110. dst[11] = uni2cp_low[uni2cp_high[src[11] >> 8] + (src[11] & 0xff)];
  111. dst[12] = uni2cp_low[uni2cp_high[src[12] >> 8] + (src[12] & 0xff)];
  112. dst[13] = uni2cp_low[uni2cp_high[src[13] >> 8] + (src[13] & 0xff)];
  113. dst[14] = uni2cp_low[uni2cp_high[src[14] >> 8] + (src[14] & 0xff)];
  114. dst[15] = uni2cp_low[uni2cp_high[src[15] >> 8] + (src[15] & 0xff)];
  115. src += 16;
  116. dst += 16;
  117. srclen -= 16;
  118. }
  119. /* now handle remaining characters */
  120. src += srclen;
  121. dst += srclen;
  122. switch(srclen)
  123. {
  124. case 15: dst[-15] = uni2cp_low[uni2cp_high[src[-15] >> 8] + (src[-15] & 0xff)];
  125. case 14: dst[-14] = uni2cp_low[uni2cp_high[src[-14] >> 8] + (src[-14] & 0xff)];
  126. case 13: dst[-13] = uni2cp_low[uni2cp_high[src[-13] >> 8] + (src[-13] & 0xff)];
  127. case 12: dst[-12] = uni2cp_low[uni2cp_high[src[-12] >> 8] + (src[-12] & 0xff)];
  128. case 11: dst[-11] = uni2cp_low[uni2cp_high[src[-11] >> 8] + (src[-11] & 0xff)];
  129. case 10: dst[-10] = uni2cp_low[uni2cp_high[src[-10] >> 8] + (src[-10] & 0xff)];
  130. case 9: dst[-9] = uni2cp_low[uni2cp_high[src[-9] >> 8] + (src[-9] & 0xff)];
  131. case 8: dst[-8] = uni2cp_low[uni2cp_high[src[-8] >> 8] + (src[-8] & 0xff)];
  132. case 7: dst[-7] = uni2cp_low[uni2cp_high[src[-7] >> 8] + (src[-7] & 0xff)];
  133. case 6: dst[-6] = uni2cp_low[uni2cp_high[src[-6] >> 8] + (src[-6] & 0xff)];
  134. case 5: dst[-5] = uni2cp_low[uni2cp_high[src[-5] >> 8] + (src[-5] & 0xff)];
  135. case 4: dst[-4] = uni2cp_low[uni2cp_high[src[-4] >> 8] + (src[-4] & 0xff)];
  136. case 3: dst[-3] = uni2cp_low[uni2cp_high[src[-3] >> 8] + (src[-3] & 0xff)];
  137. case 2: dst[-2] = uni2cp_low[uni2cp_high[src[-2] >> 8] + (src[-2] & 0xff)];
  138. case 1: dst[-1] = uni2cp_low[uni2cp_high[src[-1] >> 8] + (src[-1] & 0xff)];
  139. case 0: break;
  140. }
  141. return ret;
  142. }
  143. /* slow version of wcstombs_sbcs that handles the various flags */
  144. static int wcstombs_sbcs_slow( const struct sbcs_table *table, int flags,
  145. const WCHAR *src, unsigned int srclen,
  146. char *dst, unsigned int dstlen,
  147. const char *defchar, int *used )
  148. {
  149. const unsigned char * const uni2cp_low = table->uni2cp_low;
  150. const unsigned short * const uni2cp_high = table->uni2cp_high;
  151. unsigned char def;
  152. unsigned int len;
  153. int tmp;
  154. WCHAR composed;
  155. if (!defchar)
  156. def = table->info.def_char & 0xff;
  157. else
  158. def = *defchar;
  159. if (!used) used = &tmp; /* avoid checking on every char */
  160. *used = 0;
  161. for (len = dstlen; srclen && len; dst++, len--, src++, srclen--)
  162. {
  163. WCHAR wch = *src;
  164. if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = wine_compose(src)))
  165. {
  166. /* now check if we can use the composed char */
  167. *dst = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
  168. if (is_valid_sbcs_mapping( table, flags, composed, *dst ))
  169. {
  170. /* we have a good mapping, use it */
  171. src++;
  172. srclen--;
  173. continue;
  174. }
  175. /* no mapping for the composed char, check the other flags */
  176. if (flags & WC_DEFAULTCHAR) /* use the default char instead */
  177. {
  178. *dst = def;
  179. *used = 1;
  180. src++; /* skip the non-spacing char */
  181. srclen--;
  182. continue;
  183. }
  184. if (flags & WC_DISCARDNS) /* skip the second char of the composition */
  185. {
  186. src++;
  187. srclen--;
  188. }
  189. /* WC_SEPCHARS is the default */
  190. }
  191. *dst = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)];
  192. if (!is_valid_sbcs_mapping( table, flags, wch, *dst ))
  193. {
  194. *dst = def;
  195. *used = 1;
  196. }
  197. }
  198. if (srclen) return -1; /* overflow */
  199. return dstlen - len;
  200. }
  /****************************************************************/
  /* dbcs (double-byte code page) support */
  203. /* check if 'ch' is an acceptable dbcs mapping for 'wch' */
  204. static inline int is_valid_dbcs_mapping( const struct dbcs_table *table, int flags,
  205. WCHAR wch, unsigned short ch )
  206. {
  207. if ((flags & WC_NO_BEST_FIT_CHARS) || ch == table->info.def_char)
  208. {
  209. /* check if char maps back to the same Unicode value */
  210. if (ch & 0xff00)
  211. {
  212. unsigned char off = table->cp2uni_leadbytes[ch >> 8];
  213. return (table->cp2uni[(off << 8) + (ch & 0xff)] == wch);
  214. }
  215. return (table->cp2uni[ch & 0xff] == wch);
  216. }
  217. return 1;
  218. }
  219. /* compute the default char for the dbcs case */
  220. static inline WCHAR get_defchar_dbcs( const struct dbcs_table *table, const char *defchar )
  221. {
  222. if (!defchar) return table->info.def_char;
  223. if (!defchar[1]) return (unsigned char)defchar[0];
  224. return ((unsigned char)defchar[0] << 8) | (unsigned char)defchar[1];
  225. }
  226. /* query necessary dst length for src string */
  227. static int get_length_dbcs( const struct dbcs_table *table, int flags,
  228. const WCHAR *src, unsigned int srclen,
  229. const char *defchar, int *used )
  230. {
  231. const unsigned short * const uni2cp_low = table->uni2cp_low;
  232. const unsigned short * const uni2cp_high = table->uni2cp_high;
  233. WCHAR defchar_value, composed;
  234. int len, tmp;
  235. if (!defchar && !used && !(flags & WC_COMPOSITECHECK))
  236. {
  237. for (len = 0; srclen; srclen--, src++, len++)
  238. {
  239. if (uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)] & 0xff00) len++;
  240. }
  241. return len;
  242. }
  243. defchar_value = get_defchar_dbcs( table, defchar );
  244. if (!used) used = &tmp; /* avoid checking on every char */
  245. *used = 0;
  246. for (len = 0; srclen; len++, srclen--, src++)
  247. {
  248. unsigned short res;
  249. WCHAR wch = *src;
  250. if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = wine_compose(src)))
  251. {
  252. /* now check if we can use the composed char */
  253. res = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
  254. if (is_valid_dbcs_mapping( table, flags, composed, res ))
  255. {
  256. /* we have a good mapping for the composed char, use it */
  257. if (res & 0xff00) len++;
  258. src++;
  259. srclen--;
  260. continue;
  261. }
  262. /* no mapping for the composed char, check the other flags */
  263. if (flags & WC_DEFAULTCHAR) /* use the default char instead */
  264. {
  265. if (defchar_value & 0xff00) len++;
  266. *used = 1;
  267. src++; /* skip the non-spacing char */
  268. srclen--;
  269. continue;
  270. }
  271. if (flags & WC_DISCARDNS) /* skip the second char of the composition */
  272. {
  273. src++;
  274. srclen--;
  275. }
  276. /* WC_SEPCHARS is the default */
  277. }
  278. res = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)];
  279. if (!is_valid_dbcs_mapping( table, flags, wch, res ))
  280. {
  281. res = defchar_value;
  282. *used = 1;
  283. }
  284. if (res & 0xff00) len++;
  285. }
  286. return len;
  287. }
  288. /* wcstombs for double-byte code page */
  289. static inline int wcstombs_dbcs( const struct dbcs_table *table,
  290. const WCHAR *src, unsigned int srclen,
  291. char *dst, unsigned int dstlen )
  292. {
  293. const unsigned short * const uni2cp_low = table->uni2cp_low;
  294. const unsigned short * const uni2cp_high = table->uni2cp_high;
  295. int len;
  296. for (len = dstlen; srclen && len; len--, srclen--, src++)
  297. {
  298. unsigned short res = uni2cp_low[uni2cp_high[*src >> 8] + (*src & 0xff)];
  299. if (res & 0xff00)
  300. {
  301. if (len == 1) break; /* do not output a partial char */
  302. len--;
  303. *dst++ = res >> 8;
  304. }
  305. *dst++ = (char)res;
  306. }
  307. if (srclen) return -1; /* overflow */
  308. return dstlen - len;
  309. }
  310. /* slow version of wcstombs_dbcs that handles the various flags */
  311. static int wcstombs_dbcs_slow( const struct dbcs_table *table, int flags,
  312. const WCHAR *src, unsigned int srclen,
  313. char *dst, unsigned int dstlen,
  314. const char *defchar, int *used )
  315. {
  316. const unsigned short * const uni2cp_low = table->uni2cp_low;
  317. const unsigned short * const uni2cp_high = table->uni2cp_high;
  318. WCHAR defchar_value = get_defchar_dbcs( table, defchar );
  319. WCHAR composed;
  320. int len, tmp;
  321. if (!used) used = &tmp; /* avoid checking on every char */
  322. *used = 0;
  323. for (len = dstlen; srclen && len; len--, srclen--, src++)
  324. {
  325. unsigned short res;
  326. WCHAR wch = *src;
  327. if ((flags & WC_COMPOSITECHECK) && (srclen > 1) && (composed = wine_compose(src)))
  328. {
  329. /* now check if we can use the composed char */
  330. res = uni2cp_low[uni2cp_high[composed >> 8] + (composed & 0xff)];
  331. if (is_valid_dbcs_mapping( table, flags, composed, res ))
  332. {
  333. /* we have a good mapping for the composed char, use it */
  334. src++;
  335. srclen--;
  336. goto output_char;
  337. }
  338. /* no mapping for the composed char, check the other flags */
  339. if (flags & WC_DEFAULTCHAR) /* use the default char instead */
  340. {
  341. res = defchar_value;
  342. *used = 1;
  343. src++; /* skip the non-spacing char */
  344. srclen--;
  345. goto output_char;
  346. }
  347. if (flags & WC_DISCARDNS) /* skip the second char of the composition */
  348. {
  349. src++;
  350. srclen--;
  351. }
  352. /* WC_SEPCHARS is the default */
  353. }
  354. res = uni2cp_low[uni2cp_high[wch >> 8] + (wch & 0xff)];
  355. if (!is_valid_dbcs_mapping( table, flags, wch, res ))
  356. {
  357. res = defchar_value;
  358. *used = 1;
  359. }
  360. output_char:
  361. if (res & 0xff00)
  362. {
  363. if (len == 1) break; /* do not output a partial char */
  364. len--;
  365. *dst++ = res >> 8;
  366. }
  367. *dst++ = (char)res;
  368. }
  369. if (srclen) return -1; /* overflow */
  370. return dstlen - len;
  371. }
  372. /* wide char to multi byte string conversion */
  373. /* return -1 on dst buffer overflow */
  374. int wine_cp_wcstombs_obsolete( const union cptable *table, int flags,
  375. const WCHAR *src, int srclen,
  376. char *dst, int dstlen, const char *defchar, int *used )
  377. {
  378. if (table->info.char_size == 1)
  379. {
  380. if (flags || defchar || used)
  381. {
  382. if (!dstlen) return get_length_sbcs( &table->sbcs, flags, src, srclen, used );
  383. return wcstombs_sbcs_slow( &table->sbcs, flags, src, srclen,
  384. dst, dstlen, defchar, used );
  385. }
  386. if (!dstlen) return srclen;
  387. return wcstombs_sbcs( &table->sbcs, src, srclen, dst, dstlen );
  388. }
  389. else /* mbcs */
  390. {
  391. if (!dstlen) return get_length_dbcs( &table->dbcs, flags, src, srclen, defchar, used );
  392. if (flags || defchar || used)
  393. return wcstombs_dbcs_slow( &table->dbcs, flags, src, srclen,
  394. dst, dstlen, defchar, used );
  395. return wcstombs_dbcs( &table->dbcs, src, srclen, dst, dstlen );
  396. }
  397. }
  398. __ASM_OBSOLETE(wine_cp_wcstombs);
  399. #endif /* __ASM_OBSOLETE */