/* mbrtowc.c */

/* Convert multibyte character to wide character.
   Copyright (C) 1999-2002, 2005-2023 Free Software Foundation, Inc.
   Written by Bruno Haible <bruno@clisp.org>, 2008.

   This file is free software: you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as
   published by the Free Software Foundation; either version 2.1 of the
   License, or (at your option) any later version.

   This file is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  14. #include <config.h>
  15. /* Specification. */
  16. #include <wchar.h>
  17. #if GNULIB_defined_mbstate_t
  18. /* Implement mbrtowc() on top of mbtowc() for the non-UTF-8 locales
  19. and directly for the UTF-8 locales. */
  20. # include <errno.h>
  21. # include <stdint.h>
  22. # include <stdlib.h>
  23. # if defined _WIN32 && !defined __CYGWIN__
  24. # define WIN32_LEAN_AND_MEAN /* avoid including junk */
  25. # include <windows.h>
  26. # elif HAVE_PTHREAD_API
  27. # include <pthread.h>
  28. # if HAVE_THREADS_H && HAVE_WEAK_SYMBOLS
  29. # include <threads.h>
  30. # pragma weak thrd_exit
  31. # define c11_threads_in_use() (thrd_exit != NULL)
  32. # else
  33. # define c11_threads_in_use() 0
  34. # endif
  35. # elif HAVE_THREADS_H
  36. # include <threads.h>
  37. # endif
  38. # include "attribute.h"
  39. # include "lc-charset-dispatch.h"
  40. # include "mbtowc-lock.h"
  /* The static buffer below is 4 bytes long; make sure an mbstate_t is at
     least that large.  */
  static_assert (sizeof (mbstate_t) >= 4);

  /* File-private 4-byte state buffer.  Presumably consumed by
     "mbrtowc-impl.h" as the conversion state when the caller passes a NULL
     state pointer -- confirm against that header.  */
  static char internal_state[4];
  43. size_t
  44. mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
  45. {
  46. # define FITS_IN_CHAR_TYPE(wc) ((wc) <= WCHAR_MAX)
  47. # include "mbrtowc-impl.h"
  48. }
  49. #else
  50. /* Override the system's mbrtowc() function. */
  51. # if MBRTOWC_IN_C_LOCALE_MAYBE_EILSEQ
  52. # include "hard-locale.h"
  53. # include <locale.h>
  54. # endif
  55. # undef mbrtowc
  /* Replacement for the system's mbrtowc().  It applies the workarounds
     selected by the MBRTOWC_* configuration macros and otherwise delegates
     to the system function (the name mbrtowc is #undef'ed just above, so the
     calls below reach the system implementation, not this wrapper).

     PWC  where to store the converted wide character; may be NULL, in which
          case a local scratch variable receives it.
     S    the multibyte input; may be NULL.
     N    maximum number of bytes of S to examine.
     PS   conversion state, passed through to the system mbrtowc().

     Returns the system mbrtowc() result, except where one of the enabled
     workarounds below substitutes its own value.  */
  size_t
  rpl_mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
  {
    size_t ret;
    wchar_t wc;

  # if MBRTOWC_NULL_ARG2_BUG || MBRTOWC_RETVAL_BUG || MBRTOWC_EMPTY_INPUT_BUG
    /* Turn a NULL S into the explicit arguments (NULL, "", 1, ps) so that the
       code below never sees a NULL input pointer.  */
    if (s == NULL)
      {
        pwc = NULL;
        s = "";
        n = 1;
      }
  # endif

  # if MBRTOWC_EMPTY_INPUT_BUG
    /* No bytes to examine: report an incomplete character ourselves rather
       than asking the system function.  */
    if (n == 0)
      return (size_t) -2;
  # endif

    /* From here on PWC is never NULL, so every path may store through it.  */
    if (! pwc)
      pwc = &wc;

  # if MBRTOWC_RETVAL_BUG
    {
      static mbstate_t internal_state;

      /* Override mbrtowc's internal state.  We cannot call mbsinit() on the
         hidden internal state, but we can call it on our variable.  */
      if (ps == NULL)
        ps = &internal_state;

      if (!mbsinit (ps))
        {
          /* Parse the rest of the multibyte character byte for byte.  */
          size_t count = 0;
          for (; n > 0; s++, n--)
            {
              ret = mbrtowc (&wc, s, 1, ps);

              if (ret == (size_t)(-1))
                return (size_t)(-1);
              count++;
              if (ret != (size_t)(-2))
                {
                  /* The multibyte character has been completed.  Return the
                     number of bytes fed in this call, or 0 for a NUL.  */
                  *pwc = wc;
                  return (wc == 0 ? 0 : count);
                }
            }
          /* Ran out of input with the character still incomplete.  */
          return (size_t)(-2);
        }
    }
  # endif

  # if MBRTOWC_STORES_INCOMPLETE_BUG
    /* Convert into the scratch variable and copy it out only when the call
       did not return (size_t) -1 or (size_t) -2.  PWC is known to be
       non-NULL here, so no NULL test is needed.  */
    ret = mbrtowc (&wc, s, n, ps);
    if (ret < (size_t) -2)
      *pwc = wc;
  # else
    ret = mbrtowc (pwc, s, n, ps);
  # endif

  # if MBRTOWC_NUL_RETVAL_BUG
    /* A successful conversion that produced the NUL wide character must
       return 0.  */
    if (ret < (size_t) -2 && !*pwc)
      return 0;
  # endif

  # if MBRTOWC_IN_C_LOCALE_MAYBE_EILSEQ
    /* When the call returned (size_t) -1 or (size_t) -2 and hard_locale()
       reports false for LC_CTYPE, treat the first input byte as one
       character.  S can still be NULL here when none of the macros guarding
       the NULL-S rewrite above is enabled, so check before dereferencing.  */
    if ((size_t) -2 <= ret && n != 0 && s != NULL && ! hard_locale (LC_CTYPE))
      {
        unsigned char uc = *s;
        *pwc = uc;
        return 1;
      }
  # endif

    return ret;
  }
  124. #endif