mbrtowc-impl-utf8.h 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139
  1. /* Convert multibyte character to wide character.
  2. Copyright (C) 1999-2002, 2005-2023 Free Software Foundation, Inc.
  3. This file is free software: you can redistribute it and/or modify
  4. it under the terms of the GNU Lesser General Public License as
  5. published by the Free Software Foundation; either version 2.1 of the
  6. License, or (at your option) any later version.
  7. This file is distributed in the hope that it will be useful,
  8. but WITHOUT ANY WARRANTY; without even the implied warranty of
  9. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  10. GNU Lesser General Public License for more details.
  11. You should have received a copy of the GNU Lesser General Public License
  12. along with this program. If not, see <https://www.gnu.org/licenses/>. */
  13. /* Written by Bruno Haible <bruno@clisp.org>, 2008. */
  14. /* This file contains the part of the body of the mbrtowc and mbrtoc32 functions
  15. that handles the special case of the UTF-8 encoding. */
  16. /* Cf. unistr/u8-mbtouc.c. */
  17. unsigned char c = (unsigned char) p[0];
  18. if (c < 0x80)
  19. {
  20. if (pwc != NULL)
  21. *pwc = c;
  22. res = (c == 0 ? 0 : 1);
  23. goto success;
  24. }
  25. if (c >= 0xc2)
  26. {
  27. if (c < 0xe0)
  28. {
  29. if (m == 1)
  30. goto incomplete;
  31. else /* m >= 2 */
  32. {
  33. unsigned char c2 = (unsigned char) p[1];
  34. if ((c2 ^ 0x80) < 0x40)
  35. {
  36. if (pwc != NULL)
  37. *pwc = ((unsigned int) (c & 0x1f) << 6)
  38. | (unsigned int) (c2 ^ 0x80);
  39. res = 2;
  40. goto success;
  41. }
  42. }
  43. }
  44. else if (c < 0xf0)
  45. {
  46. if (m == 1)
  47. goto incomplete;
  48. else
  49. {
  50. unsigned char c2 = (unsigned char) p[1];
  51. if ((c2 ^ 0x80) < 0x40
  52. && (c >= 0xe1 || c2 >= 0xa0)
  53. && (c != 0xed || c2 < 0xa0))
  54. {
  55. if (m == 2)
  56. goto incomplete;
  57. else /* m >= 3 */
  58. {
  59. unsigned char c3 = (unsigned char) p[2];
  60. if ((c3 ^ 0x80) < 0x40)
  61. {
  62. unsigned int wc =
  63. (((unsigned int) (c & 0x0f) << 12)
  64. | ((unsigned int) (c2 ^ 0x80) << 6)
  65. | (unsigned int) (c3 ^ 0x80));
  66. if (FITS_IN_CHAR_TYPE (wc))
  67. {
  68. if (pwc != NULL)
  69. *pwc = wc;
  70. res = 3;
  71. goto success;
  72. }
  73. }
  74. }
  75. }
  76. }
  77. }
  78. else if (c <= 0xf4)
  79. {
  80. if (m == 1)
  81. goto incomplete;
  82. else
  83. {
  84. unsigned char c2 = (unsigned char) p[1];
  85. if ((c2 ^ 0x80) < 0x40
  86. && (c >= 0xf1 || c2 >= 0x90)
  87. && (c < 0xf4 || (/* c == 0xf4 && */ c2 < 0x90)))
  88. {
  89. if (m == 2)
  90. goto incomplete;
  91. else
  92. {
  93. unsigned char c3 = (unsigned char) p[2];
  94. if ((c3 ^ 0x80) < 0x40)
  95. {
  96. if (m == 3)
  97. goto incomplete;
  98. else /* m >= 4 */
  99. {
  100. unsigned char c4 = (unsigned char) p[3];
  101. if ((c4 ^ 0x80) < 0x40)
  102. {
  103. unsigned int wc =
  104. (((unsigned int) (c & 0x07) << 18)
  105. | ((unsigned int) (c2 ^ 0x80) << 12)
  106. | ((unsigned int) (c3 ^ 0x80) << 6)
  107. | (unsigned int) (c4 ^ 0x80));
  108. if (FITS_IN_CHAR_TYPE (wc))
  109. {
  110. if (pwc != NULL)
  111. *pwc = wc;
  112. res = 4;
  113. goto success;
  114. }
  115. }
  116. }
  117. }
  118. }
  119. }
  120. }
  121. }
  122. }
  123. goto invalid;