dclib-punycode.h 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162
  1. /***************************************************************************
  2. * *
  3. * _____ ____ *
  4. * | __ \ / __ \ _ _ _____ *
  5. * | | \ \ / / \_\ | | | | _ \ *
  6. * | | \ \| | | | | | |_| | *
  7. * | | | || | | | | | ___/ *
  8. * | | / /| | __ | | | | _ \ *
  9. * | |__/ / \ \__/ / | |___| | |_| | *
  10. * |_____/ \____/ |_____|_|_____/ *
  11. * *
  12. * Wiimms source code library *
  13. * *
  14. ***************************************************************************
  15. * *
  16. * Copyright (c) 2012-2022 by Dirk Clemens <wiimm@wiimm.de> *
  17. * *
  18. ***************************************************************************
  19. * *
  20. * This library is free software; you can redistribute it and/or modify *
  21. * it under the terms of the GNU General Public License as published by *
  22. * the Free Software Foundation; either version 2 of the License, or *
  23. * (at your option) any later version. *
  24. * *
  25. * This library is distributed in the hope that it will be useful, *
  26. * but WITHOUT ANY WARRANTY; without even the implied warranty of *
  27. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
  28. * GNU General Public License for more details. *
  29. * *
  30. * See file gpl-2.0.txt or http://www.gnu.org/licenses/gpl-2.0.txt *
  31. * *
  32. ***************************************************************************/
  33. #ifndef DC_LIB_PUNYCODE_H
  34. #define DC_LIB_PUNYCODE_H 1
  35. #include "dclib-basics.h"
  36. #include <limits.h>
  37. //
  38. ///////////////////////////////////////////////////////////////////////////////
  39. /////////////// Domain2*() ///////////////
  40. ///////////////////////////////////////////////////////////////////////////////
  41. uint Domain2UTF8
  42. (
  43. // returns the number of scanned bytes of 'source' or 0 on error
  44. char *buf, // valid destination buffer
  45. int buf_size, // size of 'buf' >= 4
  46. const void *source, // NULL or UTF-8 domain to translate
  47. int source_len // length of 'source'; if <0: use strlen(source)
  48. );
  49. ///////////////////////////////////////////////////////////////////////////////
  50. uint Domain2ASCII
  51. (
  52. // returns the number of scanned bytes of 'source' or 0 on error
  53. char *buf, // valid destination buffer
  54. int buf_size, // size of 'buf' >= 5
  55. const void *source, // NULL or ASCII domain to translate
  56. int source_len // length of 'source'; if <0: use strlen(source)
  57. );
  58. //
  59. ///////////////////////////////////////////////////////////////////////////////
  60. /////////////// punycode lib ///////////////
  61. ///////////////////////////////////////////////////////////////////////////////
  // Result codes returned by punycode_encode() and punycode_decode().
  // The enumerators keep their implicit values 0..3 in this order.
  enum punycode_status
  {
      punycode_success,     /* Conversion succeeded.                      */
      punycode_bad_input,   /* Input is invalid.                          */
      punycode_big_output,  /* Output would exceed the space provided.    */
      punycode_overflow     /* Input needs wider integers to process.     */
  };
  // punycode_uint is the unsigned integer type used for code points and
  // lengths by the punycode functions. 'unsigned int' is chosen when it can
  // represent at least 2^26-1; otherwise 'unsigned long' is used instead.
  #if UINT_MAX >= (1 << 26) - 1
    typedef unsigned int punycode_uint;
  #else
    typedef unsigned long punycode_uint;
  #endif
  74. ///////////////////////////////////////////////////////////////////////////////
  75. enum punycode_status punycode_encode
  76. (
  77. punycode_uint input_length,
  78. const punycode_uint input[],
  79. const unsigned char case_flags[],
  80. punycode_uint *output_length,
  81. char output[]
  82. );
  83. /* punycode_encode() converts Unicode to Punycode. The input */
  84. /* is represented as an array of Unicode code points (not code */
  85. /* units; surrogate pairs are not allowed), and the output */
  86. /* will be represented as an array of ASCII code points. The */
  87. /* output string is *not* null-terminated; it will contain */
  88. /* zeros if and only if the input contains zeros. (Of course */
  89. /* the caller can leave room for a terminator and add one if */
  90. /* needed.) The input_length is the number of code points in */
  91. /* the input. The output_length is an in/out argument: the */
  92. /* caller passes in the maximum number of code points that it */
  93. /* can receive, and on successful return it will contain the */
  94. /* number of code points actually output. The case_flags array */
  95. /* holds input_length boolean values, where nonzero suggests that */
  96. /* the corresponding Unicode character be forced to uppercase */
  97. /* after being decoded (if possible), and zero suggests that */
  98. /* it be forced to lowercase (if possible). ASCII code points */
  99. /* are encoded literally, except that ASCII letters are forced */
  100. /* to uppercase or lowercase according to the corresponding */
  101. /* uppercase flags. If case_flags is a null pointer then ASCII */
  102. /* letters are left as they are, and other code points are */
  103. /* treated as if their uppercase flags were zero. The return */
  104. /* value can be any of the punycode_status values defined above */
  105. /* except punycode_bad_input; if not punycode_success, then */
  106. /* output_size and output might contain garbage. */
  107. ///////////////////////////////////////////////////////////////////////////////
  108. enum punycode_status punycode_decode
  109. (
  110. punycode_uint input_length,
  111. const char input[],
  112. punycode_uint *output_length,
  113. punycode_uint output[],
  114. unsigned char case_flags[]
  115. );
  116. /* punycode_decode() converts Punycode to Unicode. The input is */
  117. /* represented as an array of ASCII code points, and the output */
  118. /* will be represented as an array of Unicode code points. The */
  119. /* input_length is the number of code points in the input. The */
  120. /* output_length is an in/out argument: the caller passes in */
  121. /* the maximum number of code points that it can receive, and */
  122. /* on successful return it will contain the actual number of */
  123. /* code points output. The case_flags array needs room for at */
  124. /* least output_length values, or it can be a null pointer if the */
  125. /* case information is not needed. A nonzero flag suggests that */
  126. /* the corresponding Unicode character be forced to uppercase */
  127. /* by the caller (if possible), while zero suggests that it be */
  128. /* forced to lowercase (if possible). ASCII code points are */
  129. /* output already in the proper case, but their flags will be set */
  130. /* appropriately so that applying the flags would be harmless. */
  131. /* The return value can be any of the punycode_status values */
  132. /* defined above; if not punycode_success, then output_length, */
  133. /* output, and case_flags might contain garbage. On success, the */
  134. /* decoder will never need to write an output_length greater than */
  135. /* input_length, because of how the encoding is defined. */
  136. //
  137. ///////////////////////////////////////////////////////////////////////////////
  138. /////////////// E N D ///////////////
  139. ///////////////////////////////////////////////////////////////////////////////
  140. #endif // DC_LIB_PUNYCODE_H