dclib-utf8.h 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167
  1. /***************************************************************************
  2. * *
  3. * _____ ____ *
  4. * | __ \ / __ \ _ _ _____ *
  5. * | | \ \ / / \_\ | | | | _ \ *
  6. * | | \ \| | | | | | |_| | *
  7. * | | | || | | | | | ___/ *
  8. * | | / /| | __ | | | | _ \ *
  9. * | |__/ / \ \__/ / | |___| | |_| | *
  10. * |_____/ \____/ |_____|_|_____/ *
  11. * *
  12. * Wiimms source code library *
  13. * *
  14. ***************************************************************************
  15. * *
  16. * Copyright (c) 2012-2022 by Dirk Clemens <wiimm@wiimm.de> *
  17. * *
  18. ***************************************************************************
  19. * *
  20. * This library is free software; you can redistribute it and/or modify *
  21. * it under the terms of the GNU General Public License as published by *
  22. * the Free Software Foundation; either version 2 of the License, or *
  23. * (at your option) any later version. *
  24. * *
  25. * This library is distributed in the hope that it will be useful, *
  26. * but WITHOUT ANY WARRANTY; without even the implied warranty of *
  27. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
  28. * GNU General Public License for more details. *
  29. * *
  30. * See file gpl-2.0.txt or http://www.gnu.org/licenses/gpl-2.0.txt *
  31. * *
  32. ***************************************************************************/
  33. #ifndef DCLIB_UTF8_H
  34. #define DCLIB_UTF8_H 1
  35. #ifndef WIN_DCLIB
  36. #include "dclib-basics.h"
  37. ///////////////////////////////////////////////////////////////////////////////
  38. ///// this software is taken from dcLib2 and now published under GPL2. /////
  39. ///////////////////////////////////////////////////////////////////////////////
  // [[dcUnicodeConsts]]
  // Code-point limits for the UTF-8 helpers.
  //
  // DCLIB_UNICODE_MAX_UTF8_<n> is the largest value that fits into the
  // payload bits of an <n>-byte UTF-8 sequence (7, 11, 16 and 21 bits).
  // Note that the 4-byte limit 0x1fffff is the raw 21-bit capacity and is
  // larger than the highest Unicode code point (0x10ffff).
  typedef enum dcUnicodeConsts
  {
      DCLIB_UNICODE_MAX_UTF8_1 = 0x7f,      //  7 payload bits
      DCLIB_UNICODE_MAX_UTF8_2 = 0x7ff,     // 11 payload bits
      DCLIB_UNICODE_MAX_UTF8_3 = 0xffff,    // 16 payload bits
      DCLIB_UNICODE_MAX_UTF8_4 = 0x1fffff,  // 21 payload bits

      // Mask with the low 21 bits set (same value as MAX_UTF8_4).
      DCLIB_UNICODE_CODE_MASK  = 0x1fffff,
  } dcUnicodeConsts;
  49. ///////////////////////////////////////////////////////////////////////////////
  // [[dcUTF8Mode]]
  // Classification flags for a single char (byte) of UTF-8 text; this is
  // the type returned by CheckUTF8Mode(). Every non-zero constant is a
  // distinct single bit, so several flags can be OR-ed into one value.
  typedef enum dcUTF8Mode
  {
      DC_UTF8_ILLEGAL        = 0x0000, // illegal UTF-8 char combination

      DC_UTF8_1CHAR          = 0x0001, // the char is a single, stand-alone char

      DC_UTF8_2CHAR          = 0x0002, // start of a 2-char sequence
      DC_UTF8_CONT_22        = 0x0004, // continuation char at pos 2 of a 2-char sequence

      DC_UTF8_3CHAR          = 0x0008, // start of a 3-char sequence
      DC_UTF8_CONT_23        = 0x0010, // continuation char at pos 2 of a 3-char sequence
      DC_UTF8_CONT_33        = 0x0020, // continuation char at pos 3 of a 3-char sequence

      DC_UTF8_4CHAR          = 0x0040, // start of a 4-char sequence
      DC_UTF8_CONT_24        = 0x0080, // continuation char at pos 2 of a 4-char sequence
      DC_UTF8_CONT_34        = 0x0100, // continuation char at pos 3 of a 4-char sequence
      DC_UTF8_CONT_44        = 0x0200, // continuation char at pos 4 of a 4-char sequence

      DC_UTF8_CONT_ANY       = 0x0400, // continuation char at any position

      DC_UTF8_1CHAR_POSSIBLE = 0x0800, // representable as a single char
      DC_UTF8_2CHAR_POSSIBLE = 0x1000, // representable as a 2-char sequence
      DC_UTF8_3CHAR_POSSIBLE = 0x2000, // representable as a 3-char sequence
      DC_UTF8_4CHAR_POSSIBLE = 0x4000, // representable as a 4-char sequence
  } dcUTF8Mode;
  70. ///////////////////////////////////////////////////////////////////////////////
  71. extern const unsigned short TableUTF8Mode[0x100];
  72. static inline dcUTF8Mode CheckUTF8Mode ( unsigned char ch )
  73. { return (dcUTF8Mode)TableUTF8Mode[ch]; }
  74. int GetUTF8CharLength ( u32 code );
  75. char * NextUTF8Char ( ccp str );
  76. char * NextUTF8CharE ( ccp str, ccp end );
  77. char * PrevUTF8Char ( ccp str );
  78. char * PrevUTF8CharB ( ccp str, ccp begin );
  79. char * SkipUTF8Char ( ccp str, int skip );
  80. char * SkipUTF8CharE ( ccp str, ccp end, int skip );
  81. u32 GetUTF8Char ( ccp str );
  82. u32 ScanUTF8Char ( ccp * str );
  83. u32 ScanUTF8CharE ( ccp * str, ccp end );
  84. u32 ScanUTF8CharInc ( ccp * str );
  85. u32 ScanUTF8CharIncE ( ccp * str, ccp end );
  86. u32 GetUTF8AnsiChar ( ccp str );
  87. u32 ScanUTF8AnsiChar ( ccp * str );
  88. u32 ScanUTF8AnsiCharE ( ccp * str, ccp end );
  89. int ScanUTF8Length ( ccp str );
  90. int ScanUTF8LengthE ( ccp str, ccp end );
  91. int CalcUTF8PrintFW ( ccp str, ccp end, uint wanted_fw );
  92. char * PrintUTF8Char ( char * buf, u32 code );
  93. char * PrintUTF8CharToCircBuf ( u32 code );
  94. exmem_t AlignUTF8 ( exmem_dest_t *dest, ccp str, int str_len, int fw, int prec );
  95. ccp AlignUTF8ToCircBuf ( ccp str, int fw, int prec );
  96. ///////////////////////////////////////////////////////////////////////////////
  97. // special variants, that ignore known escape sequences
  98. char * NextEUTF8Char ( ccp str );
  99. char * NextEUTF8CharE ( ccp str, ccp end );
  100. char * SkipEUTF8Char ( ccp str, int skip );
  101. char * SkipEUTF8CharE ( ccp str, ccp end, int skip );
  102. int ScanEUTF8Length ( ccp str );
  103. int ScanEUTF8LengthE ( ccp str, ccp end );
  104. exmem_t AlignEUTF8 ( exmem_dest_t *dest, ccp str, int str_len, int fw, int prec );
  105. ccp AlignEUTF8ToCircBuf ( ccp str, int fw, int prec );
  106. ///////////////////////////////////////////////////////////////////////////////
  107. static inline int strlen8 ( ccp str )
  108. { return ScanUTF8Length(str); }
  109. static inline int strlen8e ( ccp str, ccp end )
  110. { return ScanUTF8LengthE(str,end); }
  111. static inline int strlene8 ( ccp str )
  112. { return ScanEUTF8Length(str); }
  113. static inline int strlene8e ( ccp str, ccp end )
  114. { return ScanEUTF8LengthE(str,end); }
  115. ///////////////////////////////////////////////////////////////////////////////
  116. typedef struct dcUnicodeTripel
  117. {
  118. u32 code1;
  119. u32 code2;
  120. u32 code3;
  121. } dcUnicodeTripel;
  122. extern const dcUnicodeTripel TableUnicodeDecomp[];
  123. const dcUnicodeTripel * DecomposeUnicode ( u32 code );
  124. //
  125. ///////////////////////////////////////////////////////////////////////////////
  126. /////////////// END ///////////////
  127. ///////////////////////////////////////////////////////////////////////////////
  128. #endif // WIN_DCLIB
  129. #endif // DCLIB_UTF8_H