unicode.c 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134
  1. /*
  2. * libsfn/unicode.c
  3. *
  4. * Copyright (C) 2020 bzt (bztsrc@gitlab)
  5. *
  6. * Permission is hereby granted, free of charge, to any person
  7. * obtaining a copy of this software and associated documentation
  8. * files (the "Software"), to deal in the Software without
  9. * restriction, including without limitation the rights to use, copy,
  10. * modify, merge, publish, distribute, sublicense, and/or sell copies
  11. * of the Software, and to permit persons to whom the Software is
  12. * furnished to do so, subject to the following conditions:
  13. *
  14. * The above copyright notice and this permission notice shall be
  15. * included in all copies or substantial portions of the Software.
  16. *
  17. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  18. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  19. * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  20. * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  21. * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
  22. * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  23. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  24. * DEALINGS IN THE SOFTWARE.
  25. *
  26. * @brief UNICODE functions
  27. *
  28. */
  29. #include <stdint.h>
  30. #include <stdlib.h>
  31. #include <string.h>
  32. #define _UNICODE_BLOCKSDATA
  33. #define _UNICODE_NAMESDATA
  34. #include "unicode.h"
  35. #include "stb_png.h" /* to get stbi_zlib_decode_malloc_guesssize_headerflag() */
  /* decompressed UNICODE names database; stays NULL until uniname_init() stores a buffer here */
  static char *unicodedb = NULL;
  /* scratch buffer that utf8() fills and hands back to its caller */
  static char ut[10];
  37. /**
  38. * Initialize UNICODE names data
  39. */
  40. void uniname_init()
  41. {
  42. int i, j;
  43. char *ptr, *end;
  44. ptr = unicodedb = (char *)stbi_zlib_decode_malloc_guesssize_headerflag((const char*)&unicode_dat,
  45. UNICODE_DAT_SIZE, 4096, (int*)&i, 1);
  46. if(!ptr) return;
  47. end = ptr + i;
  48. for(i=j=0;i < UNICODE_NUMNAMES && ptr < end;i++) {
  49. while(*((uint16_t*)ptr) & 0x8000) {
  50. j += -(*((int16_t*)ptr));
  51. ptr += 2;
  52. }
  53. uninames[i].unicode = j++;
  54. uninames[i].rtl = *ptr++;
  55. uninames[i].name = ptr;
  56. while(*ptr && ptr < end) ptr++;
  57. ptr++;
  58. }
  59. uninames[0].name = "NOGLYPH";
  60. uninames[UNICODE_NUMNAMES].name = "";
  61. }
  62. /**
  63. * Return the UNICODE name data index for UNICODE
  64. */
  65. int uniname(int unicode)
  66. {
  67. register int i=0, j=UNICODE_NUMNAMES-1, k, l=22;
  68. if(!unicodedb) uniname_init();
  69. if(!unicode) return 0;
  70. if(unicode > 0x10FFFF) return UNICODE_NUMNAMES;
  71. while(l--) {
  72. k = i + ((j-i) >> 1);
  73. if(uninames[k].unicode == unicode) return k;
  74. if(i >= j) break;
  75. if(uninames[k].unicode < unicode) i = k + 1; else j = k;
  76. }
  77. return UNICODE_NUMNAMES;
  78. }
  79. /**
  80. * Free resources
  81. */
  82. void uniname_free()
  83. {
  84. if(unicodedb)
  85. free(unicodedb);
  86. }
  87. /**
  88. * Convert UNICODE code point into UTF-8 sequence
  89. */
  90. char *utf8(int i)
  91. {
  92. if(i<0x80) { ut[0]=i; ut[1]=0;
  93. } else if(i<0x800) {
  94. ut[0]=((i>>6)&0x1F)|0xC0;
  95. ut[1]=(i&0x3F)|0x80;
  96. ut[2]=0;
  97. } else if(i<0x10000) {
  98. ut[0]=((i>>12)&0x0F)|0xE0;
  99. ut[1]=((i>>6)&0x3F)|0x80;
  100. ut[2]=(i&0x3F)|0x80;
  101. ut[3]=0;
  102. } else {
  103. ut[0]=((i>>18)&0x07)|0xF0;
  104. ut[1]=((i>>12)&0x3F)|0x80;
  105. ut[2]=((i>>6)&0x3F)|0x80;
  106. ut[3]=(i&0x3F)|0x80;
  107. ut[4]=0;
  108. }
  109. return ut;
  110. }
  /**
   * Block name comparison according to UNICODE Inc.
   *
   * Loose comparison: spaces, hyphens and underscores are ignored wherever
   * they occur (leading, in between or trailing) and letters are compared
   * through tolowercase().
   *
   * @param a first zero terminated name
   * @param b second zero terminated name
   * @return 0 if the names match, 1 otherwise
   */
  int unicmp(char *a, char *b)
  {
      for(;;) {
          /* skip separators before looking at the terminators, so neither
           * pointer is ever advanced beyond the end of its string */
          while(*a == ' ' || *a == '-' || *a == '_') a++;
          while(*b == ' ' || *b == '-' || *b == '_') b++;
          if(!*a || !*b) break;
          if(tolowercase(*a) != tolowercase(*b)) return 1;
          a++;
          b++;
      }
      /* equal only if both names are used up */
      return *a || *b;
  }