bigram.c 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200
  1. /*
  2. * Copyright (c) 2009 Openmoko Inc.
  3. *
  4. * This program is free software: you can redistribute it and/or modify
  5. * it under the terms of the GNU General Public License as published by
  6. * the Free Software Foundation, either version 3 of the License, or
  7. * (at your option) any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  16. */
  17. #include <string.h>
  18. #include <stdlib.h>
  19. #include "msg.h"
  20. #include "bigram.h"
  21. #include "lcd_buf_draw.h"
  22. #ifdef WIKIPCF
  23. extern void showMsg(int currentLevel, char *format, ...);
  24. #include <assert.h>
  25. #else
  26. #include "file-io.h"
  27. #endif
  /*
   * Bigram table: 128 two-character pairs, filled from file by init_bigram().
   * bigram_encode() emits entry i as the single byte (0x80 | i) and
   * bigram_decode() expands such a byte back into the two characters.
   */
  28. char aBigram[128][2];
  /*
   * Per-character index for character codes 0..127, filled by init_char_idx().
   * 0 means "not a supported search character"; supported characters get
   * consecutive indices starting at 1, and a supported lowercase letter shares
   * the index of its uppercase counterpart.
   */
  29. int32_t aCharIdx[128];
  30. #ifdef WIKIPCF
  31. void init_bigram(FILE *fd)
  32. {
  33. init_char_idx();
  34. ssize_t n = fread(aBigram, 1, sizeof(aBigram), fd);
  35. assert(sizeof(aBigram) == n);
  36. }
  37. #else
  38. void init_bigram(int fd)
  39. {
  40. init_char_idx();
  41. wl_read(fd, aBigram, sizeof(aBigram));
  42. }
  43. #endif
  44. void init_char_idx()
  45. {
  46. char c;
  47. int i;
  48. int idx = 1;
  49. memset(aCharIdx, 0, sizeof(aCharIdx));
  50. for (i = 0; i < 128; i++)
  51. {
  52. c = (char)i;
  53. if (is_supported_search_char(c))
  54. {
  55. if ('a' <= c && c <= 'z')
  56. aCharIdx[i] = aCharIdx[(int)'A' + (c - 'a')];
  57. else
  58. aCharIdx[i] = idx++;
  59. }
  60. }
  61. }
  62. int bigram_char_idx(char c)
  63. {
  64. return aCharIdx[(int)c];
  65. }
  66. void bigram_encode(char *outStr, char *inStr)
  67. {
  68. int i;
  69. int len;
  70. int rc;
  71. char c;
  72. int idxMatchedBigram;
  73. *outStr = '\0';
  74. len = strlen(inStr);
  75. while (len >= 2)
  76. {
  77. idxMatchedBigram = -1;
  78. for (i=0; i < 128; i++)
  79. {
  80. if ((rc = memcmp(inStr, &aBigram[i][0], 2)) == 0)
  81. {
  82. idxMatchedBigram = i;
  83. break;
  84. }
  85. else if (rc < 0)
  86. break;
  87. }
  88. if (idxMatchedBigram >= 0)
  89. {
  90. c = (char)i;
  91. c |= 0x80;
  92. *outStr = c;
  93. outStr++;
  94. inStr += 2;
  95. len -= 2;
  96. }
  97. else
  98. {
  99. *outStr = *inStr;
  100. outStr++;
  101. inStr++;
  102. len--;
  103. }
  104. }
  105. while (len > 0)
  106. {
  107. *outStr = *inStr;
  108. outStr++;
  109. inStr++;
  110. len--;
  111. }
  112. *outStr = '\0';
  113. }
  114. void bigram_decode(char *outStr, char *inStr, int lenMax)
  115. {
  116. unsigned char c;
  117. while (lenMax > 1 && (c = *inStr++) != '\0')
  118. {
  119. if (c >= 128)
  120. {
  121. *outStr = aBigram[c-128][0];
  122. outStr++;
  123. lenMax--;
  124. if (lenMax > 1)
  125. {
  126. *outStr = aBigram[c-128][1];
  127. outStr++;
  128. lenMax--;
  129. }
  130. }
  131. else
  132. {
  133. *outStr = c;
  134. outStr++;
  135. lenMax--;
  136. }
  137. }
  138. *outStr = '\0';
  139. }
  140. int is_supported_search_char(char c)
  141. {
  142. if (c && (strchr(SUPPORTED_SEARCH_CHARS, c) || ('A' <= c && c <= 'Z')))
  143. return 1;
  144. else
  145. return 0;
  146. }
  /*
   * search_string_cmp - compare an article title against a search prefix.
   *
   * title:  NUL-terminated title; characters rejected by
   *         is_supported_search_char() are skipped and 'A'..'Z' are folded
   *         to lowercase before comparing.
   * search: search text, compared as-is for its first len characters.
   * len:    number of characters of search to compare.
   *
   * Returns 0 when all len characters match, 1 when the first differing
   * title character is greater than the search character, and -1 when it is
   * smaller. The title's terminating NUL takes part in the comparison like
   * any other character.
   */
  int search_string_cmp(char *title, char *search, int len) // assuming search consists of lowercase only
  {
      char folded;

  #if 0 // some debug message
  #ifdef WIKIPCF
      char temp[512];
      memcpy(temp, search, len);
      temp[len] = '\0';
      showMsg(3, "[%s][%s]\n", title, temp);
  #endif
  #endif

      while (len > 0) {
          folded = *title;

          /* skip title characters that are not searchable */
          if (folded != '\0' && !is_supported_search_char(folded)) {
              title++;
              continue;
          }

          if (folded >= 'A' && folded <= 'Z')
              folded += 'a' - 'A';

          if (folded != *search)
              return (folded > *search) ? 1 : -1;

          title++;
          search++;
          len--;
      }

      return 0;
  }
  183. }