bigram.c 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238
  1. /*
  2. * Copyright (c) 2009 Openmoko Inc.
  3. *
  4. * This program is free software: you can redistribute it and/or modify
  5. * it under the terms of the GNU General Public License as published by
  6. * the Free Software Foundation, either version 3 of the License, or
  7. * (at your option) any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program. If not, see <http://www.gnu.org/licenses/>.
  16. */
  17. #include <string.h>
  18. #include <stdlib.h>
  19. #include "msg.h"
  20. #include "bigram.h"
  21. #include "lcd_buf_draw.h"
  22. #ifdef WIKIPCF
  23. extern void showMsg(int currentLevel, char *format, ...);
  24. #include <assert.h>
  25. char aBigram[128][2];
  26. #else
  27. #include "file-io.h"
  28. #include "wiki_info.h"
  29. extern int nCurrentWiki;
  30. char aBigram[MAX_WIKIS_PER_DEVICE][128][2];
  31. #endif
  32. int32_t aCharIdx[128];
  33. #ifdef WIKIPCF
  34. void init_bigram(FILE *fd)
  35. {
  36. init_char_idx();
  37. ssize_t n = fread(aBigram, 1, sizeof(aBigram), fd);
  38. assert(sizeof(aBigram) == n);
  39. }
  40. #else
  41. void init_bigram(int fd)
  42. {
  43. init_char_idx();
  44. wl_read(fd, aBigram[nCurrentWiki], sizeof(aBigram) / MAX_WIKIS_PER_DEVICE);
  45. }
  46. #endif
  47. void init_char_idx()
  48. {
  49. char c;
  50. int i;
  51. int idx = 1;
  52. static int inited = 0;
  53. if (inited)
  54. return;
  55. inited = 1;
  56. memset(aCharIdx, 0, sizeof(aCharIdx));
  57. for (i = 0; i < 128; i++)
  58. {
  59. c = (char)i;
  60. if (is_supported_search_char(c))
  61. {
  62. if ('a' <= c && c <= 'z')
  63. aCharIdx[i] = aCharIdx[(int)'A' + (c - 'a')];
  64. else
  65. aCharIdx[i] = idx++;
  66. }
  67. }
  68. }
  69. int bigram_char_idx(char c)
  70. {
  71. return aCharIdx[(int)c];
  72. }
  73. #ifdef WIKIPCF
  74. void bigram_encode(char *outStr, char *inStr)
  75. {
  76. int i;
  77. int len;
  78. int rc;
  79. char c;
  80. int idxMatchedBigram;
  81. *outStr = '\0';
  82. len = strlen(inStr);
  83. while (len >= 2)
  84. {
  85. idxMatchedBigram = -1;
  86. for (i=0; i < 128; i++)
  87. {
  88. if ((rc = memcmp(inStr, &aBigram[i][0], 2)) == 0)
  89. {
  90. idxMatchedBigram = i;
  91. break;
  92. }
  93. else if (rc < 0)
  94. break;
  95. }
  96. if (idxMatchedBigram >= 0)
  97. {
  98. c = (char)i;
  99. c |= 0x80;
  100. *outStr = c;
  101. outStr++;
  102. inStr += 2;
  103. len -= 2;
  104. }
  105. else
  106. {
  107. *outStr = *inStr;
  108. outStr++;
  109. inStr++;
  110. len--;
  111. }
  112. }
  113. while (len > 0)
  114. {
  115. *outStr = *inStr;
  116. outStr++;
  117. inStr++;
  118. len--;
  119. }
  120. *outStr = '\0';
  121. }
  122. void bigram_decode(char *outStr, char *inStr, int lenMax)
  123. {
  124. unsigned char c;
  125. while (lenMax > 1 && (c = *inStr++) != '\0')
  126. {
  127. if (c >= 128 && aBigram[c-128][0])
  128. {
  129. *outStr = aBigram[c-128][0];
  130. outStr++;
  131. lenMax--;
  132. if (lenMax > 1)
  133. {
  134. *outStr = aBigram[c-128][1];
  135. outStr++;
  136. lenMax--;
  137. }
  138. }
  139. else
  140. {
  141. *outStr = c;
  142. outStr++;
  143. lenMax--;
  144. }
  145. }
  146. *outStr = '\0';
  147. }
  148. #else
  149. void bigram_decode(char *outStr, char *inStr, int lenMax)
  150. {
  151. unsigned char c;
  152. while (lenMax > 1 && (c = *inStr++) != '\0')
  153. {
  154. if (c >= 128 && aBigram[nCurrentWiki][c-128][0])
  155. {
  156. *outStr = aBigram[nCurrentWiki][c-128][0];
  157. outStr++;
  158. lenMax--;
  159. if (lenMax > 1)
  160. {
  161. *outStr = aBigram[nCurrentWiki][c-128][1];
  162. outStr++;
  163. lenMax--;
  164. }
  165. }
  166. else
  167. {
  168. *outStr = c;
  169. outStr++;
  170. lenMax--;
  171. }
  172. }
  173. *outStr = '\0';
  174. }
  175. #endif
  176. int is_supported_search_char(char c)
  177. {
  178. if (c && (strchr(SUPPORTED_SEARCH_CHARS, c) || ('A' <= c && c <= 'Z')))
  179. return 1;
  180. else
  181. return 0;
  182. }
  /*
   * Compare a title against the first len characters of a search string,
   * skipping title characters that are not searchable and folding
   * uppercase ASCII letters in the title to lowercase.
   *
   * title:  NUL-terminated title text.
   * search: search text; at most len characters are examined.
   * len:    number of search characters to compare.
   *
   * Returns 0 when all compared characters match, 1 when the title sorts
   * after the search text at the first difference, -1 when it sorts
   * before it.
   *
   * If both strings reach their NUL terminator before len characters have
   * been compared, the comparison stops with 0 instead of stepping past
   * the end of either string.
   */
  int search_string_cmp(char *title, char *search, int len) // assuming search consists of lowercase only
  {
      int rc = 0;
      char c;

      while (!rc && len > 0)
      {
          c = *title;
          if (c && !is_supported_search_char(c))
          {
              /* Unsearchable title character: ignore it. */
              title++;
              continue;
          }

          if ('A' <= c && c <= 'Z')
              c += 'a' - 'A';

          if (c == *search)
          {
              if (c == '\0')
                  break;  /* both strings ended together */
              title++;
              search++;
              len--;
          }
          else if (c > *search)
              rc = 1;
          else
              rc = -1;
      }
      return rc;
  }
  219. }