lib.h 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200
  1. #ifndef __SD_LIB_H__
  2. #define __SD_LIB_H__
  3. #include <cstdio>
  4. #include <list>
  5. #include <memory>
  6. #include <string>
  7. #include <vector>
  8. #include "dictziplib.hpp"
  // Per-dictionary match limit (NOTE(review): inferred from the name; its use
  // is not visible in this header).
  const int MAX_MATCH_ITEM_PER_LIB = 100;

  // Threshold for similar-word search: a candidate may differ from the query
  // in at most MAX_FUZZY_DISTANCE - 1 places.
  const int MAX_FUZZY_DISTANCE = 3;
  11. struct cacheItem
  12. {
  13. guint32 offset;
  14. gchar *data;
  15. //write code here to make it inline
  16. cacheItem()
  17. {
  18. data = NULL;
  19. }
  20. ~cacheItem()
  21. {
  22. g_free(data);
  23. }
  24. };
  // Number of cacheItem slots held by each DictBase.
  const int WORDDATA_CACHE_NUM = 10;

  // Sentinel index value; Libs::poGetWordData() returns NULL when given it.
  const int INVALID_INDEX = -100;
  27. class DictBase
  28. {
  29. public:
  30. DictBase();
  31. ~DictBase();
  32. gchar * GetWordData(guint32 idxitem_offset, guint32 idxitem_size);
  33. bool containSearchData();
  34. bool SearchData(std::vector<std::string> &SearchWords, guint32 idxitem_offset, guint32 idxitem_size, gchar *origin_data);
  35. protected:
  36. std::string sametypesequence;
  37. FILE *dictfile;
  38. std::auto_ptr<dictData> dictdzfile;
  39. private:
  40. cacheItem cache[WORDDATA_CACHE_NUM];
  41. gint cache_cur;
  42. };
  43. //this structure contain all information about dictionary
  44. struct DictInfo
  45. {
  46. std::string ifo_file_name;
  47. guint32 wordcount;
  48. std::string bookname;
  49. std::string author;
  50. std::string email;
  51. std::string website;
  52. std::string date;
  53. std::string description;
  54. guint32 index_file_size;
  55. std::string sametypesequence;
  56. bool load_from_ifo_file(const std::string& ifofilename, bool istreedict);
  57. };
  58. class index_file
  59. {
  60. public:
  61. guint32 wordentry_offset;
  62. guint32 wordentry_size;
  63. virtual ~index_file()
  64. {}
  65. virtual bool load(const std::string& url, gulong wc, gulong fsize) = 0;
  66. virtual const gchar *get_key(glong idx) = 0;
  67. virtual void get_data(glong idx) = 0;
  68. virtual const gchar *get_key_and_data(glong idx) = 0;
  69. virtual bool lookup(const char *str, glong &idx) = 0;
  70. };
  71. class Dict : public DictBase
  72. {
  73. private:
  74. std::string ifo_file_name;
  75. gulong wordcount;
  76. std::string bookname;
  77. std::auto_ptr<index_file> idx_file;
  78. bool load_ifofile(const std::string& ifofilename, gulong &idxfilesize);
  79. public:
  80. Dict()
  81. {}
  82. bool load(const std::string& ifofilename);
  83. gulong narticles()
  84. {
  85. return wordcount;
  86. }
  87. const std::string& dict_name()
  88. {
  89. return bookname;
  90. }
  91. const std::string& ifofilename()
  92. {
  93. return ifo_file_name;
  94. }
  95. const gchar *get_key(glong index)
  96. {
  97. return idx_file->get_key(index);
  98. }
  99. gchar *get_data(glong index)
  100. {
  101. idx_file->get_data(index);
  102. return DictBase::GetWordData(idx_file->wordentry_offset, idx_file->wordentry_size);
  103. }
  104. void get_key_and_data(glong index, const gchar **key, guint32 *offset, guint32 *size)
  105. {
  106. *key = idx_file->get_key_and_data(index);
  107. *offset = idx_file->wordentry_offset;
  108. *size = idx_file->wordentry_size;
  109. }
  110. bool Lookup(const char *str, glong &idx)
  111. {
  112. return idx_file->lookup(str, idx);
  113. }
  114. bool LookupWithRule(GPatternSpec *pspec, glong *aIndex, int iBuffLen);
  115. };
  // List of strings; Libs::load() and Libs::reload() take their directory,
  // ordering and disable lists in this form.
  typedef std::list<std::string> strlist_t;
  117. class Libs
  118. {
  119. public:
  120. typedef void (*progress_func_t)(void);
  121. Libs(progress_func_t f = NULL);
  122. ~Libs();
  123. void load_dict(const std::string& url);
  124. void load(const strlist_t& dicts_dirs,
  125. const strlist_t& order_list,
  126. const strlist_t& disable_list);
  127. void reload(const strlist_t& dicts_dirs,
  128. const strlist_t& order_list,
  129. const strlist_t& disable_list);
  130. glong narticles(int idict)
  131. {
  132. return oLib[idict]->narticles();
  133. }
  134. const std::string& dict_name(int idict)
  135. {
  136. return oLib[idict]->dict_name();
  137. }
  138. gint ndicts()
  139. {
  140. return oLib.size();
  141. }
  142. const gchar * poGetWord(glong iIndex, int iLib)
  143. {
  144. return oLib[iLib]->get_key(iIndex);
  145. }
  146. gchar * poGetWordData(glong iIndex, int iLib)
  147. {
  148. if (iIndex == INVALID_INDEX)
  149. return NULL;
  150. return oLib[iLib]->get_data(iIndex);
  151. }
  152. const gchar *poGetCurrentWord(glong *iCurrent);
  153. const gchar *poGetNextWord(const gchar *word, glong *iCurrent);
  154. const gchar *poGetPreWord(glong *iCurrent);
  155. bool LookupWord(const gchar* sWord, glong& iWordIndex, int iLib)
  156. {
  157. return oLib[iLib]->Lookup(sWord, iWordIndex);
  158. }
  159. bool LookupSimilarWord(const gchar* sWord, glong & iWordIndex, int iLib);
  160. bool SimpleLookupWord(const gchar* sWord, glong & iWordIndex, int iLib);
  161. bool LookupWithFuzzy(const gchar *sWord, gchar *reslist[], gint reslist_size, gint iLib);
  162. gint LookupWithRule(const gchar *sWord, gchar *reslist[]);
  163. bool LookupData(const gchar *sWord, std::vector<gchar *> *reslist);
  164. private:
  165. std::vector<Dict *> oLib; // word Libs.
  166. int iMaxFuzzyDistance;
  167. progress_func_t progress_func;
  168. };
  // Kinds of query distinguished by analyze_query().
  // NOTE(review): the meaning of each enumerator is inferred from its name
  // (simple, regular-expression/pattern, fuzzy, full-data search) - confirm
  // against the implementation.
  enum query_t {
      qtSIMPLE,
      qtREGEXP,
      qtFUZZY,
      qtDATA
  };

  // Classifies the query string `s` and returns its kind; `res` is an output
  // string whose exact contents are defined by the implementation, which is
  // not visible in this header.
  extern query_t analyze_query(const char *s, std::string& res);
  173. #endif//!__SD_LIB_H__