multitran.cpp 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250
  1. /*****************************************************************************
  2. * This file is a part of QStarDict, a StarDict clone written using Qt *
  3. * multitran.cpp - Plugin for multitran-data (multitran.sf.net) *
  4. * Copyright (C) 2008 Nick Shaforostoff *
  5. * Copyright (C) 2004 Stanislav Ievlev *
  6. * *
  7. * This program is free software; you can redistribute it and/or modify *
  8. * it under the terms of the GNU General Public License as published by *
  9. * the Free Software Foundation; either version 2 of the License, or *
  10. * (at your option) any later version. *
  11. * *
  12. * This program is distributed in the hope that it will be useful, *
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of *
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
  15. * GNU General Public License for more details. *
  16. * *
  17. * You should have received a copy of the GNU General Public License along *
  18. * with this program; if not, write to the Free Software Foundation, Inc., *
  19. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. *
  20. *****************************************************************************/
  21. #include "multitran.h"
  22. //#include "settingsdialog.h"
  23. #include <QCoreApplication>
  24. // #include <QSettings>
  25. #include <QTextCodec>
  26. /*
  27. /usr/include/mt/query
  28. /usr/include/btree
  29. /usr/include/mt/support
  30. /usr/include/facet
  31. */
  32. #include <facet/identification.hh>
  33. #include <facet/alphabet.hh>
  34. #include <facet/typographic.hh>
  35. #include <mt/query/linguas.hh>
  36. #include <mt/query/translation.hh>
  37. #include <mt/query/singleton.hh>
  38. #include <mt/query/config.hh>
  39. #include <mt/query/file_map.hh>
  40. #include <mt/support/path.hh>
  41. #include <mt/query/lgk.hh>
  42. #include <mt/support/str.hh>
  43. #include <mt/query/lang_info.hh>
  44. #include <algorithm>
  45. #include <functional>
  46. #include <memory>
  47. #include <iostream>
  48. #include <stdexcept>
  49. #include <vector>
  50. #include <string>
  51. typedef mt::singleton_array<mt::file_map> txtdb_type;
  52. struct compare_names
  53. {
  54. compare_names(const std::string& from,const std::string& to):
  55. from_(from),
  56. to_(to)
  57. {}
  58. bool operator()(const mt::lang_pair& lng1,const mt::lang_pair& lng2)
  59. {
  60. return distance(lng1) < distance(lng2);
  61. }
  62. int distance(const mt::lang_pair& lng)
  63. {
  64. std::string from_name=mt::lang_name(lng.first);
  65. std::string to_name=mt::lang_name(lng.second);
  66. return (!from_.empty() && !from_name.compare(0,from_.size(),from_)) +
  67. (!to_.empty() && !to_name.compare(0,to_.size(),to_));
  68. }
  69. std::string from_,to_;
  70. };
  71. int compare_articles(const mt::article& a1,const mt::article& a2)
  72. {
  73. if (a1.lgk() != a2.lgk())
  74. return a2.lgk() > a1.lgk();
  75. else
  76. return a2.subject() > a1.subject();
  77. }
  78. struct show
  79. {
  80. show(std::string& r_, bool& found_): r(r_),found(found_) {}
  81. void operator()(mt::article_set as)
  82. {
  83. mt::file_map& subj = txtdb_type::instance(mt::datapath+mt::path_separator()+"subjects.txt");
  84. mt::file_map& spart = txtdb_type::instance(mt::datapath+mt::path_separator()+"speechparts.txt");
  85. if (!as.articles_.empty())
  86. {
  87. found=true;
  88. std::sort(as.articles_.begin(),as.articles_.end(),compare_articles);
  89. int prev_lgk = -1;
  90. std::string prev_subject = "x";
  91. for(size_t i=0;i<as.articles_.size();++i)
  92. {
  93. const mt::article& a = as.articles_[i];
  94. if (prev_lgk != a.lgk())
  95. {
  96. r+="<tr><td><b>"+a.orig()+","+
  97. spart.any_name(mt::to_string<int>(mt::speech_part(a.lgk())))+"</b></td></tr>";
  98. prev_lgk = a.lgk();
  99. prev_subject = "x";//reset subject
  100. }
  101. if (prev_subject != a.subject())
  102. {
  103. r+="<tr><td></td><td><font class=\"explanation\">";
  104. r+=subj.any_name(a.subject());
  105. r+="</font></td><td>";
  106. r+=a.translated();
  107. prev_subject = a.subject();
  108. }
  109. else
  110. r+=", "+a.translated();
  111. }
  112. r+="</td></tr>";
  113. }
  114. }
  115. std::string &r;
  116. bool& found;
  117. };
  118. std::string do_translate(const std::string& text,mt::lang_code from,mt::lang_code to)
  119. {
  120. bool found=false;
  121. std::string r="<table>";
  122. mt::phrase ph;
  123. mt::fill_phrase(ph,text,from);
  124. mt::translation tr(ph,from,to);
  125. std::for_each(tr.asets().begin(), tr.asets().end(), show(r,found));
  126. r+="</table>";
  127. if (found)
  128. return r;
  129. return "";
  130. }
  131. Multitran::Multitran(QObject *parent)
  132. : QObject(parent)
  133. {
  134. // QSettings settings("qstardict","qstardict");
  135. // m_dictDirs = settings.value("Multitran/dictDirs", m_dictDirs).toStringList();
  136. // m_reformatLists = settings.value("Multitran/reformatLists", true).toBool();
  137. }
  138. Multitran::~Multitran()
  139. {
  140. // QSettings settings("qstardict","qstardict");
  141. // settings.setValue("Multitran/dictDirs", m_dictDirs);
  142. // settings.setValue("Multitran/reformatLists", m_reformatLists);
  143. }
  144. QStringList Multitran::availableDicts() const
  145. {
  146. return QStringList("Multitran");
  147. }
  148. void Multitran::setLoadedDicts(const QStringList &loadedDicts)
  149. {
  150. }
  151. Multitran::DictInfo Multitran::dictInfo(const QString &dict)
  152. {
  153. // ::DictInfo nativeInfo;
  154. // nativeInfo.wordcount = 0;
  155. DictInfo result(name(), dict);
  156. result.setAuthor("Multitran.ru");
  157. result.setDescription(tr("1 mln words excerpt of multitran.ru"));
  158. result.setWordsCount(-1);
  159. return result;
  160. }
  161. bool Multitran::isTranslatable(const QString &dict, const QString &word)
  162. {
  163. return true;
  164. }
  165. Multitran::Translation Multitran::translate(const QString &dict, const QString &word)
  166. {
  167. QTextCodec* c=QTextCodec::codecForMib(2251);
  168. std::string text=c->fromUnicode(word).data();
  169. std::string from_lang,to_lang;
  170. int i=word.size();
  171. while(--i>=0)
  172. if (word.at(i).unicode()>127)
  173. break;
  174. if (i!=-1)
  175. from_lang="russian";
  176. else
  177. from_lang="english";
  178. mt::linguas avail_langs;
  179. mt::linguas::iterator lang = std::max_element(avail_langs.begin(),
  180. avail_langs.end(),
  181. compare_names(from_lang,to_lang));
  182. if (lang == avail_langs.end() ||
  183. (!from_lang.empty() && !to_lang.empty() && (compare_names(from_lang,to_lang).distance(*lang)!=2)))
  184. {
  185. //std::cerr<<"illegal language names"<<std::endl;
  186. return Translation();
  187. }
  188. //"<hr width=50%><center><b>multitran</b><center><hr width=50%>";
  189. QString queryResult=c->toUnicode(do_translate(lower_str(lang->first,text),
  190. lang->first,lang->second).c_str());
  191. if (queryResult.isEmpty())
  192. return Translation();
  193. return Translation(word,"Multitran",queryResult);
  194. }
  195. QStringList Multitran::findSimilarWords(const QString &dict, const QString &word)
  196. {
  197. return QStringList();
  198. }
  199. int Multitran::execSettingsDialog(QWidget *parent)
  200. {
  201. //::SettingsDialog dialog(this, parent);
  202. //return dialog.exec();
  203. return 0;
  204. }
  205. // vim: tabstop=4 softtabstop=4 shiftwidth=4 expandtab cindent textwidth=120 formatoptions=tc