speller.cc 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863
  1. #include <unistd.h> // exec, pipe, fork...
  2. #include <algorithm> // std::sort
  3. #include "speller.h"
  4. #include "mk_wcwidth.h"
  5. #include "converters.h"
  6. #include "editor.h"
  7. #include "dialogline.h"
  8. #include "dbg.h"
  9. // A Correction class encapsulates an incorrect word, its position
  10. // in the text, and a list of seggested corrections.
  11. class Correction {
  12. bool valid;
  13. public:
  14. Correction(const char *s, int aLine);
  15. bool is_valid() { return valid; }
  16. unistring incorrect;
  17. // we also save a version of the word represented
  18. // in the speller encoding, so we don't have to convert
  19. // the word back later.
  20. cstring incorrect_original;
  21. std::vector<unistring> suggestions;
  22. // The position of the incorrect word: line number and offset within.
  23. int line;
  24. int offset;
  25. // hspell sometimes returns spelling-hints (short textual explanation
  26. // of why the word is incorrect).
  27. unistring hint;
  28. void add_hint(const unistring &s) {
  29. if (!hint.empty())
  30. hint.push_back('\n');
  31. hint.append(s);
  32. }
  33. };
  34. // A Corrections class holds a list of Correction objects pertaining
  35. // to one paragraph of text.
  36. class Corrections {
  37. std::vector<Correction *> array;
  38. // A function object to sort the Correction objects by their offset
  39. // within the paragraph.
  40. struct cmp_corrections {
  41. bool operator() (const Correction *a, const Correction *b) const {
  42. return a->offset < b->offset;
  43. }
  44. };
  45. public:
  46. Corrections() {}
  47. ~Corrections();
  48. void clear();
  49. void add(Correction *crctn);
  50. bool empty() const { return array.empty(); }
  51. int size() const { return (int)array.size(); }
  52. Correction *operator[] (int i)
  53. { return array[i]; }
  54. // The speller (e.g. hspell) may not report incorrect words in
  55. // the order in which they appear in the paragraph. This is because
  56. // hspell delegates the work to [ia]spell after it finishes reporting
  57. // the incorrect Hebrew words. However, since we want to present
  58. // the user the words in the right order, we have to sort them first.
  59. void sort() {
  60. std::sort(array.begin(), array.end(), cmp_corrections());
  61. }
  62. };
  63. Corrections::~Corrections()
  64. {
  65. clear();
  66. }
  67. void Corrections::clear()
  68. {
  69. for (int i = 0; i < size(); i++)
  70. delete array[i];
  71. array.clear();
  72. }
  73. void Corrections::add(Correction *crctn)
  74. {
  75. array.push_back(crctn);
  76. }
  77. // A Correction constructor parses an ispell-a line.
  78. //
  79. // The detailed description of the ispell-a protocol can be found
  80. // in the ispell man page. In short, when the speller finds an incorrect
  81. // word and has some spell suggestions, it returns:
  82. //
  83. // [&?] incorrect-word count offset: word, word, word, word
  84. //
  85. // When it has no suggestions, it returns:
  86. //
  87. // # <<incorrect>> <<offset>>
  88. //
  89. // If the protocol-line does not conform to the above syntaxes, we
  90. // ignore it and mark the object as invalid.
  91. Correction::Correction(const char *s, int aLine)
  92. {
  93. if (*s != '&' && *s != '?' && *s != '#') {
  94. valid = false;
  95. return;
  96. }
  97. valid = true;
  98. line = aLine;
  99. offset = -1;
  100. bool has_suggestions = (*s != '#');
  101. const char *pos, *start;
  102. start = pos = s + 2;
  103. while (*pos != ' ')
  104. pos++;
  105. incorrect.init_from_utf8(start, pos);
  106. offset = strtol(pos, (char **)&pos, 10);
  107. if (has_suggestions)
  108. offset = strtol(pos, (char **)&pos, 10);
  109. // we sent the speller lines prefixed with "^", so we need
  110. // to decrease by one.
  111. offset--;
  112. // the following post[1,2] tests are needed because
  113. // hspell returns "?" instead of "#" when there are
  114. // no suggestions.
  115. if (has_suggestions && pos[1] && pos[2]) {
  116. unistring word;
  117. do {
  118. start = pos += 2;
  119. while (*pos && *pos != ',')
  120. pos++;
  121. word.init_from_utf8(start, pos);
  122. suggestions.push_back(word);
  123. } while (*pos);
  124. }
  125. }
  126. //////////////////////////// SpellerWnd //////////////////////////////////
  127. SpellerWnd::SpellerWnd(Editor &aApp) :
  128. app(aApp)
  129. {
  130. create_window();
  131. label.highlight();
  132. label.set_text(_("Speller Results"));
  133. // The following are the keys the user presses to select
  134. // a spelling suggestion. These can be modified using gettext's
  135. // message catalogs.
  136. word_keys.init_from_utf8(
  137. _("1234567890:;<=>@bcdefhijklmnopqstuvwxyz[\\]^_`"
  138. "BCDEFHIJKLMNOPQSTUVWXYZ{|}~"));
  139. }
  140. void SpellerWnd::resize(int lines, int columns, int y, int x)
  141. {
  142. Widget::resize(lines, columns, y, x);
  143. label.resize(1, columns, y, x);
  144. editbox.resize(lines - 1, columns, y + 1, x);
  145. }
  146. void SpellerWnd::update()
  147. {
  148. label.update();
  149. editbox.update();
  150. }
  151. bool SpellerWnd::is_dirty() const
  152. {
  153. return label.is_dirty() || editbox.is_dirty();
  154. }
  155. void SpellerWnd::invalidate_view()
  156. {
  157. label.invalidate_view();
  158. editbox.invalidate_view();
  159. }
  160. INTERACTIVE void SpellerWnd::layout_windows()
  161. {
  162. app.layout_windows();
  163. }
  164. INTERACTIVE void SpellerWnd::refresh()
  165. {
  166. app.refresh();
  167. }
  168. void SpellerWnd::clear()
  169. {
  170. editbox.new_document();
  171. }
  172. void SpellerWnd::append(const unistring &us)
  173. {
  174. editbox.insert_text(us);
  175. }
  176. void SpellerWnd::append(const char *s)
  177. {
  178. unistring us;
  179. us.init_from_utf8(s);
  180. editbox.insert_text(us);
  181. }
  182. void SpellerWnd::end_menu(MenuResult result)
  183. {
  184. menu_result = result;
  185. finished = true;
  186. }
  187. INTERACTIVE void SpellerWnd::ignore_word()
  188. {
  189. end_menu(splIgnore);
  190. }
  191. INTERACTIVE void SpellerWnd::add_to_dict()
  192. {
  193. end_menu(splAdd);
  194. }
  195. INTERACTIVE void SpellerWnd::edit_replacement()
  196. {
  197. end_menu(splEdit);
  198. }
  199. INTERACTIVE void SpellerWnd::abort_spelling()
  200. {
  201. end_menu(splAbort);
  202. }
  203. INTERACTIVE void SpellerWnd::abort_spelling_restore_cursor()
  204. {
  205. end_menu(splAbortRestoreCursor);
  206. }
  207. INTERACTIVE void SpellerWnd::set_global_decision()
  208. {
  209. global_decision = true;
  210. editbox.set_read_only(false);
  211. editbox.move_beginning_of_buffer();
  212. append(_("--GLOBAL DECISION--\n"));
  213. editbox.set_read_only(true);
  214. }
  215. // handle_event() -
  216. //
  217. // A typical SpellerWnd window displays:
  218. //
  219. // (1) begging (2) begin (3) begun (4) bagging (5) beguine
  220. //
  221. // In brackets are the keys the user presses to choose a
  222. // spelling suggestion. We handle these keys here.
  223. bool SpellerWnd::handle_event(const Event &evt)
  224. {
  225. if (Widget::handle_event(evt))
  226. return true;
  227. if (evt.is_literal()) {
  228. int idx = word_keys.index(evt.ch);
  229. if (idx != -1 && idx < (int)correction->suggestions.size()) {
  230. suggestion_choice = idx;
  231. end_menu(splChoice);
  232. }
  233. return true;
  234. }
  235. return editbox.handle_event(evt);
  236. }
  237. // exec_correction_menu() - Setup the SpellerWnd contents and then
  238. // execute a modal menu (using an event loop). It returns the user's
  239. // action.
  240. MenuResult SpellerWnd::exec_correction_menu(Correction &crctn)
  241. {
  242. // we save the Correction object in a member variable because
  243. // other methods (e.g. handle_event) use it.
  244. correction = &crctn;
  245. u8string title;
  246. title.cformat(_("Suggestions for '%s'"),
  247. u8string(correction->incorrect).c_str());
  248. label.set_text(title.c_str());
  249. editbox.set_read_only(false);
  250. clear();
  251. for (int i = 0; i < (int)correction->suggestions.size()
  252. && i < word_keys.len(); i++)
  253. {
  254. u8string utf8_word(correction->suggestions[i]);
  255. u8string utf8_key(word_keys.substr(i, 1));
  256. u8string word_tmplt;
  257. if (i != 0)
  258. append("\xC2\xA0 "); // UNI_NO_BREAK_SPACE
  259. word_tmplt.cformat(_("(%s)\xC2\xA0%s"),
  260. utf8_key.c_str(), utf8_word.c_str());
  261. append(word_tmplt.c_str());
  262. }
  263. if (correction->suggestions.empty())
  264. append(_("No suggestions for this word."));
  265. append("\n\n");
  266. if (!correction->hint.empty()) {
  267. append(correction->hint);
  268. append("\n\n");
  269. }
  270. append(_("[SPC to leave unchanged, 'a' to add to private dictionary, "
  271. "'r' to edit word, 'q' to exit and restore cursor, ^C to "
  272. "exit and leave cursor, or one of the above characters "
  273. "to replace. 'g' to make your decision global.]"));
  274. editbox.set_read_only(true);
  275. editbox.move_beginning_of_buffer();
  276. global_decision = false;
  277. finished = false;
  278. while (!finished) {
  279. Event evt;
  280. app.update_terminal();
  281. get_next_event(evt, editbox.wnd);
  282. handle_event(evt);
  283. }
  284. return menu_result;
  285. }
  286. ///////////////////////////// Speller ////////////////////////////////////
  287. #define SPELER_REPLACE_HISTORY 110
  288. // the following UNLOAD_SPELLER routine is a temporary hack to
  289. // a pipe problem (see TODO).
  290. static Speller *global_speller_instance = NULL;
  291. void UNLOAD_SPELLER()
  292. {
  293. if (global_speller_instance)
  294. global_speller_instance->unload();
  295. }
  296. // replace_table is a hash-table that matches any incorrect word
  297. // with its correct spelling. It is used to implement the "Replace
  298. // All" function. Also, when the value of the key is the empty
  299. // string, it means to ignore the word (that's how "Ignore All" is
  300. // implemented).
  301. std::map<unistring, unistring> replace_table;
  302. Speller::Speller(Editor &aApp, DialogLine &aDialog) :
  303. app(aApp),
  304. dialog(aDialog)
  305. {
  306. loaded = false;
  307. global_speller_instance = this;
  308. }
  309. // load() - loads the speller. it forks and execs the speller. it setups
  310. // pipes for communication.
  311. //
  312. // Warning: the code is not foolproof! it expects the child process to
  313. // print an identity string. if the child prints nothing, this function
  314. // hangs!
  315. bool Speller::load(const char *cmd, const char *encoding)
  316. {
  317. if (is_loaded())
  318. return true;
  319. conv_to_speller =
  320. ConverterFactory::get_converter_to(encoding);
  321. conv_from_speller =
  322. ConverterFactory::get_converter_from(encoding);
  323. if (!conv_to_speller || !conv_from_speller) {
  324. dialog.show_message_fmt(_("Can't find converter '%s'"), encoding);
  325. return false;
  326. }
  327. conv_to_speller->enable_ilseq_repr();
  328. dialog.show_message(_("Loading speller..."));
  329. dialog.immediate_update();
  330. if (pipe(fd_to_spl) < 0 || pipe(fd_from_spl) < 0) {
  331. dialog.show_message(_("pipe() error"));
  332. return false;
  333. }
  334. pid_t pid;
  335. if ((pid = fork()) < 0) {
  336. dialog.show_message(_("fork() error"));
  337. return false;
  338. }
  339. if (pid == 0) {
  340. DISABLE_SIGTSTP();
  341. // we're in the child.
  342. dup2(fd_to_spl[0], STDIN_FILENO);
  343. dup2(fd_from_spl[1], STDOUT_FILENO);
  344. dup2(fd_from_spl[1], STDERR_FILENO);
  345. close(fd_from_spl[0]); close(fd_to_spl[0]);
  346. close(fd_from_spl[1]); close(fd_to_spl[1]);
  347. execlp("/bin/sh", "sh", "-c", cmd, NULL);
  348. // write the error back to the parent
  349. u8string err;
  350. err.cformat(_("Error %d (%s)\n"), errno, strerror(errno));
  351. write(STDOUT_FILENO, err.c_str(), err.size());
  352. exit(1);
  353. }
  354. dialog.show_message(_("Waiting for the speller to finish loading..."));
  355. dialog.immediate_update();
  356. u8string identity = read_line();
  357. if (identity.c_str()[0] != '@') {
  358. dialog.show_message_fmt(_("Error: Not a speller: %s"),
  359. identity.c_str());
  360. unload();
  361. return false;
  362. } else {
  363. // display the speller identity for a brief moment.
  364. dialog.show_message(identity.c_str());
  365. dialog.immediate_update();
  366. sleep(1);
  367. write_line("@ActivateExtendedProtocol\n"); // for future extensions :-)
  368. dialog.show_message(_("Speller loaded OK."));
  369. loaded = true;
  370. return true;
  371. }
  372. }
  373. void Speller::unload()
  374. {
  375. if (loaded) {
  376. close(fd_from_spl[0]); close(fd_to_spl[0]);
  377. close(fd_from_spl[1]); close(fd_to_spl[1]);
  378. delete conv_to_speller;
  379. delete conv_from_speller;
  380. loaded = false;
  381. }
  382. }
  383. // convert_from_unistr() and convert_to_unistr() convert from unicode
  384. // to the speller encoding and vice versa.
  385. void convert_from_unistr(cstring &cstr, const unistring &str,
  386. Converter *conv)
  387. {
  388. char *buf = new char[str.len() * 6 + 1]; // Max UTF-8 seq is 6.
  389. unichar *us_p = (unichar *)str.begin();
  390. char *cs_p = buf;
  391. conv->convert(&cs_p, &us_p, str.len());
  392. cstr = cstring(buf, cs_p);
  393. }
  394. void convert_to_unistr(unistring &str, const cstring &cstr,
  395. Converter *conv)
  396. {
  397. str.resize(cstr.size());
  398. unichar *us_p = (unichar *)str.begin();
  399. char *cs_p = (char *)&*cstr.begin(); // convert iterator to pointer
  400. conv->convert(&us_p, &cs_p, cstr.size());
  401. str.resize(us_p - str.begin());
  402. }
  403. void Speller::add_to_dictionary(Correction &correction)
  404. {
  405. replace_table[correction.incorrect] = unistring(); // "Ignore All"
  406. cstring cstr;
  407. cstr.cformat("*%s\n", correction.incorrect_original.c_str());
  408. write_line(cstr.c_str());
  409. write_line("#\n");
  410. }
  411. // interactive_correct() - let the user interactively correct the
  412. // spelling mistakes. For every incorrect word, it:
  413. //
  414. // 1. highlights the word
  415. // 2. calls exec_correction_menu() to display the menu
  416. // 3. acts based on the user action.
  417. //
  418. // returns 'false' if the user aborts.
  419. bool Speller::interactive_correct(Corrections &corrections,
  420. EditBox &wedit,
  421. SpellerWnd &splwnd,
  422. bool &restore_cursor)
  423. {
  424. for (int cur_crctn = 0; cur_crctn < corrections.size(); cur_crctn++)
  425. {
  426. Correction &correction = *corrections[cur_crctn];
  427. MenuResult menu_result;
  428. unistring replace_with;
  429. if (replace_table.find(correction.incorrect) != replace_table.end()) {
  430. replace_with = replace_table[correction.incorrect];
  431. menu_result = splEdit;
  432. } else {
  433. // highlight the word
  434. wedit.unset_primary_mark();
  435. wedit.set_cursor_position(Point(correction.line,
  436. correction.offset));
  437. wedit.set_primary_mark();
  438. for (int i = 0; i < correction.incorrect.len(); i++)
  439. wedit.move_forward_char();
  440. menu_result = splwnd.exec_correction_menu(correction);
  441. if (menu_result == splChoice) {
  442. replace_with = correction.suggestions[
  443. splwnd.get_suggestion_choice()];
  444. } else if (menu_result == splEdit) {
  445. bool alt_kbd = wedit.get_alt_kbd();
  446. replace_with = dialog.query(_("Replace with:"),
  447. correction.incorrect, SPELER_REPLACE_HISTORY,
  448. InputLine::cmpltOff, &alt_kbd);
  449. wedit.set_alt_kbd(alt_kbd);
  450. }
  451. }
  452. switch (menu_result) {
  453. case splAbort:
  454. restore_cursor = false;
  455. return false;
  456. break;
  457. case splAbortRestoreCursor:
  458. restore_cursor = true;
  459. return false;
  460. break;
  461. case splIgnore:
  462. if (splwnd.is_global_decision())
  463. replace_table[correction.incorrect] = unistring();
  464. break;
  465. case splAdd:
  466. add_to_dictionary(correction);
  467. break;
  468. case splChoice:
  469. case splEdit:
  470. if (!replace_with.empty()) {
  471. wedit.set_cursor_position(Point(correction.line,
  472. correction.offset));
  473. wedit.replace_text(replace_with, correction.incorrect.len());
  474. if (splwnd.is_global_decision())
  475. replace_table[correction.incorrect] = replace_with;
  476. // Since we modified the text, the offsets of the
  477. // following Correction objects must be adjusted.
  478. for (int i = cur_crctn + 1; i < corrections.size(); i++) {
  479. if (corrections[i]->offset > correction.offset) {
  480. corrections[i]->offset +=
  481. replace_with.len() - correction.incorrect.len();
  482. }
  483. }
  484. }
  485. break;
  486. }
  487. app.update_terminal();
  488. }
  489. return true;
  490. }
  491. // adjust_word_offset() - the speller reports the offsets of incorrect
  492. // words, but some spellers (like hspell) report incorrect offsets, so
  493. // we need to detect these cases and find the words ourselves.
  494. void adjust_word_offset(Correction &c, const unistring &str)
  495. {
  496. if (str.index(c.incorrect, c.offset) != c.offset) {
  497. // first, search the word near the reported offset
  498. int from = c.offset - 10;
  499. c.offset = str.index(c.incorrect, (from < 0) ? 0 : from);
  500. if (c.offset == -1) {
  501. // wasn't found, so search starting from the beginning
  502. // of the paragraph.
  503. if ((c.offset = str.index(c.incorrect, 0)) == -1)
  504. c.offset = 0;
  505. }
  506. }
  507. }
  508. // get_word_boundaries() - get the boundaries of the word on which the
  509. // cursor stands.
  510. void get_word_boundaries(const unistring &str, int cursor, int &wbeg, int &wend)
  511. {
  512. // If the cursor stands just past the word, treat it as if it
  513. // stants on the word.
  514. if ((cursor == str.len() || !BiDi::is_wordch(str[cursor]))
  515. && cursor > 0 && BiDi::is_wordch(str[cursor-1]))
  516. cursor--;
  517. wbeg = wend = cursor;
  518. if (cursor < str.len() && BiDi::is_wordch(str[cursor])) {
  519. while (wbeg > 0 && BiDi::is_wordch(str[wbeg-1]))
  520. wbeg--;
  521. while (wend < str.len()-1 && BiDi::is_wordch(str[wend+1]))
  522. wend++;
  523. wend++;
  524. }
  525. }
  526. // erase_special_characters_words() - erases/modifies characters
  527. // or words that may cause problems to the speller:
  528. //
  529. // 0. If we're checking emails and the line is quoted (">"), erase it.
  530. // 1. remove words with combining characters (e.g. Hebrew points)
  531. // 2. remove ispell's "\"
  532. // 3. convert Hebrew maqaf to ASCII one.
  533. void erase_special_characters_words(unistring &str, bool erase_quotes)
  534. {
  535. if (erase_quotes) {
  536. // If we're checking emails, erase lines starting
  537. // with ">" (with optional preceding spaces).
  538. int i = 0;
  539. while (i < str.len() && str[i] == ' ')
  540. i++;
  541. if (i < str.len() && str[i] == '>') {
  542. for (i = 0; i < str.len(); i++)
  543. str[i] = ' ';
  544. }
  545. }
  546. for (int i = 0; i < str.len(); i++) {
  547. if (str[i] == UNI_HEB_MAQAF)
  548. str[i] = '-';
  549. if (str[i] == '\\') // ispell's line continuation char.
  550. str[i] = ' ';
  551. }
  552. for (int i = 0; i < str.len(); i++) {
  553. if (mk_wcwidth(str[i]) == 0) {
  554. if (BiDi::is_nsm(str[i])) {
  555. // delete the word in which the NSM is.
  556. int wbeg, wend;
  557. get_word_boundaries(str, i, wbeg, wend);
  558. for (int j = wbeg; j < wend; j++)
  559. str[j] = ' ';
  560. } else {
  561. // probably some formatting code (RLM, LRM, etc)
  562. str[i] = ' ';
  563. }
  564. }
  565. }
  566. }
  567. // erase_before_after_word() - erases the text segment preceding or the
  568. // text segment following the word on which the cursor stands.
  569. void erase_before_after_word(unistring &str, int cursor, bool bef, bool aft)
  570. {
  571. int wbeg, wend;
  572. get_word_boundaries(str, cursor, wbeg, wend);
  573. if (bef)
  574. for (int i = 0; i < wbeg; i++)
  575. str[i] = ' ';
  576. if (aft) {
  577. // but don't erase the hebrew maqaf (ascii-transliterated)
  578. if (wend < str.len() && str[wend] == '-')
  579. wend++;
  580. for (int i = wend; i < str.len(); i++)
  581. str[i] = ' ';
  582. }
  583. }
  584. // spell_check() - the principal method.
  585. void Speller::spell_check(splRng range, EditBox &wedit, SpellerWnd &splwnd)
  586. {
  587. if (!is_loaded()) {
  588. dialog.show_message(_("Speller is not loaded"));
  589. return;
  590. }
  591. bool cancel_spelling = false;
  592. if (range == splRngWord)
  593. write_line("%\n"); // exit terse mode
  594. else
  595. write_line("!\n"); // enter terse mode
  596. // Find the start and end paragraphs corresponding to
  597. // the requested range.
  598. int start_para, end_para;
  599. Point cursor_origin;
  600. wedit.get_cursor_position(cursor_origin);
  601. if (range == splRngAll) {
  602. start_para = 0;
  603. end_para = wedit.get_number_of_paragraphs() - 1;
  604. } else {
  605. start_para = cursor_origin.para;
  606. if (range == splRngForward)
  607. end_para = wedit.get_number_of_paragraphs() - 1;
  608. else
  609. end_para = start_para;
  610. }
  611. // Some variabls that are used when range==splRngWord
  612. bool sole_word_correct = false;
  613. unistring sole_word;
  614. unistring sole_word_root;
  615. bool restore_cursor = true;
  616. for (int i = start_para; i <= end_para && !cancel_spelling; i++)
  617. {
  618. dialog.show_message_fmt(_("Spell checking... %d/%d"),
  619. i+1, wedit.get_number_of_paragraphs());
  620. dialog.immediate_update();
  621. unistring para = wedit.get_paragraph_text(i);
  622. // erase/modify some characters/words
  623. erase_special_characters_words(para,
  624. (wedit.get_syn_hlt() == EditBox::synhltEmail) && (range != splRngWord));
  625. if (i == start_para) {
  626. if (range != splRngAll) {
  627. // erase text we're not supposed to check.
  628. erase_before_after_word(para, cursor_origin.pos,
  629. true, range != splRngForward);
  630. // after finishing checking splRgnForward/splRgnWord,
  631. // we restore the cursor to the start of the word on
  632. // which it stood.
  633. int wbeg, wend;
  634. get_word_boundaries(para, cursor_origin.pos, wbeg, wend);
  635. cursor_origin.pos = wbeg;
  636. // also, when checking a sole word, keep it because
  637. // we need to display it later in the dialog-line.
  638. if (range == splRngWord)
  639. sole_word = para.substr(wbeg, wend - wbeg);
  640. } else {
  641. // after finishing checking the whole document, we
  642. // restore cursor position to the first column of
  643. // the paragraph.
  644. cursor_origin.pos = 0;
  645. }
  646. }
  647. // Convert the text to the speller encoding
  648. // :TODO: special treatment for UTF-8.
  649. cstring cstr;
  650. convert_from_unistr(cstr, para, conv_to_speller);
  651. // Send "^text" to speller
  652. cstr.insert(0, "^");
  653. cstr += "\n";
  654. write_line(cstr.c_str());
  655. // Read the speller reply, till encountering the empty string,
  656. // and construct a Corrections collection.
  657. Corrections corrections;
  658. Correction *last_corretion = NULL;
  659. do {
  660. cstr = read_line();
  661. if (cstr.size() != 0) {
  662. unistring ustr;
  663. convert_to_unistr(ustr, cstr, conv_from_speller);
  664. Correction *c = new Correction(u8string(ustr).c_str(), i);
  665. if (c->is_valid()) {
  666. // store the speller-encoded word too, in case
  667. // we need to feed it back (like in the "*<<word>>"
  668. // command).
  669. convert_from_unistr(c->incorrect_original, c->incorrect,
  670. conv_to_speller);
  671. adjust_word_offset(*c, para);
  672. corrections.add(c);
  673. last_corretion = c;
  674. } else {
  675. delete c;
  676. // Special support for hspell's hints.
  677. if ((ustr[0] == ' ' || ustr[0] == 'H') && last_corretion)
  678. last_corretion->add_hint(ustr.substr(1));
  679. // When spell-checking a sole word, we're in
  680. // non-terse mode.
  681. if (range == splRngWord) {
  682. if (ustr[0] == '*' || ustr[0] == '+') {
  683. sole_word_correct = true;
  684. if (ustr[0] == '+' && ustr.len() > 2)
  685. sole_word_root = ustr.substr(2);
  686. }
  687. }
  688. }
  689. }
  690. } while (cstr.size() != 0);
  691. corrections.sort();
  692. // :TODO: adjust UTF-8 offsets.
  693. if ((cancel_spelling = terminal::was_ctrl_c_pressed()))
  694. restore_cursor = false;
  695. // hand the Corrections collection to the method that interacts
  696. // with the user.
  697. if (!cancel_spelling && !corrections.empty()) {
  698. dialog.show_message_fmt(_("A misspelling was found at %d/%d"),
  699. i+1, wedit.get_number_of_paragraphs());
  700. cancel_spelling = !interactive_correct(corrections,
  701. wedit, splwnd, restore_cursor);
  702. }
  703. }
  704. wedit.unset_primary_mark();
  705. if (restore_cursor && range != splRngWord)
  706. wedit.set_cursor_position(cursor_origin);
  707. if (sole_word_correct) {
  708. if (sole_word_root.empty())
  709. dialog.show_message_fmt(_("Word '%s' is correct"),
  710. u8string(sole_word).c_str());
  711. else
  712. dialog.show_message_fmt(_("Word '%s' is correct because of %s"),
  713. u8string(sole_word).c_str(),
  714. u8string(sole_word_root).c_str());
  715. } else {
  716. dialog.show_message(_("Spell cheking done"));
  717. }
  718. }
  719. // read_line() - read a line from the speller
  720. cstring Speller::read_line()
  721. {
  722. u8string str;
  723. char ch;
  724. while (read(fd_from_spl[0], &ch, 1)) {
  725. if (ch != '\n')
  726. str += ch;
  727. else
  728. break;
  729. }
  730. return str;
  731. }
  732. // write_line() - write a line to the speller
  733. void Speller::write_line(const char *s)
  734. {
  735. write(fd_to_spl[1], s, strlen(s));
  736. }