123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240 |
- /* SpellingChecker.cpp
- *
- * Copyright (C) 1999-2007,2011,2012,2015-2018 Paul Boersma
- *
- * This code is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or (at
- * your option) any later version.
- *
- * This code is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
- * See the GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this work. If not, see <http://www.gnu.org/licenses/>.
- */
- #include "SpellingChecker.h"
- #include <ctype.h>
- #include "oo_DESTROY.h"
- #include "SpellingChecker_def.h"
- #include "oo_COPY.h"
- #include "SpellingChecker_def.h"
- #include "oo_EQUAL.h"
- #include "SpellingChecker_def.h"
- #include "oo_CAN_WRITE_AS_ENCODING.h"
- #include "SpellingChecker_def.h"
- #include "oo_WRITE_TEXT.h"
- #include "SpellingChecker_def.h"
- #include "oo_WRITE_BINARY.h"
- #include "SpellingChecker_def.h"
- #include "oo_READ_TEXT.h"
- #include "SpellingChecker_def.h"
- #include "oo_READ_BINARY.h"
- #include "SpellingChecker_def.h"
- #include "oo_DESCRIPTION.h"
- #include "SpellingChecker_def.h"
- #include "../kar/longchar.h"
- Thing_implement (SpellingChecker, Daata, 0);
- autoSpellingChecker WordList_upto_SpellingChecker (WordList me) {
- try {
- autoSpellingChecker thee = Thing_new (SpellingChecker);
- thy wordList = Data_copy (me);
- thy userDictionary = StringSet_create ();
- thy separatingCharacters = Melder_dup (U".,;:()\"");
- return thee;
- } catch (MelderError) {
- Melder_throw (me, U": not converted to SpellingChecker.");
- }
- }
- autoWordList SpellingChecker_extractWordList (SpellingChecker me) {
- return Data_copy (my wordList.get());
- }
- void SpellingChecker_replaceWordList (SpellingChecker me, WordList list) {
- try {
- my wordList = Data_copy (list);
- } catch (MelderError) {
- Melder_throw (me, U": word list not replaced.");
- }
- }
- autoStringSet SpellingChecker_extractUserDictionary (SpellingChecker me) {
- try {
- if (my userDictionary->size == 0)
- Melder_throw (U"This spelling checker does not contain a user dictionary.");
- return Data_copy (my userDictionary.get());
- } catch (MelderError) {
- Melder_throw (me, U": user dictionary not extracted.");
- }
- }
- void SpellingChecker_replaceUserDictionary (SpellingChecker me, StringSet userDictionary) {
- try {
- my userDictionary = Data_copy (userDictionary);
- } catch (MelderError) {
- Melder_throw (me, U": user dictionary not replaced.");
- }
- }
- static int startsWithCapital (conststring32 word) {
- return iswupper ((int) word [0]) || (word [0] == '\\' && iswupper ((int) word [1]));
- }
- bool SpellingChecker_isWordAllowed (SpellingChecker me, conststring32 word) {
- int wordLength = str32len (word);
- if (my allowAllWordsContaining && my allowAllWordsContaining [0]) {
- char32 *p = & my allowAllWordsContaining [0];
- while (*p) {
- /*
- * Find next token in list of allowed string parts.
- */
- char32 token [100], *q = & token [0];
- /*
- * Skip spaces in list.
- */
- while (*p == U' ') p ++;
- /*
- * Collect one token string from list.
- */
- while (*p != U'\0' && *p != U' ') {
- *q ++ = *p ++;
- }
- *q = U'\0'; // trailing null character
- /*
- * Allow word if it contains this token.
- */
- if (str32str (word, token)) return true;
- }
- }
- if (my allowAllNames) {
- /*
- * Allow word if it starts with a capital.
- */
- if (startsWithCapital (word)) {
- return true;
- }
- if (my namePrefixes && my namePrefixes [0]) {
- char32 *p = & my namePrefixes [0];
- while (*p) {
- char32 token [100], *q = & token [0];
- while (*p == U' ') p ++;
- while (*p != U'\0' && *p != U' ') *q ++ = *p ++;
- *q = U'\0'; // trailing null character
- /*
- * Allow word if starts with this prefix
- * and this prefix is followed by a capital.
- */
- if (str32str (word, token) == word && startsWithCapital (word + str32len (token))) {
- return true;
- }
- }
- }
- } else if (my allowAllAbbreviations && startsWithCapital (word)) {
- const char32 *p = & word [0];
- for (;;) {
- if (*p == '\0') return true;
- if (iswlower ((int) *p)) break;
- p ++;
- }
- }
- if (my allowAllWordsStartingWith && my allowAllWordsStartingWith [0]) {
- const char32 *p = & my allowAllWordsStartingWith [0];
- while (*p) {
- char32 token [100], *q = & token [0];
- int tokenLength;
- while (*p == U' ') p ++;
- while (*p != U'\0' && *p != U' ') *q ++ = *p ++;
- *q = U'\0'; // trailing null character
- tokenLength = str32len (token);
- if (wordLength >= tokenLength && str32nequ (token, word, tokenLength)) {
- return true;
- }
- }
- }
- if (my allowAllWordsEndingIn && my allowAllWordsEndingIn [0]) {
- const char32 *p = & my allowAllWordsEndingIn [0];
- while (*p) {
- char32 token [100], *q = & token [0];
- int tokenLength;
- while (*p == U' ') p ++;
- while (*p != U'\0' && *p != U' ') *q ++ = *p ++;
- *q = U'\0'; // trailing null character
- tokenLength = str32len (token);
- if (wordLength >= tokenLength && str32nequ (token, word + wordLength - tokenLength, tokenLength)) {
- return true;
- }
- }
- }
- if (WordList_hasWord (my wordList.get(), word))
- return true;
- if (my userDictionary->size > 0) {
- if (str32len (word) > 3333) return false; // superfluous, because WordList_hasWord already checked; but safe
- static char32 buffer [3*3333+1];
- Longchar_genericize32 (word, buffer);
- if (my userDictionary -> lookUp (buffer) != 0)
- return true;
- }
- return false;
- }
- void SpellingChecker_addNewWord (SpellingChecker me, conststring32 word) {
- try {
- autostring32 generic (3 * str32len (word));
- Longchar_genericize32 (word, generic.get());
- my userDictionary -> addString_copy (generic.get());
- } catch (MelderError) {
- Melder_throw (me, U": word \"", word, U"\" not added.");
- }
- }
- char32 * SpellingChecker_nextNotAllowedWord (SpellingChecker me, conststring32 sentence, integer *start) {
- const char32 *p = & sentence [*start];
- for (;;) {
- if (*p == U'\0') {
- return nullptr; // all words allowed
- } else if (*p == U'(' && my allowAllParenthesized) {
- p ++;
- for (;;) {
- if (*p == U'\0') {
- return nullptr; // everything is parenthesized...
- } else if (*p == U')') {
- p ++;
- break;
- } else {
- p ++;
- }
- }
- } else if (*p == U' ' || (my separatingCharacters && str32chr (my separatingCharacters.get(), *p))) {
- p ++;
- } else {
- static char32 word [100];
- char32 *q = & word [0];
- *start = p - sentence;
- for (;;) {
- if (*p == U'\0' || *p == U' ' || (my separatingCharacters && str32chr (my separatingCharacters.get(), *p))) {
- *q ++ = U'\0';
- if (SpellingChecker_isWordAllowed (me, word)) {
- /* Don't increment p (may contain a zero or a parenthesis). */
- break;
- } else {
- return word;
- }
- } else {
- *q ++ = *p ++;
- }
- }
- }
- }
- return nullptr; // all words allowed
- }
- /* End of file SpellingChecker.cpp */
|