// translate.h
  /*
   * Copyright (C) 2005 to 2014 by Jonathan Duddington
   * email: jonsd@users.sourceforge.net
   * Copyright (C) 2015-2017 Reece H. Dunn
   *
   * This program is free software; you can redistribute it and/or modify
   * it under the terms of the GNU General Public License as published by
   * the Free Software Foundation; either version 3 of the License, or
   * (at your option) any later version.
   *
   * This program is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   * GNU General Public License for more details.
   *
   * You should have received a copy of the GNU General Public License
   * along with this program; if not, see: <http://www.gnu.org/licenses/>.
   */
  // Give the declarations that follow C linkage when compiled as C++.
  // NOTE(review): the matching closing brace is not visible in this section of the file.
  #ifdef __cplusplus
  extern "C"
  {
  #endif

  // Combine two characters into an integer for a translator name:
  // c1 goes into bits 8-15 and c2 is added in the low byte.
  // Every argument and the whole expansion are parenthesized so that the
  // macro evaluates as a single value inside any surrounding expression.
  #define L(c1, c2) (((c1) << 8) + (c2))

  // Combine three characters into an integer for a translator name:
  // c1 is shifted by 16, c2 by 8, and c3 is added in the low byte.
  #define L3(c1, c2, c3) (((c1) << 16) + ((c2) << 8) + (c3))
  // Special character values
  #define CTRL_EMBEDDED    0x01 // control character at the start of an embedded command
  #define REPLACED_E       'E'  // 'e' replaced by silent e

  // Buffer and table size limits
  #define N_WORD_PHONEMES  200  // max phonemes in a word
  #define N_WORD_BYTES     160  // max bytes for the UTF8 characters in a word
  #define N_CLAUSE_WORDS   300  // max words in a clause
  #define N_TR_SOURCE      800  // the source text of a single clause (UTF8 bytes)

  #define N_RULE_GROUP2    120  // max num of two-letter rule chains
  #define N_HASH_DICT      1024 // number of entries in the dictionary hash table (Translator.dict_hashtab[])
  #define N_LETTER_GROUPS  95   // maximum is 127-32
  // dictionary flags, word 1
  // bits 0-3 stressed syllable, bit 6=unstressed
  #define FLAG_SKIPWORDS          0x80
  #define FLAG_PREPAUSE           0x100

  #define FLAG_STRESS_END         0x200      // full stress if at end of clause
  #define FLAG_STRESS_END2        0x400      // full stress if at end of clause, or only followed by unstressed
  #define FLAG_UNSTRESS_END       0x800      // reduce stress at end of clause
  #define FLAG_SPELLWORD          0x1000     // re-translate the word as individual letters, separated by spaces
  #define FLAG_ACCENT_BEFORE      0x1000     // say this accent name before the letter name (same bit value as FLAG_SPELLWORD)
  #define FLAG_ABBREV             0x2000     // spell as letters, even with a vowel, OR use specified pronunciation rather than split into letters
  #define FLAG_DOUBLING           0x4000     // doubles the following consonant

  #define BITNUM_FLAG_ALT         14         // bit number of FLAG_ALT_TRANS - 1
  #define FLAG_ALT_TRANS          0x8000     // language specific
  #define FLAG_ALT2_TRANS         0x10000    // language specific
  #define FLAG_ALT3_TRANS         0x20000    // language specific
  #define FLAG_ALT4_TRANS         0x40000    // language specific
  #define FLAG_ALT5_TRANS         0x80000    // language specific
  #define FLAG_ALT6_TRANS         0x100000   // language specific
  #define FLAG_ALT7_TRANS         0x200000   // language specific

  #define FLAG_COMBINE            0x800000   // combine with the next word
  #define FLAG_ALLOW_DOT          0x01000000 // ignore '.' after word (abbreviation)
  #define FLAG_NEEDS_DOT          0x02000000 // only if the word is followed by a dot
  #define FLAG_WAS_UNPRONOUNCABLE 0x04000000 // the unpronounceable routine was used
  #define FLAG_MAX3               0x08000000 // limit to 3 repeats
  #define FLAG_PAUSE1             0x10000000 // shorter prepause
  #define FLAG_TEXTMODE           0x20000000 // word translates to replacement text, not phonemes
  #define BITNUM_FLAG_TEXTMODE    29         // bit number of FLAG_TEXTMODE

  #define FLAG_FOUND_ATTRIBUTES   0x40000000 // word was found in the dictionary list (has attributes)
  #define FLAG_FOUND              0x80000000 // pronunciation was found in the dictionary list
  // dictionary flags, word 2
  #define FLAG_VERBF              0x1        // verb follows
  #define FLAG_VERBSF             0x2        // verb follows, may have -s suffix
  #define FLAG_NOUNF              0x4        // noun follows
  #define FLAG_PASTF              0x8        // past tense follows
  #define FLAG_VERB               0x10       // pronunciation for verb
  #define FLAG_NOUN               0x20       // pronunciation for noun
  #define FLAG_PAST               0x40       // pronunciation for past tense
  #define FLAG_VERB_EXT           0x100      // extend the 'verb follows'
  #define FLAG_CAPITAL            0x200      // pronunciation if initial letter is upper case
  #define FLAG_ALLCAPS            0x400      // only if the word is all capitals
  #define FLAG_ACCENT             0x800      // character name is base-character name + accent name
  #define FLAG_HYPHENATED         0x1000     // multiple-words, but needs hyphen between parts 1 and 2
  #define FLAG_SENTENCE           0x2000     // only if the clause is a sentence
  #define FLAG_ONLY               0x4000
  #define FLAG_ONLY_S             0x8000
  #define FLAG_STEM               0x10000    // must have a suffix
  #define FLAG_ATEND              0x20000    // use this pronunciation if at end of clause
  #define FLAG_ATSTART            0x40000    // use this pronunciation if at start of clause
  #define FLAG_NATIVE             0x80000    // not if we've switched translators
  #define FLAG_LOOKUP_SYMBOL      0x40000000 // to indicate called from Lookup()

  // Bit numbers of some word-2 flags. Each value is 32 plus the bit position
  // of the matching FLAG_* value above (e.g. 0x2a = 32 + 10, FLAG_ALLCAPS = 1 << 10).
  #define BITNUM_FLAG_ALLCAPS     0x2a
  #define BITNUM_FLAG_HYPHENATED  0x2c
  #define BITNUM_FLAG_ONLY        0x2e
  #define BITNUM_FLAG_ONLY_S      0x2f
  // wordflags, flags in source word
  #define FLAG_ALL_UPPER               0x1        // no lower case letters in the word
  #define FLAG_FIRST_UPPER             0x2        // first letter is upper case
  #define FLAG_UPPERS                  0x3        // FLAG_ALL_UPPER | FLAG_FIRST_UPPER
  #define FLAG_HAS_PLURAL              0x4        // upper-case word with s or 's lower-case ending
  #define FLAG_PHONEMES                0x8        // word is phonemes
  #define FLAG_LAST_WORD               0x10       // last word in clause
  #define FLAG_EMBEDDED                0x40       // word is preceded by embedded commands
  #define FLAG_HYPHEN                  0x80
  #define FLAG_NOSPACE                 0x100      // word is not separated from previous word by a space
  #define FLAG_FIRST_WORD              0x200      // first word in clause
  #define FLAG_FOCUS                   0x400      // the focus word of a clause
  #define FLAG_EMPHASIZED              0x800
  #define FLAG_EMPHASIZED2             0xc00      // FLAG_FOCUS | FLAG_EMPHASIZED
  #define FLAG_DONT_SWITCH_TRANSLATOR  0x1000
  #define FLAG_SUFFIX_REMOVED          0x2000
  #define FLAG_HYPHEN_AFTER            0x4000
  #define FLAG_ORDINAL                 0x8000     // passed to TranslateNumber() to indicate an ordinal number
  #define FLAG_HAS_DOT                 0x10000    // dot after this word
  #define FLAG_COMMA_AFTER             0x20000    // comma after this word
  #define FLAG_MULTIPLE_SPACES         0x40000    // word is preceded by multiple spaces, newline, or tab
  #define FLAG_INDIVIDUAL_DIGITS       0x80000    // speak number as individual digits
  #define FLAG_DELETE_WORD             0x100000   // don't speak this word, it has been spoken as part of the previous word
  #define FLAG_CHAR_REPLACED           0x200000   // characters have been replaced by .replace in the *_rules
  #define FLAG_TRANSLATOR2             0x400000   // retranslating using a different language
  #define FLAG_PREFIX_REMOVED          0x800000   // a prefix has been removed from this word

  #define FLAG_SUFFIX_VOWEL            0x08000000 // remember an initial vowel from the suffix
  #define FLAG_NO_TRACE                0x10000000 // passed to TranslateRules() to suppress dictionary lookup printout
  #define FLAG_NO_PREFIX               0x20000000
  #define FLAG_UNPRON_TEST             0x80000000 // do unpronounceability test on the beginning of the word
  // prefix/suffix flags (bits 8 to 14, bits 16 to 22) don't use 0x8000, 0x800000
  #define SUFX_E            0x0100  // e may have been added
  #define SUFX_I            0x0200  // y may have been changed to i
  #define SUFX_P            0x0400  // prefix
  #define SUFX_V            0x0800  // suffix means use the verb form pronunciation
  #define SUFX_D            0x1000  // previous letter may have been doubled
  #define SUFX_F            0x2000  // verb follows
  #define SUFX_Q            0x4000  // don't retranslate
  #define SUFX_T            0x10000 // don't affect the stress position in the stem
  #define SUFX_B            0x20000 // break, this character breaks the word into stem and suffix (used with SUFX_P)
  #define SUFX_A            0x40000 // remember that the suffix starts with a vowel
  #define SUFX_M            0x80000 // bit 19, allow multiple suffixes

  #define SUFX_UNPRON       0x8000  // used to return $unpron flag from *_rules

  #define FLAG_ALLOW_TEXTMODE  0x02 // allow dictionary to translate to text rather than phonemes
  #define FLAG_SUFX            0x04
  #define FLAG_SUFX_S          0x08
  #define FLAG_SUFX_E_ADDED    0x10
  // codes in dictionary rules
  // (values 22, 27 and 30 have no name defined in this list)
  #define RULE_PRE           1
  #define RULE_POST          2
  #define RULE_PHONEMES      3
  #define RULE_PH_COMMON     4  // At start of rule. Its phoneme string is used by subsequent rules
  #define RULE_CONDITION     5  // followed by condition number (byte)
  #define RULE_GROUP_START   6
  #define RULE_GROUP_END     7
  #define RULE_PRE_ATSTART   8  // as RULE_PRE but also match with 'start of word'
  #define RULE_LINENUM       9  // next 2 bytes give a line number, for debugging purposes

  #define RULE_STRESSED      10 // &
  #define RULE_DOUBLE        11 // %
  #define RULE_INC_SCORE     12 // +
  #define RULE_DEL_FWD       13 // #
  #define RULE_ENDING        14 // S
  #define RULE_DIGIT         15 // D digit
  #define RULE_NONALPHA      16 // Z non-alpha
  #define RULE_LETTERGP      17 // A B C H F G Y letter group number
  #define RULE_LETTERGP2     18 // L + letter group number
  #define RULE_CAPITAL       19 // ! word starts with a capital letter
  #define RULE_REPLACEMENTS  20 // section for character replacements
  #define RULE_SYLLABLE      21 // @
  #define RULE_SKIPCHARS     23 // J
  #define RULE_NO_SUFFIX     24 // N
  #define RULE_NOTVOWEL      25 // K
  #define RULE_IFVERB        26 // V
  #define RULE_DOLLAR        28 // $ commands
  #define RULE_NOVOWELS      29 // X no vowels up to word boundary
  #define RULE_SPELLING      31 // W while spelling letter-by-letter
  #define RULE_LAST_RULE     31

  // Rule codes above 31 are the ASCII code representation of the character
  // used to specify the rule.
  #define RULE_SPACE         32 // ascii space
  #define RULE_DEC_SCORE     60 // <

  #define DOLLAR_UNPR        0x01
  #define DOLLAR_NOPREFIX    0x02
  #define DOLLAR_LIST        0x03

  #define LETTERGP_A         0
  #define LETTERGP_B         1
  #define LETTERGP_C         2
  #define LETTERGP_H         3
  #define LETTERGP_F         4
  #define LETTERGP_G         5
  #define LETTERGP_Y         6
  #define LETTERGP_VOWEL2    7
  // Punctuation types returned by ReadClause()
  //@{

  // Bit fields and single-bit attributes of a clause value
  #define CLAUSE_PAUSE                  0x00000FFF // pause (x 10mS)
  #define CLAUSE_INTONATION_TYPE        0x00007000 // intonation type
  #define CLAUSE_OPTIONAL_SPACE_AFTER   0x00008000 // don't need space after the punctuation
  #define CLAUSE_TYPE                   0x000F0000 // phrase type
  #define CLAUSE_PUNCTUATION_IN_WORD    0x00100000 // punctuation character can be inside a word (Armenian)
  #define CLAUSE_SPEAK_PUNCTUATION_NAME 0x00200000 // speak the name of the punctuation character
  #define CLAUSE_DOT_AFTER_LAST_WORD    0x00400000 // dot after the last word
  #define CLAUSE_PAUSE_LONG             0x00800000 // x 320mS to the CLAUSE_PAUSE value

  // Values of the CLAUSE_INTONATION_TYPE field
  #define CLAUSE_INTONATION_FULL_STOP   0x00000000
  #define CLAUSE_INTONATION_COMMA       0x00001000
  #define CLAUSE_INTONATION_QUESTION    0x00002000
  #define CLAUSE_INTONATION_EXCLAMATION 0x00003000
  #define CLAUSE_INTONATION_NONE        0x00004000

  // Values of the CLAUSE_TYPE field
  #define CLAUSE_TYPE_NONE              0x00000000
  #define CLAUSE_TYPE_EOF               0x00010000
  #define CLAUSE_TYPE_VOICE_CHANGE      0x00020000
  #define CLAUSE_TYPE_CLAUSE            0x00040000
  #define CLAUSE_TYPE_SENTENCE          0x00080000

  // Complete clause values: pause length | intonation type | phrase type
  #define CLAUSE_NONE        ( 0 | CLAUSE_INTONATION_NONE        | CLAUSE_TYPE_NONE)
  #define CLAUSE_PARAGRAPH   (70 | CLAUSE_INTONATION_FULL_STOP   | CLAUSE_TYPE_SENTENCE)
  #define CLAUSE_EOF         (40 | CLAUSE_INTONATION_FULL_STOP   | CLAUSE_TYPE_SENTENCE | CLAUSE_TYPE_EOF)
  #define CLAUSE_VOICE       ( 0 | CLAUSE_INTONATION_NONE        | CLAUSE_TYPE_VOICE_CHANGE)
  #define CLAUSE_PERIOD      (40 | CLAUSE_INTONATION_FULL_STOP   | CLAUSE_TYPE_SENTENCE)
  #define CLAUSE_COMMA       (20 | CLAUSE_INTONATION_COMMA       | CLAUSE_TYPE_CLAUSE)
  #define CLAUSE_SHORTCOMMA  ( 4 | CLAUSE_INTONATION_COMMA       | CLAUSE_TYPE_CLAUSE)
  #define CLAUSE_SHORTFALL   ( 4 | CLAUSE_INTONATION_FULL_STOP   | CLAUSE_TYPE_CLAUSE)
  #define CLAUSE_QUESTION    (40 | CLAUSE_INTONATION_QUESTION    | CLAUSE_TYPE_SENTENCE)
  #define CLAUSE_EXCLAMATION (45 | CLAUSE_INTONATION_EXCLAMATION | CLAUSE_TYPE_SENTENCE)
  #define CLAUSE_COLON       (30 | CLAUSE_INTONATION_FULL_STOP   | CLAUSE_TYPE_CLAUSE)
  #define CLAUSE_SEMICOLON   (30 | CLAUSE_INTONATION_COMMA       | CLAUSE_TYPE_CLAUSE)

  // Defined elsewhere.
  // NOTE(review): presumably maps a Unicode code point to one of the CLAUSE_* values above - confirm against the definition.
  int clause_type_from_codepoint(uint32_t c);

  //@}
  // "say-as" mode codes
  #define SAYAS_CHARS         0x12
  #define SAYAS_GLYPHS        0x13
  #define SAYAS_SINGLE_CHARS  0x14
  #define SAYAS_KEY           0x24
  #define SAYAS_DIGITS        0x40 // + number of digits
  #define SAYAS_DIGITS1       0xc1

  // Marker character codes
  #define CHAR_EMPHASIS       0x0530 // this is an unused character code
  #define CHAR_COMMA_BREAK    0x0557 // unused character code
  // Rule:
  // [4] [match] [1 pre] [2 post] [3 phonemes] 0
  // match 1 pre 2 post 0 - use common phoneme string
  // match 1 pre 2 post 3 0 - empty phoneme string

  // Shorthand for a pointer to constant characters.
  typedef const char *constcharptr;
  // Record describing one rule match.
  // NOTE(review): the field meanings below are inferred from the names - confirm against the rule-matching code.
  typedef struct {
      int points;           // presumably the score of the match
      const char *phonemes; // presumably the phoneme string of the matched rule
      int end_type;
      char *del_fwd;
  } MatchRecord;
  // used to mark words within the source[] buffer
  typedef struct {
      unsigned int flags;
      unsigned short start;
      unsigned char pre_pause;
      unsigned char wmark;
      unsigned short sourceix;
      unsigned char length;
  } WORD_TAB;
  242. typedef struct {
  243. int type;
  244. int parameter[N_SPEECH_PARAM];
  245. } PARAM_STACK;
  246. extern PARAM_STACK param_stack[];
  247. extern const int param_defaults[N_SPEECH_PARAM];
  // Description of one alphabet.
  typedef struct {
      const char *name;
      int offset;
      unsigned short range_min; // NOTE(review): presumably the first character code of the alphabet - confirm
      unsigned short range_max; // NOTE(review): presumably the last character code of the alphabet - confirm
      int language;
      int flags;                // NOTE(review): presumably a combination of the AL_* alphabet flags - confirm
  } ALPHABET;

  // Both are defined elsewhere.
  extern ALPHABET alphabets[];
  extern ALPHABET *current_alphabet;
  // alphabet flags
  #define AL_DONT_NAME    0x01 // don't speak the alphabet name
  #define AL_NOT_LETTERS  0x02 // don't use the language for speaking letters
  #define AL_WORDS        0x04 // use the language to speak words
  #define AL_NOT_CODE     0x08 // don't speak the character code
  #define AL_NO_SYMBOL    0x10 // don't repeat "symbol" or "character"
  // Language option numbers. N_LOPTS is the size of the LANGUAGE_OPTIONS
  // param[] and param2[] arrays. The comment lines before each LOPT_* name
  // describe the values of that option.
  #define N_LOPTS 21

  #define LOPT_DIERESES 1

  // 1=remove [:] from unstressed syllables, 2= remove from unstressed or non-penultimate syllables
  // bit 4=0, if stress < 4, bit 4=1, if not the highest stress in the word
  #define LOPT_IT_LENGTHEN 2

  // 1=german
  #define LOPT_PREFIXES 3

  // non-zero, change voiced/unvoiced to match last consonant in a cluster
  // bit 0=use regressive voicing
  // bit 1=LANG=cz,bg don't propagate over [v]
  // bit 2=don't propagate across word boundaries
  // bit 3=LANG=pl, propagate over liquids and nasals
  // bit 4=LANG=cz,sk don't propagate to [v]
  // bit 8=devoice word-final consonants
  #define LOPT_REGRESSIVE_VOICING 4

  // 0=default, 1=no check, other allow this character as an extra initial letter (default is 's')
  #define LOPT_UNPRONOUNCABLE 5

  // select length_mods tables, (length_mod_tab) + (length_mod_tab0 * 100)
  #define LOPT_LENGTH_MODS 6

  // increase this to prevent sonorants being shortened before shortened (eg. unstressed) vowels
  #define LOPT_SONORANT_MIN 7

  // bit 0: don't break vowels at word boundary
  #define LOPT_WORD_MERGE 8

  // max. amplitude for vowel at the end of a clause
  #define LOPT_MAXAMP_EOC 9

  // bit 0=reduce even if phonemes are specified in the **_list file
  // bit 1=don't reduce the strongest vowel in a word which is marked 'unstressed'
  #define LOPT_REDUCE 10

  // LANG=cs,sk combine some prepositions with the following word, if the combination has N or fewer syllables
  // bits 0-3 N syllables
  // bit 4=only if the second word has $alt attribute
  // bit 5=not if the second word is end-of-sentence
  #define LOPT_COMBINE_WORDS 11

  // change [t] when followed by unstressed vowel
  #define LOPT_REDUCE_T 12

  // 1 = allow capitals inside a word
  // 2 = stressed syllable is indicated by capitals
  #define LOPT_CAPS_IN_WORD 13

  // bit 0=Italian "syntactic doubling" of consonants in the word after a word marked with $double attribute
  // bit 1=also after a word which ends with a stressed vowel
  #define LOPT_IT_DOUBLING 14

  // Call ApplySpecialAttributes() if $alt or $alt2 is set for a word
  // bit 1: stressed syllable: $alt change [e],[o] to [E],[O], $alt2 change [E],[O] to [e],[o]
  #define LOPT_ALT 15

  // pause for bracket (default=4), pause when announcing bracket names (default=2)
  #define LOPT_BRACKET_PAUSE 16

  // bit 1, don't break clause before announcing . ? !
  #define LOPT_ANNOUNCE_PUNCT 17

  // recognize long vowels (0 = don't recognize)
  #define LOPT_LONG_VOWEL_THRESHOLD 18

  // bit 0: Don't allow suffixes if there is no previous syllable
  #define LOPT_SUFFIX 19

  // bit 0 Apostrophe at start of word is part of the word
  // bit 1 Apostrophe at end of word is part of the word
  #define LOPT_APOSTROPHE 20
  // stress_rule
  #define STRESSPOSN_1L 0 // 1st syllable
  #define STRESSPOSN_2L 1 // 2nd syllable
  #define STRESSPOSN_2R 2 // penultimate
  #define STRESSPOSN_1R 3 // final syllable
  #define STRESSPOSN_3R 4 // antepenultimate
  324. typedef struct {
  325. // bits0-2 separate words with (1=pause_vshort, 2=pause_short, 3=pause, 4=pause_long 5=[?] phonemme)
  326. // bit 3=don't use linking phoneme
  327. // bit4=longer pause before STOP, VSTOP,FRIC
  328. // bit5=length of a final vowel doesn't depend on the next phoneme
  329. int word_gap;
  330. int vowel_pause;
  331. int stress_rule; // 1=first syllable, 2=penultimate, 3=last
  332. #define S_NO_DIM 0x02
  333. #define S_FINAL_DIM 0x04
  334. #define S_FINAL_DIM_ONLY 0x06
  335. // bit1=don't set diminished stress,
  336. // bit2=mark unstressed final syllables as diminished
  337. // bit3=set consecutive unstressed syllables in unstressed words to diminished, but not in stressed words
  338. #define S_FINAL_NO_2 0x10
  339. // bit4=don't allow secondary stress on last syllable
  340. #define S_NO_AUTO_2 0x20
  341. // bit5-don't use automatic secondary stress
  342. #define S_2_TO_HEAVY 0x40
  343. // bit6=light syllable followed by heavy, move secondary stress to the heavy syllable. LANG=Finnish
  344. #define S_FIRST_PRIMARY 0x80
  345. // bit7=if more than one primary stress, make the subsequent primaries to secondary stress
  346. #define S_FINAL_VOWEL_UNSTRESSED 0x100
  347. // bit8=don't apply default stress to a word-final vowel
  348. #define S_FINAL_SPANISH 0x200
  349. // bit9=stress last syllable if it doesn't end in vowel or "s" or "n" LANG=Spanish
  350. #define S_2_SYL_2 0x1000
  351. // bit12= In a 2-syllable word, if one has primary stress then give the other secondary stress
  352. #define S_INITIAL_2 0x2000
  353. // bit13= If there is only one syllable before the primary stress, give it a secondary stress
  354. #define S_MID_DIM 0x10000
  355. // bit 16= Set (not first or last) syllables to diminished stress
  356. #define S_PRIORITY_STRESS 0x20000
  357. // bit17= "priority" stress reduces other primary stress to "unstressed" not "secondary"
  358. #define S_EO_CLAUSE1 0x40000
  359. // bit18= don't lengthen short vowels more than long vowels at end-of-clause
  360. #define S_FINAL_LONG 0x80000
  361. // bit19=stress on final syllable if it has a long vowel, but previous syllable has a short vowel
  362. #define S_HYPEN_UNSTRESS 0x100000
  363. // bit20= hyphenated words, 2nd part is unstressed
  364. #define S_NO_EOC_LENGTHEN 0x200000
  365. // bit21= don't lengthen vowels at end-of-clause
  366. // bit15= Give stress to the first unstressed syllable
  367. int stress_flags;
  368. int unstressed_wd1; // stress for $u word of 1 syllable
  369. int unstressed_wd2; // stress for $u word of >1 syllable
  370. int param[N_LOPTS];
  371. int param2[N_LOPTS];
  372. unsigned char *length_mods;
  373. unsigned char *length_mods0;
  374. #define NUM_THOUS_SPACE 0x4
  375. #define NUM_DECIMAL_COMMA 0x8
  376. #define NUM_SWAP_TENS 0x10
  377. #define NUM_AND_UNITS 0x20
  378. #define NUM_HUNDRED_AND 0x40
  379. #define NUM_SINGLE_AND 0x80
  380. #define NUM_SINGLE_STRESS 0x100
  381. #define NUM_SINGLE_VOWEL 0x200
  382. #define NUM_OMIT_1_HUNDRED 0x400
  383. #define NUM_1900 0x800
  384. #define NUM_ALLOW_SPACE 0x1000
  385. #define NUM_DFRACTION_1 0x2000
  386. #define NUM_DFRACTION_2 0x4000
  387. #define NUM_DFRACTION_3 0x6000
  388. #define NUM_DFRACTION_4 0x8000
  389. #define NUM_DFRACTION_5 0xa000
  390. #define NUM_DFRACTION_6 0xc000
  391. #define NUM_DFRACTION_7 0xe000 // lang=si, alternative form of number for decimal fraction digits (except the last)
  392. #define NUM_ORDINAL_DOT 0x10000
  393. #define NUM_NOPAUSE 0x20000
  394. #define NUM_AND_HUNDRED 0x40000
  395. #define NUM_THOUSAND_AND 0x80000
  396. #define NUM_VIGESIMAL 0x100000
  397. #define NUM_OMIT_1_THOUSAND 0x200000
  398. #define NUM_ZERO_HUNDRED 0x400000
  399. #define NUM_HUNDRED_AND_DIGIT 0x800000
  400. #define NUM_ROMAN 0x1000000
  401. #define NUM_ROMAN_CAPITALS 0x2000000
  402. #define NUM_ROMAN_AFTER 0x4000000
  403. #define NUM_ROMAN_ORDINAL 0x8000000
  404. #define NUM_SINGLE_STRESS_L 0x10000000
  405. // bits0-1=which numbers routine to use.
  406. // bit2= thousands separator must be space
  407. // bit3= , decimal separator, not .
  408. // bit4=use three-and-twenty rather than twenty-three
  409. // bit5='and' between tens and units
  410. // bit6=add "and" after hundred or thousand
  411. // bit7=don't have "and" both after hundreds and also between tens and units
  412. // bit8=only one primary stress in tens+units
  413. // bit9=only one vowel betwen tens and units
  414. // bit10=omit "one" before "hundred"
  415. // bit11=say 19** as nineteen hundred
  416. // bit12=allow space as thousands separator (in addition to langopts.thousands_sep)
  417. // bits13-15 post-decimal-digits 0=single digits, 1=(LANG=it) 2=(LANG=pl) 3=(LANG=ro)
  418. // bit16= dot after number indicates ordinal
  419. // bit17= don't add pause after a number
  420. // bit18= 'and' before hundreds
  421. // bit19= 'and' after thousands if there are no hundreds
  422. // bit20= vigesimal number, if tens are not found
  423. // bit21= omit "one" before "thousand"
  424. // bit22= say "zero" before hundred
  425. // bit23= add "and" after hundreds and thousands, only if there are digits and no tens
  426. // bit24= recognize roman numbers
  427. // bit25= Roman numbers only if upper case
  428. // bit26= say "roman" after the number, not before
  429. // bit27= Roman numbers are ordinal numbers
  430. // bit28= only one primary stress in tens+units (on the tens)
  431. int numbers;
  432. #define NUM2_THOUSANDS_VAR1 0x40
  433. #define NUM2_THOUSANDS_VAR2 0x80
  434. #define NUM2_THOUSANDS_VAR3 0xc0
  435. #define NUM2_THOUSANDS_VAR4 0x100
  436. #define NUM2_THOUSANDS_VAR5 0x140
  437. #define NUM2_SWAP_THOUSANDS 0x200
  438. #define NUM2_ORDINAL_NO_AND 0x800
  439. #define NUM2_MULTIPLE_ORDINAL 0x1000
  440. #define NUM2_NO_TEEN_ORDINALS 0x2000
  441. #define NUM2_MYRIADS 0x4000
  442. #define NUM2_ENGLISH_NUMERALS 0x8000
  443. #define NUM2_PERCENT_BEFORE 0x10000
  444. #define NUM2_OMIT_1_HUNDRED_ONLY 0x20000
  445. #define NUM2_ORDINAL_AND_THOUSANDS 0x40000
  446. #define NUM2_ORDINAL_DROP_VOWEL 0x80000 // currently only for tens and units
  447. #define NUM2_ZERO_TENS 0x100000
  448. // bits 1-4 use variant form of numbers before thousands,millions,etc.
  449. // bits 6-8 use different forms of thousand, million, etc (M MA MB)
  450. // bit9=(LANG=rw) say "thousand" and "million" before its number, not after
  451. // bit11=(LANG=es,an) don't say 'and' between tens and units for ordinal numbers
  452. // bit12=(LANG=el,es) use ordinal form of hundreds and tens as well as units
  453. // bit13=(LANG=pt) don't use 11-19 numbers to make ordinals
  454. // bit14=(LANG=ko) use myriads (groups of 4 digits) not thousands (groups of 3)
  455. // bit15=(LANG=ne) speak (non-replaced) English numerals in English
  456. // bit16=(LANG=si) say "%" before the number
  457. // bit17=(LANG=ml) omit "one" before hundred only if there are no previous digits
  458. // bit18=(LANG=ta) same variant for ordinals and thousands (#o = #a)
  459. // bit19=(LANG=te) drop final vowel from cardial number before adding ordinal suffix
  460. // bit20=(LANG=zh) say zero tens
  461. int numbers2;
  462. #define BREAK_THOUSANDS 0x49249248
  463. int break_numbers; // which digits to break the number into thousands, millions, etc (Hindi has 100,000 not 1,000,000)
  464. int max_roman;
  465. int min_roman;
  466. int thousands_sep;
  467. int decimal_sep;
  468. int max_digits; // max number of digits which can be spoken as an integer number (rather than individual digits)
  469. const char *ordinal_indicator; // UTF-8 string
  470. const unsigned char *roman_suffix; // add this (ordinal) suffix to Roman numbers (LANG=an)
  471. // bit 0, accent name before the letter name, bit 1 "capital" after letter name
  472. int accents;
  473. int tone_language; // 1=tone language
  474. int intonation_group;
  475. unsigned char tunes[6];
  476. int long_stop; // extra mS pause for a lengthened stop
  477. int phoneme_change; // TEST, change phonemes, after translation
  478. char max_initial_consonants;
  479. char spelling_stress; // 0=default, 1=stress first letter
  480. char tone_numbers;
  481. char ideographs; // treat as separate words
  482. char textmode; // the meaning of FLAG_TEXTMODE is reversed (to save data when *_list file is compiled)
  483. char dotless_i; // uses letter U+0131
  484. int testing; // testing options: bit 1= specify stressed syllable in the form: "outdoor/2"
  485. int listx; // compile *_listx after *list
  486. const unsigned int *replace_chars; // characters to be substitutes
  487. int our_alphabet; // offset for main alphabet (if not set in letter_bits_offset)
  488. int alt_alphabet; // offset for another language to recognize
  489. int alt_alphabet_lang; // language for the alt_alphabet
  490. int max_lengthmod;
  491. int lengthen_tonic; // lengthen the tonic syllable
  492. int suffix_add_e; // replace a suffix (which has the SUFX_E flag) with this character
  493. } LANGUAGE_OPTIONS;
  // a parameter of ChangePhonemes()
  typedef struct {
      int flags;
      unsigned char stress;         // stress level of this vowel
      unsigned char stress_highest; // the highest stress level of a vowel in this word
      unsigned char n_vowels;       // number of vowels in the word
      unsigned char vowel_this;     // syllable number of this vowel (counting from 1)
      unsigned char vowel_stressed; // syllable number of the highest stressed vowel
  } CHANGEPH;
  503. typedef struct {
  504. LANGUAGE_OPTIONS langopts;
  505. int translator_name;
  506. int transpose_max;
  507. int transpose_min;
  508. const char *transpose_map;
  509. char dictionary_name[40];
  510. char phonemes_repeat[20];
  511. int phonemes_repeat_count;
  512. int phoneme_tab_ix;
  513. unsigned char stress_amps[8];
  514. unsigned char stress_amps_r[8];
  515. short stress_lengths[8];
  516. int dict_condition; // conditional apply some pronunciation rules and dict.lookups
  517. int dict_min_size;
  518. espeak_ng_ENCODING encoding;
  519. const wchar_t *char_plus_apostrophe; // single chars + apostrophe treated as words
  520. const wchar_t *punct_within_word; // allow these punctuation characters within words
  521. const unsigned short *chars_ignore;
  522. // holds properties of characters: vowel, consonant, etc for pronunciation rules
  523. unsigned char letter_bits[256];
  524. int letter_bits_offset;
  525. const wchar_t *letter_groups[8];
  526. /* index1=option, index2 by 0=. 1=, 2=?, 3=! 4=none */
  527. #define INTONATION_TYPES 8
  528. #define PUNCT_INTONATIONS 6
  529. unsigned char punct_to_tone[INTONATION_TYPES][PUNCT_INTONATIONS];
  530. char *data_dictrules; // language_1 translation rules file
  531. char *data_dictlist; // language_2 dictionary lookup file
  532. char *dict_hashtab[N_HASH_DICT]; // hash table to index dictionary lookup file
  533. char *letterGroups[N_LETTER_GROUPS];
  534. // groups1 and groups2 are indexes into data_dictrules, set up by InitGroups()
  535. // the two-letter rules for each letter must be consecutive in the language_rules source
  536. char *groups1[256]; // translation rule lists, index by single letter
  537. char *groups3[128]; // index by offset letter
  538. char *groups2[N_RULE_GROUP2]; // translation rule lists, indexed by two-letter pairs
  539. unsigned int groups2_name[N_RULE_GROUP2]; // the two letter pairs for groups2[]
  540. int n_groups2; // number of groups2[] entries used
  541. unsigned char groups2_count[256]; // number of 2 letter groups for this initial letter
  542. unsigned char groups2_start[256]; // index into groups2
  543. const short *frequent_pairs; // list of frequent pairs of letters, for use in compressed *_list
  544. int expect_verb;
  545. int expect_past; // expect past tense
  546. int expect_verb_s;
  547. int expect_noun;
  548. int prev_last_stress;
  549. char *clause_end;
  550. int word_vowel_count; // number of vowels so far
  551. int word_stressed_count; // number of vowels so far which could be stressed
  552. int clause_upper_count; // number of upper case letters in the clause
  553. int clause_lower_count; // number of lower case letters in the clause
  554. int prepause_timeout;
  555. int end_stressed_vowel; // word ends with stressed vowel
  556. int prev_dict_flags[2]; // dictionary flags from previous word
  557. int clause_terminator;
  558. } Translator;
  // Run-time option globals.
  // Declared `extern` here, so this header only announces them; their storage
  // (and initial values) are defined elsewhere in the project.
  extern int option_tone2;

  // Flag bits. NOTE(review): presumably tested against option_tone_flags,
  // which is declared directly below -- confirm against the users.
  #define OPTION_EMPHASIZE_ALLCAPS     0x100
  #define OPTION_EMPHASIZE_PENULTIMATE 0x200
  extern int option_tone_flags;

  extern int option_phonemes;
  extern int option_phoneme_events;
  extern int option_linelength;    // treat lines shorter than this as end-of-clause
  extern int option_capitals;
  extern int option_punctuation;
  extern int option_endpause;
  extern int option_ssml;
  extern int option_phoneme_input; // allow [[phonemes]] in input text
  extern int option_phoneme_variants;
  extern int option_sayas;
  extern int option_wordgap;
  // Text-position bookkeeping globals (declarations only; defined elsewhere).
  // NOTE(review): the count_* / skip_* pairing by unit (characters, words,
  // sentences) is taken from the names alone -- the exact semantics are not
  // visible in this header.
  extern int count_characters;
  extern int count_words;
  extern int count_sentences;

  extern int skip_characters;
  extern int skip_words;
  extern int skip_sentences;
  extern int skipping_text;

  extern int end_character_position;
  extern int clause_start_char;
  extern int clause_start_word;

  extern char *namedata;
  extern int pre_pause;
  // Capacity of skip_marker[]: max. length of a mark name.
  #define N_MARKER_LENGTH 50
  extern char skip_marker[N_MARKER_LENGTH];

  // Capacity (in wchar_t elements) of option_punctlist[].
  #define N_PUNCTLIST 60
  extern wchar_t option_punctlist[N_PUNCTLIST]; // which punctuation characters to announce
  590. extern unsigned char punctuation_to_tone[INTONATION_TYPES][PUNCT_INTONATIONS];
  591. extern Translator *translator;
  592. extern Translator *translator2;
  593. extern char dictionary_name[40];
  594. extern char ctrl_embedded; // to allow an alternative CTRL for embedded commands
  595. extern espeak_ng_TEXT_DECODER *p_decoder;
  596. extern int dictionary_skipwords;
  597. extern int (*uri_callback)(int, const char *, const char *);
  598. extern int (*phoneme_callback)(const char *);
  599. extern void SetLengthMods(Translator *tr, int value);
  600. void LoadConfig(void);
  601. int TransposeAlphabet(Translator *tr, char *text);
  // Bit patterns for inspecting UTF-8 bytes.
  // NOTE(review): presumably used as (byte & LEADING_2_BITS) == UTF8_TAIL_BITS
  // to recognise a continuation byte -- confirm against the callers.
  #define LEADING_2_BITS 0xC0 // 0b11000000
  #define UTF8_TAIL_BITS 0x80 // 0b10000000
  604. ESPEAK_NG_API int utf8_in(int *c, const char *buf);
  605. int utf8_in2(int *c, const char *buf, int backwards);
  606. int utf8_out(unsigned int c, char *buf);
  607. int utf8_nbytes(const char *buf);
  // List lookup and text-input helper prototypes (definitions elsewhere).

  // Both take a list of unsigned short values and a character code.
  // NOTE(review): how the list is terminated and what the int result encodes
  // (index, flag, mapped value) is not visible here -- confirm.
  int lookupwchar(const unsigned short *list, int c);
  int lookupwchar2(const unsigned short *list, int c);

  int Eof(void);

  // NOTE(review): by its name a strchr() variant; how it differs from the
  // standard function is not visible in this header.
  char *strchr_w(const char *s, int c);
  int IsBracket(int c);

  void InitNamedata(void);
  void InitText(int flags);
  void InitText2(void);
  616. int IsDigit(unsigned int c);
  617. int IsDigit09(unsigned int c);
  618. int IsAlpha(unsigned int c);
  619. int IsVowel(Translator *tr, int c);
  620. int IsSuperscript(int letter);
  621. int isspace2(unsigned int c);
  622. int towlower2(unsigned int c); // Supports Turkish I
  623. const char *GetTranslatedPhonemeString(int phoneme_mode);
  624. const char *WordToString2(unsigned int word);
  625. ALPHABET *AlphabetFromChar(int c);
  626. Translator *SelectTranslator(const char *name);
  627. int SetTranslator2(const char *name);
  628. void DeleteTranslator(Translator *tr);
  629. void ProcessLanguageOptions(LANGUAGE_OPTIONS *langopts);
  630. int Lookup(Translator *tr, const char *word, char *ph_out);
  631. int LookupFlags(Translator *tr, const char *word, unsigned int **flags_out);
  632. int TranslateNumber(Translator *tr, char *word1, char *ph_out, unsigned int *flags, WORD_TAB *wtab, int control);
  633. int TranslateRoman(Translator *tr, char *word, char *ph_out, WORD_TAB *wtab);
  634. void ChangeWordStress(Translator *tr, char *word, int new_stress);
  635. void SetSpellingStress(Translator *tr, char *phonemes, int control, int n_chars);
  636. int TranslateLetter(Translator *tr, char *letter, char *phonemes, int control);
  637. void LookupLetter(Translator *tr, unsigned int letter, int next_byte, char *ph_buf, int control);
  638. void LookupAccentedLetter(Translator *tr, unsigned int letter, char *ph_buf);
  639. int LoadDictionary(Translator *tr, const char *name, int no_error);
  640. int LookupDictList(Translator *tr, char **wordptr, char *ph_out, unsigned int *flags, int end_flags, WORD_TAB *wtab);
  641. int HashDictionary(const char *string);
  642. void print_dictionary_flags(unsigned int *flags, char *buf, int buf_len);
  643. char *DecodeRule(const char *group_chars, int group_length, char *rule, int control);
  644. void MakePhonemeList(Translator *tr, int post_pause, int new_sentence);
  645. int ChangePhonemes_ru(Translator *tr, PHONEME_LIST2 *phlist, int n_ph, int index, PHONEME_TAB *ph, CHANGEPH *ch);
  646. void ApplySpecialAttribute2(Translator *tr, char *phonemes, int dict_flags);
  647. void AppendPhonemes(Translator *tr, char *string, int size, const char *ph);
  648. void CalcLengths(Translator *tr);
  649. void CalcPitches(Translator *tr, int clause_tone);
  650. int RemoveEnding(Translator *tr, char *word, int end_type, char *word_copy);
  651. int Unpronouncable(Translator *tr, char *word, int posn);
  652. void SetWordStress(Translator *tr, char *output, unsigned int *dictionary_flags, int tonic, int prev_stress);
  653. int TranslateRules(Translator *tr, char *p, char *phonemes, int size, char *end_phonemes, int end_flags, unsigned int *dict_flags);
  654. int TranslateWord(Translator *tr, char *word1, WORD_TAB *wtab, char *word_out);
  655. void TranslateClause(Translator *tr, int *tone, char **voice_change);
  656. int ReadClause(Translator *tr, char *buf, short *charix, int *charix_top, int n_buf, int *tone_type, char *voice_change);
  657. void SetVoiceStack(espeak_VOICE *v, const char *variant_name);
  658. void InterpretPhoneme(Translator *tr, int control, PHONEME_LIST *plist, PHONEME_DATA *phdata, WORD_PH_DATA *worddata);
  659. void InterpretPhoneme2(int phcode, PHONEME_DATA *phdata);
  660. char *WritePhMnemonic(char *phon_out, PHONEME_TAB *ph, PHONEME_LIST *plist, int use_ipa, int *flags);
  661. extern FILE *f_trans; // for logging
  662. #ifdef __cplusplus
  663. }
  664. #endif