-- arabluatex_fullvoc.lua (37 KB)
--[[
This file is part of the `arabluatex' package

ArabLuaTeX -- Processing ArabTeX notation under LuaLaTeX
Copyright (C) 2016--2018 Robert Alessi

Please send error reports and suggestions for improvements to Robert
Alessi <alessi@robertalessi.net>

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program. If not, see
<http://www.gnu.org/licenses/>.
--]]
  -- hamzafv: rewrite rules that choose the written form of the hamza (its
  -- seat, madda, sukūn) in ArabTeX input.  Each entry is a pair:
  --   a = Lua pattern to look for
  --   b = replacement text (%n refers to the n-th capture of `a`)
  -- NOTE(review): the code that walks this table is not in this file.
  -- Several rules only make sense if entries are tried top to bottom (e.g.
  -- the catch-all "%.A" after the more specific ʾalif rules), so keep the
  -- order as is -- confirm against the caller.
  hamzafv = {
    -- next line for ʾiʿrāb hyphen
    {a="(')(%-)([uaiUAI])", b="%1%3"},
    -- next lines for ʾalif alone
    {a="(%.A)([uai]?)l%-(%^n)", b="ا%2ل%3"}, --additional (^n is lunar)
    -- the replacement below now emits the lām, like the unprefixed rule above
    {a="([%(%[%|%<%s%-o])(%.A)([uai]?)l%-(%^n)", b="%1ا%3ل%4"}, --additional (^n is lunar) --p
    {a="(%.A)([uai]?)l%-([%_%^%.]?[tdrzsnc])", b="ا%2ل%3%3"},
    {a="([%(%[%|%<%s%-o])(%.A)([uai]?)l%-([%_%^%.]?[tdrzsnc])", b="%1ا%3ل%4%4"}, --p
    {a="%.A", b="ا"},
    -- hard coded hamza
    {a="|\"'", b="ء"},
    {a="A\"'", b="آ"},
    {a="[au]\"'", b="أ"},
    {a="w\"'", b="ؤ"},
    {a="i\"'", b="إ"},
    {a="y\"'", b="ئ"},
    {a="ؤ([^uaiUAI])", b="ؤْ%1"},
    {a="ؤ(%p?)$", b="ؤْ%1"},
    {a="ؤ(%p?%s)", b="ؤْ%1"},
    {a="أ([^uaiUAI])", b="أْ%1"},
    {a="أ(%p?)$", b="أْ%1"},
    {a="أ(%p?%s)", b="أْ%1"},
    {a="ئ([^uaiUAI])", b="ئْ%1"},
    {a="ئ(%p?)$", b="ئْ%1"},
    {a="ئ(%p?%s)", b="ئْ%1"},
    -- hamza takes tašdīd too
    {a="''([Uu])", b="ؤؤ%1"},
    {a="''([Aa])", b="أأ%1"},
    {a="''([Ii])", b="ئئ%1"},
    -- inseparable adverbial particle 'a- + 'a
    {a="\'(a)%-\'(a)", b="أ%1اأ%2"},
    -- initial long u and i (for a, see below)
    {a="%'%_U", b="أU"},
    {a="%'%_I", b="إI"},
    -- taḫfīfu 'l-hamza
    {a="'u'([^uaiUAI])", b="أU%1"},
    {a="'i'([^uaiUAI])", b="إI%1"},
    {a="([wf]a)%-\'([^uaiUAIl][^%-])", b="%1أْ%2"},
    {a="^u'([^uaiUAI])", b="اU%1"},
    {a="([^uaiUAIYN][%s%(%[%<])u'([^uaiUAI])", b="%1اU%2"},
    {a="^i'([^uaiUAI])", b="اI%1"},
    {a="([^uaiUAIYN][%s%(%[%<])i'([^uaiUAI])", b="%1اI%2"},
    -- madda (historic writing below)
    {a="'a'([^uaiUAI])", b="آ%1"},
    {a="([^uiyUI])\'a?A([%_%^%.]?[%`%'btjghxdrzsfqklmnywAY])", b="%1آ%2"},
    {a="^\'a?A([%_%^%.]?[%`%'btjghxdrzsfqklmnywAY])", b="آ%1"},
    {a="(%W)\'a?A([%_%^%.]?[%`%'btjghxdrzsfqklmnywAY])", b="%1آ%2"},
    {a="(A)(')(uN?%p?)$", b="aآء%3"},
    {a="(A)(')(uN?)(%p?%s)", b="aآء%3%4"},
    {a="(A)(')(iN?%p?)$", b="aآء%3"},
    {a="(A)(')(iN?)(%p?%s)", b="aآء%3%4"},
    {a="(A)(')([iI])", b="aآئ%3"}, -- historic madda
    {a="(A)(')(u)", b="aآؤ%3"}, -- historic madda
    {a="(A)(')", b="aآء"}, -- historic madda
    -- initial (needs both ^ and %W patterns)
    -- 'aw: the diphthong is to be resolved into 'awi' (next 8 lines)
    {a="^('aw)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="%1i"},
    {a="(%W)('aw)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="%1%2i"},
    {a="^('aw)(o)([%(%[%|%<]?\"?[uai])([%S]-o)", b="%1i"},
    {a="(%W)('aw)(o)([%(%[%|%<]?\"?[uai])([%S]-o)", b="%1%2i"},
    {a="^('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"},
    {a="(%W)('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"},
    {a="^('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"},
    {a="(%W)('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"},
    -- then the 'initial' rules for the remaining cases
    {a="^(')([ua])", b="أ%2"},
    {a="^(')(i)", b="إ%2"},
    -- consider replacing initial %W with [%s%(%[%<%-]:
    -- {a="(%W)(')([ua])", b="%1أ%3"},
    -- {a="(%W)(')(i)", b="%1إ%3"},
    {a="([%s%(%[%<%-])(')([ua])", b="%1أ%3"},
    {a="([%s%(%[%<%-])(')(i)", b="%1إ%3"},
    -- final
    -- mi'aT is special orthography (unlike ^say'aN and .zim'aN):
    -- {a="(%^sa%.?[yY])(\"?%|?)(%')(aN)", b="%1%2ئ%4"}, --new
    -- {a="(.zi?m)(%')(aN)", b="%1ئ%3"}, --new
    {a="(mi)(%')(a[Tt])", b="%1ائ%3"},
    {a="(mi)(%')(aN%_?[AY])", b="%1أ%3"},
    -- final hamzah is on the line after a letter of prolongation or a
    -- consonant with sukūn
    {a="([^Auai])(')(\"?[uai]N?)(%p?)$", b="%1ء%3%4"}, --new
    {a="([^Auai])(')(\"?[uai]N?)(%p?%s)", b="%1ء%3%4"},
    -- u
    {a="(u)(')([uai]N?%p?)$", b="%1ؤ%3"},
    {a="(u)(')([uai]N?)(%p?%s)", b="%1ؤ%3%4"},
    {a="(u)(')(%p?)$", b="%1ؤْ%3"},
    {a="(u)(')(%p?%s)", b="%1ؤْ%3"},
    -- a
    -- trailing punctuation is captured separately so that it is kept at
    -- end of input, as in the whitespace-terminated rule that follows
    {a="(a)(')(A)(%p?)$", b="%1آ%4"},
    {a="(a)(')(A)(%p?%s)", b="%1آ%4"},
    {a="(a)(')([u]N?%p?)$", b="%1أ%3"},
    {a="(a)(')([u]N?)(%p?%s)", b="%1أ%3%4"},
    {a="(a)(')(a%p?)$", b="%1أ%3"},
    {a="(a)(')(a)(%p?%s)", b="%1أ%3%4"},
    -- same here: keep the optional punctuation after the tanwīn
    {a="(a)(')(aN)(%p?)$", b="%1أً%4"},
    {a="(a)(')(aN)(%p?%s)", b="%1أً%4"},
    {a="(a)(')([i]N?%p?)$", b="%1إ%3"},
    {a="(a)(')([i]N?)(%p?%s)", b="%1إ%3%4"},
    {a="(a)(')(%p?)$", b="%1أْ%3"},
    {a="(a)(')(%p?%s)", b="%1أْ%3"},
    -- i
    {a="(i)(')([uai]N?%p?)$", b="%1ئ%3"},
    {a="(i)(')([uai]N?)(%p?%s)", b="%1ئ%3%4"},
    {a="(i)(')(%p?)$", b="%1ئْ%3"},
    {a="(i)(')(%p?%s)", b="%1ئْ%3"},
    --
    -- middle
    {a="([UIwy])(')", b="%1ء"}, --new
    -- {a="([Iy])(')", b="%1ئ"}, --included in the above line
    {a="([^uai])(')([uU])", b="%1ؤ%3"},
    {a="([^uai])(')(%_?[aAY])", b="%1أ%3"},
    {a="([^uai])(')([iI])", b="%1ئ%3"},
    {a="(u)(')([uU])", b="%1ؤ%3"},
    {a="(u)(')(%_?[aAY])", b="%1ؤ%3"},
    {a="(u)(')([iI])", b="%1ئ%3"},
    {a="(a)(')(%_?[aAY])", b="%1أ%3"},
    {a="(a)(')([uU])", b="%1ؤ%3"},
    {a="(a)(')([iI])", b="%1ئ%3"},
    {a="(i)(')(%_?[aAY])", b="%1ئ%3"},
    {a="(i)(')([uU])", b="%1ئ%3"},
    {a="(i)(')([iI])", b="%1ئ%3"},
    {a="(a)(')([^uaiUAI])", b="%1أْ%3"},
    {a="(u)(')([^uaiUAI])", b="%1ؤْ%3"},
    {a="(i)(')([^uaiUAI])", b="%1ئْ%3"}
  }
  -- hamzafveasy: variant of the hamza rewrite rules; the entries that differ
  -- from the non-"easy" table are flagged with 'easy' comments below.  Each
  -- entry is a pair:
  --   a = Lua pattern to look for
  --   b = replacement text (%n refers to the n-th capture of `a`)
  -- NOTE(review): the code that walks this table is not in this file.  The
  -- rules look order-dependent (specific cases first, catch-alls last), so
  -- keep the order as is -- confirm against the caller.
  hamzafveasy = { -- differences marked below with 'easy'
    -- next line for ʾiʿrāb hyphen
    {a="(')(%-)([uaiUAI])", b="%1%3"},
    -- next lines for ʾalif alone
    {a="(%.A)([uai]?)l%-(%^n)", b="ا%2ل%3"}, --additional (^n is lunar)
    -- the replacement below now emits the lām, like the unprefixed rule above
    {a="([%(%[%|%<%s%-o])(%.A)([uai]?)l%-(%^n)", b="%1ا%3ل%4"}, --additional (^n is lunar) --p
    {a="(%.A)([uai]?)l%-([%_%^%.]?[tdrzsnc])", b="ا%2ل%3%3"},
    {a="([%(%[%|%<%s%-o])(%.A)([uai]?)l%-([%_%^%.]?[tdrzsnc])", b="%1ا%3ل%4%4"}, --p
    {a="%.A", b="ا"},
    -- hard coded hamza
    {a="|\"'", b="ء"},
    {a="A\"'", b="آ"},
    {a="[au]\"'", b="أ"},
    {a="w\"'", b="ؤ"},
    {a="i\"'", b="إ"},
    {a="y\"'", b="ئ"},
    {a="ؤ([^uaiUAI])", b="ؤْ%1"},
    {a="ؤ(%p?)$", b="ؤْ%1"},
    {a="ؤ(%p?%s)", b="ؤْ%1"},
    {a="أ([^uaiUAI])", b="أْ%1"},
    {a="أ(%p?)$", b="أْ%1"},
    {a="أ(%p?%s)", b="أْ%1"},
    {a="ئ([^uaiUAI])", b="ئْ%1"},
    {a="ئ(%p?)$", b="ئْ%1"},
    {a="ئ(%p?%s)", b="ئْ%1"},
    -- hamza takes tašdīd too
    {a="''([Uu])", b="ؤؤ%1"},
    {a="''([Aa])", b="أأ%1"},
    {a="''([Ii])", b="ئئ%1"},
    -- inseparable adverbial particle 'a- + 'a
    {a="\'(a)%-\'(a)", b="أ%1اأ%2"},
    -- initial long u and i (for a, see below)
    {a="%'%_U", b="أU"},
    {a="%'%_I", b="إI"},
    -- taḫfīfu 'l-hamza
    {a="'u'([^uaiUAI])", b="أU%1"},
    {a="'i'([^uaiUAI])", b="إI%1"},
    {a="([wf]a)%-\'([^uaiUAIl][^%-])", b="%1أْ%2"},
    {a="^u'([^uaiUAI])", b="اU%1"},
    {a="([^uaiUAIYN][%s%(%[%<])u'([^uaiUAI])", b="%1اU%2"},
    {a="^i'([^uaiUAI])", b="اI%1"},
    {a="([^uaiUAIYN][%s%(%[%<])i'([^uaiUAI])", b="%1اI%2"},
    -- madda (historic writing below)
    {a="'a'([^uaiUAI])", b="آ%1"},
    {a="([^uiyUI])\'a?A([%_%^%.]?[%`%'btjghxdrzsfqklmnywAY])", b="%1آ%2"},
    {a="^\'a?A([%_%^%.]?[%`%'btjghxdrzsfqklmnywAY])", b="آ%1"},
    {a="(%W)\'a?A([%_%^%.]?[%`%'btjghxdrzsfqklmnywAY])", b="%1آ%2"},
    --easy (begin)
    {a="(A)(')(uN?%p?)$", b="aاء%3"},
    {a="(A)(')(uN?)(%p?%s)", b="aاء%3%4"},
    {a="(A)(')(iN?%p?)$", b="aاء%3"},
    {a="(A)(')(iN?)(%p?%s)", b="aاء%3%4"},
    {a="(A)(')([iI])", b="aائ%3"}, -- historic madda
    {a="(A)(')(u)", b="aاؤ%3"}, -- historic madda
    {a="(A)(')", b="aاء"}, -- historic madda
    --easy (end)
    -- initial (needs both ^ and %W patterns)
    -- 'aw: the diphthong is to be resolved into 'awi' (next 8 lines)
    {a="^('aw)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="%1i"},
    {a="(%W)('aw)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="%1%2i"},
    {a="^('aw)(o)([%(%[%|%<]?\"?[uai])([%S]-o)", b="%1i"},
    {a="(%W)('aw)(o)([%(%[%|%<]?\"?[uai])([%S]-o)", b="%1%2i"},
    {a="^('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"},
    {a="(%W)('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"},
    {a="^('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"},
    {a="(%W)('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"},
    -- then the 'initial' rules for the remaining cases
    {a="^(')([ua])", b="أ%2"},
    {a="^(')(i)", b="إ%2"},
    -- consider replacing initial %W with [%s%(%[%<%-]:
    -- {a="(%W)(')([ua])", b="%1أ%3"},
    -- {a="(%W)(')(i)", b="%1إ%3"},
    {a="([%s%(%[%<%-])(')([ua])", b="%1أ%3"},
    {a="([%s%(%[%<%-])(')(i)", b="%1إ%3"},
    -- final
    -- mi'aT is special orthography (unlike ^say'aN and .zim'aN)
    -- {a="(%^sa%.?[yY])(\"?%|?)(%')(aN)", b="%1%2ئ%4"}, --new
    -- {a="(.zi?m)(%')(aN)", b="%1ئ%3"}, --new
    {a="(mi)(%')(a[Tt])", b="%1ائ%3"},
    {a="(mi)(%')(aN%_?[AY])", b="%1أ%3"},
    -- easy (begin)
    -- The Munjid says that such words as radI'aN do not have the
    -- hamzah alone on the line, so take out the following two lines
    -- (final hamzah is on the line after a letter of prolongation or a
    -- consonant with sukūn)
    -- {a="([^Auai])(')(\"?[uai]N?)(%p?)$", b="%1ء%3%4"}, --new
    -- {a="([^Auai])(')(\"?[uai]N?)(%p?%s)", b="%1ء%3%4"},
    {a="([^Auai])(')(\"?aN)(%p?)$", b="%1ئ%3%4"}, --new
    {a="([^Auai])(')(\"?aN)(%p?%s)", b="%1ئ%3%4"}, --new
    {a="([^uai])(')(\"?a)(%p?)$", b="%1ء%3%4"}, --new
    {a="([^uai])(')(\"?a)(%p?%s)", b="%1ء%3%4"}, --new
    {a="([^uai])(')(\"?[ui]N?)(%p?)$", b="%1ء%3%4"}, --new
    {a="([^uai])(')(\"?[ui]N?)(%p?%s)", b="%1ء%3%4"}, --new
    --easy (end)
    -- u
    {a="(u)(')([uai]N?%p?)$", b="%1ؤ%3"},
    {a="(u)(')([uai]N?)(%p?%s)", b="%1ؤ%3%4"},
    {a="(u)(')(%p?)$", b="%1ؤْ%3"},
    {a="(u)(')(%p?%s)", b="%1ؤْ%3"},
    -- a
    -- trailing punctuation is captured separately so that it is kept at
    -- end of input, as in the whitespace-terminated rule that follows
    {a="(a)(')(A)(%p?)$", b="%1آ%4"},
    {a="(a)(')(A)(%p?%s)", b="%1آ%4"},
    {a="(a)(')([u]N?%p?)$", b="%1أ%3"},
    {a="(a)(')([u]N?)(%p?%s)", b="%1أ%3%4"},
    {a="(a)(')(a%p?)$", b="%1أ%3"},
    {a="(a)(')(a)(%p?%s)", b="%1أ%3%4"},
    -- same here: keep the optional punctuation after the tanwīn
    {a="(a)(')(aN)(%p?)$", b="%1أً%4"},
    {a="(a)(')(aN)(%p?%s)", b="%1أً%4"},
    {a="(a)(')([i]N?%p?)$", b="%1إ%3"},
    {a="(a)(')([i]N?)(%p?%s)", b="%1إ%3%4"},
    {a="(a)(')(%p?)$", b="%1أْ%3"},
    {a="(a)(')(%p?%s)", b="%1أْ%3"},
    -- i
    {a="(i)(')([uai]N?%p?)$", b="%1ئ%3"},
    {a="(i)(')([uai]N?)(%p?%s)", b="%1ئ%3%4"},
    {a="(i)(')(%p?)$", b="%1ئْ%3"},
    {a="(i)(')(%p?%s)", b="%1ئْ%3"},
    --
    -- middle
    {a="([Uw])(')", b="%1ء"}, --new
    {a="([Iy])(')", b="%1ئ"}, --easy
    {a="([^uai])(')([uU])", b="%1ؤ%3"},
    {a="([^uai])(')(%_?[aAY])", b="%1أ%3"},
    {a="([^uai])(')([iI])", b="%1ئ%3"},
    {a="(u)(')([uU])", b="%1ؤ%3"},
    {a="(u)(')(%_?[aAY])", b="%1ؤ%3"},
    {a="(u)(')([iI])", b="%1ئ%3"},
    {a="(a)(')(%_?[aAY])", b="%1أ%3"},
    {a="(a)(')([uU])", b="%1ؤ%3"},
    {a="(a)(')([iI])", b="%1ئ%3"},
    {a="(i)(')(%_?[aAY])", b="%1ئ%3"},
    {a="(i)(')([uU])", b="%1ئ%3"},
    {a="(i)(')([iI])", b="%1ئ%3"},
    {a="(a)(')([^uaiUAI])", b="%1أْ%3"},
    {a="(u)(')([^uaiUAI])", b="%1ؤْ%3"},
    {a="(i)(')([^uaiUAI])", b="%1ئْ%3"}
  }
  -- tanwinfv: rewrite rules for tanwīn (uN / aN / iN endings), including the
  -- rules that double a following r, l, m, n, w or y (assimilation).  Each
  -- entry is a pair:
  --   a = Lua pattern to look for
  --   b = replacement text (%n refers to the n-th capture of `a`)
  -- NOTE(review): the code that walks this table is not in this file.  The
  -- generic rules at the bottom would also match what the specific rules
  -- above them match, so the order looks significant -- confirm against the
  -- caller before reordering anything.
  tanwinfv = { -- with assimilations (\SetArbDflt*)
    -- assimilations (begin)
    {a="(o[%S]-)(%-?[uai]N[UI]?)(o)([rlmnwy])", b="%4%4"},
    {a="(%-?[uai]NU)(%s)([rlmnwy])", b="%1%2%3%3"},
    -- assimilations (end)
    {a="(o[%S]-)(%-?[uai]N[UI]?)(o)([uai])", b="ٱ"},
    {a="%-?(uNU)(%s)([uai])", b="%1%2ٱ"},
    {a="%-?(aNU)(%s)([uai])", b="%1%2ٱ"},
    {a="%-?(iNU)(%s)([uai])", b="%1%2ٱ"},
    {a="%-?uNU", b="ٌو"},
    {a="%-?aNU", b="ًوا"},
    {a="%-?iNU", b="ٍو"},
    {a="%-?([uai]N)(%s)([uai])", b="%1%2ٱ"},
    {a="%-?(aN[%_]?[AY])(%s)([uai])", b="%1%2ٱ"},
    -- assimilations (begin)
    {a="%-?(uN)(%s)([rlmnwy])", b="ٌ%2%3%3"},
    {a="(o[%S]-)(%-?aN)(_A)(o)([rlmnwy])", b="%5%5"},
    {a="(o[%S]-)(%-?aN)(Y)(o)([rlmnwy])", b="%5%5"},
    {a="%-?(aN)(_A)(%s)([rlmnwy])", b="ًى%3%4%4"},
    {a="%-?(aN)(Y)(%s)([rlmnwy])", b="ًى%3%4%4"},
    {a="(T)%-?(aN)(%s)([rlmnwy])", b="%1ً%3%4%4"},
    {a="(ء)%-?(aN)(%s)([rlmnwy])", b="%1%2%3%4%4"}, --new
    {a="([^TA])%-?(aN)(%s)([rlmnwy])", b="%1ًا%3%4%4"},
    {a="%-?(iNI?)(%s)([rlmnwy])", b="ٍ%2%3%3"},
    -- assimilations (end)
    {a="(o[%S]-)(%-?aN)(_A)(o)([uai])", b="ٱ"},
    {a="(o[%S]-)(%-?aN)(Y)(o)([uai])", b="ٱ"},
    -- quoted tanwīn (begin)
    {a="%-?(\"uN)", b=""},
    {a="(B)%-?(\"aN)", b="%1"},
    {a="%-?(\"aN)(_A)", b="ى"},
    {a="%-?(\"aN)(Y)", b="ى"},
    {a="(T)%-?(\"aN)", b="%1"},
    {a="([اآ])(ء)%-?(\"aN)", b="%1%2"}, --new
    {a="([^TA])%-?(\"aN)", b="%1ا"},
    {a="%-?(\"iNI?)", b=""},
    -- quoted tanwīn (end)
    {a="%-?(uN)", b="ٌ"},
    {a="(B)%-?(aN)", b="%1ً"},
    {a="%-?(aN)(_A)", b="ًى"},
    {a="%-?(aN)(Y)", b="ًى"},
    {a="(T)%-?(aN)", b="%1ً"},
    {a="([اآ])(ء)%-?(aN)", b="%1%2ً"}, --new
    {a="([^TA])%-?(aN)", b="%1ًا"},
    {a="%-?(iNI?)", b="ٍ"}
  }
  -- tanwinfveasy: tanwīn rewrite rules without assimilation.  It mirrors the
  -- table with assimilations; the assimilation entries are kept here as
  -- comments tagged '--easy' so the two tables can be compared line by
  -- line.  Each live entry is a pair:
  --   a = Lua pattern to look for
  --   b = replacement text (%n refers to the n-th capture of `a`)
  -- NOTE(review): the code that walks this table is not in this file; the
  -- specific-before-generic layout suggests the order matters -- confirm
  -- against the caller.
  tanwinfveasy = { -- no assimilations (see below)
    -- assimilations (begin)
    --easy {a="(o[%S]-)(%-?[uai]N[UI]?)(o)([rlmnwy])", b="%4%4"},
    --easy {a="(%-?[uai]NU)(%s)([rlmnwy])", b="%1%2%3%3"},
    -- assimilations (end)
    {a="(o[%S]-)(%-?[uai]N[UI]?)(o)([uai])", b="ٱ"},
    {a="%-?(uNU)(%s)([uai])", b="%1%2ٱ"},
    {a="%-?(aNU)(%s)([uai])", b="%1%2ٱ"},
    {a="%-?(iNU)(%s)([uai])", b="%1%2ٱ"},
    {a="%-?uNU", b="ٌو"},
    {a="%-?aNU", b="ًوا"},
    {a="%-?iNU", b="ٍو"},
    {a="%-?([uai]N)(%s)([uai])", b="%1%2ٱ"},
    {a="%-?(aN[%_]?[AY])(%s)([uai])", b="%1%2ٱ"},
    -- assimilations (begin)
    --easy {a="%-?(uN)(%s)([rlmnwy])", b="ٌ%2%3%3"},
    --easy {a="(o[%S]-)(%-?aN)(_A)(o)([rlmnwy])", b="%5%5"},
    --easy {a="(o[%S]-)(%-?aN)(Y)(o)([rlmnwy])", b="%5%5"},
    --easy {a="%-?(aN)(_A)(%s)([rlmnwy])", b="ًى%3%4%4"},
    --easy {a="%-?(aN)(Y)(%s)([rlmnwy])", b="ًى%3%4%4"},
    --easy {a="(T)%-?(aN)(%s)([rlmnwy])", b="%1ً%3%4%4"},
    --easy {a="(ء)%-?(aN)(%s)([rlmnwy])", b="%1%2%3%4%4"}, --new
    --easy {a="([^TA])%-?(aN)(%s)([rlmnwy])", b="%1ًا%3%4%4"},
    --easy {a="%-?(iNI?)(%s)([rlmnwy])", b="ٍ%2%3%3"},
    -- assimilations (end)
    {a="(o[%S]-)(%-?aN)(_A)(o)([uai])", b="ٱ"},
    {a="(o[%S]-)(%-?aN)(Y)(o)([uai])", b="ٱ"},
    -- quoted tanwīn (begin)
    {a="%-?(\"uN)", b=""},
    {a="(B)%-?(\"aN)", b="%1"},
    {a="%-?(\"aN)(_A)", b="ى"},
    {a="%-?(\"aN)(Y)", b="ى"},
    {a="(T)%-?(\"aN)", b="%1"},
    {a="([اآ])(ء)%-?(\"aN)", b="%1%2"}, --new
    {a="([^TA])%-?(\"aN)", b="%1ا"},
    {a="%-?(\"iNI?)", b=""},
    -- quoted tanwīn (end)
    {a="%-?(uN)", b="ٌ"},
    {a="(B)%-?(aN)", b="%1ً"},
    {a="%-?(aN)(_A)", b="ًى"},
    {a="%-?(aN)(Y)", b="ًى"},
    {a="(T)%-?(aN)", b="%1ً"},
    {a="([اآ])(ء)%-?(aN)", b="%1%2ً"}, --new
    {a="([^TA])%-?(aN)", b="%1ًا"},
    {a="%-?(iNI?)", b="ٍ"}
  }
  -- trigraphsfv: rewrite rules for sequences of three or more input
  -- characters: the article al-/'l- before lām, solar and lunar consonants,
  -- li-/la- + article, diphthongs before ʾalif conjunctionis, silent wāw.
  -- Each entry is a pair:
  --   a = Lua pattern to look for
  --   b = replacement text (%n refers to the n-th capture of `a`)
  -- NOTE(review): the code that walks this table is not in this file.  The
  -- "(i.e. what remains)" rules only work if the more specific rules above
  -- them have been tried first, so keep the order -- confirm against the
  -- caller.
  trigraphsfv = { -- trigraphs or more
    -- 'llatI / 'llad_I
    {a="^'ll(a)([%_]?[dt])", b="ٱلّ%1%2"},
    {a="([%(%[%|%<%s%-])'ll(a)([%_]?[dt])", b="%1ٱلّ%2%3"}, --p
    -- law: the diphthong is to be resolved into 'lawi' (next 8 lines)
    {a="^(law)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="%1i"},
    {a="(%W)(law)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="%1%2i"},
    {a="^(law)(o)(\"?[uai])([%S]-o)", b="%1i"},
    {a="(%W)(law)(o)(\"?[uai])([%S]-o)", b="%1%2i"},
    {a="^(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"},
    {a="(%W)(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"},
    {a="^(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"},
    {a="(%W)(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"},
    -- al- + lām
    {a="^(a)l%-(l)", b="ا%1ل%2%2"},
    {a="([%(%[%|%<%s%-o])(a)l%-(l)", b="%1ا%2ل%3%3"}, --p
    -- al- + solar consonant ('c' and '^n' are additional characters)
    {a="^(a)l%-(%^n)", b="ا%1ل%2"}, -- ^n is lunar
    {a="([%(%[%|%<%s%-o])(a)l%-(%^n)", b="%1ا%2ل%3"}, -- ^n is lunar --p
    {a="^(a)l%-([%_%^%.]?[tdrzsnc])", b="ا%1ل%2%2"},
    {a="([%(%[%|%<%s%-o])(a)l%-([%_%^%.]?[tdrzsnc])", b="%1ا%2ل%3%3"}, --p
    -- assim. art. + solar consonant ('c' and '^n' are additional characters)
    {a="^(a)(%^n)%-", b="ا%1ل"}, -- ^n is lunar
    {a="([%(%[%|%<%s%-o])(a)(%^n)%-", b="%1ا%2ل"}, -- ^n is lunar --p
    {a="^(a)([%_%^%.]?[tdrzsnc])%-", b="ا%1ل%2"},
    {a="([%(%[%|%<%s%-o])(a)([%_%^%.]?[tdrzsnc])%-", b="%1ا%2ل%3"}, --p
    -- al- + initial unstable hamza
    {a="^(a)l%-(\"?[uai])", b="ا%1ل%2ٱ"},
    {a="([%(%[%|%<%s%-o])(a)l%-(\"?[uai])", b="%1ا%2ل%3ٱ"}, --p
    -- li-/la + art. + initial unstable hamza is a special orthography
    {a="l([ai])%-l%-(\"?[uai])", b="ل%1ل%2ٱ"},
    -- al- + lunar consonant (i.e. what remains)
    {a="^(a)l%-", b="ا%1لْ"},
    {a="([%(%[%|%<%s%-o])(a)l%-", b="%1ا%2لْ"}, --p
    -- diphthongs to be resolved before ʾalif conjunctionis
    {a="(aW)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="awuا"},
    {a="(aw)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="%1u"},
    {a="(ay)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="%1i"},
    {a="(aW)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="awuا%2%3"},
    {a="(aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1u%2%3"},
    {a="(ay)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"},
    -- art. with waṣla + lām
    {a="'l%-(l)", b="ٱل%1%1"},
    -- art. with waṣla + solar consonant
    -- ('c' and '^n' are additional characters)
    {a="'l%-(%^n)", b="ٱل%1"}, -- ^n is lunar
    {a="'l%-([%_%^%.]?[tdrzsnc])", b="ٱل%1%1"},
    -- li-/la- + art. + lām
    {a="l([ai])%-l%-(l)", b="ل%1%2%2"},
    -- assim. art. with waṣla + solar consonant
    -- ('c' and '^n' are additional characters)
    {a="'(%^n)%-", b="ٱل"}, -- ^n is lunar
    {a="'([%_%^%.]?[tdrzsnc])%-", b="ٱل%1"},
    -- li-/la- + art. + solar consonant is a special orthography
    -- ('c' and '^n' are additional characters)
    {a="l([ai])%-l%-(%^n)", b="ل%1ل%2"}, -- ^n is lunar
    {a="l([ai])%-l%-([%_%^%.]?[tdrzsnc])", b="ل%1ل%2%2"},
    -- li-/la- + assim. art. + solar consonant is a special orthography
    -- ('c' and '^n' are additional characters)
    {a="l([ai])%-(%^n)%-(%^n)", b="ل%1ل%3"}, -- ^n is lunar
    {a="l([ai])%-([%_%^%.]?[tdrzsnc])%-([%_%^%.]?[tdrzsnc])", b="ل%1ل%3%3"},
    -- art. with waṣla + initial unstable hamza
    {a="'l%-(\"?[uai])", b="ٱل%1ٱ"},
    -- art. with waṣla + lunar consonant (i.e. what remains)
    {a="'l%-", b="ٱلْ"},
    -- the silent wāw
    {a="uU(%p?)$", b="uو%1"},
    {a="uU(%p?%s)", b="uو%1"},
    {a="aU(%p?)$", b="aو%1"},
    {a="aU(%p?%s)", b="aو%1"},
    {a="iU(%p?)$", b="iو%1"},
    {a="iU(%p?%s)", b="iو%1"},
    -- words ending in -āT with silent wāw/yāʾ
    {a="(_a)UA", b="%1وا"},
    {a="(_a)U", b="%1و"},
    {a="(_a)I", b="%1ي"}
  }
  -- idgham/assimilation rules for trigraphs apply here: see voc lua
  -- file.
  -- trigraphsfveasy: "easy" counterpart of the trigraph rewrite rules
  -- (article al-/'l- before lām, solar and lunar consonants, li-/la- +
  -- article, diphthongs before ʾalif conjunctionis, silent wāw).  Each entry
  -- is a pair:
  --   a = Lua pattern to look for
  --   b = replacement text (%n refers to the n-th capture of `a`)
  -- NOTE(review): the opening comment promises 'easy' tags on the entries
  -- that differ, but no entry in this table carries one -- confirm whether
  -- the two trigraph tables are meant to be identical.
  -- NOTE(review): the code that walks this table is not in this file.  The
  -- "(i.e. what remains)" rules only work if the more specific rules above
  -- them have been tried first, so keep the order -- confirm against the
  -- caller.
  trigraphsfveasy = { -- trigraphs or more (see 'easy' tag below for the diffs)
    -- 'llatI / 'llad_I
    {a="^'ll(a)([%_]?[dt])", b="ٱلّ%1%2"},
    {a="([%(%[%|%<%s%-])'ll(a)([%_]?[dt])", b="%1ٱلّ%2%3"}, --p
    -- law: the diphthong is to be resolved into 'lawi' (next 8 lines)
    {a="^(law)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="%1i"},
    {a="(%W)(law)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="%1%2i"},
    {a="^(law)(o)(\"?[uai])([%S]-o)", b="%1i"},
    {a="(%W)(law)(o)(\"?[uai])([%S]-o)", b="%1%2i"},
    {a="^(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"},
    {a="(%W)(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"},
    {a="^(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"},
    {a="(%W)(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"},
    -- al- + lām
    {a="^(a)l%-(l)", b="ا%1ل%2%2"},
    {a="([%(%[%|%<%s%-o])(a)l%-(l)", b="%1ا%2ل%3%3"}, --p
    -- al- + solar consonant ('c' and '^n' are additional characters)
    {a="^(a)l%-(%^n)", b="ا%1ل%2"}, -- ^n is lunar
    {a="([%(%[%|%<%s%-o])(a)l%-(%^n)", b="%1ا%2ل%3"}, -- ^n is lunar --p
    {a="^(a)l%-([%_%^%.]?[tdrzsnc])", b="ا%1ل%2%2"},
    {a="([%(%[%|%<%s%-o])(a)l%-([%_%^%.]?[tdrzsnc])", b="%1ا%2ل%3%3"}, --p
    -- assim. art. + solar consonant ('c' and '^n' are additional characters)
    {a="^(a)(%^n)%-", b="ا%1ل"}, -- ^n is lunar
    {a="([%(%[%|%<%s%-o])(a)(%^n)%-", b="%1ا%2ل"}, -- ^n is lunar --p
    {a="^(a)([%_%^%.]?[tdrzsnc])%-", b="ا%1ل%2"},
    {a="([%(%[%|%<%s%-o])(a)([%_%^%.]?[tdrzsnc])%-", b="%1ا%2ل%3"}, --p
    -- al- + initial unstable hamza
    {a="^(a)l%-(\"?[uai])", b="ا%1ل%2ٱ"},
    {a="([%(%[%|%<%s%-o])(a)l%-(\"?[uai])", b="%1ا%2ل%3ٱ"}, --p
    -- li-/la + art. + initial unstable hamza is a special orthography
    {a="l([ai])%-l%-(\"?[uai])", b="ل%1ل%2ٱ"},
    -- al- + lunar consonant (i.e. what remains)
    {a="^(a)l%-", b="ا%1لْ"},
    {a="([%(%[%|%<%s%-o])(a)l%-", b="%1ا%2لْ"}, --p
    -- diphthongs to be resolved before ʾalif conjunctionis
    {a="(aW)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="awuا"},
    {a="(aw)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="%1u"},
    {a="(ay)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="%1i"},
    {a="(aW)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="awuا%2%3"},
    {a="(aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1u%2%3"},
    {a="(ay)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"},
    -- art. with waṣla + lām
    {a="'l%-(l)", b="ٱل%1%1"},
    -- art. with waṣla + solar consonant
    -- ('c' and '^n' are additional characters)
    {a="'l%-(%^n)", b="ٱل%1"}, -- ^n is lunar
    {a="'l%-([%_%^%.]?[tdrzsnc])", b="ٱل%1%1"},
    -- li-/la- + art. + lām
    {a="l([ai])%-l%-(l)", b="ل%1%2%2"},
    -- assim. art. with waṣla + solar consonant
    -- ('c' and '^n' are additional characters)
    {a="'(%^n)%-", b="ٱل"}, -- ^n is lunar
    {a="'([%_%^%.]?[tdrzsnc])%-", b="ٱل%1"},
    -- li-/la- + art. + solar consonant is a special orthography
    -- ('c' and '^n' are additional characters)
    {a="l([ai])%-l%-(%^n)", b="ل%1ل%2"}, -- ^n is lunar
    {a="l([ai])%-l%-([%_%^%.]?[tdrzsnc])", b="ل%1ل%2%2"},
    -- li-/la- + assim. art. + solar consonant is a special orthography
    -- ('c' and '^n' are additional characters)
    {a="l([ai])%-(%^n)%-(%^n)", b="ل%1ل%3"}, -- ^n is lunar
    {a="l([ai])%-([%_%^%.]?[tdrzsnc])%-([%_%^%.]?[tdrzsnc])", b="ل%1ل%3%3"},
    -- art. with waṣla + initial unstable hamza
    {a="'l%-(\"?[uai])", b="ٱل%1ٱ"},
    -- art. with waṣla + lunar consonant (i.e. what remains)
    {a="'l%-", b="ٱلْ"},
    -- the silent wāw
    {a="uU(%p?)$", b="uو%1"},
    {a="uU(%p?%s)", b="uو%1"},
    {a="aU(%p?)$", b="aو%1"},
    {a="aU(%p?%s)", b="aو%1"},
    {a="iU(%p?)$", b="iو%1"},
    {a="iU(%p?%s)", b="iو%1"},
    -- words ending in -āT with silent wāw/yāʾ
    {a="(_a)UA", b="%1وا"},
    {a="(_a)U", b="%1و"},
    {a="(_a)I", b="%1ي"}
  }
  -- digraphsfvidgham: rewrite rules for two-character sequences: ʾiʿrāb
  -- hyphens, connective ʾalif, doubled consonants (written with šaddah),
  -- sukūn placement, and the digraph transliterations (_t, ^g, .h, ...).
  -- Each entry is a pair:
  --   a = Lua pattern to look for
  --   b = replacement text (%n refers to the n-th capture of `a`)
  -- NOTE(review): the code that walks this table is not in this file.  The
  -- doubled forms (e.g. "%_t%_t") are listed before the single digraphs
  -- (e.g. "_t"), which only works if entries are tried in order -- confirm
  -- against the caller before reordering anything.
  digraphsfvidgham = {
    -- ʾiʿrāb: straight double quote must be discarded
    {a="(%-)(\"?[UI]na)(%p?%s)", b="%2%3"},
    {a="(%-)(\"?[UI]na)(%p?)$", b="%2%3"},
    {a="(%-)(\"?At[ui])(%p?%s)", b="%2%3"},
    {a="(%-)(\"?At[ui])(%p?)$", b="%2%3"},
    {a="(%-)(\"?Ani)(%p?%s)", b="%2%3"},
    {a="(%-)(\"?Ani)(%p?)$", b="%2%3"},
    {a="(%-)(\"?ayni)(%p?%s)", b="%2%3"},
    {a="(%-)(\"?ayni)(%p?)$", b="%2%3"},
    {a="(%-)(\"?[uai])(%p?%s)", b="%2%3"},
    {a="(%-)(\"?[uai])(%p?)$", b="%2%3"},
    -- ʾiʿrāb (end)
    -- initial straight double quote gives a connective ʾalif
    {a="^\"[uai]", b="ٱ"},
    {a="([%(%[%|%<%s%-])\"[uai]", b="%1ٱ"}, --p
    -- diphthongs to be resolved before ʾalif conjunctionis
    {a="(aW)(o)(\"?[uai])([%S]-o)", b="awuا"},
    {a="(aW)(%s)([%(%[%|%<]?)(\"?[uai])", b="awuا%2%3ٱ"}, --p
    {a="(aw)(o)(\"?[uai])([%S]-o)", b="%1u"},
    {a="(aw)(%s)([%(%[%|%<]?)(\"?[uai])", b="%1u%2%3ٱ"}, --p
    {a="(ay)(o)(\"?[uai])([%S]-o)", b="%1i"},
    {a="(ay)(%s)([%(%[%|%<]?)(\"?[uai])", b="%1i%2%3ٱ"}, --p
    -- hyphen + initial alif without hamza
    -- {a="([uai]%-)(\"?[uai])", b="%1ٱ"},
    {a="([uai]%-)(\"?[uai])([%^%_%.%`]?)([%aأإؤئ])", b="%1ٱ%3%4"},
    -- initial alif without hamza (next 4 lines)
    {a="(o[%S]-)([uaiUAIY])(o)(\"?[uai])", b="ٱ"},
    {a="([%_]?[uaiUAIY])(%s[%(%[%|%<]?)(\"?[uai])", b="%1%2ٱ"}, --p
    {a="^([%(%[%|%<]?)(\"?[uai])", b="%1ا%2"}, --p
    {a="(%s)([%(%[%|%<]?)(\"?[uai])", b="%1%2ا%3"}, --p
    {a="%-%-", b="ـ"},
    {a="ؤؤ", b="ؤّ"},
    {a="أأ", b="أّ"},
    {a="ئئ", b="ئّ"},
    {a="bb", b="بّ"},
    {a="BB", b="ـّ"},
    {a="([%_%^%.])([tghdsz])([tghdsz])", b="%1%2|%3"},
    -- same as above for additional characters:
    {a="([%_%^%.])([cn])([cn])", b="%1%2|%3"},
    {a="tt", b="تّ"},
    {a="%_t%_t", b="ثّ"},
    {a="jj", b="جّ"},
    {a="%^g%^g", b="جّ"},
    {a="%.h%.h", b="حّ"},
    {a="xx", b="خّ"},
    {a="%_h%_h", b="خّ"},
    {a="dd", b="دّ"},
    {a="%_d%_d", b="ذّ"},
    {a="rr", b="رّ"},
    {a="zz", b="زّ"},
    {a="ss", b="سّ"},
    {a="%^s%^s", b="شّ"},
    {a="%.s%.s", b="صّ"},
    {a="%.d%.d", b="ضّ"},
    {a="%.t%.t", b="طّ"},
    {a="%.z%.z", b="ظّ"},
    {a="%`%`", b="عّ"},
    {a="%.g%.g", b="غّ"},
    {a="ff", b="فّ"},
    {a="qq", b="قّ"},
    {a="kk", b="كّ"},
    {a="ll", b="لّ"},
    {a="mm", b="مّ"},
    {a="nn", b="نّ"},
    {a="hh", b="هّ"},
    {a="ww", b="وّ"},
    {a="yy", b="يّ"},
    {a="%.y%.y", b="ىّ"},
    -- additional characters + šaddah (begin)
    {a="pp", b="پّ"},
    {a="vv", b="ڤّ"},
    {a="gg", b="گّ"},
    {a="%^c%^c", b="چّ"},
    {a="%^z%^z", b="ژّ"},
    {a="%^n%^n", b="ڭّ"},
    -- additional characters + šaddah (end)
    -- sukūn begin
    -- first, take out hyphen if any (next two lines):
    {a="([ai]Y)%-([uaiUAI])", b="%1%2"},
    {a="([%_%^%.]?[Bbtjghxdrzs%`fqklmnwy])%-([uaiUAI])", b="%1%2"},
    {a="([%_%^%.]?[Bbtjghxdrzs%`fqklmnwy])(%p?)$", b="%1ْ%2"},
    {a="([%_%^%.]?[Bbtjghxdrzs%`fqklmnwy])(%p?%s)", b="%1ْ%2"},
    {a="([ai]Y)([%_]?[^%_uaiUAIYًٌٍ])", b="%1ْ%2"},
    {a="([%_%^%.]?[Bbtjghxdrzs%`fqklmnwy])([%_]?[^%_uaiUAIYًٌٍ])", b="%1ْ%2"},
    -- take out sukūn in cases of assimilation
    {a="(n)(ْ)(%s)(ر)", b="%1%3%4"},
    {a="(n)(ْ)(%s)(و)", b="%1%3%4"},
    {a="(n)(ْ)(%s)(ي)", b="%1%3%4"},
    {a="(n)(ْ)(%s)(ل)", b="%1%3%4"},
    {a="(n)(ْ)(%s)(م)", b="%1%3%4"},
    {a="(n)(ْ)(%s)(ن)", b="%1%3%4"},
    {a="ْ\"", b="\""},
    -- sukūn end
    {a="_t", b="ث"},
    {a="%^g", b="ج"},
    {a="%.h", b="ح"},
    {a="_h", b="خ"},
    {a="_d", b="ذ"},
    {a="%^s", b="ش"},
    {a="%.s", b="ص"},
    {a="%.d", b="ض"},
    {a="%.t", b="ط"},
    {a="%.z", b="ظ"},
    {a="%.g", b="غ"},
    {a="%.y", b="ى"},
    -- additional characters (begin)
    {a="%^c", b="چ"},
    {a="%^z", b="ژ"},
    {a="%^n", b="ڭ"},
    -- additional characters (end)
    {a="(U)(A)", b="%1ا"},
    {a="WA", b="وْا"},
    {a="(a)W\"", b="%1وا"},
    {a="(a)W", b="%1وْا"},
    {a="_A", b="aى"},
    {a="_u", b="ٗ"},
    {a="_a", b="ٰ"},
    {a="_i", b="ٖ"},
    {a="%.b", b="ٮ"},
    {a="%.f", b="ڡ"},
    {a="%.q", b="ٯ"},
    {a="%.k", b="ک"},
    {a="%.n", b="ں"},
    {a="%^d", b="ڊ"}
  }
  -- digraphsfv: ordered list of rules, each a table {a=<Lua pattern>,
  -- b=<replacement>}; `b` refers to the captures of `a` as %1..%4.
  -- Rule order is significant: later rules work on the output of earlier
  -- ones (e.g. the sukūn rules match consonants while they are still in
  -- ASCII transliteration, before the digraph-to-letter rules that follow).
  -- NOTE(review): presumably applied one after another with gsub by code
  -- outside this chunk -- confirm against the caller.
  -- NOTE(review): several [...] sets contain multi-byte Arabic characters;
  -- a byte-oriented gsub treats them as individual UTF-8 bytes -- verify
  -- which gsub implementation consumes this table.
  655. digraphsfv = {
  656. -- ʾiʿrāb (begin): remove the hyphen (capture 1, dropped) that introduces an ending followed by optional punctuation and whitespace or end of string; the optional straight double quote is kept inside capture 2 at this stage
  657. {a="(%-)(\"?[UI]na)(%p?%s)", b="%2%3"},
  658. {a="(%-)(\"?[UI]na)(%p?)$", b="%2%3"},
  659. {a="(%-)(\"?At[ui])(%p?%s)", b="%2%3"},
  660. {a="(%-)(\"?At[ui])(%p?)$", b="%2%3"},
  661. {a="(%-)(\"?Ani)(%p?%s)", b="%2%3"},
  662. {a="(%-)(\"?Ani)(%p?)$", b="%2%3"},
  663. {a="(%-)(\"?ayni)(%p?%s)", b="%2%3"},
  664. {a="(%-)(\"?ayni)(%p?)$", b="%2%3"},
  665. {a="(%-)(\"?[uai])(%p?%s)", b="%2%3"},
  666. {a="(%-)(\"?[uai])(%p?)$", b="%2%3"},
  667. -- ʾiʿrāb (end)
  668. -- an initial straight double quote followed by a short vowel gives a connective ʾalif
  669. {a="^\"[uai]", b="ٱ"},
  670. {a="([%(%[%|%<%s%-])\"[uai]", b="%1ٱ"}, --p
  671. -- diphthongs to be resolved before ʾalif conjunctionis
  672. {a="(aW)(o)(\"?[uai])([%S]-o)", b="awuا"},
  673. {a="(aW)(%s)([%(%[%|%<]?)(\"?[uai])", b="awuا%2%3ٱ"}, --p
  674. {a="(aw)(o)(\"?[uai])([%S]-o)", b="%1u"},
  675. {a="(aw)(%s)([%(%[%|%<]?)(\"?[uai])", b="%1u%2%3ٱ"}, --p
  676. {a="(ay)(o)(\"?[uai])([%S]-o)", b="%1i"},
  677. {a="(ay)(%s)([%(%[%|%<]?)(\"?[uai])", b="%1i%2%3ٱ"}, --p
  678. -- hyphen + initial alif without hamza
  679. -- {a="([uai]%-)(\"?[uai])", b="%1ٱ"},
  680. {a="([uai]%-)(\"?[uai])([%^%_%.%`]?)([%aأإؤئ])", b="%1ٱ%3%4"},
  681. -- initial alif without hamza (next 4 lines)
  682. {a="(o[%S]-)([uaiUAIY])(o)(\"?[uai])", b="ٱ"},
  683. {a="([%_]?[uaiUAIY])(%s[%(%[%|%<]?)(\"?[uai])", b="%1%2ٱ"}, --p
  684. {a="^([%(%[%|%<]?)(\"?[uai])", b="%1ا%2"}, --p
  685. {a="(%s)([%(%[%|%<]?)(\"?[uai])", b="%1%2ا%3"}, --p
  686. {a="%-%-", b="ـ"},
  687. {a="ؤؤ", b="ؤّ"},
  688. {a="أأ", b="أّ"},
  689. {a="ئئ", b="ئّ"},
  690. {a="bb", b="بّ"},
  691. {a="BB", b="ـّ"},
  692. {a="([%_%^%.])([tghdsz])([tghdsz])", b="%1%2|%3"},
  693. -- same as above for additional characters:
  694. {a="([%_%^%.])([cn])([cn])", b="%1%2|%3"},
  695. {a="tt", b="تّ"},
  696. {a="%_t%_t", b="ثّ"},
  697. {a="jj", b="جّ"},
  698. {a="%^g%^g", b="جّ"},
  699. {a="%.h%.h", b="حّ"},
  700. {a="xx", b="خّ"},
  701. {a="%_h%_h", b="خّ"},
  702. {a="dd", b="دّ"},
  703. {a="%_d%_d", b="ذّ"},
  704. {a="rr", b="رّ"},
  705. {a="zz", b="زّ"},
  706. {a="ss", b="سّ"},
  707. {a="%^s%^s", b="شّ"},
  708. {a="%.s%.s", b="صّ"},
  709. {a="%.d%.d", b="ضّ"},
  710. {a="%.t%.t", b="طّ"},
  711. {a="%.z%.z", b="ظّ"},
  712. {a="%`%`", b="عّ"},
  713. {a="%.g%.g", b="غّ"},
  714. {a="ff", b="فّ"},
  715. {a="qq", b="قّ"},
  716. {a="kk", b="كّ"},
  717. {a="ll", b="لّ"},
  718. {a="mm", b="مّ"},
  719. {a="nn", b="نّ"},
  720. {a="hh", b="هّ"},
  721. {a="ww", b="وّ"},
  722. {a="yy", b="يّ"},
  723. {a="%.y%.y", b="ىّ"},
  724. -- additional characters + šaddah (begin)
  725. {a="pp", b="پّ"},
  726. {a="vv", b="ڤّ"},
  727. {a="gg", b="گّ"},
  728. {a="%^c%^c", b="چّ"},
  729. {a="%^z%^z", b="ژّ"},
  730. {a="%^n%^n", b="ڭّ"},
  731. -- additional characters + šaddah (end)
  732. -- sukūn begin
  733. -- first, take out hyphen if any (next two lines):
  734. {a="([ai]Y)%-([uaiUAI])", b="%1%2"},
  735. {a="([%_%^%.]?[Bbtjghxdrzs%`fqklmnwy])%-([uaiUAI])", b="%1%2"},
  736. {a="([%_%^%.]?[Bbtjghxdrzs%`fqklmnwy])(%p?)$", b="%1ْ%2"},
  737. {a="([%_%^%.]?[Bbtjghxdrzs%`fqklmnwy])(%p?%s)", b="%1ْ%2"},
  738. {a="([ai]Y)([%_]?[^%_uaiUAIYًٌٍ])", b="%1ْ%2"},
  739. {a="([%_%^%.]?[Bbtjghxdrzs%`fqklmnwy])([%_]?[^%_uaiUAIYًٌٍ])", b="%1ْ%2"},
  740. -- take out sukūn in cases of assimilation (these rules are disabled in this table)
  741. -- {a="(n)(ْ)(%s)(ر)", b="%1%3%4"},
  742. -- {a="(n)(ْ)(%s)(و)", b="%1%3%4"},
  743. -- {a="(n)(ْ)(%s)(ي)", b="%1%3%4"},
  744. -- {a="(n)(ْ)(%s)(ل)", b="%1%3%4"},
  745. -- {a="(n)(ْ)(%s)(م)", b="%1%3%4"},
  746. -- {a="(n)(ْ)(%s)(ن)", b="%1%3%4"},
  747. {a="ْ\"", b="\""}, -- drop a sukūn that ended up right before a straight double quote
  748. -- sukūn end
  749. {a="_t", b="ث"},
  750. {a="%^g", b="ج"},
  751. {a="%.h", b="ح"},
  752. {a="_h", b="خ"},
  753. {a="_d", b="ذ"},
  754. {a="%^s", b="ش"},
  755. {a="%.s", b="ص"},
  756. {a="%.d", b="ض"},
  757. {a="%.t", b="ط"},
  758. {a="%.z", b="ظ"},
  759. {a="%.g", b="غ"},
  760. {a="%.y", b="ى"},
  761. -- additional characters (begin)
  762. {a="%^c", b="چ"},
  763. {a="%^z", b="ژ"},
  764. {a="%^n", b="ڭ"},
  765. -- additional characters (end)
  766. {a="(U)(A)", b="%1ا"},
  767. {a="WA", b="وْا"},
  768. {a="(a)W\"", b="%1وا"},
  769. {a="(a)W", b="%1وْا"},
  770. {a="_A", b="aى"},
  771. {a="_u", b="ٗ"},
  772. {a="_a", b="ٰ"},
  773. {a="_i", b="ٖ"},
  774. {a="%.b", b="ٮ"},
  775. {a="%.f", b="ڡ"},
  776. {a="%.q", b="ٯ"},
  777. {a="%.k", b="ک"},
  778. {a="%.n", b="ں"},
  779. {a="%^d", b="ڊ"}
  780. }
  -- digraphsfveasy: ordered list of rules, each a table {a=<Lua pattern>,
  -- b=<replacement>}; `b` refers to the captures of `a` as %1..%4.
  -- It carries the same rules as digraphsfv except in the sukūn section:
  -- every sukūn already present is deleted, and the rules that remove
  -- hyphens, insert sukūn, or drop a sukūn before a straight double quote
  -- are commented out.
  -- NOTE(review): presumably applied one after another with gsub by code
  -- outside this chunk -- confirm against the caller.
  781. digraphsfveasy = { -- see the differences under 'easy' marker below
  782. -- ʾiʿrāb (begin): remove the hyphen (capture 1, dropped) that introduces an ending followed by optional punctuation and whitespace or end of string; the optional straight double quote is kept inside capture 2 at this stage
  783. {a="(%-)(\"?[UI]na)(%p?%s)", b="%2%3"},
  784. {a="(%-)(\"?[UI]na)(%p?)$", b="%2%3"},
  785. {a="(%-)(\"?At[ui])(%p?%s)", b="%2%3"},
  786. {a="(%-)(\"?At[ui])(%p?)$", b="%2%3"},
  787. {a="(%-)(\"?Ani)(%p?%s)", b="%2%3"},
  788. {a="(%-)(\"?Ani)(%p?)$", b="%2%3"},
  789. {a="(%-)(\"?ayni)(%p?%s)", b="%2%3"},
  790. {a="(%-)(\"?ayni)(%p?)$", b="%2%3"},
  791. {a="(%-)(\"?[uai])(%p?%s)", b="%2%3"},
  792. {a="(%-)(\"?[uai])(%p?)$", b="%2%3"},
  793. -- ʾiʿrāb (end)
  794. -- an initial straight double quote followed by a short vowel gives a connective ʾalif
  795. {a="^\"[uai]", b="ٱ"},
  796. {a="([%(%[%|%<%s%-])\"[uai]", b="%1ٱ"}, --p
  797. -- diphthongs to be resolved before ʾalif conjunctionis
  798. {a="(aW)(o)(\"?[uai])([%S]-o)", b="awuا"},
  799. {a="(aW)(%s)([%(%[%|%<]?)(\"?[uai])", b="awuا%2%3ٱ"}, --p
  800. {a="(aw)(o)(\"?[uai])([%S]-o)", b="%1u"},
  801. {a="(aw)(%s)([%(%[%|%<]?)(\"?[uai])", b="%1u%2%3ٱ"}, --p
  802. {a="(ay)(o)(\"?[uai])([%S]-o)", b="%1i"},
  803. {a="(ay)(%s)([%(%[%|%<]?)(\"?[uai])", b="%1i%2%3ٱ"}, --p
  804. -- hyphen + initial alif without hamza
  805. -- {a="([uai]%-)(\"?[uai])", b="%1ٱ"},
  806. {a="([uai]%-)(\"?[uai])([%^%_%.%`]?)([%aأإؤئ])", b="%1ٱ%3%4"},
  807. -- initial alif without hamza (next 4 lines)
  808. {a="(o[%S]-)([uaiUAIY])(o)(\"?[uai])", b="ٱ"},
  809. {a="([%_]?[uaiUAIY])(%s[%(%[%|%<]?)(\"?[uai])", b="%1%2ٱ"}, --p
  810. {a="^([%(%[%|%<]?)(\"?[uai])", b="%1ا%2"}, --p
  811. {a="(%s)([%(%[%|%<]?)(\"?[uai])", b="%1%2ا%3"}, --p
  812. {a="%-%-", b="ـ"},
  813. {a="ؤؤ", b="ؤّ"},
  814. {a="أأ", b="أّ"},
  815. {a="ئئ", b="ئّ"},
  816. {a="bb", b="بّ"},
  817. {a="BB", b="ـّ"},
  818. {a="([%_%^%.])([tghdsz])([tghdsz])", b="%1%2|%3"},
  819. -- same as above for additional characters:
  820. {a="([%_%^%.])([cn])([cn])", b="%1%2|%3"},
  821. {a="tt", b="تّ"},
  822. {a="%_t%_t", b="ثّ"},
  823. {a="jj", b="جّ"},
  824. {a="%^g%^g", b="جّ"},
  825. {a="%.h%.h", b="حّ"},
  826. {a="xx", b="خّ"},
  827. {a="%_h%_h", b="خّ"},
  828. {a="dd", b="دّ"},
  829. {a="%_d%_d", b="ذّ"},
  830. {a="rr", b="رّ"},
  831. {a="zz", b="زّ"},
  832. {a="ss", b="سّ"},
  833. {a="%^s%^s", b="شّ"},
  834. {a="%.s%.s", b="صّ"},
  835. {a="%.d%.d", b="ضّ"},
  836. {a="%.t%.t", b="طّ"},
  837. {a="%.z%.z", b="ظّ"},
  838. {a="%`%`", b="عّ"},
  839. {a="%.g%.g", b="غّ"},
  840. {a="ff", b="فّ"},
  841. {a="qq", b="قّ"},
  842. {a="kk", b="كّ"},
  843. {a="ll", b="لّ"},
  844. {a="mm", b="مّ"},
  845. {a="nn", b="نّ"},
  846. {a="hh", b="هّ"},
  847. {a="ww", b="وّ"},
  848. {a="yy", b="يّ"},
  849. {a="%.y%.y", b="ىّ"},
  850. -- additional characters + šaddah (begin)
  851. {a="pp", b="پّ"},
  852. {a="vv", b="ڤّ"},
  853. {a="gg", b="گّ"},
  854. {a="%^c%^c", b="چّ"},
  855. {a="%^z%^z", b="ژّ"},
  856. {a="%^n%^n", b="ڭّ"},
  857. -- additional characters + šaddah (end)
  858. -- sukūn begin ('easy' needs the sukūn rules to be taken out); but
  859. -- first take out every sukūn previously generated by the hamza rules,
  860. -- so that there is no need to edit those rules:
  861. {a="ْ", b=""},
  862. -- hyphen removal and sukūn insertion (all disabled in 'easy' mode):
  863. -- {a="([ai]Y)%-([uaiUAI])", b="%1%2"},
  864. -- {a="([%_%^%.]?[Bbtjghxdrzs%`fqklmnwy])%-([uaiUAI])", b="%1%2"},
  865. -- {a="([%_%^%.]?[Bbtjghxdrzs%`fqklmnwy])(%p?)$", b="%1ْ%2"},
  866. -- {a="([%_%^%.]?[Bbtjghxdrzs%`fqklmnwy])(%p?%s)", b="%1ْ%2"},
  867. -- {a="([ai]Y)([%_]?[^%_uaiUAIYًٌٍ])", b="%1ْ%2"},
  868. -- {a="([%_%^%.]?[Bbtjghxdrzs%`fqklmnwy])([%_]?[^%_uaiUAIYًٌٍ])", b="%1ْ%2"},
  869. -- take out sukūn in cases of assimilation (also disabled)
  870. -- {a="(n)(ْ)(%s)(ر)", b="%1%3%4"},
  871. -- {a="(n)(ْ)(%s)(و)", b="%1%3%4"},
  872. -- {a="(n)(ْ)(%s)(ي)", b="%1%3%4"},
  873. -- {a="(n)(ْ)(%s)([ل])", b="%1%3%4"},
  874. -- {a="(n)(ْ)(%s)([م])", b="%1%3%4"},
  875. -- {a="(n)(ْ)(%s)([ن])", b="%1%3%4"},
  876. -- {a="ْ\"", b="\""},
  877. -- sukūn end
  878. {a="_t", b="ث"},
  879. {a="%^g", b="ج"},
  880. {a="%.h", b="ح"},
  881. {a="_h", b="خ"},
  882. {a="_d", b="ذ"},
  883. {a="%^s", b="ش"},
  884. {a="%.s", b="ص"},
  885. {a="%.d", b="ض"},
  886. {a="%.t", b="ط"},
  887. {a="%.z", b="ظ"},
  888. {a="%.g", b="غ"},
  889. {a="%.y", b="ى"},
  890. -- additional characters (begin)
  891. {a="%^c", b="چ"},
  892. {a="%^z", b="ژ"},
  893. {a="%^n", b="ڭ"},
  894. -- additional characters (end)
  895. {a="(U)(A)", b="%1ا"},
  896. {a="WA", b="وْا"}, -- NOTE(review): this rule and the (a)W rule below still emit a sukūn after the blanket removal above -- confirm this is intended in 'easy' mode
  897. {a="(a)W\"", b="%1وا"},
  898. {a="(a)W", b="%1وْا"},
  899. {a="_A", b="aى"},
  900. {a="_u", b="ٗ"},
  901. {a="_a", b="ٰ"},
  902. {a="_i", b="ٖ"},
  903. {a="%.b", b="ٮ"},
  904. {a="%.f", b="ڡ"},
  905. {a="%.q", b="ٯ"},
  906. {a="%.k", b="ک"},
  907. {a="%.n", b="ں"},
  908. {a="%^d", b="ڊ"}
  909. }
  -- singlefv: ordered list of rules, each a table {a=<Lua pattern>,
  -- b=<replacement>}, covering the single-character ASCII transliteration
  -- codes plus a final clean-up of straight double quotes and hyphens.
  -- The rule for "f" used to be listed twice; the second copy could never
  -- match (the first had already replaced every "f"), so only one is kept.
  -- NOTE(review): presumably applied one after another with gsub by code
  -- outside this chunk -- confirm against the caller.
  singlefv = {
    {a="b", b="ب"},
    {a="t", b="ت"},
    {a="j", b="ج"},
    {a="x", b="خ"},
    {a="d", b="د"},
    {a="r", b="ر"},
    {a="z", b="ز"},
    {a="s", b="س"},
    {a="`", b="ع"},
    {a="f", b="ف"},
    {a="q", b="ق"},
    {a="k", b="ك"},
    {a="l", b="ل"},
    {a="m", b="م"},
    {a="n", b="ن"},
    {a="h", b="ه"},
    {a="w", b="و"},
    {a="y", b="ي"},
    {a="T", b="ة"},
    -- additional characters (begin)
    {a="p", b="پ"},
    {a="v", b="ڤ"},
    {a="g", b="گ"},
    -- additional characters (end)
    -- remove a straight double quote at the end of the string ...
    {a="\"$", b=""},
    -- ... before a non-alphanumeric character ...
    {a="\"(%W)", b="%1"},
    -- ... and before anything that is not one of the vowels u a i U A I
    {a="\"([^uaiUAI])", b="%1"},
    -- remove a hyphen unless a digit stands on either side of it
    {a="([^0-9])%-([^0-9])", b="%1%2"},
    {a="B", b="ـ"},
  }
  -- singlefveasy: 'easy' variant of singlefv.  Ordered list of rules, each a
  -- table {a=<Lua pattern>, b=<replacement>}.  The letter rules are the same
  -- as in singlefv; the difference is in the three straight-double-quote
  -- rules under the 'easy' tag, which replace the quote with a sukūn
  -- instead of deleting it.
  -- The rule for "f" used to be listed twice; the second copy could never
  -- match (the first had already replaced every "f"), so only one is kept.
  -- NOTE(review): presumably applied one after another with gsub by code
  -- outside this chunk -- confirm against the caller.
  singlefveasy = { -- see the differences under 'easy' tag below
    {a="b", b="ب"},
    {a="t", b="ت"},
    {a="j", b="ج"},
    {a="x", b="خ"},
    {a="d", b="د"},
    {a="r", b="ر"},
    {a="z", b="ز"},
    {a="s", b="س"},
    {a="`", b="ع"},
    {a="f", b="ف"},
    {a="q", b="ق"},
    {a="k", b="ك"},
    {a="l", b="ل"},
    {a="m", b="م"},
    {a="n", b="ن"},
    {a="h", b="ه"},
    {a="w", b="و"},
    {a="y", b="ي"},
    {a="T", b="ة"},
    -- additional characters (begin)
    {a="p", b="پ"},
    {a="v", b="ڤ"},
    {a="g", b="گ"},
    -- additional characters (end)
    -- easy (begin): \" needs to put back the sukūn
    {a="\"$", b="ْ"},
    {a="\"(%W)", b="ْ%1"},
    {a="\"([^uaiUAI])", b="ْ%1"},
    -- easy (end)
    -- remove a hyphen unless a digit stands on either side of it
    {a="([^0-9])%-([^0-9])", b="%1%2"},
    {a="B", b="ـ"},
  }