-- arabluatex_voc.lua
  --[[
  This file is part of the `arabluatex' package
  ArabLuaTeX -- Processing ArabTeX notation under LuaLaTeX
  Copyright (C) 2016--2019 Robert Alessi
  Please send error reports and suggestions for improvements to Robert
  Alessi <alessi@robertalessi.net>
  This program is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.
  This program is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  General Public License for more details.
  You should have received a copy of the GNU General Public License
  along with this program. If not, see
  <http://www.gnu.org/licenses/>.
  --]]
  -- Special "@name" marks.
  --   a: the mark as typed in the input;
  --   b: TeX code selecting \arabicfont and emitting one code point written
  --      in TeX's ^^^^hhhh hexadecimal notation;
  --   c: the same formula spelled out through \arb[trans]{...}.
  -- NOTE(review): which of b/c is used is decided by the caller (not visible
  -- in this file) -- presumably the active output mode; confirm there.
  arbmarks = {
     {a="@bismillah", b="\\arabicfont{}^^^^fdfd", c="\\arb[trans]{bi-ismi \\uc{'l-l_ahi} 'l-ra.hm_ani 'l-ra.hImi}"},
     {a="@salam", b="\\arabicfont{}^^^^fdf5", c="\\arb[trans]{.sall_A\\arbnull{'l-l_ahu} \\uc{'l-l_ahu} `alay-hi wa-sallama}"},
     {a="@jalla", b="\\arabicfont{}^^^^fdfb", c="\\arb[trans]{^galla ^galAla-hu}"},
     {a="@slm", b="\\arabicfont{}^^^^fdfa", c="\\arb[trans]{.sall_A\\arbnull{'l-l_ahu} \\uc{'l-l_ahu} `alay-hi wa-sallama}"}
  }
  -- Letters of the abjad sequence in ArabTeX notation, grouped in four rows
  -- of 9, 9, 9 and 1 entries.
  -- NOTE(review): the rows presumably stand for units, tens, hundreds and
  -- one thousand, indexed by digit value -- confirm against the caller.
  abjad = {
     {"a\"'", "b", "j", "d", "h", "w", "z", ".h", ".t"},
     {"y", "k", "l", "m", "n", "s", "`", "f", ".s", },
     {"q", "r", "^s", "t", "_t", "x", "_d", ".d", ".z", },
     {".g"}
  }
  -- Digit map: `a` is the ASCII digit, `b` the matching Arabic-Indic digit.
  numbers = {
     {a="0", b="٠"},
     {a="1", b="١"},
     {a="2", b="٢"},
     {a="3", b="٣"},
     {a="4", b="٤"},
     {a="5", b="٥"},
     {a="6", b="٦"},
     {a="7", b="٧"},
     {a="8", b="٨"},
     {a="9", b="٩"}
  }
  -- Direct letter map: `a` is a single ASCII character, `b` the Arabic text
  -- it stands for (long vowels A/U/I carry their short vowel mark plus the
  -- letter of prolongation).
  -- Every key appears once: the original listed {a="f", b="ف"} twice with
  -- identical contents, so the first copy was redundant and has been dropped.
  -- NOTE(review): presumably applied entry by entry with string.gsub --
  -- confirm against the caller.
  raw = {
     {a="A", b="َا"},
     {a="U", b="ُو"},
     {a="I", b="ِي"},
     {a="b", b="ب"},
     {a="t", b="ت"},
     {a="j", b="ج"},
     {a="x", b="خ"},
     {a="d", b="د"},
     {a="r", b="ر"},
     {a="z", b="ز"},
     {a="s", b="س"},
     {a="`", b="ع"},
     {a="f", b="ف"},
     {a="q", b="ق"},
     {a="k", b="ك"},
     {a="l", b="ل"},
     {a="m", b="م"},
     {a="n", b="ن"},
     {a="h", b="ه"},
     {a="w", b="و"},
     {a="y", b="ي"},
     {a="T", b="ة"},
     {a="u", b="ُ"},
     {a="a", b="َ"},
     {a="i", b="ِ"}
  }
  -- Buckwalter input scheme -> ArabTeX notation.
  -- Each entry pairs a Lua pattern (`a`) with its replacement (`b`).  Magic
  -- characters are escaped with `%`; `%1` in `b` refers to a capture.
  -- Entries tagged "BW safe" give a plain-letter alternative to the
  -- punctuation-based code listed just before them.
  -- The list is order-sensitive: madda is first parked as "@"/"@@" and only
  -- turned back into ArabTeX codes near the end, after hamza and the
  -- digraphs have been handled.
  -- NOTE(review): presumably applied top to bottom with string.gsub --
  -- confirm against the caller.
  buckwalter = {
     --- hard coded madda: hold it for now
     {a="%|", b="@"},
     {a="M", b="@"}, -- BW safe
     {a="%_", b="--"}, -- taṭwīl
     -- prevent any unwanted šadda from being generated
     {a="bb", b="b|b"},
     {a="tt", b="t|t"},
     {a="vv", b="v|v"},
     {a="jj", b="j|j"},
     {a="HH", b="H|H"},
     {a="xx", b="x|x"},
     {a="dd", b="d|d"},
     {a="%*%*", b="*|*"},
     {a="VV", b="V|V"}, -- BW safe
     {a="rr", b="r|r"},
     {a="ss", b="s|s"},
     {a="%$%$", b="$|$"},
     {a="cc", b="c|c"}, -- BW safe
     {a="SS", b="S|S"},
     {a="DD", b="D|D"},
     {a="TT", b="T|T"},
     {a="ZZ", b="Z|Z"},
     {a="EE", b="E|E"},
     {a="gg", b="g|g"},
     {a="ff", b="f|f"},
     {a="qq", b="q|q"},
     {a="kk", b="k|k"},
     {a="ll", b="l|l"},
     {a="mm", b="m|m"},
     {a="nn", b="n|n"},
     {a="hh", b="h|h"},
     {a="ww", b="w|w"},
     {a="yy", b="y|y"},
     -- hamza begin
     -- look into this later on:
     -- {a="%>a?A", b="@@@"}, -- hold this (madda)
     -- {a="%>a\'([^uai])", b="@@@%1"}, -- hold this (madda)
     {a="a?A\'", b="@@"}, -- hold this (classic madda)
     {a="\'", b="|\"\'"},
     {a="C", b="|\"\'"}, -- BW safe
     {a="%>", b="a\"\'"},
     {a="O", b="a\"\'"}, -- BW safe
     {a="%&", b="w\"\'"},
     {a="W", b="w\"\'"}, -- BW safe
     {a="%<", b="i\"\'"},
     {a="I", b="i\"\'"}, -- BW safe
     {a="%]", b="y\"\'"},
     {a="Q", b="y\"\'"},
     -- hamza end
     -- trigraphs
     {a="^Aal%-?", b="al-"},
     {a="(%W)Aal%-?", b="%1al-"},
     {a="(%s)Aal%-?", b="%1al-"},
     {a="([%-%s])Al%-?", b="%1\'l-"},
     {a="^A", b="a"},
     {a="(%W)A", b="%1a"},
     {a="(%s)A", b="%1a"},
     {a="(al%-[%g])(%~)", b="%1"},
     {a="(\'l%-[%g])(%~)", b="%1"},
     -- digraphs begin
     {a="aA", b="A"},
     {a="uw([^%~])", b="U%1"},
     {a="iy([^%~])", b="I%1"},
     -- digraphs end
     -- madda: get it back now
     -- {a="%@%@%@", b="\'A"},
     {a="%@%@", b="A\'"}, -- give back classic madda
     {a="%@", b="A\"\'"}, -- hard coded madda
     -- šadda:
     {a="([%g])(%~)", b="%1%1"},
     {a="%`", b="_a"},
     {a="e", b="_a"}, -- BW safe
     {a="v", b="_t"},
     {a="H", b=".h"},
     {a="%*", b="_d"},
     {a="V", b="_d"}, -- BW safe
     {a="%$", b="^s"},
     {a="c", b="^s"}, -- BW safe
     {a="S", b=".s"},
     {a="D", b=".d"},
     {a="T", b=".t"},
     {a="Z", b=".z"},
     {a="E", b="`"},
     {a="g", b=".g"},
     {a="p", b="T"},
     {a="N", b="uN"},
     {a="F", b="aN"},
     {a="K", b="iN"},
     {a="o", b="\""},
     {a="P", b="O"}, -- pass on to \arbnull
     -- hard-coded connective alif
     {a="%[", b="ٱ"},
     {a="L", b="ٱ"} -- BW safe
  }
  -- Hamza orthography (default mode).
  -- Each entry pairs a Lua pattern (`a`) with its replacement (`b`); `%n` in
  -- `b` refers to the n-th capture.  The list is order-sensitive: specific
  -- contexts (hard-coded hamza, madda, initial, final) come before the
  -- generic "middle" rules at the end.
  -- "O" in patterns such as (O[%S]-O) delimits text bracketed by "O" markers
  -- (see the \arbnull remarks in the neighbouring tables).
  -- NOTE(review): presumably applied top to bottom with string.gsub --
  -- confirm against the caller.
  hamza = {
     -- next line for ʾiʿrāb hyphen
     {a="(')(%-)([uaiUAI])", b="%1%3"},
     -- next lines for ʾalif alone
     {a="(%.A)([uai]?)l%-(%^n)", b="ا%2ل%3"}, --additional (^n is lunar)
     -- the lām must be emitted here too, as in the unprefixed rule above
     {a="([%(%[%|%<%s%-O])(%.A)([uai]?)l%-(%^n)", b="%1ا%3ل%4"}, --additional (^n is lunar) --p
     {a="(%.A)([uai]?)l%-([%_%^%.]?[tdrzsnc])", b="ا%2ل%3%3"},
     {a="([%(%[%|%<%s%-O])(%.A)([uai]?)l%-([%_%^%.]?[tdrzsnc])", b="%1ا%3ل%4%4"}, --p
     {a="%.A", b="ا"},
     -- hard coded hamza
     {a="|\"'", b="ء"},
     {a="A\"'", b="آ"},
     {a="[au]\"'", b="أ"},
     {a="w\"'", b="ؤ"},
     {a="i\"'", b="إ"},
     {a="y\"'", b="ئ"},
     -- hamza takes tašdīd too
     {a="''([Uu])", b="ؤؤ%1"},
     {a="''([Aa])", b="أأ%1"},
     {a="''([Ii])", b="ئئ%1"},
     -- inseparable adverbial particle 'a- + 'a
     {a="\'(a)%-\'(a)", b="أ%1اأ%2"},
     -- initial long u and i (for a, see below)
     {a="%'%_U", b="أU"},
     {a="%'%_I", b="إI"},
     -- taḫfīfu 'l-hamza
     {a="'u'([^uaiUAI])", b="أU%1"},
     {a="'i'([^uaiUAI])", b="إI%1"},
     {a="([wf]a)%-\'([^uaiUAIl][^%-])", b="%1أ%2"},
     {a="^u'([^uaiUAI])", b="اU%1"},
     {a="([^uaiUAIYN][%s%(%[%<])u'([^uaiUAI])", b="%1اU%2"},
     {a="^i'([^uaiUAI])", b="اI%1"},
     {a="([^uaiUAIYN][%s%(%[%<])i'([^uaiUAI])", b="%1اI%2"},
     -- madda (historic writing below)
     {a="'a'([^uaiUAI])", b="آ%1"},
     {a="([^uiyUI])\'a?A([%_%^%.]?[%`%'btjghxdrzsfqklmnywAY])", b="%1آ%2"},
     {a="^\'a?A([%_%^%.]?[%`%'btjghxdrzsfqklmnywAY])", b="آ%1"},
     {a="\'a?A(O[%_%^%.]?[%`%'btjghxdrzsfqklmnywAY]-O)", b="آ"},
     {a="(%W)\'a?A([%_%^%.]?[%`%'btjghxdrzsfqklmnywAY])", b="%1آ%2"},
     {a="(A)(O%'[%S]-O)", b="آ"},
     {a="(A)(')(uN?%p*)$", b="aآء%3"},
     {a="(A)(')(uN?)(%p*%s)", b="aآء%3%4"},
     {a="(A)(')(iN?%p*)$", b="aآء%3"},
     {a="(A)(')(iN?)(%p*%s)", b="aآء%3%4"},
     {a="(A)(')([iI])", b="aآئ%3"}, -- historic madda
     {a="(A)(')(u)", b="aآؤ%3"}, -- historic madda
     {a="(A)(')", b="aآء"}, -- historic madda
     -- initial (needs both ^ and %W patterns)
     -- 'aw: the diphthong is to be resolved into 'awi' (next 8 lines)
     {a="^('aw)(O)('[%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-O)", b="%1i"},
     {a="(%W)('aw)(O)('[%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-O)", b="%1%2i"},
     {a="^('aw)(O)(\"?[uai])([%S]-O)", b="%1i"},
     {a="(%W)('aw)(O)(\"?[uai])([%S]-O)", b="%1%2i"},
     {a="^('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"},
     {a="(%W)('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"},
     {a="^('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"}, --p
     {a="(%W)('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"}, --p
     -- then the 'initial' rules for the remaining cases
     {a="^(')([ua])", b="أ%2"},
     {a="^(')(i)", b="إ%2"},
     -- consider replacing initial %W with [%s%(%[%<%-]:
     -- {a="(%W)(')([ua])", b="%1أ%3"},
     -- {a="(%W)(')(i)", b="%1إ%3"},
     {a="([%s%(%[%<%-])(')([ua])", b="%1أ%3"},
     {a="([%s%(%[%<%-])(')(i)", b="%1إ%3"},
     -- final
     -- mi'aT is special orthography (unlike ^say'aN and .zim'aN):
     -- {a="(%^sa%.?[yY])(\"?%|?)(%')(aN)", b="%1%2ئ%4"}, --new
     -- {a="(.zi?m)(%')(aN)", b="%1ئ%3"}, --new
     {a="(mi)(%')(a[Tt])", b="%1ائ%3"},
     {a="(mi)(%')(aN%_?[AY])", b="%1أ%3"},
     -- final hamzah is on the line after a letter of prolongation or a
     -- consonant with sukūn
     {a="([^Auai])(')(\"?[uai]N?)(%p*)$", b="%1ء%3%4"}, --new
     {a="([^Auai])(')(\"?[uai]N?)(%p*%s)", b="%1ء%3%4"},
     -- u
     {a="(u)(')([uai]N?%p*)$", b="%1ؤ%3"},
     {a="(u)(')([uai]N?)(%p*%s)", b="%1ؤ%3%4"},
     {a="(u)(')(%p*)$", b="%1ؤ%3"},
     {a="(u)(')(%p*%s)", b="%1ؤ%3"},
     -- a
     -- (punctuation is captured on its own so that it survives the rewrite)
     {a="(a)(')(A)(%p*)$", b="%1آ%4"},
     {a="(a)(')(A)(%p*%s)", b="%1آ%4"},
     {a="(a)(')([u]N?%p*)$", b="%1أ%3"},
     {a="(a)(')([u]N?)(%p*%s)", b="%1أ%3%4"},
     {a="(a)(')(a%p*)$", b="%1أ%3"},
     {a="(a)(')(a)(%p*%s)", b="%1أ%3%4"},
     {a="(a)(')(aN)(%p*)$", b="%1أً%4"},
     {a="(a)(')(aN)(%p*%s)", b="%1أً%4"},
     {a="(a)(')([i]N?%p*)$", b="%1إ%3"},
     {a="(a)(')([i]N?)(%p*%s)", b="%1إ%3%4"},
     {a="(a)(')(%p*)$", b="%1أ%3"},
     {a="(a)(')(%p*%s)", b="%1أ%3"},
     -- i
     {a="(i)(')([uai]N?%p*)$", b="%1ئ%3"},
     {a="(i)(')([uai]N?)(%p*%s)", b="%1ئ%3%4"},
     {a="(i)(')(%p*)$", b="%1ئ%3"},
     {a="(i)(')(%p*%s)", b="%1ئ%3"},
     --
     -- middle
     {a="([UIwy])(')", b="%1ء"}, --new
     -- {a="([Iy])(')", b="%1ئ"}, -- included in the above line
     -- hamza is alone after letters of prolongation or sukūn
     -- {a="([^uai])(')([uU])", b="%1ؤ%3"},
     -- {a="([^uai])(')(%_?[aAY])", b="%1أ%3"},
     -- {a="([^uai])(')([iI])", b="%1ئ%3"},
     {a="([^uai])(')(%_?[uaiUAYI])", b="%1ء%3"},
     {a="(u)(')([UI])", b="%1ء%3"},
     {a="(u)(')([u])", b="%1ؤ%3"},
     {a="(u)(')(%_?[aAY])", b="%1ؤ%3"},
     {a="(u)(')([i])", b="%1ئ%3"},
     {a="(a)(')(%_?[aAY])", b="%1أ%3"},
     {a="(a)(')([uU])", b="%1ؤ%3"},
     {a="(a)(')([iI])", b="%1ئ%3"},
     {a="(i)(')([UI])", b="%1ء%3"},
     {a="(i)(')(%_?[aAY])", b="%1ئ%3"},
     {a="(i)(')([u])", b="%1ئ%3"},
     {a="(i)(')([i])", b="%1ئ%3"},
     {a="(a)(')([^uaiUAI])", b="%1أ%3"},
     {a="(u)(')([^uaiUAI])", b="%1ؤ%3"},
     {a="(i)(')([^uaiUAI])", b="%1ئ%3"}
  }
  -- Hamza orthography, 'easy' mode.
  -- Same layout as the default hamza rule list: each entry pairs a Lua
  -- pattern (`a`) with its replacement (`b`), `%n` referring to the n-th
  -- capture, and the order of the entries matters.  Rules that depart from
  -- the default list are marked 'easy'.
  -- NOTE(review): presumably applied top to bottom with string.gsub --
  -- confirm against the caller.
  hamzaeasy = { -- differences marked below with 'easy'
     -- next line for ʾiʿrāb hyphen
     {a="(')(%-)([uaiUAI])", b="%1%3"},
     -- next lines for ʾalif alone (easy)
     {a="(%.A)([uai]?)l%-(%^n)", b="ا%2ل%3"}, --additional (^n is lunar)
     -- the lām must be emitted here too, as in the unprefixed rule above
     {a="([%(%[%|%<%s%-O])(%.A)([uai]?)l%-(%^n)", b="%1ا%3ل%4"}, --additional (^n is lunar) --p
     {a="(%.A)([uai]?)l%-([%_%^%.]?[tdrzsnc])", b="ا%2ل%3"},
     {a="([%(%[%|%<%s%-O])(%.A)([uai]?)l%-([%_%^%.]?[tdrzsnc])", b="%1ا%3ل%4"}, --p
     {a="%.A", b="ا"},
     -- hard coded hamza
     {a="|\"'", b="ء"},
     {a="A\"'", b="آ"},
     {a="[au]\"'", b="أ"},
     {a="w\"'", b="ؤ"},
     {a="i\"'", b="إ"},
     {a="y\"'", b="ئ"},
     -- hamza takes tašdīd too
     {a="''([Uu])", b="ؤؤ%1"},
     {a="''([Aa])", b="أأ%1"},
     {a="''([Ii])", b="ئئ%1"},
     -- inseparable adverbial particle 'a- + 'a
     {a="\'(a)%-\'(a)", b="أ%1اأ%2"},
     -- initial long u and i (for a, see below)
     {a="%'%_U", b="أU"},
     {a="%'%_I", b="إI"},
     -- taḫfīfu 'l-hamza
     {a="'u'([^uaiUAI])", b="أU%1"},
     {a="'i'([^uaiUAI])", b="إI%1"},
     {a="([wf]a)%-\'([^uaiUAIl][^%-])", b="%1أ%2"},
     {a="^u'([^uaiUAI])", b="اU%1"},
     {a="([^uaiUAIYN][%s%(%[%<])u'([^uaiUAI])", b="%1اU%2"},
     {a="^i'([^uaiUAI])", b="اI%1"},
     {a="([^uaiUAIYN][%s%(%[%<])i'([^uaiUAI])", b="%1اI%2"},
     -- madda (historic writing below)
     {a="'a'([^uaiUAI])", b="آ%1"},
     {a="([^uiyUI])\'a?A([%_%^%.]?[%`%'btjghxdrzsfqklmnywAY])", b="%1آ%2"},
     {a="^\'a?A([%_%^%.]?[%`%'btjghxdrzsfqklmnywAY])", b="آ%1"},
     {a="\'a?A(O[%_%^%.]?[%`%'btjghxdrzsfqklmnywAY]-O)", b="آ"},
     {a="(%W)\'a?A([%_%^%.]?[%`%'btjghxdrzsfqklmnywAY])", b="%1آ%2"},
     --easy (begin)
     {a="(A)(O%'[%S]-O)", b="ا"},
     {a="(A)(')(uN?%p*)$", b="aاء%3"},
     {a="(A)(')(uN?)(%p*%s)", b="aاء%3%4"},
     {a="(A)(')(iN?%p*)$", b="aاء%3"},
     {a="(A)(')(iN?)(%p*%s)", b="aاء%3%4"},
     {a="(A)(')([iI])", b="aائ%3"}, -- historic madda
     {a="(A)(')(u)", b="aاؤ%3"}, -- historic madda
     {a="(A)(')", b="aاء"}, -- historic madda
     --easy (end)
     -- initial (needs both ^ and %W patterns)
     -- 'aw: the diphthong is to be resolved into 'awi' (next 8 lines)
     {a="^('aw)(O)('[%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-O)", b="%1i"},
     {a="(%W)('aw)(O)('[%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-O)", b="%1%2i"},
     {a="^('aw)(O)(\"?[uai])([%S]-O)", b="%1i"},
     {a="(%W)('aw)(O)(\"?[uai])([%S]-O)", b="%1%2i"},
     {a="^('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"},
     {a="(%W)('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"},
     {a="^('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"},
     {a="(%W)('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"},
     -- then the 'initial' rules for the remaining cases
     {a="^(')([ua])", b="أ%2"},
     {a="^(')(i)", b="إ%2"},
     -- consider replacing initial %W with [%s%(%[%<%-]:
     -- {a="(%W)(')([ua])", b="%1أ%3"},
     -- {a="(%W)(')(i)", b="%1إ%3"},
     {a="([%s%(%[%<%-])(')([ua])", b="%1أ%3"},
     {a="([%s%(%[%<%-])(')(i)", b="%1إ%3"},
     -- final
     -- mi'aT is special orthography (unlike ^say'aN and .zim'aN)
     -- {a="(%^sa%.?[yY])(\"?%|?)(%')(aN)", b="%1%2ئ%4"}, --new
     -- {a="(.zi?m)(%')(aN)", b="%1ئ%3"}, --new
     {a="(mi)(%')(a[Tt])", b="%1ائ%3"},
     {a="(mi)(%')(aN%_?[AY])", b="%1أ%3"},
     -- easy (begin)
     -- The Munjid says that such words as radI'aN do not have the
     -- hamzah alone on the line, so take out the following two lines
     -- (final hamzah is on the line after a letter of prolongation or a
     -- consonant with sukūn)
     -- {a="([^Auai])(')(\"?[uai]N?)(%p*)$", b="%1ء%3%4"}, --new
     -- {a="([^Auai])(')(\"?[uai]N?)(%p*%s)", b="%1ء%3%4"},
     {a="([^Auai])(')(\"?aN)(%p*)$", b="%1ئ%3%4"}, --new
     {a="([^Auai])(')(\"?aN)(%p*%s)", b="%1ئ%3%4"}, --new
     {a="([^uai])(')(\"?a)(%p*)$", b="%1ء%3%4"}, --new
     {a="([^uai])(')(\"?a)(%p*%s)", b="%1ء%3%4"}, --new
     {a="([^uai])(')(\"?[ui]N?)(%p*)$", b="%1ء%3%4"}, --new
     {a="([^uai])(')(\"?[ui]N?)(%p*%s)", b="%1ء%3%4"}, --new
     --easy (end)
     -- u
     {a="(u)(')([uai]N?%p*)$", b="%1ؤ%3"},
     {a="(u)(')([uai]N?)(%p*%s)", b="%1ؤ%3%4"},
     {a="(u)(')(%p*)$", b="%1ؤ%3"},
     {a="(u)(')(%p*%s)", b="%1ؤ%3"},
     -- a
     -- (punctuation is captured on its own so that it survives the rewrite)
     {a="(a)(')(A)(%p*)$", b="%1آ%4"},
     {a="(a)(')(A)(%p*%s)", b="%1آ%4"},
     {a="(a)(')([u]N?%p*)$", b="%1أ%3"},
     {a="(a)(')([u]N?)(%p*%s)", b="%1أ%3%4"},
     {a="(a)(')(a%p*)$", b="%1أ%3"},
     {a="(a)(')(a)(%p*%s)", b="%1أ%3%4"},
     {a="(a)(')(aN)(%p*)$", b="%1أً%4"},
     {a="(a)(')(aN)(%p*%s)", b="%1أً%4"},
     {a="(a)(')([i]N?%p*)$", b="%1إ%3"},
     {a="(a)(')([i]N?)(%p*%s)", b="%1إ%3%4"},
     {a="(a)(')(%p*)$", b="%1أ%3"},
     {a="(a)(')(%p*%s)", b="%1أ%3"},
     -- i
     {a="(i)(')([uai]N?%p*)$", b="%1ئ%3"},
     {a="(i)(')([uai]N?)(%p*%s)", b="%1ئ%3%4"},
     {a="(i)(')(%p*)$", b="%1ئ%3"},
     {a="(i)(')(%p*%s)", b="%1ئ%3"},
     --
     -- middle
     {a="([Uw])(')", b="%1ء"}, --new
     {a="([Iy])(')", b="%1ئ"}, --easy
     {a="([^uai])(')([uU])", b="%1ؤ%3"},
     {a="([^uai])(')(%_?[aAY])", b="%1أ%3"},
     {a="([^uai])(')([iI])", b="%1ئ%3"},
     {a="(u)(')([uU])", b="%1ؤ%3"},
     {a="(u)(')(%_?[aAY])", b="%1ؤ%3"},
     {a="(u)(')([iI])", b="%1ئ%3"},
     {a="(a)(')(%_?[aAY])", b="%1أ%3"},
     {a="(a)(')([uU])", b="%1ؤ%3"},
     {a="(a)(')([iI])", b="%1ئ%3"},
     {a="(i)(')(%_?[aAY])", b="%1ئ%3"},
     {a="(i)(')([uU])", b="%1ئ%3"},
     {a="(i)(')([iI])", b="%1ئ%3"},
     {a="(a)(')([^uaiUAI])", b="%1أ%3"},
     {a="(u)(')([^uaiUAI])", b="%1ؤ%3"},
     {a="(i)(')([^uaiUAI])", b="%1ئ%3"}
  }
  -- Tanwīn (nunation) rules, default mode.
  -- Each entry pairs a Lua pattern (`a`) with its replacement (`b`); `%n` in
  -- `b` refers to the n-th capture.  The list is order-sensitive: the
  -- assimilation and quoted (") forms come before the generic uN/aN/iN
  -- rules at the end.
  -- NOTE(review): presumably applied top to bottom with string.gsub --
  -- confirm against the caller.
  tanwin = {
     -- assimilations (begin)
     {a="(O[%S]-)(%-?[uai]N[UI]?)(O)([rlmnwy])", b="%4%4"},
     {a="(%-?[uai]NU)(%s)([rlmnwy])", b="%1%2%3%3"},
     -- assimilations (end)
     {a="(O[%S]-)(%-?[uai]N[UI]?)(O)([uai])", b="%4"},
     {a="%-?uNU", b="ٌو"},
     {a="%-?aNU", b="ًوا"},
     {a="%-?iNU", b="ٍو"},
     -- assimilations (begin)
     {a="%-?(uN)(%s)([rlmnwy])", b="ٌ%2%3%3"},
     {a="(O[%S]-)(%-?aN)(_A)(O)([rlmnwy])", b="%5%5"},
     {a="(O[%S]-)(%-?aN)(Y)(O)([rlmnwy])", b="%5%5"},
     {a="%-?(aN)(_A)(%s)([rlmnwy])", b="ًى%3%4%4"},
     {a="%-?(aN)(Y)(%s)([rlmnwy])", b="ًى%3%4%4"},
     {a="(T)%-?(aN)(%s)([rlmnwy])", b="%1ً%3%4%4"},
     {a="(ء)%-?(aN)(%s)([rlmnwy])", b="%1%2%3%4%4"}, --new
     {a="([^TA])%-?(aN)(%s)([rlmnwy])", b="%1ًا%3%4%4"},
     {a="%-?(iNI?)(%s)([rlmnwy])", b="ٍ%2%3%3"},
     -- assimilations (end)
     {a="(O[%S]-)(%-?aN)(_A)(O)([uai])", b="%5"},
     {a="(O[%S]-)(%-?aN)(Y)(O)([uai])", b="%5"},
     -- quoted tanwīn (begin)
     {a="%-?(\"uN)", b=""},
     {a="(B)%-?(\"aN)", b="%1"},
     {a="%-?(\"aN)(_A)", b="ى"},
     {a="%-?(\"aN)(Y)", b="ى"},
     {a="(T)%-?(\"aN)", b="%1"},
     {a="([اآ])(ء)%-?(\"aN)", b="%1%2"}, --new
     {a="([^TA])%-?(\"aN)", b="%1ا"},
     {a="%-?(\"iNI?)", b=""},
     -- quoted tanwīn (end)
     {a="%-?(uN)", b="ٌ"},
     {a="(B)%-?(aN)", b="%1ً"},
     -- needed by \arbcolor:
     {a="%-?(aN)(O[%S]-%_AO)", b="ً"},
     {a="%-?(aN)(O[%S]-YO)", b="ً"},
     {a="(O[%S]-TO)%-?(aN)", b="ً"},
     {a="(O[%S]-)([اآ])(ء)(O)%-?(aN)", b="ً"}, --new
     {a="(O[%S]-[^TA]O)%-?(aN)", b="ًا"},
     --
     {a="%-?(aN)(_A)", b="ًى"},
     {a="%-?(aN)(Y)", b="ًى"},
     {a="(T)%-?(aN)", b="%1ً"},
     {a="([اآ])(ء)%-?(aN)", b="%1%2ً"}, --new
     {a="([^TA])%-?(aN)", b="%1ًا"},
     {a="%-?(iNI?)", b="ٍ"}
  }
  -- Tanwīn (nunation) rules, 'easy' mode.
  -- Same layout as the default tanwin list -- each entry pairs a Lua pattern
  -- (`a`) with its replacement (`b`) and the order of the entries matters --
  -- but the assimilation rules are disabled (kept below as comments).
  -- NOTE(review): presumably applied top to bottom with string.gsub --
  -- confirm against the caller.
  tanwineasy = { -- 'easy' requires some lines to be taken out:
     -- assimilations (begin)
     -- {a="(O[%S]-)(%-?[uai]N[UI]?)(O)([rlmnwy])", b="%4%4"},
     -- {a="(%-?[uai]NU)(%s)([rlmnwy])", b="%1%2%3%3"},
     -- assimilations (end)
     {a="(O[%S]-)(%-?[uai]N[UI]?)(O)([uai])", b="%4"},
     {a="%-?uNU", b="ٌو"},
     {a="%-?aNU", b="ًوا"},
     {a="%-?iNU", b="ٍو"},
     -- assimilations (begin)
     -- {a="%-?(uN)(%s)([rlmnwy])", b="ٌ%2%3%3"},
     -- {a="(O[%S]-)(%-?aN)(_A)(O)([rlmnwy])", b="%5%5"},
     -- {a="(O[%S]-)(%-?aN)(Y)(O)([rlmnwy])", b="%5%5"},
     -- {a="%-?(aN)(_A)(%s)([rlmnwy])", b="ًى%3%4%4"},
     -- {a="%-?(aN)(Y)(%s)([rlmnwy])", b="ًى%3%4%4"},
     -- {a="(T)%-?(aN)(%s)([rlmnwy])", b="%1ً%3%4%4"},
     -- {a="(ء)%-?(aN)(%s)([rlmnwy])", b="%1%2%3%4%4"}, --new
     -- {a="([^TA])%-?(aN)(%s)([rlmnwy])", b="%1ًا%3%4%4"},
     -- {a="%-?(iNI?)(%s)([rlmnwy])", b="ٍ%2%3%3"},
     -- assimilations (end)
     {a="(O[%S]-)(%-?aN)(_A)(O)([uai])", b="%5"},
     {a="(O[%S]-)(%-?aN)(Y)(O)([uai])", b="%5"},
     -- quoted tanwīn (begin)
     {a="%-?(\"uN)", b=""},
     {a="(B)%-?(\"aN)", b="%1"},
     {a="%-?(\"aN)(_A)", b="ى"},
     {a="%-?(\"aN)(Y)", b="ى"},
     {a="(T)%-?(\"aN)", b="%1"},
     {a="([اآ])(ء)%-?(\"aN)", b="%1%2"}, --new
     {a="([^TA])%-?(\"aN)", b="%1ا"},
     {a="%-?(\"iNI?)", b=""},
     -- quoted tanwīn (end)
     {a="%-?(uN)", b="ٌ"},
     {a="(B)%-?(aN)", b="%1ً"},
     -- needed by \arbcolor:
     {a="%-?(aN)(O[%S]-%_AO)", b="ً"},
     {a="%-?(aN)(O[%S]-YO)", b="ً"},
     {a="(O[%S]-TO)%-?(aN)", b="ً"},
     {a="(O[%S]-)([اآ])(ء)(O)%-?(aN)", b="ً"}, --new
     {a="(O[%S]-[^TA]O)%-?(aN)", b="ًا"},
     --
     {a="%-?(aN)(_A)", b="ًى"},
     {a="%-?(aN)(Y)", b="ًى"},
     {a="(T)%-?(aN)", b="%1ً"},
     {a="([اآ])(ء)%-?(aN)", b="%1%2ً"}, --new
     {a="([^TA])%-?(aN)", b="%1ًا"},
     {a="%-?(iNI?)", b="ٍ"}
  }
  -- Sequences of three or more characters, default mode: the article in its
  -- various contexts, diphthongs before a connective ʾalif, silent wāw/yāʾ.
  -- Each entry pairs a Lua pattern (`a`) with its replacement (`b`); `%n` in
  -- `b` refers to the n-th capture.  The list is order-sensitive: the more
  -- specific article contexts (lām, solar consonants, unstable hamza) are
  -- listed before the catch-all "what remains" rules.
  -- A consonant capture repeated in `b` (e.g. %2%2) yields a doubled letter.
  -- NOTE(review): presumably applied top to bottom with string.gsub --
  -- confirm against the caller.
  trigraphs = { -- trigraphs or more
     -- ʾalif al-waṣl: put it back on with \arbnull
     {a="(O[%S]-)([%'a]l%-)(O)(\"[uai])", b="ٱ"},
     {a="(O[%S]-)([%'a]l%-)(O)([uai])", b="ا"},
     -- 'llatI / 'llad_I
     {a="^'ll(a)([%_]?[dt])", b="الّ%1%2"},
     {a="([%(%[%|%<%s%-])'ll(a)([%_]?[dt])", b="%1الّ%2%3"}, --p
     -- law: the diphthong is to be resolved into 'awi' (next 8 lines)
     {a="^(law)(O)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-O)", b="%1i"},
     {a="(%W)(law)(O)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-O)", b="%1%2i"},
     {a="^(law)(O)(\"?[uai])([%S]-O)", b="%1i"},
     {a="(%W)(law)(O)(\"?[uai])([%S]-O)", b="%1%2i"},
     {a="^(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"},
     {a="(%W)(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"},
     {a="^(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"}, --p
     {a="(%W)(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"}, --p
     -- al- + lām
     {a="^(a)l%-(l)", b="ا%1ل%2%2"},
     {a="([%(%[%|%<%s%-O])(a)l%-(l)", b="%1ا%2ل%3%3"}, --p
     -- al- + solar consonant ('c' and '^n' are additional characters)
     {a="^(a)l%-(%^n)", b="ا%1ل%2"}, -- ^n is lunar
     {a="([%(%[%|%<%s%-O])(a)l%-(%^n)", b="%1ا%2ل%3"},-- ^n is lunar --p
     {a="^(a)l%-([%_%^%.]?[tdrzsnc])", b="ا%1ل%2%2"},
     {a="([%(%[%|%<%s%-O])(a)l%-([%_%^%.]?[tdrzsnc])", b="%1ا%2ل%3%3"}, --p
     -- assim. art. + solar consonant ('c' and '^n' are additional characters)
     {a="^(a)(%^n)%-", b="ا%1ل"}, -- ^n is lunar
     {a="([%(%[%|%<%s%-O])(a)(%^n)%-", b="%1ا%2ل"},-- ^n is lunar --p
     {a="^(a)([%_%^%.]?[tdrzsnc])%-", b="ا%1ل%2"},
     {a="([%(%[%|%<%s%-O])(a)([%_%^%.]?[tdrzsnc])%-", b="%1ا%2ل%3"}, --p
     -- al- + initial unstable hamza
     {a="^(a)l%-(\")([uai])", b="ا%1ل%3ٱ"},
     {a="([%(%[%|%<%s%-O])(a)l%-(\")([uai])", b="%1ا%2ل%4ٱ"}, --p
     {a="^(a)l%-([uai])", b="ا%1ل%2ا"},
     {a="([%(%[%|%<%s%-O])(a)l%-([uai])", b="%1ا%2ل%3ا"}, --p
     -- li-/la- + art. + initial unstable hamza is a special orthography
     {a="l([ai])%-l%-(\")([uai])", b="ل%1ل%3ٱ"},
     {a="l([ai])%-l%-([uai])", b="ل%1ل%2ا"},
     -- al- + lunar consonant (i.e. what remains)
     {a="^(a)l%-", b="ا%1ل"},
     {a="([%(%[%|%<%s%-O])(a)l%-", b="%1ا%2ل"}, --p
     -- diphthongs to be resolved before ʾalif conjunctionis
     {a="(aW)(O)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-O)", b="awuا"},
     {a="(aw)(O)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-O)", b="%1u"},
     {a="(ay)(O)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-O)", b="%1i"},
     {a="(aW)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="awuا%2%3"},
     {a="(aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1u%2%3"},
     {a="(ay)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"},
     -- art. with waṣla + lām
     {a="'l%-(l)", b="ال%1%1"},
     -- art. with waṣla + solar consonant
     -- ('c' and '^n' are additional characters)
     {a="'l%-(%^n)", b="ال%1"}, -- ^n is lunar
     {a="'l%-([%_%^%.]?[tdrzsnc])", b="ال%1%1"},
     -- li-/la- + art. + lām
     {a="l([ai])%-l%-(l)", b="ل%1%2%2"},
     -- assim. art. with waṣla + solar consonant ('c' and '^n' are
     -- additional characters)
     {a="'(%^n)%-", b="ال"}, -- ^n is lunar
     {a="'([%_%^%.]?[tdrzsnc])%-", b="ال%1"},
     -- li-/la- + art. + solar consonant is a special orthography
     -- ('c' and '^n' are additional characters)
     {a="l([ai])%-l%-(%^n)", b="ل%1ل%2"}, -- '^n' is lunar
     {a="l([ai])%-l%-([%_%^%.]?[tdrzsnc])", b="ل%1ل%2%2"},
     -- li-/la + assim. art. + solar consonant is a special orthography
     -- ('c' and '^n' are additional characters)
     {a="l([ai])%-(%^n)%-(%^n)", b="ل%1ل%3"}, -- ^n is lunar
     {a="l([ai])%-([%_%^%.]?[tdrzsnc])%-([%_%^%.]?[tdrzsnc])", b="ل%1ل%3%3"},
     -- art. with waṣla + initial unstable hamza
     {a="'l%-(\")([uai])", b="ال%2ٱ"},
     {a="'l%-([uai])", b="ال%1ا"},
     -- art. with waṣla + lunar consonant (i.e. what remains)
     {a="'l%-", b="ال"},
     -- the silent wāw
     {a="uU(%p*)$", b="uو%1"},
     {a="uU(%p*%s)", b="uو%1"},
     {a="aU(%p*)$", b="aو%1"},
     {a="aU(%p*%s)", b="aو%1"},
     {a="iU(%p*)$", b="iو%1"},
     {a="iU(%p*%s)", b="iو%1"},
     -- words ending in -āT with silent wāw/yāʾ
     {a="(_a)UA", b="%1وا"},
     {a="(_a)U", b="%1و"},
     {a="(_a)I", b="%1ي"}
  }
  -- Assimilation of n before r, l, m, n, w or y.
  -- Each entry pairs a Lua pattern (`a`) with its replacement (`b`):
  --   * n + whitespace + consonant: the consonant is written twice;
  --   * n + "O" + consonant + non-space run + "O": only the consonant is kept.
  -- NOTE(review): presumably applied with string.gsub -- confirm against
  -- the caller.
  idgham = {
     -- assimilations
     {a="(n)(%s)([rlmnwy])", b="%1%2%3%3"},
     {a="(n)(O)([rlmnwy])([%S]-O)", b="%3"}
  }
  -- Sequences of three or more characters, 'easy' mode.
  -- Same layout as the default trigraphs list -- each entry pairs a Lua
  -- pattern (`a`) with its replacement (`b`) and the order of the entries
  -- matters.  Rules marked 'easy' differ from the default list in that the
  -- consonant following the article is emitted once instead of twice.
  -- NOTE(review): presumably applied top to bottom with string.gsub --
  -- confirm against the caller.
  trigraphseasy = { -- differences marked below with 'easy'
     -- ʾalif al-waṣl: put it back on with \arbnull
     {a="(O[%S]-)([%'a]l%-)(O)(\"[uai])", b="ٱ"},
     {a="(O[%S]-)([%'a]l%-)(O)([uai])", b="ا"},
     -- Allah (easy)
     {a="l%-l_ah", b="l-ll_ah"},
     -- 'llatI / 'llad_I
     {a="^'ll(a)([%_]?[dt])", b="الّ%1%2"},
     {a="([%(%[%|%<%s%-])'ll(a)([%_]?[dt])", b="%1الّ%2%3"}, --p
     -- law: the diphthong is to be resolved into 'awi' (next 8 lines)
     {a="^(law)(O)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-O)", b="%1i"},
     {a="(%W)(law)(O)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-O)", b="%1%2i"},
     {a="^(law)(O)(\"?[uai])([%S]-O)", b="%1i"},
     {a="(%W)(law)(O)(\"?[uai])([%S]-O)", b="%1%2i"},
     {a="^(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"},
     {a="(%W)(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"},
     {a="^(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"}, --p
     {a="(%W)(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"}, --p
     -- al- + lām (easy)
     {a="^(a)l%-(l)", b="ا%1ل%2"},
     {a="([%(%[%|%<%s%-O])(a)l%-(l)", b="%1ا%2ل%3"}, --p
     -- al- + solar consonant (easy) ('c' and '^n' are additional characters)
     {a="^(a)l%-(%^n)", b="ا%1ل%2"}, -- ^n is lunar
     {a="([%(%[%|%<%s%-O])(a)l%-(%^n)", b="%1ا%2ل%3"}, -- ^n is lunar --p
     {a="^(a)l%-([%_%^%.]?[tdrzsnc])", b="ا%1ل%2"},
     {a="([%(%[%|%<%s%-O])(a)l%-([%_%^%.]?[tdrzsnc])", b="%1ا%2ل%3"}, --p
     -- assim. art. + solar consonant (easy) ('c' and '^n' are
     -- additional characters)
     {a="^(a)(%^n)%-", b="ا%1ل"}, -- ^n is lunar
     {a="([%(%[%|%<%s%-O])(a)(%^n)%-", b="%1ا%2ل"}, -- ^n is lunar --p
     {a="^(a)([%_%^%.]?[tdrzsnc])%-", b="ا%1ل"},
     {a="([%(%[%|%<%s%-O])(a)([%_%^%.]?[tdrzsnc])%-", b="%1ا%2ل"}, --p
     -- al- + initial unstable hamza
     {a="^(a)l%-(\")([uai])", b="ا%1ل%3ٱ"},
     {a="([%(%[%|%<%s%-O])(a)l%-(\")([uai])", b="%1ا%2ل%4ٱ"}, --p
     {a="^(a)l%-([uai])", b="ا%1ل%2ا"},
     {a="([%(%[%|%<%s%-O])(a)l%-([uai])", b="%1ا%2ل%3ا"}, --p
     -- li-/la- + art. + initial unstable hamza is a special orthography
     {a="l([ai])%-l%-(\")([uai])", b="ل%1ل%3ٱ"},
     {a="l([ai])%-l%-([uai])", b="ل%1ل%2ا"},
     -- al- + lunar consonant (i.e. what remains)
     {a="^(a)l%-", b="ا%1ل"},
     {a="([%(%[%|%<%s%-O])(a)l%-", b="%1ا%2ل"}, --p
     -- diphthongs to be resolved before ʾalif conjunctionis
     {a="(aW)(O)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-O)", b="awuا"},
     {a="(aw)(O)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-O)", b="%1u"},
     {a="(ay)(O)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-O)", b="%1i"},
     {a="(aW)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="awuا%2%3"},
     {a="(aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1u%2%3"},
     {a="(ay)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"},
     -- art. with waṣla + lām (easy)
     {a="'l%-(l)", b="ال%1"},
     -- art. with waṣla + solar consonant (easy)
     -- ('c' and '^n' are additional characters)
     {a="'l%-(%^n)", b="ال%1"}, -- ^n is lunar
     {a="'l%-([%_%^%.]?[tdrzsnc])", b="ال%1"},
     -- li-/la- + art. + lām (easy)
     {a="l([ai])%-l%-(l)", b="ل%1%2"},
     -- assim. art. with waṣla + solar consonant (easy)
     -- ('c' and '^n' are additional characters)
     {a="'(%^n)%-", b="ال"}, -- ^n is lunar
     {a="'([%_%^%.]?[tdrzsnc])%-", b="ال"},
     -- li-/la- + art. + solar consonant is a special orthography (easy)
     -- ('c' and '^n' are additional characters)
     {a="l([ai])%-l%-(%^n)", b="ل%1ل%2"}, -- ^n is lunar
     {a="l([ai])%-l%-([%_%^%.]?[tdrzsnc])", b="ل%1ل%2"},
     -- li-/la + assim. art. + solar consonant is a special orthography (easy)
     -- ('c' and '^n' are additional characters)
     {a="l([ai])%-(%^n)%-(%^n)", b="ل%1ل%3"}, -- ^n is lunar
     {a="l([ai])%-([%_%^%.]?[tdrzsnc])%-([%_%^%.]?[tdrzsnc])", b="ل%1ل%3"},
     -- art. with waṣla + initial unstable hamza
     {a="'l%-(\")([uai])", b="ال%2ٱ"},
     {a="'l%-([uai])", b="ال%1ا"},
     -- art. with waṣla + lunar consonant (i.e. what remains)
     {a="'l%-", b="ال"},
     -- the silent wāw
     {a="uU(%p*)$", b="uو%1"},
     {a="uU(%p*%s)", b="uو%1"},
     {a="aU(%p*)$", b="aو%1"},
     {a="aU(%p*%s)", b="aو%1"},
     {a="iU(%p*)$", b="iو%1"},
     {a="iU(%p*%s)", b="iو%1"},
     -- words ending in -āT with silent wāw/yāʾ
     {a="(_a)UA", b="%1وا"},
     {a="(_a)U", b="%1و"},
     {a="(_a)I", b="%1ي"}
  }
  -- Rule table `digraphs`: each entry pairs a Lua pattern (field `a`) with
  -- its replacement (field `b`).  Geminated forms are listed before the
  -- corresponding simple forms, so the entries are presumably meant to be
  -- applied one after the other in this order -- do not reorder them.
  digraphs = {
     -- case endings (ʾiʿrāb) written after a hyphen: the hyphen is removed
     -- and the ending (with its optional straight double quote) is kept
     { a = "(%-)(\"?[UI]na)(%p*%s)", b = "%2%3" },
     { a = "(%-)(\"?[UI]na)(%p*)$", b = "%2%3" },
     { a = "(%-)(\"?At[ui])(%p*%s)", b = "%2%3" },
     { a = "(%-)(\"?At[ui])(%p*)$", b = "%2%3" },
     { a = "(%-)(\"?Ani)(%p*%s)", b = "%2%3" },
     { a = "(%-)(\"?Ani)(%p*)$", b = "%2%3" },
     { a = "(%-)(\"?ayni)(%p*%s)", b = "%2%3" },
     { a = "(%-)(\"?ayni)(%p*)$", b = "%2%3" },
     { a = "(%-)(\"?[uai])(%p*%s)", b = "%2%3" },
     { a = "(%-)(\"?[uai])(%p*)$", b = "%2%3" },
     -- end of the case endings

     -- a straight double quote before an initial short vowel yields a
     -- connective ʾalif
     { a = "^\"[uai]", b = "ٱ" },
     { a = "([%(%[%|%<%s%-])\"[uai]", b = "%1ٱ" }, --p

     -- diphthongs that have to be resolved before ʾalif conjunctionis
     { a = "(aW)(O)(\"?[uai])([%S]-O)", b = "awuا" },
     { a = "(aW)(%s)([%(%[%|%<]?)([uai])", b = "awuا%2%3%4" }, --p
     { a = "(aw)(O)(\"?[uai])([%S]-O)", b = "%1u" },
     { a = "(aw)(%s)([%(%[%|%<]?)(\"?[uai])", b = "%1u%2%3ا" }, --p
     { a = "(ay)(O)(\"?[uai])([%S]-O)", b = "%1i" },
     { a = "(ay)(%s)([%(%[%|%<]?)(\"?[uai])", b = "%1i%2%3ا" }, --p

     -- hyphen followed by an initial alif without hamza
     { a = "([uai]%-)(\"?[uai])([%^%_%.%`]?)([%aإأؤئ])", b = "%1ا%3%4" },

     -- initial alif without hamza, at the very beginning of the string
     { a = "^([%(%[%|%<]?)(\"?[uai])", b = "%1ا%2" }, --p

     -- initial alif without hamza, after an O...O span or after whitespace
     { a = "(O[%S]-)([uaiUAIY])(O)(\"?[uai])", b = "ا" },
     { a = "(%s)([%(%[%|%<]?)(\"?[uai])", b = "%1%2ا" }, --p

     -- double hyphen
     { a = "%-%-", b = "ـ" },

     -- doubled hamza carriers and doubled b/B
     { a = "ؤؤ", b = "ؤّ" },
     { a = "أأ", b = "أّ" },
     { a = "ئئ", b = "ئّ" },
     { a = "bb", b = "بّ" },
     { a = "BB", b = "ـّ" },

     -- a prefixed consonant directly followed by another consonant of the
     -- same set: insert "|" between the two
     { a = "([%_%^%.])([tghdsz])([tghdsz])", b = "%1%2|%3" },
     -- the same for the additional characters
     { a = "([%_%^%.])([cn])([cn])", b = "%1%2|%3" },

     -- doubled consonants: letter followed by šaddah
     { a = "tt", b = "تّ" },
     { a = "%_t%_t", b = "ثّ" },
     { a = "jj", b = "جّ" },
     { a = "%^g%^g", b = "جّ" },
     { a = "%.h%.h", b = "حّ" },
     { a = "xx", b = "خّ" },
     { a = "%_h%_h", b = "خّ" },
     { a = "dd", b = "دّ" },
     { a = "%_d%_d", b = "ذّ" },
     { a = "rr", b = "رّ" },
     { a = "zz", b = "زّ" },
     { a = "ss", b = "سّ" },
     { a = "%^s%^s", b = "شّ" },
     { a = "%.s%.s", b = "صّ" },
     { a = "%.d%.d", b = "ضّ" },
     { a = "%.t%.t", b = "طّ" },
     { a = "%.z%.z", b = "ظّ" },
     { a = "%`%`", b = "عّ" },
     { a = "%.g%.g", b = "غّ" },
     { a = "ff", b = "فّ" },
     { a = "qq", b = "قّ" },
     { a = "kk", b = "كّ" },
     { a = "ll", b = "لّ" },
     { a = "mm", b = "مّ" },
     { a = "nn", b = "نّ" },
     { a = "hh", b = "هّ" },
     { a = "ww", b = "وّ" },
     { a = "yy", b = "يّ" },
     { a = "%.y%.y", b = "ىّ" },

     -- additional characters followed by šaddah
     { a = "pp", b = "پّ" },
     { a = "vv", b = "ڤّ" },
     { a = "gg", b = "گّ" },
     { a = "%^c%^c", b = "چّ" },
     { a = "%^z%^z", b = "ژّ" },
     { a = "%^n%^n", b = "ڭّ" },

     -- two-character codes for single consonants
     { a = "_t", b = "ث" },
     { a = "%^g", b = "ج" },
     { a = "%.h", b = "ح" },
     { a = "_h", b = "خ" },
     { a = "_d", b = "ذ" },
     { a = "%^s", b = "ش" },
     { a = "%.s", b = "ص" },
     { a = "%.d", b = "ض" },
     { a = "%.t", b = "ط" },
     { a = "%.z", b = "ظ" },
     { a = "%.g", b = "غ" },
     { a = "%.y", b = "ى" },

     -- two-character codes for the additional characters
     { a = "%^c", b = "چ" },
     { a = "%^z", b = "ژ" },
     { a = "%^n", b = "ڭ" },

     -- remaining two-character codes
     { a = "(U)(A)", b = "%1ا" },
     { a = "WA", b = "وا" },
     { a = "(a)W\"", b = "%1وْا" },
     { a = "(a)W", b = "%1وا" },
     { a = "_A", b = "aى" },
     { a = "_u", b = "ٗ" },
     { a = "_a", b = "ٰ" },
     { a = "_i", b = "ٖ" },
     { a = "%.b", b = "ٮ" },
     { a = "%.f", b = "ڡ" },
     { a = "%.q", b = "ٯ" },
     { a = "%.k", b = "ک" },
     { a = "%.n", b = "ں" },
     { a = "%^d", b = "ڊ" }
  }
  -- Rule table `single`: one-character transliteration codes and their
  -- Arabic letters, followed by the rules for the straight double quote,
  -- the hard-coded sukūn, the hyphen and the tatweel code.  Each entry
  -- pairs a Lua pattern (field `a`) with its replacement (field `b`).
  -- The table used to list the rule for "f" twice; the second copy could
  -- never match after the first one had run, so it has been dropped.
  single = {
     { a = "b", b = "ب" },
     { a = "t", b = "ت" },
     { a = "j", b = "ج" },
     { a = "x", b = "خ" },
     { a = "d", b = "د" },
     { a = "r", b = "ر" },
     { a = "z", b = "ز" },
     { a = "s", b = "س" },
     { a = "f", b = "ف" },
     { a = "`", b = "ع" },
     { a = "q", b = "ق" },
     { a = "k", b = "ك" },
     { a = "l", b = "ل" },
     { a = "m", b = "م" },
     { a = "n", b = "ن" },
     { a = "h", b = "ه" },
     { a = "w", b = "و" },
     { a = "y", b = "ي" },
     { a = "T", b = "ة" },
     -- additional characters
     { a = "p", b = "پ" },
     { a = "v", b = "ڤ" },
     { a = "g", b = "گ" },
     -- a straight double quote becomes a sukūn at the end of the string,
     -- before a non-alphanumeric character, or before anything that is
     -- not one of u, a, i, U, A, I
     { a = "\"$", b = "ْ" },
     { a = "\"(%W)", b = "ْ%1" },
     { a = "\"([^uaiUAI])", b = "ْ%1" },
     { a = "o", b = "ْ" }, -- hard-coded sukūn
     -- a hyphen is dropped unless it touches a digit
     { a = "([^0-9])%-([^0-9])", b = "%1%2" },
     { a = "B", b = "ـ" }
  }
  -- Rule table `longv`: codes for the long vowels (A, U, I, Y).  Each entry
  -- pairs a Lua pattern (field `a`) with its replacement (field `b`).
  longv = {
     -- preceded by a straight double quote: the bare letter only
     { a = "\"A", b = "ا" },
     { a = "\"U", b = "و" },
     { a = "\"I", b = "ي" },
     { a = "\"Y", b = "ى" },
     -- plain A, U, I: short-vowel mark followed by the letter
     { a = "A", b = "َا" },
     { a = "U", b = "ُو" },
     { a = "I", b = "ِي" },
     -- Y keeps a preceding a/i; a bare Y gets an "a" in front of the letter
     { a = "aY", b = "aى" },
     { a = "iY", b = "iى" },
     { a = "Y", b = "aى" }
  }
  -- Rule table `shortv`: codes for the short vowels u, a, i.  Each entry
  -- pairs a Lua pattern (field `a`) with its replacement (field `b`).
  shortv = {
     -- preceded by a straight double quote: removed altogether
     { a = "\"u", b = "" },
     { a = "\"a", b = "" },
     { a = "\"i", b = "" },
     -- dotted form, with an optional hyphen in front
     { a = "%-?%.u", b = "ُ" },
     { a = "%-?%.a", b = "َ" },
     { a = "%-?%.i", b = "ِ" },
     -- plain form
     { a = "u", b = "ُ" },
     { a = "a", b = "َ" },
     { a = "i", b = "ِ" }
  }
  -- Rule table `punctuation`: brackets and punctuation marks.  Each entry
  -- pairs a Lua pattern (field `a`) with its replacement (field `b`).
  punctuation = {
     -- doubled parentheses
     { a = "%(%(", b = "﴿" },
     { a = "%)%)", b = "﴾" },
     -- Opening and closing delimiters are first rewritten to the temporary
     -- markers "+@x" / "-@x"; the markers are then rewritten to the
     -- opposite delimiter.  Applied in this order, each pair is swapped.
     { a = "%(", b = "+@(" },
     { a = "%)", b = "-@(" },
     { a = "%+%@%(", b = ")" },
     { a = "%-%@%(", b = "(" },
     { a = "%<", b = "+@<" },
     { a = "%>", b = "-@<" },
     { a = "%+%@%<", b = ">" },
     { a = "%-%@%<", b = "<" },
     { a = "%[", b = "+@[" },
     { a = "%]", b = "-@[" },
     { a = "%+%@%[", b = "]" },
     { a = "%-%@%[", b = "[" },
     { a = "%.", b = "." },
     -- Comma: every comma becomes the Arabic comma, then one that directly
     -- follows a digit is turned back into ",".  These two rules replace a
     -- former single rule that only matched a comma after a non-digit;
     -- the change makes the Arabic comma work after \abraces{}.
     { a = "%,", b = "،" },
     { a = "([%d])%،", b = "%1," },
     { a = "%?", b = "؟" },
     { a = "%;", b = "؛" }
  }
  -- Rule table `null`: final clean-up rules.  Each entry pairs a Lua
  -- pattern (field `a`) with its replacement (field `b`).
  null = {
     -- "&" becomes the zero-width joiner U+200D.  It is spelled with
     -- decimal byte escapes (its UTF-8 encoding) so that the invisible
     -- character cannot be lost when the file is edited or copied.
     { a = "%&", b = "\226\128\141" },
     -- the "|" separator is removed
     { a = "%|", b = "" },
     -- a leading hyphen, or a hyphen after a non-digit, is removed
     { a = "^%-", b = "" },
     { a = "([^0-9])(%-)", b = "%1" },
     -- anything enclosed between two "O" markers, without whitespace in
     -- between, is removed together with the markers
     { a = "O[%S]-O", b = "" },
     -- a "^" or "_" still present at this point is replaced by ">??<",
     -- followed by the character that came after it
     { a = "[%^%_](.)", b = ">??<%1" }
  }