-- arabluatex_voc.lua
--[[
This file is part of the `arabluatex' package
ArabLuaTeX -- Processing ArabTeX notation under LuaLaTeX
Copyright (C) 2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023
Robert Alessi <alessi@robertalessi.net>
Permission to use, copy, modify, and distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
Please send error reports and suggestions for improvements to Robert
Alessi <alessi@robertalessi.net>
--]]
  19. arbmarks = {
  20. {a="@bismillah", b="\\arabicfont{}^^^^fdfd", c="\\arb[trans]{bi-ismi \\uc{'l-l_ahi} 'l-ra.hm_ani 'l-ra.hImi}"},
  21. {a="@salam", b="\\arabicfont{}^^^^fdf5", c="\\arb[trans]{.sall_A\\arbnull{'l-l_ahu} \\uc{'l-l_ahu} `alay-hi wa-sallama}"},
  22. {a="@jalla", b="\\arabicfont{}^^^^fdfb", c="\\arb[trans]{^galla ^galAla-hu}"},
  23. {a="@slm", b="\\arabicfont{}^^^^fdfa", c="\\arb[trans]{.sall_A\\arbnull{'l-l_ahu} \\uc{'l-l_ahu} `alay-hi wa-sallama}"}
  24. }
  25. abjad = {
  26. {"a\"'", "b", "j", "d", "h", "w", "z", ".h", ".t"},
  27. {"y", "k", "l", "m", "n", "s", "`", "f", ".s", },
  28. {"q", "r", "^s", "t", "_t", "x", "_d", ".d", ".z", },
  29. {".g"}
  30. }
  31. numbers = {
  32. {a="0", b="٠"},
  33. {a="1", b="١"},
  34. {a="2", b="٢"},
  35. {a="3", b="٣"},
  36. {a="4", b="٤"},
  37. {a="5", b="٥"},
  38. {a="6", b="٦"},
  39. {a="7", b="٧"},
  40. {a="8", b="٨"},
  41. {a="9", b="٩"}
  42. }
  43. raw = {
  44. {a="A", b="َا"},
  45. {a="U", b="ُو"},
  46. {a="I", b="ِي"},
  47. {a="b", b="ب"},
  48. {a="t", b="ت"},
  49. {a="j", b="ج"},
  50. {a="x", b="خ"},
  51. {a="d", b="د"},
  52. {a="r", b="ر"},
  53. {a="z", b="ز"},
  54. {a="s", b="س"},
  55. {a="f", b="ف"},
  56. {a="`", b="ع"},
  57. {a="f", b="ف"},
  58. {a="q", b="ق"},
  59. {a="k", b="ك"},
  60. {a="l", b="ل"},
  61. {a="m", b="م"},
  62. {a="n", b="ن"},
  63. {a="h", b="ه"},
  64. {a="w", b="و"},
  65. {a="y", b="ي"},
  66. {a="T", b="ة"},
  67. {a="u", b="ُ"},
  68. {a="a", b="َ"},
  69. {a="i", b="ِ"}
  70. }
  71. buckwalter = {
  72. --- hard coded madda: hold it for now
  73. {a="%|", b="@"},
  74. {a="M", b="@"}, -- BW safe
  75. {a="%_", b="--"}, -- taṭwīl
  76. -- prevent any unwanted šadda from being generated
  77. {a="bb", b="b|b"},
  78. {a="tt", b="t|t"},
  79. {a="vv", b="v|v"},
  80. {a="jj", b="j|j"},
  81. {a="HH", b="H|H"},
  82. {a="xx", b="x|x"},
  83. {a="dd", b="d|d"},
  84. {a="%*%*", b="*|*"},
  85. {a="VV", b="V|V"}, -- BW safe
  86. {a="rr", b="r|r"},
  87. {a="ss", b="s|s"},
  88. {a="%$%$", b="$|$"},
  89. {a="cc", b="c|c"}, -- BW safe
  90. {a="SS", b="S|S"},
  91. {a="DD", b="D|D"},
  92. {a="TT", b="T|T"},
  93. {a="ZZ", b="Z|Z"},
  94. {a="EE", b="E|E"},
  95. {a="gg", b="g|g"},
  96. {a="ff", b="f|f"},
  97. {a="qq", b="q|q"},
  98. {a="kk", b="k|k"},
  99. {a="ll", b="l|l"},
  100. {a="mm", b="m|m"},
  101. {a="nn", b="n|n"},
  102. {a="hh", b="h|h"},
  103. {a="ww", b="w|w"},
  104. {a="yy", b="y|y"},
  105. -- hamza begin
  106. -- look into this later on:
  107. -- {a="%>a?A", b="@@@"}, -- hold this (madda)
  108. -- {a="%>a\'([^uai])", b="@@@%1"}, -- hold this (madda)
  109. {a="a?A\'", b="@@"}, -- hold this (classic madda)
  110. {a="\'", b="|\"\'"},
  111. {a="C", b="|\"\'"}, -- BW safe
  112. {a="%>", b="a\"\'"},
  113. {a="O", b="a\"\'"}, -- BW safe
  114. {a="%&", b="w\"\'"},
  115. {a="W", b="w\"\'"}, -- BW safe
  116. {a="%<", b="i\"\'"},
  117. {a="I", b="i\"\'"}, -- BW safe
  118. {a="%]", b="y\"\'"},
  119. {a="Q", b="y\"\'"},
  120. -- hamza end
  121. -- trigraphs
  122. {a="^Aal%-?", b="al-"},
  123. {a="(%W)Aal%-?", b="%1al-"},
  124. {a="(%s)Aal%-?", b="%1al-"},
  125. {a="([%-%s])Al%-?", b="%1\'l-"},
  126. {a="^A", b="a"},
  127. {a="(%W)A", b="%1a"},
  128. {a="(%s)A", b="%1a"},
  129. {a="(al%-[%g])(%~)", b="%1"},
  130. {a="(\'l%-[%g])(%~)", b="%1"},
  131. -- digraphs begin
  132. {a="aA", b="A"},
  133. {a="uw([^%~])", b="U%1"},
  134. {a="iy([^%~])", b="I%1"},
  135. -- digraphs end
  136. -- madda: get it back now
  137. -- {a="%@%@%@", b="\'A"},
  138. {a="%@%@", b="A\'"}, -- give back classic madda
  139. {a="%@", b="A\"\'"}, -- hard coded madda
  140. -- šadda:
  141. {a="([%g])(%~)", b="%1%1"},
  142. {a="%`", b="_a"},
  143. {a="e", b="_a"}, -- BW safe
  144. {a="v", b="_t"},
  145. {a="H", b=".h"},
  146. {a="%*", b="_d"},
  147. {a="V", b="_d"}, -- BW safe
  148. {a="%$", b="^s"},
  149. {a="c", b="^s"}, -- BW safe
  150. {a="S", b=".s"},
  151. {a="D", b=".d"},
  152. {a="T", b=".t"},
  153. {a="Z", b=".z"},
  154. {a="E", b="`"},
  155. {a="g", b=".g"},
  156. {a="p", b="T"},
  157. {a="N", b="uN"},
  158. {a="F", b="aN"},
  159. {a="K", b="iN"},
  160. {a="o", b="\""},
  161. {a="P", b="O"}, -- pass on to \arbnull
  162. -- hard-coded connective alif
  163. {a="%[", b="ٱ"},
  164. {a="L", b="ٱ"} -- BW safe
  165. }
  166. hamza = {
  167. -- next line for ʾiʿrāb hyphen
  168. {a="(')(%-)([uaiUAI])", b="%1%3"},
  169. -- next lines for ʾalif alone
  170. {a="(%.A)([uai]?)l%-(%^n)", b="ا%2ل%3"}, --additional (^n is lunar)
  171. {a="([%(%[%|%<%s%-O])(%.A)([uai]?)l%-(%^n)", b="%1ا%3%4"}, --additional (^n is lunar) --p
  172. {a="(%.A)([uai]?)l%-([%_%^%.]?[tdrzsnc])", b="ا%2ل%3%3"},
  173. {a="([%(%[%|%<%s%-O])(%.A)([uai]?)l%-([%_%^%.]?[tdrzsnc])", b="%1ا%3ل%4%4"}, --p
  174. {a="%.A", b="ا"},
  175. -- hard coded hamza
  176. {a="|\"'", b="ء"},
  177. {a="A\"'", b="آ"},
  178. {a="[au]\"'", b="أ"},
  179. {a="w\"'", b="ؤ"},
  180. {a="i\"'", b="إ"},
  181. {a="y\"'", b="ئ"},
  182. -- hamza takes tašdīd too
  183. {a="''([Uu])", b="ؤؤ%1"},
  184. {a="''([Aa])", b="أأ%1"},
  185. {a="''([Ii])", b="ئئ%1"},
  186. -- inseparable adverbial particle 'a- + 'a
  187. {a="\'(a)%-\'(a)", b="أ%1اأ%2"},
  188. -- initial long u and i (for a, see below)
  189. {a="%'%_U", b="أU"},
  190. {a="%'%_I", b="إI"},
  191. -- taḫfīfu 'l-hamza
  192. {a="'u'([^uaiUAI])", b="أU%1"},
  193. {a="'i'([^uaiUAI])", b="إI%1"},
  194. {a="([wf]a)%-\'([^uaiUAIl][^%-])", b="%1أ%2"},
  195. {a="^u'([^uaiUAI])", b="اU%1"},
  196. {a="([^uaiUAIYN][%s%(%[%<])u'([^uaiUAI])", b="%1اU%2"},
  197. {a="^i'([^uaiUAI])", b="اI%1"},
  198. {a="([^uaiUAIYN][%s%(%[%<])i'([^uaiUAI])", b="%1اI%2"},
  199. -- madda (historic writing below)
  200. {a="'a'([^uaiUAI])", b="آ%1"},
  201. {a="([^uiyUI])\'a?A([%_%^%.]?[%`%'btjghxdrzsfqklmnywAY])", b="%1آ%2"},
  202. {a="^\'a?A([%_%^%.]?[%`%'btjghxdrzsfqklmnywAY])", b="آ%1"},
  203. {a="\'a?A(O[%_%^%.]?[%`%'btjghxdrzsfqklmnywAY]-O)", b="آ"},
  204. {a="(%W)\'a?A([%_%^%.]?[%`%'btjghxdrzsfqklmnywAY])", b="%1آ%2"},
  205. {a="(A)(O%'[%S]-O)", b="آ"},
  206. {a="(A)(')(uN?%p*)$", b="aآء%3"},
  207. {a="(A)(')(uN?)(%p*%s)", b="aآء%3%4"},
  208. {a="(A)(')(iN?%p*)$", b="aآء%3"},
  209. {a="(A)(')(iN?)(%p*%s)", b="aآء%3%4"},
  210. {a="(A)(')([iI])", b="aآئ%3"}, -- historic madda
  211. {a="(A)(')(u)", b="aآؤ%3"}, -- historic madda
  212. {a="(A)(')", b="aآء"}, -- historic madda
  213. -- initial (needs both ^ and %W patterns)
  214. -- 'aw: the diphthong is to be resolved into 'awi' (next 8 lines)
  215. {a="^('aw)(O)('[%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-O)", b="%1i"},
  216. {a="(%W)('aw)(O)('[%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-O)", b="%1%2i"},
  217. {a="^('aw)(O)(\"?[uai])([%S]-O)", b="%1i"},
  218. {a="(%W)('aw)(O)(\"?[uai])([%S]-O)", b="%1%2i"},
  219. {a="^('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"},
  220. {a="(%W)('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"},
  221. {a="^('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"}, --p
  222. {a="(%W)('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"}, --p
  223. -- then the 'initial' rules for the remaining cases
  224. {a="^(')([ua])", b="أ%2"},
  225. {a="^(')(i)", b="إ%2"},
  226. -- consider replacing initial %W with [%s%(%[%<%-]:
  227. -- {a="(%W)(')([ua])", b="%1أ%3"},
  228. -- {a="(%W)(')(i)", b="%1إ%3"},
  229. {a="([%s%(%[%<%-])(')([ua])", b="%1أ%3"},
  230. {a="([%s%(%[%<%-])(')(i)", b="%1إ%3"},
  231. -- final
  232. -- mi'aT is special orthography (unlike ^say'aN and .zim'aN):
  233. -- {a="(%^sa%.?[yY])(\"?%|?)(%')(aN)", b="%1%2ئ%4"}, --new
  234. -- {a="(.zi?m)(%')(aN)", b="%1ئ%3"}, --new
  235. {a="(mi)(%')(a[TtH])", b="%1ائ%3"},
  236. {a="(mi)(%')(aN%_?[AY])", b="%1أ%3"},
  237. -- final hamzah is on the line after a letter of prolongation or a
  238. -- consonant with sukūn
  239. {a="([^Auai])(')(\"?[uai]N?)(%p*)$", b="%1ء%3%4"}, --new
  240. {a="([^Auai])(')(\"?[uai]N?)(%p*%s)", b="%1ء%3%4"},
  241. -- u
  242. {a="(u)(')([uai]N?%p*)$", b="%1ؤ%3"},
  243. {a="(u)(')([uai]N?)(%p*%s)", b="%1ؤ%3%4"},
  244. {a="(u)(')(%p*)$", b="%1ؤ%3"},
  245. {a="(u)(')(%p*%s)", b="%1ؤ%3"},
  246. -- a
  247. {a="(a)(')(A%p*)$", b="%1آ"},
  248. {a="(a)(')(A)(%p*%s)", b="%1آ%4"},
  249. {a="(a)(')([u]N?%p*)$", b="%1أ%3"},
  250. {a="(a)(')([u]N?)(%p*%s)", b="%1أ%3%4"},
  251. {a="(a)(')(a%p*)$", b="%1أ%3"},
  252. {a="(a)(')(a)(%p*%s)", b="%1أ%3%4"},
  253. {a="(a)(')(aN%p*)$", b="%1أً"},
  254. {a="(a)(')(aN)(%p*%s)", b="%1أً%4"},
  255. {a="(a)(')([i]N?%p*)$", b="%1إ%3"},
  256. {a="(a)(')([i]N?)(%p*%s)", b="%1إ%3%4"},
  257. {a="(a)(')(%p*)$", b="%1أ%3"},
  258. {a="(a)(')(%p*%s)", b="%1أ%3"},
  259. -- i
  260. {a="(i)(')([uai]N?%p*)$", b="%1ئ%3"},
  261. {a="(i)(')([uai]N?)(%p*%s)", b="%1ئ%3%4"},
  262. {a="(i)(')(%p*)$", b="%1ئ%3"},
  263. {a="(i)(')(%p*%s)", b="%1ئ%3"},
  264. --
  265. -- middle
  266. {a="([UIwy])(')", b="%1ء"}, --new
  267. -- {a="([Iy])(')", b="%1ئ"}, -- included in the above line
  268. -- hamza is alone after letters of prolongation or sukūn
  269. -- {a="([^uai])(')([uU])", b="%1ؤ%3"},
  270. -- {a="([^uai])(')(%_?[aAY])", b="%1أ%3"},
  271. -- {a="([^uai])(')([iI])", b="%1ئ%3"},
  272. {a="([^uai])(')(%_?[uaiUAYI])", b="%1ء%3"},
  273. {a="(u)(')([UI])", b="%1ء%3"},
  274. {a="(u)(')([u])", b="%1ؤ%3"},
  275. {a="(u)(')(%_?[aAY])", b="%1ؤ%3"},
  276. {a="(u)(')([i])", b="%1ئ%3"},
  277. {a="(a)(')(%_?[aAY])", b="%1أ%3"},
  278. {a="(a)(')([uU])", b="%1ؤ%3"},
  279. {a="(a)(')([iI])", b="%1ئ%3"},
  280. {a="(i)(')([UI])", b="%1ء%3"},
  281. {a="(i)(')(%_?[aAY])", b="%1ئ%3"},
  282. {a="(i)(')([u])", b="%1ئ%3"},
  283. {a="(i)(')([i])", b="%1ئ%3"},
  284. {a="(a)(')([^uaiUAI])", b="%1أ%3"},
  285. {a="(u)(')([^uaiUAI])", b="%1ؤ%3"},
  286. {a="(i)(')([^uaiUAI])", b="%1ئ%3"}
  287. }
  288. hamzaeasy = { -- differences marked below with 'easy'
  289. -- next line for ʾiʿrāb hyphen
  290. {a="(')(%-)([uaiUAI])", b="%1%3"},
  291. -- next lines for ʾalif alone (easy)
  292. {a="(%.A)([uai]?)l%-(%^n)", b="ا%2ل%3"}, --additional (^n is lunar)
  293. {a="([%(%[%|%<%s%-O])(%.A)([uai]?)l%-(%^n)", b="%1ا%3%4"}, --additional (^n is lunar) --p
  294. {a="(%.A)([uai]?)l%-([%_%^%.]?[tdrzsnc])", b="ا%2ل%3"},
  295. {a="([%(%[%|%<%s%-O])(%.A)([uai]?)l%-([%_%^%.]?[tdrzsnc])", b="%1ا%3ل%4"}, --p
  296. {a="%.A", b="ا"},
  297. -- hard coded hamza
  298. {a="|\"'", b="ء"},
  299. {a="A\"'", b="آ"},
  300. {a="[au]\"'", b="أ"},
  301. {a="w\"'", b="ؤ"},
  302. {a="i\"'", b="إ"},
  303. {a="y\"'", b="ئ"},
  304. -- hamza takes tašdīd too
  305. {a="''([Uu])", b="ؤؤ%1"},
  306. {a="''([Aa])", b="أأ%1"},
  307. {a="''([Ii])", b="ئئ%1"},
  308. -- inseparable adverbial particle 'a- + 'a
  309. {a="\'(a)%-\'(a)", b="أ%1اأ%2"},
  310. -- initial long u and i (for a, see below)
  311. {a="%'%_U", b="أU"},
  312. {a="%'%_I", b="إI"},
  313. -- taḫfīfu 'l-hamza
  314. {a="'u'([^uaiUAI])", b="أU%1"},
  315. {a="'i'([^uaiUAI])", b="إI%1"},
  316. {a="([wf]a)%-\'([^uaiUAIl][^%-])", b="%1أ%2"},
  317. {a="^u'([^uaiUAI])", b="اU%1"},
  318. {a="([^uaiUAIYN][%s%(%[%<])u'([^uaiUAI])", b="%1اU%2"},
  319. {a="^i'([^uaiUAI])", b="اI%1"},
  320. {a="([^uaiUAIYN][%s%(%[%<])i'([^uaiUAI])", b="%1اI%2"},
  321. -- madda (historic writing below)
  322. {a="'a'([^uaiUAI])", b="آ%1"},
  323. {a="([^uiyUI])\'a?A([%_%^%.]?[%`%'btjghxdrzsfqklmnywAY])", b="%1آ%2"},
  324. {a="^\'a?A([%_%^%.]?[%`%'btjghxdrzsfqklmnywAY])", b="آ%1"},
  325. {a="\'a?A(O[%_%^%.]?[%`%'btjghxdrzsfqklmnywAY]-O)", b="آ"},
  326. {a="(%W)\'a?A([%_%^%.]?[%`%'btjghxdrzsfqklmnywAY])", b="%1آ%2"},
  327. --easy (begin)
  328. {a="(A)(O%'[%S]-O)", b="ا"},
  329. {a="(A)(')(uN?%p*)$", b="aاء%3"},
  330. {a="(A)(')(uN?)(%p*%s)", b="aاء%3%4"},
  331. {a="(A)(')(iN?%p*)$", b="aاء%3"},
  332. {a="(A)(')(iN?)(%p*%s)", b="aاء%3%4"},
  333. {a="(A)(')([iI])", b="aائ%3"}, -- historic madda
  334. {a="(A)(')(u)", b="aاؤ%3"}, -- historic madda
  335. {a="(A)(')", b="aاء"}, -- historic madda
  336. --easy (end)
  337. -- initial (needs both ^ and %W patterns)
  338. -- 'aw: the diphthong is to be resolved into 'awi' (next 8 lines)
  339. {a="^('aw)(O)('[%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-O)", b="%1i"},
  340. {a="(%W)('aw)(O)('[%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-O)", b="%1%2i"},
  341. {a="^('aw)(O)(\"?[uai])([%S]-O)", b="%1i"},
  342. {a="(%W)('aw)(O)(\"?[uai])([%S]-O)", b="%1%2i"},
  343. {a="^('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"},
  344. {a="(%W)('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"},
  345. {a="^('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"},
  346. {a="(%W)('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"},
  347. -- then the 'initial' rules for the remaining cases
  348. {a="^(')([ua])", b="أ%2"},
  349. {a="^(')(i)", b="إ%2"},
  350. -- consider replacing initial %W with [%s%(%[%<%-]:
  351. -- {a="(%W)(')([ua])", b="%1أ%3"},
  352. -- {a="(%W)(')(i)", b="%1إ%3"},
  353. {a="([%s%(%[%<%-])(')([ua])", b="%1أ%3"},
  354. {a="([%s%(%[%<%-])(')(i)", b="%1إ%3"},
  355. -- final
  356. -- mi'aT is special orthography (unlike ^say'aN and .zim'aN)
  357. -- {a="(%^sa%.?[yY])(\"?%|?)(%')(aN)", b="%1%2ئ%4"}, --new
  358. -- {a="(.zi?m)(%')(aN)", b="%1ئ%3"}, --new
  359. {a="(mi)(%')(a[TtH])", b="%1ائ%3"},
  360. {a="(mi)(%')(aN%_?[AY])", b="%1أ%3"},
  361. -- easy (begin)
  362. -- The Munjid says that such words as radI'aN do not have the
  363. -- hamzah alone on the line, so take out the following two lines
  364. -- (final hamzah is on the line after a letter of prolongation or a
  365. -- consonant with sukūn)
  366. -- {a="([^Auai])(')(\"?[uai]N?)(%p*)$", b="%1ء%3%4"}, --new
  367. -- {a="([^Auai])(')(\"?[uai]N?)(%p*%s)", b="%1ء%3%4"},
  368. {a="([^Auai])(')(\"?aN)(%p*)$", b="%1ئ%3%4"}, --new
  369. {a="([^Auai])(')(\"?aN)(%p*%s)", b="%1ئ%3%4"}, --new
  370. {a="([^uai])(')(\"?a)(%p*)$", b="%1ء%3%4"}, --new
  371. {a="([^uai])(')(\"?a)(%p*%s)", b="%1ء%3%4"}, --new
  372. {a="([^uai])(')(\"?[ui]N?)(%p*)$", b="%1ء%3%4"}, --new
  373. {a="([^uai])(')(\"?[ui]N?)(%p*%s)", b="%1ء%3%4"}, --new
  374. --easy (end)
  375. -- u
  376. {a="(u)(')([uai]N?%p*)$", b="%1ؤ%3"},
  377. {a="(u)(')([uai]N?)(%p*%s)", b="%1ؤ%3%4"},
  378. {a="(u)(')(%p*)$", b="%1ؤ%3"},
  379. {a="(u)(')(%p*%s)", b="%1ؤ%3"},
  380. -- a
  381. {a="(a)(')(A%p*)$", b="%1آ"},
  382. {a="(a)(')(A)(%p*%s)", b="%1آ%4"},
  383. {a="(a)(')([u]N?%p*)$", b="%1أ%3"},
  384. {a="(a)(')([u]N?)(%p*%s)", b="%1أ%3%4"},
  385. {a="(a)(')(a%p*)$", b="%1أ%3"},
  386. {a="(a)(')(a)(%p*%s)", b="%1أ%3%4"},
  387. {a="(a)(')(aN%p*)$", b="%1أً"},
  388. {a="(a)(')(aN)(%p*%s)", b="%1أً%4"},
  389. {a="(a)(')([i]N?%p*)$", b="%1إ%3"},
  390. {a="(a)(')([i]N?)(%p*%s)", b="%1إ%3%4"},
  391. {a="(a)(')(%p*)$", b="%1أ%3"},
  392. {a="(a)(')(%p*%s)", b="%1أ%3"},
  393. -- i
  394. {a="(i)(')([uai]N?%p*)$", b="%1ئ%3"},
  395. {a="(i)(')([uai]N?)(%p*%s)", b="%1ئ%3%4"},
  396. {a="(i)(')(%p*)$", b="%1ئ%3"},
  397. {a="(i)(')(%p*%s)", b="%1ئ%3"},
  398. --
  399. -- middle
  400. {a="([Uw])(')", b="%1ء"}, --new
  401. {a="([Iy])(')", b="%1ئ"}, --easy
  402. {a="([^uai])(')([uU])", b="%1ؤ%3"},
  403. {a="([^uai])(')(%_?[aAY])", b="%1أ%3"},
  404. {a="([^uai])(')([iI])", b="%1ئ%3"},
  405. {a="(u)(')([uU])", b="%1ؤ%3"},
  406. {a="(u)(')(%_?[aAY])", b="%1ؤ%3"},
  407. {a="(u)(')([iI])", b="%1ئ%3"},
  408. {a="(a)(')(%_?[aAY])", b="%1أ%3"},
  409. {a="(a)(')([uU])", b="%1ؤ%3"},
  410. {a="(a)(')([iI])", b="%1ئ%3"},
  411. {a="(i)(')(%_?[aAY])", b="%1ئ%3"},
  412. {a="(i)(')([uU])", b="%1ئ%3"},
  413. {a="(i)(')([iI])", b="%1ئ%3"},
  414. {a="(a)(')([^uaiUAI])", b="%1أ%3"},
  415. {a="(u)(')([^uaiUAI])", b="%1ؤ%3"},
  416. {a="(i)(')([^uaiUAI])", b="%1ئ%3"}
  417. }
  418. tanwin = {
  419. -- assimilations (begin)
  420. {a="(O[%S]-)(%-?[uai]N[UI]?)(O)([rlmnwy])", b="%4%4"},
  421. {a="(%-?[uai]NU)(%s)([rlmnwy])", b="%1%2%3%3"},
  422. -- assimilations (end)
  423. {a="(O[%S]-)(%-?[uai]N[UI]?)(O)([uai])", b="%4"},
  424. {a="%-?uNU", b="ٌو"},
  425. {a="%-?aNU", b="ًوا"},
  426. {a="%-?iNU", b="ٍو"},
  427. -- assimilations (begin)
  428. {a="%-?(uN)(%s)([rlmnwy])", b="ٌ%2%3%3"},
  429. {a="(O[%S]-)(%-?aN)(_A)(O)([rlmnwy])", b="%5%5"},
  430. {a="(O[%S]-)(%-?aN)(Y)(O)([rlmnwy])", b="%5%5"},
  431. {a="%-?(aN)(_A)(%s)([rlmnwy])", b="ًى%3%4%4"},
  432. {a="%-?(aN)(Y)(%s)([rlmnwy])", b="ًى%3%4%4"},
  433. {a="([TH])%-?(aN)(%s)([rlmnwy])", b="%1ً%3%4%4"},
  434. {a="(ء)%-?(aN)(%s)([rlmnwy])", b="%1%2%3%4%4"}, --new
  435. {a="([^TAH])%-?(aN)(%s)([rlmnwy])", b="%1ًا%3%4%4"},
  436. {a="%-?(iNI?)(%s)([rlmnwy])", b="ٍ%2%3%3"},
  437. -- assimilations (end)
  438. {a="(O[%S]-)(%-?aN)(_A)(O)([uai])", b="%5"},
  439. {a="(O[%S]-)(%-?aN)(Y)(O)([uai])", b="%5"},
  440. -- quoted tanwīn (begin)
  441. {a="%-?(\"uN)", b=""},
  442. {a="(B)%-?(\"aN)", b="%1"},
  443. {a="%-?(\"aN)(_A)", b="ى"},
  444. {a="%-?(\"aN)(Y)", b="ى"},
  445. {a="([TH])%-?(\"aN)", b="%1"},
  446. {a="([اآ])(ء)%-?(\"aN)", b="%1%2"}, --new
  447. {a="([^TAH])%-?(\"aN)", b="%1ا"},
  448. {a="%-?(\"iNI?)", b=""},
  449. -- quoted tanwīn (end)
  450. {a="%-?(uN)", b="ٌ"},
  451. {a="(B)%-?(aN)", b="%1ً"},
  452. -- needed by \arbcolor:
  453. {a="%-?(aN)(O[%S]-%_AO)", b="ً"},
  454. {a="%-?(aN)(O[%S]-YO)", b="ً"},
  455. {a="(O[%S]-[TH]O)%-?(aN)", b="ً"},
  456. {a="(O[%S]-)([اآ])(ء)(O)%-?(aN)", b="ً"}, --new
  457. {a="(O[%S]-[^TAH]O)%-?(aN)", b="ًا"},
  458. --
  459. {a="%-?(aN)(_A)", b="ًى"},
  460. {a="%-?(aN)(Y)", b="ًى"},
  461. {a="([TH])%-?(aN)", b="%1ً"},
  462. {a="([اآ])(ء)%-?(aN)", b="%1%2ً"}, --new
  463. {a="([^TAH])%-?(aN)", b="%1ًا"},
  464. {a="%-?(iNI?)", b="ٍ"}
  465. }
  466. tanwineasy = { -- 'easy' requires some lines to be taken out:
  467. -- assimilations (begin)
  468. -- {a="(O[%S]-)(%-?[uai]N[UI]?)(O)([rlmnwy])", b="%4%4"},
  469. -- {a="(%-?[uai]NU)(%s)([rlmnwy])", b="%1%2%3%3"},
  470. -- assimilations (end)
  471. {a="(O[%S]-)(%-?[uai]N[UI]?)(O)([uai])", b="%4"},
  472. {a="%-?uNU", b="ٌو"},
  473. {a="%-?aNU", b="ًوا"},
  474. {a="%-?iNU", b="ٍو"},
  475. -- assimilations (begin)
  476. -- {a="%-?(uN)(%s)([rlmnwy])", b="ٌ%2%3%3"},
  477. -- {a="(O[%S]-)(%-?aN)(_A)(O)([rlmnwy])", b="%5%5"},
  478. -- {a="(O[%S]-)(%-?aN)(Y)(O)([rlmnwy])", b="%5%5"},
  479. -- {a="%-?(aN)(_A)(%s)([rlmnwy])", b="ًى%3%4%4"},
  480. -- {a="%-?(aN)(Y)(%s)([rlmnwy])", b="ًى%3%4%4"},
  481. -- {a="([TH])%-?(aN)(%s)([rlmnwy])", b="%1ً%3%4%4"},
  482. -- {a="(ء)%-?(aN)(%s)([rlmnwy])", b="%1%2%3%4%4"}, --new
  483. -- {a="([^TAH])%-?(aN)(%s)([rlmnwy])", b="%1ًا%3%4%4"},
  484. -- {a="%-?(iNI?)(%s)([rlmnwy])", b="ٍ%2%3%3"},
  485. -- assimilations (end)
  486. {a="(O[%S]-)(%-?aN)(_A)(O)([uai])", b="%5"},
  487. {a="(O[%S]-)(%-?aN)(Y)(O)([uai])", b="%5"},
  488. -- quoted tanwīn (begin)
  489. {a="%-?(\"uN)", b=""},
  490. {a="(B)%-?(\"aN)", b="%1"},
  491. {a="%-?(\"aN)(_A)", b="ى"},
  492. {a="%-?(\"aN)(Y)", b="ى"},
  493. {a="([TH])%-?(\"aN)", b="%1"},
  494. {a="([اآ])(ء)%-?(\"aN)", b="%1%2"}, --new
  495. {a="([^TAH])%-?(\"aN)", b="%1ا"},
  496. {a="%-?(\"iNI?)", b=""},
  497. -- quoted tanwīn (end)
  498. {a="%-?(uN)", b="ٌ"},
  499. {a="(B)%-?(aN)", b="%1ً"},
  500. -- needed by \arbcolor:
  501. {a="%-?(aN)(O[%S]-%_AO)", b="ً"},
  502. {a="%-?(aN)(O[%S]-YO)", b="ً"},
  503. {a="(O[%S]-[TH]O)%-?(aN)", b="ً"},
  504. {a="(O[%S]-)([اآ])(ء)(O)%-?(aN)", b="ً"}, --new
  505. {a="(O[%S]-[^TAH]O)%-?(aN)", b="ًا"},
  506. --
  507. {a="%-?(aN)(_A)", b="ًى"},
  508. {a="%-?(aN)(Y)", b="ًى"},
  509. {a="([TH])%-?(aN)", b="%1ً"},
  510. {a="([اآ])(ء)%-?(aN)", b="%1%2ً"}, --new
  511. {a="([^TAH])%-?(aN)", b="%1ًا"},
  512. {a="%-?(iNI?)", b="ٍ"}
  513. }
  514. trigraphs = { -- trigraphs or more
  515. -- ʾalif al-waṣl: put it back on with \arbnull
  516. {a="(O[%S]-)([%'a]l%-)(O)(\"[uai])", b="ٱ"},
  517. {a="(O[%S]-)([%'a]l%-)(O)([uai])", b="ا"},
  518. -- 'llatI / 'llad_I
  519. {a="^'ll(a)([%_]?[dt])", b="الّ%1%2"},
  520. {a="([%(%[%|%<%s%-])'ll(a)([%_]?[dt])", b="%1الّ%2%3"}, --p
  521. -- law: the diphthong is to be resolved into 'awi' (next 8 lines)
  522. {a="^(law)(O)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-O)", b="%1i"},
  523. {a="(%W)(law)(O)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-O)", b="%1%2i"},
  524. {a="^(law)(O)(\"?[uai])([%S]-O)", b="%1i"},
  525. {a="(%W)(law)(O)(\"?[uai])([%S]-O)", b="%1%2i"},
  526. {a="^(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"},
  527. {a="(%W)(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"},
  528. {a="^(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"}, --p
  529. {a="(%W)(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"}, --p
  530. -- al- + lām
  531. {a="^(a)l%-(l)", b="ا%1ل%2%2"},
  532. {a="([%(%[%|%<%s%-O])(a)l%-(l)", b="%1ا%2ل%3%3"}, --p
  533. -- al- + solar consonant ('c' and '^n' are additional characters)
  534. {a="^(a)l%-(%^n)", b="ا%1ل%2"}, -- ^n is lunar
  535. {a="([%(%[%|%<%s%-O])(a)l%-(%^n)", b="%1ا%2ل%3"},-- ^n is lunar --p
  536. {a="^(a)l%-([%_%^%.]?[tdrzsnc])", b="ا%1ل%2%2"},
  537. {a="([%(%[%|%<%s%-O])(a)l%-([%_%^%.]?[tdrzsnc])", b="%1ا%2ل%3%3"}, --p
  538. -- assim. art. + solar consonant ('c' and '^n' are additional characters)
  539. {a="^(a)(%^n)%-", b="ا%1ل"}, -- ^n is lunar
  540. {a="([%(%[%|%<%s%-O])(a)(%^n)%-", b="%1ا%2ل"},-- ^n is lunar --p
  541. {a="^(a)([%_%^%.]?[tdrzsnc])%-", b="ا%1ل%2"},
  542. {a="([%(%[%|%<%s%-O])(a)([%_%^%.]?[tdrzsnc])%-", b="%1ا%2ل%3"}, --p
  543. -- al- + initial unstable hamza
  544. {a="^(a)l%-(\")([uai])", b="ا%1ل%3ٱ"},
  545. {a="([%(%[%|%<%s%-O])(a)l%-(\")([uai])", b="%1ا%2ل%4ٱ"}, --p
  546. {a="^(a)l%-([uai])", b="ا%1ل%2ا"},
  547. {a="([%(%[%|%<%s%-O])(a)l%-([uai])", b="%1ا%2ل%3ا"}, --p
  548. -- li-/la- + art. + initial unstable hamza is a special orthography
  549. {a="l([ai])%-l%-(\")([uai])", b="ل%1ل%3ٱ"},
  550. {a="l([ai])%-l%-([uai])", b="ل%1ل%2ا"},
  551. -- al- + lunar consonant (i.e. what remains)
  552. {a="^(a)l%-", b="ا%1ل"},
  553. {a="([%(%[%|%<%s%-O])(a)l%-", b="%1ا%2ل"}, --p
  554. -- diphthongs to be resolved before ʾalif conjunctionis
  555. {a="(aW)(O)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-O)", b="awuا"},
  556. {a="(aw)(O)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-O)", b="%1u"},
  557. {a="(ay)(O)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-O)", b="%1i"},
  558. {a="(aW)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="awuا%2%3"},
  559. {a="(aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1u%2%3"},
  560. {a="(ay)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"},
  561. -- art. with waṣla + lām
  562. {a="'l%-(l)", b="ال%1%1"},
  563. -- art. with waṣla + solar consonant
  564. -- ('c' and '^n' are additional characters)
  565. {a="'l%-(%^n)", b="ال%1"}, -- ^n is lunar
  566. {a="'l%-([%_%^%.]?[tdrzsnc])", b="ال%1%1"},
  567. -- li-/la- + art. + lām
  568. {a="l([ai])%-l%-(l)", b="ل%1%2%2"},
  569. -- assim. art. with waṣla + solar consonant ('c' and '^n' are
  570. -- additional characters)
  571. {a="'(%^n)%-", b="ال"}, -- ^n is lunar
  572. {a="'([%_%^%.]?[tdrzsnc])%-", b="ال%1"},
  573. -- li-/la- + art. + solar consonant is a special orthography
  574. -- ('c' and '^n' are additional characters)
  575. {a="l([ai])%-l%-(%^n)", b="ل%1ل%2"}, -- '^n' is lunar
  576. {a="l([ai])%-l%-([%_%^%.]?[tdrzsnc])", b="ل%1ل%2%2"},
  577. -- li-/la + assim. art. + solar consonant is a special orthography
  578. -- ('c' and '^n' are additional characters)
  579. {a="l([ai])%-(%^n)%-(%^n)", b="ل%1ل%3"}, -- ^n is lunar
  580. {a="l([ai])%-([%_%^%.]?[tdrzsnc])%-([%_%^%.]?[tdrzsnc])", b="ل%1ل%3%3"},
  581. -- art. with waṣla + initial unstable hamza
  582. {a="'l%-(\")([uai])", b="ال%2ٱ"},
  583. {a="'l%-([uai])", b="ال%1ا"},
  584. -- art. with waṣla + lunar consonant (i.e. what remains)
  585. {a="'l%-", b="ال"},
  586. -- the silent wāw
  587. {a="uU(%p*)$", b="uو%1"},
  588. {a="uU(%p*%s)", b="uو%1"},
  589. {a="aU(%p*)$", b="aو%1"},
  590. {a="aU(%p*%s)", b="aو%1"},
  591. {a="iU(%p*)$", b="iو%1"},
  592. {a="iU(%p*%s)", b="iو%1"},
  593. -- words ending in -āT with silent wāw/yāʾ
  594. {a="(_a)UA", b="%1وا"},
  595. {a="(_a)U", b="%1و"},
  596. {a="(_a)I", b="%1ي"}
  597. }
  598. idgham = {
  599. -- assimilations
  600. {a="(n)(%s)([rlmnwy])", b="%1%2%3%3"},
  601. {a="(n)(O)([rlmnwy])([%S]-O)", b="%3"}
  602. }
  603. trigraphseasy = { -- differences marked below with 'easy'
  604. -- ʾalif al-waṣl: put it back on with \arbnull
  605. {a="(O[%S]-)([%'a]l%-)(O)(\"[uai])", b="ٱ"},
  606. {a="(O[%S]-)([%'a]l%-)(O)([uai])", b="ا"},
  607. -- Allah (easy)
  608. {a="l%-l_ah", b="l-ll_ah"},
  609. -- 'llatI / 'llad_I
  610. {a="^'ll(a)([%_]?[dt])", b="الّ%1%2"},
  611. {a="([%(%[%|%<%s%-])'ll(a)([%_]?[dt])", b="%1الّ%2%3"}, --p
  612. -- law: the diphthong is to be resloved into 'awi' (next 8 lines)
  613. {a="^(law)(O)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-O)", b="%1i"},
  614. {a="(%W)(law)(O)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-O)", b="%1%2i"},
  615. {a="^(law)(O)(\"?[uai])([%S]-O)", b="%1i"},
  616. {a="(%W)(law)(O)(\"?[uai])([%S]-O)", b="%1%2i"},
  617. {a="^(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"},
  618. {a="(%W)(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"},
  619. {a="^(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"}, --p
  620. {a="(%W)(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"}, --p
  621. -- al- + lām (easy)
  622. {a="^(a)l%-(l)", b="ا%1ل%2"},
  623. {a="([%(%[%|%<%s%-O])(a)l%-(l)", b="%1ا%2ل%3"}, --p
  624. -- al- + solar consonant (easy) ('c' and '^n' are additional characters)
  625. {a="^(a)l%-(%^n)", b="ا%1ل%2"}, -- ^n is lunar
  626. {a="([%(%[%|%<%s%-O])(a)l%-(%^n)", b="%1ا%2ل%3"}, -- ^n is lunar --p
  627. {a="^(a)l%-([%_%^%.]?[tdrzsnc])", b="ا%1ل%2"},
  628. {a="([%(%[%|%<%s%-O])(a)l%-([%_%^%.]?[tdrzsnc])", b="%1ا%2ل%3"}, --p
  629. -- assim. art. + solar consonant (easy) ('c' and '^n' are
  630. -- additional characters)
  631. {a="^(a)(%^n)%-", b="ا%1ل"}, -- ^n is lunar
  632. {a="([%(%[%|%<%s%-O])(a)(%^n)%-", b="%1ا%2ل"}, -- ^n is lunar --p
  633. {a="^(a)([%_%^%.]?[tdrzsnc])%-", b="ا%1ل"},
  634. {a="([%(%[%|%<%s%-O])(a)([%_%^%.]?[tdrzsnc])%-", b="%1ا%2ل"}, --p
  635. -- al- + initial unstable hamza
  636. {a="^(a)l%-(\")([uai])", b="ا%1ل%3ٱ"},
  637. {a="([%(%[%|%<%s%-O])(a)l%-(\")([uai])", b="%1ا%2ل%4ٱ"}, --p
  638. {a="^(a)l%-([uai])", b="ا%1ل%2ا"},
  639. {a="([%(%[%|%<%s%-O])(a)l%-([uai])", b="%1ا%2ل%3ا"}, --p
  640. -- li-/la- + art. + initial unstable hamza is a special orthography
  641. {a="l([ai])%-l%-(\")([uai])", b="ل%1ل%3ٱ"},
  642. {a="l([ai])%-l%-([uai])", b="ل%1ل%2ا"},
  643. -- al- + lunar consonant (i.e. what remains)
  644. {a="^(a)l%-", b="ا%1ل"},
  645. {a="([%(%[%|%<%s%-O])(a)l%-", b="%1ا%2ل"}, --p
  646. -- diphthongs to be resolved before ʾalif conjunctionis
  647. {a="(aW)(O)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-O)", b="awuا"},
  648. {a="(aw)(O)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-O)", b="%1u"},
  649. {a="(ay)(O)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-O)", b="%1i"},
  650. {a="(aW)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="awuا%2%3"},
  651. {a="(aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1u%2%3"},
  652. {a="(ay)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"},
  653. -- art. with waṣla + lām (easy)
  654. {a="'l%-(l)", b="ال%1"},
  655. -- art. with waṣla + solar consonant (easy)
  656. -- ('c' and '^n' are additional characters)
  657. {a="'l%-(%^n)", b="ال%1"}, -- ^n is lunar
  658. {a="'l%-([%_%^%.]?[tdrzsnc])", b="ال%1"},
  659. -- li-/la- + art. + lām (easy)
  660. {a="l([ai])%-l%-(l)", b="ل%1%2"},
  661. -- assim. art. with waṣla + solar consonant (easy)
  662. -- ('c' and '^n' are additional characters)
  663. {a="'(%^n)%-", b="ال"}, -- ^n is lunar
  664. {a="'([%_%^%.]?[tdrzsnc])%-", b="ال"},
  665. -- li-/la- + art. + solar consonant is a special orthography (easy)
  666. -- ('c' and '^n' are additional characters)
  667. {a="l([ai])%-l%-(%^n)", b="ل%1ل%2"}, -- ^n is lunar
  668. {a="l([ai])%-l%-([%_%^%.]?[tdrzsnc])", b="ل%1ل%2"},
  669. -- li-/la + assim. art. + solar consonant is a special orthography (easy)
  670. -- ('c' and '^n' are additional characters)
  671. {a="l([ai])%-(%^n)%-(%^n)", b="ل%1ل%3"}, -- ^n is lunar
  672. {a="l([ai])%-([%_%^%.]?[tdrzsnc])%-([%_%^%.]?[tdrzsnc])", b="ل%1ل%3"},
  673. -- art. with waṣla + initial unstable hamza
  674. {a="'l%-(\")([uai])", b="ال%2ٱ"},
  675. {a="'l%-([uai])", b="ال%1ا"},
  676. -- art. with waṣla + lunar consonant (i.e. what remains)
  677. {a="'l%-", b="ال"},
  678. -- the silent wāw
  679. {a="uU(%p*)$", b="uو%1"},
  680. {a="uU(%p*%s)", b="uو%1"},
  681. {a="aU(%p*)$", b="aو%1"},
  682. {a="aU(%p*%s)", b="aو%1"},
  683. {a="iU(%p*)$", b="iو%1"},
  684. {a="iU(%p*%s)", b="iو%1"},
  685. -- words ending in -āT with silent wāw/yāʾ
  686. {a="(_a)UA", b="%1وا"},
  687. {a="(_a)U", b="%1و"},
  688. {a="(_a)I", b="%1ي"}
  689. }
  -- digraphs: list of rewrite rules. Each entry pairs a Lua pattern
  -- (field `a`) with a replacement string (field `b`, in which %N stands
  -- for the N-th capture of `a`).
  -- NOTE(review): the code that consumes this table is outside this
  -- excerpt; presumably the rules are applied from top to bottom with
  -- string.gsub, in which case their relative order matters (for
  -- instance the doubled forms such as "%_t%_t" are listed before the
  -- plain "_t"). TODO confirm against the caller.
  690. digraphs = {
  691. -- ʾiʿrāb: straight double quote must be discarded
  -- (every rule of this group drops capture 1, the hyphen, and keeps
  -- the ending and whatever punctuation/space follows it; the first
  -- rule of each pair matches before a space, the second at the end of
  -- the string)
  692. {a="(%-)(\"?[UI]na)(%p*%s)", b="%2%3"},
  693. {a="(%-)(\"?[UI]na)(%p*)$", b="%2%3"},
  694. {a="(%-)(\"?At[ui])(%p*%s)", b="%2%3"},
  695. {a="(%-)(\"?At[ui])(%p*)$", b="%2%3"},
  696. {a="(%-)(\"?Ani)(%p*%s)", b="%2%3"},
  697. {a="(%-)(\"?Ani)(%p*)$", b="%2%3"},
  698. {a="(%-)(\"?ayni)(%p*%s)", b="%2%3"},
  699. {a="(%-)(\"?ayni)(%p*)$", b="%2%3"},
  700. {a="(%-)(\"?[uai])(%p*%s)", b="%2%3"},
  701. {a="(%-)(\"?[uai])(%p*)$", b="%2%3"},
  702. -- ʾiʿrāb (end)
  703. -- initial straight double quote gives a connective ʾalif
  -- (at the start of the string, or after one of ( [ | < space hyphen)
  704. {a="^\"[uai]", b="ٱ"},
  705. {a="([%(%[%|%<%s%-])\"[uai]", b="%1ٱ"}, --p
  706. -- diphthongs to be resolved before ʾalif conjunctionis
  -- (the rules with an "O" in the pattern replace the whole match,
  -- the O-delimited run included, by the short string in `b`)
  707. {a="(aW)(O)(\"?[uai])([%S]-O)", b="awuا"},
  708. {a="(aW)(%s)([%(%[%|%<]?)([uai])", b="awuا%2%3%4"}, --p
  709. {a="(aw)(O)(\"?[uai])([%S]-O)", b="%1u"},
  710. {a="(aw)(%s)([%(%[%|%<]?)(\"?[uai])", b="%1u%2%3ا"}, --p
  711. {a="(ay)(O)(\"?[uai])([%S]-O)", b="%1i"},
  712. {a="(ay)(%s)([%(%[%|%<]?)(\"?[uai])", b="%1i%2%3ا"}, --p
  713. -- hyphen + initial alif without hamza:
  714. {a="([uai]%-)(\"?[uai])([%^%_%.%`]?)([%aإأؤئ])", b="%1ا%3%4"},
  715. -- initial alif without hamza
  -- (start of the string, optionally after an opening ( [ | <)
  716. {a="^([%(%[%|%<]?)(\"?[uai])", b="%1ا%2"}, --p
  717. -- initial alif without hamza
  -- (after an O-delimited run, or after whitespace)
  718. {a="(O[%S]-)([uaiUAIY])(O)(\"?[uai])", b="ا"},
  719. {a="(%s)([%(%[%|%<]?)(\"?[uai])", b="%1%2ا"}, --p
  -- two hyphens give ـ
  720. {a="%-%-", b="ـ"},
  -- doubled letters: the pair is replaced by one letter followed by ّ
  721. {a="ؤؤ", b="ؤّ"},
  722. {a="أأ", b="أّ"},
  723. {a="ئئ", b="ئّ"},
  724. {a="bb", b="بّ"},
  725. {a="BB", b="ـّ"},
  -- a "|" is inserted between two consecutive letters of the set when
  -- the first one is preceded by _ ^ or .
  -- NOTE(review): presumably this keeps the second letter from being
  -- paired with the first by the doubled-letter rules below -- confirm.
  726. {a="([%_%^%.])([tghdsz])([tghdsz])", b="%1%2|%3"},
  727. -- same as above for additional characters:
  728. {a="([%_%^%.])([cn])([cn])", b="%1%2|%3"},
  729. {a="tt", b="تّ"},
  730. {a="%_t%_t", b="ثّ"},
  731. {a="jj", b="جّ"},
  732. {a="%^g%^g", b="جّ"},
  733. {a="%.h%.h", b="حّ"},
  734. {a="xx", b="خّ"},
  735. {a="%_h%_h", b="خّ"},
  736. {a="dd", b="دّ"},
  737. {a="%_d%_d", b="ذّ"},
  738. {a="rr", b="رّ"},
  739. {a="zz", b="زّ"},
  740. {a="ss", b="سّ"},
  741. {a="%^s%^s", b="شّ"},
  742. {a="%.s%.s", b="صّ"},
  743. {a="%.d%.d", b="ضّ"},
  744. {a="%.t%.t", b="طّ"},
  745. {a="%.z%.z", b="ظّ"},
  746. {a="%`%`", b="عّ"},
  747. {a="%.g%.g", b="غّ"},
  748. {a="ff", b="فّ"},
  749. {a="qq", b="قّ"},
  750. {a="kk", b="كّ"},
  751. {a="ll", b="لّ"},
  752. {a="mm", b="مّ"},
  753. {a="nn", b="نّ"},
  754. {a="hh", b="هّ"},
  755. {a="ww", b="وّ"},
  756. {a="yy", b="يّ"},
  757. {a="%.y%.y", b="ىّ"},
  758. -- additional characters + šaddah (begin)
  759. {a="pp", b="پّ"},
  760. {a="vv", b="ڤّ"},
  761. {a="gg", b="گّ"},
  762. {a="%^c%^c", b="چّ"},
  763. {a="%^z%^z", b="ژّ"},
  764. {a="%^n%^n", b="ڭّ"},
  765. -- additional characters + šaddah (end)
  -- two-character codes (a _ ^ or . modifier followed by a letter)
  -- giving a single Arabic letter
  766. {a="_t", b="ث"},
  767. {a="%^g", b="ج"},
  768. {a="%.h", b="ح"},
  769. {a="_h", b="خ"},
  770. {a="_d", b="ذ"},
  771. {a="%^s", b="ش"},
  772. {a="%.s", b="ص"},
  773. {a="%.d", b="ض"},
  774. {a="%.t", b="ط"},
  775. {a="%.z", b="ظ"},
  776. {a="%.g", b="غ"},
  777. {a="%.y", b="ى"},
  778. -- additional characters (begin)
  779. {a="%^c", b="چ"},
  780. {a="%^z", b="ژ"},
  781. {a="%^n", b="ڭ"},
  782. -- additional characters (end)
  -- U, W or aW followed by A (or by nothing, for aW): the A becomes ا,
  -- the W becomes و
  783. {a="(U)(A)", b="%1ا"},
  784. {a="WA", b="وا"},
  785. {a="(a)W\"", b="%1وْا"},
  786. {a="(a)W", b="%1وا"},
  -- remaining codes introduced by _
  787. {a="_A", b="aى"},
  788. {a="_u", b="ٗ"},
  789. {a="_a", b="ٰ"},
  790. {a="_i", b="ٖ"},
  -- remaining codes introduced by . or ^
  791. {a="%.b", b="ٮ"},
  792. {a="%.f", b="ڡ"},
  793. {a="%.q", b="ٯ"},
  794. {a="%.k", b="ک"},
  795. {a="%.n", b="ں"},
  796. {a="%^d", b="ڊ"}
  797. }
  -- single: rewrite rules for one-character codes. Each entry pairs a
  -- Lua pattern (field `a`) with its replacement (field `b`).
  -- NOTE(review): the consumer of this table is not visible in this
  -- excerpt; presumably the entries are applied in order with
  -- string.gsub -- confirm against the caller.
  798. single = {
  799. {a="b", b="ب"},
  800. {a="t", b="ت"},
  801. {a="j", b="ج"},
  802. {a="x", b="خ"},
  803. {a="d", b="د"},
  804. {a="r", b="ر"},
  805. {a="z", b="ز"},
  806. {a="s", b="س"},
  807. {a="f", b="ف"},
  808. {a="`", b="ع"},
  -- NOTE(review): the next entry is identical to the "f" rule two lines
  -- above; if the rules are applied sequentially it can never match.
  -- Looks redundant -- confirm before removing.
  809. {a="f", b="ف"},
  810. {a="q", b="ق"},
  811. {a="k", b="ك"},
  812. {a="l", b="ل"},
  813. {a="m", b="م"},
  814. {a="n", b="ن"},
  815. {a="h", b="ه"},
  816. {a="w", b="و"},
  817. {a="y", b="ي"},
  818. {a="T", b="ة"},
  819. -- additional characters (begin)
  820. {a="H", b="ه"},
  821. {a="p", b="پ"},
  822. {a="v", b="ڤ"},
  823. {a="g", b="گ"},
  824. -- additional characters (end)
  -- a straight double quote becomes ْ when it is at the end of the
  -- string, before a non-alphanumeric character, or before any
  -- character other than u a i U A I (the following character is kept)
  825. {a="\"$", b="ْ"},
  826. {a="\"(%W)", b="ْ%1"},
  827. {a="\"([^uaiUAI])", b="ْ%1"},
  828. {a="o", b="ْ"}, -- hard-coded sukūn
  -- a hyphen standing between two non-digit characters is dropped
  829. {a="([^0-9])%-([^0-9])", b="%1%2"},
  830. {a="B", b="ـ"}
  831. }
  -- longv: rewrite rules for the upper-case vowel codes A, U, I and Y.
  -- Each entry pairs a Lua pattern (field `a`) with its replacement
  -- (field `b`).
  -- NOTE(review): presumably applied in order with string.gsub by code
  -- outside this excerpt; the prefixed forms (\"A, aY, iY) are listed
  -- before the bare ones (A, Y), which matters under that assumption.
  832. longv = {
  -- preceded by a straight double quote: the bare letter only
  833. {a="\"A", b="ا"},
  834. {a="\"U", b="و"},
  835. {a="\"I", b="ي"},
  836. {a="\"Y", b="ى"},
  -- otherwise: a vowel mark followed by the letter
  837. {a="A", b="َا"},
  838. {a="U", b="ُو"},
  839. {a="I", b="ِي"},
  -- Y after a or i keeps that vowel code; a bare Y gets an "a" in front
  840. {a="aY", b="aى"},
  841. {a="iY", b="iى"},
  842. {a="Y", b="aى"}
  843. }
  -- shortv: rewrite rules for the lower-case vowel codes u, a and i.
  -- Each entry pairs a Lua pattern (field `a`) with its replacement
  -- (field `b`).
  -- NOTE(review): presumably applied in order with string.gsub by code
  -- outside this excerpt -- confirm.
  844. shortv = {
  -- a vowel preceded by a straight double quote is removed together
  -- with the quote
  845. {a="\"u", b=""},
  846. {a="\"a", b=""},
  847. {a="\"i", b=""},
  -- ".u" / ".a" / ".i", optionally preceded by a hyphen, give the vowel
  -- mark alone (dot and hyphen are consumed)
  848. {a="%-?%.u", b="ُ"},
  849. {a="%-?%.a", b="َ"},
  850. {a="%-?%.i", b="ِ"},
  -- plain vowels
  851. {a="u", b="ُ"},
  852. {a="a", b="َ"},
  853. {a="i", b="ِ"}
  854. }
  -- punctuationhb: rewrite rules for punctuation marks. Each entry
  -- pairs a Lua pattern (field `a`) with its replacement (field `b`).
  -- Unlike the `punctuation` table, this one has no rules for single
  -- parentheses, angle brackets or square brackets.
  -- NOTE(review): the consumer is outside this excerpt; the meaning of
  -- the "hb" suffix cannot be told from here.
  855. punctuationhb = {
  -- doubled parentheses give the ornate pair ﴿ ﴾
  856. {a="%(%(", b="﴿"},
  857. {a="%)%)", b="﴾"},
  -- NOTE(review): the replacement looks identical to the matched
  -- character -- confirm this rule is intentional.
  858. {a="%.", b="."},
  859. -- replaced with the next two rules to make the Arabic comma work
  860. -- after \abraces{}
  861. -- {a="([^0-9])%,", b="%1،"},
  -- every comma becomes ، and is then turned back into "," when it
  -- directly follows a digit
  862. {a="%,", b="،"},
  863. {a="([%d])%،", b="%1,"},
  864. {a="%?", b="؟"},
  865. {a="%;", b="؛"},
  866. }
  -- punctuation: rewrite rules for punctuation marks. Each entry pairs
  -- a Lua pattern (field `a`) with its replacement (field `b`).
  -- NOTE(review): the consumer is outside this excerpt; the comments
  -- below assume the rules are applied in order, one after the other,
  -- over the whole string -- confirm against the caller.
  867. punctuation = {
  -- doubled parentheses give the ornate pair ﴿ ﴾ (listed before the
  -- single-parenthesis rules)
  868. {a="%(%(", b="﴿"},
  869. {a="%)%)", b="﴾"},
  -- "(" and ")" are exchanged: each is first rewritten to a temporary
  -- marker ("+@(" / "-@("), then each marker is rewritten to the
  -- opposite parenthesis, so that the second step cannot undo the first
  870. {a="%(", b="+@("},
  871. {a="%)", b="-@("},
  872. {a="%+%@%(", b=")"},
  873. {a="%-%@%(", b="("},
  -- same exchange for "<" and ">"
  874. {a="%<", b="+@<"},
  875. {a="%>", b="-@<"},
  876. {a="%+%@%<", b=">"},
  877. {a="%-%@%<", b="<"},
  -- same exchange for "[" and "]"
  878. {a="%[", b="+@["},
  879. {a="%]", b="-@["},
  880. {a="%+%@%[", b="]"},
  881. {a="%-%@%[", b="["},
  -- NOTE(review): the replacement looks identical to the matched
  -- character -- confirm this rule is intentional.
  882. {a="%.", b="."},
  883. -- replaced with the next two rules to make the Arabic comma work
  884. -- after \abraces{}
  885. -- {a="([^0-9])%,", b="%1،"},
  -- every comma becomes ، and is then turned back into "," when it
  -- directly follows a digit
  886. {a="%,", b="،"},
  887. {a="([%d])%،", b="%1,"},
  888. {a="%?", b="؟"},
  889. {a="%;", b="؛"},
  890. }
  -- null: rewrite rules that mostly delete helper characters. Each
  -- entry pairs a Lua pattern (field `a`) with its replacement
  -- (field `b`).
  -- NOTE(review): the consumer is outside this excerpt; presumably the
  -- rules are applied in order with string.gsub -- confirm.
  891. null = {
  892. {a="%&", b="‍"}, -- That is ^^^^200d, the zero-width joiner
  -- "|" is removed
  893. {a="%|", b=""},
  -- a hyphen at the very start of the string is removed
  894. {a="^%-", b=""},
  -- a hyphen following a non-digit character is removed (the
  -- character itself is kept)
  895. {a="([^0-9])(%-)", b="%1"},
  -- the shortest run of non-space characters enclosed between two "O"
  -- is removed, both "O" included
  896. {a="O[%S]-O", b=""},
  -- any ^ or _ still present is replaced by ">??<", the character
  -- after it being kept
  -- NOTE(review): presumably a visible marker for a code that none of
  -- the earlier rules recognised -- confirm.
  897. {a="[%^%_](.)", b=">??<%1"}
  898. }