  -- arabluatex_trans.lua
  --[[
  This file is part of the `arabluatex' package
  ArabLuaTeX -- Processing ArabTeX notation under LuaLaTeX
  Copyright (C) 2016--2017 Robert Alessi
  Please send error reports and suggestions for improvements to Robert
  Alessi <alessi@robertalessi.net>
  This program is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.
  This program is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  General Public License for more details.
  You should have received a copy of the GNU General Public License
  along with this program. If not, see
  <http://www.gnu.org/licenses/>.
  --]]
  -- common
  -- Punctuation rules.  Each entry pairs a Lua pattern (a) with its
  -- replacement (b): a doubled parenthesis is reduced to a single one.
  -- (The scraped line-number gutter was removed so the table parses as Lua.)
  punctuationtr = {
    {a="%(%(", b="("},
    {a="%)%)", b=")"}
  }
  -- Rules whose replacement is the empty string: they delete a literal
  -- `|` and the shortest run of non-space characters enclosed between
  -- two `o` characters (delimiters included).
  -- (The scraped line-number gutter was removed so the table parses as Lua.)
  nulltr = {
    {a="%|", b=""},
    {a="o[%S]-o", b=""}
  }
  -- cap
  -- Capitalisation map: each entry pairs a lower-case romanised letter (a)
  -- with its upper-case form (b).
  -- (The scraped line-number gutter was removed so the table parses as Lua.)
  captr = {
    -- dmg (default); loc as well
    {a="ā", b="Ā"},
    {a="b", b="B"},
    {a="t", b="T"},
    {a="ṯ", b="Ṯ"},
    {a="ǧ", b="Ǧ"},
    {a="ḥ", b="Ḥ"},
    {a="ḫ", b="Ḫ"},
    {a="d", b="D"},
    {a="ḏ", b="Ḏ"},
    {a="r", b="R"},
    {a="z", b="Z"},
    {a="s", b="S"},
    {a="š", b="Š"},
    {a="ṣ", b="Ṣ"},
    {a="ḍ", b="Ḍ"},
    {a="ṭ", b="Ṭ"},
    {a="ẓ", b="Ẓ"},
    {a="ġ", b="Ġ"},
    {a="f", b="F"},
    {a="q", b="Q"},
    {a="k", b="K"},
    {a="l", b="L"},
    {a="m", b="M"},
    {a="n", b="N"},
    {a="h", b="H"},
    {a="w", b="W"},
    {a="ū", b="Ū"},
    {a="y", b="Y"},
    {a="ī", b="Ī"}
  }
  -- dmg
  -- dmg scheme: hamza rules.  Ordered list of {a = Lua pattern,
  -- b = replacement}; b may refer to captures of a as %1, %2, ...
  -- The order matters: specific contexts (hard-coded, madda, initial,
  -- final) are listed before the generic "middle" rules.
  -- (The scraped line-number gutter was removed so the table parses as Lua.)
  hamzatrdmg = {
    -- hard coded hamza
    {a="|\"'", b="ʾ"},
    {a="A\"'", b="ʾA"},
    {a="[au]\"'", b="ʾ"},
    {a="w\"'", b="ʾ"},
    {a="i\"'", b="ʾ"},
    {a="y\"'", b="ʾ"},
    -- hamza takes tašdīd too
    {a="''([Uu])", b="ʾʾ%1"},
    {a="''([Aa])", b="ʾʾ%1"},
    {a="''([Ii])", b="ʾʾ%1"},
    -- initial long u and i (for a, see below)
    {a="%'%_U", b="ʾU"},
    {a="%'%_I", b="ʾI"},
    -- taḫfīfu 'l-hamza
    {a="'u'([^uaiUAI])", b="ʾU%1"},
    {a="'i'([^uaiUAI])", b="ʾI%1"},
    -- madda (historic writing below)
    {a="'a'([^uaiUAI])", b="ʾA%1"},
    {a="'a?A", b="ʾA"},
    {a="(A)(')(i)$", b="%1ʾ%3"},
    {a="(A)(')(i)(%W)", b="%1ʾ%3%4"},
    {a="(A)(')(i)", b="%1ʾ%3"}, -- historic madda
    {a="(A)(')", b="%1ʾ"}, -- historic madda
    -- initial (needs both ^ and %W patterns)
    -- 'aw: the diphthong is to be resolved into 'awi' (next 8 lines)
    {a="^('aw)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="%1i"},
    {a="(%W)('aw)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="%1%2i"},
    {a="^('aw)(o)(\"?[uai])([%S]-o)", b="%1i"},
    {a="(%W)('aw)(o)(\"?[uai])([%S]-o)", b="%1%2i"},
    {a="^('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"},
    {a="(%W)('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"},
    {a="^('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"}, --p
    {a="(%W)('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"}, --p
    -- then the 'initial' rules for the remaining cases
    {a="^(')([ua])", b="ʾ%2"},
    {a="^(')(i)", b="ʾ%2"},
    {a="(%W)(')([ua])", b="%1ʾ%3"},
    {a="(%W)(')(i)", b="%1ʾ%3"},
    -- final
    {a="([Iy])(')(aN)$", b="%1ʾ%3"},
    {a="([Iy])(')(aN)(%W)", b="%1ʾ%3%4"},
    {a="([^uai])(')([uai]N?)$", b="%1ʾ%3"},
    {a="([^uai])(')([uai]N?)(%W)", b="%1ʾ%3%4"},
    {a="([UI])(')([uai])$", b="%1ʾ%3"},
    {a="([UI])(')([uai])(%W)", b="%1ʾ%3%4"},
    -- middle
    {a="(U)(')", b="%1ʾ"},
    {a="([Iy])(')", b="%1ʾ"},
    {a="([^uai])(')([uU])", b="%1ʾ%3"},
    {a="([^uai])(')([aA])", b="%1ʾ%3"},
    {a="([^uai])(')([iI])", b="%1ʾ%3"},
    {a="(u)(')([uU])", b="%1ʾ%3"},
    {a="(u)(')([aA])", b="%1ʾ%3"},
    {a="(u)(')([iI])", b="%1ʾ%3"},
    {a="(a)(')([aA])", b="%1ʾ%3"},
    {a="(a)(')([uU])", b="%1ʾ%3"},
    {a="(a)(')([iI])", b="%1ʾ%3"},
    {a="(i)(')([aA])", b="%1ʾ%3"},
    {a="(i)(')([uU])", b="%1ʾ%3"},
    {a="(i)(')([iI])", b="%1ʾ%3"},
    {a="(a)(')([^uaiUAI])", b="%1ʾ%3"},
    {a="(u)(')([^uaiUAI])", b="%1ʾ%3"},
    {a="(i)(')([^uaiUAI])", b="%1ʾ%3"}
  }
  -- dmg scheme: tanwīn rules.  Ordered list of {a = Lua pattern,
  -- b = replacement}; the case ending is wrapped in \arbup{...}.
  -- Context-specific rules (before ʾalif conjunctionis, before 'lla_dI,
  -- before an initial vowel) come first; the generic rules close the table.
  -- (The scraped line-number gutter was removed so the table parses as Lua.)
  tanwintrdmg = {
    -- NOTE(review): the next two patterns accept [uai]NU but always emit
    -- "un", whereas the plain uNU/aNU/iNU rules below keep the vowel --
    -- confirm this is intended.
    {a="%-?([uai]NU)(o)([ui])([%S]-o)", b="\\arbup{un%3}"},
    {a="%-?([uai]NU)(%s)([ui])", b="\\arbup{un%3}%2'"},
    {a="%-?(iNI)(o)([ui])([%S]-o)", b="i\\arbup{n%3}"},
    {a="%-?(iNI)(%s)([ui])", b="i\\arbup{n%3}%2'"},
    {a="(o[%S]-)([uai]N[UI])(o)(\"?[ui])", b="'"},
    {a="%-?uNU", b="\\arbup{un}"},
    {a="%-?aNU", b="\\arbup{an}"},
    {a="%-?iNU", b="\\arbup{in}"},
    {a="%-?iNI", b="i\\arbup{n}"},
    -- tanwīn preceding ʾalif conjunctionis
    {a="%-?(uN)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="\\arbup{uni}"},
    {a="%-?(aN)(_A)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="ạ\\arbup{ni}"},
    {a="%-?(aN)(Y)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="ạ\\arbup{ni}"},
    {a="(T)%-?(aN)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="t\\arbup{ani}"},
    {a="([^TA])%-?(aN)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="%1\\arbup{ani}"},
    {a="%-?(iN)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="\\arbup{ini}"},
    {a="%-?(uN)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="\\arbup{uni}%2%3"},
    {a="%-?(aN)(_A)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="ạ\\arbup{ni}%3%4"},
    {a="%-?(aN)(Y)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="ạ\\arbup{ni}%3%4"},
    {a="(T)%-?(aN)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="t\\arbup{ani}%3%4"},
    {a="([^TA])%-?(aN)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1\\arbup{ani}%3%4"},
    {a="%-?(iN)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="\\arbup{ini}%2%3"},
    -- tanwīn preceding 'lla_dI/'llatI
    {a="%-?(uN)(o)('lla[%_]?[dt])([%S]-o)", b="\\arbup{uni}"},
    {a="%-?(aN)(_A)(o)('lla[%_]?[dt])([%S]-o)", b="ạ\\arbup{ni}"},
    {a="%-?(aN)(Y)(o)('lla[%_]?[dt])([%S]-o)", b="ạ\\arbup{ni}"},
    {a="(T)%-?(aN)(o)('lla[%_]?[dt])([%S]-o)", b="t\\arbup{ani}"},
    {a="([^TA])%-?(aN)(o)('lla[%_]?[dt])([%S]-o)", b="%1\\arbup{ani}"},
    {a="%-?(iN)(o)('lla[%_]?[dt])([%S]-o)", b="\\arbup{ini}"},
    {a="%-?(uN)(%s)('lla[%_]?[dt])", b="\\arbup{uni}%2%3"},
    {a="%-?(aN)(_A)(%s)('lla[%_]?[dt])", b="ạ\\arbup{ni}%3%4"},
    {a="%-?(aN)(Y)(%s)('lla[%_]?[dt])", b="ạ\\arbup{ni}%3%4"},
    {a="(T)%-?(aN)(%s)('lla[%_]?[dt])", b="t\\arbup{ani}%3%4"},
    {a="([^TA])%-?(aN)(%s)('lla[%_]?[dt])", b="%1\\arbup{ani}%3%4"},
    {a="%-?(iN)(%s)('lla[%_]?[dt])", b="\\arbup{ini}%2%3"},
    -- tanwīn + alif without hamza and kasra (ibn) or dhamma (uhrub)
    {a="%-?(uN)(o)([ui])([%S]-o)", b="\\arbup{un%3}"},
    {a="%-?(aN)(_A)(o)([ui])([%S]-o)", b="ạ\\arbup{n%4}"},
    {a="%-?(aN)(Y)(o)([ui])([%S]-o)", b="ạ\\arbup{n%4}"},
    {a="(T)%-?(aN)(o)([ui])([%S]-o)", b="t\\arbup{an%4}"},
    {a="([^TA])%-?(aN)(o)([ui])([%S]-o)", b="%1\\arbup{an%4}"},
    {a="%-?(iN)(o)([ui])([%S]-o)", b="\\arbup{in%3}"},
    {a="(o[%S]-)([uai]N)(o)(\"?[ui])", b="'"},
    {a="%-?(uN)(%s)([ui])", b="\\arbup{un%3}%2'"},
    {a="%-?(aN)(_A)(%s)([ui])", b="ạ\\arbup{n%4}%3'"},
    {a="%-?(aN)(Y)(%s)([ui])", b="ạ\\arbup{n%4}%3'"},
    {a="(T)%-?(aN)(%s)([ui])", b="t\\arbup{an%4}%3'"},
    {a="([^TA])%-?(aN)(%s)([ui])", b="%1\\arbup{an%4}%3'"},
    {a="%-?(iN)(%s)([ui])", b="\\arbup{in%3}%2'"},
    --
    -- {a="uN", b="\\arbup{un}"}, (now included in the last line of this table)
    {a="%-?(\"?At)%-?([ui])N", b="\\arbup{%1%2n}"},
    {a="%-?(aN)(_A)", b="ạ\\arbup{n}"},
    {a="%-?(aN)(Y)", b="ạ\\arbup{n}"},
    {a="(T)%-?(\"?aN)", b="t\\arbup{an}"},
    {a="([^TA])%-?(\"?aN)", b="%1\\arbup{an}"},
    {a="%-?([ui])N", b="\\arbup{%1n}"}
  }
  -- dmg scheme: rules for sequences of three or more input characters
  -- (relative pronoun, law, the article in its various contexts, silent
  -- wāw).  Ordered list of {a = Lua pattern, b = replacement}; the more
  -- specific article rules precede the catch-all "what remains" rules.
  -- (The scraped line-number gutter was removed so the table parses as Lua.)
  trigraphstrdmg = { -- trigraphs or more
    -- 'llatI / 'llad_I
    {a="^'ll(a)([%_]?[dt])", b="'ll%1%2"},
    {a="([%(%[%|%<%s])'ll(a)([%_]?[dt])", b="%1'll%2%3"}, --p
    -- law: the diphthong is to be resolved into 'awi' (next 8 lines)
    {a="^(law)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="%1i"},
    {a="(%W)(law)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="%1%2i"},
    {a="^(law)(o)(\"?[uai])([%S]-o)", b="%1i"},
    {a="(%W)(law)(o)(\"?[uai])([%S]-o)", b="%1%2i"},
    {a="^(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"},
    {a="(%W)(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"},
    {a="^(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"}, --p
    {a="(%W)(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"}, --p
    -- al- + lām
    {a="^(a)l%-(l)", b="%1l-%2"},
    {a="([%(%[%|%<%s%-o])(a)l%-(l)", b="%1%2l-%3"}, --p
    -- al- + solar consonant
    {a="^(a)l%-([%_%^%.]?[tdrzsn])", b="%1%2-%2"},
    {a="([%(%[%|%<%s%-o])(a)l%-([%_%^%.]?[tdrzsn])", b="%1%2%3-%3"}, --p
    -- assim. art. + solar consonant
    {a="^(a)([%_%^%.]?[tdrzsn])%-", b="%1%2-"},
    {a="([%(%[%|%<%s%-o])(a)([%_%^%.]?[tdrzsn])%-", b="%1%2%3-"}, --p
    -- al- + initial unstable hamza
    {a="^(a)l%-(\"?[uai])", b="%1l-%2"},
    {a="([%(%[%|%<%s%-o])(a)l%-(\"?[uai])", b="%1%2l-%3"}, --p
    -- li-/la- + art. + initial unstable hamza is a special orthography
    {a="l([ai])%-l%-([uai])", b="l%1-l-%2"},
    -- al- + lunar consonant (i.e. what remains)
    {a="^(a)l%-", b="%1l-"},
    {a="([%(%[%|%<%s%-o])(a)l%-", b="%1%2l-"}, --p
    -- diphthongs to be resolved before ʾalif conjunctionis
    {a="(aw)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="%1u"},
    {a="(ay)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="%1i"},
    {a="(aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1u%2%3"},
    {a="(ay)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"},
    -- art. with waṣla + lām
    {a="'l%-(l)", b="'l-%1"},
    -- art. with waṣla + solar consonant
    {a="'l%-([%_%^%.]?[tdrzsn])", b="'%1-%1"},
    -- li-/la- + art. + lām
    {a="l([ai])%-l%-(l)", b="l%1-%2%2"},
    -- assim. art. with waṣla + solar consonant
    {a="'([%_%^%.]?[tdrzsn])%-", b="'%1-"},
    -- li-/la- + art. + solar consonant is a special orthography
    {a="l([ai])%-l%-([%_%^%.]?[tdrzsn])", b="l%1-%2-%2"},
    -- li-/la- + assim. art. + solar consonant is a special orthography
    {a="l([ai])%-([%_%^%.]?[tdrzsn])%-([%_%^%.]?[tdrzsn])", b="l%1-%2-%3"},
    -- art. with waṣla + initial unstable hamza
    {a="'l%-(\"?[uai])", b="'l-%1"},
    -- art. with waṣla + lunar consonant (i.e. what remains)
    {a="'l%-", b="'l-"},
    -- the silent wāw
    {a="uU$", b="u"},
    {a="uU(%W)", b="u%1"},
    {a="aU$", b="a"},
    {a="aU(%W)", b="a%1"},
    {a="iU$", b="i"},
    {a="iU(%W)", b="i%1"},
    -- words ending in -āT with silent wāw/yāʾ
    {a="(_a)UA", b="A"},
    {a="(_a)U", b="A"},
    {a="(_a)I", b="A"}
  }
  -- dmg scheme: assimilation of a word-final n to a following r, l, m,
  -- n, w or y.  In both rules the n is replaced by a copy of that
  -- following consonant (%4); an optional `}` after the n is kept (%2).
  -- (The scraped line-number gutter was removed so the table parses as Lua.)
  idghamtrdmg = {
    -- assimilations
    {a="(n)(}?)(%s)([rlmnwy])", b="%4%2%3%4"},
    {a="(n)(}?)(o)([rlmnwy])([%S]-o)", b="%4%2"}
  }
  -- dmg scheme: two-character rules -- initial alif without hamza,
  -- shortening of long vowels before ʾalif conjunctionis, ʾiʿrāb hyphen,
  -- tāʾ marbūṭa (T) and the prefixed-diacritic consonants/vowels
  -- (_t, ^g, .h, ...).  Ordered list of {a = Lua pattern, b = replacement}.
  -- Fixes in this revision: the scraped line-number gutter was removed so
  -- the table parses as Lua, and `.k` now yields ḳ (dot below) like its
  -- siblings .b/.f/.q/.n instead of a plain k.
  digraphstrdmg = {
    {a="([uai]%-)(\"?[uai])", b="%1'"}, -- hyphen + initial alif without hamza
    -- the following two are replaced with the 4 lines next for now
    -- {a="^(\"?[uai])", b="%1"}, -- initial alif without hamza
    -- {a="(%W)(\"?[uai])", b="%1%2"}, -- initial alif without hamza
    -- {a="^(\"[uai])", b="'"}, -- initial alif without hamza
    -- {a="(%W)(\"[uai])", b="%1'"}, -- initial alif without hamza
    {a="^(\"?[uai])", b="%1"}, -- initial alif without hamza
    {a="(%W)(\"?[uai])", b="%1%2"}, -- initial alif without hamza
    -- this is not necessary, take out for now:
    -- {a="([%_]?[uaiUAIY])(%s)([uai])", b="%1%2'"}, -- initial alif without hamza
    {a="(aw)(o)(\"?[uai])([%S]-o)", b="%1u"},
    {a="(aw)(%s)([%(%[%|%<]?)(\"?[uai])", b="%1u%2%3'"}, --p
    {a="(ay)(o)(\"?[uai])([%S]-o)", b="%1i"},
    {a="(ay)(%s)([%(%[%|%<]?)(\"?[uai])", b="%1i%2%3'"}, --p
    {a="(aW)(o)(\"?[uai])([%S]-o)", b="awu"},
    {a="(UA)(o)(\"?[uai])([%S]-o)", b="u"},
    {a="(%_A)(o)(\"?[uai])([%S]-o)", b="ạ"},
    {a="(Y)(o)(\"?[uai])([%S]-o)", b="ạ"},
    {a="(%_a)(o)(\"?[uai])([%S]-o)", b="a"},
    {a="(A)(o)(\"?[uai])([%S]-o)", b="a"},
    {a="([%_]?[Uu])(o)(\"?[uai])([%S]-o)", b="u"},
    {a="([%_]?[Ii])(o)(\"?[uai])([%S]-o)", b="i"},
    {a="(o[%S]-)([UAIYWuaiyw])(o)(\"?[uai])", b="'"},
    -- NOTE(review): this rule keeps the following vowel (%4) whereas the
    -- sibling rules below replace it with ' -- confirm this is intended.
    {a="(aW)(%s)([%(%[%|%<]?)(\"?[uai])", b="awu%2%3%4"}, --p
    {a="(UA)(%s)([%(%[%|%<]?)(\"?[uai])", b="u%2%3'"}, --p
    {a="([^%_][uai])(%s)([%(%[%|%<]?)(\"?[uai])", b="%1%2%3'"}, --p
    {a="(%_A)(%s)([%(%[%|%<]?)(\"?[uai])", b="ạ%2%3'"}, --p
    {a="(Y)(%s)([%(%[%|%<]?)(\"?[uai])", b="ạ%2%3'"}, --p
    {a="(%_a)(%s)([%(%[%|%<]?)(\"?[uai])", b="a%2%3'"}, --p
    {a="(A)(%s)([%(%[%|%<]?)(\"?[uai])", b="a%2%3'"}, --p
    {a="([%_]?[Uu])(%s)([%(%[%|%<]?)(\"?[uai])", b="u%2%3'"}, --p
    {a="([%_]?[Ii])(%s)([%(%[%|%<]?)(\"?[uai])", b="i%2%3'"}, --p
    -- ʾiʿrāb hyphen (begin)
    {a="(%-)(\"?[UI]na)(%p?%s)", b="\\arbup{%2}%3"},
    {a="(%-)(\"?[UI]na)(%p?)$", b="\\arbup{%2}%3"},
    {a="(%-)(\"?At[ui])(%p?%s)", b="\\arbup{%2}%3"},
    {a="(%-)(\"?At[ui])(%p?)$", b="\\arbup{%2}%3"},
    {a="(%-)(\"?Ani)(%p?%s)", b="\\arbup{%2}%3"},
    {a="(%-)(\"?Ani)(%p?)$", b="\\arbup{%2}%3"},
    {a="(%-)(\"?ayni)(%p?%s)", b="\\arbup{%2}%3"},
    {a="(%-)(\"?ayni)(%p?)$", b="\\arbup{%2}%3"},
    {a="(%-)(\"?[uai])(%p?%s)", b="\\arbup{%2}%3"},
    {a="(%-)(\"?[uai])(%p?)$", b="\\arbup{%2}%3"},
    -- ʾiʿrāb hyphen (end) shorten long vowels preceding ʾalif
    -- conjunctionis—without forgetting 'lla_dI
    {a="(U)(A)", b="U"},
    {a="(aW)(o)('[%_%^%.]?[l'btjghxdrzs`fqkmnwy][%-l])([%S]-o)", b="awu"},
    {a="(%_a)(o)('[%_%^%.]?[l'btjghxdrzs`fqkmnwy][%-l])([%S]-o)", b="a"},
    {a="(%_A)(o)('[%_%^%.]?[l'btjghxdrzs`fqkmnwy][%-l])([%S]-o)", b="ạ"},
    {a="(A)(o)('[%_%^%.]?[l'btjghxdrzs`fqkmnwy][%-l])([%S]-o)", b="a"},
    {a="(Y)(o)('[%_%^%.]?[l'btjghxdrzs`fqkmnwy][%-l])([%S]-o)", b="ạ"},
    {a="([%_]?[Uu])(o)('[%_%^%.]?[l'btjghxdrzs`fqkmnwy][%-l])([%S]-o)", b="u"},
    {a="([%_]?[Ii])(o)('[%_%^%.]?[l'btjghxdrzs`fqkmnwy][%-l])([%S]-o)", b="i"},
    --p (next 7 lines, just after %s)
    {a="(aW)(%s)([%(%[%|%<]?['][%_%^%.]?[l'btjghxdrzs`fqkmnwy][%-l])", b="awu%2%3"},
    {a="(%_a)(%s)([%(%[%|%<]?['][%_%^%.]?[l'btjghxdrzs`fqkmnwy][%-l])", b="a%2%3"},
    {a="(%_A)(%s)([%(%[%|%<]?['][%_%^%.]?[l'btjghxdrzs`fqkmnwy][%-l])", b="ạ%2%3"},
    {a="(A)(%s)([%(%[%|%<]?['][%_%^%.]?[l'btjghxdrzs`fqkmnwy][%-l])", b="a%2%3"},
    {a="(Y)(%s)([%(%[%|%<]?['][%_%^%.]?[l'btjghxdrzs`fqkmnwy][%-l])", b="ạ%2%3"},
    {a="([%_]?[Uu])(%s)([%(%[%|%<]?['][%_%^%.]?[l'btjghxdrzs`fqkmnwy][%-l])", b="u%2%3"},
    {a="([%_]?[Ii])(%s)([%(%[%|%<]?['][%_%^%.]?[l'btjghxdrzs`fqkmnwy][%-l])", b="i%2%3"},
    {a="%-%-", b=""},
    {a="iyyaT(%p?)$", b="īyaT%1"},
    {a="iyyaT(%p?%s)", b="īyaT%1"},
    {a="iyy(%p?)$", b="ī%1"},
    {a="iyy(%p?%s)", b="ī%1"},
    -- {a="T([^uai])", b="%1"},
    {a="T(\\arbup)", b="t%1"},
    {a="([a%'][%_%^%.]?[tdrzsln]%-)(%S-)T([%(%[%|%<%s])(a[%_%^%.]?[tdrzsln]%-)", b="%1%2h%3%4"}, --p
    {a="T([%(%[%|%<%s])(a[%_%^%.]?[tdrzsln]%-)", b="t%1%2"}, --p
    {a="T([%|\"])", b="t%1"},
    {a="T(%p?%s)", b="h%1"},
    {a="T(%p?)$", b="h%1"},
    {a="T(%p?)(%W)", b="h%1%2"},
    {a="_t", b="ṯ"},
    {a="%^g", b="ǧ"},
    {a="%.h", b="ḥ"},
    {a="_h", b="ḫ"},
    {a="_d", b="ḏ"},
    {a="%^s", b="š"},
    {a="%.s", b="ṣ"},
    {a="%.d", b="ḍ"},
    {a="%.t", b="ṭ"},
    {a="%.z", b="ẓ"},
    {a="%.g", b="ġ"},
    -- the following needs to be moved above shortening rules
    -- {a="(U)(A)", b="ū"},
    {a="WA", b="w"},
    {a="(a)W", b="%1w"},
    {a="_A", b="ạ̄"},
    {a="_u", b="ū"},
    {a="_a", b="ā"},
    {a="_i", b="ī"},
    {a="%.b", b="ḅ"},
    {a="%.f", b="f̣"},
    {a="%.q", b="q̣"},
    {a="%.k", b="ḳ"},
    {a="%.n", b="ṇ"},
    {a="%^d", b="d́"}
  }
  -- dmg scheme: single-character rules.  Most consonants map to
  -- themselves; j, x and ` receive their diacritic forms, T becomes t,
  -- and the `"` and `B` markers are deleted.
  -- (The scraped line-number gutter was removed so the table parses as Lua.)
  singletrdmg = {
    {a="b", b="b"},
    {a="t", b="t"},
    {a="j", b="ǧ"},
    {a="x", b="ḫ"},
    {a="d", b="d"},
    {a="r", b="r"},
    {a="z", b="z"},
    {a="s", b="s"},
    {a="`", b="ʿ"},
    {a="f", b="f"},
    {a="q", b="q"},
    {a="k", b="k"},
    {a="l", b="l"},
    {a="m", b="m"},
    {a="n", b="n"},
    {a="h", b="h"},
    {a="w", b="w"},
    {a="y", b="y"},
    {a="T", b="t"},
    {a="\"", b=""},
    {a="B", b=""}
  }
  -- dmg scheme: long vowels.  The upper-case input letters A, U, I
  -- become macron vowels; Y becomes a-with-dot-below plus macron.
  -- (The scraped line-number gutter was removed so the table parses as Lua.)
  longvtrdmg = {
    {a="A", b="ā"},
    {a="U", b="ū"},
    {a="I", b="ī"},
    {a="Y", b="ạ̄"}
  }
  -- dmg scheme: short vowels.  Each of u, a, i maps to itself.
  -- (The scraped line-number gutter was removed so the table parses as Lua.)
  shortvtrdmg = {
    {a="u", b="u"},
    {a="a", b="a"},
    {a="i", b="i"}
  }
  -- loc
  -- loc scheme: hamza rules.  Ordered list of {a = Lua pattern,
  -- b = replacement}.  Unlike the dmg table, the "initial" rules drop the
  -- hamza (at string start or after a non-alphanumeric character) instead
  -- of writing ʾ.
  -- Fixes in this revision: the scraped line-number gutter was removed so
  -- the table parses as Lua, and the two catch-all taḫfīf rules now put
  -- back the character they capture (%1); previously that character was
  -- silently deleted from the output.
  hamzatrloc = {
    -- hard coded hamza
    {a="|\"'", b="ʾ"},
    {a="A\"'", b="ʾA"},
    {a="[au]\"'", b="ʾ"},
    {a="w\"'", b="ʾ"},
    {a="i\"'", b="ʾ"},
    {a="y\"'", b="ʾ"},
    -- hamza takes tašdīd too
    {a="''([Uu])", b="ʾʾ%1"},
    {a="''([Aa])", b="ʾʾ%1"},
    {a="''([Ii])", b="ʾʾ%1"},
    -- initial long u and i (for a, see below)
    {a="%'%_U", b="U"},
    {a="%'%_I", b="I"},
    -- taḫfīfu 'l-hamza
    {a="^'u'([^uaiUAI])", b="U%1"},
    {a="(%W)'u'([^uaiUAI])", b="%1U%2"},
    {a="'u'([^uaiUAI])", b="ʾU%1"}, -- %1 restores the captured character
    {a="^'i'([^uaiUAI])", b="I%1"},
    {a="(%W)'i'([^uaiUAI])", b="%1I%2"},
    {a="'i'([^uaiUAI])", b="ʾI%1"}, -- %1 restores the captured character
    -- madda (historic writing below)
    {a="^(')(A)", b="%2"},
    {a="(%W)(')(A)", b="%1%3"},
    {a="^'a'([^uaiUAI])", b="A%1"},
    {a="(%W)'a'([^uaiUAI])", b="%1A%2"},
    -- NOTE(review): unlike the u/i and 'a?A catch-alls, this one emits no
    -- ʾ for a non-initial match -- confirm whether "ʾA%1" was intended.
    {a="'a'([^uaiUAI])", b="A%1"},
    {a="^'a?A", b="A"},
    {a="(%W)'a?A", b="%1A"},
    {a="'a?A", b="ʾA"},
    {a="(A)(')(i)$", b="%1ʾ%3"},
    {a="(A)(')(i)(%W)", b="%1ʾ%3%4"},
    {a="(A)(')(i)", b="%1ʾ%3"}, -- historic madda
    {a="(A)(')", b="%1ʾ"}, -- historic madda
    -- initial (needs both ^ and %W patterns)
    {a="^(')([ua])", b="%2"},
    {a="^(')(i)", b="%2"},
    {a="(%W)(')([ua])", b="%1%3"},
    {a="(%W)(')(i)", b="%1%3"},
    -- final
    {a="([Iy])(')(aN)$", b="%1ʾ%3"},
    {a="([Iy])(')(aN)(%W)", b="%1ʾ%3%4"},
    {a="([^uai])(')([uai]N?)$", b="%1ʾ%3"},
    {a="([^uai])(')([uai]N?)(%W)", b="%1ʾ%3%4"},
    {a="([UI])(')([uai])$", b="%1ʾ%3"},
    {a="([UI])(')([uai])(%W)", b="%1ʾ%3%4"},
    -- middle
    {a="(U)(')", b="%1ʾ"},
    {a="([Iy])(')", b="%1ʾ"},
    {a="([^uai])(')([uU])", b="%1ʾ%3"},
    {a="([^uai])(')([aA])", b="%1ʾ%3"},
    {a="([^uai])(')([iI])", b="%1ʾ%3"},
    {a="(u)(')([uU])", b="%1ʾ%3"},
    {a="(u)(')([aA])", b="%1ʾ%3"},
    {a="(u)(')([iI])", b="%1ʾ%3"},
    {a="(a)(')([aA])", b="%1ʾ%3"},
    {a="(a)(')([uU])", b="%1ʾ%3"},
    {a="(a)(')([iI])", b="%1ʾ%3"},
    {a="(i)(')([aA])", b="%1ʾ%3"},
    {a="(i)(')([uU])", b="%1ʾ%3"},
    {a="(i)(')([iI])", b="%1ʾ%3"},
    {a="(a)(')([^uaiUAI])", b="%1ʾ%3"},
    {a="(u)(')([^uaiUAI])", b="%1ʾ%3"},
    {a="(i)(')([^uaiUAI])", b="%1ʾ%3"}
  }
  -- loc scheme: tanwīn rules.  The ending is written as plain un/an/in
  -- (no \arbup wrapper, unlike the dmg table); an optional hyphen before
  -- the ending is consumed by the pattern.
  -- (The scraped line-number gutter was removed so the table parses as Lua.)
  tanwintrloc = {
    {a="%-?uNU", b="un"},
    {a="%-?aNU", b="an"},
    {a="%-?iNU", b="in"},
    {a="%-?iNI", b="in"},
    {a="%-?(\"?At)%-?([ui])N", b="%1%2n"},
    {a="%-?([ui])N", b="%1n"},
    {a="%-?(aN)(_A)", b="an"},
    {a="%-?(aN)(Y)", b="an"},
    {a="(T)%-?(\"?aN)", b="tan"},
    {a="([^TA])%-?(\"?aN)", b="%1an"}
  }
  -- loc scheme: rules for sequences of three or more input characters.
  -- Every form of the article (assimilated, or written with waṣla ')
  -- is normalised to "al-" / "l-"; the relative pronoun 'lla... becomes
  -- alla...  Ordered list of {a = Lua pattern, b = replacement}.
  -- (The scraped line-number gutter was removed so the table parses as Lua.)
  trigraphstrloc = { -- trigraphs or more
    -- 'llatI / 'llad_I
    {a="^'ll(a)([%_]?[dt])", b="all%1%2"},
    {a="([%(%[%|%<%s])'ll(a)([%_]?[dt])", b="%1all%2%3"}, --p
    -- al- + lām
    {a="^(a)l%-(l)", b="%1l-%2"},
    {a="(%s)(a)l%-(l)", b="%1%2l-%3"},
    -- al- + solar consonant
    {a="^(a)l%-([%_%^%.]?[tdrzsn])", b="%1l-%2"},
    {a="(%s)(a)l%-([%_%^%.]?[tdrzsn])", b="%1%2l-%3"},
    -- assim. art. + solar consonant
    {a="^(a)([%_%^%.]?[tdrzsn])%-", b="%1l-"},
    {a="(%s)(a)([%_%^%.]?[tdrzsn])%-", b="%1%2l-"},
    -- al- + initial unstable hamza
    {a="^(a)l%-([uai])", b="%1l-%2"},
    {a="(%s)(a)l%-([uai])", b="%1%2l-%3"},
    -- li-/la- + art. + initial unstable hamza is a special orthography
    {a="l([ai])%-l%-([uai])", b="l%1-l-%2"},
    -- al- + lunar consonant (i.e. what remains)
    {a="^(a)l%-", b="%1l-"},
    {a="(%s)(a)l%-", b="%1%2l-"},
    -- art. with waṣla + lām
    {a="'l%-(l)", b="al-%1"},
    -- art. with waṣla + solar consonant
    {a="'l%-([%_%^%.]?[tdrzsn])", b="al-%1"},
    -- li-/la- + art. + lām
    {a="l([ai])%-l%-(l)", b="l%1-l-%2"},
    -- assim. art. with waṣla + solar consonant
    {a="'([%_%^%.]?[tdrzsn])%-", b="al-"},
    -- li-/la- + art. + solar consonant is a special orthography
    {a="l([ai])%-l%-([%_%^%.]?[tdrzsn])", b="l%1-l-%2"},
    -- li-/la- + assim. art. + solar consonant is a special orthography
    {a="l([ai])%-([%_%^%.]?[tdrzsn])%-([%_%^%.]?[tdrzsn])", b="l%1-l-%3"},
    -- art. with waṣla + initial unstable hamza
    {a="'l%-([uai])", b="al-%1"},
    -- art. with waṣla + lunar consonant (i.e. what remains)
    {a="'l%-", b="al-"},
    -- the silent wāw
    {a="uU$", b="u"},
    {a="uU(%W)", b="u%1"},
    {a="aU$", b="a"},
    {a="aU(%W)", b="a%1"},
    {a="iU$", b="i"},
    {a="iU(%W)", b="i%1"},
    -- words ending in -āT with silent wāw/yāʾ
    {a="(_a)UA", b="A"},
    {a="(_a)U", b="A"},
    {a="(_a)I", b="A"}
  }
  -- loc scheme: two-character rules.  The ʾiʿrāb hyphen is discarded,
  -- an apostrophe is inserted between t/k/d/s/g and a following h, and
  -- the prefixed-diacritic consonants are spelled as digraphs
  -- (th, kh, dh, sh, gh) or with a dot below.
  -- Ordered list of {a = Lua pattern, b = replacement}.
  -- (The scraped line-number gutter was removed so the table parses as Lua.)
  digraphstrloc = {
    -- discard the ʾiʿrāb hyphen (begin)
    {a="(%-)(\"?[UI]na)(%p?%s)", b="%2%3"},
    {a="(%-)(\"?[UI]na)(%p?)$", b="%2%3"},
    {a="(%-)(\"?At[ui])(%p?%s)", b="%2%3"},
    {a="(%-)(\"?At[ui])(%p?)$", b="%2%3"},
    {a="(%-)(\"?Ani)(%p?%s)", b="%2%3"},
    {a="(%-)(\"?Ani)(%p?)$", b="%2%3"},
    {a="(%-)(\"?ayni)(%p?%s)", b="%2%3"},
    {a="(%-)(\"?ayni)(%p?)$", b="%2%3"},
    {a="(%-)([uai])(%p?%s)", b="%2%3"},
    {a="(%-)([uai])(%p?)$", b="%2%3"},
    -- discard the ʾiʿrāb hyphen (end)
    {a="(%-)(\"?[uai])", b="%1%2"}, -- hyphen + initial alif without hamza
    {a="^(\"?[uai])", b="%1"}, -- initial alif without hamza
    {a="(%s)([uai])", b="%1%2"}, -- initial alif without hamza
    {a="%-%-", b=""},
    {a="uww", b="ūw"},
    {a="iyy(%p?)$", b="ī%1"},
    {a="iyy(%p?%s)", b="ī%1"},
    {a="iyy", b="īy"},
    {a="([tkdsg])(h)", b="%1'%2"},
    -- {a="T([^uai])", b="h%1"},
    {a="([a%']l%-)(%S-)T([%(%[%|%<%s])(al%-)", b="%1%2h%3%4"}, --p
    {a="T([%(%[%|%<%s])(al%-)", b="t%1%2"}, --p
    {a="T(%p?)$", b="h%1"},
    {a="T(%p?%s)", b="h%1"},
    {a="_t", b="th"},
    {a="%^g", b="j"},
    {a="%.h", b="ḥ"},
    {a="_h", b="kh"},
    {a="_d", b="dh"},
    {a="%^s", b="sh"},
    {a="%.s", b="ṣ"},
    {a="%.d", b="ḍ"},
    {a="%.t", b="ṭ"},
    {a="%.z", b="ẓ"},
    {a="%.g", b="gh"},
    {a="(U)(A)", b="ū"},
    {a="WA", b="w"},
    {a="(a)W", b="%1w"},
    {a="_A", b="á"},
    {a="_u", b="ū"},
    {a="_a", b="ā"},
    {a="_i", b="ī"},
    {a="%.b", b="b"},
    {a="%.f", b="f"},
    {a="%.q", b="q"},
    {a="%.k", b="k"},
    {a="%.n", b="n"},
    {a="%^d", b="d"}
  }
  -- loc scheme: single-character rules.  Every consonant maps to itself
  -- except x (kh) and T (t); the backquote is kept as a backquote, and
  -- the `"` and `B` markers are deleted.
  -- (The scraped line-number gutter was removed so the table parses as Lua.)
  singletrloc = {
    {a="b", b="b"},
    {a="t", b="t"},
    {a="j", b="j"},
    {a="x", b="kh"},
    {a="d", b="d"},
    {a="r", b="r"},
    {a="z", b="z"},
    {a="s", b="s"},
    {a="`", b="`"},
    {a="f", b="f"},
    {a="q", b="q"},
    {a="k", b="k"},
    {a="l", b="l"},
    {a="m", b="m"},
    {a="n", b="n"},
    {a="h", b="h"},
    {a="w", b="w"},
    {a="y", b="y"},
    {a="T", b="t"},
    {a="\"", b=""},
    {a="B", b=""}
  }
  -- loc scheme: long vowels.  A, U, I become macron vowels; Y becomes á.
  -- (The scraped line-number gutter was removed so the table parses as Lua.)
  longvtrloc = {
    {a="A", b="ā"},
    {a="U", b="ū"},
    {a="I", b="ī"},
    {a="Y", b="á"},
  }
  -- loc scheme: short vowels.  Each of u, a, i maps to itself.
  -- (The scraped line-number gutter was removed so the table parses as Lua.)
  shortvtrloc = {
    {a="u", b="u"},
    {a="a", b="a"},
    {a="i", b="i"}
  }
  -- loc scheme: final rule, replacing the ʾ sign written by the hamza
  -- rules with an ASCII apostrophe.
  -- (The scraped line-number gutter was removed so the table parses as Lua.)
  finaltrloc = {
    {a="ʾ", b="'"},
  }
  -- arabica
  -- arabica scheme: hamza rules.  Same layout as the loc table, except
  -- that an initial hamza before a short vowel is replaced by the
  -- placeholder "@" (removed later by the arabica digraph rules) rather
  -- than dropped outright.
  -- Fixes in this revision: the scraped line-number gutter was removed so
  -- the table parses as Lua, and the two catch-all taḫfīf rules now put
  -- back the character they capture (%1); previously that character was
  -- silently deleted from the output.
  hamzatrarabica = { -- ≠ from hamzatrloc: initial hamza has to be held
    -- hard coded hamza
    {a="|\"'", b="ʾ"},
    {a="A\"'", b="ʾA"},
    {a="[au]\"'", b="ʾ"},
    {a="w\"'", b="ʾ"},
    {a="i\"'", b="ʾ"},
    {a="y\"'", b="ʾ"},
    -- hamza takes tašdīd too
    {a="''([Uu])", b="ʾʾ%1"},
    {a="''([Aa])", b="ʾʾ%1"},
    {a="''([Ii])", b="ʾʾ%1"},
    -- initial long u and i (for a, see below)
    {a="%'%_U", b="U"},
    {a="%'%_I", b="I"},
    -- taḫfīfu 'l-hamza
    {a="^'u'([^uaiUAI])", b="U%1"},
    {a="(%W)'u'([^uaiUAI])", b="%1U%2"},
    {a="'u'([^uaiUAI])", b="ʾU%1"}, -- %1 restores the captured character
    {a="^'i'([^uaiUAI])", b="I%1"},
    {a="(%W)'i'([^uaiUAI])", b="%1I%2"},
    {a="'i'([^uaiUAI])", b="ʾI%1"}, -- %1 restores the captured character
    -- madda (historic writing below)
    {a="^(')(A)", b="%2"},
    {a="(%W)(')(A)", b="%1%3"},
    {a="^'a'([^uaiUAI])", b="A%1"},
    {a="(%W)'a'([^uaiUAI])", b="%1A%2"},
    -- NOTE(review): unlike the u/i and 'a?A catch-alls, this one emits no
    -- ʾ for a non-initial match -- confirm whether "ʾA%1" was intended.
    {a="'a'([^uaiUAI])", b="A%1"},
    {a="^'a?A", b="A"},
    {a="(%W)'a?A", b="%1A"},
    {a="'a?A", b="ʾA"},
    {a="(A)(')(i)$", b="%1ʾ%3"},
    {a="(A)(')(i)(%W)", b="%1ʾ%3%4"},
    {a="(A)(')(i)", b="%1ʾ%3"}, -- historic madda
    {a="(A)(')", b="%1ʾ"}, -- historic madda
    -- initial (needs both ^ and %W patterns):
    -- hold it for now (see below, beginning of digraphs table)
    {a="^(')([ua])", b="@%2"},
    {a="^(')(i)", b="@%2"},
    {a="(%W)(')([ua])", b="%1@%3"},
    {a="(%W)(')(i)", b="%1@%3"},
    -- final
    {a="([Iy])(')(aN)$", b="%1ʾ%3"},
    {a="([Iy])(')(aN)(%W)", b="%1ʾ%3%4"},
    {a="([^uai])(')([uai]N?)$", b="%1ʾ%3"},
    {a="([^uai])(')([uai]N?)(%W)", b="%1ʾ%3%4"},
    {a="([UI])(')([uai])$", b="%1ʾ%3"},
    {a="([UI])(')([uai])(%W)", b="%1ʾ%3%4"},
    -- middle
    {a="(U)(')", b="%1ʾ"},
    {a="([Iy])(')", b="%1ʾ"},
    {a="([^uai])(')([uU])", b="%1ʾ%3"},
    {a="([^uai])(')([aA])", b="%1ʾ%3"},
    {a="([^uai])(')([iI])", b="%1ʾ%3"},
    {a="(u)(')([uU])", b="%1ʾ%3"},
    {a="(u)(')([aA])", b="%1ʾ%3"},
    {a="(u)(')([iI])", b="%1ʾ%3"},
    {a="(a)(')([aA])", b="%1ʾ%3"},
    {a="(a)(')([uU])", b="%1ʾ%3"},
    {a="(a)(')([iI])", b="%1ʾ%3"},
    {a="(i)(')([aA])", b="%1ʾ%3"},
    {a="(i)(')([uU])", b="%1ʾ%3"},
    {a="(i)(')([iI])", b="%1ʾ%3"},
    {a="(a)(')([^uaiUAI])", b="%1ʾ%3"},
    {a="(u)(')([^uaiUAI])", b="%1ʾ%3"},
    {a="(i)(')([^uaiUAI])", b="%1ʾ%3"}
  }
  -- arabica scheme: rules for sequences of three or more input
  -- characters.  The article is normalised to "al-" / "l-"; forms written
  -- with waṣla (') lose the apostrophe and become "l-", and the relative
  -- pronoun 'lla... becomes lla...
  -- Ordered list of {a = Lua pattern, b = replacement}.
  -- (The scraped line-number gutter was removed so the table parses as Lua.)
  trigraphstrarabica = { -- trigraphs or more
    -- 'llatI / 'llad_I
    {a="^'ll(a)([%_]?[dt])", b="ll%1%2"},
    {a="([%-%(%[%|%<%s])'ll(a)([%_]?[dt])", b="%1ll%2%3"}, --p
    -- al- + lām
    {a="^(a)l%-(l)", b="%1l-%2"},
    {a="(%s)(a)l%-(l)", b="%1%2l-%3"},
    -- al- + solar consonant
    {a="^(a)l%-([%_%^%.]?[tdrzsn])", b="%1l-%2"},
    {a="(%s)(a)l%-([%_%^%.]?[tdrzsn])", b="%1%2l-%3"},
    -- assim. art. + solar consonant
    {a="^(a)([%_%^%.]?[tdrzsn])%-", b="%1l-"},
    {a="(%s)(a)([%_%^%.]?[tdrzsn])%-", b="%1%2l-"},
    -- al- + initial unstable hamza
    {a="^(a)l%-([uai])", b="%1l-%2"},
    {a="(%s)(a)l%-([uai])", b="%1%2l-%3"},
    -- li-/la- + art. + initial unstable hamza is a special orthography
    {a="l([ai])%-l%-([uai])", b="l%1-l-%2"},
    -- al- + lunar consonant (i.e. what remains)
    {a="^(a)l%-", b="%1l-"},
    {a="(%s)(a)l%-", b="%1%2l-"},
    -- art. with waṣla + lām
    {a="'l%-(l)", b="l-%1"},
    -- art. with waṣla + solar consonant
    {a="'l%-([%_%^%.]?[tdrzsn])", b="l-%1"},
    -- li-/la- + art. + lām
    {a="l([ai])%-l%-(l)", b="l%1-l-%2"},
    -- assim. art. with waṣla + solar consonant
    {a="'([%_%^%.]?[tdrzsn])%-", b="l-"},
    -- li-/la- + art. + solar consonant is a special orthography
    {a="l([ai])%-l%-([%_%^%.]?[tdrzsn])", b="l%1-l-%2"},
    -- li-/la- + assim. art. + solar consonant is a special orthography
    {a="l([ai])%-([%_%^%.]?[tdrzsn])%-([%_%^%.]?[tdrzsn])", b="l%1-l-%3"},
    -- art. with waṣla + initial unstable hamza
    {a="'l%-([uai])", b="l-%1"},
    -- art. with waṣla + lunar consonant (i.e. what remains)
    {a="'l%-", b="l-"},
    -- the silent wāw
    {a="uU$", b="u"},
    {a="uU(%W)", b="u%1"},
    {a="aU$", b="a"},
    {a="aU(%W)", b="a%1"},
    {a="iU$", b="i"},
    {a="iU(%W)", b="i%1"},
    -- words ending in -āT with silent wāw/yāʾ
    {a="(_a)UA", b="A"},
    {a="(_a)U", b="A"},
    {a="(_a)I", b="A"}
  }
  -- arabica scheme: two-character rules.  The first two rules delete an
  -- initial vowel that follows a vowel (across a hyphen or a space); the
  -- third removes the "@" placeholder that the arabica hamza rules put in
  -- place of an initial hamza.  The rest discards the ʾiʿrāb hyphen,
  -- handles tāʾ marbūṭa after a (aT) and maps the prefixed-diacritic
  -- letters.  Ordered list of {a = Lua pattern, b = replacement}.
  -- (The scraped line-number gutter was removed so the table parses as Lua.)
  digraphstrarabica = {
    {a="([uai]%-)(\"?[uai])", b="%1"}, -- hyphen + initial alif without hamza
    {a="([UAIYuai])(%s)([%(%[%|%<]?)(\"?[uai])", b="%1%2%3"}, --p
    {a="@", b=""}, -- remove the tag before the former hamza
    -- discard the ʾiʿrāb hyphen (begin)
    {a="(%-)(\"?[UI]na)(%p?%s)", b="%2%3"},
    {a="(%-)(\"?[UI]na)(%p?)$", b="%2%3"},
    {a="(%-)(\"?At[ui])(%p?%s)", b="%2%3"},
    {a="(%-)(\"?At[ui])(%p?)$", b="%2%3"},
    {a="(%-)(\"?Ani)(%p?%s)", b="%2%3"},
    {a="(%-)(\"?Ani)(%p?)$", b="%2%3"},
    {a="(%-)(\"?ayni)(%p?%s)", b="%2%3"},
    {a="(%-)(\"?ayni)(%p?)$", b="%2%3"},
    {a="(%-)([uai])(%p?%s)", b="%2%3"},
    {a="(%-)([uai])(%p?)$", b="%2%3"},
    -- discard the ʾiʿrāb hyphen (end)
    {a="(%-)(\"?[uai])", b="%1%2"}, -- hyphen + initial alif without hamza
    {a="^(\"?[uai])", b="%1"}, -- initial alif without hamza
    {a="(%s)([uai])", b="%1%2"}, -- initial alif without hamza
    {a="%-%-", b=""},
    {a="iyy(%p?)$", b="ī%1"},
    {a="iyy(%p?%s)", b="ī%1"},
    -- {a="T([^uai])", b="h%1"},
    {a="([a%']l%-)(%S-)aT([%(%[%|%<%s])(al%-)", b="%1%2a%3%4"}, --p
    {a="aT([%(%[%|%<%s])(al%-)", b="at%1%2"}, --p
    {a="aT(%p?)$", b="a%1"},
    {a="aT(%p?%s)", b="a%1"},
    {a="_t", b="ṯ"},
    {a="%^g", b="ǧ"},
    {a="%.h", b="ḥ"},
    {a="_h", b="ḫ"},
    {a="_d", b="ḏ"},
    {a="%^s", b="š"},
    {a="%.s", b="ṣ"},
    {a="%.d", b="ḍ"},
    {a="%.t", b="ṭ"},
    {a="%.z", b="ẓ"},
    {a="%.g", b="ġ"},
    {a="(U)(A)", b="ū"},
    {a="WA", b="w"},
    {a="(a)W", b="%1w"},
    {a="_A", b="ā"},
    {a="_u", b="ū"},
    {a="_a", b="ā"},
    {a="_i", b="ī"},
    {a="%.b", b="b"},
    {a="%.f", b="f"},
    {a="%.q", b="q"},
    {a="%.k", b="k"},
    {a="%.n", b="n"},
    {a="%^d", b="d"}
  }
  -- arabica scheme: single-character rules.  Most consonants map to
  -- themselves; j, x and ` receive their diacritic forms, T becomes t,
  -- and the `"` and `B` markers are deleted.
  -- (The scraped line-number gutter was removed so the table parses as Lua.)
  singletrarabica = {
    {a="b", b="b"},
    {a="t", b="t"},
    {a="j", b="ǧ"},
    {a="x", b="ḫ"},
    {a="d", b="d"},
    {a="r", b="r"},
    {a="z", b="z"},
    {a="s", b="s"},
    {a="`", b="ʿ"},
    {a="f", b="f"},
    {a="q", b="q"},
    {a="k", b="k"},
    {a="l", b="l"},
    {a="m", b="m"},
    {a="n", b="n"},
    {a="h", b="h"},
    {a="w", b="w"},
    {a="y", b="y"},
    {a="T", b="t"},
    {a="\"", b=""},
    {a="B", b=""}
  }
  -- arabica scheme: long vowels.  Both A and Y become ā; U and I become
  -- ū and ī.
  -- (The scraped line-number gutter was removed so the table parses as Lua.)
  longvtrarabica = {
    {a="[AY]", b="ā"},
    {a="U", b="ū"},
    {a="I", b="ī"}
  }