arabluatex_trans.lua 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999
  1. --[[
  2. This file is part of the `arabluatex' package
  3. ArabLuaTeX -- Processing ArabTeX notation under LuaLaTeX
  4. Copyright (C) 2016--2018 Robert Alessi
  5. Please send error reports and suggestions for improvements to Robert
  6. Alessi <alessi@robertalessi.net>
  7. This program is free software: you can redistribute it and/or modify
  8. it under the terms of the GNU General Public License as published by
  9. the Free Software Foundation, either version 3 of the License, or
  10. (at your option) any later version.
  11. This program is distributed in the hope that it will be useful, but
  12. WITHOUT ANY WARRANTY; without even the implied warranty of
  13. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. General Public License for more details.
  15. You should have received a copy of the GNU General Public License
  16. along with this program. If not, see
  17. <http://www.gnu.org/licenses/>.
  18. --]]
  19. -- common
  -- punctuationtr: rules shared by every scheme.  A doubled parenthesis in
  -- the input ("((" or "))") is reduced to a single literal parenthesis.
  -- Field a is a Lua pattern, field b the replacement text.
  punctuationtr = {}
  punctuationtr[1] = { a = "%(%(", b = "(" }
  punctuationtr[2] = { a = "%)%)", b = ")" }
  -- nulltr: rules that delete markup or flag leftovers.
  --   1. a literal "|" is removed;
  --   2. the shortest span running from an "o" to the next "o" without any
  --      whitespace in between is removed;
  --   3. a "^" or "_" followed by any character is rewritten to "<??>"
  --      followed by that character.
  nulltr = {}
  nulltr[1] = { a = "%|", b = "" }
  nulltr[2] = { a = "o[%S]-o", b = "" }
  nulltr[3] = { a = "[%^%_](.)", b = "<??>%1" }
  29. -- cap (legacy)
  -- captr (legacy capitalisation): maps a lowercase romanized letter
  -- (field a) to its uppercase form (field b).  The letters are those of
  -- the dmg scheme (the default one); the loc scheme uses them as well.
  captr = {}
  do
    -- { lowercase, uppercase }, kept in the original rule order
    local letters = {
      { "ā", "Ā" }, { "b", "B" }, { "t", "T" }, { "ṯ", "Ṯ" },
      { "ǧ", "Ǧ" }, { "ḥ", "Ḥ" }, { "ḫ", "Ḫ" }, { "d", "D" },
      { "ḏ", "Ḏ" }, { "r", "R" }, { "z", "Z" }, { "s", "S" },
      { "š", "Š" }, { "ṣ", "Ṣ" }, { "ḍ", "Ḍ" }, { "ṭ", "Ṭ" },
      { "ẓ", "Ẓ" }, { "ġ", "Ġ" }, { "f", "F" }, { "q", "Q" },
      { "k", "K" }, { "l", "L" }, { "m", "M" }, { "n", "N" },
      { "h", "H" }, { "w", "W" }, { "ū", "Ū" }, { "y", "Y" },
      { "ī", "Ī" },
    }
    for idx, pair in ipairs(letters) do
      captr[idx] = { a = pair[1], b = pair[2] }
    end
  end
  62. -- uc
  -- lcuc: lowercase -> uppercase table.  Field a is the lowercase romanized
  -- letter, field b its uppercase counterpart.  Besides consonants it also
  -- lists short and long vowels, a set of additional characters and the
  -- digraph "ch" (loc scheme), of which only the first letter is raised.
  lcuc = {}
  do
    -- { lowercase, uppercase }, kept in the original rule order
    local letters = {
      { "b", "B" }, { "t", "T" }, { "ṯ", "Ṯ" }, { "ǧ", "Ǧ" },
      { "j", "J" }, { "ḥ", "Ḥ" }, { "ḫ", "Ḫ" }, { "d", "D" },
      { "ḏ", "Ḏ" }, { "r", "R" }, { "z", "Z" }, { "s", "S" },
      { "š", "Š" }, { "ṣ", "Ṣ" }, { "ḍ", "Ḍ" }, { "ṭ", "Ṭ" },
      { "ẓ", "Ẓ" }, { "ġ", "Ġ" }, { "f", "F" }, { "q", "Q" },
      { "k", "K" }, { "l", "L" }, { "m", "M" }, { "n", "N" },
      { "h", "H" }, { "w", "W" }, { "y", "Y" }, { "u", "U" },
      { "a", "A" }, { "i", "I" }, { "ū", "Ū" }, { "ā", "Ā" },
      { "ī", "Ī" },
      -- additional characters
      { "p", "P" }, { "č", "Č" }, { "ž", "Ž" }, { "v", "V" },
      { "g", "G" }, { "ñ", "Ñ" },
      { "ch", "Ch" }, -- loc
    }
    for idx, pair in ipairs(letters) do
      lcuc[idx] = { a = pair[1], b = pair[2] }
    end
  end
  106. -- dmg
  -- hamzatrdmg: ordered list of hamza rules for the "dmg" scheme.  Each
  -- entry pairs a Lua pattern (field a) with a replacement string (field b)
  -- that may refer to captures as %1, %2, ...  Most rules rewrite the
  -- apostrophe of the input into the letter ʾ; the first group rewrites
  -- ".A" into ".|" or "||".
  -- NOTE(review): the code applying these rules is not visible here;
  -- presumably each pair is handed to string.gsub in table order, so the
  -- more specific patterns must stay ahead of the generic ones -- confirm.
  107. hamzatrdmg = {
  108. -- next lines for ʾalif alone
  109. {a="(%.A)l%-(%^n)", b=".|l-%2"}, --additional (^n is lunar)
  110. {a="([%(%[%|%<%s%-o])(%.A)l%-(%^n)", b="%1.|l-%3"}, --additional (^n is lunar) --p
  111. {a="(%.A)l%-([%_%^%.]?[tdrzsnc])", b=".|%2-%2"},
  112. {a="([%(%[%|%<%s%-o])(%.A)l%-([%_%^%.]?[tdrzsnc])", b="%1.|%3-%3"}, --p
  113. {a="(%.A)([uai])l%-(%^n)", b="||%2l-%3"}, --additional (^n is lunar)
  114. {a="([%(%[%|%<%s%-o])(%.A)([uai])l%-(%^n)", b="%1||%3l-%4"}, --additional (^n is lunar) --p
  115. {a="(%.A)([uai])l%-([%_%^%.]?[tdrzsnc])", b="||%2%3-%3"},
  116. {a="([%(%[%|%<%s%-o])(%.A)([uai])l%-([%_%^%.]?[tdrzsnc])", b="%1||%3%4-%4"}, --p
  117. {a="(%.A)([^uai])", b=".|%2"},
  118. {a="(%.A)([uai])", b="||%2"},
  119. -- hard coded hamza
  -- (the carrier written before "' is dropped, except A which is kept
  -- after the ʾ)
  120. {a="|\"'", b="ʾ"},
  121. {a="A\"'", b="ʾA"},
  122. {a="[au]\"'", b="ʾ"},
  123. {a="w\"'", b="ʾ"},
  124. {a="i\"'", b="ʾ"},
  125. {a="y\"'", b="ʾ"},
  126. -- hamza takes tašdīd too
  127. {a="''([Uu])", b="ʾʾ%1"},
  128. {a="''([Aa])", b="ʾʾ%1"},
  129. {a="''([Ii])", b="ʾʾ%1"},
  130. -- initial long u and i (for a, see below)
  131. {a="%'%_U", b="ʾU"},
  132. {a="%'%_I", b="ʾI"},
  133. -- taḫfīfu 'l-hamza
  -- (the second hamza is dropped and the short vowel becomes U or I)
  134. {a="'u'([^uaiUAI])", b="ʾU%1"},
  135. {a="'i'([^uaiUAI])", b="ʾI%1"},
  136. {a="^u'([^uaiUAI])", b="U%1"},
  137. {a="([^uaiUAIYN][%s%(%[%<])u'([^uaiUAI])", b="%1U%2"},
  138. {a="^i'([^uaiUAI])", b="I%1"},
  139. {a="([^uaiUAIYN][%s%(%[%<])i'([^uaiUAI])", b="%1I%2"},
  140. -- madda (historic writing below)
  141. {a="'a'([^uaiUAI])", b="ʾA%1"},
  142. {a="'a?A", b="ʾA"},
  143. {a="(A)(')(i)$", b="%1ʾ%3"},
  144. {a="(A)(')(i)(%W)", b="%1ʾ%3%4"},
  145. {a="(A)(')(i)", b="%1ʾ%3"}, -- historic madda
  146. {a="(A)(')", b="%1ʾ"}, -- historic madda
  147. -- initial (needs both ^ and %W patterns)
  148. -- 'aw: the diphthong is to be resolved into 'awi' (next 8 lines)
  -- In the four "(o)" variants the replacement keeps only 'aw plus "i":
  -- the whole matched o...o span is discarded.
  149. {a="^('aw)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="%1i"},
  150. {a="(%W)('aw)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="%1%2i"},
  151. {a="^('aw)(o)(\"?[uai])([%S]-o)", b="%1i"},
  152. {a="(%W)('aw)(o)(\"?[uai])([%S]-o)", b="%1%2i"},
  153. {a="^('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"},
  154. {a="(%W)('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"},
  155. {a="^('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"}, --p
  156. {a="(%W)('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"}, --p
  157. -- then the 'initial' rules for the remaining cases
  158. {a="^(')([ua])", b="ʾ%2"},
  159. {a="^(')(i)", b="ʾ%2"},
  160. -- consider replacing initial %W with [%s%(%[%<%-]:
  161. -- {a="(%W)(')([ua])", b="%1ʾ%3"},
  162. -- {a="(%W)(')(i)", b="%1ʾ%3"},
  163. {a="([%s%(%[%<%-])(')([ua])", b="%1ʾ%3"},
  164. {a="([%s%(%[%<%-])(')(i)", b="%1ʾ%3"},
  165. -- final
  -- (each case has a "$" form and a form followed by a non-alphanumeric
  -- character)
  166. {a="([Iy])(')(aN)$", b="%1ʾ%3"},
  167. {a="([Iy])(')(aN)(%W)", b="%1ʾ%3%4"},
  168. {a="([^uai])(')([uai]N?)$", b="%1ʾ%3"},
  169. {a="([^uai])(')([uai]N?)(%W)", b="%1ʾ%3%4"},
  170. {a="([UI])(')([uai])$", b="%1ʾ%3"},
  171. {a="([UI])(')([uai])(%W)", b="%1ʾ%3%4"},
  172. -- middle
  -- (every remaining combination of preceding and following sound maps
  -- the apostrophe to ʾ and leaves its neighbours untouched)
  173. {a="(U)(')", b="%1ʾ"},
  174. {a="([Iy])(')", b="%1ʾ"},
  175. {a="([^uai])(')([uU])", b="%1ʾ%3"},
  176. {a="([^uai])(')(%_?[aAY])", b="%1ʾ%3"},
  177. {a="([^uai])(')([iI])", b="%1ʾ%3"},
  178. {a="(u)(')([uU])", b="%1ʾ%3"},
  179. {a="(u)(')(%_?[aAY])", b="%1ʾ%3"},
  180. {a="(u)(')([iI])", b="%1ʾ%3"},
  181. {a="(a)(')(%_?[aAY])", b="%1ʾ%3"},
  182. {a="(a)(')([uU])", b="%1ʾ%3"},
  183. {a="(a)(')([iI])", b="%1ʾ%3"},
  184. {a="(i)(')(%_?[aAY])", b="%1ʾ%3"},
  185. {a="(i)(')([uU])", b="%1ʾ%3"},
  186. {a="(i)(')([iI])", b="%1ʾ%3"},
  187. {a="(a)(')([^uaiUAI])", b="%1ʾ%3"},
  188. {a="(u)(')([^uaiUAI])", b="%1ʾ%3"},
  189. {a="(i)(')([^uaiUAI])", b="%1ʾ%3"}
  190. }
  -- tanwintrdmg: ordered tanwīn rules for the "dmg" scheme.  Field a is a
  -- Lua pattern, field b the replacement.  The endings written uN/aN/iN
  -- (optionally preceded by a hyphen) are emitted inside \arbup{...}; the
  -- Lua literal "\\arbup" yields a single backslash.  Rules whose pattern
  -- contains "(o)...([%S]-o)" drop that whole o...o span from the output.
  -- NOTE(review): presumably applied in table order by code outside this
  -- view, with the context-sensitive rules ahead of the plain ones at the
  -- end -- confirm.
  191. tanwintrdmg = {
  192. {a="%-?([uai]NU)(o)([ui])([%S]-o)", b="\\arbup{un%3}"},
  193. {a="%-?([uai]NU)(%s)([ui])", b="\\arbup{un%3}%2'"},
  194. {a="%-?(iNI)(o)([ui])([%S]-o)", b="i\\arbup{n%3}"},
  195. {a="%-?(iNI)(%s)([ui])", b="i\\arbup{n%3}%2'"},
  196. {a="(o[%S]-)([uai]N[UI])(o)(\"?[ui])", b="'"},
  197. {a="%-?uNU", b="\\arbup{un}"},
  198. {a="%-?aNU", b="\\arbup{an}"},
  199. {a="%-?iNU", b="\\arbup{in}"},
  200. {a="%-?iNI", b="i\\arbup{n}"},
  201. -- tanwīn preceding ʾalif conjunctionis
  -- (an "i" is appended to the ending: uni / ani / ini)
  202. {a="%-?(uN)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="\\arbup{uni}"},
  203. {a="%-?(aN)(_A)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="ạ\\arbup{ni}"},
  204. {a="%-?(aN)(Y)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="ạ\\arbup{ni}"},
  205. {a="(T)%-?(aN)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="t\\arbup{ani}"},
  206. {a="([^TA])%-?(aN)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="%1\\arbup{ani}"},
  207. {a="%-?(iN)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="\\arbup{ini}"},
  208. {a="%-?(uN)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="\\arbup{uni}%2%3"},
  209. {a="%-?(aN)(_A)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="ạ\\arbup{ni}%3%4"},
  210. {a="%-?(aN)(Y)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="ạ\\arbup{ni}%3%4"},
  211. {a="(T)%-?(aN)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="t\\arbup{ani}%3%4"},
  212. {a="([^TA])%-?(aN)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1\\arbup{ani}%3%4"},
  213. {a="%-?(iN)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="\\arbup{ini}%2%3"},
  214. -- tanwīn preceding 'lla_dI/'llatI
  215. {a="%-?(uN)(o)('lla[%_]?[dt])([%S]-o)", b="\\arbup{uni}"},
  216. {a="%-?(aN)(_A)(o)('lla[%_]?[dt])([%S]-o)", b="ạ\\arbup{ni}"},
  217. {a="%-?(aN)(Y)(o)('lla[%_]?[dt])([%S]-o)", b="ạ\\arbup{ni}"},
  218. {a="(T)%-?(aN)(o)('lla[%_]?[dt])([%S]-o)", b="t\\arbup{ani}"},
  219. {a="([^TA])%-?(aN)(o)('lla[%_]?[dt])([%S]-o)", b="%1\\arbup{ani}"},
  220. {a="%-?(iN)(o)('lla[%_]?[dt])([%S]-o)", b="\\arbup{ini}"},
  221. {a="%-?(uN)(%s)('lla[%_]?[dt])", b="\\arbup{uni}%2%3"},
  222. {a="%-?(aN)(_A)(%s)('lla[%_]?[dt])", b="ạ\\arbup{ni}%3%4"},
  223. {a="%-?(aN)(Y)(%s)('lla[%_]?[dt])", b="ạ\\arbup{ni}%3%4"},
  224. {a="(T)%-?(aN)(%s)('lla[%_]?[dt])", b="t\\arbup{ani}%3%4"},
  225. {a="([^TA])%-?(aN)(%s)('lla[%_]?[dt])", b="%1\\arbup{ani}%3%4"},
  226. {a="%-?(iN)(%s)('lla[%_]?[dt])", b="\\arbup{ini}%2%3"},
  227. -- tanwīn + alif without hamza and kasra (ibn) or dhamma (uhrub)
  -- (the vowel of the following word is moved inside \arbup{...}; in the
  -- whitespace variants it is replaced by an apostrophe after the space)
  228. {a="%-?(uN)(o)([ui])([%S]-o)", b="\\arbup{un%3}"},
  229. {a="%-?(aN)(_A)(o)([ui])([%S]-o)", b="ạ\\arbup{n%4}"},
  230. {a="%-?(aN)(Y)(o)([ui])([%S]-o)", b="ạ\\arbup{n%4}"},
  231. {a="(T)%-?(aN)(o)([ui])([%S]-o)", b="t\\arbup{an%4}"},
  232. {a="([^TA])%-?(aN)(o)([ui])([%S]-o)", b="%1\\arbup{an%4}"},
  233. {a="%-?(iN)(o)([ui])([%S]-o)", b="\\arbup{in%3}"},
  234. {a="(o[%S]-)([uai]N)(o)(\"?[ui])", b="'"},
  235. {a="%-?(uN)(%s)([ui])", b="\\arbup{un%3}%2'"},
  236. {a="%-?(aN)(_A)(%s)([ui])", b="ạ\\arbup{n%4}%3'"},
  237. {a="%-?(aN)(Y)(%s)([ui])", b="ạ\\arbup{n%4}%3'"},
  238. {a="(T)%-?(aN)(%s)([ui])", b="t\\arbup{an%4}%3'"},
  239. {a="([^TA])%-?(aN)(%s)([ui])", b="%1\\arbup{an%4}%3'"},
  240. {a="%-?(iN)(%s)([ui])", b="\\arbup{in%3}%2'"},
  241. --
  -- plain tanwīn, i.e. whatever the context rules above did not consume
  242. -- {a="uN", b="\\arbup{un}"}, (now included in the last line of this table)
  243. {a="%-?(\"?At)%-?([ui])N", b="\\arbup{%1%2n}"},
  244. {a="%-?(aN)(_A)", b="ạ\\arbup{n}"},
  245. {a="%-?(aN)(Y)", b="ạ\\arbup{n}"},
  246. {a="(T)%-?(\"?aN)", b="t\\arbup{an}"},
  247. {a="([^TA])%-?(\"?aN)", b="%1\\arbup{an}"},
  248. {a="%-?([ui])N", b="\\arbup{%1n}"}
  249. }
  -- trigraphstrdmg: ordered rules for sequences of three or more input
  -- characters in the "dmg" scheme: the relative pronouns, the word "law",
  -- the definite article in its various written forms, diphthongs before
  -- an apostrophe-initial word, and silent wāw/yāʾ.  Field a is a Lua
  -- pattern, field b the replacement (with %n capture references).
  -- In the solar-consonant rules the "l" of the article is replaced by a
  -- copy of the consonant that follows it (e.g. b="%1%2-%2").
  -- NOTE(review): presumably applied in table order by code outside this
  -- view; the "what remains" rules rely on that order -- confirm.
  250. trigraphstrdmg = { -- trigraphs or more
  251. -- 'llatI / 'llad_I
  252. {a="^'ll(a)([%_]?[dt])", b="'ll%1%2"},
  253. {a="([%(%[%|%<%s])'ll(a)([%_]?[dt])", b="%1'll%2%3"}, --p
  254. -- law: the diphthong is to be resolved into 'awi' (next 8 lines)
  255. {a="^(law)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="%1i"},
  256. {a="(%W)(law)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="%1%2i"},
  257. {a="^(law)(o)(\"?[uai])([%S]-o)", b="%1i"},
  258. {a="(%W)(law)(o)(\"?[uai])([%S]-o)", b="%1%2i"},
  259. {a="^(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"},
  260. {a="(%W)(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"},
  261. {a="^(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"}, --p
  262. {a="(%W)(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"}, --p
  263. -- al- + lām
  264. {a="^(a)l%-(l)", b="%1l-%2"},
  265. {a="([%(%[%|%<%s%-o])(a)l%-(l)", b="%1%2l-%3"}, --p
  266. -- al- + solar consonant ('c' and '^n' are additional characters)
  267. {a="^(a)l%-(%^n)", b="%1l-%2"}, -- ^n is lunar
  268. {a="([%(%[%|%<%s%-o])(a)l%-(%^n)", b="%1%2l-%3"}, --^n is lunar --p
  269. {a="^(a)l%-([%_%^%.]?[tdrzsnc])", b="%1%2-%2"},
  270. {a="([%(%[%|%<%s%-o])(a)l%-([%_%^%.]?[tdrzsnc])", b="%1%2%3-%3"}, --p
  271. -- assim. art. + solar consonant ('c' and '^n' are additional characters)
  272. {a="^(a)(%^n)%-", b="%1l-"}, -- ^n is lunar
  273. {a="([%(%[%|%<%s%-o])(a)(%^n)%-", b="%1%2l-"}, --^n is lunar --p
  274. {a="^(a)([%_%^%.]?[tdrzsnc])%-", b="%1%2-"},
  275. {a="([%(%[%|%<%s%-o])(a)([%_%^%.]?[tdrzsnc])%-", b="%1%2%3-"}, --p
  276. -- al- + initial unstable hamza
  277. {a="^(a)l%-(\"?[uai])", b="%1l-%2"},
  278. {a="([%(%[%|%<%s%-o])(a)l%-(\"?[uai])", b="%1%2l-%3"}, --p
  279. -- li-/la- + art. + initial unstable hamza is a special orthography
  280. {a="l([ai])%-l%-([uai])", b="l%1-l-%2"},
  281. -- al- + lunar consonant (i.e. what remains)
  282. {a="^(a)l%-", b="%1l-"},
  283. {a="([%(%[%|%<%s%-o])(a)l%-", b="%1%2l-"}, --p
  284. -- diphthongs to be resolved before ʾalif conjunctionis
  -- (aw gains a "u", ay gains an "i"; the o...o span is dropped in the
  -- first two rules)
  285. {a="(aw)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="%1u"},
  286. {a="(ay)(o)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-o)", b="%1i"},
  287. {a="(aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1u%2%3"},
  288. {a="(ay)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"},
  289. -- art. with waṣla + lām
  290. {a="'l%-(l)", b="'l-%1"},
  291. -- art. with waṣla + solar consonant
  292. -- ('c' and '^n' are additional characters)
  293. {a="'l%-(%^n)", b="'l-%1"}, -- ^n is lunar
  294. {a="'l%-([%_%^%.]?[tdrzsnc])", b="'%1-%1"},
  295. -- li-/la- + art. + lām
  296. {a="l([ai])%-l%-(l)", b="l%1-%2-%2"},
  297. -- assim. art. with waṣla + solar consonant
  298. -- ('c' and '^n' are additional characters)
  299. {a="'(%^n)%-", b="'l-"}, -- ^n is lunar
  300. {a="'([%_%^%.]?[tdrzsnc])%-", b="'%1-"},
  301. -- li-/la- + art. + solar consonant is a special orthography
  302. -- ('c' and '^n' are additional characters)
  303. {a="l([ai])%-l%-(%^n)", b="l%1-l-%2"}, -- ^n is lunar
  304. {a="l([ai])%-l%-([%_%^%.]?[tdrzsnc])", b="l%1-%2-%2"},
  305. -- li-/la- + assim. art. + solar consonant is a special orthography
  306. -- ('c' and '^n' are additional characters)
  307. {a="l([ai])%-(%^n)%-(%^n)", b="l%1-l-%3"}, -- ^n is lunar
  308. {a="l([ai])%-([%_%^%.]?[tdrzsnc])%-([%_%^%.]?[tdrzsnc])", b="l%1-%2-%3"},
  309. -- art. with waṣla + initial unstable hamza
  310. {a="'l%-(\"?[uai])", b="'l-%1"},
  311. -- art. with waṣla + lunar consonant (i.e. what remains)
  312. {a="'l%-", b="'l-"},
  313. -- the silent wāw
  -- (U after a short vowel is dropped at end of input or before a
  -- non-alphanumeric character)
  314. {a="uU$", b="u"},
  315. {a="uU(%W)", b="u%1"},
  316. {a="aU$", b="a"},
  317. {a="aU(%W)", b="a%1"},
  318. {a="iU$", b="i"},
  319. {a="iU(%W)", b="i%1"},
  320. -- words ending in -āT with silent wāw/yāʾ
  321. {a="(_a)UA", b="A"},
  322. {a="(_a)U", b="A"},
  323. {a="(_a)I", b="A"}
  324. }
  -- idghamtrdmg: assimilation rules for the "dmg" scheme.  An "n",
  -- optionally followed by "}", that stands before one of r, l, m, n, w, y
  -- is replaced by a copy of that consonant.
  --   rule 1: the two are separated by a whitespace character, which is
  --           kept together with the following consonant;
  --   rule 2: the two are separated by "o" and followed by a span ending
  --           in "o"; only the copied consonant and the optional "}" are
  --           kept.
  idghamtrdmg = {}
  idghamtrdmg[#idghamtrdmg + 1] = {
    a = "(n)(}?)(%s)([rlmnwy])",
    b = "%4%2%3%4",
  }
  idghamtrdmg[#idghamtrdmg + 1] = {
    a = "(n)(}?)(o)([rlmnwy])([%S]-o)",
    b = "%4%2",
  }
  -- digraphstrdmg: ordered rules for two-character input sequences in the
  -- "dmg" scheme.  Field a is a Lua pattern, field b the replacement.
  -- In table order the groups are: vowels at a word junction (a following
  -- initial vowel becomes an apostrophe or the o...o span is dropped),
  -- endings after a hyphen wrapped in \arbup{...}, shortening of long
  -- vowels before an apostrophe-initial word, the nisba ending iyy, the
  -- letter T (rendered t or h depending on what follows), and finally the
  -- two-character codes ("_t", "^g", ".h", ...) mapped to single letters.
  -- NOTE(review): presumably applied in table order by code outside this
  -- view; several groups depend on earlier ones having run -- confirm.
  330. digraphstrdmg = {
  331. {a="([uai]%-)(\"?[uai])", b="%1'"}, -- hyphen + initial alif without hamza
  332. -- the following two are replaced with the 4 lines next for now
  333. -- {a="^(\"?[uai])", b="%1"}, -- initial alif without hamza
  334. -- {a="(%W)(\"?[uai])", b="%1%2"}, -- initial alif without hamza
  335. -- {a="^(\"[uai])", b="'"}, -- initial alif without hamza
  336. -- {a="(%W)(\"[uai])", b="%1'"}, -- initial alif without hamza
  337. {a="^(\"?[uai])", b="%1"}, -- initial alif without hamza
  338. {a="(%W)(\"?[uai])", b="%1%2"}, -- initial alif without hamza
  339. -- this is not necessary, take out for now:
  340. -- {a="([%_]?[uaiUAIY])(%s)([uai])", b="%1%2'"}, -- initial alif without hamza
  341. {a="(aw)(o)(\"?[uai])([%S]-o)", b="%1u"},
  342. {a="(aw)(%s)([%(%[%|%<]?)(\"?[uai])", b="%1u%2%3'"}, --p
  343. {a="(ay)(o)(\"?[uai])([%S]-o)", b="%1i"},
  344. {a="(ay)(%s)([%(%[%|%<]?)(\"?[uai])", b="%1i%2%3'"}, --p
  345. {a="(aW)(o)(\"?[uai])([%S]-o)", b="awu"},
  346. {a="(UA)(o)(\"?[uai])([%S]-o)", b="u"},
  347. {a="(%_A)(o)(\"?[uai])([%S]-o)", b="ạ"},
  348. {a="(Y)(o)(\"?[uai])([%S]-o)", b="ạ"},
  349. {a="(%_a)(o)(\"?[uai])([%S]-o)", b="a"},
  350. {a="(A)(o)(\"?[uai])([%S]-o)", b="a"},
  351. {a="([%_]?[Uu])(o)(\"?[uai])([%S]-o)", b="u"},
  352. {a="([%_]?[Ii])(o)(\"?[uai])([%S]-o)", b="i"},
  353. {a="(o[%S]-)([UAIYWuaiyw])(o)(\"?[uai])", b="'"},
  -- NOTE(review): unlike its neighbours, the next rule re-emits the
  -- following vowel (%4) instead of an apostrophe -- confirm this is
  -- intended.
  354. {a="(aW)(%s)([%(%[%|%<]?)(\"?[uai])", b="awu%2%3%4"}, --p
  355. {a="(UA)(%s)([%(%[%|%<]?)(\"?[uai])", b="u%2%3'"}, --p
  356. {a="([^%_][uai])(%s)([%(%[%|%<]?)(\"?[uai])", b="%1%2%3'"}, --p
  357. {a="(%_A)(%s)([%(%[%|%<]?)(\"?[uai])", b="ạ%2%3'"}, --p
  358. {a="(Y)(%s)([%(%[%|%<]?)(\"?[uai])", b="ạ%2%3'"}, --p
  359. {a="(%_a)(%s)([%(%[%|%<]?)(\"?[uai])", b="a%2%3'"}, --p
  360. {a="(A)(%s)([%(%[%|%<]?)(\"?[uai])", b="a%2%3'"}, --p
  361. {a="([%_]?[Uu])(%s)([%(%[%|%<]?)(\"?[uai])", b="u%2%3'"}, --p
  362. {a="([%_]?[Ii])(%s)([%(%[%|%<]?)(\"?[uai])", b="i%2%3'"}, --p
  363. -- ʾiʿrāb hyphen (begin)
  -- (the hyphen is dropped and the ending is wrapped in \arbup{...};
  -- each ending has a "before whitespace" and an "end of input" form)
  364. {a="(%-)(\"?[UI]na)(%p*%s)", b="\\arbup{%2}%3"},
  365. {a="(%-)(\"?[UI]na)(%p*)$", b="\\arbup{%2}%3"},
  366. {a="(%-)(\"?At[ui])(%p*%s)", b="\\arbup{%2}%3"},
  367. {a="(%-)(\"?At[ui])(%p*)$", b="\\arbup{%2}%3"},
  368. {a="(%-)(\"?Ani)(%p*%s)", b="\\arbup{%2}%3"},
  369. {a="(%-)(\"?Ani)(%p*)$", b="\\arbup{%2}%3"},
  370. {a="(%-)(\"?ayni)(%p*%s)", b="\\arbup{%2}%3"},
  371. {a="(%-)(\"?ayni)(%p*)$", b="\\arbup{%2}%3"},
  372. {a="(%-)(\"?[uai])(%p*%s)", b="\\arbup{%2}%3"},
  373. {a="(%-)(\"?[uai])(%p*)$", b="\\arbup{%2}%3"},
  374. -- ʾiʿrāb hyphen (end) shorten long vowels preceding ʾalif
  375. -- conjunctionis—without forgetting 'lla_dI
  376. {a="(U)(A)", b="U"},
  377. {a="(aW)(o)('[%_%^%.]?[l'btjghxdrzs`fqkmnwy][%-l])([%S]-o)", b="awu"},
  378. {a="(%_a)(o)('[%_%^%.]?[l'btjghxdrzs`fqkmnwy][%-l])([%S]-o)", b="a"},
  379. {a="(%_A)(o)('[%_%^%.]?[l'btjghxdrzs`fqkmnwy][%-l])([%S]-o)", b="ạ"},
  380. {a="(A)(o)('[%_%^%.]?[l'btjghxdrzs`fqkmnwy][%-l])([%S]-o)", b="a"},
  381. {a="(Y)(o)('[%_%^%.]?[l'btjghxdrzs`fqkmnwy][%-l])([%S]-o)", b="ạ"},
  382. {a="([%_]?[Uu])(o)('[%_%^%.]?[l'btjghxdrzs`fqkmnwy][%-l])([%S]-o)", b="u"},
  383. {a="([%_]?[Ii])(o)('[%_%^%.]?[l'btjghxdrzs`fqkmnwy][%-l])([%S]-o)", b="i"},
  384. --p (next 7 lines, just after %s)
  385. {a="(aW)(%s)([%(%[%|%<]?['][%_%^%.]?[l'btjghxdrzs`fqkmnwy][%-l])", b="awu%2%3"},
  386. {a="(%_a)(%s)([%(%[%|%<]?['][%_%^%.]?[l'btjghxdrzs`fqkmnwy][%-l])", b="a%2%3"},
  387. {a="(%_A)(%s)([%(%[%|%<]?['][%_%^%.]?[l'btjghxdrzs`fqkmnwy][%-l])", b="ạ%2%3"},
  388. {a="(A)(%s)([%(%[%|%<]?['][%_%^%.]?[l'btjghxdrzs`fqkmnwy][%-l])", b="a%2%3"},
  389. {a="(Y)(%s)([%(%[%|%<]?['][%_%^%.]?[l'btjghxdrzs`fqkmnwy][%-l])", b="ạ%2%3"},
  390. {a="([%_]?[Uu])(%s)([%(%[%|%<]?['][%_%^%.]?[l'btjghxdrzs`fqkmnwy][%-l])", b="u%2%3"},
  391. {a="([%_]?[Ii])(%s)([%(%[%|%<]?['][%_%^%.]?[l'btjghxdrzs`fqkmnwy][%-l])", b="i%2%3"},
  -- a double hyphen is removed
  392. {a="%-%-", b=""},
  -- final iyy(aT) before optional punctuation becomes ī / īyaT
  393. {a="iyyaT(%p*)$", b="īyaT%1"},
  394. {a="iyyaT(%p*%s)", b="īyaT%1"},
  395. {a="iyy(%p*)$", b="ī%1"},
  396. {a="iyy(%p*%s)", b="ī%1"},
  397. -- {a="T([^uai])", b="%1"},
  -- T becomes t before \arbup, before an article-initial word, and before
  -- | or "; it becomes h before punctuation/whitespace and at end of input
  398. {a="T(\\arbup)", b="t%1"},
  399. {a="([a%'][%_%^%.]?[tdrzsln]%-)(%S-)T([%(%[%|%<%s])(a[%_%^%.]?[tdrzsln]%-)", b="%1%2h%3%4"}, --p
  400. {a="T([%(%[%|%<%s])(a[%_%^%.]?[tdrzsln]%-)", b="t%1%2"}, --p
  401. {a="T([%|\"])", b="t%1"},
  402. {a="T(%p*%s)", b="h%1"},
  403. {a="T(%p*)$", b="h%1"},
  404. {a="T(%p*)(%W)", b="h%1%2"},
  -- two-character consonant codes -> single letters
  405. {a="_t", b="ṯ"},
  406. {a="%^g", b="ǧ"},
  407. {a="%.h", b="ḥ"},
  408. {a="_h", b="ḫ"},
  409. {a="_d", b="ḏ"},
  410. {a="%^s", b="š"},
  411. {a="%.s", b="ṣ"},
  412. {a="%.d", b="ḍ"},
  413. {a="%.t", b="ṭ"},
  414. {a="%.z", b="ẓ"},
  415. {a="%.g", b="ġ"},
  416. {a="%.y", b="y"},
  417. -- additional characters (begin)
  418. {a="%^c", b="č"},
  419. {a="%^z", b="ž"},
  420. {a="%^n", b="ñ"},
  421. -- additional characters (end)
  422. -- the following needs to be moved above shortening rules
  423. -- {a="(U)(A)", b="ū"},
  424. {a="WA", b="w"},
  425. {a="(a)W", b="%1w"},
  426. {a="_A", b="ạ̄"},
  427. {a="_u", b="ū"},
  428. {a="_a", b="ā"},
  429. {a="_i", b="ī"},
  430. {a="%.b", b="ḅ"},
  431. {a="%.f", b="f̣"},
  432. {a="%.q", b="q̣"},
  433. {a="%.k", b="k"},
  434. {a="%.n", b="ṇ"},
  435. {a="%^d", b="d́"}
  436. }
  -- singletrdmg: single-character rules for the "dmg" scheme.  Field a is
  -- the input character, field b its romanized form.  Most letters map to
  -- themselves; j, x and ` map to ǧ, ḫ and ʿ, T maps to t, and the two
  -- characters " and B are removed.
  singletrdmg = {}
  do
    -- { input, output }, kept in the original rule order
    local mapping = {
      { "b", "b" }, { "t", "t" }, { "j", "ǧ" }, { "x", "ḫ" },
      { "d", "d" }, { "r", "r" }, { "z", "z" }, { "s", "s" },
      { "`", "ʿ" }, { "f", "f" }, { "q", "q" }, { "k", "k" },
      { "l", "l" }, { "m", "m" }, { "n", "n" }, { "h", "h" },
      { "w", "w" }, { "y", "y" }, { "T", "t" },
      -- additional characters
      { "p", "p" }, { "v", "v" }, { "g", "g" },
      -- characters that are simply deleted
      { "\"", "" }, { "B", "" },
    }
    for idx, pair in ipairs(mapping) do
      singletrdmg[idx] = { a = pair[1], b = pair[2] }
    end
  end
  -- longvtrdmg: long-vowel rules for the "dmg" scheme.  The capital codes
  -- A, U, I become ā, ū, ī.  Y is handled in three steps: after a it
  -- becomes y, after i the pair becomes ī, and any other Y becomes ạ̄.
  longvtrdmg = {}
  table.insert(longvtrdmg, { a = "A", b = "ā" })
  table.insert(longvtrdmg, { a = "U", b = "ū" })
  table.insert(longvtrdmg, { a = "I", b = "ī" })
  table.insert(longvtrdmg, { a = "aY", b = "ay" })
  table.insert(longvtrdmg, { a = "iY", b = "ī" })
  table.insert(longvtrdmg, { a = "Y", b = "ạ̄" })
  -- shortvtrdmg: short-vowel rules for the "dmg" scheme.  Runs of three or
  -- two short vowels are rewritten as a comma-separated list between
  -- slashes; the last three rules map u, a, i to themselves.
  shortvtrdmg = {}
  shortvtrdmg[1] = { a = "([uai])([uai])([uai])", b = "/%1,%2,%3/" }
  shortvtrdmg[2] = { a = "([uai])([uai])", b = "/%1,%2/" }
  for _, vowel in ipairs({ "u", "a", "i" }) do
    shortvtrdmg[#shortvtrdmg + 1] = { a = vowel, b = vowel }
  end
  480. -- loc
  -- hamzatrloc: ordered list of hamza rules for the "loc" scheme.  Field a
  -- is a Lua pattern, field b the replacement (with %n capture
  -- references).  The "initial" and "madda" groups drop the leading
  -- apostrophe entirely (the replacement keeps only the vowel); the
  -- "final" and "middle" groups rewrite the apostrophe to ʾ.
  -- NOTE(review): presumably applied in table order by code outside this
  -- view; the anchored rules precede the unanchored ones on purpose --
  -- confirm.
  481. hamzatrloc = {
  482. -- next lines for ʾalif alone
  483. {a="(%.A)([^uai])", b=".|%2"},
  484. {a="(%.A)([uai])", b="||%2"},
  485. -- hard coded hamza
  486. {a="|\"'", b="ʾ"},
  487. {a="A\"'", b="ʾA"},
  488. {a="[au]\"'", b="ʾ"},
  489. {a="w\"'", b="ʾ"},
  490. {a="i\"'", b="ʾ"},
  491. {a="y\"'", b="ʾ"},
  492. -- hamza takes tašdīd too
  493. {a="''([Uu])", b="ʾʾ%1"},
  494. {a="''([Aa])", b="ʾʾ%1"},
  495. {a="''([Ii])", b="ʾʾ%1"},
  496. -- initial long u and i (for a, see below)
  497. {a="%'%_U", b="U"},
  498. {a="%'%_I", b="I"},
  499. -- taḫfīfu 'l-hamza
  -- (both apostrophes are dropped and the short vowel becomes U or I)
  500. {a="^'u'([^uaiUAI])", b="U%1"},
  501. {a="([%s%(%[%<%-])'u'([^uaiUAI])", b="%1U%2"},
  502. {a="^'i'([^uaiUAI])", b="I%1"},
  503. {a="([%s%(%[%<%-])'i'([^uaiUAI])", b="%1I%2"},
  504. {a="^u'([^uaiUAI])", b="U%1"},
  505. {a="([^uaiUAIYN][%s%(%[%<])u'([^uaiUAI])", b="%1U%2"},
  506. {a="^i'([^uaiUAI])", b="I%1"},
  507. {a="([^uaiUAIYN][%s%(%[%<])i'([^uaiUAI])", b="%1I%2"},
  508. -- madda (historic writing below)
  -- (at the start of input or after a non-alphanumeric character the
  -- apostrophe is dropped; otherwise 'A / 'aA becomes ʾA)
  509. {a="^(')(A)", b="%2"},
  510. {a="(%W)(')(A)", b="%1%3"},
  511. {a="^'a'([^uaiUAI])", b="A%1"},
  512. {a="(%W)'a'([^uaiUAI])", b="%1A%2"},
  513. {a="'a'([^uaiUAI])", b="A%1"},
  514. {a="^'a?A", b="A"},
  515. {a="(%W)'a?A", b="%1A"},
  516. {a="'a?A", b="ʾA"},
  517. {a="(A)(')(i)$", b="%1ʾ%3"},
  518. {a="(A)(')(i)(%W)", b="%1ʾ%3%4"},
  519. {a="(A)(')(i)", b="%1ʾ%3"}, -- historic madda
  520. {a="(A)(')", b="%1ʾ"}, -- historic madda
  521. -- initial (needs both ^ and %W patterns)
  -- (the apostrophe is dropped, only the vowel is kept)
  522. {a="^(')([ua])", b="%2"},
  523. {a="^(')(i)", b="%2"},
  524. -- consider replacing initial %W with [%s%(%[%<%-]:
  525. -- {a="(%W)(')([ua])", b="%1%3"},
  526. -- {a="(%W)(')(i)", b="%1%3"},
  527. {a="([%s%(%[%<%-])(')([ua])", b="%1%3"},
  528. {a="([%s%(%[%<%-])(')(i)", b="%1%3"},
  529. -- final
  530. {a="([Iy])(')(aN)$", b="%1ʾ%3"},
  531. {a="([Iy])(')(aN)(%W)", b="%1ʾ%3%4"},
  532. {a="([^uai])(')([uai]N?)$", b="%1ʾ%3"},
  533. {a="([^uai])(')([uai]N?)(%W)", b="%1ʾ%3%4"},
  534. {a="([UI])(')([uai])$", b="%1ʾ%3"},
  535. {a="([UI])(')([uai])(%W)", b="%1ʾ%3%4"},
  536. -- middle
  537. {a="(U)(')", b="%1ʾ"},
  538. {a="([Iy])(')", b="%1ʾ"},
  539. {a="([^uai])(')([uU])", b="%1ʾ%3"},
  540. {a="([^uai])(')(%_?[aAY])", b="%1ʾ%3"},
  541. {a="([^uai])(')([iI])", b="%1ʾ%3"},
  542. {a="(u)(')([uU])", b="%1ʾ%3"},
  543. {a="(u)(')(%_?[aAY])", b="%1ʾ%3"},
  544. {a="(u)(')([iI])", b="%1ʾ%3"},
  545. {a="(a)(')(%_?[aAY])", b="%1ʾ%3"},
  546. {a="(a)(')([uU])", b="%1ʾ%3"},
  547. {a="(a)(')([iI])", b="%1ʾ%3"},
  548. {a="(i)(')(%_?[aAY])", b="%1ʾ%3"},
  549. {a="(i)(')([uU])", b="%1ʾ%3"},
  550. {a="(i)(')([iI])", b="%1ʾ%3"},
  551. {a="(a)(')([^uaiUAI])", b="%1ʾ%3"},
  552. {a="(u)(')([^uaiUAI])", b="%1ʾ%3"},
  553. {a="(i)(')([^uaiUAI])", b="%1ʾ%3"}
  554. }
  -- tanwintrloc: tanwīn rules for the "loc" scheme.  The endings written
  -- with N (optionally preceded by a hyphen) are rewritten as plain
  -- un / an / in.  A following _A or Y is dropped after aN, and a T
  -- standing before aN is rendered t.
  tanwintrloc = {}
  do
    local function add(pattern, replacement)
      tanwintrloc[#tanwintrloc + 1] = { a = pattern, b = replacement }
    end

    -- endings followed by a further U or I
    add("%-?uNU", "un")
    add("%-?aNU", "an")
    add("%-?iNU", "in")
    add("%-?iNI", "in")
    -- un / in, with or without a preceding At
    add("%-?(\"?At)%-?([ui])N", "%1%2n")
    add("%-?([ui])N", "%1n")
    -- an, in its various spellings
    add("%-?(aN)(_A)", "an")
    add("%-?(aN)(Y)", "an")
    add("(T)%-?(\"?aN)", "tan")
    add("([^TA])%-?(\"?aN)", "%1an")
  end
  -- trigraphstrloc: ordered rules for sequences of three or more input
  -- characters in the "loc" scheme.  Field a is a Lua pattern, field b the
  -- replacement (with %n capture references).  Every article rule emits
  -- "l-" (the consonant is never copied into the article), and a leading
  -- apostrophe before the article or before "ll" is replaced by "a".
  -- NOTE(review): presumably applied in table order by code outside this
  -- view; the "what remains" rules rely on that order -- confirm.
  567. trigraphstrloc = { -- trigraphs or more
  568. -- 'llatI / 'llad_I
  569. {a="^'ll(a)([%_]?[dt])", b="all%1%2"},
  570. {a="([%(%[%|%<%s])'ll(a)([%_]?[dt])", b="%1all%2%3"}, --p
  571. -- al- + lām
  572. {a="^(a)l%-(l)", b="%1l-%2"},
  573. {a="(%s)(a)l%-(l)", b="%1%2l-%3"},
  574. -- al- + solar consonant ('c' and '^n' are additional characters)
  575. {a="^(a)l%-(%^n)", b="%1l-%2"}, -- ^n is lunar
  576. {a="(%s)(a)l%-(%^n)", b="%1%2l-%3"}, -- ^n is lunar
  577. {a="^(a)l%-([%_%^%.]?[tdrzsnc])", b="%1l-%2"},
  578. {a="(%s)(a)l%-([%_%^%.]?[tdrzsnc])", b="%1%2l-%3"},
  579. -- assim. art. + solar consonant ('c' and '^n' are additional characters)
  580. {a="^(a)(%^n)%-", b="%1l-"}, -- ^n is lunar
  581. {a="(%s)(a)(%^n)%-", b="%1%2l-"}, -- ^n is lunar
  582. {a="^(a)([%_%^%.]?[tdrzsnc])%-", b="%1l-"},
  583. {a="(%s)(a)([%_%^%.]?[tdrzsnc])%-", b="%1%2l-"},
  584. -- al- + initial unstable hamza
  585. {a="^(a)l%-([uai])", b="%1l-%2"},
  586. {a="(%s)(a)l%-([uai])", b="%1%2l-%3"},
  587. -- li-/la- + art. + initial unstable hamza is a special orthography
  588. {a="l([ai])%-l%-([uai])", b="l%1-l-%2"},
  589. -- al- + lunar consonant (i.e. what remains)
  590. {a="^(a)l%-", b="%1l-"},
  591. {a="(%s)(a)l%-", b="%1%2l-"},
  592. -- art. with waṣla + lām
  593. {a="'l%-(l)", b="al-%1"},
  594. -- art. with waṣla + solar consonant
  595. -- ('c' and '^n' are additional characters)
  596. {a="'l%-(%^n)", b="al-%1"}, -- ^n is lunar
  597. {a="'l%-([%_%^%.]?[tdrzsnc])", b="al-%1"},
  598. -- li-/la- + art. + lām
  599. {a="l([ai])%-l%-(l)", b="l%1-l-%2"},
  600. -- assim. art. with waṣla + solar consonant
  601. -- ('c' and '^n' are additional characters)
  602. {a="'(%^n)%-", b="al-"}, -- ^n is lunar
  603. {a="'([%_%^%.]?[tdrzsnc])%-", b="al-"},
  604. -- li-/la- + art. + solar consonant is a special orthography
  605. -- ('c' and '^n' are additional characters)
  606. {a="l([ai])%-l%-(%^n)", b="l%1-l-%2"}, -- ^n is lunar
  607. {a="l([ai])%-l%-([%_%^%.]?[tdrzsnc])", b="l%1-l-%2"},
  608. -- li-/la- + assim. art. + solar consonant is a special orthography
  609. -- ('c' and '^n' are additional characters)
  610. {a="l([ai])%-(%^n)%-(%^n)", b="l%1-l-%3"}, -- ^n is lunar
  611. {a="l([ai])%-([%_%^%.]?[tdrzsnc])%-([%_%^%.]?[tdrzsnc])", b="l%1-l-%3"},
  612. -- art. with waṣla + initial unstable hamza
  613. {a="'l%-([uai])", b="al-%1"},
  614. -- art. with waṣla + lunar consonant (i.e. what remains)
  615. {a="'l%-", b="al-"},
  616. -- the silent wāw
  -- (U after a short vowel is dropped at end of input or before a
  -- non-alphanumeric character)
  617. {a="uU$", b="u"},
  618. {a="uU(%W)", b="u%1"},
  619. {a="aU$", b="a"},
  620. {a="aU(%W)", b="a%1"},
  621. {a="iU$", b="i"},
  622. {a="iU(%W)", b="i%1"},
  623. -- words ending in -āT with silent wāw/yāʾ
  624. {a="(_a)UA", b="A"},
  625. {a="(_a)U", b="A"},
  626. {a="(_a)I", b="A"}
  627. }
  -- digraphstrloc: ordered rules for two-character input sequences in the
  -- "loc" scheme.  Field a is a Lua pattern, field b the replacement.
  -- In table order: endings after a hyphen lose that hyphen, doubled
  -- w / y after u / i are contracted, T is rendered t or h depending on
  -- what follows, and the two-character consonant codes are spelled out
  -- (mostly as Latin digraphs such as th, kh, dh, sh, gh).
  -- NOTE(review): presumably applied in table order by code outside this
  -- view -- confirm.
  628. digraphstrloc = {
  629. -- discard the ʾiʿrāb hyphen (begin)
  630. {a="(%-)(\"?[UI]na)(%p*%s)", b="%2%3"},
  631. {a="(%-)(\"?[UI]na)(%p*)$", b="%2%3"},
  632. {a="(%-)(\"?At[ui])(%p*%s)", b="%2%3"},
  633. {a="(%-)(\"?At[ui])(%p*)$", b="%2%3"},
  634. {a="(%-)(\"?Ani)(%p*%s)", b="%2%3"},
  635. {a="(%-)(\"?Ani)(%p*)$", b="%2%3"},
  636. {a="(%-)(\"?ayni)(%p*%s)", b="%2%3"},
  637. {a="(%-)(\"?ayni)(%p*)$", b="%2%3"},
  638. {a="(%-)([uai])(%p*%s)", b="%2%3"},
  639. {a="(%-)([uai])(%p*)$", b="%2%3"},
  640. -- discard the ʾiʿrāb hyphen (end)
  -- the next three replacements reproduce exactly what they matched
  641. {a="(%-)(\"?[uai])", b="%1%2"}, -- hyphen + initial alif without hamza
  642. {a="^(\"?[uai])", b="%1"}, -- initial alif without hamza
  643. {a="(%s)([uai])", b="%1%2"}, -- initial alif without hamza
  644. {a="%-%-", b=""},
  645. {a="uww", b="ūw"},
  646. {a="iyy(%p*)$", b="ī%1"},
  647. {a="iyy(%p*%s)", b="ī%1"},
  648. {a="iyy", b="īy"},
  -- an apostrophe is inserted between t/k/d/s/g and a following h; this
  -- rule is listed before the ones below that produce th/kh/dh/sh/gh
  649. {a="([tkdsg])(h)", b="%1'%2"},
  650. -- {a="T([^uai])", b="h%1"},
  651. {a="([a%']l%-)(%S-)T([%(%[%|%<%s])(al%-)", b="%1%2h%3%4"}, --p
  652. {a="T([%(%[%|%<%s])(al%-)", b="t%1%2"}, --p
  653. {a="T([%|\"])", b="t%1"},
  654. {a="T(%p*)$", b="h%1"},
  655. {a="T(%p*%s)", b="h%1"},
  -- two-character consonant codes
  656. {a="_t", b="th"},
  657. {a="%^g", b="j"},
  658. {a="%.h", b="ḥ"},
  659. {a="_h", b="kh"},
  660. {a="_d", b="dh"},
  661. {a="%^s", b="sh"},
  662. {a="%.s", b="ṣ"},
  663. {a="%.d", b="ḍ"},
  664. {a="%.t", b="ṭ"},
  665. {a="%.z", b="ẓ"},
  666. {a="%.g", b="gh"},
  667. {a="%.y", b="y"},
  668. -- additional characters (begin)
  669. {a="%^c", b="ch"},
  670. {a="%^z", b="zh"},
  671. {a="%^n", b="ñ"},
  672. -- additional characters (end)
  673. {a="(U)(A)", b="ū"},
  674. {a="WA", b="w"},
  675. {a="(a)W", b="%1w"},
  676. {a="_A", b="á"},
  677. {a="_u", b="ū"},
  678. {a="_a", b="ā"},
  679. {a="_i", b="ī"},
  -- the remaining dotted / accented codes lose their diacritic here
  680. {a="%.b", b="b"},
  681. {a="%.f", b="f"},
  682. {a="%.q", b="q"},
  683. {a="%.k", b="k"},
  684. {a="%.n", b="n"},
  685. {a="%^d", b="d"}
  686. }
  -- singletrloc: single-character rules for the "loc" scheme.  Field a is
  -- the input character, field b its romanized form.  Every letter maps to
  -- itself except x (written kh) and T (written t); the two characters "
  -- and B are removed.
  singletrloc = {}
  do
    -- { input, output }, kept in the original rule order
    local mapping = {
      { "b", "b" }, { "t", "t" }, { "j", "j" }, { "x", "kh" },
      { "d", "d" }, { "r", "r" }, { "z", "z" }, { "s", "s" },
      { "`", "`" }, { "f", "f" }, { "q", "q" }, { "k", "k" },
      { "l", "l" }, { "m", "m" }, { "n", "n" }, { "h", "h" },
      { "w", "w" }, { "y", "y" }, { "T", "t" },
      -- additional characters
      { "p", "p" }, { "v", "v" }, { "g", "g" },
      -- characters that are simply deleted
      { "\"", "" }, { "B", "" },
    }
    for idx, pair in ipairs(mapping) do
      singletrloc[idx] = { a = pair[1], b = pair[2] }
    end
  end
  -- longvtrloc: long-vowel rules of the "loc" rule set (per the table-name
  -- suffix). Every entry is {a = <Lua pattern>, b = <replacement>}.
  -- The two-character rules "aY" and "iY" are listed before the bare "Y"
  -- rule; if the rules run in listed order (NOTE(review): consumer not
  -- visible here -- confirm), the bare rule would otherwise catch the "Y"
  -- of those pairs first.
  longvtrloc = {
    {a="A", b="ā"},
    {a="U", b="ū"},
    {a="I", b="ī"},
    {a="aY", b="ay"},
    {a="iY", b="ī"},
    {a="Y", b="á"},
  }
  -- shortvtrloc: short-vowel rules of the "loc" rule set (per the table-name
  -- suffix). Every entry is {a = <Lua pattern>, b = <replacement>}.
  -- Runs of three, then two, consecutive short vowels are rewritten as a
  -- slash-delimited, comma-separated list (e.g. "uai" -> "/u,a,i/"); the
  -- three-vowel rule is listed first so the two-vowel rule does not split
  -- such a run (NOTE(review): assumes rules run in listed order -- the
  -- consumer is not visible here). The last three rules map each vowel to
  -- itself.
  shortvtrloc = {
    {a="([uai])([uai])([uai])", b="/%1,%2,%3/"},
    {a="([uai])([uai])", b="/%1,%2/"},
    {a="u", b="u"},
    {a="a", b="a"},
    {a="i", b="i"}
  }
  -- finaltrloc: closing rule of the "loc" rule set (per the table-name
  -- suffix). Its single entry replaces the character "ʾ" with an ASCII
  -- apostrophe.
  -- NOTE(review): the name suggests it runs after the other "loc" tables;
  -- the consumer is not visible here -- confirm.
  finaltrloc = {
    {a="ʾ", b="'"},
  }
  733. -- arabica
  -- hamzatrarabica: hamza rules of the "arabica" rule set.
  -- Every entry is {a = <Lua pattern>, b = <replacement>}; "%n" in b refers
  -- to the n-th capture of a. An ASCII apostrophe in the input stands for
  -- hamza and is rewritten to "ʾ", except word-initially, where it is
  -- replaced by the placeholder "@" (see the "initial" group below).
  -- NOTE(review): the consumer is not visible in this part of the file;
  -- presumably the rules are applied in listed order through string.gsub.
  -- Many later rules only make sense after the earlier ones have run, so
  -- do not reorder without checking.
  hamzatrarabica = { -- ≠ from hamzatrloc: initial hamza has to be held
    -- next lines for ʾalif alone
    {a="(%.A)([^uai])", b=".|%2"},
    {a="(%.A)([uai])", b="||%2"},
    -- hard coded hamza
    {a="|\"'", b="ʾ"},
    {a="A\"'", b="ʾA"},
    {a="[au]\"'", b="ʾ"},
    {a="w\"'", b="ʾ"},
    {a="i\"'", b="ʾ"},
    {a="y\"'", b="ʾ"},
    -- hamza takes tašdīd too
    {a="''([Uu])", b="ʾʾ%1"},
    {a="''([Aa])", b="ʾʾ%1"},
    {a="''([Ii])", b="ʾʾ%1"},
    -- initial long u and i (for a, see below)
    {a="%'%_U", b="U"},
    {a="%'%_I", b="I"},
    -- taḫfīfu 'l-hamza
    {a="^'u'([^uaiUAI])", b="U%1"},
    {a="([%s%(%[%<%-])'u'([^uaiUAI])", b="%1U%2"},
    {a="^'i'([^uaiUAI])", b="I%1"},
    {a="([%s%(%[%<%-])'i'([^uaiUAI])", b="%1I%2"},
    {a="^u'([^uaiUAI])", b="U%1"},
    {a="([^uaiUAIYN][%s%(%[%<])u'([^uaiUAI])", b="%1U%2"},
    {a="^i'([^uaiUAI])", b="I%1"},
    {a="([^uaiUAIYN][%s%(%[%<])i'([^uaiUAI])", b="%1I%2"},
    -- madda (historic writing below)
    {a="^(')(A)", b="%2"},
    {a="(%W)(')(A)", b="%1%3"},
    {a="^'a'([^uaiUAI])", b="A%1"},
    {a="(%W)'a'([^uaiUAI])", b="%1A%2"},
    {a="'a'([^uaiUAI])", b="A%1"},
    {a="^'a?A", b="A"},
    {a="(%W)'a?A", b="%1A"},
    {a="'a?A", b="ʾA"},
    {a="(A)(')(i)$", b="%1ʾ%3"},
    {a="(A)(')(i)(%W)", b="%1ʾ%3%4"},
    {a="(A)(')(i)", b="%1ʾ%3"}, -- historic madda
    {a="(A)(')", b="%1ʾ"}, -- historic madda
    -- initial (needs both ^ and %W patterns):
    -- hold it for now (see below, beginning of digraphs table)
    {a="^(')([ua])", b="@%2"},
    {a="^(')(i)", b="@%2"},
    -- consider replacing initial %W with [%s%(%[%<%-]:
    -- {a="(%W)(')([ua])", b="%1@%3"},
    -- {a="(%W)(')(i)", b="%1@%3"},
    {a="([%s%(%[%<%-])(')([ua])", b="%1@%3"},
    {a="([%s%(%[%<%-])(')(i)", b="%1@%3"},
    -- final
    {a="([Iy])(')(aN)$", b="%1ʾ%3"},
    {a="([Iy])(')(aN)(%W)", b="%1ʾ%3%4"},
    {a="([^uai])(')([uai]N?)$", b="%1ʾ%3"},
    {a="([^uai])(')([uai]N?)(%W)", b="%1ʾ%3%4"},
    {a="([UI])(')([uai])$", b="%1ʾ%3"},
    {a="([UI])(')([uai])(%W)", b="%1ʾ%3%4"},
    -- middle
    {a="(U)(')", b="%1ʾ"},
    {a="([Iy])(')", b="%1ʾ"},
    {a="([^uai])(')([uU])", b="%1ʾ%3"},
    {a="([^uai])(')(%_?[aAY])", b="%1ʾ%3"},
    {a="([^uai])(')([iI])", b="%1ʾ%3"},
    {a="(u)(')([uU])", b="%1ʾ%3"},
    {a="(u)(')(%_?[aAY])", b="%1ʾ%3"},
    {a="(u)(')([iI])", b="%1ʾ%3"},
    {a="(a)(')(%_?[aAY])", b="%1ʾ%3"},
    {a="(a)(')([uU])", b="%1ʾ%3"},
    {a="(a)(')([iI])", b="%1ʾ%3"},
    {a="(i)(')(%_?[aAY])", b="%1ʾ%3"},
    {a="(i)(')([uU])", b="%1ʾ%3"},
    {a="(i)(')([iI])", b="%1ʾ%3"},
    {a="(a)(')([^uaiUAI])", b="%1ʾ%3"},
    {a="(u)(')([^uaiUAI])", b="%1ʾ%3"},
    {a="(i)(')([^uaiUAI])", b="%1ʾ%3"}
  }
  -- trigraphstrarabica: multi-character rules of the "arabica" rule set
  -- (article forms, silent wāw, -āT endings).
  -- Every entry is {a = <Lua pattern>, b = <replacement>}; "%n" in b refers
  -- to the n-th capture of a. Most rules come in pairs: one anchored at the
  -- start of the string ("^") and one anchored after a separator.
  -- Whatever form the article is typed in (al-, assimilated, or with a
  -- leading apostrophe for waṣla), the replacement always writes it as "l-".
  -- NOTE(review): the consumer is not visible in this part of the file;
  -- presumably the rules are applied in listed order through string.gsub.
  -- Specific rules precede the catch-all "what remains" rules, so do not
  -- reorder without checking.
  trigraphstrarabica = { -- trigraphs or more
    -- 'llatI / 'llad_I
    {a="^'ll(a)([%_]?[dt])", b="ll%1%2"},
    {a="([%-%(%[%|%<%s])'ll(a)([%_]?[dt])", b="%1ll%2%3"}, --p
    -- al- + lām
    {a="^(a)l%-(l)", b="%1l-%2"},
    {a="(%s)(a)l%-(l)", b="%1%2l-%3"},
    -- al- + solar consonant ('c' and '^n' are additional characters)
    {a="^(a)l%-(%^n)", b="%1l-%2"}, -- ^n is lunar
    {a="(%s)(a)l%-(%^n)", b="%1%2l-%3"}, -- ^n is lunar
    {a="^(a)l%-([%_%^%.]?[tdrzsnc])", b="%1l-%2"},
    {a="(%s)(a)l%-([%_%^%.]?[tdrzsnc])", b="%1%2l-%3"},
    -- assim. art. + solar consonant ('c' and '^n' are additional characters)
    {a="^(a)(%^n)%-", b="%1l-"}, -- ^n is lunar
    {a="(%s)(a)(%^n)%-", b="%1%2l-"}, -- ^n is lunar
    {a="^(a)([%_%^%.]?[tdrzsnc])%-", b="%1l-"},
    {a="(%s)(a)([%_%^%.]?[tdrzsnc])%-", b="%1%2l-"},
    -- al- + initial unstable hamza
    {a="^(a)l%-([uai])", b="%1l-%2"},
    {a="(%s)(a)l%-([uai])", b="%1%2l-%3"},
    -- li-/la- + art. + initial unstable hamza is a special orthography
    {a="l([ai])%-l%-([uai])", b="l%1-l-%2"},
    -- al- + lunar consonant (i.e. what remains)
    {a="^(a)l%-", b="%1l-"},
    {a="(%s)(a)l%-", b="%1%2l-"},
    -- art. with waṣla + lām
    {a="'l%-(l)", b="l-%1"},
    -- art. with waṣla + solar consonant
    -- ('c' and '^n' are additional characters)
    {a="'l%-(%^n)", b="l-%1"}, -- ^n is lunar
    {a="'l%-([%_%^%.]?[tdrzsnc])", b="l-%1"},
    -- li-/la- + art. + lām
    {a="l([ai])%-l%-(l)", b="l%1-l-%2"},
    -- assim. art. with waṣla + solar consonant
    -- ('c' and '^n' are additional characters)
    {a="'(%^n)%-", b="l-"}, -- ^n is lunar
    {a="'([%_%^%.]?[tdrzsnc])%-", b="l-"},
    -- li-/la- + art. + solar consonant is a special orthography
    -- ('c' and '^n' are additional characters)
    {a="l([ai])%-l%-(%^n)", b="l%1-l-%2"}, -- ^n is lunar
    {a="l([ai])%-l%-([%_%^%.]?[tdrzsnc])", b="l%1-l-%2"},
    -- li-/la- + assim. art. + solar consonant is a special orthography
    -- ('c' and '^n' are additional characters)
    {a="l([ai])%-(%^n)%-(%^n)", b="l%1-l-%3"}, -- ^n is lunar
    {a="l([ai])%-([%_%^%.]?[tdrzsnc])%-([%_%^%.]?[tdrzsnc])", b="l%1-l-%3"},
    -- art. with waṣla + initial unstable hamza
    {a="'l%-([uai])", b="l-%1"},
    -- art. with waṣla + lunar consonant (i.e. what remains)
    {a="'l%-", b="l-"},
    -- the silent wāw
    {a="uU$", b="u"},
    {a="uU(%W)", b="u%1"},
    {a="aU$", b="a"},
    {a="aU(%W)", b="a%1"},
    {a="iU$", b="i"},
    {a="iU(%W)", b="i%1"},
    -- words ending in -āT with silent wāw/yāʾ
    {a="(_a)UA", b="A"},
    {a="(_a)U", b="A"},
    {a="(_a)I", b="A"}
  }
  -- digraphstrarabica: two-character rules of the "arabica" rule set.
  -- Every entry is {a = <Lua pattern>, b = <replacement>}; "%n" in b refers
  -- to the n-th capture of a. The table removes the "@" placeholder, drops
  -- the hyphen before the listed endings (the ʾiʿrāb hyphen), rewrites "T"
  -- according to what follows it, and turns the ASCII digraphs
  -- ("_t", "^g", ".h", ...) into their single output letters.
  -- NOTE(review): the consumer is not visible in this part of the file;
  -- presumably the rules are applied in listed order through string.gsub --
  -- confirm before reordering.
  digraphstrarabica = {
    {a="([uai]%-)(\"?[uai])", b="%1"}, -- hyphen + initial alif without hamza
    {a="([UAIYuai])(%s)([%(%[%|%<]?)(\"?[uai])", b="%1%2%3"}, --p
    -- NOTE(review): the literal "o" anchors in the next pattern look unusual
    -- and may be damaged text -- confirm against the upstream source.
    {a="(o[%S]-)([UAIuai])(o)(\"?[uai])", b=""},
    {a="@", b=""}, -- remove the tag before the former hamza
    -- discard the ʾiʿrāb hyphen (begin)
    {a="(%-)(\"?[UI]na)(%p*%s)", b="%2%3"},
    {a="(%-)(\"?[UI]na)(%p*)$", b="%2%3"},
    {a="(%-)(\"?At[ui])(%p*%s)", b="%2%3"},
    {a="(%-)(\"?At[ui])(%p*)$", b="%2%3"},
    {a="(%-)(\"?Ani)(%p*%s)", b="%2%3"},
    {a="(%-)(\"?Ani)(%p*)$", b="%2%3"},
    {a="(%-)(\"?ayni)(%p*%s)", b="%2%3"},
    {a="(%-)(\"?ayni)(%p*)$", b="%2%3"},
    {a="(%-)([uai])(%p*%s)", b="%2%3"},
    {a="(%-)([uai])(%p*)$", b="%2%3"},
    -- discard the ʾiʿrāb hyphen (end)
    {a="(%-)(\"?[uai])", b="%1%2"}, -- hyphen + initial alif without hamza
    {a="^(\"?[uai])", b="%1"}, -- initial alif without hamza
    {a="(%s)([uai])", b="%1%2"}, -- initial alif without hamza
    {a="%-%-", b=""},
    {a="iyy(%p*)$", b="ī%1"},
    {a="iyy(%p*%s)", b="ī%1"},
    -- {a="T([^uai])", b="h%1"},
    {a="([a%']l%-)(%S-)aT([%(%[%|%<%s])(al%-)", b="%1%2a%3%4"}, --p
    {a="aT([%(%[%|%<%s])(al%-)", b="at%1%2"}, --p
    {a="T([%|\"])", b="t%1"},
    {a="aT(%p*)$", b="a%1"},
    {a="aT(%p*%s)", b="a%1"},
    {a="_t", b="ṯ"},
    {a="%^g", b="ǧ"},
    {a="%.h", b="ḥ"},
    {a="_h", b="ḫ"},
    {a="_d", b="ḏ"},
    {a="%^s", b="š"},
    {a="%.s", b="ṣ"},
    {a="%.d", b="ḍ"},
    {a="%.t", b="ṭ"},
    {a="%.z", b="ẓ"},
    {a="%.g", b="ġ"},
    {a="%.y", b="y"},
    -- additional characters (begin)
    {a="%^c", b="č"},
    {a="%^z", b="ž"},
    {a="%^n", b="ñ"},
    -- additional characters (end)
    {a="(U)(A)", b="ū"},
    {a="WA", b="w"},
    {a="(a)W", b="%1w"},
    {a="_A", b="ā"},
    {a="_u", b="ū"},
    {a="_a", b="ā"},
    {a="_i", b="ī"},
    {a="%.b", b="b"},
    {a="%.f", b="f"},
    {a="%.q", b="q"},
    {a="%.k", b="k"},
    {a="%.n", b="n"},
    {a="%^d", b="d"}
  }
  -- singletrarabica: single-character rules of the "arabica" rule set.
  -- Every entry is {a = <Lua pattern>, b = <replacement>}.
  -- Most entries map a letter to itself; "j" becomes "ǧ", "x" becomes "ḫ",
  -- the backtick becomes "ʿ", "T" becomes "t", and the double quote and "B"
  -- are replaced by the empty string.
  -- NOTE(review): the code that walks this table is not visible in this part
  -- of the file; presumably the rules are applied in listed order through
  -- string.gsub -- confirm before reordering entries.
  singletrarabica = {
    {a="b", b="b"},
    {a="t", b="t"},
    {a="j", b="ǧ"},
    {a="x", b="ḫ"},
    {a="d", b="d"},
    {a="r", b="r"},
    {a="z", b="z"},
    {a="s", b="s"},
    {a="`", b="ʿ"},
    {a="f", b="f"},
    {a="q", b="q"},
    {a="k", b="k"},
    {a="l", b="l"},
    {a="m", b="m"},
    {a="n", b="n"},
    {a="h", b="h"},
    {a="w", b="w"},
    {a="y", b="y"},
    {a="T", b="t"},
    -- additional characters (begin)
    {a="p", b="p"},
    {a="v", b="v"},
    {a="g", b="g"},
    -- additional characters (end)
    {a="\"", b=""}, -- the double quote is removed from the output
    {a="B", b=""}   -- "B" is removed from the output
  }
  -- longvtrarabica: long-vowel rules of the "arabica" rule set.
  -- Every entry is {a = <Lua pattern>, b = <replacement>}.
  -- The two-character rules "aY" and "iY" are listed before the "[AY]"
  -- rule; if the rules run in listed order (NOTE(review): consumer not
  -- visible here -- confirm), that rule would otherwise catch the "Y" of
  -- those pairs first.
  longvtrarabica = {
    {a="aY", b="ay"},
    {a="iY", b="ī"},
    {a="[AY]", b="ā"},
    {a="U", b="ū"},
    {a="I", b="ī"}
  }