arabluatex_trans.lua 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019
  1. --[[
  2. This file is part of the `arabluatex' package
  3. ArabLuaTeX -- Processing ArabTeX notation under LuaLaTeX
  4. Copyright (C) 2016--2020 Robert Alessi
  5. Please send error reports and suggestions for improvements to Robert
  6. Alessi <alessi@robertalessi.net>
  7. This program is free software: you can redistribute it and/or modify
  8. it under the terms of the GNU General Public License as published by
  9. the Free Software Foundation, either version 3 of the License, or
  10. (at your option) any later version.
  11. This program is distributed in the hope that it will be useful, but
  12. WITHOUT ANY WARRANTY; without even the implied warranty of
  13. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. General Public License for more details.
  15. You should have received a copy of the GNU General Public License
  16. along with this program. If not, see
  17. <http://www.gnu.org/licenses/>.
  18. --]]
  19. -- common
  -- Punctuation rewrite rules listed under the "common" heading.
  -- Every entry pairs a Lua pattern `a` with a replacement string `b`:
  -- a doubled parenthesis in the input becomes one literal parenthesis.
  -- NOTE(review): the code that consumes this table is outside this chunk;
  -- presumably each pair is handed to string.gsub in list order (confirm).
  20. punctuationtr = {
  21. {a="%(%(", b="("},
  22. {a="%)%)", b=")"}
  23. }
  -- Rules whose replacement is empty (or a visible error marker).
  --   "|" and "o" are deleted outright;
  --   "O...O" deletes the shortest run of non-space characters enclosed
  --   between two capital O's, delimiters included;
  --   a "^" or "_" followed by any character is replaced by "<??>" plus
  --   that character.
  -- NOTE(review): presumably this runs after the other tables, so that only
  -- unconsumed "^"/"_" prefixes get flagged with "<??>" (confirm with caller).
  24. nulltr = {
  25. {a="%|", b=""},
  26. {a="o", b=""},
  27. {a="O[%S]-O", b=""},
  28. {a="[%^%_](.)", b="<??>%1"}
  29. }
  30. -- cap (legacy)
  -- Lowercase -> uppercase pairs for romanized letters (the "cap (legacy)"
  -- table). Both sides are already-transliterated Unicode letters, not
  -- ArabTeX input codes. The short vowels u/a/i are not listed here, only
  -- the long ones (ā, ū, ī).
  -- NOTE(review): how and where the capital is applied (e.g. first letter
  -- of a word only) is decided by code outside this chunk.
  31. captr = {
  32. -- dmg (default); loc as well
  33. {a="ā", b="Ā"},
  34. {a="b", b="B"},
  35. {a="t", b="T"},
  36. {a="ṯ", b="Ṯ"},
  37. {a="ǧ", b="Ǧ"},
  38. {a="ḥ", b="Ḥ"},
  39. {a="ḫ", b="Ḫ"},
  40. {a="d", b="D"},
  41. {a="ḏ", b="Ḏ"},
  42. {a="r", b="R"},
  43. {a="z", b="Z"},
  44. {a="s", b="S"},
  45. {a="š", b="Š"},
  46. {a="ṣ", b="Ṣ"},
  47. {a="ḍ", b="Ḍ"},
  48. {a="ṭ", b="Ṭ"},
  49. {a="ẓ", b="Ẓ"},
  50. {a="ġ", b="Ġ"},
  51. {a="f", b="F"},
  52. {a="q", b="Q"},
  53. {a="k", b="K"},
  54. {a="l", b="L"},
  55. {a="m", b="M"},
  56. {a="n", b="N"},
  57. {a="h", b="H"},
  58. {a="w", b="W"},
  59. {a="ū", b="Ū"},
  60. {a="y", b="Y"},
  61. {a="ī", b="Ī"}
  62. }
  63. -- uc
  -- Lowercase -> uppercase pairs (the "uc" table). Compared with captr it
  -- additionally covers "j", the short vowels u/a/i, the extra letters
  -- p/č/ž/v/g/ñ and the digraph "ch" (marked "loc"), which is capitalized
  -- as "Ch", i.e. only its first letter is raised.
  -- NOTE(review): the lookup logic that uses these pairs is not visible in
  -- this chunk.
  64. lcuc = {
  65. {a="b", b="B"},
  66. {a="t", b="T"},
  67. {a="ṯ", b="Ṯ"},
  68. {a="ǧ", b="Ǧ"},
  69. {a="j", b="J"},
  70. {a="ḥ", b="Ḥ"},
  71. {a="ḫ", b="Ḫ"},
  72. {a="d", b="D"},
  73. {a="ḏ", b="Ḏ"},
  74. {a="r", b="R"},
  75. {a="z", b="Z"},
  76. {a="s", b="S"},
  77. {a="š", b="Š"},
  78. {a="ṣ", b="Ṣ"},
  79. {a="ḍ", b="Ḍ"},
  80. {a="ṭ", b="Ṭ"},
  81. {a="ẓ", b="Ẓ"},
  82. {a="ġ", b="Ġ"},
  83. {a="f", b="F"},
  84. {a="q", b="Q"},
  85. {a="k", b="K"},
  86. {a="l", b="L"},
  87. {a="m", b="M"},
  88. {a="n", b="N"},
  89. {a="h", b="H"},
  90. {a="w", b="W"},
  91. {a="y", b="Y"},
  92. {a="u", b="U"},
  93. {a="a", b="A"},
  94. {a="i", b="I"},
  95. {a="ū", b="Ū"},
  96. {a="ā", b="Ā"},
  97. {a="ī", b="Ī"},
  98. -- additional characters
  99. {a="p", b="P"},
  100. {a="č", b="Č"},
  101. {a="ž", b="Ž"},
  102. {a="v", b="V"},
  103. {a="g", b="G"},
  104. {a="ñ", b="Ñ"},
  105. {a="ch", b="Ch"}, -- loc
  106. }
  107. -- dmg
  -- Hamza rules for the "dmg" mode: the ASCII apostrophe used for hamza in
  -- the input is rewritten to the modifier letter "ʾ", and the ".A" code
  -- (ʾalif alone) is rewritten to ".|" or "||".
  -- Each entry is {a=<Lua pattern>, b=<replacement>}; %1..%4 in `b` refer to
  -- the captures of `a`. Rules anchored with "^" have a twin that starts with
  -- a captured delimiter, because "^" only matches at the very start of the
  -- subject string.
  -- The order of the entries matters: specific contexts (hard-coded hamza,
  -- madda, initial, final) come before the generic "middle" rules, which
  -- would otherwise match first.
  -- NOTE(review): the consumer is outside this chunk; presumably every pair
  -- is applied with string.gsub from top to bottom (confirm).
  108. hamzatrdmg = {
  109. -- next lines for ʾalif alone
  110. {a="(%.A)l%-(%^n)", b=".|l-%2"}, --additional (^n is lunar)
  111. {a="([%(%[%|%<%s%-O])(%.A)l%-(%^n)", b="%1.|l-%3"}, --additional (^n is lunar) --p
  112. {a="(%.A)l%-([%_%^%.]?[tdrzsnc])", b=".|%2-%2"},
  113. {a="([%(%[%|%<%s%-O])(%.A)l%-([%_%^%.]?[tdrzsnc])", b="%1.|%3-%3"}, --p
  114. {a="(%.A)([uai])l%-(%^n)", b="||%2l-%3"}, --additional (^n is lunar)
  115. {a="([%(%[%|%<%s%-O])(%.A)([uai])l%-(%^n)", b="%1||%3l-%4"}, --additional (^n is lunar) --p
  116. {a="(%.A)([uai])l%-([%_%^%.]?[tdrzsnc])", b="||%2%3-%3"},
  117. {a="([%(%[%|%<%s%-O])(%.A)([uai])l%-([%_%^%.]?[tdrzsnc])", b="%1||%3%4-%4"}, --p
  118. {a="(%.A)([^uai])", b=".|%2"},
  119. {a="(%.A)([uai])", b="||%2"},
  120. -- hard coded hamza
  -- (the carrier letter written before "\"'" is dropped, except "A",
  -- which is kept after the hamza)
  121. {a="|\"'", b="ʾ"},
  122. {a="A\"'", b="ʾA"},
  123. {a="[au]\"'", b="ʾ"},
  124. {a="w\"'", b="ʾ"},
  125. {a="i\"'", b="ʾ"},
  126. {a="y\"'", b="ʾ"},
  127. -- hamza takes tašdīd too
  128. {a="''([Uu])", b="ʾʾ%1"},
  129. {a="''([Aa])", b="ʾʾ%1"},
  130. {a="''([Ii])", b="ʾʾ%1"},
  131. -- initial long u and i (for a, see below)
  132. {a="%'%_U", b="ʾU"},
  133. {a="%'%_I", b="ʾI"},
  134. -- taḫfīfu 'l-hamza
  -- (short vowel + hamza before a non-vowel becomes the long vowel code)
  135. {a="'u'([^uaiUAI])", b="ʾU%1"},
  136. {a="'i'([^uaiUAI])", b="ʾI%1"},
  137. {a="^u'([^uaiUAI])", b="U%1"},
  138. {a="([^uaiUAIYN][%s%(%[%<])u'([^uaiUAI])", b="%1U%2"},
  139. {a="^i'([^uaiUAI])", b="I%1"},
  140. {a="([^uaiUAIYN][%s%(%[%<])i'([^uaiUAI])", b="%1I%2"},
  141. -- madda (historic writing below)
  142. {a="'a'([^uaiUAI])", b="ʾA%1"},
  143. {a="'a?A", b="ʾA"},
  144. {a="(A)(')(i)$", b="%1ʾ%3"},
  145. {a="(A)(')(i)(%W)", b="%1ʾ%3%4"},
  146. {a="(A)(')(i)", b="%1ʾ%3"}, -- historic madda
  147. {a="(A)(')", b="%1ʾ"}, -- historic madda
  148. -- initial (needs both ^ and %W patterns)
  149. -- 'aw: the diphthong is to be resolved into 'awi' (next 8 lines)
  150. {a="^('aw)(O)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-O)", b="%1i"},
  151. {a="(%W)('aw)(O)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-O)", b="%1%2i"},
  152. {a="^('aw)(O)(\"?[uai])([%S]-O)", b="%1i"},
  153. {a="(%W)('aw)(O)(\"?[uai])([%S]-O)", b="%1%2i"},
  154. {a="^('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"},
  155. {a="(%W)('aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"},
  156. {a="^('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"}, --p
  157. {a="(%W)('aw)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"}, --p
  158. -- then the 'initial' rules for the remaining cases
  159. {a="^(')([ua])", b="ʾ%2"},
  160. {a="^(')(i)", b="ʾ%2"},
  161. -- consider replacing initial %W with [%s%(%[%<%-]:
  162. -- {a="(%W)(')([ua])", b="%1ʾ%3"},
  163. -- {a="(%W)(')(i)", b="%1ʾ%3"},
  164. {a="([%s%(%[%<%-])(')([ua])", b="%1ʾ%3"},
  165. {a="([%s%(%[%<%-])(')(i)", b="%1ʾ%3"},
  166. -- final
  167. {a="([Iy])(')(aN)$", b="%1ʾ%3"},
  168. {a="([Iy])(')(aN)(%W)", b="%1ʾ%3%4"},
  169. {a="([^uai])(')([uai]N?)$", b="%1ʾ%3"},
  170. {a="([^uai])(')([uai]N?)(%W)", b="%1ʾ%3%4"},
  171. {a="([UI])(')([uai])$", b="%1ʾ%3"},
  172. {a="([UI])(')([uai])(%W)", b="%1ʾ%3%4"},
  173. -- middle
  -- (every rule below emits the same "ʾ"; the entries differ only in the
  -- surrounding context they match)
  174. {a="(U)(')", b="%1ʾ"},
  175. {a="([Iy])(')", b="%1ʾ"},
  176. {a="([^uai])(')([uU])", b="%1ʾ%3"},
  177. {a="([^uai])(')(%_?[aAY])", b="%1ʾ%3"},
  178. {a="([^uai])(')([iI])", b="%1ʾ%3"},
  179. {a="(u)(')([uU])", b="%1ʾ%3"},
  180. {a="(u)(')(%_?[aAY])", b="%1ʾ%3"},
  181. {a="(u)(')([iI])", b="%1ʾ%3"},
  182. {a="(a)(')(%_?[aAY])", b="%1ʾ%3"},
  183. {a="(a)(')([uU])", b="%1ʾ%3"},
  184. {a="(a)(')([iI])", b="%1ʾ%3"},
  185. {a="(i)(')(%_?[aAY])", b="%1ʾ%3"},
  186. {a="(i)(')([uU])", b="%1ʾ%3"},
  187. {a="(i)(')([iI])", b="%1ʾ%3"},
  188. {a="(a)(')([^uaiUAI])", b="%1ʾ%3"},
  189. {a="(u)(')([^uaiUAI])", b="%1ʾ%3"},
  190. {a="(i)(')([^uaiUAI])", b="%1ʾ%3"}
  191. }
  -- Tanwīn rules for the "dmg" mode: the input codes uN / aN / iN (and the
  -- variants with a trailing U, I, _A or Y) are rewritten so that the ending
  -- is wrapped in the TeX macro \arbup{...}. "\\arbup" in the Lua source is a
  -- single literal backslash followed by "arbup" in the output.
  -- Each entry is {a=<Lua pattern>, b=<replacement>}; an optional leading
  -- hyphen ("%-?") is consumed by most patterns and not reproduced.
  -- Entries are ordered from the most specific context (tanwīn followed by
  -- another word, either after whitespace or inside an "O...O" group) down
  -- to the bare fallbacks at the end of the table.
  -- NOTE(review): the meaning of the "O...O" delimiters is not established
  -- in this chunk; the comment below only says those rules are "needed by
  -- \arbcolor".
  192. tanwintrdmg = {
  -- NOTE(review): the next two rules accept any of u/a/i before "NU" but
  -- always emit "un"; the bare fallbacks further down emit an/in for
  -- aNU/iNU. Confirm that this is intended.
  193. {a="%-?([uai]NU)(O)([ui])([%S]-O)", b="\\arbup{un%3}"},
  194. {a="%-?([uai]NU)(%s)([ui])", b="\\arbup{un%3}%2'"},
  195. {a="%-?(iNI)(O)([ui])([%S]-O)", b="i\\arbup{n%3}"},
  196. {a="%-?(iNI)(%s)([ui])", b="i\\arbup{n%3}%2'"},
  197. {a="(O[%S]-)([uai]N[UI])(O)(\"?[ui])", b="'"},
  198. {a="%-?uNU", b="\\arbup{un}"},
  199. {a="%-?aNU", b="\\arbup{an}"},
  200. {a="%-?iNU", b="\\arbup{in}"},
  201. {a="%-?iNI", b="i\\arbup{n}"},
  202. -- tanwīn preceding ʾalif conjunctionis
  203. {a="%-?(uN)(O)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-O)", b="\\arbup{uni}"},
  204. {a="%-?(aN)(_A)(O)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-O)", b="ạ\\arbup{ni}"},
  205. {a="%-?(aN)(Y)(O)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-O)", b="ạ\\arbup{ni}"},
  206. {a="(T)%-?(aN)(O)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-O)", b="t\\arbup{ani}"},
  207. {a="([^TA])%-?(aN)(O)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-O)", b="%1\\arbup{ani}"},
  208. {a="%-?(iN)(O)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-O)", b="\\arbup{ini}"},
  209. {a="%-?(uN)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="\\arbup{uni}%2%3"},
  210. {a="%-?(aN)(_A)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="ạ\\arbup{ni}%3%4"},
  211. {a="%-?(aN)(Y)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="ạ\\arbup{ni}%3%4"},
  212. {a="(T)%-?(aN)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="t\\arbup{ani}%3%4"},
  213. {a="([^TA])%-?(aN)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1\\arbup{ani}%3%4"},
  214. {a="%-?(iN)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="\\arbup{ini}%2%3"},
  215. -- tanwīn preceding 'lla_dI/'llatI
  216. {a="%-?(uN)(O)('lla[%_]?[dt])([%S]-O)", b="\\arbup{uni}"},
  217. {a="%-?(aN)(_A)(O)('lla[%_]?[dt])([%S]-O)", b="ạ\\arbup{ni}"},
  218. {a="%-?(aN)(Y)(O)('lla[%_]?[dt])([%S]-O)", b="ạ\\arbup{ni}"},
  219. {a="(T)%-?(aN)(O)('lla[%_]?[dt])([%S]-O)", b="t\\arbup{ani}"},
  220. {a="([^TA])%-?(aN)(O)('lla[%_]?[dt])([%S]-O)", b="%1\\arbup{ani}"},
  221. {a="%-?(iN)(O)('lla[%_]?[dt])([%S]-O)", b="\\arbup{ini}"},
  222. {a="%-?(uN)(%s)('lla[%_]?[dt])", b="\\arbup{uni}%2%3"},
  223. {a="%-?(aN)(_A)(%s)('lla[%_]?[dt])", b="ạ\\arbup{ni}%3%4"},
  224. {a="%-?(aN)(Y)(%s)('lla[%_]?[dt])", b="ạ\\arbup{ni}%3%4"},
  225. {a="(T)%-?(aN)(%s)('lla[%_]?[dt])", b="t\\arbup{ani}%3%4"},
  226. {a="([^TA])%-?(aN)(%s)('lla[%_]?[dt])", b="%1\\arbup{ani}%3%4"},
  227. {a="%-?(iN)(%s)('lla[%_]?[dt])", b="\\arbup{ini}%2%3"},
  228. -- tanwīn + alif without hamza and kasra (ibn) or dhamma (uhrub)
  -- (the vowel that opens the next word is moved inside \arbup{...}; in the
  -- whitespace variants it is replaced by an apostrophe in the next word)
  229. {a="%-?(uN)(O)([ui])([%S]-O)", b="\\arbup{un%3}"},
  230. {a="%-?(aN)(_A)(O)([ui])([%S]-O)", b="ạ\\arbup{n%4}"},
  231. {a="%-?(aN)(Y)(O)([ui])([%S]-O)", b="ạ\\arbup{n%4}"},
  232. {a="(T)%-?(aN)(O)([ui])([%S]-O)", b="t\\arbup{an%4}"},
  233. {a="([^TA])%-?(aN)(O)([ui])([%S]-O)", b="%1\\arbup{an%4}"},
  234. {a="%-?(iN)(O)([ui])([%S]-O)", b="\\arbup{in%3}"},
  235. {a="(O[%S]-)([uai]N)(O)(\"?[ui])", b="'"},
  236. {a="%-?(uN)(%s)([ui])", b="\\arbup{un%3}%2'"},
  237. {a="%-?(aN)(_A)(%s)([ui])", b="ạ\\arbup{n%4}%3'"},
  238. {a="%-?(aN)(Y)(%s)([ui])", b="ạ\\arbup{n%4}%3'"},
  239. {a="(T)%-?(aN)(%s)([ui])", b="t\\arbup{an%4}%3'"},
  240. {a="([^TA])%-?(aN)(%s)([ui])", b="%1\\arbup{an%4}%3'"},
  241. {a="%-?(iN)(%s)([ui])", b="\\arbup{in%3}%2'"},
  242. --
  243. -- {a="uN", b="\\arbup{un}"}, (now included in the last line of this table)
  244. {a="%-?(\"?At)%-?([ui])N", b="\\arbup{%1%2n}"},
  245. -- needed by \arbcolor:
  246. {a="%-?(aN)(O[%S]-%_AO)", b="ạ\\arbup{n}"},
  247. {a="%-?(aN)(O[%S]-YO)", b="ạ\\arbup{n}"},
  248. {a="(O[%S]-TO)%-?(\"?aN)", b="\\arbup{an}"},
  249. {a="(O[%S]-[^TA]O)%-?(\"?aN)", b="\\arbup{an}"},
  250. --
  -- bare fallbacks: tanwīn with no following context to account for
  251. {a="%-?(aN)(_A)", b="ạ\\arbup{n}"},
  252. {a="%-?(aN)(Y)", b="ạ\\arbup{n}"},
  253. {a="(T)%-?(\"?aN)", b="t\\arbup{an}"},
  254. {a="([^TA])%-?(\"?aN)", b="%1\\arbup{an}"},
  255. {a="%-?([ui])N", b="\\arbup{%1n}"}
  256. }
  -- Multi-character rules for the "dmg" mode, mostly about the definite
  -- article written "al-" or, with waṣla, "'l-".
  -- Visible behaviour of the article rules:
  --   * before one of [tdrzsnc] (optionally prefixed by "_", "^" or "."),
  --     the "l" of the article is replaced by a copy of that consonant
  --     (replacement "%2-%2" / "'%1-%1");
  --   * "^n" is excluded from that treatment and keeps "l-" (see the
  --     "^n is lunar" remarks);
  --   * an article typed already assimilated (e.g. "a" + consonant + "-")
  --     is matched so that later rules do not touch it.
  -- Further down, "aw"/"ay" before a following ʾalif conjunctionis receive a
  -- helping vowel, a "silent wāw" written as U after a short vowel is
  -- dropped at a word end, and "_a" + U/I/UA collapses to the code "A".
  -- Several entries rewrite a match to itself; NOTE(review): presumably they
  -- exist to consume the match so that a later, more general rule is not
  -- applied to it -- this depends on how the caller iterates (confirm).
  257. trigraphstrdmg = { -- trigraphs or more
  258. -- 'llatI / 'llad_I
  259. {a="^'ll(a)([%_]?[dt])", b="'ll%1%2"},
  260. {a="([%(%[%|%<%s])'ll(a)([%_]?[dt])", b="%1'll%2%3"}, --p
  261. -- law: the diphthong is to be resolved into 'awi' (next 8 lines)
  262. {a="^(law)(O)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-O)", b="%1i"},
  263. {a="(%W)(law)(O)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-O)", b="%1%2i"},
  264. {a="^(law)(O)(\"?[uai])([%S]-O)", b="%1i"},
  265. {a="(%W)(law)(O)(\"?[uai])([%S]-O)", b="%1%2i"},
  266. {a="^(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"},
  267. {a="(%W)(law)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1%2i%3%4"},
  268. {a="^(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1i%2%3"}, --p
  269. {a="(%W)(law)(%s)([%(%[%|%<]?\"?[uai])", b="%1%2i%3%4"}, --p
  270. -- al- + lām
  271. {a="^(a)l%-(l)", b="%1l-%2"},
  272. {a="([%(%[%|%<%s%-O])(a)l%-(l)", b="%1%2l-%3"}, --p
  273. -- al- + solar consonant ('c' and '^n' are additional characters)
  274. {a="^(a)l%-(%^n)", b="%1l-%2"}, -- ^n is lunar
  275. {a="([%(%[%|%<%s%-O])(a)l%-(%^n)", b="%1%2l-%3"}, --^n is lunar --p
  276. {a="^(a)l%-([%_%^%.]?[tdrzsnc])", b="%1%2-%2"},
  277. {a="([%(%[%|%<%s%-O])(a)l%-([%_%^%.]?[tdrzsnc])", b="%1%2%3-%3"}, --p
  278. -- assim. art. + solar consonant ('c' and '^n' are additional characters)
  279. {a="^(a)(%^n)%-", b="%1l-"}, -- ^n is lunar
  280. {a="([%(%[%|%<%s%-O])(a)(%^n)%-", b="%1%2l-"}, --^n is lunar --p
  281. {a="^(a)([%_%^%.]?[tdrzsnc])%-", b="%1%2-"},
  282. {a="([%(%[%|%<%s%-O])(a)([%_%^%.]?[tdrzsnc])%-", b="%1%2%3-"}, --p
  283. -- al- + initial unstable hamza
  284. {a="^(a)l%-(\"?[uai])", b="%1l-%2"},
  285. {a="([%(%[%|%<%s%-O])(a)l%-(\"?[uai])", b="%1%2l-%3"}, --p
  286. -- li-/la- + art. + initial unstable hamza is a special orthography
  287. {a="l([ai])%-l%-([uai])", b="l%1-l-%2"},
  288. -- al- + lunar consonant (i.e. what remains)
  289. {a="^(a)l%-", b="%1l-"},
  290. {a="([%(%[%|%<%s%-O])(a)l%-", b="%1%2l-"}, --p
  291. -- diphthongs to be resolved before ʾalif conjunctionis
  292. {a="(aw)(O)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-O)", b="%1u"},
  293. {a="(ay)(O)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)([%S]-O)", b="%1i"},
  294. {a="(aw)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1u%2%3"},
  295. {a="(ay)(%s)(['][%_%^%.]?[l'btjghxdrzs`fqkmnwy]%-)", b="%1i%2%3"},
  296. -- art. with waṣla + lām
  297. {a="'l%-(l)", b="'l-%1"},
  298. -- art. with waṣla + solar consonant
  299. -- ('c' and '^n' are additional characters)
  300. {a="'l%-(%^n)", b="'l-%1"}, -- ^n is lunar
  301. {a="'l%-([%_%^%.]?[tdrzsnc])", b="'%1-%1"},
  302. -- li-/la- + art. + lām
  303. {a="l([ai])%-l%-(l)", b="l%1-%2-%2"},
  304. -- assim. art. with waṣla + solar consonant
  305. -- ('c' and '^n' are additional characters)
  306. {a="'(%^n)%-", b="'l-"}, -- ^n is lunar
  307. {a="'([%_%^%.]?[tdrzsnc])%-", b="'%1-"},
  308. -- li-/la- + art. + solar consonant is a special orthography
  309. -- ('c' and '^n' are additional characters)
  310. {a="l([ai])%-l%-(%^n)", b="l%1-l-%2"}, -- ^n is lunar
  311. {a="l([ai])%-l%-([%_%^%.]?[tdrzsnc])", b="l%1-%2-%2"},
  312. -- li-/la- + assim. art. + solar consonant is a special orthography
  313. -- ('c' and '^n' are additional characters)
  314. {a="l([ai])%-(%^n)%-(%^n)", b="l%1-l-%3"}, -- ^n is lunar
  315. {a="l([ai])%-([%_%^%.]?[tdrzsnc])%-([%_%^%.]?[tdrzsnc])", b="l%1-%2-%3"},
  316. -- art. with waṣla + initial unstable hamza
  317. {a="'l%-(\"?[uai])", b="'l-%1"},
  318. -- art. with waṣla + lunar consonant (i.e. what remains)
  319. {a="'l%-", b="'l-"},
  320. -- the silent wāw
  321. {a="uU$", b="u"},
  322. {a="uU(%W)", b="u%1"},
  323. {a="aU$", b="a"},
  324. {a="aU(%W)", b="a%1"},
  325. {a="iU$", b="i"},
  326. {a="iU(%W)", b="i%1"},
  327. -- words ending in -āT with silent wāw/yāʾ
  328. {a="(_a)UA", b="A"},
  329. {a="(_a)U", b="A"},
  330. {a="(_a)I", b="A"}
  331. }
  -- Assimilation (idghām) rules for the "dmg" mode: an "n" that ends a word
  -- and is followed by a word starting with one of r/l/m/n/w/y is replaced
  -- by a copy of that following consonant.
  --   1st rule: the words are separated by whitespace; the optional "}" that
  --             may follow the "n", the whitespace and the next consonant
  --             are all written back.
  --   2nd rule: the next word sits inside an "O...O" group; only the
  --             replaced consonant and the optional "}" are written back.
  -- NOTE(review): the "}" presumably closes a preceding \arbup{...} produced
  -- by the tanwīn table -- confirm against the order in which the caller
  -- applies the tables.
  332. idghamtrdmg = {
  333. -- assimilations
  334. {a="(n)(}?)(%s)([rlmnwy])", b="%4%2%3%4"},
  335. {a="(n)(}?)(O)([rlmnwy])([%S]-O)", b="%4%2"}
  336. }
  -- Two-character (and a few longer) rules for the "dmg" mode. In order, the
  -- table:
  --   1. handles a word-initial vowel (ʾalif without hamza), replacing it by
  --      an apostrophe when the previous word ends in a vowel, and shortening
  --      or resolving that preceding vowel/diphthong;
  --   2. turns a hyphen-separated ending (-Una/-Ina, -Atu/-Ati, -Ani, -ayni,
  --      single short vowel) at the end of a word into \arbup{...};
  --   3. shortens long vowels in front of a following ʾalif conjunctionis;
  --   4. removes "--" and rewrites final "iyy"/"iyyaT";
  --   5. resolves the tāʾ marbūṭa code "T" to "t" or "h" depending on what
  --      follows;
  --   6. maps the prefixed consonant and vowel codes ("_t", "^g", ".h", ...)
  --      to single transliteration letters.
  -- Each entry is {a=<Lua pattern>, b=<replacement>} and the order matters:
  -- step 6 destroys the prefixes that the earlier patterns rely on.
  -- NOTE(review): the consumer of this table is outside this chunk.
  337. digraphstrdmg = {
  338. {a="([uai]%-)(\"?[uai])", b="%1'"}, -- hyphen + initial alif without hamza
  339. -- the following two are replaced with the 4 lines next for now
  340. -- {a="^(\"?[uai])", b="%1"}, -- initial alif without hamza
  341. -- {a="(%W)(\"?[uai])", b="%1%2"}, -- initial alif without hamza
  342. -- {a="^(\"[uai])", b="'"}, -- initial alif without hamza
  343. -- {a="(%W)(\"[uai])", b="%1'"}, -- initial alif without hamza
  344. {a="^(\"?[uai])", b="%1"}, -- initial alif without hamza
  345. {a="(%W)(\"?[uai])", b="%1%2"}, -- initial alif without hamza
  346. -- this is not necessary, take out for now:
  347. -- {a="([%_]?[uaiUAIY])(%s)([uai])", b="%1%2'"}, -- initial alif without hamza
  348. {a="(aw)(O)(\"?[uai])([%S]-O)", b="%1u"},
  349. {a="(aw)(%s)([%(%[%|%<]?)(\"?[uai])", b="%1u%2%3'"}, --p
  350. {a="(ay)(O)(\"?[uai])([%S]-O)", b="%1i"},
  351. {a="(ay)(%s)([%(%[%|%<]?)(\"?[uai])", b="%1i%2%3'"}, --p
  352. {a="(aW)(O)(\"?[uai])([%S]-O)", b="awu"},
  353. {a="(UA)(O)(\"?[uai])([%S]-O)", b="u"},
  354. {a="(%_A)(O)(\"?[uai])([%S]-O)", b="ạ"},
  355. {a="(Y)(O)(\"?[uai])([%S]-O)", b="ạ"},
  356. {a="(%_a)(O)(\"?[uai])([%S]-O)", b="a"},
  357. {a="(A)(O)(\"?[uai])([%S]-O)", b="a"},
  358. {a="([%_]?[Uu])(O)(\"?[uai])([%S]-O)", b="u"},
  359. {a="([%_]?[Ii])(O)(\"?[uai])([%S]-O)", b="i"},
  360. {a="(O[%S]-)([%'a]l%-)(O)(\"?[uai])", b="'"},
  361. {a="(O[%S]-)([UAIYWuaiyw])(O)(\"?[uai])", b="'"},
  362. {a="(aW)(%s)([%(%[%|%<]?)(\"?[uai])", b="awu%2%3%4"}, --p
  363. {a="(UA)(%s)([%(%[%|%<]?)(\"?[uai])", b="u%2%3'"}, --p
  364. {a="([^%_][uai])(%s)([%(%[%|%<]?)(\"?[uai])", b="%1%2%3'"}, --p
  365. {a="(%_A)(%s)([%(%[%|%<]?)(\"?[uai])", b="ạ%2%3'"}, --p
  366. {a="(Y)(%s)([%(%[%|%<]?)(\"?[uai])", b="ạ%2%3'"}, --p
  367. {a="(%_a)(%s)([%(%[%|%<]?)(\"?[uai])", b="a%2%3'"}, --p
  368. {a="(A)(%s)([%(%[%|%<]?)(\"?[uai])", b="a%2%3'"}, --p
  369. {a="([%_]?[Uu])(%s)([%(%[%|%<]?)(\"?[uai])", b="u%2%3'"}, --p
  370. {a="([%_]?[Ii])(%s)([%(%[%|%<]?)(\"?[uai])", b="i%2%3'"}, --p
  371. -- ʾiʿrāb hyphen (begin)
  -- (each ending has two rules: one followed by optional punctuation and
  -- whitespace, one at the very end of the subject string)
  372. {a="(%-)(\"?[UI]na)(%p*%s)", b="\\arbup{%2}%3"},
  373. {a="(%-)(\"?[UI]na)(%p*)$", b="\\arbup{%2}%3"},
  374. {a="(%-)(\"?At%.?[ui])(%p*%s)", b="\\arbup{%2}%3"},
  375. {a="(%-)(\"?At%.?[ui])(%p*)$", b="\\arbup{%2}%3"},
  376. {a="(%-)(\"?Ani)(%p*%s)", b="\\arbup{%2}%3"},
  377. {a="(%-)(\"?Ani)(%p*)$", b="\\arbup{%2}%3"},
  378. {a="(%-)(\"?%.?ayni)(%p*%s)", b="\\arbup{%2}%3"},
  379. {a="(%-)(\"?%.?ayni)(%p*)$", b="\\arbup{%2}%3"},
  380. {a="(%-)(\"?%.?[uai])(%p*%s)", b="\\arbup{%2}%3"},
  381. {a="(%-)(\"?%.?[uai])(%p*)$", b="\\arbup{%2}%3"},
  382. -- ʾiʿrāb hyphen (end) shorten long vowels preceding ʾalif
  383. -- conjunctionis—without forgetting 'lla_dI
  384. {a="(U)(A)", b="U"},
  385. {a="(aW)(O)('[%_%^%.]?[l'btjghxdrzs`fqkmnwy][%-l])([%S]-O)", b="awu"},
  386. {a="(%_a)(O)('[%_%^%.]?[l'btjghxdrzs`fqkmnwy][%-l])([%S]-O)", b="a"},
  387. {a="(%_A)(O)('[%_%^%.]?[l'btjghxdrzs`fqkmnwy][%-l])([%S]-O)", b="ạ"},
  388. {a="(A)(O)('[%_%^%.]?[l'btjghxdrzs`fqkmnwy][%-l])([%S]-O)", b="a"},
  389. {a="(Y)(O)('[%_%^%.]?[l'btjghxdrzs`fqkmnwy][%-l])([%S]-O)", b="ạ"},
  390. {a="([%_]?[Uu])(O)('[%_%^%.]?[l'btjghxdrzs`fqkmnwy][%-l])([%S]-O)", b="u"},
  391. {a="([%_]?[Ii])(O)('[%_%^%.]?[l'btjghxdrzs`fqkmnwy][%-l])([%S]-O)", b="i"},
  392. --p (next 7 lines, just after %s)
  393. {a="(aW)(%s)([%(%[%|%<]?['][%_%^%.]?[l'btjghxdrzs`fqkmnwy][%-l])", b="awu%2%3"},
  394. {a="(%_a)(%s)([%(%[%|%<]?['][%_%^%.]?[l'btjghxdrzs`fqkmnwy][%-l])", b="a%2%3"},
  395. {a="(%_A)(%s)([%(%[%|%<]?['][%_%^%.]?[l'btjghxdrzs`fqkmnwy][%-l])", b="ạ%2%3"},
  396. {a="(A)(%s)([%(%[%|%<]?['][%_%^%.]?[l'btjghxdrzs`fqkmnwy][%-l])", b="a%2%3"},
  397. {a="(Y)(%s)([%(%[%|%<]?['][%_%^%.]?[l'btjghxdrzs`fqkmnwy][%-l])", b="ạ%2%3"},
  398. {a="([%_]?[Uu])(%s)([%(%[%|%<]?['][%_%^%.]?[l'btjghxdrzs`fqkmnwy][%-l])", b="u%2%3"},
  399. {a="([%_]?[Ii])(%s)([%(%[%|%<]?['][%_%^%.]?[l'btjghxdrzs`fqkmnwy][%-l])", b="i%2%3"},
  400. {a="%-%-", b=""},
  401. {a="iyyaT(%p*)$", b="īyaT%1"},
  402. {a="iyyaT(%p*%s)", b="īyaT%1"},
  403. {a="iyy(%p*)$", b="ī%1"},
  404. {a="iyy(%p*%s)", b="ī%1"},
  405. -- {a="T([^uai])", b="%1"},
  -- tāʾ marbūṭa: "t" before \arbup, before a following article, or before
  -- "|" / a double quote; "h" at the end of a word
  406. {a="T(\\arbup)", b="t%1"},
  407. {a="([a%'][%_%^%.]?[tdrzsln]%-)(%S-)T([%(%[%|%<%s])(a[%_%^%.]?[tdrzsln]%-)", b="%1%2h%3%4"}, --p
  408. {a="T([%(%[%|%<%s])(a[%_%^%.]?[tdrzsln]%-)", b="t%1%2"}, --p
  409. {a="T([%|\"])", b="t%1"},
  410. {a="T(%p*%s)", b="h%1"},
  411. {a="T(%p*)$", b="h%1"},
  412. {a="T(%p*)(%W)", b="h%1%2"},
  413. {a="_t", b="ṯ"},
  414. {a="%^g", b="ǧ"},
  415. {a="%.h", b="ḥ"},
  416. {a="_h", b="ḫ"},
  417. {a="_d", b="ḏ"},
  418. {a="%^s", b="š"},
  419. {a="%.s", b="ṣ"},
  420. {a="%.d", b="ḍ"},
  421. {a="%.t", b="ṭ"},
  422. {a="%.z", b="ẓ"},
  423. {a="%.g", b="ġ"},
  424. {a="%.y", b="y"},
  425. -- additional characters (begin)
  426. {a="%^c", b="č"},
  427. {a="%^z", b="ž"},
  428. {a="%^n", b="ñ"},
  429. -- additional characters (end)
  430. -- the following needs to be moved above shortening rules
  431. -- {a="(U)(A)", b="ū"},
  432. {a="WA", b="w"},
  433. {a="(a)W", b="%1w"},
  434. {a="_A", b="ạ̄"},
  435. {a="_u", b="ū"},
  436. {a="_a", b="ā"},
  437. {a="_i", b="ī"},
  438. {a="%.b", b="ḅ"},
  439. {a="%.f", b="f̣"},
  440. {a="%.q", b="q̣"},
  -- NOTE(review): unlike the neighbouring dotted codes, ".k" is rendered
  -- as a plain "k" without a dot below -- confirm that this is intended.
  441. {a="%.k", b="k"},
  442. {a="%.n", b="ṇ"},
  443. {a="%^d", b="d́"}
  444. }
  -- Single-character rules for the "dmg" mode. Most entries map a letter to
  -- itself; the entries that actually change the text are
  --   "j" -> "ǧ", "x" -> "ḫ", "`" -> "ʿ", "T" -> "t",
  -- while a double quote and a capital "B" are deleted.
  -- NOTE(review): the identity entries have no effect under a plain
  -- string.gsub; presumably they are kept for completeness or for a consumer
  -- that treats the table as an alphabet -- confirm before pruning.
  445. singletrdmg = {
  446. {a="b", b="b"},
  447. {a="t", b="t"},
  448. {a="j", b="ǧ"},
  449. {a="x", b="ḫ"},
  450. {a="d", b="d"},
  451. {a="r", b="r"},
  452. {a="z", b="z"},
  453. {a="s", b="s"},
  454. {a="`", b="ʿ"},
  455. {a="f", b="f"},
  456. {a="q", b="q"},
  457. {a="k", b="k"},
  458. {a="l", b="l"},
  459. {a="m", b="m"},
  460. {a="n", b="n"},
  461. {a="h", b="h"},
  462. {a="w", b="w"},
  463. {a="y", b="y"},
  464. {a="T", b="t"},
  465. -- additional characters (begin)
  466. {a="p", b="p"},
  467. {a="v", b="v"},
  468. {a="g", b="g"},
  469. -- additional characters (end)
  470. {a="\"", b=""},
  471. {a="B", b=""}
  472. }
  -- Long-vowel rules for the "dmg" mode: the capital codes A/U/I become the
  -- macron letters ā/ū/ī. "Y" depends on the preceding short vowel:
  -- "aY" -> "ay", "iY" -> "ī", and any remaining "Y" -> "ạ̄". The two
  -- context rules are listed before the bare "Y" rule so that they match
  -- first.
  473. longvtrdmg = {
  474. {a="A", b="ā"},
  475. {a="U", b="ū"},
  476. {a="I", b="ī"},
  477. {a="aY", b="ay"},
  478. {a="iY", b="ī"},
  479. {a="Y", b="ạ̄"}
  480. }
  -- Short-vowel rules for the "dmg" mode.
  --   * Two or three short vowels in a row are rewritten as a slash-delimited,
  --     comma-separated group ("/u,a/" or "/u,a,i/"); the three-vowel rule
  --     comes first so that it is not pre-empted by the two-vowel rule.
  --   * A dot in front of a short vowel (".u", ".a", ".i") is removed.
  --   * The last three entries map u/a/i to themselves.
  481. shortvtrdmg = {
  482. {a="([uai])([uai])([uai])", b="/%1,%2,%3/"},
  483. {a="([uai])([uai])", b="/%1,%2/"},
  484. {a="%.u", b="u"},
  485. {a="%.a", b="a"},
  486. {a="%.i", b="i"},
  487. {a="u", b="u"},
  488. {a="a", b="a"},
  489. {a="i", b="i"}
  490. }
  491. -- loc
  -- Hamza rules for the "loc" mode. The table parallels hamzatrdmg, with one
  -- visible difference: a hamza at the start of a word is dropped instead of
  -- being written as "ʾ" (see the "initial", "madda" and "taḫfīf" rules,
  -- whose replacements contain no "ʾ" when the pattern is anchored with "^"
  -- or preceded by a delimiter). Hamza in medial and final position is
  -- still rewritten to "ʾ".
  -- Each entry is {a=<Lua pattern>, b=<replacement>}; order matters because
  -- the word-initial rules must run before the generic "middle" rules.
  -- NOTE(review): the consumer of this table is outside this chunk.
  492. hamzatrloc = {
  493. -- next lines for ʾalif alone
  494. {a="(%.A)([^uai])", b=".|%2"},
  495. {a="(%.A)([uai])", b="||%2"},
  496. -- hard coded hamza
  497. {a="|\"'", b="ʾ"},
  498. {a="A\"'", b="ʾA"},
  499. {a="[au]\"'", b="ʾ"},
  500. {a="w\"'", b="ʾ"},
  501. {a="i\"'", b="ʾ"},
  502. {a="y\"'", b="ʾ"},
  503. -- hamza takes tašdīd too
  504. {a="''([Uu])", b="ʾʾ%1"},
  505. {a="''([Aa])", b="ʾʾ%1"},
  506. {a="''([Ii])", b="ʾʾ%1"},
  507. -- initial long u and i (for a, see below)
  508. {a="%'%_U", b="U"},
  509. {a="%'%_I", b="I"},
  510. -- taḫfīfu 'l-hamza
  511. {a="^'u'([^uaiUAI])", b="U%1"},
  512. {a="([%s%(%[%<%-])'u'([^uaiUAI])", b="%1U%2"},
  513. {a="^'i'([^uaiUAI])", b="I%1"},
  514. {a="([%s%(%[%<%-])'i'([^uaiUAI])", b="%1I%2"},
  515. {a="^u'([^uaiUAI])", b="U%1"},
  516. {a="([^uaiUAIYN][%s%(%[%<])u'([^uaiUAI])", b="%1U%2"},
  517. {a="^i'([^uaiUAI])", b="I%1"},
  518. {a="([^uaiUAIYN][%s%(%[%<])i'([^uaiUAI])", b="%1I%2"},
  519. -- madda (historic writing below)
  520. {a="^(')(A)", b="%2"},
  521. {a="(%W)(')(A)", b="%1%3"},
  522. {a="^'a'([^uaiUAI])", b="A%1"},
  523. {a="(%W)'a'([^uaiUAI])", b="%1A%2"},
  524. {a="'a'([^uaiUAI])", b="A%1"},
  525. {a="^'a?A", b="A"},
  526. {a="(%W)'a?A", b="%1A"},
  527. {a="'a?A", b="ʾA"},
  528. {a="(A)(')(i)$", b="%1ʾ%3"},
  529. {a="(A)(')(i)(%W)", b="%1ʾ%3%4"},
  530. {a="(A)(')(i)", b="%1ʾ%3"}, -- historic madda
  531. {a="(A)(')", b="%1ʾ"}, -- historic madda
  532. -- initial (needs both ^ and %W patterns)
  533. {a="^(')([ua])", b="%2"},
  534. {a="^(')(i)", b="%2"},
  535. -- consider replacing initial %W with [%s%(%[%<%-]:
  536. -- {a="(%W)(')([ua])", b="%1%3"},
  537. -- {a="(%W)(')(i)", b="%1%3"},
  538. {a="([%s%(%[%<%-])(')([ua])", b="%1%3"},
  539. {a="([%s%(%[%<%-])(')(i)", b="%1%3"},
  540. -- final
  541. {a="([Iy])(')(aN)$", b="%1ʾ%3"},
  542. {a="([Iy])(')(aN)(%W)", b="%1ʾ%3%4"},
  543. {a="([^uai])(')([uai]N?)$", b="%1ʾ%3"},
  544. {a="([^uai])(')([uai]N?)(%W)", b="%1ʾ%3%4"},
  545. {a="([UI])(')([uai])$", b="%1ʾ%3"},
  546. {a="([UI])(')([uai])(%W)", b="%1ʾ%3%4"},
  547. -- middle
  548. {a="(U)(')", b="%1ʾ"},
  549. {a="([Iy])(')", b="%1ʾ"},
  550. {a="([^uai])(')([uU])", b="%1ʾ%3"},
  551. {a="([^uai])(')(%_?[aAY])", b="%1ʾ%3"},
  552. {a="([^uai])(')([iI])", b="%1ʾ%3"},
  553. {a="(u)(')([uU])", b="%1ʾ%3"},
  554. {a="(u)(')(%_?[aAY])", b="%1ʾ%3"},
  555. {a="(u)(')([iI])", b="%1ʾ%3"},
  556. {a="(a)(')(%_?[aAY])", b="%1ʾ%3"},
  557. {a="(a)(')([uU])", b="%1ʾ%3"},
  558. {a="(a)(')([iI])", b="%1ʾ%3"},
  559. {a="(i)(')(%_?[aAY])", b="%1ʾ%3"},
  560. {a="(i)(')([uU])", b="%1ʾ%3"},
  561. {a="(i)(')([iI])", b="%1ʾ%3"},
  562. {a="(a)(')([^uaiUAI])", b="%1ʾ%3"},
  563. {a="(u)(')([^uaiUAI])", b="%1ʾ%3"},
  564. {a="(i)(')([^uaiUAI])", b="%1ʾ%3"}
  565. }
  -- Tanwīn rules for the "loc" mode. Unlike tanwintrdmg, nothing is wrapped
  -- in \arbup{...}: the endings are written inline as plain "un", "an",
  -- "in", and the silent carriers after "aN" ("_A", "Y", or an "O...O"
  -- group ending in them) are dropped. An optional leading hyphen is
  -- consumed by the patterns and not written back.
  -- A "T" directly before "aN" is written as "t".
  566. tanwintrloc = {
  567. {a="%-?uNU", b="un"},
  568. {a="%-?aNU", b="an"},
  569. {a="%-?iNU", b="in"},
  570. {a="%-?iNI", b="in"},
  571. {a="%-?(\"?At)%-?([ui])N", b="%1%2n"},
  572. {a="%-?([ui])N", b="%1n"},
  573. -- needed by \arbcolor:
  574. {a="%-?(aN)(O[%S]-%_AO)", b="an"},
  575. {a="%-?(aN)(O[%S]-YO)", b="an"},
  576. {a="(O[%S]-TO)%-?(\"?aN)", b="an"},
  577. {a="(O[%S]-[^TA]O)%-?(\"?aN)", b="an"},
  578. --
  579. {a="%-?(aN)(_A)", b="an"},
  580. {a="%-?(aN)(Y)", b="an"},
  581. {a="(T)%-?(\"?aN)", b="tan"},
  582. {a="([^TA])%-?(\"?aN)", b="%1an"}
  583. }
  -- Multi-character rules for the "loc" mode, mostly about the definite
  -- article. Visible differences from trigraphstrdmg:
  --   * the article is always written "al-": before [tdrzsnc] the "l" is
  --     kept (replacement "%1l-%2"), and an article typed in assimilated
  --     form is turned back into "al-" / "l-";
  --   * the waṣla forms "'l-" and "'ll..." are written with a leading "a"
  --     ("al-", "all...");
  --   * the non-initial variants are only recognized after whitespace
  --     ("(%s)"), not after brackets or hyphens.
  -- The table ends with the same "silent wāw" and "_a" + U/I/UA rules as the
  -- dmg table.
  -- NOTE(review): the consumer of this table is outside this chunk.
  584. trigraphstrloc = { -- trigraphs or more
  585. -- 'llatI / 'llad_I
  586. {a="^'ll(a)([%_]?[dt])", b="all%1%2"},
  587. {a="([%(%[%|%<%s])'ll(a)([%_]?[dt])", b="%1all%2%3"}, --p
  588. -- al- + lām
  589. {a="^(a)l%-(l)", b="%1l-%2"},
  590. {a="(%s)(a)l%-(l)", b="%1%2l-%3"},
  591. -- al- + solar consonant ('c' and '^n' are additional characters)
  592. {a="^(a)l%-(%^n)", b="%1l-%2"}, -- ^n is lunar
  593. {a="(%s)(a)l%-(%^n)", b="%1%2l-%3"}, -- ^n is lunar
  594. {a="^(a)l%-([%_%^%.]?[tdrzsnc])", b="%1l-%2"},
  595. {a="(%s)(a)l%-([%_%^%.]?[tdrzsnc])", b="%1%2l-%3"},
  596. -- assim. art. + solar consonant ('c' and '^n' are additional characters)
  597. {a="^(a)(%^n)%-", b="%1l-"}, -- ^n is lunar
  598. {a="(%s)(a)(%^n)%-", b="%1%2l-"}, -- ^n is lunar
  599. {a="^(a)([%_%^%.]?[tdrzsnc])%-", b="%1l-"},
  600. {a="(%s)(a)([%_%^%.]?[tdrzsnc])%-", b="%1%2l-"},
  601. -- al- + initial unstable hamza
  602. {a="^(a)l%-([uai])", b="%1l-%2"},
  603. {a="(%s)(a)l%-([uai])", b="%1%2l-%3"},
  604. -- li-/la- + art. + initial unstable hamza is a special orthography
  605. {a="l([ai])%-l%-([uai])", b="l%1-l-%2"},
  606. -- al- + lunar consonant (i.e. what remains)
  607. {a="^(a)l%-", b="%1l-"},
  608. {a="(%s)(a)l%-", b="%1%2l-"},
  609. -- art. with waṣla + lām
  610. {a="'l%-(l)", b="al-%1"},
  611. -- art. with waṣla + solar consonant
  612. -- ('c' and '^n' are additional characters)
  613. {a="'l%-(%^n)", b="al-%1"}, -- ^n is lunar
  614. {a="'l%-([%_%^%.]?[tdrzsnc])", b="al-%1"},
  615. -- li-/la- + art. + lām
  616. {a="l([ai])%-l%-(l)", b="l%1-l-%2"},
  617. -- assim. art. with waṣla + solar consonant
  618. -- ('c' and '^n' are additional characters)
  619. {a="'(%^n)%-", b="al-"}, -- ^n is lunar
  620. {a="'([%_%^%.]?[tdrzsnc])%-", b="al-"},
  621. -- li-/la- + art. + solar consonant is a special orthography
  622. -- ('c' and '^n' are additional characters)
  623. {a="l([ai])%-l%-(%^n)", b="l%1-l-%2"}, -- ^n is lunar
  624. {a="l([ai])%-l%-([%_%^%.]?[tdrzsnc])", b="l%1-l-%2"},
  625. -- li-/la- + assim. art. + solar consonant is a special orthography
  626. -- ('c' and '^n' are additional characters)
  627. {a="l([ai])%-(%^n)%-(%^n)", b="l%1-l-%3"}, -- ^n is lunar
  628. {a="l([ai])%-([%_%^%.]?[tdrzsnc])%-([%_%^%.]?[tdrzsnc])", b="l%1-l-%3"},
  629. -- art. with waṣla + initial unstable hamza
  630. {a="'l%-([uai])", b="al-%1"},
  631. -- art. with waṣla + lunar consonant (i.e. what remains)
  632. {a="'l%-", b="al-"},
  633. -- the silent wāw
  634. {a="uU$", b="u"},
  635. {a="uU(%W)", b="u%1"},
  636. {a="aU$", b="a"},
  637. {a="aU(%W)", b="a%1"},
  638. {a="iU$", b="i"},
  639. {a="iU(%W)", b="i%1"},
  640. -- words ending in -āT with silent wāw/yāʾ
  641. {a="(_a)UA", b="A"},
  642. {a="(_a)U", b="A"},
  643. {a="(_a)I", b="A"}
  644. }
  -- Two-character rules for the "loc" mode. In order, the table:
  --   1. drops the hyphen in front of a hyphen-separated ending (-Una/-Ina,
  --      -Atu/-Ati, -Ani, -ayni, single short vowel) at the end of a word,
  --      keeping the ending itself inline;
  --   2. removes "--", and rewrites "uww" and "iyy";
  --   3. inserts an apostrophe between t/k/d/s/g and a following "h", which
  --      keeps such letter pairs distinct from the digraphs th/kh/dh/sh/gh
  --      emitted further down;
  --   4. resolves the tāʾ marbūṭa code "T" to "t" or "h";
  --   5. maps the prefixed consonant codes to letters or digraphs ("_t" ->
  --      "th", "^g" -> "j", "_h" -> "kh", ...) and the prefixed vowel codes
  --      to macron letters; the dotted additional codes .b/.f/.q/.k/.n and
  --      "^d" lose their diacritic.
  -- Order matters: step 3 must run before step 5 creates new "h" digraphs.
  -- NOTE(review): the consumer of this table is outside this chunk.
  645. digraphstrloc = {
  646. -- discard the ʾiʿrāb hyphen (begin)
  647. {a="(%-)(\"?[UI]na)(%p*%s)", b="%2%3"},
  648. {a="(%-)(\"?[UI]na)(%p*)$", b="%2%3"},
  649. {a="(%-)(\"?At[ui])(%p*%s)", b="%2%3"},
  650. {a="(%-)(\"?At[ui])(%p*)$", b="%2%3"},
  651. {a="(%-)(\"?Ani)(%p*%s)", b="%2%3"},
  652. {a="(%-)(\"?Ani)(%p*)$", b="%2%3"},
  653. {a="(%-)(\"?ayni)(%p*%s)", b="%2%3"},
  654. {a="(%-)(\"?ayni)(%p*)$", b="%2%3"},
  655. {a="(%-)([uai])(%p*%s)", b="%2%3"},
  656. {a="(%-)([uai])(%p*)$", b="%2%3"},
  657. -- discard the ʾiʿrāb hyphen (end)
  658. {a="(%-)(\"?[uai])", b="%1%2"}, -- hyphen + initial alif without hamza
  659. {a="^(\"?[uai])", b="%1"}, -- initial alif without hamza
  660. {a="(%s)([uai])", b="%1%2"}, -- initial alif without hamza
  661. {a="%-%-", b=""},
  662. {a="uww", b="ūw"},
  663. {a="iyy(%p*)$", b="ī%1"},
  664. {a="iyy(%p*%s)", b="ī%1"},
  665. {a="iyy", b="īy"},
  666. {a="([tkdsg])(h)", b="%1'%2"},
  667. -- {a="T([^uai])", b="h%1"},
  668. {a="([a%']l%-)(%S-)T([%(%[%|%<%s])(al%-)", b="%1%2h%3%4"}, --p
  669. {a="T([%(%[%|%<%s])(al%-)", b="t%1%2"}, --p
  670. {a="T([%|\"])", b="t%1"},
  671. {a="T(%p*)$", b="h%1"},
  672. {a="T(%p*%s)", b="h%1"},
  673. {a="_t", b="th"},
  674. {a="%^g", b="j"},
  675. {a="%.h", b="ḥ"},
  676. {a="_h", b="kh"},
  677. {a="_d", b="dh"},
  678. {a="%^s", b="sh"},
  679. {a="%.s", b="ṣ"},
  680. {a="%.d", b="ḍ"},
  681. {a="%.t", b="ṭ"},
  682. {a="%.z", b="ẓ"},
  683. {a="%.g", b="gh"},
  684. {a="%.y", b="y"},
  685. -- additional characters (begin)
  686. {a="%^c", b="ch"},
  687. {a="%^z", b="zh"},
  688. {a="%^n", b="ñ"},
  689. -- additional characters (end)
  690. {a="(U)(A)", b="ū"},
  691. {a="WA", b="w"},
  692. {a="(a)W", b="%1w"},
  693. {a="_A", b="á"},
  694. {a="_u", b="ū"},
  695. {a="_a", b="ā"},
  696. {a="_i", b="ī"},
  697. {a="%.b", b="b"},
  698. {a="%.f", b="f"},
  699. {a="%.q", b="q"},
  700. {a="%.k", b="k"},
  701. {a="%.n", b="n"},
  702. {a="%^d", b="d"}
  703. }
  -- singletrloc: {a = <input code>, b = <output>} rules for single
  -- consonant letters in the "loc" scheme.  Most entries map a letter to
  -- itself; the non-trivial ones are x -> kh, T -> t, and the two codes
  -- that are deleted (the double quote and "B").
  singletrloc = {
    {a="b", b="b"},
    {a="t", b="t"},
    {a="j", b="j"},
    {a="x", b="kh"},
    {a="d", b="d"},
    {a="r", b="r"},
    {a="z", b="z"},
    {a="s", b="s"},
    {a="`", b="`"},
    {a="f", b="f"},
    {a="q", b="q"},
    {a="k", b="k"},
    {a="l", b="l"},
    {a="m", b="m"},
    {a="n", b="n"},
    {a="h", b="h"},
    {a="w", b="w"},
    {a="y", b="y"},
    {a="T", b="t"},
    -- additional characters (begin)
    {a="p", b="p"},
    {a="v", b="v"},
    {a="g", b="g"},
    -- additional characters (end)
    -- codes removed from the output
    {a="\"", b=""},
    {a="B", b=""}
  }
  -- longvtrloc: long-vowel rules for the "loc" scheme.
  -- "aY" and "iY" are listed before the bare "Y" rule so that the
  -- two-character codes are consumed first.
  longvtrloc = {
    {a="A", b="ā"},
    {a="U", b="ū"},
    {a="I", b="ī"},
    {a="aY", b="ay"},
    {a="iY", b="ī"},
    {a="Y", b="á"},
  }
  -- shortvtrloc: short-vowel rules for the "loc" scheme.
  -- Runs of three, then two, adjacent short vowels are rewritten as a
  -- comma-separated list between slashes ("/u,a/"); the three-vowel rule
  -- must precede the two-vowel rule.  A dotted vowel (".u") loses its dot.
  shortvtrloc = {
    {a="([uai])([uai])([uai])", b="/%1,%2,%3/"},
    {a="([uai])([uai])", b="/%1,%2/"},
    {a="%.u", b="u"},
    {a="%.a", b="a"},
    {a="%.i", b="i"},
    {a="u", b="u"},
    {a="a", b="a"},
    {a="i", b="i"}
  }
  -- finaltrloc: last "loc" rule; turns the hamza sign ʾ into a plain
  -- ASCII apostrophe.
  finaltrloc = {
    {a="ʾ", b="'"},
  }
  -- arabica: the tables below all belong to the "arabica" scheme
  -- hamzatrarabica: ordered hamza rules for the "arabica" scheme.
  -- The ASCII apostrophe used for hamza in the input is rewritten to ʾ in
  -- medial and final position; a word-initial hamza followed by a short
  -- vowel is replaced by the placeholder "@".  Rule order is significant.
  hamzatrarabica = { -- ≠ from hamzatrloc: initial hamza has to be held
    -- next lines for ʾalif alone
    {a="(%.A)([^uai])", b=".|%2"},
    {a="(%.A)([uai])", b="||%2"},
    -- hard coded hamza
    {a="|\"'", b="ʾ"},
    {a="A\"'", b="ʾA"},
    {a="[au]\"'", b="ʾ"},
    {a="w\"'", b="ʾ"},
    {a="i\"'", b="ʾ"},
    {a="y\"'", b="ʾ"},
    -- hamza takes tašdīd too
    {a="''([Uu])", b="ʾʾ%1"},
    {a="''([Aa])", b="ʾʾ%1"},
    {a="''([Ii])", b="ʾʾ%1"},
    -- initial long u and i (for a, see below)
    {a="%'%_U", b="U"},
    {a="%'%_I", b="I"},
    -- taḫfīfu 'l-hamza
    {a="^'u'([^uaiUAI])", b="U%1"},
    {a="([%s%(%[%<%-])'u'([^uaiUAI])", b="%1U%2"},
    {a="^'i'([^uaiUAI])", b="I%1"},
    {a="([%s%(%[%<%-])'i'([^uaiUAI])", b="%1I%2"},
    {a="^u'([^uaiUAI])", b="U%1"},
    {a="([^uaiUAIYN][%s%(%[%<])u'([^uaiUAI])", b="%1U%2"},
    {a="^i'([^uaiUAI])", b="I%1"},
    {a="([^uaiUAIYN][%s%(%[%<])i'([^uaiUAI])", b="%1I%2"},
    -- madda (historic writing below)
    {a="^(')(A)", b="%2"},
    {a="(%W)(')(A)", b="%1%3"},
    {a="^'a'([^uaiUAI])", b="A%1"},
    {a="(%W)'a'([^uaiUAI])", b="%1A%2"},
    {a="'a'([^uaiUAI])", b="A%1"},
    {a="^'a?A", b="A"},
    {a="(%W)'a?A", b="%1A"},
    {a="'a?A", b="ʾA"},
    {a="(A)(')(i)$", b="%1ʾ%3"},
    {a="(A)(')(i)(%W)", b="%1ʾ%3%4"},
    {a="(A)(')(i)", b="%1ʾ%3"}, -- historic madda
    {a="(A)(')", b="%1ʾ"}, -- historic madda
    -- initial (needs both ^ and %W patterns):
    -- hold it for now (see below, beginning of digraphs table)
    {a="^(')([ua])", b="@%2"},
    {a="^(')(i)", b="@%2"},
    -- consider replacing initial %W with [%s%(%[%<%-]:
    -- {a="(%W)(')([ua])", b="%1@%3"},
    -- {a="(%W)(')(i)", b="%1@%3"},
    {a="([%s%(%[%<%-])(')([ua])", b="%1@%3"},
    {a="([%s%(%[%<%-])(')(i)", b="%1@%3"},
    -- final
    {a="([Iy])(')(aN)$", b="%1ʾ%3"},
    {a="([Iy])(')(aN)(%W)", b="%1ʾ%3%4"},
    {a="([^uai])(')([uai]N?)$", b="%1ʾ%3"},
    {a="([^uai])(')([uai]N?)(%W)", b="%1ʾ%3%4"},
    {a="([UI])(')([uai])$", b="%1ʾ%3"},
    {a="([UI])(')([uai])(%W)", b="%1ʾ%3%4"},
    -- middle
    {a="(U)(')", b="%1ʾ"},
    {a="([Iy])(')", b="%1ʾ"},
    {a="([^uai])(')([uU])", b="%1ʾ%3"},
    {a="([^uai])(')(%_?[aAY])", b="%1ʾ%3"},
    {a="([^uai])(')([iI])", b="%1ʾ%3"},
    {a="(u)(')([uU])", b="%1ʾ%3"},
    {a="(u)(')(%_?[aAY])", b="%1ʾ%3"},
    {a="(u)(')([iI])", b="%1ʾ%3"},
    {a="(a)(')(%_?[aAY])", b="%1ʾ%3"},
    {a="(a)(')([uU])", b="%1ʾ%3"},
    {a="(a)(')([iI])", b="%1ʾ%3"},
    {a="(i)(')(%_?[aAY])", b="%1ʾ%3"},
    {a="(i)(')([uU])", b="%1ʾ%3"},
    {a="(i)(')([iI])", b="%1ʾ%3"},
    {a="(a)(')([^uaiUAI])", b="%1ʾ%3"},
    {a="(u)(')([^uaiUAI])", b="%1ʾ%3"},
    {a="(i)(')([^uaiUAI])", b="%1ʾ%3"}
  }
  -- trigraphstrarabica: ordered rules for sequences of three or more
  -- input characters in the "arabica" scheme: the definite article in its
  -- various spellings, and the silent wāw/yāʾ.
  -- Every article rule writes the article with "l-": an assimilated
  -- spelling such as "a^s-" comes out as "al-", and a leading "'"
  -- (waṣla) before the article is dropped.
  trigraphstrarabica = { -- trigraphs or more
    -- 'llatI / 'llad_I
    {a="^'ll(a)([%_]?[dt])", b="ll%1%2"},
    {a="([%-%(%[%|%<%s])'ll(a)([%_]?[dt])", b="%1ll%2%3"}, --p
    -- al- + lām
    {a="^(a)l%-(l)", b="%1l-%2"},
    {a="(%s)(a)l%-(l)", b="%1%2l-%3"},
    -- al- + solar consonant ('c' and '^n' are additional characters)
    {a="^(a)l%-(%^n)", b="%1l-%2"}, -- ^n is lunar
    {a="(%s)(a)l%-(%^n)", b="%1%2l-%3"}, -- ^n is lunar
    {a="^(a)l%-([%_%^%.]?[tdrzsnc])", b="%1l-%2"},
    {a="(%s)(a)l%-([%_%^%.]?[tdrzsnc])", b="%1%2l-%3"},
    -- assim. art. + solar consonant ('c' and '^n' are additional characters)
    {a="^(a)(%^n)%-", b="%1l-"}, -- ^n is lunar
    {a="(%s)(a)(%^n)%-", b="%1%2l-"}, -- ^n is lunar
    {a="^(a)([%_%^%.]?[tdrzsnc])%-", b="%1l-"},
    {a="(%s)(a)([%_%^%.]?[tdrzsnc])%-", b="%1%2l-"},
    -- al- + initial unstable hamza
    {a="^(a)l%-([uai])", b="%1l-%2"},
    {a="(%s)(a)l%-([uai])", b="%1%2l-%3"},
    -- li-/la- + art. + initial unstable hamza is a special orthography
    {a="l([ai])%-l%-([uai])", b="l%1-l-%2"},
    -- al- + lunar consonant (i.e. what remains)
    {a="^(a)l%-", b="%1l-"},
    {a="(%s)(a)l%-", b="%1%2l-"},
    -- art. with waṣla + lām
    {a="'l%-(l)", b="l-%1"},
    -- art. with waṣla + solar consonant
    -- ('c' and '^n' are additional characters)
    {a="'l%-(%^n)", b="l-%1"}, -- ^n is lunar
    {a="'l%-([%_%^%.]?[tdrzsnc])", b="l-%1"},
    -- li-/la- + art. + lām
    {a="l([ai])%-l%-(l)", b="l%1-l-%2"},
    -- assim. art. with waṣla + solar consonant
    -- ('c' and '^n' are additional characters)
    {a="'(%^n)%-", b="l-"}, -- ^n is lunar
    {a="'([%_%^%.]?[tdrzsnc])%-", b="l-"},
    -- li-/la- + art. + solar consonant is a special orthography
    -- ('c' and '^n' are additional characters)
    {a="l([ai])%-l%-(%^n)", b="l%1-l-%2"}, -- ^n is lunar
    {a="l([ai])%-l%-([%_%^%.]?[tdrzsnc])", b="l%1-l-%2"},
    -- li-/la- + assim. art. + solar consonant is a special orthography
    -- ('c' and '^n' are additional characters)
    {a="l([ai])%-(%^n)%-(%^n)", b="l%1-l-%3"}, -- ^n is lunar
    {a="l([ai])%-([%_%^%.]?[tdrzsnc])%-([%_%^%.]?[tdrzsnc])", b="l%1-l-%3"},
    -- art. with waṣla + initial unstable hamza
    {a="'l%-([uai])", b="l-%1"},
    -- art. with waṣla + lunar consonant (i.e. what remains)
    {a="'l%-", b="l-"},
    -- the silent wāw
    {a="uU$", b="u"},
    {a="uU(%W)", b="u%1"},
    {a="aU$", b="a"},
    {a="aU(%W)", b="a%1"},
    {a="iU$", b="i"},
    {a="iU(%W)", b="i%1"},
    -- words ending in -āT with silent wāw/yāʾ
    {a="(_a)UA", b="A"},
    {a="(_a)U", b="A"},
    {a="(_a)I", b="A"}
  }
  -- digraphstrarabica: ordered rules for multi-character input codes in
  -- the "arabica" scheme.  The first rules drop an initial short vowel
  -- that follows a vowel (across a hyphen or a space), then the "@"
  -- placeholder is removed; the remaining rules handle the ʾiʿrāb hyphen,
  -- tāʾ marbūṭa ("T") and the digraph consonant codes.
  -- Rule order is significant.
  digraphstrarabica = {
    {a="([uai]%-)(\"?[uai])", b="%1"}, -- hyphen + initial alif without hamza
    {a="([UAIYuai])(%s)([%(%[%|%<]?)(\"?[uai])", b="%1%2%3"}, --p
    -- NOTE(review): the whole match is deleted here; the meaning of the
    -- "O" markers is not visible in this part of the file -- confirm.
    {a="(O[%S]-)([UAIuai])(O)(\"?[uai])", b=""},
    {a="@", b=""}, -- remove the tag before the former hamza
    -- discard the ʾiʿrāb hyphen (begin)
    {a="(%-)(\"?[UI]na)(%p*%s)", b="%2%3"},
    {a="(%-)(\"?[UI]na)(%p*)$", b="%2%3"},
    {a="(%-)(\"?At[ui])(%p*%s)", b="%2%3"},
    {a="(%-)(\"?At[ui])(%p*)$", b="%2%3"},
    {a="(%-)(\"?Ani)(%p*%s)", b="%2%3"},
    {a="(%-)(\"?Ani)(%p*)$", b="%2%3"},
    {a="(%-)(\"?ayni)(%p*%s)", b="%2%3"},
    {a="(%-)(\"?ayni)(%p*)$", b="%2%3"},
    {a="(%-)([uai])(%p*%s)", b="%2%3"},
    {a="(%-)([uai])(%p*)$", b="%2%3"},
    -- discard the ʾiʿrāb hyphen (end)
    -- the next three rules re-emit their whole match unchanged
    {a="(%-)(\"?[uai])", b="%1%2"}, -- hyphen + initial alif without hamza
    {a="^(\"?[uai])", b="%1"}, -- initial alif without hamza
    {a="(%s)([uai])", b="%1%2"}, -- initial alif without hamza
    {a="%-%-", b=""},
    {a="iyy(%p*)$", b="ī%1"},
    {a="iyy(%p*%s)", b="ī%1"},
    -- {a="T([^uai])", b="h%1"},
    {a="([a%']l%-)(%S-)aT([%(%[%|%<%s])(al%-)", b="%1%2a%3%4"}, --p
    {a="aT([%(%[%|%<%s])(al%-)", b="at%1%2"}, --p
    {a="T([%|\"])", b="t%1"},
    {a="aT(%p*)$", b="a%1"},
    {a="aT(%p*%s)", b="a%1"},
    {a="_t", b="ṯ"},
    {a="%^g", b="ǧ"},
    {a="%.h", b="ḥ"},
    {a="_h", b="ḫ"},
    {a="_d", b="ḏ"},
    {a="%^s", b="š"},
    {a="%.s", b="ṣ"},
    {a="%.d", b="ḍ"},
    {a="%.t", b="ṭ"},
    {a="%.z", b="ẓ"},
    {a="%.g", b="ġ"},
    {a="%.y", b="y"},
    -- additional characters (begin)
    {a="%^c", b="č"},
    {a="%^z", b="ž"},
    {a="%^n", b="ñ"},
    -- additional characters (end)
    {a="(U)(A)", b="ū"},
    {a="WA", b="w"},
    {a="(a)W", b="%1w"},
    {a="_A", b="ā"},
    {a="_u", b="ū"},
    {a="_a", b="ā"},
    {a="_i", b="ī"},
    {a="%.b", b="b"},
    {a="%.f", b="f"},
    {a="%.q", b="q"},
    {a="%.k", b="k"},
    {a="%.n", b="n"},
    {a="%^d", b="d"}
  }
  -- singletrarabica: {a = <input code>, b = <output>} rules for single
  -- consonant letters in the "arabica" scheme.  Non-trivial mappings:
  -- j -> ǧ, x -> ḫ, ` -> ʿ, T -> t; the double quote and "B" are deleted.
  singletrarabica = {
    {a="b", b="b"},
    {a="t", b="t"},
    {a="j", b="ǧ"},
    {a="x", b="ḫ"},
    {a="d", b="d"},
    {a="r", b="r"},
    {a="z", b="z"},
    {a="s", b="s"},
    {a="`", b="ʿ"},
    {a="f", b="f"},
    {a="q", b="q"},
    {a="k", b="k"},
    {a="l", b="l"},
    {a="m", b="m"},
    {a="n", b="n"},
    {a="h", b="h"},
    {a="w", b="w"},
    {a="y", b="y"},
    {a="T", b="t"},
    -- additional characters (begin)
    {a="p", b="p"},
    {a="v", b="v"},
    {a="g", b="g"},
    -- additional characters (end)
    -- codes removed from the output
    {a="\"", b=""},
    {a="B", b=""}
  }
  -- longvtrarabica: long-vowel rules for the "arabica" scheme.
  -- "aY" and "iY" come first so that the two-character codes are consumed
  -- before the character class [AY] matches a bare "Y".
  longvtrarabica = {
    {a="aY", b="ay"},
    {a="iY", b="ī"},
    {a="[AY]", b="ā"},
    {a="U", b="ū"},
    {a="I", b="ī"}
  }