arabluatex.lua 34 KB


  1. --[[
  2. This file is part of the `arabluatex' package
  3. ArabLuaTeX -- Processing ArabTeX notation under LuaLaTeX
  4. Copyright (C) 2016--2018 Robert Alessi
  5. Please send error reports and suggestions for improvements to Robert
  6. Alessi <alessi@robertalessi.net>
  7. This program is free software: you can redistribute it and/or modify
  8. it under the terms of the GNU General Public License as published by
  9. the Free Software Foundation, either version 3 of the License, or
  10. (at your option) any later version.
  11. This program is distributed in the hope that it will be useful, but
  12. WITHOUT ANY WARRANTY; without even the implied warranty of
  13. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. General Public License for more details.
  15. You should have received a copy of the GNU General Public License
  16. along with this program. If not, see
  17. <http://www.gnu.org/licenses/>.
  18. --]]
  19. require("arabluatex_voc")
  20. require("arabluatex_fullvoc")
  21. require("arabluatex_novoc")
  22. require("arabluatex_trans")
  --- LPeg counterpart of string.gsub().
  -- Every occurrence of `patt` in `s` is rewritten with `repl`; all other
  -- characters are copied through unchanged.
  -- @param s     subject string
  -- @param patt  anything accepted by lpeg.P()
  -- @param repl  replacement handed to the LPeg `/` capture operator
  -- @return the result of matching the substitution pattern against `s`
  local function gsub(s, patt, repl)
    local target = lpeg.P(patt)
    -- Either rewrite a hit or swallow one character, repeated to the end.
    local rewriter = lpeg.Cs((target / repl + 1)^0)
    return rewriter:match(s)
  end
  -- TeX snippets used to bracket code that contains `@' in command names.
  local atletter = "\\makeatletter{}"
  local atother = "\\makeatother{}"

  -- Basic LPeg building blocks shared by the functions below.
  -- One character allowed in a command name: a-z, A-Z or `@'.
  local ascii = lpeg.R("az", "AZ", "@@")
  -- Captures one backslash character (the Lua literal "\\").
  local dblbkslash = lpeg.Cs("\\")
  -- Balanced [...] and {...} groups; nesting is handled by the recursive
  -- one-rule grammars.
  local bsqbrackets = lpeg.Cs({ "[" * ((1 - lpeg.S("[]")) + lpeg.V(1))^0 * "]" })
  local bcbraces = lpeg.Cs({ "{" * ((1 - lpeg.S("{}")) + lpeg.V(1))^0 * "}" })
  -- A single space, with (spce) and without (spcenc) a capture.
  local spce = lpeg.Cs(" ")
  local spcenc = lpeg.P(" ")
  -- " *" following a command name.
  local cmdstar = lpeg.Cs(spce * lpeg.P("*"))
  -- Up to two consecutive bracket groups / brace groups.
  local bsqbracketsii = lpeg.Cs(bsqbrackets^-2)
  local bcbracesii = lpeg.Cs(bcbraces^-2)
  -- A command: backslash, one or more name characters, optional " *".
  local cmd = lpeg.Cs(dblbkslash * ascii^1 * cmdstar^-1)
  -- Argument lists in the shapes accepted by generic commands, \arb and \bayt.
  local cmdargs = lpeg.Cs(spce^-1 * bsqbracketsii * bcbracesii * bsqbrackets^-1)
  local arbargs = lpeg.Cs(spce^-1 * bsqbrackets^-1 * bcbraces)
  local baytargs = lpeg.Cs(spce * bcbraces * bsqbrackets^-1 * bcbraces)
  --- Shield nested \arb[...]{...}, \LR{...} and \RL{...} from later passes.
  -- \arb with an option is wrapped in \al@brk{...}; \LR and \RL are renamed
  -- to \@LR and \@RL.
  -- @param str  text to protect
  -- @return the protected text
  local function protectarb(str)
    local rewrites = {
      { "(\\arb%s?)(%[.-%])(%b{})", "\\al@brk{\\arb%2%3}" },
      { "(\\LR%s?)(%b{})", "\\@LR%2" },
      { "(\\RL%s?)(%b{})", "\\@RL%2" },
    }
    for _, rw in ipairs(rewrites) do
      str = string.gsub(str, rw[1], rw[2])
    end
    return str
  end
  --- Undo the protection markers: \@arb, \@LR and \@RL get their plain
  -- names back and every \al@brk{...} wrapper is replaced by its contents.
  -- @param str  text containing protection markers
  -- @return the text with the markers removed
  local function unprotectarb(str)
    local restores = {
      { "(\\@arb)(%[.-%])(%b{})", "\\arb%2%3" },
      { "(\\@LR)(%b{})", "\\LR%2" },
      { "(\\@RL)(%b{})", "\\RL%2" },
    }
    for _, rs in ipairs(restores) do
      str = string.gsub(str, rs[1], rs[2])
    end
    -- Drop "\al@brk" together with the outer pair of braces.
    str = gsub(str, lpeg.Cs("\\al@brk") * bcbraces, function(_, wrapped)
      return string.sub(wrapped, 2, -2)
    end)
    return str
  end
  -- the following is to be taken out of \arb{}
  local outofarb = {
    "LRfootnote",
    "RLfootnote",
    "edtext",
    "pstart",
    "pend"
  }
  -- commands the arguments of which must not be processed by arabluatex
  -- inside \arb{}. 'albrkcmds' is what is set by default. 'brkcmds'
  -- collects the commands set in the preamble with \MkArbBreak{}
  local albrkcmds = {
    "begin",
    "end",
    "par",
    "LRmarginpar",
    "arbmark",
    "abjad"
  }
  local brkcmds = {}

  --- Register extra command names given as a comma-separated list.
  -- All whitespace is removed from `str` before it is split on commas.
  -- Empty fields (empty argument, doubled or trailing commas) are skipped:
  -- an empty name would later be compiled by breakcmd() into a pattern that
  -- matches after every backslash.
  -- @param str  comma-separated command names, without backslashes
  -- @param opt  "dflt" appends to `brkcmds`, "out" appends to `outofarb`
  -- @return the table that was extended; nothing for any other `opt`
  function mkarbbreak(str, opt)
    local target
    if opt == "dflt" then
      target = brkcmds
    elseif opt == "out" then
      target = outofarb
    else
      return
    end
    str = string.gsub(str, "%s+", "")
    for name in string.gmatch(str, "[^,]+") do
      target[#target + 1] = name
    end
    return target
  end
  --- Mark the commands whose arguments must escape normal \arb processing.
  -- @param str  contents being prepared for conversion
  -- @return `str` with the affected commands wrapped in \al@brk{...} or
  --         moved out of the surrounding \arb{...}
  local function breakcmd(str)
    -- \item[label]: the label itself is converted, the command is protected.
    str = string.gsub(str, "\\(item.?)(%b[])", function(_, label)
      return "\\al@brk{\\item[\\arb{" .. string.sub(label, 2, -2) .. "}] }"
    end)
    -- Plain \item followed by whitespace gets an empty group appended.
    str = string.gsub(str, "(\\item)(%s+)", "%1{}%2")
    -- \textcolor{color}{text}: only the text argument is converted.
    str = string.gsub(str, "\\(textcolor%s?)(%b{})(%b{})",
      function(tag, color, text)
        return "\\al@brk{\\" .. tag
          .. "{" .. string.sub(color, 2, -2) .. "}"
          .. "{\\arb{" .. string.sub(text, 2, -2) .. "}}}"
      end)
    -- Apply one replacement template to every command named in `names`.
    local function rewrite_all(names, template)
      for i = 1, #names do
        str = gsub(str, dblbkslash * lpeg.Cs(names[i]) * cmdargs, template)
      end
    end
    -- outofarb commands close the running \arb{ and reopen it afterwards.
    rewrite_all(outofarb, "}%1%2%3\\arb{")
    -- Default and user-declared (\MkArbBreak) commands are protected as is;
    -- an empty `brkcmds` simply yields no iterations.
    rewrite_all(albrkcmds, "\\al@brk{%1%2%3}")
    rewrite_all(brkcmds, "\\al@brk{%1%2%3}")
    return str
  end
  --- Lift commands with a brace argument out of \arb{...}.
  -- Each such command closes the running \arb{, is emitted on its own and
  -- reopens \arb{ afterwards. The command's argument is wrapped in \arb{}
  -- again unless the command name contains `@'. The result is processed
  -- recursively, and empty \arb{} groups are removed at the end.
  -- @param str  text containing \arb{...} groups
  -- @return the restructured text
  local function holdcmd(str)
    local nested = cmd * spcenc^-1 * bsqbracketsii * spcenc^-1 * bcbraces
    local function lift(btag, bopt, bbody)
      local template
      if string.find(btag, "@") then
        template = "}%s%s{%s}\\arb{"
      else
        template = "}%s%s{\\arb{%s}}\\arb{"
      end
      return holdcmd(string.format(template, btag, bopt, string.sub(bbody, 2, -2)))
    end
    str = gsub(str, lpeg.Cs("\\arb") * bcbraces, function(tag, body)
      local contents = gsub(string.sub(body, 2, -2), nested, lift)
      return string.format("%s{%s}", tag, contents)
    end)
    str = string.gsub(str, "\\arb{}", "")
    return str
  end
  --- Reverse every run made of digits, commas, hyphens and slashes.
  -- @param str  input text
  -- @return the text with each such run reversed in place
  local function arbnum(str)
    return (string.gsub(str, "([0-9%,%-%/]+)", string.reverse))
  end
  --- Reverse every run of digits, commas, hyphens and slashes, then apply
  -- the substitution pairs (`a` -> `b`) of the global `numbers` table.
  -- @param str  input text
  -- @return the converted text
  local function indnum(str)
    local out = string.gsub(str, "([0-9%,%-%/]+)", string.reverse)
    for _, rule in ipairs(numbers) do
      out = string.gsub(out, rule.a, rule.b)
    end
    return out
  end
  --- Append an empty group to every discretionary hyphen: \- becomes \-{}.
  -- @param str  input text
  -- @return the rewritten text
  local function processdiscretionary(str)
    return (string.gsub(str, "\\%-", "\\-{}"))
  end
  --- Replace \arbnull{...} by its contents enclosed in a marker letter.
  -- The marker is "P" for the "buckwalter" scheme and "O" otherwise.
  -- @param str     input text
  -- @param scheme  input scheme name
  -- @return the rewritten text
  local function processarbnull(str, scheme)
    local mark = "O"
    if scheme == "buckwalter" then
      mark = "P"
    end
    return (string.gsub(str, "(\\arbnull.?)(%b{})", function(_, braced)
      return mark .. string.sub(braced, 2, -2) .. mark
    end))
  end
  --- Replace every \abjad{...} by the bare contents of its argument.
  -- @param str  input text
  -- @return the text without \abjad wrappers
  local function takeoutabjad(str)
    return (string.gsub(str, "(\\abjad.?)(%b{})", function(_, braced)
      return string.sub(braced, 2, -2)
    end))
  end
  --- Remove markup that only matters for transliteration.
  -- Inside \arb[trans]{...}, \uc is renamed to \Uc so that it survives.
  -- Elsewhere \arbup{...} and \uc{...} are replaced by their contents, and
  -- \linebreak and \- are deleted.
  -- @param str  input text
  -- @return the cleaned text
  local function takeoutcapetc(str)
    -- Replace "\cmd{...}" by the contents of the braces.
    local function unwrap(_, braced)
      return string.sub(braced, 2, -2)
    end
    str = string.gsub(str, "(\\arb.?%[trans%])(%b{})", function(tag, body)
      local kept = string.gsub(string.sub(body, 2, -2), "(\\uc%s?)(%b{})", "\\Uc%2")
      return tag .. "{" .. kept .. "}"
    end)
    str = string.gsub(str, "(\\arbup.?)(%b{})", unwrap)
    str = string.gsub(str, "(\\uc.?)(%b{})", unwrap)
    str = string.gsub(str, "\\linebreak", "")
    str = string.gsub(str, "\\%-", "")
    return str
  end
  --- Flag \LR / \RL commands whose argument contains an `outofarb' command.
  -- Such a command is replaced by a call to \al@wrong@nesting{} placed
  -- between \makeatletter and \makeatother; everything else is untouched.
  -- @param str  input text
  -- @return the checked text
  local function checkwrnested(str)
    local dircmd = dblbkslash * lpeg.Cs(lpeg.P("LR") + lpeg.P("RL")) * cmdargs
    local complaint = atletter .. "\\al@wrong@nesting{}" .. atother
    for _, name in ipairs(outofarb) do
      local forbidden = "\\" .. name
      str = gsub(str, dircmd, function(prefix, tag, body)
        if string.find(string.sub(body, 2, -2), forbidden) then
          return complaint
        end
        -- Falling through returns no value, which leaves the match as is.
      end)
    end
    return str
  end
  --- Flatten nested \arb{...} and wrap the whole text in a single \arb{...}.
  -- \arb inside the arguments of `outofarb' commands is hidden as \@rb while
  -- the remaining \arb{...} wrappers are removed, and restored afterwards.
  -- @param str  input text
  -- @return "\arb{" .. flattened text .. "}"
  local function takeoutarb(str)
    str = checkwrnested(str)
    local function hide(prefix, tag, body)
      local masked = gsub(body, lpeg.P("\\arb"), "\\@rb")
      return prefix .. tag .. masked
    end
    for _, name in ipairs(outofarb) do
      str = gsub(str, dblbkslash * lpeg.Cs(name) * cmdargs, hide)
    end
    str = string.gsub(str, "(\\arb%s?)(%b{})", function(_, braced)
      return string.sub(braced, 2, -2)
    end)
    str = string.gsub(str, "\\@rb", "\\arb")
    return "\\arb{" .. str .. "}"
  end
  --- Convert the contents of every \arb{...} in `voc' mode.
  -- The rule tables are globals; each holds pairs `a` (Lua pattern) and `b`
  -- (replacement) that are applied in order.
  -- @param str    text containing \arb{...} groups
  -- @param rules  "idgham" selects the tanwin table and adds the idgham
  --               pass; any other value uses tanwineasy
  -- @return the text with each group replaced by \arabicfont{} .. result
  local function voc(str, rules)
    -- Apply every a -> b pair of `tbl` to `text`.
    local function apply(text, tbl)
      for i = 1, #tbl do
        text = string.gsub(text, tbl[i].a, tbl[i].b)
      end
      return text
    end
    local assimilate = (rules == "idgham")
    return (string.gsub(str, "\\arb(%b{})", function(inside)
      inside = apply(string.sub(inside, 2, -2), hamza)
      if assimilate then
        inside = apply(inside, tanwin)
      else
        inside = apply(inside, tanwineasy)
      end
      inside = apply(inside, trigraphs)
      if assimilate then
        inside = apply(inside, idgham)
      end
      inside = apply(inside, digraphs)
      inside = apply(inside, single)
      inside = apply(inside, longv)
      inside = apply(inside, shortv)
      inside = apply(inside, punctuation)
      inside = apply(inside, null)
      return "\\arabicfont{}" .. indnum(inside)
    end))
  end
  --- Convert the contents of every \arb{...} in `voc' mode, easy rules.
  -- The global rule tables are applied in a fixed order, then indnum().
  -- @param str  text containing \arb{...} groups
  -- @return the text with each group replaced by \arabicfont{} .. result
  local function voceasy(str)
    -- Run the a -> b pairs of each given table over `text`, table by table.
    local function run(text, ...)
      for n = 1, select("#", ...) do
        local tbl = select(n, ...)
        for i = 1, #tbl do
          text = string.gsub(text, tbl[i].a, tbl[i].b)
        end
      end
      return text
    end
    return (string.gsub(str, "\\arb(%b{})", function(inside)
      inside = run(string.sub(inside, 2, -2),
        hamzaeasy, tanwineasy, trigraphseasy, digraphs, single,
        longv, shortv, punctuation, null)
      return "\\arabicfont{}" .. indnum(inside)
    end))
  end
  --- Convert the contents of every \arb{...} in `fullvoc' mode.
  -- @param str    text containing \arb{...} groups
  -- @param rules  "idgham" selects tanwinfv and digraphsfvidgham and adds
  --               the idgham pass; any other value uses tanwinfveasy and
  --               digraphsfv
  -- @return the text with each group replaced by \arabicfont{} .. result
  local function fullvoc(str, rules)
    -- Apply every a -> b pair of `tbl` to `text`.
    local function apply(text, tbl)
      for i = 1, #tbl do
        text = string.gsub(text, tbl[i].a, tbl[i].b)
      end
      return text
    end
    local assimilate = (rules == "idgham")
    return (string.gsub(str, "\\arb(%b{})", function(inside)
      inside = apply(string.sub(inside, 2, -2), hamzafv)
      if assimilate then
        inside = apply(inside, tanwinfv)
      else
        inside = apply(inside, tanwinfveasy)
      end
      inside = apply(inside, trigraphsfv)
      if assimilate then
        inside = apply(inside, idgham)
        inside = apply(inside, digraphsfvidgham)
      else
        inside = apply(inside, digraphsfv)
      end
      inside = apply(inside, singlefv)
      inside = apply(inside, longv)
      inside = apply(inside, shortv)
      inside = apply(inside, punctuation)
      inside = apply(inside, null)
      return "\\arabicfont{}" .. indnum(inside)
    end))
  end
  --- Convert the contents of every \arb{...} in `fullvoc' mode, easy rules.
  -- @param str    text containing \arb{...} groups
  -- @param rules  "nosukun" selects digraphsfveasy and singlefveasy; any
  --               other value uses digraphsfv and singlefv
  -- @return the text with each group replaced by \arabicfont{} .. result
  local function fullvoceasy(str, rules)
    -- Apply every a -> b pair of `tbl` to `text`.
    local function apply(text, tbl)
      for i = 1, #tbl do
        text = string.gsub(text, tbl[i].a, tbl[i].b)
      end
      return text
    end
    local nosukun = (rules == "nosukun")
    return (string.gsub(str, "\\arb(%b{})", function(inside)
      inside = apply(string.sub(inside, 2, -2), hamzafveasy)
      inside = apply(inside, tanwinfveasy)
      inside = apply(inside, trigraphsfveasy)
      if nosukun then
        inside = apply(inside, digraphsfveasy)
        inside = apply(inside, singlefveasy)
      else
        inside = apply(inside, digraphsfv)
        inside = apply(inside, singlefv)
      end
      inside = apply(inside, longv)
      inside = apply(inside, shortv)
      inside = apply(inside, punctuation)
      inside = apply(inside, null)
      return "\\arabicfont{}" .. indnum(inside)
    end))
  end
  --- Convert the contents of every \arb{...} in `novoc' mode.
  -- The global rule tables are applied in a fixed order, then indnum().
  -- @param str  text containing \arb{...} groups
  -- @return the text with each group replaced by \arabicfont{} .. result
  local function novoc(str)
    -- Run the a -> b pairs of each given table over `text`, table by table.
    local function run(text, ...)
      for n = 1, select("#", ...) do
        local tbl = select(n, ...)
        for i = 1, #tbl do
          text = string.gsub(text, tbl[i].a, tbl[i].b)
        end
      end
      return text
    end
    return (string.gsub(str, "\\arb(%b{})", function(inside)
      inside = run(string.sub(inside, 2, -2),
        hamza, tanwinnv, trigraphsnv, digraphs, single,
        longvnv, shortvnv, punctuation, null)
      return "\\arabicfont{}" .. indnum(inside)
    end))
  end
  --- Convert the contents of every \arb{...} in `novoc' mode, easy rules.
  -- Same passes as novoc() except that hamzaeasy replaces hamza.
  -- @param str  text containing \arb{...} groups
  -- @return the text with each group replaced by \arabicfont{} .. result
  local function novoceasy(str)
    -- Run the a -> b pairs of each given table over `text`, table by table.
    local function run(text, ...)
      for n = 1, select("#", ...) do
        local tbl = select(n, ...)
        for i = 1, #tbl do
          text = string.gsub(text, tbl[i].a, tbl[i].b)
        end
      end
      return text
    end
    return (string.gsub(str, "\\arb(%b{})", function(inside)
      inside = run(string.sub(inside, 2, -2),
        hamzaeasy, tanwinnv, trigraphsnv, digraphs, single,
        longvnv, shortvnv, punctuation, null)
      return "\\arabicfont{}" .. indnum(inside)
    end))
  end
  --- Transliterate the contents of every \arb{...} with the `dmg' tables.
  -- @param str    text containing \arb{...} groups
  -- @param rules  "idgham" adds the idghamtrdmg pass after the trigraphs
  -- @return the text with each group replaced by \txtrans{result}
  local function transdmg(str, rules)
    -- Apply every a -> b pair of `tbl` to `text`.
    local function apply(text, tbl)
      for i = 1, #tbl do
        text = string.gsub(text, tbl[i].a, tbl[i].b)
      end
      return text
    end
    return (string.gsub(str, "\\arb(%b{})", function(inside)
      inside = apply(string.sub(inside, 2, -2), hamzatrdmg)
      inside = apply(inside, tanwintrdmg)
      inside = apply(inside, trigraphstrdmg)
      if rules == "idgham" then
        inside = apply(inside, idghamtrdmg)
      end
      inside = apply(inside, digraphstrdmg)
      inside = apply(inside, singletrdmg)
      inside = apply(inside, longvtrdmg)
      inside = apply(inside, shortvtrdmg)
      inside = apply(inside, punctuationtr)
      inside = apply(inside, nulltr)
      return "\\txtrans{" .. inside .. "}"
    end))
  end
  --- Transliterate the contents of every \arb{...} with the `loc' tables.
  -- @param str  text containing \arb{...} groups
  -- @return the text with each group replaced by \txtrans{result}
  local function transloc(str)
    -- Run the a -> b pairs of each given table over `text`, table by table.
    local function run(text, ...)
      for n = 1, select("#", ...) do
        local tbl = select(n, ...)
        for i = 1, #tbl do
          text = string.gsub(text, tbl[i].a, tbl[i].b)
        end
      end
      return text
    end
    return (string.gsub(str, "\\arb(%b{})", function(inside)
      inside = run(string.sub(inside, 2, -2),
        hamzatrloc, tanwintrloc, trigraphstrloc, digraphstrloc, singletrloc,
        longvtrloc, shortvtrloc, finaltrloc, punctuationtr, nulltr)
      return "\\txtrans{" .. inside .. "}"
    end))
  end
  --- Transliterate the contents of every \arb{...} with the `arabica'
  -- tables; the tanwin and short-vowel passes reuse the `loc' tables.
  -- @param str  text containing \arb{...} groups
  -- @return the text with each group replaced by \txtrans{result}
  local function transarabica(str)
    -- Run the a -> b pairs of each given table over `text`, table by table.
    local function run(text, ...)
      for n = 1, select("#", ...) do
        local tbl = select(n, ...)
        for i = 1, #tbl do
          text = string.gsub(text, tbl[i].a, tbl[i].b)
        end
      end
      return text
    end
    return (string.gsub(str, "\\arb(%b{})", function(inside)
      inside = run(string.sub(inside, 2, -2),
        hamzatrarabica, tanwintrloc, trigraphstrarabica, digraphstrarabica,
        singletrarabica, longvtrarabica, shortvtrloc, punctuationtr, nulltr)
      return "\\txtrans{" .. inside .. "}"
    end))
  end
  --- Apply the global `buckwalter' substitution pairs to the contents of
  -- every \arb{...}; the \arb{...} wrapper itself is kept.
  -- @param str  text containing \arb{...} groups
  -- @return the text with the contents of each group rewritten
  local function processbuckw(str)
    return (string.gsub(str, "\\arb(%b{})", function(inside)
      local text = string.sub(inside, 2, -2)
      for _, rule in ipairs(buckwalter) do
        text = string.gsub(text, rule.a, rule.b)
      end
      return "\\arb{" .. text .. "}"
    end))
  end
  -- The functions below produce a copy of the original .tex source file in
  -- which all arabtex strings are replaced with Unicode equivalents.

  -- Suffix appended to \jobname to build the names of the export files.
  local utffilesuffix = "_out"
  -- Export switch; the code in this file tests it against "yes" and
  -- "arabverse".
  local export_utf = "no"

  --- Set the suffix used for the export file names.
  -- @param str  new suffix
  -- @return true
  function al_utffilesuffix(str)
    utffilesuffix = str
    return true
  end

  --- Set the export switch.
  -- @param str  new value of the switch
  -- @return true
  function al_doexport(str)
    export_utf = str
    return true
  end
  --- Start the export file by copying the preamble of the main source.
  -- Lines of <jobname>.tex are appended to <jobname><suffix>_tmp.tex up to
  -- and including the first line that matches \begin{document}.
  -- Both files are checked after opening, so that a missing or unwritable
  -- file raises a descriptive error instead of a nil-index error.
  -- @return true
  function al_openstream()
    local srcname = tex.jobname .. ".tex"
    local tmpname = tex.jobname .. utffilesuffix .. "_tmp.tex"
    local preamble, perr = io.open(srcname, "r")
    if not preamble then
      error("arabluatex: cannot open " .. srcname .. " (" .. tostring(perr) .. ")")
    end
    local f, ferr = io.open(tmpname, "a+")
    if not f then
      -- Do not leak the handle that was opened successfully.
      preamble:close()
      error("arabluatex: cannot open " .. tmpname .. " (" .. tostring(ferr) .. ")")
    end
    for line in preamble:lines() do
      f:write(line, "\n")
      if string.find(line, "^%s-\\begin%s?{document}") then
        break
      end
    end
    preamble:close()
    f:close()
    return true
  end
  --- Prepare converted text for the Unicode export file.
  -- Rewrites \txtrans, \txarb, \bayt, \prname and the `arab' environment,
  -- then protects the braces that belong to \arb, \arbmark, \uc and \Uc
  -- while all other braces are turned into \@al@ob / \@al@cb markers.
  -- Unless the export switch is "arabverse", the result is enclosed in an
  -- `arabexport' environment.
  -- @param str  text to prepare
  -- @return the prepared text
  local function processarbtoutf(str)
    -- Contents of a %b{} / %b[] capture without its delimiters.
    local function strip(group)
      return string.sub(group, 2, -2)
    end
    -- Temporary stand-ins for the braces that must survive.
    local OPEN, CLOSE = "\\@al@pr@ob", "\\@al@pr@cb"
    local function keep(tag, body)
      return tag .. OPEN .. strip(body) .. CLOSE
    end
    local function keep_opt(tag, opt, body)
      return tag .. opt .. OPEN .. strip(body) .. CLOSE
    end

    local enclose = (export_utf ~= "arabverse")
    if enclose then
      str = "\\begin{arabexport}" .. str
    end

    -- \txtrans{...}: \abjad{...} is reduced to its argument.
    str = string.gsub(str, "(\\txtrans%s?)(%b{})", function(tag, body)
      local text = string.gsub(strip(body), "(\\abjad%s?)(%b{})", function(_, bbody)
        return strip(bbody)
      end)
      return tag .. "{" .. text .. "}"
    end)

    -- \txarb{...}: numeric \abjad arguments go through abjadify();
    -- \arbmark gets an explicit [rl] option.
    str = string.gsub(str, "(\\txarb%s?)(%b{})", function(tag, body)
      local text = strip(body)
      text = string.gsub(text, "(\\abjad%s?)(%b{})", function(btag, bbody)
        bbody = strip(bbody)
        if tonumber(bbody) == nil then
          return string.format("%s{%s}", btag, bbody)
        end
        return string.format("\\aemph{\\arb[novoc]{%s}}", abjadify(bbody))
      end)
      text = string.gsub(text, "(\\arbmark%s?)(%b{})", function(btag, bbody)
        return btag .. "[rl]{" .. strip(bbody) .. "}"
      end)
      return tag .. "{" .. text .. "}"
    end)

    -- \bayt, with and without the optional middle argument.
    str = string.gsub(str, "(\\bayt)%s?(%b{})(%b[])(%b{})", function(tag, argi, argii, argiii)
      return tag .. "*{\\arb{" .. strip(argi) .. "}}[\\arb{" .. strip(argii)
        .. "}]{\\arb{" .. strip(argiii) .. "}}"
    end)
    str = string.gsub(str, "(\\bayt)%s?(%b{})(%b{})", function(tag, argi, argii)
      return tag .. "*{\\arb{" .. strip(argi) .. "}}{\\arb{" .. strip(argii) .. "}}"
    end)

    -- \prname: starred when it already contains \uc{...}.
    str = string.gsub(str, "(\\prname)%s?(%b{})", function(tag, body)
      local name = strip(body)
      if string.find(name, "\\uc%s?%b{}") then
        return tag .. "*{" .. name .. "}"
      end
      return tag .. "{\\arb[trans]{\\uc{" .. name .. "}}}"
    end)

    -- The `arab' environment becomes a grouped \arb{...}.
    str = string.gsub(str, "(\\begin%s?{arab})(%b[])", function(_, opts)
      if string.find(opts, "trans") then
        return "\\par\\bgroup\\setLR\\arb" .. opts .. "{"
      end
      return "\\par\\bgroup\\setRL\\arb" .. opts .. "{"
    end)
    str = string.gsub(str, "(\\begin%s?{arab})", "\\par\\bgroup\\arbpardir\\arb{")
    str = string.gsub(str, "\\end%s?{arab}", "}\\egroup\\par")

    -- This does not work, while the following two do. Look into this later.
    -- str = gsub(str, lpeg.Cs("\\arb") * spcenc * bsqbrackets^-1 * bcbraces, function(tag, opt, body)
    -- body = string.sub(body, 2, -2)
    -- return string.format("%s%s\\@al@pr@ob%s\\@al@pr@cb", tag, opt, body)
    -- end)
    str = string.gsub(str, "(\\arb%s?)(%b[])(%b{})", keep_opt)
    str = string.gsub(str, "(\\arb)%s?(%b{})", keep)
    str = string.gsub(str, "(\\arbmark)%s?(%b[])(%b{})", keep_opt)
    str = string.gsub(str, "(\\arbmark)%s?(%b{})", keep)
    str = string.gsub(str, "(\\[Uu]c)%s?(%b{})", keep)

    -- Neutralise all remaining braces, then bring the protected ones back.
    str = string.gsub(str, "{", "\\@al@ob")
    str = string.gsub(str, "} ", "\\@al@cb@sp")
    str = string.gsub(str, "}", "\\@al@cb")
    str = string.gsub(str, "\\@al@pr@ob", "{")
    str = string.gsub(str, "\\@al@pr@cb", "}")

    -- Inside the protected groups the neutralised braces are real again.
    str = string.gsub(str, "(%b{})", function(group)
      local text = strip(group)
      text = string.gsub(text, "(%s?)(\\@al@ob)", "%1{")
      text = string.gsub(text, "(\\@al@cb@sp)", "} ")
      text = string.gsub(text, "(\\@al@cb)(%s?)", "}%2")
      return "{" .. text .. "}"
    end)

    if enclose then
      str = str .. "\\end{arabexport}"
    end
    return str
  end
  --- Wrap prepared export text in \ArbOutFile{...}, leaving \arb, \uc / \Uc
  -- and \arbmark commands outside of the wrapper.
  -- Each pass closes the running \ArbOutFile{ before such a command and
  -- reopens it afterwards.
  -- @param str  text to export
  -- @return the wrapped text
  function arbtoutf(str)
    str = "\\ArbOutFile{" .. processarbtoutf(str) .. "}"
    -- Run `splitter` over the argument of every \ArbOutFile{...}.
    local function pass(splitter)
      str = string.gsub(str, "(\\ArbOutFile)%s?(%b{})", function(tag, body)
        local contents = splitter(string.sub(body, 2, -2))
        return string.format("%s{%s}", tag, contents)
      end)
    end
    pass(function(text)
      return gsub(text, lpeg.Cs("\\arb") * arbargs, "}%1%2\\ArbOutFile{")
    end)
    pass(function(text)
      return (string.gsub(text, "(\\[Uu]c)%s?(%b{})", "}%1%2\\ArbOutFile{"))
    end)
    pass(function(text)
      return gsub(text, lpeg.Cs("\\arbmark") * arbargs, "}%1%2\\ArbOutFile{")
    end)
    return str
  end
  --- Append text to the temporary export file <jobname><suffix>_tmp.tex.
  -- The result of io.open() is checked, so that an unwritable file raises a
  -- descriptive error instead of a nil-index error.
  -- @param str  text to write
  -- @param nl   "newline" appends two line feeds after the text
  -- @return `str`, unchanged
  function tooutfile(str, nl)
    local tmpname = tex.jobname .. utffilesuffix .. "_tmp.tex"
    local f, err = io.open(tmpname, "a+")
    if not f then
      error("arabluatex: cannot open " .. tmpname .. " (" .. tostring(err) .. ")")
    end
    if nl == "newline" then
      f:write(str, "\n\n")
    else
      f:write(str)
    end
    f:close()
    return str
  end
  --- Finish the export: tidy the temporary file and write the final one.
  -- Reads <jobname><suffix>_tmp.tex, removes helper markup, restores the
  -- neutralised braces, normalises line breaks around environments, \\ and
  -- \item, appends \end{document}, writes the result to
  -- <jobname><suffix>.tex and deletes the temporary file. A LaTeX warning
  -- is issued when \arb, \uc or \Uc commands remain after \begin{document}.
  -- Both files are checked after opening, so that a failure raises a
  -- descriptive error instead of a nil-index error.
  -- @return true
  function al_closestream()
    local tmpname = tex.jobname .. utffilesuffix .. "_tmp.tex"
    local outname = tex.jobname .. utffilesuffix .. ".tex"
    local f, ferr = io.open(tmpname, "r")
    if not f then
      error("arabluatex: cannot open " .. tmpname .. " (" .. tostring(ferr) .. ")")
    end
    local o, oerr = io.open(outname, "w")
    if not o then
      -- Do not leak the handle that was opened successfully.
      f:close()
      error("arabluatex: cannot open " .. outname .. " (" .. tostring(oerr) .. ")")
    end
    local t = f:read("*a")
    t = string.gsub(t, "\\arabicfont{}", "")
    t = string.gsub(t, "\\par ", "\n\n")
    -- Turn the brace markers back into real braces.
    t = string.gsub(t, "(\\@al@ob)", "{")
    t = string.gsub(t, "(\\@al@cb@sp)", "} ")
    t = string.gsub(t, "(\\@al@cb)(%s?)", "}")
    -- Put \begin{...}, \\, \item and \end{...} on lines of their own.
    t = gsub(t, lpeg.Cs("\\begin") * spcenc^-1 * bcbraces * cmdargs, "\n%1%2%3\n")
    t = string.gsub(t, "(\\\\)(%s?)", "%1\n")
    t = string.gsub(t, "(\\\\)(\n)(\\end%s?)(%b{})", "%1%3%4")
    t = string.gsub(t, "%s-\n(\\begin%s?)(%b{})", "\n%1%2")
    t = string.gsub(t, "(\\item)", "\n%1")
    t = string.gsub(t, "\n\n(\\item)", "\n%1")
    t = string.gsub(t, "(\\end%s?)(%b{})", "%1%2\n")
    t = string.gsub(t, "([^\n]%s-)(\\end)%s?(%b{})", "%1\n%2%3")
    t = string.gsub(t, "\n\n\n", "\n\n")
    -- Collapse \txarb{\txarb{...}} and \prname*{\txtrans{...}}.
    t = string.gsub(t, "(\\txarb%s?%{)(\\txarb%s?)(%b{})(%})", function(tagio, tagii, body, tagic)
      return tagio .. string.sub(body, 2, -2) .. tagic
    end)
    t = string.gsub(t, "(\\prname%s?%*%{)(\\txtrans%s?)(%b{})(%})", function(tagio, tagii, body, tagic)
      return tagio .. string.sub(body, 2, -2) .. tagic
    end)
    if string.find(t, "\\begin%s?{document}.-\\arb%s?[%[%{]") or
      string.find(t, "\\begin%s?{document}.-\\[Uu]c%s?%b{}")
    then
      tex.print([[\unexpanded{\PackageWarningNoLine{arabluatex}{There are still 'arabtex' strings to be converted. Please open ]]..tex.jobname..utffilesuffix..".tex"..[[ and compile it one more time}}]])
    end
    t = t .. "\n\\end{document}"
    -- NOTE(review): the whole document is also written to standard output;
    -- kept as in the original, confirm that this is intended.
    io.write(t)
    o:write(t)
    f:close()
    o:close()
    os.remove(tmpname)
    return true
  end
  -- Process standard arabluatex modes:

  --- Convert ArabTeX input in `voc' mode.
  -- `tofile' used to be assigned without `local' and leaked into the global
  -- environment; the export text is now passed to tooutfile() directly.
  -- @param str     ArabTeX input
  -- @param rules   "easy" / "easynosukun" use voceasy(); "dflt" / "idgham"
  --                use voc(); any other value skips the conversion pass
  -- @param scheme  "buckwalter" runs processbuckw() first
  -- @return the converted text, or "" when the export switch is "yes" or
  --         "arabverse" (the text is then written to the export file,
  --         wrapped in \txarb{...})
  function processvoc(str, rules, scheme)
    str = takeoutarb(str)
    str = processarbnull(str, scheme)
    str = takeoutcapetc(str)
    str = protectarb(str)
    str = breakcmd(str)
    str = holdcmd(str)
    if scheme == "buckwalter" then
      str = processbuckw(str)
    end
    if rules == "easy" or rules == "easynosukun" then
      str = voceasy(str)
    elseif rules == "dflt" or rules == "idgham" then
      str = voc(str, rules)
    end
    str = unprotectarb(str)
    if export_utf == "yes" or export_utf == "arabverse" then
      tooutfile("\\txarb{" .. str .. "}")
      return ""
    end
    return str
  end
  --- Convert ArabTeX input in `fullvoc' mode.
  -- `tofile' used to be assigned without `local' and leaked into the global
  -- environment; the export text is now passed to tooutfile() directly.
  -- @param str     ArabTeX input
  -- @param rules   "easy" uses fullvoceasy(str, "sukun"), "easynosukun"
  --                uses fullvoceasy(str, "nosukun"), "dflt" / "idgham" use
  --                fullvoc(); any other value skips the conversion pass
  -- @param scheme  "buckwalter" runs processbuckw() first
  -- @return the converted text, or "" when the export switch is "yes" or
  --         "arabverse" (the text is then written to the export file,
  --         wrapped in \txarb{...})
  function processfullvoc(str, rules, scheme)
    str = takeoutarb(str)
    str = processarbnull(str, scheme)
    str = takeoutcapetc(str)
    str = protectarb(str)
    str = breakcmd(str)
    str = holdcmd(str)
    if scheme == "buckwalter" then
      str = processbuckw(str)
    end
    if rules == "easy" then
      str = fullvoceasy(str, "sukun")
    elseif rules == "easynosukun" then
      str = fullvoceasy(str, "nosukun")
    elseif rules == "dflt" or rules == "idgham" then
      str = fullvoc(str, rules)
    end
    str = unprotectarb(str)
    if export_utf == "yes" or export_utf == "arabverse" then
      tooutfile("\\txarb{" .. str .. "}")
      return ""
    end
    return str
  end
  --- Process a string in "novoc" mode.
  -- Same pipeline shape as the other mode processors: preparation chain
  -- (takeoutarb, processarbnull, takeoutcapetc, protectarb, breakcmd,
  -- holdcmd), optional processbuckw, the conversion selected by `rules`,
  -- then unprotectarb.
  -- @param str    string to process
  -- @param rules  "easy" or "easynosukun" selects novoceasy; "dflt" or
  --               "idgham" selects novoc; any other value skips this step
  -- @param scheme passed to processarbnull; "buckwalter" also triggers
  --               processbuckw
  -- @return the processed string, or "" when export_utf is "yes" or
  --         "arabverse" (the result, wrapped in \txarb{}, is then handed
  --         to tooutfile instead of being returned)
  function processnovoc(str, rules, scheme)
    str = takeoutarb(str)
    str = processarbnull(str, scheme)
    str = takeoutcapetc(str)
    str = protectarb(str)
    str = breakcmd(str)
    str = holdcmd(str)
    if scheme == "buckwalter" then
      str = processbuckw(str)
    end
    if rules == "easy" or rules == "easynosukun" then
      str = novoceasy(str)
    elseif rules == "dflt" or rules == "idgham" then
      str = novoc(str)
    end
    str = unprotectarb(str)
    if export_utf == "yes" or export_utf == "arabverse" then
      -- Pass the value directly: the former `tofile` assignment had no
      -- `local` and therefore leaked a global on every call.
      tooutfile("\\txarb{"..str.."}")
      return ""
    end
    return str
  end
  --- Process a string in "trans" mode.
  -- Preparation chain: takeoutarb, processdiscretionary, processarbnull,
  -- takeoutabjad, protectarb, breakcmd, holdcmd; then processbuckw when the
  -- Buckwalter scheme is selected, the conversion selected by `mode`, and
  -- finally unprotectarb.
  -- @param str    string to process
  -- @param mode   "dmg" calls transdmg(str, rules); "loc" calls transloc;
  --               "arabica" calls transarabica; other values skip this step
  -- @param rules  only forwarded to transdmg
  -- @param scheme passed to processarbnull; "buckwalter" also triggers
  --               processbuckw
  -- @return the processed string, or "" when export_utf is "yes" or
  --         "arabverse" (the result is then handed, unwrapped, to
  --         tooutfile instead of being returned)
  function processtrans(str, mode, rules, scheme)
    str = takeoutarb(str)
    str = processdiscretionary(str)
    str = processarbnull(str, scheme)
    str = takeoutabjad(str)
    str = protectarb(str)
    str = breakcmd(str)
    str = holdcmd(str)
    if scheme == "buckwalter" then
      str = processbuckw(str)
    end
    if mode == "dmg" then
      str = transdmg(str, rules)
    elseif mode == "loc" then
      str = transloc(str)
    elseif mode == "arabica" then
      str = transarabica(str)
    end
    str = unprotectarb(str)
    if export_utf == "yes" or export_utf == "arabverse" then
      -- Pass the value directly: the former `tofile` assignment had no
      -- `local` and therefore leaked a global on every call.
      tooutfile(str)
      return ""
    end
    return str
  end
  --- Register a new mark in the `arbmarks` list.
  -- The entry stores the abbreviation in field `a`, the right-to-left text
  -- prefixed with \arabicfont{} in field `b`, and the left-to-right text in
  -- field `c`. After insertion the list is re-sorted so that longer
  -- abbreviations come first.
  -- @param abbr  abbreviation identifying the mark
  -- @param rtlmk text used for right-to-left output
  -- @param ltrmk text used for left-to-right output
  -- @return true
  function newarbmark(abbr, rtlmk, ltrmk)
    local entry = { a = abbr, b = "\\arabicfont{}"..rtlmk, c = ltrmk }
    arbmarks[#arbmarks + 1] = entry
    local function longer_first(x, y)
      return #x.a > #y.a
    end
    table.sort(arbmarks, longer_first)
    return true
  end
  --- Tell whether a list holds an entry whose field `a` equals `element`.
  -- @param list    sequence of tables, each carrying a field `a`
  --                (named `list` so the standard `table` library is not
  --                shadowed inside the function)
  -- @param element value compared against each entry's field `a`
  -- @return true on the first match, false when no entry matches
  local function isintable(list, element)
    for idx = 1, #list do
      local entry = list[idx]
      if entry.a == element then
        return true
      end
    end
    return false
  end
  --- Expand a mark abbreviation registered in `arbmarks`.
  -- The entry whose field `a` is exactly `str` is looked up and its text is
  -- returned literally. Earlier code fed the abbreviation to string.gsub as
  -- a pattern and kept substituting over the whole list, so abbreviations
  -- containing pattern magic characters ("-", ".", "%" ...) were matched
  -- wrongly, and shorter abbreviations could rewrite parts of the text that
  -- had just been inserted.
  -- @param str abbreviation to expand
  -- @param dir "lr" selects the entry's field `c`, "rl" selects field `b`;
  --            any other value follows tex.textdir ("TLT" selects `c`,
  --            anything else selects `b`)
  -- @return the expansion (or an error marker built around
  --         \al@wrong@mark{} when the abbreviation is unknown), or ""
  --         when export_utf is "yes" or "arabverse" (the result is then
  --         handed to tooutfile instead of being returned)
  function processarbmarks(str, dir)
    -- Exact lookup of the abbreviation.
    local mark
    for i = 1, #arbmarks do
      if arbmarks[i].a == str then
        mark = arbmarks[i]
        break
      end
    end
    if mark == nil then
      str = "\\LR{<??>}"..atletter.."\\al@wrong@mark{}"..atother
    elseif dir == "lr" or (dir ~= "rl" and tex.textdir == "TLT") then
      str = mark.c
    else
      str = mark.b
    end
    if export_utf == "yes" or export_utf == "arabverse" then
      -- Pass the value directly: the former `tofile` assignment had no
      -- `local` and therefore leaked a global on every call.
      tooutfile(str)
      return ""
    end
    return str
  end
  --- Apply the `lcuc` replacements to the initial letter of words and wrap
  -- the result in \txtrans{}.
  -- Steps, in order:
  --   1. every "\txtrans{...}" found in `str` is replaced by its content;
  --   2. fixed rules for "al-lāh", "l-lāh" and "ibn"/"b." put those words
  --      in braces;
  --   3. for each rule below, every `lcuc` entry is tried: field `a` is
  --      appended to the rule's pattern prefix and field `b` is used in the
  --      replacement (presumably lowercase/uppercase pairs -- confirm
  --      against the definition of `lcuc`).
  -- Rules come in pairs: one anchored at the start of the string and one
  -- anchored after whitespace optionally followed by "(", "<" or "[".
  -- @param str string to convert
  -- @return "\txtrans{<converted>}", or "" when export_utf is "yes" or
  --         "arabverse" (the wrapped result is then handed to tooutfile
  --         instead of being returned)
  function uc(str)
    str = string.gsub(str, "(\\txtrans.?)(%b{})", function(_, body)
      -- %b{} captures the braces too: drop them.
      return string.sub(body, 2, -2)
    end)
    -- Allah and ibn
    str = string.gsub(str, "(al%-lāh)([uai]?)", "{Allāh%2}")
    str = string.gsub(str, "([%'%-]?)(l%-lāh)([uai]?)", "%1{Llāh%3}")
    str = string.gsub(str, "(%s[%(%<%[]?)([i%']?b[n%.])", "%1{%2}")
    -- Each rule: pattern prefix, replacement prefix, replacement suffix.
    -- The order is significant: earlier rules brace their matches, which
    -- keeps later, more general rules from matching the same word again.
    local rules = {
      { "^([%S]-%-[`']?)",               "{%1",   "}" },
      { "(%s[%(%<%[]?)([%S]-%-[`']?)",   "%1{%2", "}" },
      { "^([%S]-%-ʿ)",                   "{%1",   "}" },
      { "(%s[%(%<%[]?)([%S]-%-ʿ)",       "%1{%2", "}" },
      { "^([%S]-%-ʾ)",                   "{%1",   "}" },
      { "(%s[%(%<%[]?)([%S]-%-ʾ)",       "%1{%2", "}" },
      { "^([`'])",                       "{%1",   "}" },
      { "(%s[%(%<%[]?)([`'])",           "%1{%2", "}" },
      { "^(ʾ)",                          "{%1",   "}" },
      { "(%s[%(%<%[]?)(ʾ)",              "%1{%2", "}" },
      { "^(ʿ)",                          "{%1",   "}" },
      { "(%s[%(%<%[]?)(ʿ)",              "%1{%2", "}" },
      { "^",                             "",      ""  },
      { "(%s[%(%<%[]?)",                 "%1",    ""  },
    }
    for _, rule in ipairs(rules) do
      local prefix, open, close = rule[1], rule[2], rule[3]
      for i = 1, #lcuc do
        str = string.gsub(str, prefix..lcuc[i].a, open..lcuc[i].b..close)
      end
    end
    local wrapped = "\\txtrans{"..str.."}"
    if export_utf == "yes" or export_utf == "arabverse" then
      -- Pass the value directly: the former `tofile` assignment had no
      -- `local` and therefore leaked a global on every call.
      tooutfile(wrapped)
      return ""
    end
    return wrapped
  end
  1027. -- this function is adapted from an 'obsolete project' of Khaled
  1028. -- Hosny's that dates back to 2010. Thanks to him.
  1029. -- See https://github.com/khaledhosny/lualatex-arabic
  --- Build the abjad notation of a number from the `abjad` lookup table.
  -- abjad[4][1] is repeated once per thousand; abjad[3], abjad[2] and
  -- abjad[1] are indexed by the hundreds, tens and units digit
  -- respectively. A digit equal to zero contributes nothing.
  -- @param n number, or a string that tonumber can convert
  -- @return the collected symbols wrapped as "\arb[novoc]{...}"
  function abjadify(n)
    local result = ""
    n = tonumber(n)
    if n >= 1000 then
      result = string.rep(abjad[4][1], math.floor(n / 1000))
      n = math.fmod(n, 1000)
    end
    -- { place value, row of `abjad` holding the symbols for that place }
    local places = { { 100, 3 }, { 10, 2 }, { 1, 1 } }
    for _, place in ipairs(places) do
      local unit, row = place[1], place[2]
      if n >= unit then
        result = result .. abjad[row][math.floor(n / unit)]
        n = math.fmod(n, unit)
      end
    end
    return "\\arb[novoc]{"..result.."}"
  end
  --- Surround a string with escaped braces according to tex.textdir.
  -- With "TRT" the closing brace is written first ("\}" .. str .. "\{");
  -- with "TLT" the usual order is used. Any other direction leaves the
  -- string untouched.
  -- @param str string to surround
  -- @return the surrounded (or unchanged) string
  function abraces(str)
    local direction = tex.textdir
    if direction == "TRT" then
      return "\\}"..str.."\\{"
    end
    if direction == "TLT" then
      return "\\{"..str.."\\}"
    end
    return str
  end
  --- Wrap a string in a math-mode rule for emphasis, following tex.textdir.
  -- With "TRT" the text is always overlined and \textdir TRT{} is inserted
  -- inside \text{}; `opt` is not consulted. With "TLT" the text is
  -- overlined when `opt` is "over" and underlined otherwise. Any other
  -- direction leaves the string untouched.
  -- @param str string to emphasize
  -- @param opt "over" requests an overline in the "TLT" case
  -- @return the wrapped (or unchanged) string
  function aemph(str, opt)
    local direction = tex.textdir
    if direction == "TRT" then
      return "$\\overline{\\text{\\textdir TRT{}"..str.."}}$"
    end
    if direction == "TLT" then
      local rule = (opt == "over") and "\\overline" or "\\underline"
      return "$"..rule.."{\\text{"..str.."}}$"
    end
    return str
  end