findtext.lua 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300
  1. #! /usr/bin/env lua
  -- Name of this script with any leading directory components removed
  -- (both "/" and "\" are treated as separators). Used as the prefix of
  -- diagnostics and in the usage text.
  local me = (string.gsub(arg[0], ".*[/\\](.*)$", "%1"))
  --- Report a fatal error and terminate the script.
  -- The message is built from `fmt` and the extra arguments with
  -- string.format semantics, prefixed with the script name and written to
  -- stderr.
  -- @param fmt  format string for the message
  -- @param ...  values substituted into `fmt`
  -- Does not return: the process exits with status 1.
  local function err(fmt, ...)
    local message = fmt:format(...)
    io.stderr:write(string.format("%s: %s\n", me, message))
    os.exit(1)
  end
  -- Command-line state; the values stay nil unless the matching option is given.
  local output, lang, author   -- output file name, language name, author
  local inputs = { }           -- input file names, in command-line order
  local i = 1                  -- index of the next element of `arg` to examine
  --- Print the command-line help text to stdout and exit successfully.
  -- Does not return: the process exits with status 0.
  local function usage()
    -- The trailing empty entry keeps the blank line that follows the option list.
    local help_lines = {
      "Usage: " .. me .. " [OPTIONS] FILE...",
      "Extract translatable strings from the given FILE(s).",
      "Available options:",
      "-h,--help Show this help screen and exit.",
      "-o,--output X Set output file (default: stdout).",
      "-a,--author X Set author.",
      "-l,--lang X Set language name.",
      "",
    }
    print(table.concat(help_lines, "\n"))
    os.exit(0)
  end
  -- Options that take a value: each entry stores the value that follows the
  -- option on the command line. Long and short spellings share one setter.
  local value_setters = {
    ["-o"] = function(value) output = value end,
    ["-a"] = function(value) author = value end,
    ["-l"] = function(value) lang = value end,
  }
  value_setters["--output"] = value_setters["-o"]
  value_setters["--author"] = value_setters["-a"]
  value_setters["--lang"] = value_setters["-l"]

  -- Walk the command line. Anything that does not start with "-" is an input
  -- file; an unknown option or an option missing its value is fatal.
  while i <= #arg do
    local opt = arg[i]
    local store = value_setters[opt]
    if opt == "-h" or opt == "--help" then
      usage()
    elseif store then
      -- The value is the next argument; step onto it.
      i = i + 1
      if i > #arg then
        err("missing required argument to `%s'", opt)
      end
      store(arg[i])
    elseif opt:sub(1, 1) == "-" then
      err("unrecognized option `%s'", opt)
    else
      inputs[#inputs + 1] = opt
    end
    i = i + 1
  end

  -- At least one input file is required.
  if #inputs < 1 then
    err("no input files")
  end
  -- Destination of the generated catalogue; standard output by default.
  local outfile = io.stdout

  --- Write a formatted string to the current output destination.
  -- @param fmt  format string (string.format syntax)
  -- @param ...  values substituted into `fmt`
  local function printf(fmt, ...)
    local text = fmt:format(...)
    outfile:write(text)
  end
  -- Redirect the output to the requested file; failing to open it is fatal.
  if output then
    local handle, open_err = io.open(output, "w")
    outfile = handle
    if not handle then
      err("error opening file for writing: %s", open_err)
    end
  end

  -- Optional header: language and/or author comment lines, framed by a blank
  -- line before and after. Nothing is written when neither was given.
  if author or lang then
    outfile:write("\n")
    if lang then
      printf("# Language: %s\n", lang)
    end
    if author then
      printf("# Author: %s\n", author)
    end
    outfile:write("\n")
  end
  -- Single-letter escapes that decode to one literal byte, keyed by the byte
  -- value of the letter that follows the backslash.
  -- "n" is deliberately absent: parse_lua_string turns "\n" into "@n".
  local c_escapes = {}
  for letter, decoded in pairs({
    a = '\a', b = '\b', f = '\f', r = '\r', t = '\t', v = '\v',
  }) do
    c_escapes[letter:byte(1)] = decoded
  end

  --- Convert the body of a Lua string literal into a catalogue key.
  -- Backslash escapes are decoded, and characters that are special in the
  -- output are escaped with "@": a line feed becomes "@n", "=" becomes "@=",
  -- and a "#" that ends up as the very first character becomes "@#".
  -- A hex escape ("\x") only loses its backslash and triggers a warning on
  -- stderr.
  -- @param s  text found between the quotes of a string literal
  -- @return the converted string
  local function parse_lua_string(s)
    local BACKSLASH, LF, EQUALS, HASH = 0x5C, 0x0A, 0x3D, 0x23
    local pieces = {}   -- output fragments, joined at the end

    -- Append a byte that is not a backslash, applying the "@" escaping.
    -- Bytes produced by decoding an escape go through here as well, so that
    -- e.g. "\061" ends up as "@=".
    local function put(byte)
      local fragment
      if byte == LF then
        fragment = "@n"
      elseif byte == EQUALS then
        fragment = "@="
      elseif byte == HASH and #pieces == 0 then
        -- Only a leading "#" needs protection.
        fragment = "@#"
      else
        fragment = string.char(byte)
      end
      pieces[#pieces + 1] = fragment
    end

    local pos, len = 1, #s
    while pos <= len do
      local c = s:byte(pos)
      if c ~= BACKSLASH then
        put(c)
        pos = pos + 1
      elseif pos == len then
        -- A trailing backslash has nothing to escape; keep it unchanged.
        pieces[#pieces + 1] = "\\"
        pos = pos + 1
      else
        local nxt = s:byte(pos + 1)
        local consumed = 2   -- backslash plus one character, unless decimal
        if nxt >= 0x30 and nxt <= 0x39 then
          -- Decimal escape: one to three digits.
          local scode = s:match('%d%d?%d?', pos + 1)
          local ncode = tonumber(scode)
          local decoded = string.char(ncode)
          if ncode == BACKSLASH then
            -- A decoded backslash is literal; it must not start an escape.
            pieces[#pieces + 1] = decoded
          else
            put(ncode)
          end
          consumed = 1 + #scode
        elseif nxt == 0x6E then
          -- 0x6E = n
          pieces[#pieces + 1] = "@n"
        elseif nxt == 0x78 then
          -- 0x78 = x: drop the backslash, keep the "x" and what follows it.
          io.stderr:write("Warning: Hex escape sequence is illegal in Lua 5.1\n")
          put(nxt)
        elseif c_escapes[nxt] ~= nil then
          pieces[#pieces + 1] = c_escapes[nxt]
        elseif nxt == BACKSLASH then
          pieces[#pieces + 1] = "\\"
        else
          -- Any other escaped character stands for itself.
          put(nxt)
        end
        pos = pos + consumed
      end
    end
    return table.concat(pieces)
  end
  --- Make one source line safe for the quote-delimited patterns that are
  -- applied to it afterwards.
  -- Inside a quoted string, a quote character other than the one that opened
  -- the string and every "(" are replaced by their decimal escapes ("\034",
  -- "\039", "\040"), and backslash-escaped quotes become "\034" / "\039".
  -- Everything from a "--" found outside a string is dropped. Long comments
  -- and long strings ("--[[ ... ]]", "[[ ... ]]") are not recognised.
  -- @param s  one line of Lua source
  -- @return the rewritten line
  local function replace_quote_in_quote(s)
    --[[
      state = 0: normal code, starting state
      state = 1: seen -
      state = 2: inside a string (after the opening " or ')
      state = 3: seen \ within string
    --]]
    local state = 0
    local i = 1
    local len = #s
    local end_str   -- byte value of the quote that opened the current string
    while i <= len do
      local c = s:byte(i)
      -- From here on, i points to the character after the current one.
      i = i + 1
      if state == 0 then
        if c == 0x2D then
          -- 0x2D = -
          state = 1
        elseif c == 0x22 or c == 0x27 then
          -- 0x22 = "
          -- 0x27 = '
          end_str = c
          state = 2
          -- else remain in state 0
        end
      elseif state == 1 then
        if c == 0x2D then
          -- 0x2D = -
          -- Ignore the rest of the line. We don't parse --[[ ... ]].
          return s:sub(1, i - 3)
        elseif c == 0x22 or c == 0x27 then
          -- 0x22 = "
          -- 0x27 = '
          -- A string opening right after a minus sign is an ordinary string
          -- start. Entering state 3 here would treat its first character as
          -- backslash-escaped and corrupt e.g. `-""`.
          end_str = c
          state = 2
        else
          state = 0
        end
      elseif state == 2 then
        if c == 0x5C then
          -- 0x5C = \
          state = 3
        elseif c == end_str then
          state = 0
        elseif c == 0x22 or c == 0x27 or c == 0x28 then
          -- " or ' or open parenthesis: swap the single character for a
          -- four-character decimal escape, then skip over the added text.
          s = s:sub(1, i - 2) .. ("\\%03d"):format(c) .. s:sub(i)
          i = i + 3
          len = len + 3
          -- else remain in state 2
        end
      elseif state == 3 then
        if c == 0x22 or c == 0x27 then
          -- Escaped quote found - replace it. The backslash is already in
          -- place, so only the three digits are inserted.
          s = s:sub(1, i - 2) .. (c == 0x22 and "034" or "039") .. s:sub(i)
          i = i + 2
          len = len + 2
        end
        -- Any escaped character ends the escape.
        state = 2
      end
    end
    -- The tracked length must match the rewritten string.
    assert(#s == len)
    return s
  end
  -- Extracted message strings, grouped by textdomain:
  -- messages[textdomain] = { msg, msg, ... } (duplicates possible).
  local messages = {}

  for _, path in ipairs(inputs) do
    local src, open_err = io.open(path, "r")
    if not src then
      -- An unreadable input is reported and skipped, not fatal.
      io.stderr:write(("%s: WARNING: error opening file: %s\n"):format(me, open_err))
    else
      -- Translator function names declared in this file -> their textdomain.
      local domain_of = {}
      for raw_line in src:lines() do
        -- Declarations such as: local S = minetest.get_translator("domain")
        for fname, domain in raw_line:gmatch('local (%w+)%s*=%s*%w+%.get_translator%("([^"]*)"%)') do
          --print(fname, domain)
          if not messages[domain] then
            messages[domain] = {}
          end
          domain_of[fname] = domain
        end

        -- Rewrite awkward characters inside strings before matching calls.
        local cooked = replace_quote_in_quote(raw_line)

        -- Calls through a known translator function: S("message" ...
        for fname, text in cooked:gmatch('(%w+)%("([^"]*)"') do
          local parsed = parse_lua_string(text)
          local domain = domain_of[fname]
          if domain then
            table.insert(messages[domain], parsed)
          end
        end

        -- Direct calls: xxx.translate("domain", "message" ...
        for domain, text in cooked:gmatch('%w+%.translate%("([^"]*)"%s*,%s*"([^"]*)"') do
          local parsed = parse_lua_string(text)
          if not messages[domain] then
            messages[domain] = {}
          end
          table.insert(messages[domain], parsed)
        end
      end
      src:close()
    end
  end
  -- Write one section per textdomain. The domain names are sorted first
  -- because pairs() visits keys in an unspecified order, which would make
  -- the order of the sections differ from run to run.
  local domains = {}
  for textdomain in pairs(messages) do
    domains[#domains + 1] = textdomain
  end
  table.sort(domains)

  for _, textdomain in ipairs(domains) do
    local mtbl = messages[textdomain]
    -- Sorting puts duplicates next to each other, so remembering the
    -- previously written message is enough to emit each one only once.
    table.sort(mtbl)
    local last_msg
    printf("# textdomain: %s\n", textdomain)
    for _, msg in ipairs(mtbl) do
      if msg ~= last_msg then
        printf("%s=\n", msg)
      end
      last_msg = msg
    end
  end

  -- Close the output only when it is a file opened by this script; stdout is
  -- left alone.
  if output then
    outfile:close()
  end
  257. --[[
  258. TESTS:
  259. local S = minetest.get_translator("domain")
  260. S("foo") S("bar")
  261. S("bar")
  262. S("foo") -- S("doesn't matter")
  263. print("this is in a string S(") x=0 print(") still text") S('bar baz "this" \"\'that\' foobar')
  264. minetest.translate("another_domain", "foo")
  265. S("#foo=@1\n@2", "bar", "baz")
  266. S("what's this? (oh, an apostrophe)")
  267. S("\035 is a #")
  268. S("\092 is a \\")
  269. S("\\ is a \\")
  270. S("\# is a #")
  271. ]]