bc.lua 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522
  1. --- Bytecode parsing.
  2. -- Please note that this module is experimental and subject to change.
  3. -- @module advtrains_doc_integration.bc
  -- Module table; the public functions are attached to it further down.
  local bc = {}
  -- Bit operations taken from the global `bit` library.
  local band, brshift = bit.band, bit.rshift
  --- Read a single unsigned byte.
  -- @tparam string str The string to read from.
  -- @tparam number pos 1-based position of the byte.
  -- @treturn number The byte at `pos` (no value when `pos` is out of range).
  local function read_u8(str, pos)
    -- With the end index omitted, string.byte reads exactly one byte.
    return string.byte(str, pos)
  end
  --- Read an unsigned 16-bit little-endian integer.
  -- @tparam string str The string to read from.
  -- @tparam number pos 1-based position of the low byte.
  -- @treturn number The decoded value in the range 0..65535.
  local function read_u16le(str, pos)
    local lo, hi = string.byte(str, pos, pos + 1)
    -- The high byte carries a weight of 256 (one full byte), not 32.
    return hi * 256 + lo
  end
  --- Read an unsigned 32-bit little-endian integer.
  -- @tparam string str The string to read from.
  -- @tparam number pos 1-based position of the least significant byte.
  -- @treturn number The decoded value.
  local function read_u32le(str, pos)
    local byte0, byte1, byte2, byte3 = string.byte(str, pos, pos + 3)
    -- Horner form of byte3*256^3 + byte2*256^2 + byte1*256 + byte0.
    return ((byte3 * 256 + byte2) * 256 + byte1) * 256 + byte0
  end
  --- Split a numeric flag field into named flags.
  -- @tparam number val The raw flag value.
  -- @tparam table spec Map from flag name to bit mask.
  -- @treturn table Map from flag name to the masked (non-zero) bits; names
  -- whose mask does not intersect `val` are absent.
  local function readflags(val, spec)
    local present = {}
    for name, mask in pairs(spec) do
      local bits = band(val, mask)
      if bits ~= 0 then
        present[name] = bits
      end
    end
    return present
  end
  --- Read a ULEB128-encoded unsigned integer.
  -- Each byte contributes its low 7 bits, least significant group first;
  -- a byte of 128 or more means that another byte follows.
  -- @tparam string str The string to read from.
  -- @tparam number pos 1-based position of the first byte.
  -- @treturn number The decoded value.
  -- @treturn number The position just after the last byte consumed.
  local function read_lj_uleb128(str, pos)
    local value, scale = 0, 1
    local byte = read_u8(str, pos)
    while byte >= 128 do
      value = value + (byte - 128) * scale
      scale = scale * 128
      pos = pos + 1
      byte = read_u8(str, pos)
    end
    return value + byte * scale, pos + 1
  end
  -- LuaJIT opcode table, indexed by opcode number (starting at 0).
  -- Every entry is {name, A operand type, B operand type, C/D operand type};
  -- a nil operand type means that the operand is unused. Instructions
  -- without a B operand use the combined 16-bit D operand instead of C.
  local lj_bcdef = {
    -- Comparison ops.
    [0] = {"ISLT", "var", nil, "var"},
    {"ISGE", "var", nil, "var"},
    {"ISLE", "var", nil, "var"},
    {"ISGT", "var", nil, "var"},
    --
    {"ISEQV", "var", nil, "var"},
    {"ISNEV", "var", nil, "var"},
    {"ISEQS", "var", nil, "str"},
    {"ISNES", "var", nil, "str"},
    {"ISEQN", "var", nil, "num"},
    {"ISNEN", "var", nil, "num"},
    {"ISEQP", "var", nil, "pri"},
    {"ISNEP", "var", nil, "pri"},
    -- Unary test and copy ops.
    {"ISTC", "dst", nil, "var"},
    {"ISFC", "dst", nil, "var"},
    {"IST", nil, nil, "var"},
    {"ISF", nil, nil, "var"},
    {"ISTYPE", "var", nil, "lit"},
    {"ISNUM", "var", nil, "lit"},
    -- Unary ops.
    {"MOV", "dst", nil, "var"},
    {"NOT", "dst", nil, "var"},
    {"UNM", "dst", nil, "var"},
    {"LEN", "dst", nil, "var"},
  }
  -- Binary arithmetic ops: each operand variant covers the same five
  -- operations. The VV variant takes two variable slots (not a numeric
  -- constant).
  for _, variant in ipairs {
    {"VN", "dst", "var", "num"},
    {"NV", "dst", "var", "num"},
    {"VV", "dst", "var", "var"},
  } do
    for _, name in ipairs {"ADD", "SUB", "MUL", "DIV", "MOD"} do
      -- Fields are copied explicitly so that the global `unpack` is not needed.
      lj_bcdef[#lj_bcdef + 1] = {
        name .. variant[1], variant[2], variant[3], variant[4],
      }
    end
  end
  for _, ent in ipairs {
    {"POW", "dst", "var", "var"},
    {"CAT", "dst", "rbase", "rbase"},
    -- Constant ops.
    {"KSTR", "dst", nil, "str"},
    {"KCDATA", "dst", nil, "cdata"},
    {"KSHORT", "dst", nil, "lits"},
    {"KNUM", "dst", nil, "num"},
    {"KPRI", "dst", nil, "pri"},
    -- KNIL clears a range of slots, so its A operand is a base.
    {"KNIL", "base", nil, "base"},
    -- Upvalue and function ops.
    {"UGET", "dst", nil, "uv"},
    {"USETV", "uv", nil, "var"},
    {"USETS", "uv", nil, "str"},
    {"USETN", "uv", nil, "num"},
    {"USETP", "uv", nil, "pri"},
    {"UCLO", "rbase", nil, "jump"},
    {"FNEW", "dst", nil, "func"},
    -- Table ops.
    {"TNEW", "dst", nil, "lit"},
    {"TDUP", "dst", nil, "tab"},
    {"GGET", "dst", nil, "str"},
    {"GSET", "var", nil, "str"},
    {"TGETV", "dst", "var", "var"},
    {"TGETS", "dst", "var", "str"},
    {"TGETB", "dst", "var", "lit"},
    {"TGETR", "dst", "var", "var"},
    {"TSETV", "var", "var", "var"},
    {"TSETS", "var", "var", "str"},
    {"TSETB", "var", "var", "lit"},
    {"TSETM", "base", nil, "num"},
    {"TSETR", "var", "var", "var"},
    -- Calls and vararg handling.
    {"CALLM", "base", "lit", "lit"},
    {"CALL", "base", "lit", "lit"},
    {"CALLMT", "base", nil, "lit"},
    {"CALLT", "base", nil, "lit"},
    {"ITERC", "base", "lit", "lit"},
    {"ITERN", "base", "lit", "lit"},
    {"VARG", "base", "lit", "lit"},
    {"ISNEXT", "base", nil, "jump"},
    -- Returns.
    {"RETM", "base", nil, "lit"},
    {"RET", "rbase", nil, "lit"},
    {"RET0", "rbase", nil, "lit"},
    {"RET1", "rbase", nil, "lit"},
    -- Loops and branches.
    {"FORI", "base", nil, "jump"},
    {"JFORI", "base", nil, "jump"},
    --
    {"FORL", "base", nil, "jump"},
    {"IFORL", "base", nil, "jump"},
    {"JFORL", "base", nil, "lit"},
    --
    {"ITERL", "base", nil, "jump"},
    {"IITERL", "base", nil, "jump"},
    {"JITERL", "base", nil, "lit"},
    --
    {"LOOP", "rbase", nil, "jump"},
    {"ILOOP", "rbase", nil, "jump"},
    {"JLOOP", "rbase", nil, "lit"},
    --
    {"JMP", "rbase", nil, "jump"},
    -- Function headers.
    {"FUNCF", "rbase", nil, nil},
    {"IFUNCF", "rbase", nil, nil},
    {"JFUNCF", "rbase", nil, "lit"},
    {"FUNCV", "rbase", nil, nil},
    {"IFUNCV", "rbase", nil, nil},
    {"JFUNCV", "rbase", nil, "lit"},
    {"FUNCC", "rbase", nil, nil},
    {"FUNCCW", "rbase", nil, nil},
  } do
    lj_bcdef[#lj_bcdef + 1] = ent
  end
  --- Read a length-prefixed byte string.
  -- The length is stored as a ULEB128 value directly before the data.
  -- @tparam string dump The string to read from.
  -- @tparam number pos 1-based position of the length prefix.
  -- @treturn string The data (empty when the length is zero).
  -- @treturn number The position just after the data.
  local function lj_read_nbytes(dump, pos)
    local len, first = read_lj_uleb128(dump, pos)
    local after = first + len
    -- For len == 0 the range is empty, which yields "" and after == first.
    return string.sub(dump, first, after - 1), after
  end
  --- Decode the instruction list of a prototype.
  -- Each instruction is a 32-bit little-endian word: the lowest byte is the
  -- opcode, the next byte is operand A, and the upper 16 bits are either a
  -- single operand D or the pair C (lower byte) and B (upper byte).
  -- @tparam table phead Prototype header; `numbc` is the instruction count.
  -- @tparam number pos 1-based position of the first instruction in `pstr`.
  -- @tparam string pstr The prototype body.
  -- @return[1] A list of instructions of the form `{name, A, D}` or
  -- `{name, A, B, C}`, where each operand is `{type = ..., value = ...}`.
  -- @treturn[1] number The position just after the last instruction.
  -- @treturn[2] nil If the section is truncated or has an unknown opcode.
  -- @treturn[2] string A message indicating the error.
  local function lj_parse_bytecode(phead, pos, pstr, _)
    local numbc = phead.numbc
    -- Fail cleanly instead of raising on nil arithmetic in read_u32le.
    if pos + 4 * numbc - 1 > #pstr then
      return nil, "Truncated bytecode section"
    end
    local inslist = {}
    for k = 1, numbc do
      local w = read_u32le(pstr, pos + 4 * (k - 1))
      local opcode = w % 256
      local op = lj_bcdef[opcode]
      if not op then
        -- Report the opcode number; `op` itself is nil on this path.
        return nil, ("Invalid opcode: %02X"):format(opcode)
      end
      local d = math.floor(w / 65536)
      local ins = {op[1], {type = op[2], value = math.floor(w / 256) % 256}}
      if op[3] then
        ins[3] = {type = op[3], value = math.floor(d / 256)}
        ins[4] = {type = op[4], value = d % 256}
      else
        ins[3] = {type = op[4], value = d}
      end
      inslist[k] = ins
    end
    return inslist, pos + 4 * numbc
  end
  --- Read the upvalue list of a prototype.
  -- @tparam table phead Prototype header; `numuv` is the number of entries.
  -- @tparam number pos 1-based position of the first entry in `pstr`.
  -- @tparam string pstr The prototype body.
  -- @treturn table The raw 16-bit upvalue entries, indexed from 1.
  -- @treturn number The position just after the last entry.
  local function lj_parse_uv(phead, pos, pstr, _)
    local uvlist = {}
    local cursor = pos
    for k = 1, phead.numuv do
      uvlist[k] = read_u16le(pstr, cursor)
      cursor = cursor + 2
    end
    return uvlist, cursor
  end
  -- Type tags of the entries of a template table. Tags greater than or
  -- equal to `str` denote a string whose length is `tag - str`.
  local lj_ktab_type = {
    ["nil"] = 0,
    ["false"] = 1,
    ["true"] = 2,
    int = 3,
    num = 4,
    str = 5,
  }
  --- Read one key or value of a template table.
  -- @tparam number pos 1-based position of the entry in `pstr`.
  -- @tparam string pstr The prototype body.
  -- @return[1] The decoded constant. This may be nil for a nil constant;
  -- in that case the second return value is still a number.
  -- @treturn[1] number The position just after the entry.
  -- @treturn[2] nil If the entry cannot be decoded.
  -- @treturn[2] string A message indicating the error.
  local function lj_parse_ktabk(_, pos, pstr, _)
    local tp, p2 = read_lj_uleb128(pstr, pos)
    if tp >= lj_ktab_type.str then
      local len = tp - lj_ktab_type.str
      return string.sub(pstr, p2, p2 + len - 1), p2 + len
    elseif tp == lj_ktab_type.int then
      local v, p3 = read_lj_uleb128(pstr, p2)
      -- Integers are stored as the unsigned 32-bit image of a signed
      -- 32-bit value, so the upper half of the range is negative.
      if v >= 2^31 then
        v = v - 2^32
      end
      return v, p3
    elseif tp == lj_ktab_type.num then
      -- A double stored as two 32-bit halves, low word first.
      local lo, p3 = read_lj_uleb128(pstr, p2)
      local hi, p4 = read_lj_uleb128(pstr, p3)
      local exp = band(brshift(hi, 20), 0x7ff)
      local mat = (band(hi, 0xfffff) + lo / 2^32) / 0x100000
      local sign = (-1) ^ brshift(hi, 31)
      if exp == 0 then
        -- Zero or subnormal: no implicit leading one.
        return sign * math.ldexp(mat, exp - 1022), p4
      elseif exp == 0x7ff then
        if mat == 0 then
          return sign * math.huge, p4
        end
        -- A non-zero mantissa here is a NaN, which is rejected.
        return nil, ("Bad numeric KTABK value 0x%08x%08x"):format(hi, lo)
      end
      return sign * math.ldexp(1 + mat, exp - 1023), p4
    elseif tp == lj_ktab_type["nil"] then
      return nil, p2
    elseif tp == lj_ktab_type["true"] then
      return true, p2
    elseif tp == lj_ktab_type["false"] then
      return false, p2
    end
    return nil, ("Bad KTABK constant type %d"):format(tp)
  end
  --- Read a template table constant.
  -- The data starts with the array size and the hash size (both ULEB128),
  -- followed by the array values and then the hash key/value pairs.
  -- @tparam table phead Prototype header (passed through to the entry reader).
  -- @tparam number pos 1-based position of the table data in `pstr`.
  -- @tparam string pstr The prototype body.
  -- @tparam table bcflags Dump flags (passed through to the entry reader).
  -- @treturn[1] table The decoded table; array values are indexed from 0.
  -- @treturn[1] number The position just after the table data.
  -- @treturn[2] nil If an entry cannot be decoded or a hash key is nil.
  -- @treturn[2] string A message indicating the error.
  local function lj_parse_ktab(phead, pos, pstr, bcflags)
    local narr, nhash
    narr, pos = read_lj_uleb128(pstr, pos)
    nhash, pos = read_lj_uleb128(pstr, pos)
    local tab = {}
    for idx = 0, narr - 1 do
      local value, nxt = lj_parse_ktabk(phead, pos, pstr, bcflags)
      -- A nil value is only an error if no position came back with it.
      if value == nil and type(nxt) ~= "number" then
        return nil, nxt
      end
      tab[idx] = value
      pos = nxt
    end
    for _ = 1, nhash do
      local key, after_key = lj_parse_ktabk(phead, pos, pstr, bcflags)
      if key == nil then
        if type(after_key) == "number" then
          return nil, "Table index is nil"
        end
        return nil, after_key
      end
      local value, nxt = lj_parse_ktabk(phead, after_key, pstr, bcflags)
      if value == nil and type(nxt) ~= "number" then
        return nil, nxt
      end
      tab[key] = value
      pos = nxt
    end
    return tab, pos
  end
  -- Type tags of collectable constants. Tags greater than or equal to
  -- `str` denote a string whose length is `tag - str`.
  local lj_kgc_type = {
    child = 0,
    tab = 1,
    str = 5,
  }
  --- Read the collectable constants of a prototype.
  -- Constants are stored in descending index order: the first one read is
  -- placed at index `numkgc - 1` and the last one at index 0.
  -- @tparam table phead Prototype header; `numkgc` is the constant count.
  -- @tparam number pos 1-based position of the first constant in `pstr`.
  -- @tparam string pstr The prototype body.
  -- @tparam table bcflags Dump flags; `top` is decremented for every child
  -- prototype that is referenced.
  -- @treturn[1] table The constants: strings, tables, or a number for a
  -- child prototype.
  -- @treturn[1] number The position just after the last constant.
  -- @treturn[2] nil If a constant cannot be decoded.
  -- @treturn[2] string A message indicating the error.
  local function lj_parse_kgc(phead, pos, pstr, bcflags)
    local gclist = {}
    local slot = phead.numkgc
    while slot > 0 do
      slot = slot - 1
      local tp, body = read_lj_uleb128(pstr, pos)
      if tp == lj_kgc_type.child then
        -- NOTE(review): the stored number is `bcflags.top - 1`, i.e. a
        -- position on a counter that shrinks here and grows elsewhere;
        -- confirm that it matches the prototype's list index when
        -- functions are nested more than one level deep.
        local idx = bcflags.top - 1
        if idx < 0 then
          return nil, "Child stack underflow"
        end
        bcflags.top = idx
        gclist[slot] = idx
        pos = body
      elseif tp == lj_kgc_type.tab then
        local tbl, nxt = lj_parse_ktab(phead, body, pstr, bcflags)
        if not tbl then
          return nil, nxt
        end
        gclist[slot] = tbl
        pos = nxt
      elseif tp >= lj_kgc_type.str then
        local len = tp - lj_kgc_type.str
        gclist[slot] = string.sub(pstr, body, body + len - 1)
        pos = body + len
      else
        return nil, ("Bad constant type %d"):format(tp)
      end
    end
    return gclist, pos
  end
  --- Parse the body of a prototype.
  -- The sections are read in order: instructions, upvalues, collectable
  -- constants. Anything after the collectable constants is not read.
  -- @tparam table phead The parsed prototype header.
  -- @tparam string pstr The prototype data following the header.
  -- @tparam table bcflags Dump flags, passed to every section parser.
  -- @treturn[1] table A table with the fields `header`, `bytecode`, `uv`
  -- and `kgc`.
  -- @treturn[2] nil If a section cannot be parsed.
  -- @treturn[2] string A message indicating the error.
  local function lj_parse_pdata_body(phead, pstr, bcflags)
    local sections = {
      {"bytecode", lj_parse_bytecode},
      {"uv", lj_parse_uv},
      {"kgc", lj_parse_kgc},
    }
    local pdata = {header = phead}
    local pos = 1
    for _, section in ipairs(sections) do
      local result, nxt = section[2](phead, pos, pstr, bcflags)
      if not result then
        -- On failure the second value is the error message.
        return nil, nxt
      end
      pdata[section[1]] = result
      pos = nxt
    end
    return pdata
  end
  -- Names of the prototype flag bits. The map is empty, so the `flags`
  -- field of a parsed header is always an empty table.
  local lj_proto_flags = {}
  --- Parse one prototype (header and body).
  -- The header consists of four single bytes (flags, parameter count, frame
  -- size, upvalue count) followed by ULEB128 counters. The debug fields are
  -- only present if the dump is not stripped.
  -- @tparam string pstr The prototype data, without its length prefix.
  -- @tparam table bcflags Dump flags; `strip` is read here.
  -- @treturn[1] table The parsed prototype.
  -- @treturn[2] nil If the prototype body cannot be parsed.
  -- @treturn[2] string A message indicating the error.
  local function lj_parse_proto(pstr, bcflags)
    local phead = {
      flags = readflags(read_u8(pstr, 1), lj_proto_flags),
      numparams = read_u8(pstr, 2),
      framesize = read_u8(pstr, 3),
      numuv = read_u8(pstr, 4),
    }
    local pos = 5
    phead.numkgc, pos = read_lj_uleb128(pstr, pos)
    phead.numkn, pos = read_lj_uleb128(pstr, pos)
    phead.numbc, pos = read_lj_uleb128(pstr, pos)
    if not bcflags.strip then
      phead.debuglen, pos = read_lj_uleb128(pstr, pos)
      -- The line information only follows a non-zero debug length.
      if phead.debuglen > 0 then
        phead.firstline, pos = read_lj_uleb128(pstr, pos)
        phead.numline, pos = read_lj_uleb128(pstr, pos)
      end
    end
    local body = string.sub(pstr, pos)
    return lj_parse_pdata_body(phead, body, bcflags)
  end
  -- Bit masks of the flags in the dump header.
  local lj_bcdump_flags = {
    be = 1,
    strip = 2,
    ffi = 4,
    fr2 = 8,
  }
  --- Parse a version 2 LuaJIT bytecode dump.
  -- @tparam string dump The dump without the magic bytes and version byte.
  -- @treturn[1] table A table with `chunkname` (nil when stripped or empty)
  -- and `prototypes` (a list in dump order).
  -- @treturn[2] nil If the dump is unsupported or cannot be parsed.
  -- @treturn[2] string A message indicating the error.
  local function parse_lj2(dump)
    local rawflags, pos = read_lj_uleb128(dump, 1)
    local flags = readflags(rawflags, lj_bcdump_flags)
    if flags.ffi then
      return nil, "LuaJIT bytecode dump with FFI is not supported"
    end
    if flags.be then
      return nil, "Big-endian LuaJIT bytecode is not supported"
    end
    local chunkname
    if not flags.strip then
      local cname
      cname, pos = lj_read_nbytes(dump, pos)
      if cname ~= "" then
        chunkname = cname
      end
    end
    local prototypes = {}
    -- `top` counts prototypes available to later prototypes as children.
    flags.top = 0
    local proto
    proto, pos = lj_read_nbytes(dump, pos)
    -- A zero-length prototype terminates the list.
    while proto ~= "" do
      local pdata, err = lj_parse_proto(proto, flags)
      if pdata == nil then
        return nil, err
      end
      flags.top = flags.top + 1
      prototypes[#prototypes + 1] = pdata
      proto, pos = lj_read_nbytes(dump, pos)
    end
    flags.top = nil
    return {
      chunkname = chunkname,
      prototypes = prototypes,
    }
  end
  --- Parse a LuaJIT bytecode dump of any supported version.
  -- @tparam string dump The dump without the three magic bytes; the first
  -- byte is the format version.
  -- @return[1] The result of the version-specific parser.
  -- @treturn[2] nil If the version is not 2.
  -- @treturn[2] string A message indicating the error.
  local function parse_lj(dump)
    if string.byte(dump, 1) ~= 2 then
      return nil, "Unsupported LuaJIT bytecode version"
    end
    return parse_lj2(string.sub(dump, 2))
  end
  --- Prefix a successful result with a status value.
  -- @param st The status value to prepend.
  -- @param ... The result to inspect.
  -- @return `st` followed by `...` if the first value of `...` is not nil;
  -- otherwise `...` unchanged (typically `nil` and an error message).
  local function ensure_result(st, ...)
    if select("#", ...) == 0 or (...) == nil then
      return ...
    end
    return st, ...
  end
  --- Try to parse a bytecode dump.
  -- A function argument is first converted with `string.dump`.
  -- @tparam string|function dump The bytecode input or the function to read.
  -- @return[1] "luajit" If `dump` is valid LuaJIT bytecode.
  -- @treturn[1] ... Data parsed from the bytecode.
  -- @treturn[2] nil If the dump cannot be parsed.
  -- @treturn[2] string A message indicating the error.
  function bc.parse(dump)
    local kind = type(dump)
    if kind == "function" then
      return bc.parse(string.dump(dump))
    end
    if kind ~= "string" then
      return nil, "Invalid bytecode dump type"
    end
    -- LuaJIT dumps start with the three bytes ESC 'L' 'J'.
    if string.sub(dump, 1, 3) ~= "\27LJ" then
      return nil, "Unsupported bytecode dump format"
    end
    return ensure_result("luajit", parse_lj(string.sub(dump, 4)))
  end
  -- Characters that have a short, named escape sequence.
  local escape_string_table = {
    ["\n"] = [[\n]],
    ["\r"] = [[\r]],
    ["\""] = [[\"]],
    ["\\"] = [[\\]],
  }
  --- Escape a string for display inside double quotes.
  -- Double quotes and backslashes are escaped so that the result cannot be
  -- confused with the end of the quoted string or with another escape.
  -- Other bytes outside printable ASCII are written as three-digit decimal
  -- escapes, which stay unambiguous when a digit follows.
  -- @tparam string str The string to escape.
  -- @treturn string The escaped string.
  local function escape_string(str)
    -- "." matches every byte, including NUL and newline, so the decision is
    -- made per byte instead of in a character class.
    return (string.gsub(str, ".", function(c)
      local named = escape_string_table[c]
      if named then
        return named
      end
      local code = string.byte(c)
      if code < 32 or code > 126 then
        return ([[\%03d]]):format(code)
      end
      -- Returning nil keeps the original character.
      return nil
    end))
  end
  --- Format one instruction operand.
  -- @tparam table proto The prototype; `kgc` is used to resolve constants.
  -- @tparam number line The 1-based index of the instruction.
  -- @tparam table value The operand as `{type = ..., value = ...}`.
  -- @treturn string The operand text: a blank for an unused operand, the
  -- jump target for a jump, the signed value for a 16-bit signed literal,
  -- and the raw number otherwise.
  -- @treturn ?string A description of the referenced constant, if the
  -- operand is a string or function constant that could be resolved.
  local function lj_value_tostring(proto, line, value)
    local kind, raw = value.type, value.value
    local plain = string.format("%3d", raw)
    if kind == nil then
      return " "
    end
    if kind == "jump" then
      -- Jump operands are biased; the target is relative to this line.
      return string.format("=> %04d", line + raw - 32767)
    end
    if kind == "lits" then
      if raw >= 32768 then
        return string.format("%3d", raw - 65536)
      end
      return plain
    end
    if kind == "str" or kind == "func" then
      local ref = proto.kgc[raw]
      if kind == "str" and type(ref) == "string" then
        return plain, string.format([["%s"]], escape_string(ref))
      elseif kind == "func" and type(ref) == "number" then
        return plain, string.format("BYTECODE %d", ref)
      end
    end
    return plain
  end
  --- Format the instruction listing of one prototype.
  -- @tparam number index The 1-based index of the prototype; the heading
  -- shows `index - 1`.
  -- @tparam table proto The parsed prototype.
  -- @treturn string The listing: a heading followed by one line per
  -- instruction. Lines that are the target of a jump are marked with "=>".
  local function lj_proto_tostring(index, proto)
    local code = proto.bytecode
    -- First pass: collect the lines that some jump operand points to.
    local is_target = {}
    for ln, ins in ipairs(code) do
      local operand = ins[3]
      if operand and operand.type == "jump" then
        is_target[ln + operand.value - 32767] = true
      end
    end
    -- Second pass: render every instruction.
    local out = {("-- BYTECODE -- %d"):format(index - 1)}
    for ln, ins in ipairs(code) do
      local cols = {
        ("%04d"):format(ln),
        is_target[ln] and "=>" or " ",
        ("%-6s"):format(ins[1]),
        (lj_value_tostring(proto, ln, ins[2])),
      }
      if ins[4] then
        -- Three-operand form: A, B and C.
        local bs, bn = lj_value_tostring(proto, ln, ins[3])
        local cs, cn = lj_value_tostring(proto, ln, ins[4])
        cols[#cols + 1] = ("%3d %3d"):format(bs, cs)
        if bn or cn then
          cols[#cols + 1] = ";"
          if bn then
            cols[#cols + 1] = bn
          end
          if cn then
            cols[#cols + 1] = cn
          end
        end
      else
        -- Two-operand form: A and D.
        local ds, dn = lj_value_tostring(proto, ln, ins[3])
        cols[#cols + 1] = ("%-7s"):format(ds)
        if dn then
          cols[#cols + 1] = ("; %s"):format(dn)
        end
      end
      out[#out + 1] = table.concat(cols, " ")
    end
    return table.concat(out, "\n")
  end
  --- Try to format a bytecode dump.
  -- @tparam string|function dump The bytecode input or the function to read.
  -- @return[1] "luajit" If `dump` is valid LuaJIT bytecode.
  -- @treturn[1] string A string describing the bytecode dump. The format is similar to that of `luajit -bl`
  -- @treturn[2] nil If the dump cannot be parsed.
  -- @treturn[2] string A message indicating the error.
  function bc.tostring(dump)
    local tp, data = bc.parse(dump)
    if tp ~= "luajit" then
      -- On failure `data` holds the error message from bc.parse.
      return nil, data
    end
    local listings = {}
    for k, proto in ipairs(data.prototypes) do
      listings[k] = lj_proto_tostring(k, proto)
    end
    -- Prototypes are separated by a blank line.
    return tp, table.concat(listings, "\n\n")
  end
  492. return bc