123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522 |
--- Bytecode parsing.
-- This module is experimental; its interface may change without notice.
-- @module advtrains_doc_integration.bc

-- Table of public functions; it is returned at the end of the file.
local bc = {}

-- Local aliases for the bit-library operations used by the decoders below.
local band, brshift = bit.band, bit.rshift
--- Read a single unsigned byte.
-- @tparam string str Buffer to read from.
-- @tparam number pos 1-based offset of the byte.
-- @treturn number The byte value; no value is returned when `pos` lies
-- outside `str`.
local function read_u8(str, pos)
	return string.byte(str, pos, pos)
end
--- Read an unsigned 16-bit little-endian integer.
-- @tparam string str Buffer to read from.
-- @tparam number pos 1-based offset of the low byte.
-- @treturn number Value in the range 0..65535.
local function read_u16le(str, pos)
	local lo, hi = string.byte(str, pos, pos + 1)
	-- The high byte carries a weight of 2^8.
	return hi * 256 + lo
end
--- Read an unsigned 32-bit little-endian integer.
-- @tparam string str Buffer to read from.
-- @tparam number pos 1-based offset of the least significant byte.
-- @treturn number Value in the range 0..2^32-1.
local function read_u32le(str, pos)
	local lsb, mid_lo, mid_hi, msb = string.byte(str, pos, pos + 3)
	return lsb + mid_lo * 2^8 + mid_hi * 2^16 + msb * 2^24
end
--- Split a bit field into a table of named flags.
-- @tparam number val The raw bit field.
-- @tparam table spec Map from flag name to bit mask.
-- @treturn table For every mask in `spec` that overlaps `val`, the flag name
-- mapped to the (non-zero) masked value. Flags that are not set are absent.
local function readflags(val, spec)
	local present = {}
	for name, mask in pairs(spec) do
		local masked = band(val, mask)
		if masked ~= 0 then
			present[name] = masked
		end
	end
	return present
end
--- Read a ULEB128-encoded unsigned integer.
-- Each byte contributes its low 7 bits, least significant group first; a byte
-- below 128 terminates the number.
-- @tparam string str Buffer to read from.
-- @tparam number pos 1-based offset of the first byte.
-- @treturn number The decoded value.
-- @treturn number Offset of the byte following the encoded number.
local function read_lj_uleb128(str, pos)
	local total, nread = 0, 0
	while true do
		local byte = read_u8(str, pos + nread)
		local weight = 128 ^ nread
		if byte >= 128 then
			-- Continuation byte: keep the low 7 bits and move on.
			total = total + (byte % 128) * weight
			nread = nread + 1
		else
			return total + byte * weight, pos + nread + 1
		end
	end
end
--- Opcode definitions, indexed by opcode number (the first opcode is 0).
-- Every entry has the shape {mnemonic, A type, B type, C-or-D type}.
-- A nil B type means the instruction carries one combined 16-bit operand
-- instead of separate B and C operands (see lj_parse_bytecode).
local lj_bcdef = {
	-- Ordered comparisons.
	[0] = {"ISLT", "var", nil, "var"},
	{"ISGE", "var", nil, "var"},
	{"ISLE", "var", nil, "var"},
	{"ISGT", "var", nil, "var"},

	-- Equality comparisons against variables, strings, numbers, primitives.
	{"ISEQV", "var", nil, "var"},
	{"ISNEV", "var", nil, "var"},
	{"ISEQS", "var", nil, "str"},
	{"ISNES", "var", nil, "str"},
	{"ISEQN", "var", nil, "num"},
	{"ISNEN", "var", nil, "num"},
	{"ISEQP", "var", nil, "pri"},
	{"ISNEP", "var", nil, "pri"},

	-- Truthiness and type tests.
	{"ISTC", "dst", nil, "var"},
	{"ISFC", "dst", nil, "var"},
	{"IST", nil, nil, "var"},
	{"ISF", nil, nil, "var"},
	{"ISTYPE", "var", nil, "lit"},
	{"ISNUM", "var", nil, "lit"},

	-- Moves and unary operators.
	{"MOV", "dst", nil, "var"},
	{"NOT", "dst", nil, "var"},
	{"UNM", "dst", nil, "var"},
	{"LEN", "dst", nil, "var"},
}
-- Arithmetic opcodes: three operand variants, each covering the same five
-- operators in the same order, appended right after the unary operators.
for _, variant in ipairs {
	{"VN", "dst", "var", "num"},
	{"NV", "dst", "var", "num"},
	-- In the VV form both source operands are variable slots.
	{"VV", "dst", "var", "var"},
} do
	for _, operator in ipairs {"ADD", "SUB", "MUL", "DIV", "MOD"} do
		-- Fields are copied explicitly so that no version-specific `unpack`
		-- function is needed.
		lj_bcdef[#lj_bcdef + 1] = {
			operator .. variant[1], variant[2], variant[3], variant[4],
		}
	end
end
-- Remaining opcodes, appended in opcode order after the arithmetic block.
do
	local remaining = {
		{"POW", "dst", "var", "var"},
		{"CAT", "dst", "rbase", "rbase"},

		-- Constant loads, upvalue access and closure creation.
		{"KSTR", "dst", nil, "str"},
		{"KCDATA", "dst", nil, "cdata"},
		{"KSHORT", "dst", nil, "lits"},
		{"KNUM", "dst", nil, "num"},
		{"KPRI", "dst", nil, "pri"},
		{"KNIL", "dst", nil, "base"},
		{"UGET", "dst", nil, "uv"},
		{"USETV", "uv", nil, "var"},
		{"USETS", "uv", nil, "str"},
		{"USETN", "uv", nil, "num"},
		{"USETP", "uv", nil, "pri"},
		{"UCLO", "rbase", nil, "jump"},
		{"FNEW", "dst", nil, "func"},

		-- Table creation, global access and table access.
		{"TNEW", "dst", nil, "lit"},
		{"TDUP", "dst", nil, "tab"},
		{"GGET", "dst", nil, "str"},
		{"GSET", "var", nil, "str"},
		{"TGETV", "dst", "var", "var"},
		{"TGETS", "dst", "var", "str"},
		{"TGETB", "dst", "var", "lit"},
		{"TGETR", "dst", "var", "var"},
		{"TSETV", "var", "var", "var"},
		{"TSETS", "var", "var", "str"},
		{"TSETB", "var", "var", "lit"},
		{"TSETM", "base", nil, "num"},
		{"TSETR", "var", "var", "var"},

		-- Calls, iterator calls and vararg handling.
		{"CALLM", "base", "lit", "lit"},
		{"CALL", "base", "lit", "lit"},
		{"CALLMT", "base", nil, "lit"},
		{"CALLT", "base", nil, "lit"},
		{"ITERC", "base", "lit", "lit"},
		{"ITERN", "base", "lit", "lit"},
		{"VARG", "base", "lit", "lit"},
		{"ISNEXT", "base", nil, "jump"},

		-- Returns.
		{"RETM", "base", nil, "lit"},
		{"RET", "rbase", nil, "lit"},
		{"RET0", "rbase", nil, "lit"},
		{"RET1", "rbase", nil, "lit"},

		-- Numeric for loops.
		{"FORI", "base", nil, "jump"},
		{"JFORI", "base", nil, "jump"},

		{"FORL", "base", nil, "jump"},
		{"IFORL", "base", nil, "jump"},
		{"JFORL", "base", nil, "lit"},

		-- Iterator loops.
		{"ITERL", "base", nil, "jump"},
		{"IITERL", "base", nil, "jump"},
		{"JITERL", "base", nil, "lit"},

		-- Generic loops and jumps.
		{"LOOP", "rbase", nil, "jump"},
		{"ILOOP", "rbase", nil, "jump"},
		{"JLOOP", "rbase", nil, "lit"},

		{"JMP", "rbase", nil, "jump"},

		-- Function headers.
		{"FUNCF", "rbase", nil, nil},
		{"IFUNCF", "rbase", nil, nil},
		{"JFUNCF", "rbase", nil, "lit"},
		{"FUNCV", "rbase", nil, nil},
		{"IFUNCV", "rbase", nil, nil},
		{"JFUNCV", "rbase", nil, "lit"},
		{"FUNCC", "rbase", nil, nil},
		{"FUNCCW", "rbase", nil, nil},
	}
	for i = 1, #remaining do
		lj_bcdef[#lj_bcdef + 1] = remaining[i]
	end
end
--- Read a length-prefixed byte string.
-- The length is stored as a ULEB128 number directly in front of the data.
-- @tparam string dump Buffer to read from.
-- @tparam number pos Offset of the length prefix.
-- @treturn string The payload ("" when the stored length is 0).
-- @treturn number Offset just past the payload.
local function lj_read_nbytes(dump, pos)
	local size, start = read_lj_uleb128(dump, pos)
	if size ~= 0 then
		local stop = start + size
		return string.sub(dump, start, stop - 1), stop
	end
	return "", start
end
--- Decode the instruction section of a prototype.
-- Every instruction is one 32-bit little-endian word: the lowest byte is the
-- opcode, the next byte is operand A, and the upper 16 bits are either the
-- combined operand D or, for three-operand instructions, C (low byte) and
-- B (high byte).
-- @tparam table phead Prototype header; `numbc` gives the instruction count.
-- @tparam number pos Offset of the first instruction in `pstr`.
-- @tparam string pstr Prototype body.
-- @param _ Unused (dump flags).
-- @treturn[1] table List of instructions. Each one is
-- `{mnemonic, A, D}` or `{mnemonic, A, B, C}`, where every operand is a
-- `{type = ..., value = ...}` table.
-- @treturn[1] number Offset just past the instruction section.
-- @treturn[2] nil
-- @treturn[2] string Error message.
local function lj_parse_bytecode(phead, pos, pstr, _)
	-- Refuse to read past the end of the buffer instead of failing on a nil
	-- byte halfway through.
	if pos + 4 * phead.numbc - 1 > #pstr then
		return nil, "Truncated bytecode section"
	end
	local inslist = {}
	for k = 1, phead.numbc do
		local word = read_u32le(pstr, pos + 4 * (k - 1))
		local opcode = word % 256
		local def = lj_bcdef[opcode]
		if not def then
			-- Format the numeric opcode; the failed table lookup yields nil.
			return nil, ("Invalid opcode: %02X"):format(opcode)
		end
		local ins = {def[1], {type = def[2], value = math.floor(word / 256) % 256}}
		if def[3] then
			ins[3] = {type = def[3], value = math.floor(word / 256 ^ 3) % 256}
			ins[4] = {type = def[4], value = math.floor(word / 256 ^ 2) % 256}
		else
			ins[3] = {type = def[4], value = math.floor(word / 256 ^ 2)}
		end
		inslist[k] = ins
	end
	return inslist, pos + 4 * phead.numbc
end
--- Decode the upvalue section of a prototype.
-- @tparam table phead Prototype header; `numuv` gives the entry count.
-- @tparam number pos Offset of the first entry in `pstr`.
-- @tparam string pstr Prototype body.
-- @param _ Unused (dump flags).
-- @treturn table List of raw 16-bit upvalue descriptors.
-- @treturn number Offset just past the upvalue section.
local function lj_parse_uv(phead, pos, pstr, _)
	local count = phead.numuv
	local uvlist = {}
	local offset = pos
	for k = 1, count do
		uvlist[k] = read_u16le(pstr, offset)
		offset = offset + 2
	end
	return uvlist, pos + 2 * count
end
--- Type tags of the constants stored inside a template table.
-- Any tag greater than or equal to `str` denotes a string whose length is
-- `tag - str`.
local lj_ktab_type = {
	["nil"] = 0,
	["false"] = 1,
	["true"] = 2,
	int = 3,
	num = 4,
	str = 5,
}

--- Decode one template-table constant.
-- @param _ Unused (prototype header).
-- @tparam number pos Offset of the constant's type tag in `pstr`.
-- @tparam string pstr Prototype body.
-- @param _ Unused (dump flags).
-- @return[1] The decoded value; nil for a nil constant.
-- @treturn[1] number Offset just past the constant.
-- @treturn[2] nil
-- @treturn[2] string Error message. Callers tell this case apart from a nil
-- constant by checking that the second result is not a number.
local function lj_parse_ktabk(_, pos, pstr, _)
	local tp, p2 = read_lj_uleb128(pstr, pos)
	if tp >= lj_ktab_type.str then
		local len = tp - lj_ktab_type.str
		return string.sub(pstr, p2, p2 + len - 1), p2 + len
	elseif tp == lj_ktab_type.int then
		local v, p3 = read_lj_uleb128(pstr, p2)
		-- Integers are written as their unsigned 32-bit pattern, so values in
		-- the upper half of that range stand for negative numbers.
		if v >= 2 ^ 31 then
			v = v - 2 ^ 32
		end
		return v, p3
	elseif tp == lj_ktab_type.num then
		-- A double is stored as two 32-bit halves: low word first, then the
		-- high word holding sign (bit 31), exponent (11 bits) and the top
		-- 20 mantissa bits.
		local lo, p3 = read_lj_uleb128(pstr, p2)
		local hi, p4 = read_lj_uleb128(pstr, p3)
		local exp = band(brshift(hi, 20), 0x7ff)
		local mat = (band(hi, 0xfffff) + lo / 2 ^ 32) / 0x100000
		local sign = (-1) ^ brshift(hi, 31)
		if exp == 0 then
			-- Zero or subnormal: no implicit leading 1.
			return sign * math.ldexp(mat, exp - 1022), p4
		elseif exp == 0x7ff then
			if mat == 0 then
				return sign * math.huge, p4
			end
			-- Remaining all-ones exponents are NaN patterns; reject them.
			return nil, ("Bad numeric KTABK value 0x%08x%08x"):format(hi, lo)
		end
		return sign * math.ldexp(1 + mat, exp - 1023), p4
	elseif tp == lj_ktab_type["nil"] then
		return nil, p2
	elseif tp == lj_ktab_type["true"] then
		return true, p2
	elseif tp == lj_ktab_type["false"] then
		return false, p2
	end
	return nil, ("Bad KTABK constant type %d"):format(tp)
end
--- Decode a template table.
-- The encoding is: array size, hash size, then the array values followed by
-- the hash key/value pairs. Array values are stored under keys starting at 0.
-- @tparam table phead Prototype header (passed through to the item decoder).
-- @tparam number pos Offset of the table in `pstr`.
-- @tparam string pstr Prototype body.
-- @tparam table bcflags Dump flags (passed through to the item decoder).
-- @treturn[1] table The decoded table.
-- @treturn[1] number Offset just past the table.
-- @treturn[2] nil
-- @treturn[2] string Error message.
local function lj_parse_ktab(phead, pos, pstr, bcflags)
	local narr, after_narr = read_lj_uleb128(pstr, pos)
	local nhash, cursor = read_lj_uleb128(pstr, after_narr)
	local tab = {}

	for index = 0, narr - 1 do
		local value, nxt = lj_parse_ktabk(phead, cursor, pstr, bcflags)
		-- A nil value is legitimate; only a non-numeric second result
		-- signals a decoding error.
		if value == nil and type(nxt) ~= "number" then
			return nil, nxt
		end
		tab[index] = value
		cursor = nxt
	end

	for _ = 1, nhash do
		local key, after_key = lj_parse_ktabk(phead, cursor, pstr, bcflags)
		if key == nil then
			if type(after_key) ~= "number" then
				return nil, after_key
			end
			return nil, "Table index is nil"
		end
		local value, nxt = lj_parse_ktabk(phead, after_key, pstr, bcflags)
		if value == nil and type(nxt) ~= "number" then
			return nil, nxt
		end
		tab[key] = value
		cursor = nxt
	end

	return tab, cursor
end
--- Type tags of the collectable constants handled by this parser.
-- Any tag greater than or equal to `str` denotes a string whose length is
-- `tag - str`.
local lj_kgc_type = {
	child = 0,
	tab = 1,
	str = 5,
}

--- Decode the collectable-constant section of a prototype.
-- Constants are stored in descending index order: the first one read gets
-- index `numkgc - 1` and the last one index 0.
-- @tparam table phead Prototype header; `numkgc` gives the entry count.
-- @tparam number pos Offset of the first constant in `pstr`.
-- @tparam string pstr Prototype body.
-- @tparam table bcflags Dump flags. `bcflags.top` is the number of entries on
-- the child stack and is decremented for every child constant read.
-- @treturn[1] table 0-based list of constants: strings, template tables, or
-- (for child prototypes) the stack slot the child was taken from.
-- @treturn[1] number Offset just past the section.
-- @treturn[2] nil
-- @treturn[2] string Error message.
local function lj_parse_kgc(phead, pos, pstr, bcflags)
	local gclist = {}
	for k = phead.numkgc - 1, 0, -1 do
		local tag, after = read_lj_uleb128(pstr, pos)
		if tag == lj_kgc_type.child then
			-- Take the most recently pushed entry off the child stack.
			local idx = bcflags.top - 1
			if idx < 0 then
				return nil, "Child stack underflow"
			end
			bcflags.top = idx
			gclist[k] = idx
			pos = after
		elseif tag == lj_kgc_type.tab then
			local tbl, nxt = lj_parse_ktab(phead, after, pstr, bcflags)
			if not tbl then
				return nil, nxt
			end
			gclist[k] = tbl
			pos = nxt
		elseif tag >= lj_kgc_type.str then
			local stop = after + (tag - lj_kgc_type.str)
			gclist[k] = string.sub(pstr, after, stop - 1)
			pos = stop
		else
			return nil, ("Bad constant type %d"):format(tag)
		end
	end
	return gclist, pos
end
--- Decode the body of a prototype (everything after its header).
-- The sections are read back to back: instructions, upvalues, then
-- collectable constants.
-- @tparam table phead Parsed prototype header.
-- @tparam string pstr Prototype body, starting at the first instruction.
-- @tparam table bcflags Dump flags.
-- @treturn[1] table `{header, bytecode, uv, kgc}`.
-- @treturn[2] nil
-- @treturn[2] string Error message from the section that failed.
local function lj_parse_pdata_body(phead, pstr, bcflags)
	local sections = {
		{"bytecode", lj_parse_bytecode},
		{"uv", lj_parse_uv},
		{"kgc", lj_parse_kgc},
	}
	local pdata = {header = phead}
	local pos = 1
	for _, section in ipairs(sections) do
		local result, nxt = section[2](phead, pos, pstr, bcflags)
		if not result then
			return nil, nxt
		end
		pdata[section[1]] = result
		pos = nxt
	end
	return pdata
end
--- Named prototype flag masks. No flags are decoded at present.
local lj_proto_flags = {}

--- Decode one prototype.
-- The header consists of four single bytes (flags, parameter count, frame
-- size, upvalue count) followed by ULEB128 counts; unstripped dumps also
-- carry debug-info sizes.
-- @tparam string pstr The prototype's bytes without its length prefix.
-- @tparam table bcflags Dump flags; `strip` tells whether debug fields exist.
-- @treturn[1] table Parsed prototype, see lj_parse_pdata_body.
-- @treturn[2] nil
-- @treturn[2] string Error message.
local function lj_parse_proto(pstr, bcflags)
	local phead = {
		flags = readflags(read_u8(pstr, 1), lj_proto_flags),
		numparams = read_u8(pstr, 2),
		framesize = read_u8(pstr, 3),
		numuv = read_u8(pstr, 4),
	}
	local pos
	phead.numkgc, pos = read_lj_uleb128(pstr, 5)
	phead.numkn, pos = read_lj_uleb128(pstr, pos)
	phead.numbc, pos = read_lj_uleb128(pstr, pos)
	if not bcflags.strip then
		phead.debuglen, pos = read_lj_uleb128(pstr, pos)
		-- The line fields are only present when there is debug info.
		if phead.debuglen > 0 then
			phead.firstline, pos = read_lj_uleb128(pstr, pos)
			phead.numline, pos = read_lj_uleb128(pstr, pos)
		end
	end
	local body = string.sub(pstr, pos)
	return lj_parse_pdata_body(phead, body, bcflags)
end
--- Flag bits of the dump header.
local lj_bcdump_flags = {
	be = 1,
	strip = 2,
	ffi = 4,
	fr2 = 8,
}

--- Parse a version 2 LuaJIT dump.
-- @tparam string dump The dump without its magic and version bytes.
-- @treturn[1] table `{chunkname = string|nil, prototypes = {...}}`. Child
-- references in each prototype's `kgc` list are 0-based indices into
-- `prototypes`, matching the numbers printed in the "-- BYTECODE --" headers.
-- @treturn[2] nil
-- @treturn[2] string Error message.
local function parse_lj2(dump)
	local rawflags, pos = read_lj_uleb128(dump, 1)
	local flags = readflags(rawflags, lj_bcdump_flags)
	if flags.ffi then
		return nil, "LuaJIT bytecode dump with FFI is not supported"
	elseif flags.be then
		return nil, "Big-endian LuaJIT bytecode is not supported"
	end

	-- Unstripped dumps carry the chunk name right after the flags.
	local chunkname
	if not flags.strip then
		local cname, p2 = lj_read_nbytes(dump, pos)
		pos = p2
		if cname ~= "" then
			chunkname = cname
		end
	end

	local prototypes = {}
	-- Stack of 0-based prototype indices not yet claimed by a parent.
	-- Its height always equals flags.top when a prototype starts parsing.
	local pending = {}
	flags.top = 0
	while true do
		local proto, p2 = lj_read_nbytes(dump, pos)
		pos = p2
		-- A zero-length prototype terminates the list.
		if proto == "" then
			break
		end
		local pdata, err = lj_parse_proto(proto, flags)
		if pdata == nil then
			return nil, err
		end
		-- lj_parse_kgc records each child as the stack slot it was popped
		-- from. That slot equals the prototype index only while nothing has
		-- been popped before, so translate it through the real stack.
		for slot = 0, pdata.header.numkgc - 1 do
			local ref = pdata.kgc[slot]
			if type(ref) == "number" then
				pdata.kgc[slot] = pending[ref + 1]
			end
		end
		-- Drop the children this prototype consumed, then push the prototype.
		for i = #pending, flags.top + 1, -1 do
			pending[i] = nil
		end
		pending[#pending + 1] = #prototypes
		flags.top = flags.top + 1
		prototypes[#prototypes + 1] = pdata
	end
	flags.top = nil
	return {
		chunkname = chunkname,
		prototypes = prototypes,
	}
end
--- Dispatch on the dump format version.
-- @tparam string dump The dump without its three-byte magic.
-- @treturn[1] table Parsed dump, see parse_lj2.
-- @treturn[2] nil
-- @treturn[2] string Error message.
local function parse_lj(dump)
	if string.byte(dump, 1, 1) ~= 2 then
		return nil, "Unsupported LuaJIT bytecode version"
	end
	return parse_lj2(string.sub(dump, 2))
end
--- Prefix successful results with a status tag.
-- @param st The tag to put in front of the results.
-- @param ... Results of a parser.
-- @return `st` followed by the results when the first result is not nil;
-- otherwise the results unchanged (typically `nil, message`).
local function ensure_result(st, ...)
	if select("#", ...) == 0 or (...) == nil then
		return ...
	end
	return st, ...
end
--- Try to parse a bytecode dump.
-- A function argument is first converted with `string.dump`.
-- @tparam string|function dump The bytecode, or the function to dump and read.
-- @return[1] "luajit" If `dump` is valid LuaJIT bytecode.
-- @treturn[1] ... Data parsed from the bytecode.
-- @treturn[2] nil If the dump cannot be parsed.
-- @treturn[2] string A message indicating the error.
function bc.parse(dump)
	local kind = type(dump)
	if kind == "function" then
		return bc.parse(string.dump(dump))
	end
	if kind ~= "string" then
		return nil, "Invalid bytecode dump type"
	end
	-- LuaJIT dumps start with the three-byte magic ESC 'L' 'J'.
	if string.sub(dump, 1, 3) ~= "\27LJ" then
		return nil, "Unsupported bytecode dump format"
	end
	return ensure_result("luajit", parse_lj(string.sub(dump, 4)))
end
--- Short escape sequences for characters that have one.
local escape_string_table = {
	["\n"] = [[\n]],
	["\r"] = [[\r]],
	["\""] = [[\"]],
	["\\"] = [[\\]],
}

--- Escape a string so that it can be shown between double quotes.
-- Control characters, bytes from 127 upwards, double quotes and backslashes
-- are replaced; everything else is copied unchanged.
-- @tparam string str The raw string.
-- @treturn string The escaped string.
local function escape_string(str)
	-- The set must include the quote and the backslash, otherwise their table
	-- entries can never be reached.
	return (string.gsub(str, "[%c\127-\255\"\\]", function(c)
		-- Always emit three digits so that a following digit character cannot
		-- be read as part of the escape. NUL takes this path too.
		return escape_string_table[c] or ([[\%03d]]):format(string.byte(c))
	end))
end
--- Format one instruction operand.
-- @tparam table proto Parsed prototype; its `kgc` list resolves constants.
-- @tparam number line 1-based index of the instruction (used for jumps).
-- @tparam table value Operand as `{type = ..., value = ...}`.
-- @treturn string The operand text.
-- @treturn[opt] string A comment naming the referenced string constant or
-- child prototype, when the operand refers to one.
local function lj_value_tostring(proto, line, value)
	local kind, raw = value.type, value.value
	local plain = string.format("%3d", raw)
	if kind == nil then
		return " "
	end
	if kind == "jump" then
		-- Jump operands are biased; show the absolute target line instead.
		return string.format("=> %04d", line + raw - 32767)
	end
	if kind == "lits" then
		-- Signed 16-bit literal: the upper half of the range is negative.
		if raw >= 32768 then
			return string.format("%3d", raw - 65536)
		end
		return plain
	end
	if kind == "str" or kind == "func" then
		local ref = proto.kgc[raw]
		if kind == "str" and type(ref) == "string" then
			return plain, string.format([["%s"]], escape_string(ref))
		elseif kind == "func" and type(ref) == "number" then
			return plain, string.format("BYTECODE %d", ref)
		end
	end
	return plain
end
--- Render one prototype as a bytecode listing.
-- @tparam number index 1-based position of the prototype in the dump; the
-- header line shows `index - 1`.
-- @tparam table proto Parsed prototype.
-- @treturn string The listing, one instruction per line below a header line.
local function lj_proto_tostring(index, proto)
	local code = proto.bytecode
	local out = {("-- BYTECODE -- %d"):format(index - 1)}

	-- First pass: collect every line that some jump points at.
	local is_target = {}
	for ln, ins in ipairs(code) do
		local operand = ins[3]
		if operand and operand.type == "jump" then
			is_target[ln + operand.value - 32767] = true
		end
	end

	-- Second pass: format the instructions.
	for ln, ins in ipairs(code) do
		local cols = {
			("%04d"):format(ln),
			is_target[ln] and "=>" or " ",
			("%-6s"):format(ins[1]),
		}
		cols[#cols + 1] = lj_value_tostring(proto, ln, ins[2])
		if ins[4] then
			-- Three-operand form: B and C share one column group.
			local bs, bn = lj_value_tostring(proto, ln, ins[3])
			local cs, cn = lj_value_tostring(proto, ln, ins[4])
			cols[#cols + 1] = ("%3d %3d"):format(bs, cs)
			if bn or cn then
				cols[#cols + 1] = ";"
				-- Assigning nil appends nothing, so absent notes are skipped.
				cols[#cols + 1] = bn
				cols[#cols + 1] = cn
			end
		else
			local ds, dn = lj_value_tostring(proto, ln, ins[3])
			cols[#cols + 1] = ("%-7s"):format(ds)
			if dn then
				cols[#cols + 1] = ("; %s"):format(dn)
			end
		end
		out[#out + 1] = table.concat(cols, " ")
	end
	return table.concat(out, "\n")
end
--- Try to format a bytecode dump.
-- @tparam string|function dump The bytecode, or the function to dump and read.
-- @return[1] "luajit" If `dump` is valid LuaJIT bytecode.
-- @treturn[1] string A listing of the dump, similar in layout to the output
-- of `luajit -bl`. Prototypes are separated by an empty line.
-- @treturn[2] nil If the dump cannot be parsed.
-- @treturn[2] string A message indicating the error.
function bc.tostring(dump)
	local tp, data = bc.parse(dump)
	if tp ~= "luajit" then
		return nil, data
	end
	local listings = {}
	for k, proto in ipairs(data.prototypes) do
		listings[k] = lj_proto_tostring(k, proto)
	end
	return tp, table.concat(listings, "\n\n")
end

return bc
|