gen_dwarf.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409
  1. #! /usr/bin/env python3
  2. """
  3. Embedded unwind-table generator.
  4. """
  5. import io
  6. import re
  7. from struct import unpack_from, pack, unpack
  8. from subprocess import Popen, PIPE, DEVNULL
  9. import sys
  10. U64_MAX = 0xffffffffffffffff
  11. U32_MAX = U64_MAX >> 32
  12. # Globals set up by the initialization routines.
  13. WORD_SIZE = None
  14. LP64 = None
  15. BASE_ADDR = None
  16. # DWARF constants used below.
  17. DW_EH_PE_absptr = 0x00
  18. DW_EH_PE_omit = 0xff
  19. DW_EH_PE_uleb128 = 0x01
  20. DW_EH_PE_udata2 = 0x02
  21. DW_EH_PE_udata4 = 0x03
  22. DW_EH_PE_udata8 = 0x04
  23. DW_EH_PE_sleb128 = 0x09
  24. DW_EH_PE_sdata2 = 0x0a
  25. DW_EH_PE_sdata4 = 0x0b
  26. DW_EH_PE_sdata8 = 0x0c
  27. DW_EH_PE_signed = 0x09
  28. DW_EH_PE_pcrel = 0x10
  29. DW_EH_PE_indirect = 0x80
  30. DW_EH_PE_aligned = 0x50
  31. def unpack_1 (fmt, bx, ix):
  32. "Convenience wrapper to unpack only the first value."
  33. return unpack_from(fmt, bx, ix)[0]
  34. def encode_uleb (value):
  35. "Generate the byte sequence for an ULEB-128 value."
  36. while True:
  37. byte = value & 0x7f
  38. value >>= 7
  39. if value:
  40. byte |= 0x80
  41. yield byte
  42. if not value:
  43. break
  44. def read_uleb (bx, ix):
  45. """
  46. Read an ULEB-128 from a bytes object at an offset.
  47. Returns the unsigned value and the number of bytes read.
  48. """
  49. ret = shift = 0
  50. off = ix
  51. while True:
  52. byte = bx[off]
  53. off += 1
  54. ret |= (byte & 0x7f) << shift
  55. if (byte & 0x80) == 0:
  56. break
  57. shift += 7
  58. return ret, off - ix
  59. def read_sleb (bx, ix):
  60. """
  61. Read an SLEB-128 from a bytes object at an offset.
  62. Returns the signed value and the number of bytes read.
  63. """
  64. mask = [0xffffff80, 0xffffc000, 0xffe00000, 0xf0000000, 0]
  65. bitmask = [0x40, 0x40, 0x40, 0x40, 0x8]
  66. value = i = tmp = 0
  67. for i in range (0, 5):
  68. tmp = bx[ix + i] & 0x7f
  69. value = tmp << (i * 7) | value
  70. if (bx[ix + i] & 0x80) != 0x80:
  71. if bitmask[i] & tmp:
  72. value |= mask[i]
  73. break
  74. if i == 4 and (tmp & 0xf0) != 0:
  75. raise ValueError ("invalid sleb128")
  76. return unpack("i", pack ("I", value))[0], i + 1
  77. ENCPTR_FORMATS =\
  78. {
  79. DW_EH_PE_udata2: ("=H", 2),
  80. DW_EH_PE_sdata2: ("=h", 2),
  81. DW_EH_PE_udata4: ("=I", 4),
  82. DW_EH_PE_sdata4: ("=i", 4),
  83. DW_EH_PE_udata8: ("=Q", 8),
  84. DW_EH_PE_sdata8: ("=q", 8)
  85. }
  86. def read_encptr (bx, ix, enc, pc):
  87. if enc == DW_EH_PE_omit:
  88. return 0, 0
  89. elif enc == DW_EH_PE_aligned:
  90. size = WORD_SIZE
  91. new_ix = (ix + size - 1) & ~(size - 1)
  92. ret = unpack_1 ("=Q" if LP64 else "=I", bx, new_ix)
  93. return ret, size + new_ix - ix
  94. xe = enc & 0x70
  95. if xe == DW_EH_PE_absptr:
  96. base = 0
  97. elif xe == DW_EH_PE_pcrel:
  98. base = pc
  99. else:
  100. raise ValueError ("unsupported pointer application value")
  101. if (enc & 0x7) == 0:
  102. enc |= DW_EH_PE_udata8 if LP64 else DW_EH_PE_udata4
  103. data = ENCPTR_FORMATS.get (enc & 0xf)
  104. if data:
  105. ret = unpack_1 (data[0], bx, ix)
  106. off = data[1]
  107. elif xe == DW_EH_PE_uleb128:
  108. ret, off = read_uleb (bx, ix)
  109. elif xe == DW_EH_PE_sleb128:
  110. ret, off = read_sleb (bx, ix)
  111. else:
  112. raise ValueError ("unsupported data encoding")
  113. if ret == 0:
  114. # 0 is always an absolute value.
  115. return 0, off
  116. ret += base
  117. if (enc & DW_EH_PE_indirect) != 0:
  118. new_off = ret - BASE_ADDR
  119. ret = unpack_1 ("=Q" if LP64 else "=I", bx, new_off)
  120. return ret, off
  121. class CIE:
  122. def __init__ (self, dw_id):
  123. self.dw_id = dw_id
  124. self.array_idx = -1
  125. self.code_enc = DW_EH_PE_absptr
  126. def __hash__ (self):
  127. return hash ((self.code_align, self.ret_addr, self.data_align,
  128. self.code_enc, self.opcodes))
  129. def __eq__ (self, x):
  130. return (self.code_align == x.code_align and
  131. self.ret_addr == x.ret_addr and
  132. self.data_align == x.data_align and
  133. self.code_enc == x.code_enc and
  134. self.opcodes == x.opcodes)
  135. class Opcodes:
  136. """
  137. We try to minimize the sequence of opcodes used by both CIE's and
  138. FDE's by accumulating them in a hash table. This is the class that
  139. is hashed and stored. Unlike raw DWARF opcodes, our types have the
  140. size embedded in them, so knowing the end pointer is a fast operation.
  141. """
  142. def __init__ (self, bx):
  143. self.opcodes = bx
  144. self.header = list (encode_uleb (len (bx)))
  145. def __hash__ (self):
  146. return hash (self.opcodes)
  147. def __eq__ (self, x):
  148. if isinstance (x, Opcodes):
  149. return self.opcodes == x.opcodes
  150. return self.opcodes == x
  151. def __len__ (self):
  152. return len (self.opcodes) + len (self.header)
  153. class DwarfState:
  154. def __init__ (self):
  155. self.cies_by_id = {}
  156. self.raw_cies = {}
  157. self.pc = []
  158. self.ops = {}
  159. self.ops_len = 0
  160. def add_opcodes (self, bx):
  161. if all (byte == 0 for byte in bx):
  162. bx = b'\0'
  163. ops = self.ops
  164. pos = ops.get (bx)
  165. if pos is None:
  166. obj = Opcodes (bx)
  167. ops[obj] = pos = self.ops_len
  168. self.ops_len += len (obj)
  169. return pos
  170. def add_cie (self, cie):
  171. val = self.raw_cies.get (cie)
  172. if val is None:
  173. cie.array_idx = len (self.raw_cies)
  174. self.raw_cies[cie] = val = cie
  175. self.cies_by_id[val.dw_id] = val
  176. cie.opcodes_idx = self.add_opcodes (cie.opcodes)
  177. def add_fde (self, cie, lstart, lend, opcodes):
  178. ops_idx = self.add_opcodes (opcodes)
  179. self.pc.append ((lstart, lend, cie.array_idx | (ops_idx << 8)))
  180. def get_cie (self, cie_id):
  181. return self.cies_by_id[cie_id]
  182. def process_cie (bx, state, ix, rlen, start):
  183. "Add a CIE to the DWARF state."
  184. cie = CIE (start)
  185. ver = bx[ix]
  186. ix += 1
  187. aug_ix = ix
  188. while bx[ix] != 0:
  189. ix += 1
  190. if bx[aug_ix] == 101 and bx[aug_ix + 1] == 104:
  191. # Ignore GNU 'eh' augmentation data.
  192. ix += WORD_SIZE
  193. aug_ix += 2
  194. ix += 1
  195. cie.code_align, off = read_uleb (bx, ix)
  196. ix += off
  197. cie.data_align, off = read_sleb (bx, ix)
  198. ix += off
  199. if ver == 3:
  200. cie.ret_addr, off = read_uleb (bx, ix)
  201. ix += off
  202. else:
  203. cie.ret_addr = bx[ix]
  204. ix += 1
  205. istart = ix
  206. while True:
  207. ch = bx[aug_ix]
  208. if ch == 0:
  209. break
  210. elif ch == 122: # 'z'
  211. val, off = read_uleb (bx, ix)
  212. ix += off
  213. istart = ix + val
  214. elif ch == 82: # 'R'
  215. cie.code_enc = bx[ix]
  216. ix += 1
  217. aug_ix += 1
  218. cie.opcodes = bx[istart:rlen]
  219. state.add_cie (cie)
  220. def process_fde (bx, state, ix, cie_id, lp64, rlen):
  221. "Add an FDE to the DWARF state."
  222. cie_id = ix - cie_id - (8 if lp64 else 4)
  223. cie = state.get_cie (cie_id)
  224. initial_loc, off = read_encptr (bx, ix, cie.code_enc, ix)
  225. ix += off
  226. addr_range, off = read_encptr (bx, ix, cie.code_enc & 0xf, ix)
  227. ix += off
  228. initial_loc += BASE_ADDR
  229. state.add_fde (cie, initial_loc, addr_range, bx[ix:rlen])
  230. def process_dwarf (bx):
  231. """
  232. Given a bytes object that contains the .eh_frame section of an ELF file,
  233. produce a condensed view of the unwind information to be used by the
  234. kernel at runtime.
  235. """
  236. ix, end = (0, len (bx))
  237. state = DwarfState ()
  238. while ix < end:
  239. start = ix
  240. ulen = unpack_1 ("=I", bx, ix)
  241. if ulen == 0:
  242. break
  243. ix += 4
  244. initlen = ulen
  245. lp64 = False
  246. if ulen == U32_MAX:
  247. lp64 = True
  248. initlen = unpack_1 ("=Q", bx, ix)
  249. ix += 8
  250. new_ix = ix + initlen
  251. if lp64:
  252. cie_id = unpack_1 ("=Q", bx, ix)
  253. ix += 8
  254. else:
  255. cie_id = unpack_1 ("=I", bx, ix)
  256. ix += 4
  257. if cie_id == U32_MAX:
  258. cie_id = U64_MAX
  259. if cie_id == 0:
  260. process_cie (bx, state, ix, new_ix, start)
  261. else:
  262. process_fde (bx, state, ix, cie_id, lp64, new_ix)
  263. ix = new_ix
  264. return state
  265. def gen_dwarf (stdin):
  266. """
  267. Accumulate all the information from the .eh_frame section and
  268. return it as a bytes object. During the process, also fetch some
  269. data regarding base load address and others.
  270. """
  271. global BASE_ADDR
  272. rx = re.compile (('0x[0-9a-fA-F]* ([0-9a-fA-f]*) '
  273. '([0-9a-fA-f]*) ([0-9a-fA-f]*) ([0-9a-fA-f]*)'))
  274. bx = io.BytesIO ()
  275. for line in stdin:
  276. line = line.lstrip ()
  277. match = rx.match (line)
  278. if not match:
  279. continue
  280. elif BASE_ADDR is None:
  281. BASE_ADDR = int (line[:line.find (' ')], 16)
  282. for i in range (1, 5):
  283. try:
  284. val = int (match.group (i), 16)
  285. except ValueError:
  286. if not match.group (i):
  287. continue
  288. raise
  289. bx.write (val.to_bytes (4, byteorder = 'big', signed = False))
  290. bx.flush ()
  291. return bx.getvalue ()
  292. def output_dwarf (state):
  293. # FDAs should already be sorted by address, but just in case.
  294. pcs = sorted (state.pc, key = lambda elem: elem[0])
  295. lo_pc = pcs[0][0]
  296. print ("#include <kern/unwind.h>\n")
  297. # Output CIE's.
  298. print ("static const struct unw_cie unw_cies[] __unwind =\n{")
  299. for cie in state.raw_cies:
  300. print (" { 0x%x, 0x%x, %d, %d, %d }," % (cie.code_align, cie.ret_addr,
  301. cie.data_align, cie.code_enc,
  302. cie.opcodes_idx))
  303. print ("};\n")
  304. # Output FDE's.
  305. print ("static const struct unw_fde unw_fdes[] __unwind =\n{")
  306. for pc in pcs:
  307. print (" { 0x%x, 0x%x, 0x%x }," % (pc[0] - lo_pc, pc[1], pc[2]))
  308. print ("};\n")
  309. # Output opcodes shared by both CIE's and FDE's.
  310. i = 0
  311. print ("static const uint8_t unw_opcodes[] __unwind =\n{", end = "")
  312. for op in state.ops:
  313. for byte in op.header:
  314. if (i & 7) == 0:
  315. print ("\n ", end = "")
  316. print ("0x%02x, " % byte, end = "")
  317. i += 1
  318. for byte in op.opcodes:
  319. if (i & 7) == 0:
  320. print ("\n ", end = "")
  321. print ("0x%02x, " % byte, end = "")
  322. i += 1
  323. print ("\n};\n")
  324. # Output global data.
  325. print ("const struct unw_globals unw_globals __unwind =\n{")
  326. print (" .nr_fdes = %d," % len (state.pc))
  327. print (" .fdes = unw_fdes,")
  328. print (" .cies = unw_cies,")
  329. print (" .ops = unw_opcodes,")
  330. print (" .base_addr = 0x%xul" % lo_pc)
  331. print ("};\n")
  332. print ("const struct unw_globals *unw_globals_ptr = &unw_globals;\n")
  333. def main (path):
  334. global LP64, WORD_SIZE
  335. with Popen (["readelf", "-h", "-x", ".eh_frame", path],
  336. stdout = PIPE, stderr = DEVNULL, text = True) as proc:
  337. stdin = proc.stdout
  338. for line in stdin:
  339. if line.find ("Class") >= 0:
  340. LP64 = line.find ("ELF64") >= 0
  341. WORD_SIZE = 8 if LP64 else 4
  342. break
  343. else:
  344. raise ValueError ("could not find ELF class in file")
  345. output_dwarf (process_dwarf (gen_dwarf (stdin)))
  346. if __name__ == "__main__":
  347. main (sys.argv[1])