avrasmpost 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625
  1. #!/usr/bin/env python3
  2. """
  3. # Simple AVR disassembly postprocessor
  4. #
  5. # Copyright (C) 2012-2014 Michael Buesch <m@bues.ch>
  6. #
  7. # This program is free software; you can redistribute it and/or modify
  8. # it under the terms of the GNU General Public License as published by
  9. # the Free Software Foundation; either version 2 of the License, or
  10. # (at your option) any later version.
  11. #
  12. # This program is distributed in the hope that it will be useful,
  13. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. # GNU General Public License for more details.
  16. #
  17. # You should have received a copy of the GNU General Public License along
  18. # with this program; if not, write to the Free Software Foundation, Inc.,
  19. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  20. """
  21. import sys
  22. import re
  23. import getopt
# Format string for auto-generated labels: "L" followed by the offset
# as at least four zero-padded uppercase hex digits.
LABEL_FMT = "L%04X"
  25. def die(msg):
  26. sys.stderr.write(msg + "\n")
  27. sys.stderr.flush()
  28. sys.exit(1)
  29. def parseint(s):
  30. if s.lower().startswith("0x"):
  31. return int(s, 16)
  32. return int(s, 10)
  33. def ishex(s):
  34. for c in s:
  35. if c not in "0123456789abcdefABCDEF":
  36. return False
  37. return True
  38. def eff_linelen(s):
  39. '''Get effective line length (Tabs => 8 characters).'''
  40. count = 0
  41. for c in s:
  42. if c == '\t':
  43. count = (count + 8) // 8 * 8
  44. if c == '\n':
  45. count = 0
  46. else:
  47. count += 1
  48. return count
  49. def pad_to_length(s, target_len):
  50. '''Pad a string up to the specified effective length.'''
  51. slen = eff_linelen(s)
  52. if slen >= target_len:
  53. return s
  54. return s + ' ' * (target_len - slen)
  55. def fix_twos_complement(val, nrBits):
  56. sign = 1 << nrBits
  57. mask = (sign << 1) - 1
  58. val &= mask
  59. if val & sign:
  60. return -((~val + 1) & mask)
  61. return val
  62. class IncFile(object):
  63. '''A parsed INC-file.'''
  64. equ_re = re.compile(r"^\s*\.equ\s+(\w+)\s*=\s*(\w+)\s*(?:;.*)?")
  65. flash_end_re = re.compile(r"^\s*\.equ\s+FLASHEND\s*=\s*(\w+)\s*(?:;.*)?")
  66. def __init__(self, inc_file_path):
  67. self.ioaddr_map = {}
  68. self.irq_map = {}
  69. self.irq_vectors_size = None
  70. self.flash_size = None
  71. in_io = False
  72. in_irq = False
  73. try:
  74. lines = open(inc_file_path, "r").readlines()
  75. except IOError as e:
  76. die("Failed to read INC-FILE '%s': %s" % (inc_file_path, str(e)))
  77. for line in lines:
  78. line = line.strip()
  79. if "I/O REGISTER DEFINITIONS" in line:
  80. in_io = True
  81. continue
  82. if "INTERRUPT VECTORS" in line:
  83. in_irq = True
  84. continue
  85. if line.startswith("; *****"):
  86. in_io = False
  87. in_irq = False
  88. continue
  89. if in_io:
  90. self.__parse_iomap_entry(line)
  91. elif in_irq:
  92. self.__parse_irqmap_entry(line)
  93. else:
  94. m = self.flash_end_re.match(line)
  95. if m:
  96. try:
  97. end = int(m.group(1), 16)
  98. self.flash_size = end + 1
  99. self.flash_size *= 2 # To bytes
  100. except ValueError:
  101. pass
  102. if not self.flash_size:
  103. die("Failed to get FLASHEND from INC-FILE")
  104. self.flash_mask = self.flash_size - 1
  105. if not self.ioaddr_map:
  106. die("Failed to parse I/O-map from INC-FILE")
  107. if not self.irq_map or not self.irq_vectors_size:
  108. die("Failed to parse IRQ-map from INC-FILE")
  109. if 0 not in self.irq_map:
  110. self.irq_map[0] = "RESET"
  111. # Parse one I/O map entry
  112. def __parse_iomap_entry(self, line):
  113. m = self.equ_re.match(line)
  114. if not m:
  115. return
  116. name, addr = m.group(1), m.group(2)
  117. try:
  118. addr = int(addr, 16)
  119. except ValueError:
  120. die("Failed to convert I/O map address: %s" % line)
  121. self.ioaddr_map[addr] = name
  122. # Parse one IRQ map entry
  123. def __parse_irqmap_entry(self, line):
  124. m = self.equ_re.match(line)
  125. if not m:
  126. return
  127. name, addr = m.group(1), m.group(2)
  128. if name == "INT_VECTORS_SIZE":
  129. try:
  130. self.irq_vectors_size = int(addr, 10)
  131. self.irq_vectors_size *= 2 # To byte size
  132. except ValueError:
  133. die("Failed to parse IRQ map size: %s" %\
  134. line)
  135. return
  136. if not name.endswith("addr"):
  137. return
  138. try:
  139. addr = int(addr, 16)
  140. except ValueError:
  141. die("Failed to convert IRQ map address: %s" % line)
  142. addr *= 2 # To byte address
  143. self.irq_map[addr] = name
  144. class Insn(object):
  145. '''An AVR assembly instruction'''
  146. class StringErr(Exception): pass
  147. class StringIgnore(Exception): pass
  148. def __init__(self, insn_string):
  149. # Check whether this is an instruction line.
  150. m = re.match(r'^\s*[0-9a-fA-F]+:\s+', insn_string)
  151. if not m:
  152. raise Insn.StringIgnore()
  153. # Look for comments
  154. self.comment = ""
  155. if ';' in insn_string:
  156. i = insn_string.index(';')
  157. self.comment = insn_string[i+1:].strip()
  158. # Strip it off
  159. insn_string = insn_string[:i]
  160. # Fix 0x0x breakage
  161. self.comment = self.comment.replace("0x0x", "0x")
  162. s = insn_string.split()
  163. if len(s) < 2:
  164. raise Insn.StringErr()
  165. # Extract the raw bytes
  166. self.raw_bytes = []
  167. while len(s[1]) == 2 and ishex(s[1]):
  168. hexval = s.pop(1)
  169. try:
  170. hexval = int(hexval, 16)
  171. except ValueError:
  172. die("Failed to parse raw byte")
  173. self.raw_bytes.append(hexval)
  174. # Extract offset (2ab:)
  175. try:
  176. off = s[0]
  177. off = off[:-1] # Strip colon
  178. self.offset = int(off, 16)
  179. self.offset_label = None
  180. except TypeError:
  181. die("Failed to extract insn offset")
  182. # Extract insn string (jmp...)
  183. self.insn = s[1].lower()
  184. # Extract operands
  185. self.operands = []
  186. try:
  187. self.operands = s[2:]
  188. except IndexError as e:
  189. pass
  190. for i, op in enumerate(self.operands):
  191. # Strip commas from operands
  192. op = self.operands[i] = op.replace(",", "")
  193. # Fix 0x0x breakage
  194. op = self.operands[i] = op.replace("0x0x", "0x")
  195. self.callers = []
  196. self.jmpsources = []
  197. def __makeLabel(self):
  198. lbl = self.get_offset_string() + ":"
  199. lbl = pad_to_length(lbl, 10)
  200. return lbl
  201. def get_as_data_string(self, inc_file):
  202. '''Returns the full string of the instruction as data.'''
  203. max_vect = inc_file.irq_vectors_size - 2
  204. s = ""
  205. # Space between IRQ vectors and program
  206. if self.get_offset() == max_vect + 2:
  207. s += "\n"
  208. # Dump the bytes
  209. s += self.__makeLabel()
  210. if len(self.raw_bytes) == 2:
  211. s += ".dw 0x%02X%02X" % (self.raw_bytes[1],\
  212. self.raw_bytes[0])
  213. else:
  214. die("Do not know how to handle raw bytes")
  215. return s
  216. def get_as_instruction_string(self, inc_file):
  217. '''Returns a full string of the instruction'''
  218. max_vect = inc_file.irq_vectors_size - 2
  219. is_irq_handler = any(s.get_offset() <= max_vect
  220. for s in self.jmpsources)
  221. s = ""
  222. # Show CALLers
  223. if self.callers:
  224. s += "\n; FUNCTION called by "
  225. c = []
  226. pfx = ""
  227. for i, caller in enumerate(self.callers):
  228. c.append(pfx + caller.get_offset_string())
  229. if i != 0 and \
  230. (i + 1) % 6 == 0 and \
  231. i != len(self.callers) - 1:
  232. pfx = "\n;\t\t"
  233. else:
  234. pfx = ""
  235. s += ", ".join(c)
  236. s += "\n"
  237. # Space between IRQ vectors and program
  238. if self.get_offset() == max_vect + 2 and not self.callers:
  239. s += "\n"
  240. # Show IRQ vector jump sources
  241. if is_irq_handler and not self.callers:
  242. s += "\n"
  243. if is_irq_handler:
  244. # This is jumped to from IRQ vectors.
  245. s += "; IRQ handler for "
  246. s += ", ".join(s.get_offset_string()
  247. for s in self.jmpsources)
  248. s += "\n"
  249. # Dump the instruction string
  250. s += self.__makeLabel()
  251. s += self.get_insn()
  252. if self.get_operands():
  253. s = pad_to_length(s, 18)
  254. s += ", ".join(self.get_operands())
  255. # Add the comment string
  256. comm = self.get_comment()
  257. if comm or self.jmpsources:
  258. s = pad_to_length(s, 35)
  259. s += ";"
  260. if comm:
  261. s += comm
  262. if self.jmpsources:
  263. s += " / "
  264. # Add the (R)JMP sources
  265. if self.jmpsources:
  266. nonirq_jmpsrcs = [ s for s in self.jmpsources
  267. if s.get_offset() > max_vect ]
  268. if nonirq_jmpsrcs:
  269. s += "JUMPTARGET from "
  270. s += ", ".join(s.get_offset_string()
  271. for s in nonirq_jmpsrcs)
  272. return s
  273. def get_offset(self):
  274. return self.offset
  275. def get_offset_label(self):
  276. return self.offset_label
  277. def get_offset_string(self):
  278. label = self.get_offset_label()
  279. if label:
  280. return label
  281. return LABEL_FMT % self.get_offset()
  282. def get_insn(self):
  283. return self.insn
  284. def set_insn(self, insn):
  285. self.insn = insn
  286. def get_operands(self):
  287. return self.operands
  288. def get_comment(self):
  289. return self.comment
  290. def add_caller(self, insn):
  291. self.callers.append(insn)
  292. def add_jmpsource(self, insn):
  293. self.jmpsources.append(insn)
  294. def __rewrite_irq_label(self, inc_file):
  295. offset = self.get_offset()
  296. if offset >= inc_file.irq_vectors_size:
  297. return
  298. try:
  299. label = inc_file.irq_map[offset]
  300. except KeyError:
  301. return
  302. self.comment = label
  303. if label.endswith("addr"):
  304. label = label[:-4]
  305. label = "L_" + label
  306. self.offset_label = label
  307. def __rewrite_jmp_targets(self, inc_file):
  308. if self.get_insn() != "jmp" and self.get_insn() != "call":
  309. return
  310. operands = self.get_operands()
  311. if len(operands) != 1:
  312. die("Error: more than one JMP/CALL operand")
  313. operands[0] = LABEL_FMT % int(operands[0], 0)
  314. def __rewrite_rjmp_targets(self, inc_file):
  315. operlist = self.get_operands()
  316. r = re.compile(r"^\.([\+-][0-9]+)")
  317. for i in range(0, len(operlist)):
  318. m = r.match(operlist[i])
  319. if not m:
  320. continue
  321. offs = fix_twos_complement(int(m.group(1)), 12) + 2
  322. offs = (self.get_offset() + offs) & inc_file.flash_mask
  323. operlist[i] = LABEL_FMT % offs
  324. break
  325. def __rewrite_io_addrs(self, inc_file):
  326. offsets = { "sts" : (0, "mem"),
  327. "lds" : (1, "mem"),
  328. "in" : (1, "io"),
  329. "out" : (0, "io"),
  330. "sbic" : (0, "io"),
  331. "sbis" : (0, "io"),
  332. "sbi" : (0, "io"),
  333. "cbi" : (0, "io"), }
  334. try:
  335. (offset, optype) = offsets[self.get_insn()]
  336. except KeyError as e:
  337. return
  338. operands = self.get_operands()
  339. ioaddr = int(operands[offset], 0)
  340. if optype == "mem":
  341. if ioaddr < 0x20:
  342. print("Error: mem-op offset operand < 0x20")
  343. exit(1)
  344. if ioaddr < 0x60:
  345. ioaddr -= 0x20
  346. try:
  347. name = inc_file.ioaddr_map[ioaddr]
  348. except KeyError as e:
  349. return
  350. if optype == "mem" and ioaddr < 0x60:
  351. name += " + 0x20"
  352. # Got a name for it. Reassign it.
  353. operands[offset] = name
  354. def __rewrite_special_registers(self, inc_file):
  355. special_regs_tab = { 26 : "XL",
  356. 27 : "XH",
  357. 28 : "YL",
  358. 29 : "YH",
  359. 30 : "ZL",
  360. 31 : "ZH", }
  361. r = re.compile(r"^[rR]([0-9]+)$")
  362. operands = self.get_operands()
  363. for i in range(0, len(operands)):
  364. m = r.match(operands[i])
  365. if not m:
  366. continue
  367. regnum = int(m.group(1))
  368. try:
  369. name = special_regs_tab[regnum]
  370. except KeyError as e:
  371. continue
  372. operands[i] = name
  373. def __fix_raw_words(self, inc_file):
  374. if self.get_insn() == ".word":
  375. self.set_insn(".dw")
  376. def rewrite(self, inc_file):
  377. '''Rewrite the instruction to be better human readable'''
  378. self.__rewrite_irq_label(inc_file)
  379. self.__rewrite_jmp_targets(inc_file)
  380. self.__rewrite_rjmp_targets(inc_file)
  381. self.__rewrite_io_addrs(inc_file)
  382. self.__rewrite_special_registers(inc_file)
  383. self.__fix_raw_words(inc_file)
  384. def usage():
  385. print("avr-postproc [OPTIONS] INC-FILE")
  386. print("")
  387. print("INC-FILE is the assembly .inc file.")
  388. print("Objdump assembly is read from stdin.")
  389. print("Processed assembly is written to stdout.")
  390. print("")
  391. print("Options:")
  392. print(" -I|--infile FILEPATH Path to input file.")
  393. print(" If not specified, stdin is used.")
  394. print(" -O|--outfile FILEPATH Path to output file.")
  395. print(" If not specified, stdout is used.")
  396. print(" -s|--start OFFSET Start offset. Default 0x0.")
  397. print(" -e|--end OFFSET End offset. Default all.")
  398. print(" -d|--data-range RANGES Define a pure data range in program memory.")
  399. print(" Example: -d 0x0-0x1F -d 0x100-0x1FF")
  400. print(" Defines byte range 0h-1Fh and 100h-1FFh as data.")
  401. print(" -L|--label-file FILE Label file to pick label names from.")
  402. print(" -C|--comment-file FILE Comment file to pick comments from.")
  403. def main():
  404. infilename = None
  405. outfilename = None
  406. start_offset = 0
  407. stop_offset = -1
  408. data_ranges = []
  409. label_files = []
  410. comment_files = []
  411. try:
  412. (opts, args) = getopt.getopt(sys.argv[1:],
  413. "hI:O:s:e:d:L:C:",
  414. [ "help", "infile=", "outfile=",
  415. "start=", "end=", "data-range=",
  416. "label=", "comment=", ])
  417. except getopt.GetoptError as e:
  418. usage()
  419. return 1
  420. for (o, v) in opts:
  421. if o in ("-h", "--help"):
  422. usage()
  423. return 0
  424. if o in ("-I", "--infile"):
  425. infilename = v
  426. if o in ("-O", "--outfile"):
  427. outfilename = v
  428. if o in ("-s", "--start"):
  429. try:
  430. start_offset = parseint(v)
  431. except ValueError as e:
  432. die("-s|--start is not a number")
  433. if o in ("-e", "--end"):
  434. try:
  435. stop_offset = parseint(v)
  436. except ValueError as e:
  437. die("-e|--end is not a number")
  438. if o in ("-d", "--data-range"):
  439. if not v.strip():
  440. continue
  441. try:
  442. start, stop = v.split('-')
  443. start, stop = parseint(start), parseint(stop)
  444. if start < 0 or stop < 0 or stop < start:
  445. raise ValueError
  446. r = range(start, stop + 1)
  447. data_ranges.append(r)
  448. except ValueError as e:
  449. die("-d|--data-range invalid value")
  450. if o in ("-L", "--label"):
  451. label_files.append(v)
  452. #TODO use it
  453. if o in ("-C", "--comment"):
  454. comment_files.append(v)
  455. #TODO use it
  456. if len(args) != 1:
  457. die("INC-FILE not specified")
  458. inc_file_path = args[0]
  459. inc_file = IncFile(inc_file_path)
  460. if infilename:
  461. try:
  462. infd = open(infilename, "r")
  463. lines = infd.readlines()
  464. infd.close()
  465. except IOError as e:
  466. die("Failed to read input file '%s':\n%s" %\
  467. (infilename, str(e)))
  468. else:
  469. lines = sys.stdin.readlines()
  470. insns = []
  471. funcs = []
  472. # Parse the input and rewrite the
  473. # instructions to include symbolic names
  474. for line in lines:
  475. try:
  476. insn = Insn(line)
  477. except Insn.StringIgnore as e:
  478. continue
  479. except Insn.StringErr as e:
  480. die("ERROR: Could not parse line \"%s\"" % line)
  481. if insn.get_offset() < start_offset:
  482. continue
  483. if stop_offset != -1 and insn.get_offset() > stop_offset:
  484. break
  485. insn.rewrite(inc_file)
  486. insns.append(insn)
  487. def get_insn_by_offset(offset):
  488. for insn in insns:
  489. if insn.get_offset() == offset:
  490. return insn
  491. print("; Postproc error: Instruction with "
  492. "offset 0x%04X not found" % offset)
  493. return None
  494. # Annotate jump sources
  495. for insn in insns:
  496. branch_insns = { "jmp" : ("type_jmp", 0),
  497. "rjmp" : ("type_jmp", 0),
  498. "brbs" : ("type_jmp", 1),
  499. "brbc" : ("type_jmp", 1),
  500. "breq" : ("type_jmp", 0),
  501. "brne" : ("type_jmp", 0),
  502. "brcs" : ("type_jmp", 0),
  503. "brcc" : ("type_jmp", 0),
  504. "brsh" : ("type_jmp", 0),
  505. "brlo" : ("type_jmp", 0),
  506. "brmi" : ("type_jmp", 0),
  507. "brpl" : ("type_jmp", 0),
  508. "brge" : ("type_jmp", 0),
  509. "brlt" : ("type_jmp", 0),
  510. "brhs" : ("type_jmp", 0),
  511. "brhc" : ("type_jmp", 0),
  512. "brts" : ("type_jmp", 0),
  513. "brtc" : ("type_jmp", 0),
  514. "brvs" : ("type_jmp", 0),
  515. "brvc" : ("type_jmp", 0),
  516. "brie" : ("type_jmp", 0),
  517. "brid" : ("type_jmp", 0),
  518. "call" : ("type_call", 0),
  519. "rcall" : ("type_call", 0), }
  520. insn_name = insn.get_insn()
  521. try:
  522. (jmptype, targetoper) = branch_insns[insn_name]
  523. except KeyError as e:
  524. continue
  525. tgt_offset = int(insn.get_operands()[targetoper][1:], 16)
  526. target = get_insn_by_offset(tgt_offset)
  527. if target:
  528. if jmptype == "type_jmp":
  529. target.add_jmpsource(insn)
  530. else:
  531. target.add_caller(insn)
  532. # Write the output
  533. if outfilename:
  534. try:
  535. outfd = open(outfilename, "w")
  536. except IOError as e:
  537. die("Failed to open output file '%s':\n%s" %\
  538. (outfilename, str(e)))
  539. else:
  540. outfd = sys.stdout
  541. outfd.write('.include "' + inc_file_path.split("/")[-1] + '"\n')
  542. outfd.write('\n')
  543. outfd.write('.org 0x000\n')
  544. outfd.write('\n')
  545. for insn in insns:
  546. if any((insn.get_offset() in r) for r in data_ranges):
  547. # Dump it as data.
  548. s = insn.get_as_data_string(inc_file)
  549. if not s:
  550. continue
  551. outfd.write(s)
  552. else:
  553. # Dump the instruction.
  554. s = insn.get_as_instruction_string(inc_file)
  555. if not s:
  556. continue
  557. outfd.write(s)
  558. outfd.write("\n")
  559. if outfilename:
  560. outfd.close()
  561. return 0
# Run main() only when executed as a script.
# The return value of main() becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())