parse-bytecode.scm 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533
  1. ; Part of Scheme 48 1.9. See file COPYING for notices and license.
  2. ; Authors: Martin Gasbichler, Mike Sperber
  3. ;; Generic byte code parser
  4. (define-record-type attribution :attribution
  5. (make-attribution init-template template-literal
  6. opcode-table make-label at-label)
  7. attribution?
  8. (init-template attribution-init-template)
  9. (template-literal attribution-template-literal)
  10. (opcode-table attribution-opcode-table)
  11. (make-label attribution-make-label)
  12. (at-label attribution-at-label))
  13. (define (opcode-table-ref table i)
  14. (vector-ref table i))
  15. (define (opcode-table-set! table i new)
  16. (vector-set! table i new))
  17. (define (make-opcode-table default)
  18. (make-vector op-count default))
  19. ;; Example attribution
  20. (define (disass)
  21. (define (disass-init-template state template p-args push-template? push-env? push-closure?)
  22. (cons (list 0 'protocol p-args push-template? push-env? push-closure?)
  23. state))
  24. (define instruction-set-table
  25. (make-opcode-table
  26. (lambda (opcode template state pc len . args)
  27. (cons `(,pc ,(enumerand->name opcode op) ,@(map cdr args)) state))))
  28. (define (attribute-literal literal i state)
  29. state)
  30. (define (make-label target-pc)
  31. target-pc)
  32. (define (at-label label state)
  33. (cons `(,label :) state))
  34. (make-attribution disass-init-template attribute-literal
  35. instruction-set-table make-label at-label))
  36. (define (parse-template x state attribution)
  37. (let* ((tem (coerce-to-template x))
  38. (template-len (template-length tem)))
  39. (let lp ((i 1) (state state))
  40. (if (= i template-len)
  41. (parse-template-code tem (template-code tem) state attribution)
  42. (let ((literal (template-ref tem i)))
  43. (if (template? literal)
  44. (lp (+ i 1) (parse-template literal state attribution))
  45. (lp (+ i 1) ((attribution-template-literal attribution) literal i state))))))))
  46. (define (byte-code? x)
  47. (let ((code (template-code (coerce-to-template x))))
  48. (define (byte-code-protocol? protocol)
  49. (or (<= protocol maximum-stack-args)
  50. (= protocol two-byte-nargs-protocol)
  51. (= protocol two-byte-nargs+list-protocol)
  52. (= protocol ignore-values-protocol)
  53. (= protocol call-with-values-protocol)
  54. (= protocol args+nargs-protocol)
  55. (= protocol nary-dispatch-protocol)
  56. (and (= protocol big-stack-protocol)
  57. (byte-code-protocol?
  58. (code-vector-ref code (- (code-vector-length code) 3))))))
  59. (byte-code-protocol? (code-vector-ref code 1))))
  60. (define (parse-template-code tem code state attribution)
  61. (with-template
  62. tem code state attribution
  63. (lambda (pc length state)
  64. (let loop ((pc pc)
  65. (state state))
  66. (if (< pc length)
  67. (receive (size state)
  68. (parse-instruction tem code pc state attribution)
  69. (loop (+ pc size) state))
  70. state)))))
  71. (define (with-template tem code state attribution fun)
  72. (let ((length (template-code-length code)))
  73. (let-fluid
  74. *bc-make-labels* '()
  75. (lambda ()
  76. (for-each
  77. (lambda (pc) (pc->label pc attribution))
  78. (debug-data-jump-back-dests (template-debug-data tem)))
  79. (receive (size protocol-arguments)
  80. (parse-protocol code 1 attribution)
  81. (receive (push-template? push-env? push-closure?)
  82. (case (code-vector-ref code (+ size 1))
  83. ((#b000) (values #f #f #f))
  84. ((#b001) (values #t #f #f))
  85. ((#b010) (values #f #t #f))
  86. ((#b011) (values #t #t #f))
  87. ((#b100) (values #f #f #t))
  88. ((#b110) (values #f #t #t))
  89. ((#b101) (values #t #f #t))
  90. ((#b111) (values #t #t #t))
  91. (else (assertion-violation 'with-template "invalid init-template spec"
  92. (code-vector-ref code (+ size 1)))))
  93. (fun (+ size 2)
  94. length
  95. ((attribution-init-template attribution)
  96. state tem protocol-arguments push-template? push-env? push-closure?))))))))
  97. (define (parse-instruction template code pc state attribution)
  98. (let* ((opcode (code-vector-ref code pc))
  99. (len.rev-args (cond ((= opcode (enum op computed-goto)) ; unused?
  100. (assertion-violation 'parse-instruction
  101. "computed-goto in parse-bytecode"))
  102. (else
  103. (parse-opcode-args opcode
  104. pc
  105. code
  106. template
  107. attribution))))
  108. (total-len (+ 1 (car len.rev-args)))) ; 1 for the opcode
  109. (values total-len
  110. (really-parse-instruction pc total-len opcode template state
  111. (reverse (cdr len.rev-args)) attribution))))
  112. (define (really-parse-instruction pc len opcode template state args attribution)
  113. (let ((new-state (if (label-at-pc? pc)
  114. ((attribution-at-label attribution)
  115. (pc->label pc attribution)
  116. state)
  117. state)))
  118. (let ((opcode-attribution
  119. (opcode-table-ref (attribution-opcode-table attribution) opcode)))
  120. (if opcode-attribution
  121. (apply opcode-attribution opcode template new-state pc len args)
  122. (assertion-violation 'parse-instruction "cannot attribute "
  123. (enumerand->name opcode op) args)))))
  124. ;;--------------------
  125. ;; labels
  126. (define *bc-make-labels* (make-fluid '()))
  127. (define (add-pc! pc attribution)
  128. (set-fluid! *bc-make-labels*
  129. (cons (cons pc ((attribution-make-label attribution) pc))
  130. (fluid *bc-make-labels*))))
  131. (define (pc->label pc attribution)
  132. (let ((maybe-pc.label (assq pc (fluid *bc-make-labels*))))
  133. (if maybe-pc.label
  134. (cdr maybe-pc.label)
  135. (begin
  136. (add-pc! pc attribution)
  137. (pc->label pc attribution)))))
  138. (define (label-at-pc? pc)
  139. (if (assq pc (fluid *bc-make-labels*)) #t #f))
  140. ; (enum op make-[big-]flat-env)
  141. ; number of vars
  142. ; number of closures
  143. ; [offset of template in frame
  144. ; offsets of templates in template]
  145. ; number of variables in frame (size)
  146. ; offsets of vars in frame
  147. ; [offset of env in frame
  148. ; number of vars in env
  149. ; offsets of vars in level]*
  150. (define-record-type env-data :env-data
  151. (make-env-data total-count frame-offsets maybe-template-index closure-offsets
  152. env-offsets)
  153. env-data?
  154. (total-count env-data-total-count)
  155. (frame-offsets env-data-frame-offsets)
  156. (maybe-template-index env-data-maybe-template-index)
  157. (closure-offsets env-data-closure-offsets)
  158. (env-offsets env-data-env-offsets))
  159. (define (parse-flat-env-args pc code size fetch)
  160. (let ((start-pc pc)
  161. (total-count (fetch code pc))
  162. (closure-count (fetch code (+ pc size))))
  163. (receive (template-index closure-offsets)
  164. (if (< 0 closure-count)
  165. (values (fetch code (+ pc size size))
  166. (get-offsets code (+ pc size size size)
  167. size fetch closure-count))
  168. (values #f '()))
  169. (let* ((pc (if (< 0 closure-count)
  170. (+ pc
  171. (* 2 size) ; counts
  172. size ; template offset
  173. (* closure-count size)) ; subtemplates
  174. (+ pc (* 2 size)))) ; counts
  175. (frame-count (fetch code pc))
  176. (pc (+ pc size)))
  177. (let ((frame-offsets (get-offsets code pc size fetch frame-count)))
  178. (let ((pc (+ pc (* frame-count size)))
  179. (count (+ closure-count frame-count)))
  180. (let loop ((pc pc) (count count) (rev-env-offsets '()))
  181. (if (= count total-count)
  182. (values (- pc start-pc)
  183. (make-env-data total-count frame-offsets
  184. template-index closure-offsets
  185. (reverse rev-env-offsets)))
  186. (let* ((env (fetch code pc))
  187. (count-here (fetch code (+ pc size)))
  188. (indexes (get-offsets code
  189. (+ pc size size)
  190. size
  191. fetch
  192. count-here)))
  193. (loop (+ pc (* (+ 2 count-here) size))
  194. (+ count count-here)
  195. (cons (cons env indexes) rev-env-offsets)))))))))))
  196. (define (get-offsets code pc size fetch count)
  197. (do ((pc pc (+ pc size))
  198. (i 0 (+ i 1))
  199. (r '() (cons (fetch code pc) r)))
  200. ((= i count)
  201. (reverse r))))
  202. ; Parse a protocol, returning the number of bytes of instruction stream that
  203. ; were consumed. PC has to point behind the PRTOCOL opcode
  204. (define (parse-protocol code pc attribution)
  205. (let ((protocol (code-vector-ref code pc)))
  206. (really-parse-protocol protocol code pc attribution)))
  207. (define (really-parse-protocol protocol code pc attribution)
  208. (cond ((<= protocol maximum-stack-args)
  209. (values 1 (list protocol)))
  210. ((= protocol two-byte-nargs-protocol)
  211. (values 3 (list protocol (get-offset code (+ pc 1)))))
  212. ((= protocol two-byte-nargs+list-protocol)
  213. (values 3 (list protocol (get-offset code (+ pc 1)))))
  214. ((= protocol ignore-values-protocol)
  215. (values 1 (list protocol)))
  216. ((= protocol call-with-values-protocol)
  217. (let ((offset (get-offset code (+ pc 1))))
  218. (values 3 (list protocol
  219. (pc->label (- (+ offset pc) 1)
  220. attribution)
  221. (zero? offset)))))
  222. ((= protocol args+nargs-protocol)
  223. (values 2 (list protocol (code-vector-ref code (+ pc 1)))))
  224. ((= protocol nary-dispatch-protocol)
  225. (values 5 (cons protocol (parse-dispatch code pc attribution))))
  226. ((= protocol big-stack-protocol)
  227. (let ((real-protocol (code-vector-ref code
  228. (- (code-vector-length code) 3)))
  229. (stack-size (get-offset code (- (code-vector-length code) 2))))
  230. (receive (size real-attribution)
  231. (really-parse-protocol real-protocol code pc attribution)
  232. (values size
  233. (list protocol real-attribution stack-size)))))
  234. (else
  235. (assertion-violation 'parse-protocol "unknown protocol" protocol pc))))
  236. (define (parse-dispatch code pc attribution)
  237. (define (maybe-parse-one-dispatch index)
  238. (let ((offset (code-vector-ref code (+ pc index))))
  239. (if (= offset 0)
  240. #f
  241. (pc->label (+ offset pc) attribution))))
  242. (map maybe-parse-one-dispatch (list 3 4 5 2)))
  243. (define (protocol-protocol p-args)
  244. (car p-args))
  245. (define (n-ary-protocol? p-args)
  246. (let ((protocol (car p-args)))
  247. (if (or (= protocol two-byte-nargs+list-protocol)
  248. (= protocol call-with-values-protocol)
  249. (= protocol ignore-values-protocol))
  250. #t
  251. (if (or (<= protocol maximum-stack-args)
  252. (= protocol two-byte-nargs-protocol))
  253. #f
  254. (if (= protocol big-stack-protocol)
  255. (n-ary-protocol? (cadr p-args))
  256. (assertion-violation 'n-ary-protocol?
  257. "unknown protocol" p-args))))))
  258. (define (protocol-nargs p-args)
  259. (let ((protocol (car p-args)))
  260. (cond ((<= protocol maximum-stack-args)
  261. protocol)
  262. ((= protocol two-byte-nargs-protocol)
  263. (cadr p-args))
  264. ((= protocol two-byte-nargs+list-protocol)
  265. (cadr p-args))
  266. ((= protocol args+nargs-protocol)
  267. (cadr p-args))
  268. ((= protocol big-stack-protocol)
  269. (protocol-nargs (cadr p-args)))
  270. ((= protocol ignore-values-protocol)
  271. 0)
  272. ((= protocol call-with-values-protocol)
  273. (assertion-violation 'protocol-nargs
  274. "call-with-values-protocol in protocol-nargs"))
  275. (else
  276. (assertion-violation 'protocol-nargs
  277. "unknown protocol" p-args)))))
  278. (define (protocol-cwv-tailcall? p-args)
  279. (let ((protocol (protocol-protocol p-args)))
  280. (if (not (= protocol call-with-values-protocol))
  281. (assertion-violation 'protocol-cwv-tailcall?
  282. "invalid protocol" protocol))
  283. (caddr p-args)))
  284. (define (call-with-values-protocol-target p-args)
  285. (let ((protocol (protocol-protocol p-args)))
  286. (if (not (= protocol call-with-values-protocol))
  287. (assertion-violation 'call-with-values-protocol-target
  288. "invalid protocol" protocol))
  289. (cadr p-args)))
  290. ; Generic opcode argument parser
  291. (define (parse-opcode-args op start-pc code template attribution)
  292. (let ((specs (vector-ref opcode-arg-specs op)))
  293. (let loop ((specs specs) (pc (+ start-pc 1)) (len 0) (args '()))
  294. (if (null? specs)
  295. (cons len args)
  296. (let ((spec (car specs)))
  297. (cond
  298. ((eq? spec 'protocol)
  299. (receive (size p-args)
  300. (parse-protocol code pc attribution)
  301. (loop (cdr specs)
  302. (+ pc size)
  303. (+ len size)
  304. (cons (cons 'protocol p-args) args))))
  305. ((or (eq? spec 'env-data)
  306. (eq? spec 'big-env-data))
  307. (receive (size env-data)
  308. (receive (slot-size fetch)
  309. (if (eq? spec 'env-data)
  310. (values 1 code-vector-ref)
  311. (values 2 get-offset))
  312. (parse-flat-env-args pc code slot-size fetch))
  313. (loop (cdr specs)
  314. (+ pc size)
  315. (+ len size)
  316. (cons (cons 'env-data env-data) args))))
  317. ((eq? spec 'instr)
  318. (let ((opcode (code-vector-ref code pc)))
  319. (let ((len.revargs (parse-opcode-args opcode
  320. pc
  321. code
  322. template
  323. attribution)))
  324. (loop (cdr specs)
  325. (+ pc 1 (car len.revargs))
  326. (+ len 1 (car len.revargs))
  327. (cons
  328. (cons 'instr
  329. (cons opcode (reverse (cdr len.revargs))))
  330. args)))))
  331. ((= 0 (arg-spec-size spec pc code))
  332. (cons len args))
  333. (else
  334. (let ((arg (parse-opcode-arg specs
  335. pc
  336. start-pc
  337. code
  338. template
  339. attribution)))
  340. (loop (cdr specs)
  341. (+ pc (arg-spec-size spec pc code))
  342. (+ len (arg-spec-size spec pc code))
  343. (cons arg args))))))))))
  344. ; The number of bytes required by an argument.
  345. (define (arg-spec-size spec pc code)
  346. (case spec
  347. ((byte nargs stack-index index literal stob) 1)
  348. ((two-bytes two-byte-nargs two-byte-stack-index two-byte-index offset offset-) 2)
  349. ((env-data) (assertion-violation 'arg-spec-size "env-data in arg-spec-size"))
  350. ((protocol) (assertion-violation 'arg-spec-size "protocol in arg-spec-size"))
  351. ((moves-data)
  352. (let ((n-moves (code-vector-ref code pc)))
  353. (+ 1 (* 2 n-moves))))
  354. ((big-moves-data)
  355. (let ((n-moves (code-vector-ref code pc)))
  356. (+ 2 (* 4 n-moves))))
  357. ((cont-data)
  358. (- (get-offset code pc) 1)) ; size includes opcode
  359. (else 0)))
  360. ; Parse the particular type of argument.
  361. (define (parse-opcode-arg specs pc start-pc code template attribution)
  362. (cons
  363. (car specs)
  364. (case (car specs)
  365. ((byte nargs stack-index index)
  366. (code-vector-ref code pc))
  367. ((two-bytes two-byte-nargs two-byte-stack-index two-byte-index)
  368. (get-offset code pc))
  369. ((literal)
  370. (- (code-vector-ref code pc) 128))
  371. ((offset)
  372. (let ((offset (get-offset code pc)))
  373. (if (zero? offset)
  374. #f
  375. (pc->label (+ start-pc offset) attribution))))
  376. ((offset-)
  377. (pc->label (- start-pc (get-offset code pc)) attribution))
  378. ((stob)
  379. (code-vector-ref code pc))
  380. ((cont-data)
  381. (parse-cont-data-args pc code template attribution))
  382. ((moves-data)
  383. (let ((n-moves (code-vector-ref code pc)))
  384. (let loop ((offset (+ pc 1))
  385. (n n-moves))
  386. (if (zero? n)
  387. '()
  388. (cons (cons (code-vector-ref code offset)
  389. (code-vector-ref code (+ offset 1)))
  390. (loop (+ offset 2) (- n 1)))))))
  391. ((big-moves-data)
  392. (let ((n-moves (get-offset code pc)))
  393. (let loop ((offset (+ pc 2))
  394. (n n-moves))
  395. (if (zero? n)
  396. '()
  397. (cons (cons (get-offset code offset)
  398. (get-offset code (+ offset 2)))
  399. (loop (+ offset 4) (- n 1)))))))
  400. (else (assertion-violation 'parse-opcode-arg
  401. "unknown arg spec: " (car specs))))))
  402. (define-record-type cont-data :cont-data
  403. (make-cont-data length mask-bytes live-offsets template pc gc-mask-size depth)
  404. cont-data?
  405. (length cont-data-length)
  406. (mask-bytes cont-data-mask-bytes)
  407. ;; #f if all are live
  408. (live-offsets cont-data-live-offsets)
  409. (template cont-data-template)
  410. (pc cont-data-pc)
  411. (gc-mask-size cont-data-gc-mask-size)
  412. (depth cont-data-depth))
  413. (define (parse-cont-data-args pc code template attribution)
  414. (let* ((len (get-offset code pc))
  415. (end-pc (- (+ pc len) 1)) ; len includes opcode
  416. (gc-mask-size (code-vector-ref code (- end-pc 3)))
  417. (depth (get-offset code (- end-pc 2)))
  418. (offset (get-offset code (- end-pc 5)))
  419. (template (get-offset code (- end-pc 7)))
  420. (mask-bytes
  421. (let lp ((the-pc (+ pc 2)) (mask-bytes '()))
  422. (if (>= the-pc (+ pc 2 gc-mask-size))
  423. mask-bytes
  424. (lp (+ the-pc 1)
  425. (cons (code-vector-ref code the-pc) mask-bytes)))))
  426. (live-offsets
  427. (and (not (zero? gc-mask-size))
  428. (gc-mask-live-offsets (bytes->bits mask-bytes)))))
  429. (make-cont-data len
  430. mask-bytes
  431. live-offsets
  432. template
  433. (pc->label offset attribution)
  434. gc-mask-size
  435. depth)))
  436. (define (bytes->bits l)
  437. (let loop ((n 0) (l l))
  438. (if (null? l)
  439. n
  440. (loop (+ (arithmetic-shift n 8) (car l))
  441. (cdr l)))))
  442. (define (gc-mask-live-offsets mask)
  443. (let loop ((mask mask) (i 0) (l '()))
  444. (if (zero? mask)
  445. (reverse l)
  446. (loop (arithmetic-shift mask -1) (+ 1 i)
  447. (if (odd? mask)
  448. (cons i l)
  449. l)))))
  450. ;----------------
  451. ; Utilities.
  452. ; TODO: Put the template-related stuff into a separate module?
  453. ; Turn OBJ into a template, if possible.
  454. (define (coerce-to-template obj)
  455. (cond ((template? obj) obj)
  456. ((closure? obj) (closure-template obj))
  457. ((continuation? obj) (continuation-template obj))
  458. (else (assertion-violation 'coerce-to-template
  459. "expected a procedure or continuation" obj))))
  460. (define (template-code-length code)
  461. (if (and (= (enum op protocol)
  462. (code-vector-ref code 0))
  463. (= big-stack-protocol
  464. (code-vector-ref code 1)))
  465. (- (code-vector-length code) 3)
  466. (code-vector-length code)))
  467. ; Fetch the two-byte value at PC in CODE.
  468. (define (get-offset code pc)
  469. (+ (* (code-vector-ref code pc)
  470. byte-limit)
  471. (code-vector-ref code (+ pc 1))))