parse-bytecode.scm 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512
  1. ;; Generic byte code parser
  2. (define-record-type attribution :attribution
  3. (make-attribution init-template template-literal
  4. opcode-table make-label at-label)
  5. attribution?
  6. (init-template attribution-init-template)
  7. (template-literal attribution-template-literal)
  8. (opcode-table attribution-opcode-table)
  9. (make-label attribution-make-label)
  10. (at-label attribution-at-label))
  11. (define (opcode-table-ref table i)
  12. (vector-ref table i))
  13. (define (opcode-table-set! table i new)
  14. (vector-set! table i new))
  15. (define (make-opcode-table default)
  16. (make-vector op-count default))
  17. ;; Example attribution
  18. (define (disass)
  19. (define (disass-init-template state template p-args push-template? push-env? push-closure?)
  20. (cons (list 0 'protocol p-args push-template? push-env? push-closure?)
  21. state))
  22. (define instruction-set-table
  23. (make-opcode-table
  24. (lambda (opcode template state pc len . args)
  25. (cons `(,pc ,(enumerand->name opcode op) ,@(map cdr args)) state))))
  26. (define (attribute-literal literal i state)
  27. state)
  28. (define (make-label target-pc)
  29. target-pc)
  30. (define (at-label label state)
  31. (cons `(,label :) state))
  32. (make-attribution disass-init-template attribute-literal
  33. instruction-set-table make-label at-label))
  34. (define (parse-template x state attribution)
  35. (let* ((tem (coerce-to-template x))
  36. (template-len (template-length tem)))
  37. (let lp ((i 1) (state state))
  38. (if (= i template-len)
  39. (parse-template-code tem (template-code tem) state attribution)
  40. (let ((literal (template-ref tem i)))
  41. (if (template? literal)
  42. (lp (+ i 1) (parse-template literal state attribution))
  43. (lp (+ i 1) ((attribution-template-literal attribution) literal i state))))))))
  44. (define (byte-code? x)
  45. (let ((code (template-code (coerce-to-template x))))
  46. (define (byte-code-protocol? protocol)
  47. (or (<= protocol maximum-stack-args)
  48. (= protocol two-byte-nargs-protocol)
  49. (= protocol two-byte-nargs+list-protocol)
  50. (= protocol ignore-values-protocol)
  51. (= protocol call-with-values-protocol)
  52. (= protocol args+nargs-protocol)
  53. (= protocol nary-dispatch-protocol)
  54. (and (= protocol big-stack-protocol)
  55. (byte-code-protocol?
  56. (code-vector-ref code (- (code-vector-length code) 3))))))
  57. (byte-code-protocol? (code-vector-ref code 1))))
  58. (define (parse-template-code tem code state attribution)
  59. (with-template
  60. tem code state attribution
  61. (lambda (pc length state)
  62. (let loop ((pc pc)
  63. (state state))
  64. (if (< pc length)
  65. (receive (size state)
  66. (parse-instruction tem code pc state attribution)
  67. (loop (+ pc size) state))
  68. state)))))
  69. (define (with-template tem code state attribution fun)
  70. (let ((length (template-code-length code)))
  71. (let-fluid
  72. *bc-make-labels* '()
  73. (lambda ()
  74. (for-each
  75. (lambda (pc) (pc->label pc attribution))
  76. (debug-data-jump-back-dests (template-debug-data tem)))
  77. (receive (size protocol-arguments)
  78. (parse-protocol code 1 attribution)
  79. (receive (push-template? push-env? push-closure?)
  80. (case (code-vector-ref code (+ size 1))
  81. ((#b000) (values #f #f #f))
  82. ((#b001) (values #t #f #f))
  83. ((#b010) (values #f #t #f))
  84. ((#b011) (values #t #t #f))
  85. ((#b100) (values #f #f #t))
  86. ((#b110) (values #f #t #t))
  87. ((#b101) (values #t #f #t))
  88. ((#b111) (values #t #t #t))
  89. (else (error "invalid init-template spec" (code-vector-ref code (+ size 1)))))
  90. (fun (+ size 2)
  91. length
  92. ((attribution-init-template attribution)
  93. state tem protocol-arguments push-template? push-env? push-closure?))))))))
  94. (define (parse-instruction template code pc state attribution)
  95. (let* ((opcode (code-vector-ref code pc))
  96. (len.rev-args (cond ((= opcode (enum op computed-goto)) ; unused?
  97. (error "computed-goto in parse-bytecode"))
  98. (else
  99. (parse-opcode-args opcode
  100. pc
  101. code
  102. template
  103. attribution))))
  104. (total-len (+ 1 (car len.rev-args)))) ; 1 for the opcode
  105. (values total-len
  106. (really-parse-instruction pc total-len opcode template state
  107. (reverse (cdr len.rev-args)) attribution))))
  108. (define (really-parse-instruction pc len opcode template state args attribution)
  109. (let ((new-state (if (label-at-pc? pc)
  110. ((attribution-at-label attribution)
  111. (pc->label pc attribution)
  112. state)
  113. state)))
  114. (let ((opcode-attribution
  115. (opcode-table-ref (attribution-opcode-table attribution) opcode)))
  116. (if opcode-attribution
  117. (apply opcode-attribution opcode template new-state pc len args)
  118. (error "cannot attribute " (enumerand->name opcode op) args)))))
  119. ;;--------------------
  120. ;; labels
  121. (define *bc-make-labels* (make-fluid '()))
  122. (define (add-pc! pc attribution)
  123. (set-fluid! *bc-make-labels*
  124. (cons (cons pc ((attribution-make-label attribution) pc))
  125. (fluid *bc-make-labels*))))
  126. (define (pc->label pc attribution)
  127. (let ((maybe-pc.label (assq pc (fluid *bc-make-labels*))))
  128. (if maybe-pc.label
  129. (cdr maybe-pc.label)
  130. (begin
  131. (add-pc! pc attribution)
  132. (pc->label pc attribution)))))
  133. (define (label-at-pc? pc)
  134. (if (assq pc (fluid *bc-make-labels*)) #t #f))
  135. ; (enum op make-[big-]flat-env)
  136. ; number of vars
  137. ; number of closures
  138. ; [offset of template in frame
  139. ; offsets of templates in template]
  140. ; number of variables in frame (size)
  141. ; offsets of vars in frame
  142. ; [offset of env in frame
  143. ; number of vars in env
  144. ; offsets of vars in level]*
  145. (define-record-type env-data :env-data
  146. (make-env-data total-count frame-offsets maybe-template-index closure-offsets
  147. env-offsets)
  148. env-data?
  149. (total-count env-data-total-count)
  150. (frame-offsets env-data-frame-offsets)
  151. (maybe-template-index env-data-maybe-template-index)
  152. (closure-offsets env-data-closure-offsets)
  153. (env-offsets env-data-env-offsets))
  154. (define (parse-flat-env-args pc code size fetch)
  155. (let ((start-pc pc)
  156. (total-count (fetch code pc))
  157. (closure-count (fetch code (+ pc size))))
  158. (receive (template-index closure-offsets)
  159. (if (< 0 closure-count)
  160. (values (fetch code (+ pc size size))
  161. (get-offsets code (+ pc size size size)
  162. size fetch closure-count))
  163. (values #f '()))
  164. (let* ((pc (if (< 0 closure-count)
  165. (+ pc
  166. (* 2 size) ; counts
  167. size ; template offset
  168. (* closure-count size)) ; subtemplates
  169. (+ pc (* 2 size)))) ; counts
  170. (frame-count (fetch code pc))
  171. (pc (+ pc size)))
  172. (let ((frame-offsets (get-offsets code pc size fetch frame-count)))
  173. (let ((pc (+ pc (* frame-count size)))
  174. (count (+ closure-count frame-count)))
  175. (let loop ((pc pc) (count count) (rev-env-offsets '()))
  176. (if (= count total-count)
  177. (values (- pc start-pc)
  178. (make-env-data total-count frame-offsets
  179. template-index closure-offsets
  180. (reverse rev-env-offsets)))
  181. (let* ((env (fetch code pc))
  182. (count-here (fetch code (+ pc size)))
  183. (indexes (get-offsets code
  184. (+ pc size size)
  185. size
  186. fetch
  187. count-here)))
  188. (loop (+ pc (* (+ 2 count-here) size))
  189. (+ count count-here)
  190. (cons (cons env indexes) rev-env-offsets)))))))))))
  191. (define (get-offsets code pc size fetch count)
  192. (do ((pc pc (+ pc size))
  193. (i 0 (+ i 1))
  194. (r '() (cons (fetch code pc) r)))
  195. ((= i count)
  196. (reverse r))))
  197. ; Parse a protocol, returning the number of bytes of instruction stream that
  198. ; were consumed. PC has to point behind the PRTOCOL opcode
  199. (define (parse-protocol code pc attribution)
  200. (let ((protocol (code-vector-ref code pc)))
  201. (really-parse-protocol protocol code pc attribution)))
  202. (define (really-parse-protocol protocol code pc attribution)
  203. (cond ((<= protocol maximum-stack-args)
  204. (values 1 (list protocol)))
  205. ((= protocol two-byte-nargs-protocol)
  206. (values 3 (list protocol (get-offset code (+ pc 1)))))
  207. ((= protocol two-byte-nargs+list-protocol)
  208. (values 3 (list protocol (get-offset code (+ pc 1)))))
  209. ((= protocol ignore-values-protocol)
  210. (values 1 (list protocol)))
  211. ((= protocol call-with-values-protocol)
  212. (let ((offset (get-offset code (+ pc 1))))
  213. (values 3 (list protocol
  214. (pc->label (- (+ offset pc) 1)
  215. attribution)
  216. (zero? offset)))))
  217. ((= protocol args+nargs-protocol)
  218. (values 2 (list protocol (code-vector-ref code (+ pc 1)))))
  219. ((= protocol nary-dispatch-protocol)
  220. (values 5 (cons protocol (parse-dispatch code pc attribution))))
  221. ((= protocol big-stack-protocol)
  222. (let ((real-protocol (code-vector-ref code
  223. (- (code-vector-length code) 3)))
  224. (stack-size (get-offset code (- (code-vector-length code) 2))))
  225. (receive (size real-attribution)
  226. (really-parse-protocol real-protocol code pc attribution)
  227. (values size
  228. (list protocol real-attribution stack-size)))))
  229. (else
  230. (error "unknown protocol" protocol pc))))
  231. (define (parse-dispatch code pc attribution)
  232. (define (maybe-parse-one-dispatch index)
  233. (let ((offset (code-vector-ref code (+ pc index))))
  234. (if (= offset 0)
  235. #f
  236. (pc->label (+ offset pc) attribution))))
  237. (map maybe-parse-one-dispatch (list 3 4 5 2)))
  238. (define (protocol-protocol p-args)
  239. (car p-args))
  240. (define (n-ary-protocol? p-args)
  241. (let ((protocol (car p-args)))
  242. (if (or (= protocol two-byte-nargs+list-protocol)
  243. (= protocol call-with-values-protocol)
  244. (= protocol ignore-values-protocol))
  245. #t
  246. (if (or (<= protocol maximum-stack-args)
  247. (= protocol two-byte-nargs-protocol))
  248. #f
  249. (if (= protocol big-stack-protocol)
  250. (n-ary-protocol? (cadr p-args))
  251. (error "unknown protocol in n-ary-protocol?" p-args))))))
  252. (define (protocol-nargs p-args)
  253. (let ((protocol (car p-args)))
  254. (cond ((<= protocol maximum-stack-args)
  255. protocol)
  256. ((= protocol two-byte-nargs-protocol)
  257. (cadr p-args))
  258. ((= protocol two-byte-nargs+list-protocol)
  259. (cadr p-args))
  260. ((= protocol args+nargs-protocol)
  261. (cadr p-args))
  262. ((= protocol big-stack-protocol)
  263. (protocol-nargs (cadr p-args)))
  264. ((= protocol ignore-values-protocol)
  265. 0)
  266. ((= protocol call-with-values-protocol)
  267. (error "call-with-values-protocol in protocol-nargs"))
  268. (else
  269. (error "unknown protocol in protocol-nargs" p-args)))))
  270. (define (protocol-cwv-tailcall? p-args)
  271. (let ((protocol (protocol-protocol p-args)))
  272. (if (not (= protocol call-with-values-protocol))
  273. (error "invalid protocol in protocol-cwv-tailcall?" protocol))
  274. (caddr p-args)))
  275. (define (call-with-values-protocol-target p-args)
  276. (let ((protocol (protocol-protocol p-args)))
  277. (if (not (= protocol call-with-values-protocol))
  278. (error "invalid protocol in protocol-cwv-tailcall?" protocol))
  279. (cadr p-args)))
  280. ; Generic opcode argument parser
  281. (define (parse-opcode-args op start-pc code template attribution)
  282. (let ((specs (vector-ref opcode-arg-specs op)))
  283. (let loop ((specs specs) (pc (+ start-pc 1)) (len 0) (args '()))
  284. (if (null? specs)
  285. (cons len args)
  286. (let ((spec (car specs)))
  287. (cond
  288. ((eq? spec 'protocol)
  289. (receive (size p-args)
  290. (parse-protocol code pc attribution)
  291. (loop (cdr specs)
  292. (+ pc size)
  293. (+ len size)
  294. (cons (cons 'protocol p-args) args))))
  295. ((or (eq? spec 'env-data)
  296. (eq? spec 'big-env-data))
  297. (receive (size env-data)
  298. (receive (slot-size fetch)
  299. (if (eq? spec 'env-data)
  300. (values 1 code-vector-ref)
  301. (values 2 get-offset))
  302. (parse-flat-env-args pc code slot-size fetch))
  303. (loop (cdr specs)
  304. (+ pc size)
  305. (+ len size)
  306. (cons (cons 'env-data env-data) args))))
  307. ((= 0 (arg-spec-size spec pc code))
  308. (cons len args))
  309. (else
  310. (let ((arg (parse-opcode-arg specs
  311. pc
  312. start-pc
  313. code
  314. template
  315. attribution)))
  316. (loop (cdr specs)
  317. (+ pc (arg-spec-size spec pc code))
  318. (+ len (arg-spec-size spec pc code))
  319. (cons arg args))))))))))
  320. ; The number of bytes required by an argument.
  321. (define (arg-spec-size spec pc code)
  322. (case spec
  323. ((byte nargs stack-index index literal stob) 1)
  324. ((two-bytes two-byte-nargs two-byte-stack-index two-byte-index offset offset-) 2)
  325. ((env-data) (error "env-data in arg-spec-size"))
  326. ((protocol) (error "protocol in arg-spec-size"))
  327. ((moves-data)
  328. (let ((n-moves (code-vector-ref code pc)))
  329. (+ 1 (* 2 n-moves))))
  330. ((big-moves-data)
  331. (let ((n-moves (code-vector-ref code pc)))
  332. (+ 2 (* 4 n-moves))))
  333. ((cont-data)
  334. (- (get-offset code pc) 1)) ; size includes opcode
  335. (else 0)))
  336. ; Parse the particular type of argument.
  337. (define (parse-opcode-arg specs pc start-pc code template attribution)
  338. (cons
  339. (car specs)
  340. (case (car specs)
  341. ((byte nargs stack-index index)
  342. (code-vector-ref code pc))
  343. ((two-bytes two-byte-nargs two-byte-stack-index two-byte-index)
  344. (get-offset code pc))
  345. ((literal)
  346. (- (code-vector-ref code pc) 128))
  347. ((offset)
  348. (let ((offset (get-offset code pc)))
  349. (if (zero? offset)
  350. #f
  351. (pc->label (+ start-pc offset) attribution))))
  352. ((offset-)
  353. (pc->label (- start-pc (get-offset code pc)) attribution))
  354. ((stob)
  355. (code-vector-ref code pc))
  356. ((cont-data)
  357. (parse-cont-data-args pc code template attribution))
  358. ((moves-data)
  359. (let ((n-moves (code-vector-ref code pc)))
  360. (let loop ((offset (+ pc 1))
  361. (n n-moves))
  362. (if (zero? n)
  363. '()
  364. (cons (cons (code-vector-ref code offset)
  365. (code-vector-ref code (+ offset 1)))
  366. (loop (+ offset 2) (- n 1)))))))
  367. ((big-moves-data)
  368. (let ((n-moves (get-offset code pc)))
  369. (let loop ((offset (+ pc 2))
  370. (n n-moves))
  371. (if (zero? n)
  372. '()
  373. (cons (cons (get-offset code offset)
  374. (get-offset code (+ offset 2)))
  375. (loop (+ offset 4) (- n 1)))))))
  376. (else (error "unknown arg spec: " (car specs))))))
  377. (define-record-type cont-data :cont-data
  378. (make-cont-data length mask-bytes live-offsets template pc gc-mask-size depth)
  379. cont-data?
  380. (length cont-data-length)
  381. (mask-bytes cont-data-mask-bytes)
  382. ;; #f if all are live
  383. (live-offsets cont-data-live-offsets)
  384. (template cont-data-template)
  385. (pc cont-data-pc)
  386. (gc-mask-size cont-data-gc-mask-size)
  387. (depth cont-data-depth))
  388. (define (parse-cont-data-args pc code template attribution)
  389. (let* ((len (get-offset code pc))
  390. (end-pc (- (+ pc len) 1)) ; len includes opcode
  391. (gc-mask-size (code-vector-ref code (- end-pc 3)))
  392. (depth (get-offset code (- end-pc 2)))
  393. (offset (get-offset code (- end-pc 5)))
  394. (template (get-offset code (- end-pc 7)))
  395. (mask-bytes
  396. (let lp ((the-pc (+ pc 2)) (mask-bytes '()))
  397. (if (>= the-pc (+ pc 2 gc-mask-size))
  398. mask-bytes
  399. (lp (+ the-pc 1)
  400. (cons (code-vector-ref code the-pc) mask-bytes)))))
  401. (live-offsets
  402. (and (not (zero? gc-mask-size))
  403. (gc-mask-live-offsets (bytes->bits mask-bytes)))))
  404. (make-cont-data len
  405. mask-bytes
  406. live-offsets
  407. template
  408. (pc->label offset attribution)
  409. gc-mask-size
  410. depth)))
  411. (define (bytes->bits l)
  412. (let loop ((n 0) (l l))
  413. (if (null? l)
  414. n
  415. (loop (+ (arithmetic-shift n 8) (car l))
  416. (cdr l)))))
  417. (define (gc-mask-live-offsets mask)
  418. (let loop ((mask mask) (i 0) (l '()))
  419. (if (zero? mask)
  420. (reverse l)
  421. (loop (arithmetic-shift mask -1) (+ 1 i)
  422. (if (odd? mask)
  423. (cons i l)
  424. l)))))
  425. ;----------------
  426. ; Utilities.
  427. ; TODO: Put the template-related stuff into a separate module?
  428. ; Turn OBJ into a template, if possible.
  429. (define (coerce-to-template obj)
  430. (cond ((template? obj) obj)
  431. ((closure? obj) (closure-template obj))
  432. ((continuation? obj) (continuation-template obj))
  433. (else (error "expected a procedure or continuation" obj))))
  434. (define (template-code-length code)
  435. (if (and (= (enum op protocol)
  436. (code-vector-ref code 0))
  437. (= big-stack-protocol
  438. (code-vector-ref code 1)))
  439. (- (code-vector-length code) 3)
  440. (code-vector-length code)))
  441. ; Fetch the two-byte value at PC in CODE.
  442. (define (get-offset code pc)
  443. (+ (* (code-vector-ref code pc)
  444. byte-limit)
  445. (code-vector-ref code (+ pc 1))))
  446. ; Should this be in its own spot?
  447. (define-syntax receive
  448. (syntax-rules ()
  449. ((receive ?vars ?producer . ?body)
  450. (call-with-values (lambda () ?producer)
  451. (lambda ?vars . ?body)))))