slot-allocation.scm 41 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014
  1. ;; Continuation-passing style (CPS) intermediate language (IL)
  2. ;; Copyright (C) 2013-2019 Free Software Foundation, Inc.
  3. ;;;; This library is free software; you can redistribute it and/or
  4. ;;;; modify it under the terms of the GNU Lesser General Public
  5. ;;;; License as published by the Free Software Foundation; either
  6. ;;;; version 3 of the License, or (at your option) any later version.
  7. ;;;;
  8. ;;;; This library is distributed in the hope that it will be useful,
  9. ;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. ;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. ;;;; Lesser General Public License for more details.
  12. ;;;;
  13. ;;;; You should have received a copy of the GNU Lesser General Public
  14. ;;;; License along with this library; if not, write to the Free Software
  15. ;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  16. ;;; Commentary:
  17. ;;;
  18. ;;; A module to assign stack slots to variables in a CPS term.
  19. ;;;
  20. ;;; Code:
  21. (define-module (language cps slot-allocation)
  22. #:use-module (ice-9 control)
  23. #:use-module (ice-9 match)
  24. #:use-module (srfi srfi-1)
  25. #:use-module (srfi srfi-9)
  26. #:use-module (srfi srfi-11)
  27. #:use-module (srfi srfi-26)
  28. #:use-module (language cps)
  29. #:use-module (language cps utils)
  30. #:use-module (language cps intmap)
  31. #:use-module (language cps intset)
  32. #:export (allocate-slots
  33. lookup-slot
  34. lookup-maybe-slot
  35. lookup-representation
  36. lookup-nlocals
  37. lookup-call-proc-slot
  38. lookup-parallel-moves
  39. lookup-slot-map))
  40. (define-record-type $allocation
  41. (make-allocation slots representations call-allocs shuffles frame-size)
  42. allocation?
  43. ;; A map of VAR to slot allocation. A slot allocation is an integer,
  44. ;; if the variable has been assigned a slot.
  45. ;;
  46. (slots allocation-slots)
  47. ;; A map of VAR to representation. A representation is 'scm, 'f64,
  48. ;; 'u64, or 's64.
  49. ;;
  50. (representations allocation-representations)
  51. ;; A map of LABEL to /call allocs/, for expressions that continue to
  52. ;; $kreceive continuations: non-tail calls and $prompt terms.
  53. ;;
  54. ;; A call alloc contains two pieces of information: the call's /proc
  55. ;; slot/ and a /dead slot map/. The proc slot indicates the slot of a
  56. ;; procedure in a procedure call, or where the procedure would be in a
  57. ;; multiple-value return.
  58. ;;
  59. ;; The dead slot map indicates, what slots should be ignored by GC
  60. ;; when marking the frame. A dead slot map is a bitfield, as an
  61. ;; integer.
  62. ;;
  63. (call-allocs allocation-call-allocs)
  64. ;; A map of LABEL to /parallel moves/. Parallel moves shuffle locals
  65. ;; into position for a $call, $callk, or $values, or shuffle returned
  66. ;; values back into place in a $kreceive.
  67. ;;
  68. ;; A set of moves is expressed as an ordered list of (SRC . DST)
  69. ;; moves, where SRC and DST are slots. This may involve a temporary
  70. ;; variable.
  71. ;;
  72. (shuffles allocation-shuffles)
  73. ;; The number of local slots needed for this function. Because we can
  74. ;; contify common clause tails, we use one frame size for all clauses
  75. ;; to avoid having to adjust the frame size when continuing to labels
  76. ;; from other clauses.
  77. ;;
  78. (frame-size allocation-frame-size))
  79. (define-record-type $call-alloc
  80. (make-call-alloc proc-slot slot-map)
  81. call-alloc?
  82. (proc-slot call-alloc-proc-slot)
  83. (slot-map call-alloc-slot-map))
  84. (define (lookup-maybe-slot var allocation)
  85. (intmap-ref (allocation-slots allocation) var (lambda (_) #f)))
  86. (define (lookup-slot var allocation)
  87. (intmap-ref (allocation-slots allocation) var))
  88. (define (lookup-representation var allocation)
  89. (intmap-ref (allocation-representations allocation) var))
  90. (define *absent* (list 'absent))
  91. (define (lookup-call-alloc k allocation)
  92. (intmap-ref (allocation-call-allocs allocation) k))
  93. (define (lookup-call-proc-slot k allocation)
  94. (or (call-alloc-proc-slot (lookup-call-alloc k allocation))
  95. (error "Call has no proc slot" k)))
  96. (define (lookup-parallel-moves k allocation)
  97. (intmap-ref (allocation-shuffles allocation) k))
  98. (define (lookup-slot-map k allocation)
  99. (or (call-alloc-slot-map (lookup-call-alloc k allocation))
  100. (error "Call has no slot map" k)))
  101. (define (lookup-nlocals allocation)
  102. (allocation-frame-size allocation))
  103. (define-syntax-rule (persistent-intmap2 exp)
  104. (call-with-values (lambda () exp)
  105. (lambda (a b)
  106. (values (persistent-intmap a) (persistent-intmap b)))))
  107. (define (compute-defs-and-uses cps)
  108. "Return two LABEL->VAR... maps indicating values defined at and used
  109. by a label, respectively."
  110. (define (vars->intset vars)
  111. (fold (lambda (var set) (intset-add set var)) empty-intset vars))
  112. (persistent-intmap2
  113. (intmap-fold
  114. (lambda (label cont defs uses)
  115. (define (get-defs k)
  116. (match (intmap-ref cps k)
  117. (($ $kargs names vars) (vars->intset vars))
  118. (_ empty-intset)))
  119. (define (return d u)
  120. (values (intmap-add! defs label d)
  121. (intmap-add! uses label u)))
  122. (match cont
  123. (($ $kfun src meta self)
  124. (return (if self (intset self) empty-intset) empty-intset))
  125. (($ $kargs _ _ ($ $continue k src exp))
  126. (match exp
  127. ((or ($ $const) ($ $const-fun) ($ $code))
  128. (return (get-defs k) empty-intset))
  129. (($ $call proc args)
  130. (return (get-defs k) (intset-add (vars->intset args) proc)))
  131. (($ $callk _ proc args)
  132. (let ((args (vars->intset args)))
  133. (return (get-defs k) (if proc (intset-add args proc) args))))
  134. (($ $primcall name param args)
  135. (return (get-defs k) (vars->intset args)))
  136. (($ $values args)
  137. (return (get-defs k) (vars->intset args)))))
  138. (($ $kargs _ _ ($ $branch kf kt src op param args))
  139. (return empty-intset (vars->intset args)))
  140. (($ $kargs _ _ ($ $prompt k kh src escape? tag))
  141. (return empty-intset (intset tag)))
  142. (($ $kargs _ _ ($ $throw src op param args))
  143. (return empty-intset (vars->intset args)))
  144. (($ $kclause arity body alt)
  145. (return (get-defs body) empty-intset))
  146. (($ $kreceive arity kargs)
  147. (return (get-defs kargs) empty-intset))
  148. (($ $ktail)
  149. (return empty-intset empty-intset))))
  150. cps
  151. empty-intmap
  152. empty-intmap)))
  153. (define (compute-reverse-control-flow-order preds)
  154. "Return a LABEL->ORDER bijection where ORDER is a contiguous set of
  155. integers starting from 0 and incrementing in sort order. There is a
  156. precondition that labels in PREDS are already renumbered in reverse post
  157. order."
  158. (define (has-back-edge? preds)
  159. (let/ec return
  160. (intmap-fold (lambda (label labels)
  161. (intset-fold (lambda (pred)
  162. (if (<= label pred)
  163. (return #t)
  164. (values)))
  165. labels)
  166. (values))
  167. preds)
  168. #f))
  169. (if (has-back-edge? preds)
  170. ;; This is more involved than forward control flow because not all
  171. ;; live labels are reachable from the tail.
  172. (persistent-intmap
  173. (fold2 (lambda (component order n)
  174. (intset-fold (lambda (label order n)
  175. (values (intmap-add! order label n)
  176. (1+ n)))
  177. component order n))
  178. (reverse (compute-sorted-strongly-connected-components preds))
  179. empty-intmap 0))
  180. ;; Just reverse forward control flow.
  181. (let ((max (intmap-prev preds)))
  182. (intmap-map (lambda (label labels) (- max label)) preds))))
  183. (define* (add-prompt-control-flow-edges conts succs #:key complete?)
  184. "For all prompts in DFG in the range [MIN-LABEL, MIN-LABEL +
  185. LABEL-COUNT), invoke F with arguments PROMPT, HANDLER, and BODY for each
  186. body continuation in the prompt."
  187. (define (intset-filter pred set)
  188. (intset-fold (lambda (i set)
  189. (if (pred i) set (intset-remove set i)))
  190. set
  191. set))
  192. (define (intset-any pred set)
  193. (intset-fold (lambda (i res)
  194. (if (or res (pred i)) #t res))
  195. set
  196. #f))
  197. (define (compute-prompt-body label)
  198. (persistent-intset
  199. (let visit-cont ((label label) (level 1) (labels empty-intset))
  200. (cond
  201. ((zero? level) labels)
  202. ((intset-ref labels label) labels)
  203. (else
  204. (let ((labels (intset-add! labels label)))
  205. (match (intmap-ref conts label)
  206. (($ $kreceive arity k) (visit-cont k level labels))
  207. (($ $kargs names syms ($ $continue k src ($ $primcall 'wind)))
  208. (visit-cont k (1+ level) labels))
  209. (($ $kargs names syms ($ $continue k src ($ $primcall 'unwind)))
  210. (visit-cont k (1- level) labels))
  211. (($ $kargs names syms ($ $continue k src exp))
  212. (visit-cont k level labels))
  213. (($ $kargs names syms ($ $branch kf kt))
  214. (visit-cont kf level (visit-cont kt level labels)))
  215. (($ $kargs names syms ($ $prompt k kh src escape? tag))
  216. (visit-cont kh level (visit-cont k (1+ level) labels)))
  217. (($ $kargs names syms ($ $throw)) labels))))))))
  218. (define (visit-prompt label handler succs)
  219. (let ((body (compute-prompt-body label)))
  220. (define (out-or-back-edge? label)
  221. ;; Most uses of visit-prompt-control-flow don't need every body
  222. ;; continuation, and would be happy getting called only for
  223. ;; continuations that postdominate the rest of the body. Unless
  224. ;; you pass #:complete? #t, we only invoke F on continuations
  225. ;; that can leave the body, or on back-edges in loops.
  226. (not (intset-any (lambda (succ)
  227. (and (intset-ref body succ) (< label succ)))
  228. (intmap-ref succs label))))
  229. (intset-fold (lambda (pred succs)
  230. (intmap-replace succs pred handler intset-add))
  231. (if complete? body (intset-filter out-or-back-edge? body))
  232. succs)))
  233. (intmap-fold
  234. (lambda (label cont succs)
  235. (match cont
  236. (($ $kargs _ _ ($ $prompt k kh))
  237. (visit-prompt k kh succs))
  238. (_ succs)))
  239. conts
  240. succs))
  241. (define (rename-keys map old->new)
  242. (persistent-intmap
  243. (intmap-fold (lambda (k v out)
  244. (intmap-add! out (intmap-ref old->new k) v))
  245. map
  246. empty-intmap)))
  247. (define (rename-intset set old->new)
  248. (intset-fold (lambda (old set) (intset-add set (intmap-ref old->new old)))
  249. set empty-intset))
  250. (define (rename-graph graph old->new)
  251. (persistent-intmap
  252. (intmap-fold (lambda (pred succs out)
  253. (intmap-add! out
  254. (intmap-ref old->new pred)
  255. (rename-intset succs old->new)))
  256. graph
  257. empty-intmap)))
  258. (define (compute-live-variables cps defs uses)
  259. "Compute and return two values mapping LABEL->VAR..., where VAR... are
  260. the definitions that are live before and after LABEL, as intsets."
  261. (let* ((succs (add-prompt-control-flow-edges cps (compute-successors cps)))
  262. (preds (invert-graph succs))
  263. (old->new (compute-reverse-control-flow-order preds))
  264. (init (persistent-intmap (intmap-fold
  265. (lambda (old new init)
  266. (intmap-add! init new empty-intset))
  267. old->new empty-intmap))))
  268. (call-with-values
  269. (lambda ()
  270. (solve-flow-equations (rename-graph preds old->new)
  271. init init
  272. (rename-keys defs old->new)
  273. (rename-keys uses old->new)
  274. intset-subtract intset-union intset-union))
  275. (lambda (in out)
  276. ;; As a reverse control-flow problem, the values flowing into a
  277. ;; node are actually the live values after the node executes.
  278. ;; Funny, innit? So we return them in the reverse order.
  279. (let ((new->old (invert-bijection old->new)))
  280. (values (rename-keys out new->old)
  281. (rename-keys in new->old)))))))
  282. (define (compute-needs-slot cps defs uses)
  283. (define (get-defs k) (intmap-ref defs k))
  284. (define (get-uses label) (intmap-ref uses label))
  285. (intmap-fold
  286. (lambda (label cont needs-slot)
  287. (intset-union
  288. needs-slot
  289. (match cont
  290. (($ $kargs)
  291. (intset-union (get-defs label) (get-uses label)))
  292. (($ $kreceive arity k)
  293. ;; Only allocate results of function calls to slots if they are
  294. ;; used.
  295. empty-intset)
  296. (($ $kclause arity body alternate)
  297. (get-defs label))
  298. (($ $kfun src meta self)
  299. (if self (intset self) empty-intset))
  300. (($ $ktail)
  301. empty-intset))))
  302. cps
  303. empty-intset))
  304. (define (compute-lazy-vars cps live-in live-out defs needs-slot)
  305. "Compute and return a set of vars whose allocation can be delayed
  306. until their use is seen. These are \"lazy\" vars. A var is lazy if its
  307. uses are calls, it is always dead after the calls, and if the uses flow
  308. to the definition. A flow continues across a node iff the node kills no
  309. values that need slots, and defines only lazy vars. Calls also kill
  310. flows; there's no sense in trying to juggle a pending frame while there
  311. is an active call."
  312. (define (list->intset list)
  313. (persistent-intset
  314. (fold (lambda (i set) (intset-add! set i)) empty-intset list)))
  315. (let* ((succs (compute-successors cps))
  316. (gens (intmap-map
  317. (lambda (label cont)
  318. (match cont
  319. (($ $kargs _ _ ($ $continue _ _ ($ $call proc args)))
  320. (intset-subtract (intset-add (list->intset args) proc)
  321. (intmap-ref live-out label)))
  322. (($ $kargs _ _ ($ $continue _ _ ($ $callk _ proc args)))
  323. (let ((args (list->intset args)))
  324. (intset-subtract (if proc (intset-add args proc) args)
  325. (intmap-ref live-out label))))
  326. (($ $kargs _ _ ($ $continue k _($ $values args)))
  327. (match (intmap-ref cps k)
  328. (($ $ktail) (list->intset args))
  329. (_ #f)))
  330. (_ #f)))
  331. cps))
  332. (kills (intmap-map
  333. (lambda (label in)
  334. (let* ((out (intmap-ref live-out label))
  335. (killed (intset-subtract in out))
  336. (killed-slots (intset-intersect killed needs-slot)))
  337. (and (eq? killed-slots empty-intset)
  338. ;; Kill output variables that need slots.
  339. (intset-intersect (intmap-ref defs label)
  340. needs-slot))))
  341. live-in))
  342. (preds (invert-graph succs))
  343. (old->new (compute-reverse-control-flow-order preds)))
  344. (define (subtract lazy kill)
  345. (cond
  346. ((eq? lazy empty-intset)
  347. lazy)
  348. ((not kill)
  349. empty-intset)
  350. ((and lazy (eq? empty-intset (intset-subtract kill lazy)))
  351. (intset-subtract lazy kill))
  352. (else
  353. empty-intset)))
  354. (define (add live gen) (or gen live))
  355. (define (meet in out)
  356. ;; Initial in is #f.
  357. (if in (intset-intersect in out) out))
  358. (call-with-values
  359. (lambda ()
  360. (let ((succs (rename-graph preds old->new))
  361. (init (persistent-intmap
  362. (intmap-fold
  363. (lambda (old new in)
  364. (intmap-add! in new #f))
  365. old->new empty-intmap)))
  366. (kills (rename-keys kills old->new))
  367. (gens (rename-keys gens old->new)))
  368. (solve-flow-equations succs init init kills gens
  369. subtract add meet)))
  370. (lambda (in out)
  371. ;; A variable is lazy if its uses reach its definition.
  372. (intmap-fold (lambda (label out lazy)
  373. (match (intmap-ref cps label)
  374. (($ $kargs names vars)
  375. (let ((defs (list->intset vars)))
  376. (intset-union lazy (intset-intersect out defs))))
  377. (_ lazy)))
  378. (rename-keys out (invert-bijection old->new))
  379. empty-intset)))))
  380. (define (find-first-zero n)
  381. ;; Naive implementation.
  382. (let lp ((slot 0))
  383. (if (logbit? slot n)
  384. (lp (1+ slot))
  385. slot)))
  386. (define (find-first-trailing-zero n)
  387. (let lp ((slot (let lp ((count 2))
  388. (if (< n (ash 1 (1- count)))
  389. count
  390. ;; Grow upper bound slower than factor 2 to avoid
  391. ;; needless bignum allocation on 32-bit systems
  392. ;; when there are more than 16 locals.
  393. (lp (+ count (ash count -1)))))))
  394. (if (or (zero? slot) (logbit? (1- slot) n))
  395. slot
  396. (lp (1- slot)))))
  397. (define (integers from count)
  398. (if (zero? count)
  399. '()
  400. (cons from (integers (1+ from) (1- count)))))
  401. (define (solve-parallel-move src dst tmp)
  402. "Solve the parallel move problem between src and dst slot lists, which
  403. are comparable with eqv?. A tmp slot may be used."
  404. ;; This algorithm is taken from: "Tilting at windmills with Coq:
  405. ;; formal verification of a compilation algorithm for parallel moves"
  406. ;; by Laurence Rideau, Bernard Paul Serpette, and Xavier Leroy
  407. ;; <http://gallium.inria.fr/~xleroy/publi/parallel-move.pdf>
  408. (define (split-move moves reg)
  409. (let loop ((revhead '()) (tail moves))
  410. (match tail
  411. (((and s+d (s . d)) . rest)
  412. (if (eqv? s reg)
  413. (cons d (append-reverse revhead rest))
  414. (loop (cons s+d revhead) rest)))
  415. (_ #f))))
  416. (define (replace-last-source reg moves)
  417. (match moves
  418. ((moves ... (s . d))
  419. (append moves (list (cons reg d))))))
  420. (let loop ((to-move (map cons src dst))
  421. (being-moved '())
  422. (moved '())
  423. (last-source #f))
  424. ;; 'last-source' should always be equivalent to:
  425. ;; (and (pair? being-moved) (car (last being-moved)))
  426. (match being-moved
  427. (() (match to-move
  428. (() (reverse moved))
  429. (((and s+d (s . d)) . t1)
  430. (if (or (eqv? s d) ; idempotent
  431. (not s)) ; src is a constant and can be loaded directly
  432. (loop t1 '() moved #f)
  433. (loop t1 (list s+d) moved s)))))
  434. (((and s+d (s . d)) . b)
  435. (match (split-move to-move d)
  436. ((r . t1) (loop t1 (acons d r being-moved) moved last-source))
  437. (#f (match b
  438. (() (loop to-move '() (cons s+d moved) #f))
  439. (_ (if (eqv? d last-source)
  440. (loop to-move
  441. (replace-last-source tmp b)
  442. (cons s+d (acons d tmp moved))
  443. tmp)
  444. (loop to-move b (cons s+d moved) last-source))))))))))
  445. (define (compute-shuffles cps slots call-allocs live-in)
  446. (define (add-live-slot slot live-slots)
  447. (logior live-slots (ash 1 slot)))
  448. (define (get-cont label)
  449. (intmap-ref cps label))
  450. (define (get-slot var)
  451. (intmap-ref slots var (lambda (_) #f)))
  452. (define (get-slots vars)
  453. (let lp ((vars vars))
  454. (match vars
  455. ((var . vars) (cons (get-slot var) (lp vars)))
  456. (_ '()))))
  457. (define (get-proc-slot label)
  458. (call-alloc-proc-slot (intmap-ref call-allocs label)))
  459. (define (compute-live-slots label)
  460. (intset-fold (lambda (var live)
  461. (match (get-slot var)
  462. (#f live)
  463. (slot (add-live-slot slot live))))
  464. (intmap-ref live-in label)
  465. 0))
  466. ;; Although some parallel moves may proceed without a temporary slot,
  467. ;; in general one is needed. That temporary slot must not be part of
  468. ;; the source or destination sets, and that slot should not correspond
  469. ;; to a live variable. Usually the source and destination sets are a
  470. ;; subset of the union of the live sets before and after the move.
  471. ;; However for stack slots that don't have names -- those slots that
  472. ;; correspond to function arguments or to function return values -- it
  473. ;; could be that they are out of the computed live set. In that case
  474. ;; they need to be adjoined to the live set, used when choosing a
  475. ;; temporary slot.
  476. (define (compute-tmp-slot live stack-slots)
  477. (find-first-zero (fold add-live-slot live stack-slots)))
  478. (define (parallel-move src-slots dst-slots tmp-slot)
  479. (solve-parallel-move src-slots dst-slots tmp-slot))
  480. (define (compute-receive-shuffles label proc-slot)
  481. (match (get-cont label)
  482. (($ $kreceive arity kargs)
  483. (let* ((results (match (get-cont kargs)
  484. (($ $kargs names vars) vars)))
  485. (value-slots (integers proc-slot (length results)))
  486. (result-slots (get-slots results))
  487. ;; Filter out unused results.
  488. (value-slots (filter-map (lambda (val result) (and result val))
  489. value-slots result-slots))
  490. (result-slots (filter (lambda (x) x) result-slots))
  491. (live (compute-live-slots kargs)))
  492. (parallel-move value-slots
  493. result-slots
  494. (compute-tmp-slot live value-slots))))))
  495. (define (add-call-shuffles label k args shuffles)
  496. (match (get-cont k)
  497. (($ $ktail)
  498. (let* ((live (compute-live-slots label))
  499. (tail-slots (integers 0 (length args)))
  500. (moves (parallel-move (get-slots args)
  501. tail-slots
  502. (compute-tmp-slot live tail-slots))))
  503. (intmap-add! shuffles label moves)))
  504. (($ $kreceive)
  505. (let* ((live (compute-live-slots label))
  506. (proc-slot (get-proc-slot label))
  507. (call-slots (integers proc-slot (length args)))
  508. (arg-moves (parallel-move (get-slots args)
  509. call-slots
  510. (compute-tmp-slot live call-slots))))
  511. (intmap-add! (intmap-add! shuffles label arg-moves)
  512. k (compute-receive-shuffles k proc-slot))))))
  513. (define (add-values-shuffles label k args shuffles)
  514. (match (get-cont k)
  515. (($ $ktail)
  516. (let* ((live (compute-live-slots label))
  517. (src-slots (get-slots args))
  518. (dst-slots (integers 0 (length args)))
  519. (moves (parallel-move src-slots dst-slots
  520. (compute-tmp-slot live dst-slots))))
  521. (intmap-add! shuffles label moves)))
  522. (($ $kargs _ dst-vars)
  523. (let* ((live (logior (compute-live-slots label)
  524. (compute-live-slots k)))
  525. (src-slots (get-slots args))
  526. (dst-slots (get-slots dst-vars))
  527. (moves (parallel-move src-slots dst-slots
  528. (compute-tmp-slot live '()))))
  529. (intmap-add! shuffles label moves)))))
  530. (define (add-prompt-shuffles label k handler shuffles)
  531. (intmap-add! shuffles handler
  532. (compute-receive-shuffles handler (get-proc-slot label))))
  533. (define (compute-shuffles label cont shuffles)
  534. (match cont
  535. (($ $kargs names vars ($ $continue k src exp))
  536. (match exp
  537. (($ $call proc args)
  538. (add-call-shuffles label k (cons proc args) shuffles))
  539. (($ $callk _ proc args)
  540. (add-call-shuffles label k (if proc (cons proc args) args) shuffles))
  541. (($ $values args)
  542. (add-values-shuffles label k args shuffles))
  543. (_ shuffles)))
  544. (($ $kargs names vars ($ $prompt k kh src escape? tag))
  545. (add-prompt-shuffles label k kh shuffles))
  546. (_ shuffles)))
  547. (persistent-intmap
  548. (intmap-fold compute-shuffles cps empty-intmap)))
  549. (define (compute-frame-size cps slots call-allocs shuffles)
  550. ;; Minimum frame has one slot: the closure.
  551. (define minimum-frame-size 1)
  552. (define (get-shuffles label)
  553. (intmap-ref shuffles label))
  554. (define (get-proc-slot label)
  555. (match (intmap-ref call-allocs label (lambda (_) #f))
  556. (#f 0) ;; Tail call.
  557. (($ $call-alloc proc-slot) proc-slot)))
  558. (define (max-size var size)
  559. (match (intmap-ref slots var (lambda (_) #f))
  560. (#f size)
  561. (slot (max size (1+ slot)))))
  562. (define (max-size* vars size)
  563. (fold max-size size vars))
  564. (define (shuffle-size moves size)
  565. (match moves
  566. (() size)
  567. (((src . dst) . moves)
  568. (shuffle-size moves (max size (1+ src) (1+ dst))))))
  569. (define (call-size label nargs size)
  570. (shuffle-size (get-shuffles label)
  571. (max (+ (get-proc-slot label) nargs) size)))
  572. (define (measure-cont label cont size)
  573. (match cont
  574. (($ $kargs names vars term)
  575. (let ((size (max-size* vars size)))
  576. (match term
  577. (($ $continue _ _ ($ $call proc args))
  578. (call-size label (1+ (length args)) size))
  579. (($ $continue _ _ ($ $callk _ proc args))
  580. (let ((nclosure (if proc 1 0)))
  581. (call-size label (+ nclosure (length args)) size)))
  582. (($ $continue _ _ ($ $values args))
  583. (shuffle-size (get-shuffles label) size))
  584. (_ size))))
  585. (($ $kreceive)
  586. (shuffle-size (get-shuffles label) size))
  587. (_ size)))
  588. (intmap-fold measure-cont cps minimum-frame-size))
  589. (define (allocate-args cps)
  590. (match (intmap-ref cps (intmap-next cps))
  591. (($ $kfun _ _ has-self?)
  592. (intmap-fold (lambda (label cont slots)
  593. (match cont
  594. (($ $kfun src meta self)
  595. (if has-self?
  596. (intmap-add! slots self 0)
  597. slots))
  598. (($ $kclause arity body alt)
  599. (match (intmap-ref cps body)
  600. (($ $kargs names vars)
  601. (let lp ((vars vars) (slots slots)
  602. (n (if has-self? 1 0)))
  603. (match vars
  604. (() slots)
  605. ((var . vars)
  606. (lp vars
  607. (intmap-add! slots var n)
  608. (1+ n))))))))
  609. (_ slots)))
  610. cps empty-intmap))))
  611. (define-inlinable (add-live-slot slot live-slots)
  612. (logior live-slots (ash 1 slot)))
  613. (define-inlinable (kill-dead-slot slot live-slots)
  614. (logand live-slots (lognot (ash 1 slot))))
  615. (define-inlinable (compute-slot live-slots hint)
  616. (if (and hint (not (logbit? hint live-slots)))
  617. hint
  618. (find-first-zero live-slots)))
  619. (define (allocate-lazy-vars cps slots call-allocs live-in lazy)
  620. (define (compute-live-slots slots label)
  621. (intset-fold (lambda (var live)
  622. (match (intmap-ref slots var (lambda (_) #f))
  623. (#f live)
  624. (slot (add-live-slot slot live))))
  625. (intmap-ref live-in label)
  626. 0))
  627. (define (allocate var hint slots live)
  628. (match (and hint (intmap-ref slots var (lambda (_) #f)))
  629. (#f (if (intset-ref lazy var)
  630. (let ((slot (compute-slot live hint)))
  631. (values (intmap-add! slots var slot)
  632. (add-live-slot slot live)))
  633. (values slots live)))
  634. (slot (values slots (add-live-slot slot live)))))
  635. (define (allocate* vars hints slots live)
  636. (match (vector vars hints)
  637. (#(() ()) slots)
  638. (#((var . vars) (hint . hints))
  639. (let-values (((slots live) (allocate var hint slots live)))
  640. (allocate* vars hints slots live)))))
  641. (define (get-proc-slot label)
  642. (match (intmap-ref call-allocs label (lambda (_) #f))
  643. (#f 0)
  644. (call (call-alloc-proc-slot call))))
  645. (define (allocate-call label args slots)
  646. (allocate* args (integers (get-proc-slot label) (length args))
  647. slots (compute-live-slots slots label)))
  648. (define (allocate-values label k args slots)
  649. (match (intmap-ref cps k)
  650. (($ $ktail)
  651. (allocate* args (integers 0 (length args))
  652. slots (compute-live-slots slots label)))
  653. (($ $kargs names vars)
  654. (allocate* args
  655. (map (cut intmap-ref slots <> (lambda (_) #f)) vars)
  656. slots (compute-live-slots slots label)))))
  657. (define (allocate-lazy label cont slots)
  658. (match cont
  659. (($ $kargs names vars ($ $continue k src exp))
  660. (match exp
  661. (($ $call proc args)
  662. (allocate-call label (cons proc args) slots))
  663. (($ $callk _ proc args)
  664. (allocate-call label (if proc (cons proc args) args) slots))
  665. (($ $values args)
  666. (allocate-values label k args slots))
  667. (_ slots)))
  668. (_
  669. slots)))
  670. ;; Sweep right to left to visit uses before definitions.
  671. (persistent-intmap
  672. (intmap-fold-right allocate-lazy cps slots)))
  673. (define (compute-var-representations cps)
  674. (define (get-defs k)
  675. (match (intmap-ref cps k)
  676. (($ $kargs names vars) vars)
  677. (_ '())))
  678. (intmap-fold
  679. (lambda (label cont representations)
  680. (match cont
  681. (($ $kargs _ _ ($ $continue k _ exp))
  682. (match (get-defs k)
  683. (() representations)
  684. ((var)
  685. (match exp
  686. (($ $values (arg))
  687. (intmap-add representations var
  688. (intmap-ref representations arg)))
  689. (($ $primcall (or 'scm->f64 'load-f64 's64->f64
  690. 'f32-ref 'f64-ref
  691. 'fadd 'fsub 'fmul 'fdiv 'fsqrt 'fabs
  692. 'ffloor 'fceiling
  693. 'fsin 'fcos 'ftan 'fasin 'facos 'fatan 'fatan2))
  694. (intmap-add representations var 'f64))
  695. (($ $primcall (or 'scm->u64 'scm->u64/truncate 'load-u64
  696. 's64->u64
  697. 'assume-u64
  698. 'uadd 'usub 'umul
  699. 'ulogand 'ulogior 'ulogxor 'ulogsub 'ursh 'ulsh
  700. 'uadd/immediate 'usub/immediate 'umul/immediate
  701. 'ursh/immediate 'ulsh/immediate
  702. 'u8-ref 'u16-ref 'u32-ref 'u64-ref
  703. 'word-ref 'word-ref/immediate
  704. 'untag-char))
  705. (intmap-add representations var 'u64))
  706. (($ $primcall (or 'untag-fixnum
  707. 'assume-s64
  708. 'scm->s64 'load-s64 'u64->s64
  709. 'srsh 'srsh/immediate
  710. 's8-ref 's16-ref 's32-ref 's64-ref))
  711. (intmap-add representations var 's64))
  712. (($ $primcall (or 'pointer-ref/immediate
  713. 'tail-pointer-ref/immediate))
  714. (intmap-add representations var 'ptr))
  715. (($ $code)
  716. (intmap-add representations var 'u64))
  717. (_
  718. (intmap-add representations var 'scm))))
  719. (vars
  720. (match exp
  721. (($ $values args)
  722. (fold (lambda (arg var representations)
  723. (intmap-add representations var
  724. (intmap-ref representations arg)))
  725. representations args vars))))))
  726. (($ $kargs _ _ (or ($ $branch) ($ $prompt) ($ $throw)))
  727. representations)
  728. (($ $kfun src meta self)
  729. (if self
  730. (intmap-add representations self 'scm)
  731. representations))
  732. (($ $kclause arity body alt)
  733. (fold1 (lambda (var representations)
  734. (intmap-add representations var 'scm))
  735. (get-defs body) representations))
  736. (($ $kreceive arity kargs)
  737. (fold1 (lambda (var representations)
  738. (intmap-add representations var 'scm))
  739. (get-defs kargs) representations))
  740. (($ $ktail) representations)))
  741. cps
  742. empty-intmap))
  743. (define* (allocate-slots cps #:key (precolor-calls? #t))
  744. (let*-values (((defs uses) (compute-defs-and-uses cps))
  745. ((representations) (compute-var-representations cps))
  746. ((live-in live-out) (compute-live-variables cps defs uses))
  747. ((needs-slot) (compute-needs-slot cps defs uses))
  748. ((lazy) (if precolor-calls?
  749. (compute-lazy-vars cps live-in live-out defs
  750. needs-slot)
  751. empty-intset)))
  752. (define frame-size 3)
  753. (define (empty-live-slots)
  754. #b0)
  755. (define (compute-call-proc-slot live-slots)
  756. (+ frame-size (find-first-trailing-zero live-slots)))
  757. (define (compute-prompt-handler-proc-slot live-slots)
  758. (find-first-trailing-zero live-slots))
  759. (define (get-cont label)
  760. (intmap-ref cps label))
  761. (define (get-slot slots var)
  762. (intmap-ref slots var (lambda (_) #f)))
  763. (define (get-slots slots vars)
  764. (let lp ((vars vars))
  765. (match vars
  766. ((var . vars) (cons (get-slot slots var) (lp vars)))
  767. (_ '()))))
  768. (define (compute-live-slots* slots label live-vars)
  769. (intset-fold (lambda (var live)
  770. (match (get-slot slots var)
  771. (#f live)
  772. (slot (add-live-slot slot live))))
  773. (intmap-ref live-vars label)
  774. 0))
  775. (define (compute-live-in-slots slots label)
  776. (compute-live-slots* slots label live-in))
  777. (define (compute-live-out-slots slots label)
  778. (compute-live-slots* slots label live-out))
  779. (define slot-desc-dead 0)
  780. (define slot-desc-live-raw 1)
  781. (define slot-desc-live-scm 2)
  782. (define slot-desc-unused 3)
  783. (define (compute-slot-map slots live-vars nslots)
  784. (intset-fold
  785. (lambda (var slot-map)
  786. (match (get-slot slots var)
  787. (#f slot-map)
  788. (slot
  789. (let ((desc (match (intmap-ref representations var)
  790. ((or 'u64 'f64 's64 'ptr) slot-desc-live-raw)
  791. ('scm slot-desc-live-scm))))
  792. (logior slot-map (ash desc (* 2 slot)))))))
  793. live-vars 0))
  794. (define (allocate var hint slots live)
  795. (cond
  796. ((not (intset-ref needs-slot var))
  797. (values slots live))
  798. ((get-slot slots var)
  799. => (lambda (slot)
  800. (values slots (add-live-slot slot live))))
  801. ((and (not hint) (intset-ref lazy var))
  802. (values slots live))
  803. (else
  804. (let ((slot (compute-slot live hint)))
  805. (values (intmap-add! slots var slot)
  806. (add-live-slot slot live))))))
  807. (define (allocate* vars hints slots live)
  808. (match (vector vars hints)
  809. (#(() ()) (values slots live))
  810. (#((var . vars) (hint . hints))
  811. (call-with-values (lambda () (allocate var hint slots live))
  812. (lambda (slots live)
  813. (allocate* vars hints slots live))))))
  814. (define (allocate-defs label vars slots)
  815. (let ((live (compute-live-in-slots slots label))
  816. (live-vars (intmap-ref live-in label)))
  817. (let lp ((vars vars) (slots slots) (live live))
  818. (match vars
  819. (() (values slots live))
  820. ((var . vars)
  821. (call-with-values (lambda () (allocate var #f slots live))
  822. (lambda (slots live)
  823. (lp vars slots
  824. (let ((slot (get-slot slots var)))
  825. (if (and slot (not (intset-ref live-vars var)))
  826. (kill-dead-slot slot live)
  827. live))))))))))
  828. ;; PRE-LIVE are the live slots coming into the term. POST-LIVE
  829. ;; is the subset of PRE-LIVE that is still live after the term
  830. ;; uses its inputs.
  831. (define (allocate-call label k args slots call-allocs pre-live)
  832. (match (get-cont k)
  833. (($ $ktail)
  834. (let ((tail-slots (integers 0 (length args))))
  835. (values (allocate* args tail-slots slots pre-live)
  836. call-allocs)))
  837. (($ $kreceive arity kargs)
  838. (let*-values
  839. (((post-live) (compute-live-out-slots slots label))
  840. ((proc-slot) (compute-call-proc-slot post-live))
  841. ((call-slots) (integers proc-slot (length args)))
  842. ((slots pre-live) (allocate* args call-slots slots pre-live))
  843. ;; Allow the first result to be hinted by its use, but
  844. ;; hint the remaining results to stay in place. This
  845. ;; strikes a balance between avoiding shuffling,
  846. ;; especially for unused extra values, and avoiding frame
  847. ;; size growth due to sparse locals.
  848. ((slots result-live)
  849. (match (get-cont kargs)
  850. (($ $kargs () ())
  851. (values slots post-live))
  852. (($ $kargs (_ . _) (_ . results))
  853. (let ((result-slots (integers (+ proc-slot 1)
  854. (length results))))
  855. (allocate* results result-slots slots post-live)))))
  856. ((slot-map) (compute-slot-map slots (intmap-ref live-out label)
  857. (- proc-slot frame-size)))
  858. ((call) (make-call-alloc proc-slot slot-map)))
  859. (values slots
  860. (intmap-add! call-allocs label call))))))
  861. (define (allocate-values label k args slots call-allocs)
  862. (match (get-cont k)
  863. (($ $ktail)
  864. (values slots call-allocs))
  865. (($ $kargs (_) (dst))
  866. ;; When there is only one value in play, we allow the dst to be
  867. ;; hinted (see compute-lazy-vars). If the src doesn't have a
  868. ;; slot, then the actual slot for the dst would end up being
  869. ;; decided by the call that args it. Because we don't know the
  870. ;; slot, we can't really compute the parallel moves in that
  871. ;; case, so just bail and rely on the bytecode emitter to
  872. ;; handle the one-value case specially.
  873. (match args
  874. ((src)
  875. (let ((post-live (compute-live-out-slots slots label)))
  876. (values (allocate dst (get-slot slots src) slots post-live)
  877. call-allocs)))))
  878. (($ $kargs _ dst-vars)
  879. (let ((src-slots (get-slots slots args))
  880. (post-live (compute-live-out-slots slots label)))
  881. (values (allocate* dst-vars src-slots slots post-live)
  882. call-allocs)))))
  883. (define (allocate-prompt label k handler slots call-allocs)
  884. (match (get-cont handler)
  885. (($ $kreceive arity kargs)
  886. (let*-values
  887. (((handler-live) (compute-live-in-slots slots handler))
  888. ((proc-slot) (compute-prompt-handler-proc-slot handler-live))
  889. ((slot-map) (compute-slot-map slots (intmap-ref live-in handler)
  890. (- proc-slot frame-size)))
  891. ((result-vars) (match (get-cont kargs)
  892. (($ $kargs names vars) vars)))
  893. ((value-slots) (integers proc-slot (length result-vars)))
  894. ((slots result-live) (allocate* result-vars value-slots
  895. slots handler-live)))
  896. (values slots
  897. (intmap-add! call-allocs label
  898. (make-call-alloc proc-slot slot-map)))))))
  899. (define (allocate-cont label cont slots call-allocs)
  900. (match cont
  901. (($ $kargs names vars term)
  902. (let-values (((slots live) (allocate-defs label vars slots)))
  903. (match term
  904. (($ $continue k src ($ $call proc args))
  905. (allocate-call label k (cons proc args) slots call-allocs live))
  906. (($ $continue k src ($ $callk _ proc args))
  907. (allocate-call label k (if proc (cons proc args) args)
  908. slots call-allocs live))
  909. (($ $continue k src ($ $values args))
  910. (allocate-values label k args slots call-allocs))
  911. (($ $prompt k kh src escape? tag)
  912. (allocate-prompt label k kh slots call-allocs))
  913. (_
  914. (values slots call-allocs)))))
  915. (_
  916. (values slots call-allocs))))
  917. (call-with-values (lambda ()
  918. (let ((slots (allocate-args cps)))
  919. (intmap-fold allocate-cont cps slots empty-intmap)))
  920. (lambda (slots calls)
  921. (let* ((slots (allocate-lazy-vars cps slots calls live-in lazy))
  922. (shuffles (compute-shuffles cps slots calls live-in))
  923. (frame-size (compute-frame-size cps slots calls shuffles)))
  924. (make-allocation slots representations calls shuffles frame-size))))))