lower-stringrefs.scm 38 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846
  1. ;;; Pass to lower stringref to JS string builtins
  2. ;;; Copyright (C) 2023 Igalia, S.L.
  3. ;;;
  4. ;;; Licensed under the Apache License, Version 2.0 (the "License");
  5. ;;; you may not use this file except in compliance with the License.
  6. ;;; You may obtain a copy of the License at
  7. ;;;
  8. ;;; http://www.apache.org/licenses/LICENSE-2.0
  9. ;;;
  10. ;;; Unless required by applicable law or agreed to in writing, software
  11. ;;; distributed under the License is distributed on an "AS IS" BASIS,
  12. ;;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. ;;; See the License for the specific language governing permissions and
  14. ;;; limitations under the License.
  15. ;;; Commentary:
  16. ;;;
  17. ;;; The hoot compiler and run-time use stringref to represent strings,
  18. ;;; albeit wrapped in a struct so they can have a hash code. Stringref
  19. ;;; (https://github.com/WebAssembly/stringref/blob/main/proposals/stringref/Overview.md)
  20. ;;; is an experimental extension to WebAssembly which exposes "host
  21. ;;; strings": strings whose representation is opaque and managed by the
  22. ;;; host. On a web browser, these will likely be WTF-16 strings; on
  23. ;;; anything else, it's probably WTF-8, though for the Hoot VM used in
  24. ;;; development, we use Guile strings there.
  25. ;;;
  26. ;;; Stringref does not yet have consensus among the WebAssembly
  27. ;;; standardization group. Chrome has an experimental
  28. ;;; --experimental-wasm-stringref flag that users have to enable
  29. ;;; explicitly, and which is also on for a limited time and set of web
  30. ;;; sites for an "origin trial". But, it's not shipping by default
  31. ;;; anywhere, so we need a mitigation.
  32. ;;;
  33. ;;; A competitor to stringref is JavaScript String Builtins
  34. ;;; (https://github.com/WebAssembly/js-string-builtins/blob/main/proposals/js-string-builtins/Overview.md).
  35. ;;; This proposal instead ships a new set of functions for JavaScript
  36. ;;; hosts, off of a new WebAssembly.String object:
  37. ;;;
  38. ;;; - WebAssembly.String.fromWtf16Array(array, start, end) -> str
  39. ;;; - WebAssembly.String.toWtf16Array(str, array, start) -> length
  40. ;;; - WebAssembly.String.fromWtf8Array(array, start, end) -> str
  41. ;;; - WebAssembly.String.fromCharCode(u16) -> str
  42. ;;; - WebAssembly.String.fromCodePoint(u32) -> str
  43. ;;; - WebAssembly.String.codePointAt(idx) -> i32
  44. ;;; - WebAssembly.String.charCodeAt(idx) -> i32
  45. ;;; - WebAssembly.String.length(str) -> i32
  46. ;;; - WebAssembly.String.concat(a, b) -> str
  47. ;;; - WebAssembly.String.substring(str, start, end) -> str
  48. ;;; - WebAssembly.String.equals(a, b) -> i32
  49. ;;; - WebAssembly.String.compare(a, b) -> i32
  50. ;;;
  51. ;;; The intent is that if a WebAssembly module imports one of these
  52. ;;; functions, it can recognize it specially, as is done for
  53. ;;; e.g. Math.cos, and compile it appropriately. Currently we would use
  54. ;;; externref to represent these strings, but in future there could be
  55. ;;; more precise typing.
  56. ;;;
  57. ;;; Another option would be to just use what WebAssembly gives us, which
  58. ;;; is arrays and structs. That would work fine within the module, but
  59. ;;; it is pretty annoying from the host side: for every string value
  60. ;;; that crosses the threshold, you need to convert to or from an i8
  61. ;;; array. However, JS hosts cannot access the i8 array, so you would
  62. ;;; need to use a reflection module and go byte-by-byte. Very annoying.
  63. ;;;
  64. ;;; Finally there is a mix between the i8 array and string builtins
  65. ;;; approach: use wtf8 internally, but then externalize via calling an
  66. ;;; import, on the boundary, from the WebAssembly side of things. It
  67. ;;; would work for function arguments with stringref-typed arguments and
  68. ;;; results. Nested data would still need to be picked apart by the
  69. ;;; host using a reflection module.
  70. ;;;
  71. ;;; In summary, our options are:
  72. ;;;
  73. ;;; 1. Just stringref. Works for Hoot VM, and Chrome with an origin
  74. ;;; trial.
  75. ;;;
  76. ;;; 2. Just i8 arrays. Works everywhere but is expensive on the
  77. ;;; boundary. Strings are (array i8), immutable, and (array i8) on
  78. ;;; boundary. Iteration views are a struct with an offset.
  79. ;;; string.const loads an immutable i8 array global. We have a
  80. ;;; little state machine for advancing / decoding. Encode and
  81. ;;; concat is array.copy.
  82. ;;;
  83. ;;; 3. Just i8 arrays, but for external functions with string params or
  84. ;;; results, the generated wasm calls
  85. ;;; WebAssembly.String.fromWtf8Array as an import, and types those
  86. ;;; values as externref. Same with toWtf8Array in the other
  87. ;;; direction, except that doesn't exist yet.
  88. ;;;
  89. ;;; 4. Strings are externref. string.const loads an immutable i8 array
  90. ;;; global, but then eagerly calls fromWtf8Array on it. All
  91. ;;; operations proxy through the JS builtin functions. We still
  92. ;;; have a struct iterator.
  93. ;;;
  94. ;;; This module implements a lowering strategy for (3). If the host
  95. ;;; does not support WebAssembly.String, we polyfill the import. This
  96. ;;; way we can target any browser, and take advantage of fromWtf8Array
  97. ;;; on the host side if present. We don't currently think that (4) will
  98. ;;; have good enough performance, and in any case the builtin strings
  99. ;;; proposal doesn't have the facilities we need (e.g. measure_wtf8).
  100. ;;;
  101. ;;; Code:
  102. (define-module (wasm lower-stringrefs)
  103. #:use-module (ice-9 match)
  104. #:use-module ((srfi srfi-1) #:select (append-map filter-map))
  105. #:use-module (rnrs bytevectors)
  106. #:use-module (wasm link)
  107. #:use-module (wasm types)
  108. #:use-module (wasm wat)
  109. #:export (lower-stringrefs))
  110. (define (wtf8-stdlib)
  111. ;; Generalized UTF-8 decoder is a translation of:
  112. ;; https://chromium.googlesource.com/v8/v8/+/main/src/third_party/utf8-decoder/generalized-utf8-decoder.h
  113. ;; generalized utf-8 decoder states
  114. (define %wtf8-reject 0)
  115. (define %wtf8-accept 11)
  116. (define %wtf8-two-byte 22)
  117. (define %wtf8-three-byte 33)
  118. (define %wtf8-four-byte 44)
  119. (define %wtf8-four-byte-low 55)
  120. (define %wtf8-three-byte-high 66)
  121. (define %wtf8-four-byte-mid-high 77)
  122. ;; This first table maps bytes to character to a transition.
  123. ;;
  124. ;; The transition value takes a state to a new state, but it also determines
  125. ;; the set of bits from the current byte that contribute to the decoded
  126. ;; codepoint:
  127. ;;
  128. ;; Transition | Current byte bits that contribute to decoded codepoint
  129. ;; -------------------------------------------------------------------
  130. ;; 0, 1 | #b01111111
  131. ;; 2, 3 | #b00111111
  132. ;; 4, 5 | #b00011111
  133. ;; 6, 7 | #b00001111
  134. ;; 8, 9 | #b00000111
  135. ;; 10 | #b00000011
  136. ;;
  137. ;; Given the WTF-8 encoding, we therefore have the following constraints:
  138. ;;
  139. ;; 1. The transition value for 1-byte encodings should have the value 0 or
  140. ;; 1 so that we preserve all of the low 7 bits.
  141. ;; 2. Continuation bytes (#x80 to #xBF) are of the form #b10xxxxxx, and
  142. ;; therefore should have transition value between 0 and 3.
  143. ;; 3. Leading bytes for 2-byte encodings are of the form #b110yyyyy, and
  144. ;; therefore the transition value can be between 2 and 5.
  145. ;; 4. Leading bytes for 3-byte encodings (#b1110zzzz) need transition
  146. ;; value between 4 and 7.
  147. ;; 5. Leading bytes for 4-byte encodings (#b11110uuu) need transition
  148. ;; value between 6 and 9.
  149. ;; 6. We need more states to impose irregular constraints. Sometimes we
  150. ;; can use the knowldege that e.g. some high significant bits of the
  151. ;; xxxx in #b1110xxxx are 0, then we can use a higher transition value.
  152. ;; 7. Transitions to invalid states can use any transition value.
  153. (define %wtf8-transitions
  154. #vu8(0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ;; 00-0F
  155. 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ;; 10-1F
  156. 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ;; 20-2F
  157. 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ;; 30-3F
  158. 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ;; 40-4F
  159. 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ;; 50-5F
  160. 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ;; 60-6F
  161. 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ;; 70-7F
  162. 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ;; 80-8F
  163. 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 ;; 90-9F
  164. 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 ;; A0-AF
  165. 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 ;; B0-BF
  166. 8 8 4 4 4 4 4 4 4 4 4 4 4 4 4 4 ;; C0-CF
  167. 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 ;; D0-DF
  168. 9 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 ;; E0-EF
  169. 10 6 6 6 7 8 8 8 8 8 8 8 8 8 8 8)) ;; F0-FF
  170. ;; This second table maps a state to a new state when adding a transition.
  171. ;; 00-7F
  172. ;; | 80-8F
  173. ;; | | 90-9F
  174. ;; | | | A0-BF
  175. ;; | | | | C2-DF
  176. ;; | | | | | E1-EF
  177. ;; | | | | | | F1-F3
  178. ;; | | | | | | | F4
  179. ;; | | | | | | | | C0, C1, F5-FF
  180. ;; | | | | | | | | | E0
  181. ;; | | | | | | | | | | F0
  182. (define %wtf8-states
  183. #vu8(0 0 0 0 0 0 0 0 0 0 0 ;; REJECT = 0
  184. 11 0 0 0 22 33 44 55 0 66 77 ;; ACCEPT = 11
  185. 0 11 11 11 0 0 0 0 0 0 0 ;; 2-byte = 22
  186. 0 22 22 22 0 0 0 0 0 0 0 ;; 3-byte = 33
  187. 0 33 33 33 0 0 0 0 0 0 0 ;; 4-byte = 44
  188. 0 33 0 0 0 0 0 0 0 0 0 ;; 4-byte low = 55
  189. 0 0 0 22 0 0 0 0 0 0 0 ;; 3-byte high = 66
  190. 0 0 33 33 0 0 0 0 0 0 0)) ;; 4-byte mid/high = 77
  191. (wat->wasm
  192. `((type $wtf8 (array (mut i8)))
  193. (type $stringview-iter
  194. (struct (field $wtf8 (ref $wtf8))
  195. (field $pos (mut i32))))
  196. (func $wtf8->extern-string (import "rt" "wtf8_to_string")
  197. (param $wtf8 (ref null $wtf8))
  198. (result (ref extern)))
  199. (func $extern-string->wtf8 (import "rt" "string_to_wtf8")
  200. (param $str (ref null extern))
  201. (result (ref $wtf8)))
  202. (type $immutable-bytes (array i8))
  203. (data $wtf8-transitions ,%wtf8-transitions)
  204. (data $wtf8-states ,%wtf8-states)
  205. (global $wtf8-transitions (ref $immutable-bytes)
  206. (array.new_data $immutable-bytes $wtf8-transitions
  207. (i32.const 0)
  208. (i32.const
  209. ,(bytevector-length %wtf8-transitions))))
  210. (global $wtf8-states (ref $immutable-bytes)
  211. (array.new_data $immutable-bytes $wtf8-states
  212. (i32.const 0)
  213. (i32.const
  214. ,(bytevector-length %wtf8-states))))
  215. (func $decode-wtf8 (param $byte i32) (param $buf i32) (param $state i32)
  216. (result i32 i32) ; codepoint, state
  217. (local $type i32)
  218. (local.set $type
  219. (array.get_u $immutable-bytes
  220. (global.get $wtf8-transitions)
  221. (local.get $byte)))
  222. ;; Two values: first push the codepoint
  223. (i32.or (i32.shl (local.get $buf) (i32.const 6))
  224. (i32.and (local.get $byte)
  225. (i32.shr_u (i32.const #x7f)
  226. (i32.shr_u (local.get $type)
  227. (i32.const 1)))))
  228. ;; Then the state
  229. (array.get_u $immutable-bytes
  230. (global.get $wtf8-states)
  231. (i32.add (local.get $state) (local.get $type))))
  232. ;; Downside of wtf-8: byte-by-byte comparison.
  233. (func $string.compare
  234. (param $a (ref $wtf8))
  235. (param $b (ref $wtf8))
  236. (result i32)
  237. (local $i i32)
  238. (local $d i32)
  239. (if (ref.eq (local.get $a) (local.get $b))
  240. (then (return (i32.const 0))))
  241. (loop $lp
  242. i32
  243. (if (i32.eq (array.len (local.get $a)) (local.get $i))
  244. (then
  245. (if (i32.eq (array.len (local.get $b)) (local.get $i))
  246. (then (return (i32.const 0)))
  247. (else (return (i32.const -1))))))
  248. (if (i32.eq (array.len (local.get $b)) (local.get $i))
  249. (then (return (i32.const 1))))
  250. (local.tee
  251. $d
  252. (i32.sub
  253. (array.get_u $wtf8 (local.get $a) (local.get $i))
  254. (array.get_u $wtf8 (local.get $b) (local.get $i))))
  255. (if (i32.eqz)
  256. (then
  257. (local.set $i
  258. (i32.add (local.get $i) (i32.const 1)))
  259. (br $lp)))
  260. (if i32
  261. (i32.lt_s (local.get $d) (i32.const 0))
  262. (then (i32.const -1))
  263. (else (i32.const 1)))))
  264. (func $string.eq (param $a (ref $wtf8)) (param $b (ref $wtf8))
  265. (result i32)
  266. (i32.eqz (call $string.compare (local.get $a) (local.get $b))))
  267. (func $string.as_iter (param $wtf8 (ref $wtf8))
  268. (result (ref $stringview-iter))
  269. (struct.new $stringview-iter (local.get $wtf8) (i32.const 0)))
  270. (func $stringview_iter.next
  271. (param $iter (ref $stringview-iter))
  272. (result i32)
  273. (local $wtf8 (ref $wtf8))
  274. (local $cp i32)
  275. (local $state i32)
  276. (local $i i32)
  277. (local.set $wtf8 (struct.get $stringview-iter $wtf8
  278. (local.get $iter)))
  279. (local.set $i (struct.get $stringview-iter $pos
  280. (local.get $iter)))
  281. (local.set $state (i32.const ,%wtf8-accept))
  282. (if (i32.ge_u (local.get $i) (array.len (local.get $wtf8)))
  283. (then (return (i32.const -1))))
  284. (loop $lp
  285. (call $decode-wtf8
  286. (array.get_u $wtf8 (local.get $wtf8) (local.get $i))
  287. (local.get $cp)
  288. (local.get $state))
  289. (local.set $state)
  290. (local.set $cp)
  291. ;; Must be valid WTF-8!
  292. (if (i32.eq (local.get $state) (i32.const ,%wtf8-reject))
  293. (then (unreachable)))
  294. (local.set $i (i32.add (local.get $i) (i32.const 1)))
  295. (if (i32.ne (local.get $state) (i32.const ,%wtf8-accept))
  296. (then
  297. (if (i32.ge_u (local.get $i) (array.len (local.get $wtf8)))
  298. ;; Bad WTF-8.
  299. (then (unreachable)))
  300. (br $lp))))
  301. (struct.set $stringview-iter $pos
  302. (local.get $iter) (local.get $i))
  303. (local.get $cp))
  304. (func $stringview_iter.advance
  305. (param $iter (ref $stringview-iter)) (param $count i32)
  306. (result i32)
  307. (local $wtf8 (ref $wtf8))
  308. (local $state i32)
  309. (local $i i32)
  310. (local $advanced i32)
  311. (local.set $wtf8 (struct.get $stringview-iter $wtf8
  312. (local.get $iter)))
  313. (local.set $i (struct.get $stringview-iter $pos (local.get $iter)))
  314. (local.set $state (i32.const ,%wtf8-accept))
  315. (if (i32.eqz (local.get $count))
  316. (then (return (i32.const 0))))
  317. (loop $lp
  318. (if (i32.lt_u (local.get $i) (array.len (local.get $wtf8)))
  319. (then
  320. (call $decode-wtf8
  321. (array.get_u $wtf8 (local.get $wtf8) (local.get $i))
  322. (i32.const 0)
  323. (local.get $state))
  324. (local.set $state)
  325. (drop)
  326. ;; Must be valid WTF-8!
  327. (if (i32.eq (local.get $state) (i32.const ,%wtf8-reject))
  328. (then (unreachable)))
  329. (local.set $i (i32.add (local.get $i) (i32.const 1)))
  330. (if (i32.eq (local.get $state (i32.const ,%wtf8-accept)))
  331. (then
  332. (local.set $advanced
  333. (i32.add (local.get $advanced)
  334. (i32.const 1)))
  335. (if (i32.lt_u (local.get $advanced)
  336. (local.get $count))
  337. (then (br $lp))))
  338. (else
  339. (br $lp))))
  340. (else
  341. ;; Must be valid WTF-8!
  342. (if (i32.ne (local.get $state) (i32.const ,%wtf8-accept))
  343. (then (unreachable))))))
  344. (struct.set $stringview-iter $pos (local.get $iter) (local.get $i))
  345. (local.get $advanced))
  346. (func $stringview_iter.slice
  347. (param $iter (ref $stringview-iter)) (param $count i32)
  348. (result (ref $wtf8))
  349. (local $wtf8 (ref $wtf8))
  350. (local $start i32)
  351. (local $len i32)
  352. (local $temp (ref $stringview-iter))
  353. (local $out (ref $wtf8))
  354. (local.set $wtf8 (struct.get $stringview-iter $wtf8
  355. (local.get $iter)))
  356. (local.set $start (struct.get $stringview-iter $pos
  357. (local.get $iter)))
  358. (local.set $temp (struct.new $stringview-iter (local.get $wtf8)
  359. (local.get $start)))
  360. (call $stringview_iter.advance (local.get $temp)
  361. (local.get $count))
  362. (drop)
  363. (local.set $len
  364. (i32.sub (struct.get $stringview-iter $pos
  365. (local.get $temp))
  366. (local.get $start)))
  367. (local.set $out (array.new_default $wtf8 (local.get $len)))
  368. (array.copy $wtf8 $wtf8
  369. (local.get $out) (i32.const 0)
  370. (local.get $wtf8) (local.get $start) (local.get $len))
  371. (local.get $out))
  372. (func $string.encode_wtf8_array
  373. (param $wtf8 (ref $wtf8)) (param $out (ref $wtf8)) (param $pos i32)
  374. (array.copy $wtf8 $wtf8
  375. (local.get $out)
  376. (local.get $pos)
  377. (local.get $wtf8)
  378. (i32.const 0)
  379. (array.len (local.get $wtf8))))
  380. (func $string.new_lossy_utf8_array
  381. (param $buf (ref $wtf8)) (param $start i32) (param $len i32)
  382. (result (ref $wtf8))
  383. (local $out (ref $wtf8))
  384. ;; FIXME: validate buffer as wtf8
  385. (local.set $out (array.new_default $wtf8 (local.get $len)))
  386. (array.copy $wtf8 $wtf8
  387. (local.get $out) (i32.const 0)
  388. (local.get $buf) (local.get $start) (local.get $len))
  389. (local.get $out))
  390. (func $string.measure_wtf16 (param $wtf8 (ref $wtf8)) (result i32)
  391. (local $iter (ref $stringview-iter))
  392. (local $cp i32)
  393. (local $count i32)
  394. (local.set $iter (call $string.as_iter (local.get $wtf8)))
  395. (loop $lp
  396. (local.set $cp (call $stringview_iter.next (local.get $iter)))
  397. (if (i32.le_s (i32.const 0) (local.get $cp))
  398. (then
  399. (local.set $count
  400. (i32.add (i32.add (local.get $count)
  401. (i32.const 1))
  402. (i32.gt_u (local.get $cp)
  403. (i32.const #xffff))))
  404. (br $lp))))
  405. (local.get $count)))))
  406. ;; Some imports and exports are to other wasm modules, and some are to
  407. ;; the host. Imports and exports to other wasm modules should be
  408. ;; lowered to $wtf8, whereas "external" interfaces to the host should
  409. ;; also be wrapped with wtf8-to-string. The right way to know whether
  410. ;; an import or export is internal or external is to use some kind of
  411. ;; explicit information in a custom section. For now, though, we just
  412. ;; look at the name under which the function is imported or exported,
  413. ;; because that's compatible with what the hoot compiler does.
  414. (define (import-is-external? mod name)
  415. (not (string-prefix? "$" name)))
  416. (define (export-is-external? name)
  417. (not (string-prefix? "$" name)))
  418. (define* (lower-stringrefs/wtf8 wasm #:key
  419. (import-is-external? import-is-external?)
  420. (export-is-external? export-is-external?))
  421. (define make-id
  422. (let ((counter 0))
  423. (lambda (stem)
  424. (let ((sym (string->symbol (format #f "~a-~a" stem counter))))
  425. (set! counter (1+ counter))
  426. sym))))
  427. (define %strings (make-hash-table))
  428. (define (intern-string! str)
  429. (or (hash-ref %strings str)
  430. (let ((id (make-id '$stringref)))
  431. (hash-set! %strings str id)
  432. id)))
  433. (match wasm
  434. (($ <wasm> id types imports funcs tables memories globals exports start
  435. elems datas tags () custom)
  436. ;; need to replace string-typed imports with wrappers
  437. (define (visit-heap-type type)
  438. (match type
  439. ('string '$wtf8)
  440. ('stringview_wtf8 (error "lowering wtf8 views unsupported"))
  441. ('stringview_wtf16 (error "lowering wtf16 views unsupported"))
  442. ('stringview_iter '$stringview-iter)
  443. (_ type)))
  444. (define (visit-val-type type)
  445. (match type
  446. (($ <ref-type> nullable? ht)
  447. (make-ref-type nullable? (visit-heap-type ht)))
  448. (_ type)))
  449. (define (visit-ref-type type)
  450. (visit-val-type type))
  451. (define (visit-param param)
  452. (match param
  453. (($ <param> id type)
  454. (make-param id (visit-val-type type)))))
  455. (define (visit-field field)
  456. (match field
  457. (($ <field> id mutable? type)
  458. (make-field id mutable? (visit-val-type type)))))
  459. (define (visit-func-sig type)
  460. (match type
  461. (($ <func-sig> params results)
  462. (make-func-sig (map visit-param params)
  463. (map visit-val-type results)))))
  464. (define (visit-base-type type)
  465. (match type
  466. (($ <struct-type> fields)
  467. (make-struct-type (map visit-field fields)))
  468. (($ <array-type> mutable? type)
  469. (make-array-type mutable? (visit-val-type type)))
  470. (_
  471. (visit-func-sig type))))
  472. (define (visit-sub-type type)
  473. (match type
  474. (($ <sub-type> final? supers type)
  475. (make-sub-type final? supers (visit-base-type type)))
  476. (_ (visit-base-type type))))
  477. (define (visit-type-use type)
  478. (match type
  479. (($ <type-use> id sig)
  480. (make-type-use id (visit-func-sig sig)))))
  481. (define (visit-table-type type)
  482. (match type
  483. (($ <table-type> limits elem-type)
  484. (make-table-type limits (visit-val-type elem-type)))))
  485. (define (visit-global-type type)
  486. (match type
  487. (($ <global-type> mutable? type)
  488. (make-global-type mutable? (visit-val-type type)))))
  489. (define (visit-tag-type type)
  490. (match type
  491. (($ <tag-type> attribute type)
  492. (make-tag-type attribute (visit-type-use type)))))
  493. (define (visit-block-type type)
  494. (match type
  495. (#f #f)
  496. (($ <type-use>) (visit-type-use type))
  497. ((or ($ <ref-type>) (? symbol?)) (visit-val-type type))))
  498. (define visit-inst
  499. (match-lambda
  500. (((and inst (or 'block 'loop)) label type body)
  501. `(,inst ,label ,(visit-block-type type)
  502. ,(visit-expr body)))
  503. (('if label type consequent alternate)
  504. `(if ,label ,(visit-block-type type)
  505. ,(visit-expr consequent)
  506. ,(visit-expr alternate)))
  507. (('try label type body catches catch-all)
  508. `(try ,label ,(visit-block-type type)
  509. ,(visit-expr body)
  510. ,(map visit-expr catches)
  511. ,(and=> catch-all visit-expr)))
  512. (('try_delegate label type body handler)
  513. `(try_delegate ,label ,(visit-block-type type)
  514. ,(visit-expr body)
  515. ,handler))
  516. (('call_indirect table type)
  517. `(call_indirect ,table ,(visit-type-use type)))
  518. (('select types) `(select ,(map visit-val-type types)))
  519. ;; GC instructions.
  520. (('ref.null ht) `(ref.null ,(visit-heap-type ht)))
  521. (((and inst (or 'ref.test 'ref.cast)) rt)
  522. `(,inst ,(visit-ref-type rt)))
  523. (((and inst (or 'br_on_cast 'br_on_cast_fail)) label rt1 rt2)
  524. `(,inst ,label ,(visit-ref-type rt1) ,(visit-ref-type rt2)))
  525. ;; Stringref instructions.
  526. (('string.const str) `(global.get ,(intern-string! str)))
  527. (('string.new_utf8 mem) `(call ,(symbol-append
  528. '$string.new_utf8_ mem)))
  529. (('string.new_lossy_utf8 mem) `(call ,(symbol-append
  530. '$string.new_lossy_utf8_ mem)))
  531. (('string.new_wtf8 mem) `(call ,(symbol-append
  532. '$string.new_wtf8_ mem)))
  533. (('string.new_wtf16 mem) `(call ,(symbol-append
  534. '$string.new_wtf16_ mem)))
  535. (('string.measure_wtf8) '(array.len))
  536. (('string.measure_utf8) '(call $string.measure_utf8))
  537. (('string.measure_wtf16) '(call $string.measure_wtf16))
  538. (('string.encode_utf8 mem) `(call ,(symbol-append
  539. '$string.encode_utf8_ mem)))
  540. (('string.encode_lossy_utf8 mem) `(call ,(symbol-append
  541. '$string.encode_lossy_utf8_ mem)))
  542. (('string.encode_wtf8 mem) `(call ,(symbol-append '$string.encode_wtf8_ mem)))
  543. (('string.encode_wtf16 mem) `(call ,(symbol-append
  544. '$string.encode_wtf16_ mem)))
  545. (('string.concat) '(call $string.concat))
  546. (('string.eq) '(call $string.eq))
  547. (('string.is_usv_sequence) '(call $string.is_usv_sequence))
  548. (('string.compare) '(call $string.compare))
  549. (('string.from_code_point) '(call $string.from_code_point))
  550. (('string.as_wtf8) '(call $string.as_wtf8))
  551. (('stringview_wtf8.advance) '(call $stringview_wtf8.advance))
  552. (('stringview_wtf8.encode_utf8 mem)
  553. `(call ,(symbol-append
  554. '$stringview_wtf8.encode_utf8_ mem)))
  555. (('stringview_wtf8.encode_lossy_utf8 mem)
  556. `(call ,(symbol-append
  557. '$stringview_wtf8.encode_lossy_utf8_ mem)))
  558. (('stringview_wtf8.encode_wtf8 mem)
  559. `(call ,(symbol-append
  560. '$stringview_wtf8.encode_wtf8_ mem)))
  561. (('stringview_wtf8.slice) '(call $stringview_wtf8.slice))
  562. (('string.as_wtf16) '(call $string.as_wtf16))
  563. (('stringview_wtf16.length) '(call $stringview_wtf16.length))
  564. (('stringview_wtf16.get_codeunit) `(call $stringview_wtf16.get_codeunit))
  565. (('stringview_wtf16.encode mem) `(call ,(symbol-append
  566. '$stringview_wtf16.encode_ mem)))
  567. (('stringview_wtf16.slice) '(call $stringview_wtf16.slice))
  568. (('string.as_iter) '(call $string.as_iter))
  569. (('stringview_iter.next) '(call $stringview_iter.next))
  570. (('stringview_iter.advance) '(call $stringview_iter.advance))
  571. (('stringview_iter.rewind) '(call $stringview_iter.rewind))
  572. (('stringview_iter.slice) '(call $stringview_iter.slice))
  573. (('string.new_utf8_array) '(call $string.new_utf8_array))
  574. (('string.new_lossy_utf8_array) '(call $string.new_lossy_utf8_array))
  575. (('string.new_wtf8_array) '(call $string.new_wtf8_array))
  576. (('string.new_wtf16_array) '(call $string.new_wtf16_array))
  577. (('string.encode_utf8_array) '(call $string.encode_utf8_array))
  578. (('string.encode_lossy_utf8_array) '(call $string.encode_lossy_utf8_array))
  579. (('string.encode_wtf8_array) '(call $string.encode_wtf8_array))
  580. (('string.encode_wtf16_array) '(call $string.encode_wtf16_array))
  581. (inst inst)))
  582. (define (visit-expr expr)
  583. (map visit-inst expr))
  584. (define (visit-init expr)
  585. (visit-expr expr))
  586. (define (visit-func func)
  587. (define visit-local
  588. (match-lambda
  589. (($ <local> id type)
  590. (make-local id (visit-val-type type)))))
  591. (match func
  592. (($ <func> id type locals body)
  593. (let ((type (visit-type-use type))
  594. (locals (map visit-local locals))
  595. (body (visit-expr body)))
  596. (make-func id type locals body)))))
  597. (define (lower-extern-val-type type)
  598. (match type
  599. (($ <ref-type> nullable? 'string)
  600. (make-ref-type nullable? 'extern))
  601. (($ <ref-type> nullable? (or 'stringview_wtf8
  602. 'stringview_wtf16
  603. 'stringview_iter))
  604. (error "extern param/result with stringview type unimplemented" type))
  605. (_ (visit-val-type type))))
  606. (define (lower-extern-val type)
  607. (match type
  608. (($ <ref-type> nullable? 'string)
  609. '((call $wtf8->extern-string)))
  610. (($ <ref-type> nullable? (or 'stringview_wtf8
  611. 'stringview_wtf16
  612. 'stringview_iter))
  613. (error "extern value with stringview type unimplemented" type))
  614. (_ '())))
  615. (define (lift-extern-val type)
  616. (match type
  617. (($ <ref-type> nullable? 'string)
  618. '((call $extern-string->wtf8)))
  619. (($ <ref-type> nullable? (or 'stringview_wtf8
  620. 'stringview_wtf16
  621. 'stringview_iter))
  622. (error "extern value with stringview type unimplemented" type))
  623. (_ '())))
  624. (define (lower-extern-func-type type)
  625. (match type
  626. (($ <type-use> tid
  627. ($ <func-sig> (($ <param> pid ptype) ...) (rtype ...)))
  628. (make-type-use tid
  629. (make-func-sig
  630. (map make-param pid
  631. (map lower-extern-val-type ptype))
  632. (map lower-extern-val-type rtype))))))
  633. (define (lower-extern-tag-type type)
  634. (match type
  635. (($ <tag-type> attribute type)
  636. (make-tag-type attribute (lower-extern-func-type type)))))
  637. (define (lower-extern-func-import id wrapped-id type)
  638. (match type
  639. (($ <type-use> _ ($ <func-sig> (($ <param> _ params) ...) results))
  640. (let ((param-count (length params)))
  641. (make-func
  642. id
  643. (visit-type-use type)
  644. (map (lambda (type) (make-local #f (lower-extern-val-type type)))
  645. results)
  646. (let lp ((params params) (i 0))
  647. (match params
  648. ((param . params)
  649. `((local.get ,i)
  650. ,@(lower-extern-val param)
  651. . ,(lp params (1+ i))))
  652. (()
  653. `((call ,wrapped-id)
  654. ,@(reverse (map (lambda (i) `(local.set ,i))
  655. (iota (length results) param-count)))
  656. . ,(let lp ((results results) (i param-count))
  657. (match results
  658. (() '())
  659. ((result . results)
  660. `((local.get ,i)
  661. ,@(lift-extern-val result)
  662. . ,(lp results (1+ i)))))))))))))))
  663. (define (lower-extern-func-export id wrapped-id type)
  664. (match type
  665. (($ <type-use> _ ($ <func-sig> (($ <param> _ params) ...) results))
  666. (let ((param-count (length params)))
  667. (make-func
  668. id
  669. (lower-extern-func-type type)
  670. (map (lambda (type) (make-local #f (visit-val-type type)))
  671. results)
  672. (let lp ((params params) (i 0))
  673. (match params
  674. ((param . params)
  675. `((local.get ,i)
  676. ,@(lift-extern-val param)
  677. . ,(lp params (1+ i))))
  678. (()
  679. `((call ,wrapped-id)
  680. ,@(reverse (map (lambda (i) `(local.set ,i))
  681. (iota (length results) param-count)))
  682. . ,(let lp ((results results) (i param-count))
  683. (match results
  684. (() '())
  685. ((result . results)
  686. `((local.get ,i)
  687. ,@(lower-extern-val result)
  688. . ,(lp results (1+ i)))))))))))))))
  689. (define (lookup-func-type id)
  690. (or (or-map (match-lambda
  691. (($ <import> mod name kind id' type)
  692. (and (eq? id id') type)))
  693. imports)
  694. (or-map (match-lambda
  695. (($ <func> id' type locals body)
  696. (and (eq? id id') type)))
  697. funcs)))
  698. (let ((types (map (match-lambda
  699. (($ <rec-group> (($ <type> id type) ...))
  700. (make-rec-group
  701. (map make-type id (map visit-sub-type type))))
  702. (($ <type> id type)
  703. (make-type id (visit-sub-type type))))
  704. types))
  705. (imports (map
  706. (match-lambda
  707. (($ <import> mod name kind id type)
  708. (let* ((type* (match kind
  709. ('func (if (import-is-external? mod name)
  710. (lower-extern-func-type type)
  711. (visit-type-use type)))
  712. ('table (visit-table-type type))
  713. ('memory type)
  714. ('global (visit-global-type type))
  715. ('tag (if (import-is-external? mod name)
  716. (lower-extern-tag-type type)
  717. (visit-tag-type type)))))
  718. (id* (and (eq? kind 'func)
  719. (import-is-external? mod name)
  720. (not (equal? type type*))
  721. (make-id (symbol-append id '-stringref)))))
  722. (cons (and id* (lower-extern-func-import id id* type))
  723. (make-import mod name kind (or id* id) type*)))))
  724. imports))
  725. (exports (map
  726. (match-lambda
  727. ((and export ($ <export> name kind id))
  728. (cond
  729. ((and (eq? kind 'func)
  730. (export-is-external? name)
  731. (and=>
  732. (lookup-func-type id)
  733. (lambda (type)
  734. (if (equal? type
  735. (lower-extern-func-type type))
  736. #f
  737. type))))
  738. => (lambda (type)
  739. (let ((id* (make-id
  740. (symbol-append id '-stringref))))
  741. (cons (lower-extern-func-export id* id type)
  742. (make-export name kind id*)))))
  743. (else
  744. (cons #f export)))))
  745. exports))
  746. (funcs (map visit-func funcs))
  747. (tables (map (match-lambda
  748. (($ <table> id type init)
  749. (make-table id (visit-table-type type)
  750. (and init (visit-init init)))))
  751. tables))
  752. (globals (map (match-lambda
  753. (($ <global> id ($ <global-type> mutable? vt) init)
  754. (let* ((vt (visit-val-type vt))
  755. (type (make-global-type mutable? vt)))
  756. (make-global id type (visit-init init)))))
  757. globals))
  758. (elems (map (match-lambda
  759. (($ <elem> id mode table type offset inits)
  760. (make-elem id mode table
  761. (visit-val-type type)
  762. (and=> offset visit-init)
  763. (map visit-init inits))))
  764. elems))
  765. (datas (map (match-lambda
  766. (($ <data> id mode mem offset init)
  767. (make-data id mode mem
  768. (and=> offset visit-init)
  769. init)))
  770. datas))
  771. (tags (map (match-lambda
  772. (($ <tag> id type)
  773. (make-tag id (visit-tag-type type))))
  774. tags)))
  775. (let* ((t (make-global-type #f (make-ref-type #f '$wtf8)))
  776. (strings (hash-map->list
  777. (lambda (str id)
  778. (make-global id t
  779. `((i32.const 0)
  780. (i32.const ,(bytevector-length
  781. (string->utf8 str)))
  782. (array.new_data $wtf8 ,id))))
  783. %strings))
  784. (wtf8 (hash-map->list
  785. (lambda (str id)
  786. (make-data id 'passive #f #f (string->utf8 str)))
  787. %strings)))
  788. (add-stdlib
  789. (make-wasm id
  790. types
  791. (map cdr imports)
  792. (append funcs
  793. (filter-map car imports)
  794. (filter-map car exports))
  795. tables
  796. memories
  797. (append strings globals)
  798. (map cdr exports)
  799. start
  800. elems
  801. (append wtf8 datas)
  802. tags
  803. '()
  804. custom)
  805. (wtf8-stdlib)))))))
  806. (define* (lower-stringrefs wasm #:key (strategy 'wtf8))
  807. (match strategy
  808. ('stringref wasm)
  809. ('wtf8 (lower-stringrefs/wtf8 wasm))
  810. (_ (error "unknown stringref lowering strategy" strategy))))