lower-stringrefs.scm 38 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835
  1. ;;; Pass to lower stringref to JS string builtins
  2. ;;; Copyright (C) 2023 Igalia, S.L.
  3. ;;;
  4. ;;; Licensed under the Apache License, Version 2.0 (the "License");
  5. ;;; you may not use this file except in compliance with the License.
  6. ;;; You may obtain a copy of the License at
  7. ;;;
  8. ;;; http://www.apache.org/licenses/LICENSE-2.0
  9. ;;;
  10. ;;; Unless required by applicable law or agreed to in writing, software
  11. ;;; distributed under the License is distributed on an "AS IS" BASIS,
  12. ;;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. ;;; See the License for the specific language governing permissions and
  14. ;;; limitations under the License.
  15. ;;; Commentary:
  16. ;;;
  17. ;;; The hoot compiler and run-time use stringref to represent strings,
  18. ;;; albeit wrapped in a struct so they can have a hash code. Stringref
  19. ;;; (https://github.com/WebAssembly/stringref/blob/main/proposals/stringref/Overview.md)
  20. ;;; is an experimental extension to WebAssembly which exposes "host
  21. ;;; strings": strings whose representation is opaque and managed by the
  22. ;;; host. On a web browser, these will likely be WTF-16 strings; on
  23. ;;; anything else, it's probably WTF-8, though for the Hoot VM used in
  24. ;;; development, we use Guile strings there.
  25. ;;;
  26. ;;; Stringref does not yet have consensus among the WebAssembly
  27. ;;; standardization group. Chrome has an experimental
  28. ;;; --experimental-wasm-stringref flag that users have to enable
  29. ;;; explicitly, and which is also on for a limited time and set of web
  30. ;;; sites for an "origin trial". But, it's not shipping by default
  31. ;;; anywhere, so we need a mitigation.
  32. ;;;
  33. ;;; A competitor to stringref is JavaScript String Builtins
  34. ;;; (https://github.com/WebAssembly/js-string-builtins/blob/main/proposals/js-string-builtins/Overview.md).
  35. ;;; This proposal instead ships a new set of functions for JavaScript
  36. ;;; hosts, off of a new WebAssembly.String object:
  37. ;;;
  38. ;;; - WebAssembly.String.fromWtf16Array(array, start, end) -> str
  39. ;;; - WebAssembly.String.toWtf16Array(str, array, start) -> length
  40. ;;; - WebAssembly.String.fromWtf8Array(array, start, end) -> str
  41. ;;; - WebAssembly.String.fromCharCode(u16) -> str
  42. ;;; - WebAssembly.String.fromCodePoint(u32) -> str
  43. ;;; - WebAssembly.String.codePointAt(idx) -> i32
  44. ;;; - WebAssembly.String.charCodeAt(idx) -> i32
  45. ;;; - WebAssembly.String.length(str) -> i32
  46. ;;; - WebAssembly.String.concat(a, b) -> str
  47. ;;; - WebAssembly.String.substring(str, start, end) -> str
  48. ;;; - WebAssembly.String.equals(a, b) -> i32
  49. ;;; - WebAssembly.String.compare(a, b) -> i32
  50. ;;;
  51. ;;; The intent is that if a WebAssembly module imports one of these
  52. ;;; functions, it can recognize it specially, as is done for
  53. ;;; e.g. Math.cos, and compile it appropriately. Currently we would use
  54. ;;; externref to represent these strings, but in future there could be
  55. ;;; more precise typing.
  56. ;;;
  57. ;;; Another option would be to just use what WebAssembly gives us, which
  58. ;;; is arrays and structs. That would work fine within the module, but
  59. ;;; it is pretty annoying from the host side: for every string value
  60. ;;; that crosses the threshold, you need to convert to or from an i8
  61. ;;; array. However, JS hosts cannot access the i8 array, so you would
  62. ;;; need to use a reflection module and go byte-by-byte. Very annoying.
  63. ;;;
  64. ;;; Finally there is a mix between the i8 array and string builtins
  65. ;;; approach: use wtf8 internally, but then externalize via calling an
  66. ;;; import, on the boundary, from the WebAssembly side of things. It
  67. ;;; would work for function arguments with stringref-typed arguments and
  68. ;;; results. Nested data would still need to be picked apart by the
  69. ;;; host using a reflection module.
  70. ;;;
  71. ;;; In summary, our options are:
  72. ;;;
  73. ;;; 1. Just stringref. Works for Hoot VM, and Chrome with an origin
  74. ;;; trial.
  75. ;;;
  76. ;;; 2. Just i8 arrays. Works everywhere but is expensive on the
  77. ;;; boundary. Strings are (array i8), immutable, and (array i8) on
  78. ;;; boundary. Iteration views are a struct with an offset.
  79. ;;; string.const loads an immutable i8 array global. We have a
  80. ;;; little state machine for advancing / decoding. Encode and
  81. ;;; concat is array.copy.
  82. ;;;
  83. ;;; 3. Just i8 arrays, but for external functions with string params or
  84. ;;; results, the generated wasm calls
  85. ;;; WebAssembly.String.fromWtf8Array as an import, and types those
  86. ;;; values as externref. Same with toWtf8Array in the other
  87. ;;; direction, except that doesn't exist yet.
  88. ;;;
  89. ;;; 4. Strings are externref. string.const loads an immutable i8 array
  90. ;;; global, but then eagerly calls fromWtf8Array on it. All
  91. ;;; operations proxy through the JS builtin functions. We still
  92. ;;; have a struct iterator.
  93. ;;;
  94. ;;; This module implements a lowering strategy for (3). If the host
  95. ;;; does not support WebAssembly.String, we polyfill the import. This
  96. ;;; way we can target any browser, and take advantage of fromWtf8Array
  97. ;;; on the host side if present. We don't currently think that (4) will
  98. ;;; have good enough performance, and in any case the builtin strings
  99. ;;; proposal doesn't have the facilities we need (e.g. measure_wtf8).
  100. ;;;
  101. ;;; Code:
  102. (define-module (wasm lower-stringrefs)
  103. #:use-module (ice-9 match)
  104. #:use-module ((srfi srfi-1) #:select (append-map filter-map))
  105. #:use-module (rnrs bytevectors)
  106. #:use-module (wasm link)
  107. #:use-module (wasm types)
  108. #:use-module (wasm wat)
  109. #:export (lower-stringrefs))
  110. (define (wtf8-stdlib)
  111. ;; Generalized UTF-8 decoder is a translation of:
  112. ;; https://chromium.googlesource.com/v8/v8/+/main/src/third_party/utf8-decoder/generalized-utf8-decoder.h
  113. ;; generalized utf-8 decoder states
  114. (define %wtf8-reject 0)
  115. (define %wtf8-accept 11)
  116. (define %wtf8-two-byte 22)
  117. (define %wtf8-three-byte 33)
  118. (define %wtf8-four-byte 44)
  119. (define %wtf8-four-byte-low 55)
  120. (define %wtf8-three-byte-high 66)
  121. (define %wtf8-four-byte-mid-high 77)
  122. ;; This first table maps bytes to character to a transition.
  123. ;;
  124. ;; The transition value takes a state to a new state, but it also determines
  125. ;; the set of bits from the current byte that contribute to the decoded
  126. ;; codepoint:
  127. ;;
  128. ;; Transition | Current byte bits that contribute to decoded codepoint
  129. ;; -------------------------------------------------------------------
  130. ;; 0, 1 | #b01111111
  131. ;; 2, 3 | #b00111111
  132. ;; 4, 5 | #b00011111
  133. ;; 6, 7 | #b00001111
  134. ;; 8, 9 | #b00000111
  135. ;; 10 | #b00000011
  136. ;;
  137. ;; Given the WTF-8 encoding, we therefore have the following constraints:
  138. ;;
  139. ;; 1. The transition value for 1-byte encodings should have the value 0 or
  140. ;; 1 so that we preserve all of the low 7 bits.
  141. ;; 2. Continuation bytes (#x80 to #xBF) are of the form #b10xxxxxx, and
  142. ;; therefore should have transition value between 0 and 3.
  143. ;; 3. Leading bytes for 2-byte encodings are of the form #b110yyyyy, and
  144. ;; therefore the transition value can be between 2 and 5.
  145. ;; 4. Leading bytes for 3-byte encodings (#b1110zzzz) need transition
  146. ;; value between 4 and 7.
  147. ;; 5. Leading bytes for 4-byte encodings (#b11110uuu) need transition
  148. ;; value between 6 and 9.
  149. ;; 6. We need more states to impose irregular constraints. Sometimes we
  150. ;; can use the knowldege that e.g. some high significant bits of the
  151. ;; xxxx in #b1110xxxx are 0, then we can use a higher transition value.
  152. ;; 7. Transitions to invalid states can use any transition value.
  153. (define %wtf8-transitions
  154. #vu8(0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ;; 00-0F
  155. 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ;; 10-1F
  156. 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ;; 20-2F
  157. 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ;; 30-3F
  158. 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ;; 40-4F
  159. 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ;; 50-5F
  160. 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ;; 60-6F
  161. 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ;; 70-7F
  162. 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ;; 80-8F
  163. 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 ;; 90-9F
  164. 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 ;; A0-AF
  165. 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 ;; B0-BF
  166. 8 8 4 4 4 4 4 4 4 4 4 4 4 4 4 4 ;; C0-CF
  167. 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 ;; D0-DF
  168. 9 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 ;; E0-EF
  169. 10 6 6 6 7 8 8 8 8 8 8 8 8 8 8 8)) ;; F0-FF
  170. ;; This second table maps a state to a new state when adding a transition.
  171. ;; 00-7F
  172. ;; | 80-8F
  173. ;; | | 90-9F
  174. ;; | | | A0-BF
  175. ;; | | | | C2-DF
  176. ;; | | | | | E1-EF
  177. ;; | | | | | | F1-F3
  178. ;; | | | | | | | F4
  179. ;; | | | | | | | | C0, C1, F5-FF
  180. ;; | | | | | | | | | E0
  181. ;; | | | | | | | | | | F0
  182. (define %wtf8-states
  183. #vu8(0 0 0 0 0 0 0 0 0 0 0 ;; REJECT = 0
  184. 11 0 0 0 22 33 44 55 0 66 77 ;; ACCEPT = 11
  185. 0 11 11 11 0 0 0 0 0 0 0 ;; 2-byte = 22
  186. 0 22 22 22 0 0 0 0 0 0 0 ;; 3-byte = 33
  187. 0 33 33 33 0 0 0 0 0 0 0 ;; 4-byte = 44
  188. 0 33 0 0 0 0 0 0 0 0 0 ;; 4-byte low = 55
  189. 0 0 0 22 0 0 0 0 0 0 0 ;; 3-byte high = 66
  190. 0 0 33 33 0 0 0 0 0 0 0)) ;; 4-byte mid/high = 77
  191. (wat->wasm
  192. `((type $wtf8 (array (mut i8)))
  193. (type $stringview-iter
  194. (struct (field $wtf8 (ref $wtf8))
  195. (field $pos (mut i32))))
  196. (func $wtf8->extern-string (import "rt" "wtf8_to_string")
  197. (param $wtf8 (ref null $wtf8))
  198. (result (ref extern)))
  199. (func $extern-string->wtf8 (import "rt" "string_to_wtf8")
  200. (param $str (ref null extern))
  201. (result (ref $wtf8)))
  202. (type $immutable-bytes (array i8))
  203. (data $wtf8-transitions ,%wtf8-transitions)
  204. (data $wtf8-states ,%wtf8-states)
  205. (global $wtf8-transitions (ref $immutable-bytes)
  206. (array.new_data $immutable-bytes $wtf8-transitions
  207. (i32.const 0)
  208. (i32.const
  209. ,(bytevector-length %wtf8-transitions))))
  210. (global $wtf8-states (ref $immutable-bytes)
  211. (array.new_data $immutable-bytes $wtf8-states
  212. (i32.const 0)
  213. (i32.const
  214. ,(bytevector-length %wtf8-states))))
  215. (func $decode-wtf8 (param $byte i32) (param $buf i32) (param $state i32)
  216. (result i32 i32) ; codepoint, state
  217. (local $type i32)
  218. (local.set $type
  219. (array.get_u $immutable-bytes
  220. (global.get $wtf8-transitions)
  221. (local.get $byte)))
  222. ;; Two values: first push the codepoint
  223. (i32.or (i32.shl (local.get $buf) (i32.const 6))
  224. (i32.and (local.get $byte)
  225. (i32.shr_u (i32.const #x7f)
  226. (i32.shr_u (local.get $type)
  227. (i32.const 1)))))
  228. ;; Then the state
  229. (array.get_u $immutable-bytes
  230. (global.get $wtf8-states)
  231. (i32.add (local.get $state) (local.get $type))))
  232. ;; Downside of wtf-8: byte-by-byte comparison.
  233. (func $string.compare
  234. (param $a (ref $wtf8))
  235. (param $b (ref $wtf8))
  236. (result i32)
  237. (local $i i32)
  238. (local $d i32)
  239. (if (ref.eq (local.get $a) (local.get $b))
  240. (then (return (i32.const 0))))
  241. (loop $lp
  242. i32
  243. (if (i32.eq (array.len (local.get $a)) (local.get $i))
  244. (then
  245. (if (i32.eq (array.len (local.get $b)) (local.get $i))
  246. (then (return (i32.const 0)))
  247. (else (return (i32.const -1))))))
  248. (if (i32.eq (array.len (local.get $b)) (local.get $i))
  249. (then (return (i32.const 1))))
  250. (local.tee
  251. $d
  252. (i32.sub
  253. (array.get_u $wtf8 (local.get $a) (local.get $i))
  254. (array.get_u $wtf8 (local.get $b) (local.get $i))))
  255. (if (i32.eqz)
  256. (then
  257. (local.set $i
  258. (i32.add (local.get $i) (i32.const 1)))
  259. (br $lp)))
  260. (if i32
  261. (i32.lt_s (local.get $d) (i32.const 0))
  262. (then (i32.const -1))
  263. (else (i32.const 1)))))
  264. (func $string.eq (param $a (ref $wtf8)) (param $b (ref $wtf8))
  265. (result i32)
  266. (i32.eqz (call $string.compare (local.get $a) (local.get $b))))
  267. (func $string.as_iter (param $wtf8 (ref $wtf8))
  268. (result (ref $stringview-iter))
  269. (struct.new $stringview-iter (local.get $wtf8) (i32.const 0)))
  270. (func $stringview_iter.next
  271. (param $iter (ref $stringview-iter))
  272. (result i32)
  273. (local $wtf8 (ref $wtf8))
  274. (local $cp i32)
  275. (local $state i32)
  276. (local $i i32)
  277. (local.set $wtf8 (struct.get $stringview-iter $wtf8
  278. (local.get $iter)))
  279. (local.set $i (struct.get $stringview-iter $pos
  280. (local.get $iter)))
  281. (local.set $state (i32.const ,%wtf8-accept))
  282. (if (i32.ge_u (local.get $i) (array.len (local.get $wtf8)))
  283. (then (return (i32.const -1))))
  284. (loop $lp
  285. (call $decode-wtf8
  286. (array.get_u $wtf8 (local.get $wtf8) (local.get $i))
  287. (local.get $cp)
  288. (local.get $state))
  289. (local.set $state)
  290. (local.set $cp)
  291. ;; Must be valid WTF-8!
  292. (if (i32.eq (local.get $state) (i32.const ,%wtf8-reject))
  293. (then (unreachable)))
  294. (local.set $i (i32.add (local.get $i) (i32.const 1)))
  295. (if (i32.ne (local.get $state) (i32.const ,%wtf8-accept))
  296. (then
  297. (if (i32.ge_u (local.get $i) (array.len (local.get $wtf8)))
  298. ;; Bad WTF-8.
  299. (then (unreachable)))
  300. (br $lp))))
  301. (struct.set $stringview-iter $pos
  302. (local.get $iter) (local.get $i))
  303. (local.get $cp))
  304. (func $stringview_iter.advance
  305. (param $iter (ref $stringview-iter)) (param $count i32)
  306. (result i32)
  307. (local $wtf8 (ref $wtf8))
  308. (local $state i32)
  309. (local $i i32)
  310. (local $advanced i32)
  311. (local.set $wtf8 (struct.get $stringview-iter $wtf8
  312. (local.get $iter)))
  313. (local.set $i (struct.get $stringview-iter $pos (local.get $iter)))
  314. (local.set $state (i32.const ,%wtf8-accept))
  315. (if (i32.eqz (local.get $count))
  316. (then (return (i32.const 0))))
  317. (loop $lp
  318. (if (i32.lt_u (local.get $i) (array.len (local.get $wtf8)))
  319. (then
  320. (call $decode-wtf8
  321. (array.get_u $wtf8 (local.get $wtf8) (local.get $i))
  322. (i32.const 0)
  323. (local.get $state))
  324. (local.set $state)
  325. (drop)
  326. ;; Must be valid WTF-8!
  327. (if (i32.eq (local.get $state) (i32.const ,%wtf8-reject))
  328. (then (unreachable)))
  329. (local.set $i (i32.add (local.get $i) (i32.const 1)))
  330. (if (i32.eq (local.get $state (i32.const ,%wtf8-accept)))
  331. (then
  332. (local.set $advanced
  333. (i32.add (local.get $advanced)
  334. (i32.const 1)))
  335. (if (i32.lt_u (local.get $advanced)
  336. (local.get $count))
  337. (then (br $lp))))
  338. (else
  339. (br $lp))))
  340. (else
  341. ;; Must be valid WTF-8!
  342. (if (i32.ne (local.get $state) (i32.const ,%wtf8-accept))
  343. (then (unreachable))))))
  344. (struct.set $stringview-iter $pos (local.get $iter) (local.get $i))
  345. (local.get $advanced))
  346. (func $stringview_iter.slice
  347. (param $iter (ref $stringview-iter)) (param $count i32)
  348. (result (ref $wtf8))
  349. (local $wtf8 (ref $wtf8))
  350. (local $start i32)
  351. (local $len i32)
  352. (local $temp (ref $stringview-iter))
  353. (local $out (ref $wtf8))
  354. (local.set $wtf8 (struct.get $stringview-iter $wtf8
  355. (local.get $iter)))
  356. (local.set $start (struct.get $stringview-iter $pos
  357. (local.get $iter)))
  358. (local.set $temp (struct.new $stringview-iter (local.get $wtf8)
  359. (local.get $start)))
  360. (call $stringview_iter.advance (local.get $temp)
  361. (local.get $count))
  362. (drop)
  363. (local.set $len
  364. (i32.sub (struct.get $stringview-iter $pos
  365. (local.get $temp))
  366. (local.get $start)))
  367. (local.set $out (array.new_default $wtf8 (local.get $len)))
  368. (array.copy $wtf8 $wtf8
  369. (local.get $out) (i32.const 0)
  370. (local.get $wtf8) (local.get $start) (local.get $len))
  371. (local.get $out))
  372. (func $string.encode_wtf8_array
  373. (param $wtf8 (ref $wtf8)) (param $out (ref $wtf8)) (param $pos i32)
  374. (array.copy $wtf8 $wtf8
  375. (local.get $out)
  376. (local.get $pos)
  377. (local.get $wtf8)
  378. (i32.const 0)
  379. (array.len (local.get $wtf8))))
  380. (func $string.new_lossy_utf8_array
  381. (param $buf (ref $wtf8)) (param $start i32) (param $len i32)
  382. (result (ref $wtf8))
  383. (local $out (ref $wtf8))
  384. ;; FIXME: validate buffer as wtf8
  385. (local.set $out (array.new_default $wtf8 (local.get $len)))
  386. (array.copy $wtf8 $wtf8
  387. (local.get $out) (i32.const 0)
  388. (local.get $buf) (local.get $start) (local.get $len))
  389. (local.get $out))
  390. (func $string.measure_wtf16 (param $wtf8 (ref $wtf8)) (result i32)
  391. (local $iter (ref $stringview-iter))
  392. (local $cp i32)
  393. (local $count i32)
  394. (local.set $iter (call $string.as_iter (local.get $wtf8)))
  395. (loop $lp
  396. (local.set $cp (call $stringview_iter.next (local.get $iter)))
  397. (if (i32.le_s (i32.const 0) (local.get $cp))
  398. (then
  399. (local.set $count
  400. (i32.add (i32.add (local.get $count)
  401. (i32.const 1))
  402. (i32.gt_u (local.get $cp)
  403. (i32.const #xffff))))
  404. (br $lp))))
  405. (local.get $count)))))
  406. ;; Some imports and exports are to other wasm modules, and some are to
  407. ;; the host. Imports and exports to other wasm modules should be
  408. ;; lowered to $wtf8, whereas "external" interfaces to the host should
  409. ;; also be wrapped with wtf8-to-string. The right way to know whether
  410. ;; an import or export is internal or external is to use some kind of
  411. ;; explicit information in a custom section. For now, though, we just
  412. ;; look at the name under which the function is imported or exported,
  413. ;; because that's compatible with what the hoot compiler does.
  414. (define (import-is-external? mod name)
  415. (not (string-prefix? "$" name)))
  416. (define (export-is-external? name)
  417. (not (string-prefix? "$" name)))
  418. (define* (lower-stringrefs/wtf8 wasm #:key
  419. (import-is-external? import-is-external?)
  420. (export-is-external? export-is-external?))
  421. (define make-id
  422. (let ((counter 0))
  423. (lambda (stem)
  424. (let ((sym (string->symbol (format #f "~a-~a" stem counter))))
  425. (set! counter (1+ counter))
  426. sym))))
  427. (define %strings (make-hash-table))
  428. (define (intern-string! str)
  429. (or (hash-ref %strings str)
  430. (let ((id (make-id '$stringref)))
  431. (hash-set! %strings str id)
  432. id)))
  433. (match wasm
  434. (($ <wasm> id types imports funcs tables memories globals exports start
  435. elems datas tags () custom)
  436. ;; need to replace string-typed imports with wrappers
  437. (define (visit-heap-type type)
  438. (match type
  439. ('string '$wtf8)
  440. ('stringview_wtf8 (error "lowering wtf8 views unsupported"))
  441. ('stringview_wtf16 (error "lowering wtf16 views unsupported"))
  442. ('stringview_iter '$stringview-iter)
  443. (_ type)))
  444. (define (visit-val-type type)
  445. (match type
  446. (($ <ref-type> nullable? ht)
  447. (make-ref-type nullable? (visit-heap-type ht)))
  448. (_ type)))
  449. (define (visit-ref-type type)
  450. (visit-val-type type))
  451. (define (visit-param param)
  452. (match param
  453. (($ <param> id type)
  454. (make-param id (visit-val-type type)))))
  455. (define (visit-field field)
  456. (match field
  457. (($ <field> id mutable? type)
  458. (make-field id mutable? (visit-val-type type)))))
  459. (define (visit-func-sig type)
  460. (match type
  461. (($ <func-sig> params results)
  462. (make-func-sig (map visit-param params)
  463. (map visit-val-type results)))))
  464. (define (visit-base-type type)
  465. (match type
  466. (($ <struct-type> fields)
  467. (make-struct-type (map visit-field fields)))
  468. (($ <array-type> mutable? type)
  469. (make-array-type mutable? (visit-val-type type)))
  470. (_
  471. (visit-func-sig type))))
  472. (define (visit-sub-type type)
  473. (match type
  474. (($ <sub-type> final? supers type)
  475. (make-sub-type final? supers (visit-base-type type)))
  476. (_ (visit-base-type type))))
  477. (define (visit-type-use type)
  478. (match type
  479. (($ <type-use> id sig)
  480. (make-type-use id (visit-func-sig sig)))))
  481. (define (visit-table-type type)
  482. (match type
  483. (($ <table-type> limits elem-type)
  484. (make-table-type limits (visit-val-type elem-type)))))
  485. (define (visit-global-type type)
  486. (match type
  487. (($ <global-type> mutable? type)
  488. (make-global-type mutable? (visit-val-type type)))))
  489. (define (visit-block-type type)
  490. (match type
  491. (#f #f)
  492. (($ <type-use>) (visit-type-use type))
  493. ((or ($ <ref-type>) (? symbol?)) (visit-val-type type))))
  494. (define visit-inst
  495. (match-lambda
  496. (((and inst (or 'block 'loop)) label type body)
  497. `(,inst ,label ,(visit-block-type type)
  498. ,(visit-expr body)))
  499. (('if label type consequent alternate)
  500. `(if ,label ,(visit-block-type type)
  501. ,(visit-expr consequent)
  502. ,(visit-expr alternate)))
  503. (('try label type body catches catch-all)
  504. `(try ,label ,(visit-block-type type)
  505. ,(visit-expr body)
  506. ,(map visit-expr catches)
  507. ,(and=> catch-all visit-expr)))
  508. (('try_delegate label type body handler)
  509. `(try_delegate ,label ,(visit-block-type type)
  510. ,(visit-expr body)
  511. ,handler))
  512. (('call_indirect table type)
  513. `(call_indirect ,table ,(visit-type-use type)))
  514. (('select types) `(select ,(map visit-val-type types)))
  515. ;; GC instructions.
  516. (('ref.null ht) `(ref.null ,(visit-heap-type ht)))
  517. (((and inst (or 'ref.test 'ref.cast)) rt)
  518. `(,inst ,(visit-ref-type rt)))
  519. (((and inst (or 'br_on_cast 'br_on_cast_fail)) label rt1 rt2)
  520. `(,inst ,label ,(visit-ref-type rt1) ,(visit-ref-type rt2)))
  521. ;; Stringref instructions.
  522. (('string.const str) `(global.get ,(intern-string! str)))
  523. (('string.new_utf8 mem) `(call ,(symbol-append
  524. '$string.new_utf8_ mem)))
  525. (('string.new_lossy_utf8 mem) `(call ,(symbol-append
  526. '$string.new_lossy_utf8_ mem)))
  527. (('string.new_wtf8 mem) `(call ,(symbol-append
  528. '$string.new_wtf8_ mem)))
  529. (('string.new_wtf16 mem) `(call ,(symbol-append
  530. '$string.new_wtf16_ mem)))
  531. (('string.measure_wtf8) '(array.len))
  532. (('string.measure_utf8) '(call $string.measure_utf8))
  533. (('string.measure_wtf16) '(call $string.measure_wtf16))
  534. (('string.encode_utf8 mem) `(call ,(symbol-append
  535. '$string.encode_utf8_ mem)))
  536. (('string.encode_lossy_utf8 mem) `(call ,(symbol-append
  537. '$string.encode_lossy_utf8_ mem)))
  538. (('string.encode_wtf8 mem) `(call ,(symbol-append '$string.encode_wtf8_ mem)))
  539. (('string.encode_wtf16 mem) `(call ,(symbol-append
  540. '$string.encode_wtf16_ mem)))
  541. (('string.concat) '(call $string.concat))
  542. (('string.eq) '(call $string.eq))
  543. (('string.is_usv_sequence) '(call $string.is_usv_sequence))
  544. (('string.compare) '(call $string.compare))
  545. (('string.from_code_point) '(call $string.from_code_point))
  546. (('string.as_wtf8) '(call $string.as_wtf8))
  547. (('stringview_wtf8.advance) '(call $stringview_wtf8.advance))
  548. (('stringview_wtf8.encode_utf8 mem)
  549. `(call ,(symbol-append
  550. '$stringview_wtf8.encode_utf8_ mem)))
  551. (('stringview_wtf8.encode_lossy_utf8 mem)
  552. `(call ,(symbol-append
  553. '$stringview_wtf8.encode_lossy_utf8_ mem)))
  554. (('stringview_wtf8.encode_wtf8 mem)
  555. `(call ,(symbol-append
  556. '$stringview_wtf8.encode_wtf8_ mem)))
  557. (('stringview_wtf8.slice) '(call $stringview_wtf8.slice))
  558. (('string.as_wtf16) '(call $string.as_wtf16))
  559. (('stringview_wtf16.length) '(call $stringview_wtf16.length))
  560. (('stringview_wtf16.get_codeunit) `(call $stringview_wtf16.get_codeunit))
  561. (('stringview_wtf16.encode mem) `(call ,(symbol-append
  562. '$stringview_wtf16.encode_ mem)))
  563. (('stringview_wtf16.slice) '(call $stringview_wtf16.slice))
  564. (('string.as_iter) '(call $string.as_iter))
  565. (('stringview_iter.next) '(call $stringview_iter.next))
  566. (('stringview_iter.advance) '(call $stringview_iter.advance))
  567. (('stringview_iter.rewind) '(call $stringview_iter.rewind))
  568. (('stringview_iter.slice) '(call $stringview_iter.slice))
  569. (('string.new_utf8_array) '(call $string.new_utf8_array))
  570. (('string.new_lossy_utf8_array) '(call $string.new_lossy_utf8_array))
  571. (('string.new_wtf8_array) '(call $string.new_wtf8_array))
  572. (('string.new_wtf16_array) '(call $string.new_wtf16_array))
  573. (('string.encode_utf8_array) '(call $string.encode_utf8_array))
  574. (('string.encode_lossy_utf8_array) '(call $string.encode_lossy_utf8_array))
  575. (('string.encode_wtf8_array) '(call $string.encode_wtf8_array))
  576. (('string.encode_wtf16_array) '(call $string.encode_wtf16_array))
  577. (inst inst)))
  578. (define (visit-expr expr)
  579. (map visit-inst expr))
  580. (define (visit-init expr)
  581. (visit-expr expr))
  582. (define (visit-func func)
  583. (define visit-local
  584. (match-lambda
  585. (($ <local> id type)
  586. (make-local id (visit-val-type type)))))
  587. (match func
  588. (($ <func> id type locals body)
  589. (let ((type (visit-type-use type))
  590. (locals (map visit-local locals))
  591. (body (visit-expr body)))
  592. (make-func id type locals body)))))
  593. (define (lower-extern-val-type type)
  594. (match type
  595. (($ <ref-type> nullable? 'string)
  596. (make-ref-type nullable? 'extern))
  597. (($ <ref-type> nullable? (or 'stringview_wtf8
  598. 'stringview_wtf16
  599. 'stringview_iter))
  600. (error "extern param/result with stringview type unimplemented" type))
  601. (_ (visit-val-type type))))
  602. (define (lower-extern-val type)
  603. (match type
  604. (($ <ref-type> nullable? 'string)
  605. '((call $wtf8->extern-string)))
  606. (($ <ref-type> nullable? (or 'stringview_wtf8
  607. 'stringview_wtf16
  608. 'stringview_iter))
  609. (error "extern value with stringview type unimplemented" type))
  610. (_ '())))
  611. (define (lift-extern-val type)
  612. (match type
  613. (($ <ref-type> nullable? 'string)
  614. '((call $extern-string->wtf8)))
  615. (($ <ref-type> nullable? (or 'stringview_wtf8
  616. 'stringview_wtf16
  617. 'stringview_iter))
  618. (error "extern value with stringview type unimplemented" type))
  619. (_ '())))
  620. (define (lower-extern-func-type type)
  621. (match type
  622. (($ <type-use> tid
  623. ($ <func-sig> (($ <param> pid ptype) ...) (rtype ...)))
  624. (make-type-use tid
  625. (make-func-sig
  626. (map make-param pid
  627. (map lower-extern-val-type ptype))
  628. (map lower-extern-val-type rtype))))))
  629. (define (lower-extern-func-import id wrapped-id type)
  630. (match type
  631. (($ <type-use> _ ($ <func-sig> (($ <param> _ params) ...) results))
  632. (let ((param-count (length params)))
  633. (make-func
  634. id
  635. (visit-type-use type)
  636. (map (lambda (type) (make-local #f (lower-extern-val-type type)))
  637. results)
  638. (let lp ((params params) (i 0))
  639. (match params
  640. ((param . params)
  641. `((local.get ,i)
  642. ,@(lower-extern-val param)
  643. . ,(lp params (1+ i))))
  644. (()
  645. `((call ,wrapped-id)
  646. ,@(reverse (map (lambda (i) `(local.set ,i))
  647. (iota (length results) param-count)))
  648. . ,(let lp ((results results) (i param-count))
  649. (match results
  650. (() '())
  651. ((result . results)
  652. `((local.get ,i)
  653. ,@(lift-extern-val result)
  654. . ,(lp results (1+ i)))))))))))))))
  655. (define (lower-extern-func-export id wrapped-id type)
  656. (match type
  657. (($ <type-use> _ ($ <func-sig> (($ <param> _ params) ...) results))
  658. (let ((param-count (length params)))
  659. (make-func
  660. id
  661. (lower-extern-func-type type)
  662. (map (lambda (type) (make-local #f (visit-val-type type)))
  663. results)
  664. (let lp ((params params) (i 0))
  665. (match params
  666. ((param . params)
  667. `((local.get ,i)
  668. ,@(lift-extern-val param)
  669. . ,(lp params (1+ i))))
  670. (()
  671. `((call ,wrapped-id)
  672. ,@(reverse (map (lambda (i) `(local.set ,i))
  673. (iota (length results) param-count)))
  674. . ,(let lp ((results results) (i param-count))
  675. (match results
  676. (() '())
  677. ((result . results)
  678. `((local.get ,i)
  679. ,@(lower-extern-val result)
  680. . ,(lp results (1+ i)))))))))))))))
  681. (define (lookup-func-type id)
  682. (or (or-map (match-lambda
  683. (($ <import> mod name kind id' type)
  684. (and (eq? id id') type)))
  685. imports)
  686. (or-map (match-lambda
  687. (($ <func> id' type locals body)
  688. (and (eq? id id') type)))
  689. funcs)))
  690. (let ((types (map (match-lambda
  691. (($ <rec-group> (($ <type> id type) ...))
  692. (make-rec-group
  693. (map make-type id (map visit-sub-type type))))
  694. (($ <type> id type)
  695. (make-type id (visit-sub-type type))))
  696. types))
  697. (imports (map
  698. (match-lambda
  699. (($ <import> mod name kind id type)
  700. (let* ((type* (match kind
  701. ('func (if (import-is-external? mod name)
  702. (lower-extern-func-type type)
  703. (visit-type-use type)))
  704. ('table (visit-table-type type))
  705. ('memory type)
  706. ('global (visit-global-type type))))
  707. (id* (and (eq? kind 'func)
  708. (import-is-external? mod name)
  709. (not (equal? type type*))
  710. (make-id (symbol-append id '-stringref)))))
  711. (cons (and id* (lower-extern-func-import id id* type))
  712. (make-import mod name kind (or id* id) type*)))))
  713. imports))
  714. (exports (map
  715. (match-lambda
  716. ((and export ($ <export> name kind id))
  717. (cond
  718. ((and (eq? kind 'func)
  719. (export-is-external? name)
  720. (and=>
  721. (lookup-func-type id)
  722. (lambda (type)
  723. (if (equal? type
  724. (lower-extern-func-type type))
  725. #f
  726. type))))
  727. => (lambda (type)
  728. (let ((id* (make-id
  729. (symbol-append id '-stringref))))
  730. (cons (lower-extern-func-export id* id type)
  731. (make-export name kind id*)))))
  732. (else
  733. (cons #f export)))))
  734. exports))
  735. (funcs (map visit-func funcs))
  736. (tables (map (match-lambda
  737. (($ <table> id type init)
  738. (make-table id (visit-table-type type)
  739. (and init (visit-init init)))))
  740. tables))
  741. (globals (map (match-lambda
  742. (($ <global> id ($ <global-type> mutable? vt) init)
  743. (let* ((vt (visit-val-type vt))
  744. (type (make-global-type mutable? vt)))
  745. (make-global id type (visit-init init)))))
  746. globals))
  747. (elems (map (match-lambda
  748. (($ <elem> id mode table type offset inits)
  749. (make-elem id mode table
  750. (visit-val-type type)
  751. (and=> offset visit-init)
  752. (map visit-init inits))))
  753. elems))
  754. (datas (map (match-lambda
  755. (($ <data> id mode mem offset init)
  756. (make-data id mode mem
  757. (and=> offset visit-init)
  758. init)))
  759. datas))
  760. (tags (map (match-lambda
  761. (($ <tag> id type)
  762. (make-tag id (visit-type-use type))))
  763. tags)))
  764. (let* ((t (make-global-type #f (make-ref-type #f '$wtf8)))
  765. (strings (hash-map->list
  766. (lambda (str id)
  767. (make-global id t
  768. `((i32.const 0)
  769. (i32.const ,(bytevector-length
  770. (string->utf8 str)))
  771. (array.new_data $wtf8 ,id))))
  772. %strings))
  773. (wtf8 (hash-map->list
  774. (lambda (str id)
  775. (make-data id 'passive #f #f (string->utf8 str)))
  776. %strings)))
  777. (add-stdlib
  778. (make-wasm id
  779. types
  780. (map cdr imports)
  781. (append funcs
  782. (filter-map car imports)
  783. (filter-map car exports))
  784. tables
  785. memories
  786. (append strings globals)
  787. (map cdr exports)
  788. start
  789. elems
  790. (append wtf8 datas)
  791. tags
  792. '()
  793. custom)
  794. (wtf8-stdlib)))))))
  795. (define* (lower-stringrefs wasm #:key (strategy 'wtf8))
  796. (match strategy
  797. ('stringref wasm)
  798. ('wtf8 (lower-stringrefs/wtf8 wasm))
  799. (_ (error "unknown stringref lowering strategy" strategy))))