lower-stringrefs.scm 39 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868
  1. ;;; Pass to lower stringref to JS string builtins
  2. ;;; Copyright (C) 2023, 2024 Igalia, S.L.
  3. ;;;
  4. ;;; Licensed under the Apache License, Version 2.0 (the "License");
  5. ;;; you may not use this file except in compliance with the License.
  6. ;;; You may obtain a copy of the License at
  7. ;;;
  8. ;;; http://www.apache.org/licenses/LICENSE-2.0
  9. ;;;
  10. ;;; Unless required by applicable law or agreed to in writing, software
  11. ;;; distributed under the License is distributed on an "AS IS" BASIS,
  12. ;;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. ;;; See the License for the specific language governing permissions and
  14. ;;; limitations under the License.
  15. ;;; Commentary:
  16. ;;;
  17. ;;; The hoot compiler and run-time use stringref to represent strings,
  18. ;;; albeit wrapped in a struct so they can have a hash code. Stringref
  19. ;;; (https://github.com/WebAssembly/stringref/blob/main/proposals/stringref/Overview.md)
  20. ;;; is an experimental extension to WebAssembly which exposes "host
  21. ;;; strings": strings whose representation is opaque and managed by the
  22. ;;; host. On a web browser, these will likely be WTF-16 strings; on
  23. ;;; anything else, it's probably WTF-8, though for the Hoot VM used in
  24. ;;; development, we use Guile strings there.
  25. ;;;
  26. ;;; Stringref does not yet have consensus among the WebAssembly
  27. ;;; standardization group. Chrome has an experimental
  28. ;;; --experimental-wasm-stringref flag that users have to enable
  29. ;;; explicitly, and which is also on for a limited time and set of web
  30. ;;; sites for an "origin trial". But, it's not shipping by default
  31. ;;; anywhere, so we need a mitigation.
  32. ;;;
  33. ;;; A competitor to stringref is JavaScript String Builtins
  34. ;;; (https://github.com/WebAssembly/js-string-builtins/blob/main/proposals/js-string-builtins/Overview.md).
  35. ;;; This proposal instead ships a new set of functions for JavaScript
  36. ;;; hosts, off of a new WebAssembly.String object:
  37. ;;;
  38. ;;; - WebAssembly.String.fromWtf16Array(array, start, end) -> str
  39. ;;; - WebAssembly.String.toWtf16Array(str, array, start) -> length
  40. ;;; - WebAssembly.String.fromWtf8Array(array, start, end) -> str
  41. ;;; - WebAssembly.String.fromCharCode(u16) -> str
  42. ;;; - WebAssembly.String.fromCodePoint(u32) -> str
  43. ;;; - WebAssembly.String.codePointAt(idx) -> i32
  44. ;;; - WebAssembly.String.charCodeAt(idx) -> i32
  45. ;;; - WebAssembly.String.length(str) -> i32
  46. ;;; - WebAssembly.String.concat(a, b) -> str
  47. ;;; - WebAssembly.String.substring(str, start, end) -> str
  48. ;;; - WebAssembly.String.equals(a, b) -> i32
  49. ;;; - WebAssembly.String.compare(a, b) -> i32
  50. ;;;
  51. ;;; The intent is that if a WebAssembly module imports one of these
  52. ;;; functions, it can recognize it specially, as is done for
  53. ;;; e.g. Math.cos, and compile it appropriately. Currently we would use
  54. ;;; externref to represent these strings, but in future there could be
  55. ;;; more precise typing.
  56. ;;;
  57. ;;; Another option would be to just use what WebAssembly gives us, which
  58. ;;; is arrays and structs. That would work fine within the module, but
  59. ;;; it is pretty annoying from the host side: for every string value
  60. ;;; that crosses the threshold, you need to convert to or from an i8
  61. ;;; array. However, JS hosts cannot access the i8 array, so you would
  62. ;;; need to use a reflection module and go byte-by-byte. Very annoying.
  63. ;;;
  64. ;;; Finally there is a mix between the i8 array and string builtins
  65. ;;; approach: use wtf8 internally, but then externalize via calling an
  66. ;;; import, on the boundary, from the WebAssembly side of things. It
  67. ;;; would work for function arguments with stringref-typed arguments and
  68. ;;; results. Nested data would still need to be picked apart by the
  69. ;;; host using a reflection module.
  70. ;;;
  71. ;;; In summary, our options are:
  72. ;;;
  73. ;;; 1. Just stringref. Works for Hoot VM, and Chrome with an origin
  74. ;;; trial.
  75. ;;;
  76. ;;; 2. Just i8 arrays. Works everywhere but is expensive on the
  77. ;;; boundary. Strings are (array i8), immutable, and (array i8) on
  78. ;;; boundary. Iteration views are a struct with an offset.
  79. ;;; string.const loads an immutable i8 array global. We have a
  80. ;;; little state machine for advancing / decoding. Encode and
  81. ;;; concat is array.copy.
  82. ;;;
  83. ;;; 3. Just i8 arrays, but for external functions with string params or
  84. ;;; results, the generated wasm calls
  85. ;;; WebAssembly.String.fromWtf8Array as an import, and types those
  86. ;;; values as externref. Same with toWtf8Array in the other
  87. ;;; direction, except that doesn't exist yet.
  88. ;;;
  89. ;;; 4. Strings are externref. string.const loads an immutable i8 array
  90. ;;; global, but then eagerly calls fromWtf8Array on it. All
  91. ;;; operations proxy through the JS builtin functions. We still
  92. ;;; have a struct iterator.
  93. ;;;
  94. ;;; This module implements a lowering strategy for (3). If the host
  95. ;;; does not support WebAssembly.String, we polyfill the import. This
  96. ;;; way we can target any browser, and take advantage of fromWtf8Array
  97. ;;; on the host side if present. We don't currently think that (4) will
  98. ;;; have good enough performance, and in any case the builtin strings
  99. ;;; proposal doesn't have the facilities we need (e.g. measure_wtf8).
  100. ;;;
  101. ;;; Code:
  102. (define-module (wasm lower-stringrefs)
  103. #:use-module (ice-9 match)
  104. #:use-module ((srfi srfi-1) #:select (append-map filter-map))
  105. #:use-module (rnrs bytevectors)
  106. #:use-module (wasm link)
  107. #:use-module (wasm types)
  108. #:use-module (wasm wat)
  109. #:export (lower-stringrefs))
  110. (define (wtf8-stdlib)
  111. ;; Generalized UTF-8 decoder is a translation of:
  112. ;; https://chromium.googlesource.com/v8/v8/+/main/src/third_party/utf8-decoder/generalized-utf8-decoder.h
  113. ;; generalized utf-8 decoder states
  114. (define %wtf8-reject 0)
  115. (define %wtf8-accept 11)
  116. (define %wtf8-two-byte 22)
  117. (define %wtf8-three-byte 33)
  118. (define %wtf8-four-byte 44)
  119. (define %wtf8-four-byte-low 55)
  120. (define %wtf8-three-byte-high 66)
  121. (define %wtf8-four-byte-mid-high 77)
  122. ;; This first table maps bytes to character to a transition.
  123. ;;
  124. ;; The transition value takes a state to a new state, but it also determines
  125. ;; the set of bits from the current byte that contribute to the decoded
  126. ;; codepoint:
  127. ;;
  128. ;; Transition | Current byte bits that contribute to decoded codepoint
  129. ;; -------------------------------------------------------------------
  130. ;; 0, 1 | #b01111111
  131. ;; 2, 3 | #b00111111
  132. ;; 4, 5 | #b00011111
  133. ;; 6, 7 | #b00001111
  134. ;; 8, 9 | #b00000111
  135. ;; 10 | #b00000011
  136. ;;
  137. ;; Given the WTF-8 encoding, we therefore have the following constraints:
  138. ;;
  139. ;; 1. The transition value for 1-byte encodings should have the value 0 or
  140. ;; 1 so that we preserve all of the low 7 bits.
  141. ;; 2. Continuation bytes (#x80 to #xBF) are of the form #b10xxxxxx, and
  142. ;; therefore should have transition value between 0 and 3.
  143. ;; 3. Leading bytes for 2-byte encodings are of the form #b110yyyyy, and
  144. ;; therefore the transition value can be between 2 and 5.
  145. ;; 4. Leading bytes for 3-byte encodings (#b1110zzzz) need transition
  146. ;; value between 4 and 7.
  147. ;; 5. Leading bytes for 4-byte encodings (#b11110uuu) need transition
  148. ;; value between 6 and 9.
  149. ;; 6. We need more states to impose irregular constraints. Sometimes we
  150. ;; can use the knowldege that e.g. some high significant bits of the
  151. ;; xxxx in #b1110xxxx are 0, then we can use a higher transition value.
  152. ;; 7. Transitions to invalid states can use any transition value.
  153. (define %wtf8-transitions
  154. #vu8(0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ;; 00-0F
  155. 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ;; 10-1F
  156. 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ;; 20-2F
  157. 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ;; 30-3F
  158. 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ;; 40-4F
  159. 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ;; 50-5F
  160. 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ;; 60-6F
  161. 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ;; 70-7F
  162. 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 ;; 80-8F
  163. 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 ;; 90-9F
  164. 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 ;; A0-AF
  165. 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 ;; B0-BF
  166. 8 8 4 4 4 4 4 4 4 4 4 4 4 4 4 4 ;; C0-CF
  167. 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 ;; D0-DF
  168. 9 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 ;; E0-EF
  169. 10 6 6 6 7 8 8 8 8 8 8 8 8 8 8 8)) ;; F0-FF
  170. ;; This second table maps a state to a new state when adding a transition.
  171. ;; 00-7F
  172. ;; | 80-8F
  173. ;; | | 90-9F
  174. ;; | | | A0-BF
  175. ;; | | | | C2-DF
  176. ;; | | | | | E1-EF
  177. ;; | | | | | | F1-F3
  178. ;; | | | | | | | F4
  179. ;; | | | | | | | | C0, C1, F5-FF
  180. ;; | | | | | | | | | E0
  181. ;; | | | | | | | | | | F0
  182. (define %wtf8-states
  183. #vu8(0 0 0 0 0 0 0 0 0 0 0 ;; REJECT = 0
  184. 11 0 0 0 22 33 44 55 0 66 77 ;; ACCEPT = 11
  185. 0 11 11 11 0 0 0 0 0 0 0 ;; 2-byte = 22
  186. 0 22 22 22 0 0 0 0 0 0 0 ;; 3-byte = 33
  187. 0 33 33 33 0 0 0 0 0 0 0 ;; 4-byte = 44
  188. 0 33 0 0 0 0 0 0 0 0 0 ;; 4-byte low = 55
  189. 0 0 0 22 0 0 0 0 0 0 0 ;; 3-byte high = 66
  190. 0 0 33 33 0 0 0 0 0 0 0)) ;; 4-byte mid/high = 77
  191. (wat->wasm
  192. `((type $wtf8 (array (mut i8)))
  193. (type $stringview-iter
  194. (struct (field $wtf8 (ref $wtf8))
  195. (field $pos (mut i32))))
  196. (func $wtf8->extern-string (import "rt" "wtf8_to_string")
  197. (param $wtf8 (ref null $wtf8))
  198. (result (ref extern)))
  199. (func $extern-string->wtf8 (import "rt" "string_to_wtf8")
  200. (param $str (ref extern))
  201. (result (ref $wtf8)))
  202. (func $nullable-wtf8->extern-string
  203. (param $wtf8 (ref null $wtf8))
  204. (result (ref null extern))
  205. (if (ref null extern)
  206. (ref.is_null (local.get $wtf8))
  207. (then (ref.null extern))
  208. (else (return_call $wtf8->extern-string
  209. (ref.as_non_null (local.get $wtf8))))))
  210. (func $nullable-extern-string->wtf8
  211. (param $str (ref null extern))
  212. (result (ref null $wtf8))
  213. (if (ref null $wtf8)
  214. (ref.is_null (local.get $str))
  215. (then (ref.null $wtf8))
  216. (else (return_call $extern-string->wtf8
  217. (ref.as_non_null (local.get $str))))))
  218. (type $immutable-bytes (array i8))
  219. (data $wtf8-transitions ,%wtf8-transitions)
  220. (data $wtf8-states ,%wtf8-states)
  221. (global $wtf8-transitions (ref $immutable-bytes)
  222. (array.new_data $immutable-bytes $wtf8-transitions
  223. (i32.const 0)
  224. (i32.const
  225. ,(bytevector-length %wtf8-transitions))))
  226. (global $wtf8-states (ref $immutable-bytes)
  227. (array.new_data $immutable-bytes $wtf8-states
  228. (i32.const 0)
  229. (i32.const
  230. ,(bytevector-length %wtf8-states))))
  231. (func $decode-wtf8 (param $byte i32) (param $buf i32) (param $state i32)
  232. (result i32 i32) ; codepoint, state
  233. (local $type i32)
  234. (local.set $type
  235. (array.get_u $immutable-bytes
  236. (global.get $wtf8-transitions)
  237. (local.get $byte)))
  238. ;; Two values: first push the codepoint
  239. (i32.or (i32.shl (local.get $buf) (i32.const 6))
  240. (i32.and (local.get $byte)
  241. (i32.shr_u (i32.const #x7f)
  242. (i32.shr_u (local.get $type)
  243. (i32.const 1)))))
  244. ;; Then the state
  245. (array.get_u $immutable-bytes
  246. (global.get $wtf8-states)
  247. (i32.add (local.get $state) (local.get $type))))
  248. ;; Downside of wtf-8: byte-by-byte comparison.
  249. (func $string.compare
  250. (param $a (ref $wtf8))
  251. (param $b (ref $wtf8))
  252. (result i32)
  253. (local $i i32)
  254. (local $d i32)
  255. (if (ref.eq (local.get $a) (local.get $b))
  256. (then (return (i32.const 0))))
  257. (loop $lp
  258. i32
  259. (if (i32.eq (array.len (local.get $a)) (local.get $i))
  260. (then
  261. (if (i32.eq (array.len (local.get $b)) (local.get $i))
  262. (then (return (i32.const 0)))
  263. (else (return (i32.const -1))))))
  264. (if (i32.eq (array.len (local.get $b)) (local.get $i))
  265. (then (return (i32.const 1))))
  266. (local.tee
  267. $d
  268. (i32.sub
  269. (array.get_u $wtf8 (local.get $a) (local.get $i))
  270. (array.get_u $wtf8 (local.get $b) (local.get $i))))
  271. (if (i32.eqz)
  272. (then
  273. (local.set $i
  274. (i32.add (local.get $i) (i32.const 1)))
  275. (br $lp)))
  276. (if i32
  277. (i32.lt_s (local.get $d) (i32.const 0))
  278. (then (i32.const -1))
  279. (else (i32.const 1)))))
  280. (func $string.eq (param $a (ref $wtf8)) (param $b (ref $wtf8))
  281. (result i32)
  282. (i32.eqz (call $string.compare (local.get $a) (local.get $b))))
  283. (func $string.as_iter (param $wtf8 (ref $wtf8))
  284. (result (ref $stringview-iter))
  285. (struct.new $stringview-iter (local.get $wtf8) (i32.const 0)))
  286. (func $stringview_iter.next
  287. (param $iter (ref $stringview-iter))
  288. (result i32)
  289. (local $wtf8 (ref $wtf8))
  290. (local $cp i32)
  291. (local $state i32)
  292. (local $i i32)
  293. (local.set $wtf8 (struct.get $stringview-iter $wtf8
  294. (local.get $iter)))
  295. (local.set $i (struct.get $stringview-iter $pos
  296. (local.get $iter)))
  297. (local.set $state (i32.const ,%wtf8-accept))
  298. (if (i32.ge_u (local.get $i) (array.len (local.get $wtf8)))
  299. (then (return (i32.const -1))))
  300. (loop $lp
  301. (call $decode-wtf8
  302. (array.get_u $wtf8 (local.get $wtf8) (local.get $i))
  303. (local.get $cp)
  304. (local.get $state))
  305. (local.set $state)
  306. (local.set $cp)
  307. ;; Must be valid WTF-8!
  308. (if (i32.eq (local.get $state) (i32.const ,%wtf8-reject))
  309. (then (unreachable)))
  310. (local.set $i (i32.add (local.get $i) (i32.const 1)))
  311. (if (i32.ne (local.get $state) (i32.const ,%wtf8-accept))
  312. (then
  313. (if (i32.ge_u (local.get $i) (array.len (local.get $wtf8)))
  314. ;; Bad WTF-8.
  315. (then (unreachable)))
  316. (br $lp))))
  317. (struct.set $stringview-iter $pos
  318. (local.get $iter) (local.get $i))
  319. (local.get $cp))
  320. (func $stringview_iter.advance
  321. (param $iter (ref $stringview-iter)) (param $count i32)
  322. (result i32)
  323. (local $wtf8 (ref $wtf8))
  324. (local $state i32)
  325. (local $i i32)
  326. (local $advanced i32)
  327. (local.set $wtf8 (struct.get $stringview-iter $wtf8
  328. (local.get $iter)))
  329. (local.set $i (struct.get $stringview-iter $pos (local.get $iter)))
  330. (local.set $state (i32.const ,%wtf8-accept))
  331. (if (i32.eqz (local.get $count))
  332. (then (return (i32.const 0))))
  333. (loop $lp
  334. (if (i32.lt_u (local.get $i) (array.len (local.get $wtf8)))
  335. (then
  336. (call $decode-wtf8
  337. (array.get_u $wtf8 (local.get $wtf8) (local.get $i))
  338. (i32.const 0)
  339. (local.get $state))
  340. (local.set $state)
  341. (drop)
  342. ;; Must be valid WTF-8!
  343. (if (i32.eq (local.get $state) (i32.const ,%wtf8-reject))
  344. (then (unreachable)))
  345. (local.set $i (i32.add (local.get $i) (i32.const 1)))
  346. (if (i32.eq (local.get $state (i32.const ,%wtf8-accept)))
  347. (then
  348. (local.set $advanced
  349. (i32.add (local.get $advanced)
  350. (i32.const 1)))
  351. (if (i32.lt_u (local.get $advanced)
  352. (local.get $count))
  353. (then (br $lp))))
  354. (else
  355. (br $lp))))
  356. (else
  357. ;; Must be valid WTF-8!
  358. (if (i32.ne (local.get $state) (i32.const ,%wtf8-accept))
  359. (then (unreachable))))))
  360. (struct.set $stringview-iter $pos (local.get $iter) (local.get $i))
  361. (local.get $advanced))
  362. (func $stringview_iter.slice
  363. (param $iter (ref $stringview-iter)) (param $count i32)
  364. (result (ref $wtf8))
  365. (local $wtf8 (ref $wtf8))
  366. (local $start i32)
  367. (local $len i32)
  368. (local $temp (ref $stringview-iter))
  369. (local $out (ref $wtf8))
  370. (local.set $wtf8 (struct.get $stringview-iter $wtf8
  371. (local.get $iter)))
  372. (local.set $start (struct.get $stringview-iter $pos
  373. (local.get $iter)))
  374. (local.set $temp (struct.new $stringview-iter (local.get $wtf8)
  375. (local.get $start)))
  376. (call $stringview_iter.advance (local.get $temp)
  377. (local.get $count))
  378. (drop)
  379. (local.set $len
  380. (i32.sub (struct.get $stringview-iter $pos
  381. (local.get $temp))
  382. (local.get $start)))
  383. (local.set $out (array.new_default $wtf8 (local.get $len)))
  384. (array.copy $wtf8 $wtf8
  385. (local.get $out) (i32.const 0)
  386. (local.get $wtf8) (local.get $start) (local.get $len))
  387. (local.get $out))
  388. (func $string.encode_wtf8_array
  389. (param $wtf8 (ref $wtf8)) (param $out (ref $wtf8)) (param $pos i32)
  390. (array.copy $wtf8 $wtf8
  391. (local.get $out)
  392. (local.get $pos)
  393. (local.get $wtf8)
  394. (i32.const 0)
  395. (array.len (local.get $wtf8))))
  396. (func $string.new_lossy_utf8_array
  397. (param $buf (ref $wtf8)) (param $start i32) (param $len i32)
  398. (result (ref $wtf8))
  399. (local $out (ref $wtf8))
  400. ;; FIXME: validate buffer as wtf8
  401. (local.set $out (array.new_default $wtf8 (local.get $len)))
  402. (array.copy $wtf8 $wtf8
  403. (local.get $out) (i32.const 0)
  404. (local.get $buf) (local.get $start) (local.get $len))
  405. (local.get $out))
  406. (func $string.measure_wtf16 (param $wtf8 (ref $wtf8)) (result i32)
  407. (local $iter (ref $stringview-iter))
  408. (local $cp i32)
  409. (local $count i32)
  410. (local.set $iter (call $string.as_iter (local.get $wtf8)))
  411. (loop $lp
  412. (local.set $cp (call $stringview_iter.next (local.get $iter)))
  413. (if (i32.le_s (i32.const 0) (local.get $cp))
  414. (then
  415. (local.set $count
  416. (i32.add (i32.add (local.get $count)
  417. (i32.const 1))
  418. (i32.gt_u (local.get $cp)
  419. (i32.const #xffff))))
  420. (br $lp))))
  421. (local.get $count)))))
  422. ;; Some imports and exports are to other wasm modules, and some are to
  423. ;; the host. Imports and exports to other wasm modules should be
  424. ;; lowered to $wtf8, whereas "external" interfaces to the host should
  425. ;; also be wrapped with wtf8-to-string. The right way to know whether
  426. ;; an import or export is internal or external is to use some kind of
  427. ;; explicit information in a custom section. For now, though, we just
  428. ;; look at the name under which the function is imported or exported,
  429. ;; because that's compatible with what the hoot compiler does.
  430. (define (import-is-external? mod name)
  431. (not (string-prefix? "$" name)))
  432. (define (export-is-external? name)
  433. (not (string-prefix? "$" name)))
  434. (define* (lower-stringrefs/wtf8 wasm #:key
  435. (import-is-external? import-is-external?)
  436. (export-is-external? export-is-external?))
  437. (define make-id
  438. (let ((counter 0))
  439. (lambda (stem)
  440. (let ((sym (string->symbol (format #f "~a-~a" stem counter))))
  441. (set! counter (1+ counter))
  442. sym))))
  443. (define %strings (make-hash-table))
  444. (define (intern-string! str)
  445. (or (hash-ref %strings str)
  446. (let ((id (make-id '$stringref)))
  447. (hash-set! %strings str id)
  448. id)))
  449. (match wasm
  450. (($ <wasm> id types imports funcs tables memories globals exports start
  451. elems datas tags () custom)
  452. ;; need to replace string-typed imports with wrappers
  453. (define (visit-heap-type type)
  454. (match type
  455. ('string '$wtf8)
  456. ('stringview_wtf8 (error "lowering wtf8 views unsupported"))
  457. ('stringview_wtf16 (error "lowering wtf16 views unsupported"))
  458. ('stringview_iter '$stringview-iter)
  459. (_ type)))
  460. (define (visit-val-type type)
  461. (match type
  462. (($ <ref-type> nullable? ht)
  463. (make-ref-type nullable? (visit-heap-type ht)))
  464. (_ type)))
  465. (define (visit-ref-type type)
  466. (visit-val-type type))
  467. (define (visit-param param)
  468. (match param
  469. (($ <param> id type)
  470. (make-param id (visit-val-type type)))))
  471. (define (visit-field field)
  472. (match field
  473. (($ <field> id mutable? type)
  474. (make-field id mutable? (visit-val-type type)))))
  475. (define (visit-func-sig type)
  476. (match type
  477. (($ <func-sig> params results)
  478. (make-func-sig (map visit-param params)
  479. (map visit-val-type results)))))
  480. (define (visit-base-type type)
  481. (match type
  482. (($ <struct-type> fields)
  483. (make-struct-type (map visit-field fields)))
  484. (($ <array-type> mutable? type)
  485. (make-array-type mutable? (visit-val-type type)))
  486. (_
  487. (visit-func-sig type))))
  488. (define (visit-sub-type type)
  489. (match type
  490. (($ <sub-type> final? supers type)
  491. (make-sub-type final? supers (visit-base-type type)))
  492. (_ (visit-base-type type))))
  493. (define (visit-type-use type)
  494. (match type
  495. (($ <type-use> id sig)
  496. (make-type-use id (visit-func-sig sig)))))
  497. (define (visit-table-type type)
  498. (match type
  499. (($ <table-type> limits elem-type)
  500. (make-table-type limits (visit-val-type elem-type)))))
  501. (define (visit-global-type type)
  502. (match type
  503. (($ <global-type> mutable? type)
  504. (make-global-type mutable? (visit-val-type type)))))
  505. (define (visit-tag-type type)
  506. (match type
  507. (($ <tag-type> attribute type)
  508. (make-tag-type attribute (visit-type-use type)))))
  509. (define (visit-block-type type)
  510. (match type
  511. (#f #f)
  512. (($ <type-use>) (visit-type-use type))
  513. ((or ($ <ref-type>) (? symbol?)) (visit-val-type type))))
  514. (define visit-inst
  515. (match-lambda
  516. (((and inst (or 'block 'loop)) label type body)
  517. `(,inst ,label ,(visit-block-type type)
  518. ,(visit-expr body)))
  519. (('if label type consequent alternate)
  520. `(if ,label ,(visit-block-type type)
  521. ,(visit-expr consequent)
  522. ,(visit-expr alternate)))
  523. (('try label type body catches catch-all)
  524. `(try ,label ,(visit-block-type type)
  525. ,(visit-expr body)
  526. ,(map visit-expr catches)
  527. ,(and=> catch-all visit-expr)))
  528. (('try_delegate label type body handler)
  529. `(try_delegate ,label ,(visit-block-type type)
  530. ,(visit-expr body)
  531. ,handler))
  532. (('call_indirect table type)
  533. `(call_indirect ,table ,(visit-type-use type)))
  534. (('select types) `(select ,(map visit-val-type types)))
  535. ;; GC instructions.
  536. (('ref.null ht) `(ref.null ,(visit-heap-type ht)))
  537. (((and inst (or 'ref.test 'ref.cast)) rt)
  538. `(,inst ,(visit-ref-type rt)))
  539. (((and inst (or 'br_on_cast 'br_on_cast_fail)) label rt1 rt2)
  540. `(,inst ,label ,(visit-ref-type rt1) ,(visit-ref-type rt2)))
  541. ;; Stringref instructions.
  542. (('string.const str) `(global.get ,(intern-string! str)))
  543. (('string.new_utf8 mem) `(call ,(symbol-append
  544. '$string.new_utf8_ mem)))
  545. (('string.new_lossy_utf8 mem) `(call ,(symbol-append
  546. '$string.new_lossy_utf8_ mem)))
  547. (('string.new_wtf8 mem) `(call ,(symbol-append
  548. '$string.new_wtf8_ mem)))
  549. (('string.new_wtf16 mem) `(call ,(symbol-append
  550. '$string.new_wtf16_ mem)))
  551. (('string.measure_wtf8) '(array.len))
  552. (('string.measure_utf8) '(call $string.measure_utf8))
  553. (('string.measure_wtf16) '(call $string.measure_wtf16))
  554. (('string.encode_utf8 mem) `(call ,(symbol-append
  555. '$string.encode_utf8_ mem)))
  556. (('string.encode_lossy_utf8 mem) `(call ,(symbol-append
  557. '$string.encode_lossy_utf8_ mem)))
  558. (('string.encode_wtf8 mem) `(call ,(symbol-append '$string.encode_wtf8_ mem)))
  559. (('string.encode_wtf16 mem) `(call ,(symbol-append
  560. '$string.encode_wtf16_ mem)))
  561. (('string.concat) '(call $string.concat))
  562. (('string.eq) '(call $string.eq))
  563. (('string.is_usv_sequence) '(call $string.is_usv_sequence))
  564. (('string.compare) '(call $string.compare))
  565. (('string.from_code_point) '(call $string.from_code_point))
  566. (('string.as_wtf8) '(call $string.as_wtf8))
  567. (('stringview_wtf8.advance) '(call $stringview_wtf8.advance))
  568. (('stringview_wtf8.encode_utf8 mem)
  569. `(call ,(symbol-append
  570. '$stringview_wtf8.encode_utf8_ mem)))
  571. (('stringview_wtf8.encode_lossy_utf8 mem)
  572. `(call ,(symbol-append
  573. '$stringview_wtf8.encode_lossy_utf8_ mem)))
  574. (('stringview_wtf8.encode_wtf8 mem)
  575. `(call ,(symbol-append
  576. '$stringview_wtf8.encode_wtf8_ mem)))
  577. (('stringview_wtf8.slice) '(call $stringview_wtf8.slice))
  578. (('string.as_wtf16) '(call $string.as_wtf16))
  579. (('stringview_wtf16.length) '(call $stringview_wtf16.length))
  580. (('stringview_wtf16.get_codeunit) `(call $stringview_wtf16.get_codeunit))
  581. (('stringview_wtf16.encode mem) `(call ,(symbol-append
  582. '$stringview_wtf16.encode_ mem)))
  583. (('stringview_wtf16.slice) '(call $stringview_wtf16.slice))
  584. (('string.as_iter) '(call $string.as_iter))
  585. (('stringview_iter.next) '(call $stringview_iter.next))
  586. (('stringview_iter.advance) '(call $stringview_iter.advance))
  587. (('stringview_iter.rewind) '(call $stringview_iter.rewind))
  588. (('stringview_iter.slice) '(call $stringview_iter.slice))
  589. (('string.new_utf8_array) '(call $string.new_utf8_array))
  590. (('string.new_lossy_utf8_array) '(call $string.new_lossy_utf8_array))
  591. (('string.new_wtf8_array) '(call $string.new_wtf8_array))
  592. (('string.new_wtf16_array) '(call $string.new_wtf16_array))
  593. (('string.encode_utf8_array) '(call $string.encode_utf8_array))
  594. (('string.encode_lossy_utf8_array) '(call $string.encode_lossy_utf8_array))
  595. (('string.encode_wtf8_array) '(call $string.encode_wtf8_array))
  596. (('string.encode_wtf16_array) '(call $string.encode_wtf16_array))
  597. (inst inst)))
  598. (define (visit-expr expr)
  599. (map visit-inst expr))
  600. (define (visit-init expr)
  601. (visit-expr expr))
  602. (define (visit-func func)
  603. (define visit-local
  604. (match-lambda
  605. (($ <local> id type)
  606. (make-local id (visit-val-type type)))))
  607. (match func
  608. (($ <func> id type locals body)
  609. (let ((type (visit-type-use type))
  610. (locals (map visit-local locals))
  611. (body (visit-expr body)))
  612. (make-func id type locals body)))))
  613. (define (lower-extern-val-type type)
  614. (match type
  615. (($ <ref-type> nullable? 'string)
  616. (make-ref-type nullable? 'extern))
  617. (($ <ref-type> nullable? (or 'stringview_wtf8
  618. 'stringview_wtf16
  619. 'stringview_iter))
  620. (error "extern param/result with stringview type unimplemented" type))
  621. (_ (visit-val-type type))))
  622. (define (lower-extern-val type)
  623. (match type
  624. (($ <ref-type> #f 'string)
  625. '((call $wtf8->extern-string)))
  626. (($ <ref-type> nullable? 'string)
  627. '((call $nullable-wtf8->extern-string)))
  628. (($ <ref-type> nullable? (or 'stringview_wtf8
  629. 'stringview_wtf16
  630. 'stringview_iter))
  631. (error "extern value with stringview type unimplemented" type))
  632. (_ '())))
  633. (define (lift-extern-val type)
  634. (match type
  635. (($ <ref-type> #f 'string)
  636. `((call $extern-string->wtf8)))
  637. (($ <ref-type> #t 'string)
  638. `((call $nullable-extern-string->wtf8)))
  639. (($ <ref-type> nullable? (or 'stringview_wtf8
  640. 'stringview_wtf16
  641. 'stringview_iter))
  642. (error "extern value with stringview type unimplemented" type))
  643. (_ '())))
  644. (define (lower-extern-func-type type)
  645. (match type
  646. (($ <type-use> tid
  647. ($ <func-sig> (($ <param> pid ptype) ...) (rtype ...)))
  648. (make-type-use tid
  649. (make-func-sig
  650. (map make-param pid
  651. (map lower-extern-val-type ptype))
  652. (map lower-extern-val-type rtype))))))
  653. (define (lower-extern-tag-type type)
  654. (match type
  655. (($ <tag-type> attribute type)
  656. (make-tag-type attribute (lower-extern-func-type type)))))
  657. (define (lower-extern-func-import id wrapped-id type)
  658. (match type
  659. (($ <type-use> _ ($ <func-sig> (($ <param> _ params) ...) results))
  660. (let ((param-count (length params)))
  661. (make-func
  662. id
  663. (visit-type-use type)
  664. (map (lambda (type) (make-local #f (lower-extern-val-type type)))
  665. results)
  666. (let lp ((params params) (i 0))
  667. (match params
  668. ((param . params)
  669. `((local.get ,i)
  670. ,@(lower-extern-val param)
  671. . ,(lp params (1+ i))))
  672. (()
  673. `((call ,wrapped-id)
  674. ,@(reverse (map (lambda (i) `(local.set ,i))
  675. (iota (length results) param-count)))
  676. . ,(let lp ((results results) (i param-count))
  677. (match results
  678. (() '())
  679. ((result . results)
  680. `((local.get ,i)
  681. ,@(lift-extern-val result)
  682. . ,(lp results (1+ i)))))))))))))))
  683. (define (lower-extern-func-export id wrapped-id type)
  684. (match type
  685. (($ <type-use> _ ($ <func-sig> (($ <param> _ params) ...) results))
  686. (let ((param-count (length params)))
  687. (make-func
  688. id
  689. (lower-extern-func-type type)
  690. (map (lambda (type) (make-local #f (visit-val-type type)))
  691. results)
  692. (let lp ((params params) (i 0))
  693. (match params
  694. ((param . params)
  695. `((local.get ,i)
  696. ,@(lift-extern-val param)
  697. . ,(lp params (1+ i))))
  698. (()
  699. `((call ,wrapped-id)
  700. ,@(reverse (map (lambda (i) `(local.set ,i))
  701. (iota (length results) param-count)))
  702. . ,(let lp ((results results) (i param-count))
  703. (match results
  704. (() '())
  705. ((result . results)
  706. `((local.get ,i)
  707. ,@(lower-extern-val result)
  708. . ,(lp results (1+ i)))))))))))))))
  709. (define (lookup-func-type id)
  710. (or (or-map (match-lambda
  711. (($ <import> mod name kind id' type)
  712. (and (eq? id id') type)))
  713. imports)
  714. (or-map (match-lambda
  715. (($ <func> id' type locals body)
  716. (and (eq? id id') type)))
  717. funcs)))
  718. (let ((types (map (match-lambda
  719. (($ <rec-group> (($ <type> id type) ...))
  720. (make-rec-group
  721. (map make-type id (map visit-sub-type type))))
  722. (($ <type> id type)
  723. (make-type id (visit-sub-type type))))
  724. types))
  725. (imports (map
  726. (match-lambda
  727. (($ <import> mod name kind id type)
  728. (let* ((type* (match kind
  729. ('func (if (import-is-external? mod name)
  730. (lower-extern-func-type type)
  731. (visit-type-use type)))
  732. ('table (visit-table-type type))
  733. ('memory type)
  734. ('global (visit-global-type type))
  735. ('tag (if (import-is-external? mod name)
  736. (lower-extern-tag-type type)
  737. (visit-tag-type type)))))
  738. (id* (and (eq? kind 'func)
  739. (import-is-external? mod name)
  740. (not (equal? type type*))
  741. (make-id (symbol-append id '-stringref)))))
  742. (cons (and id* (lower-extern-func-import id id* type))
  743. (make-import mod name kind (or id* id) type*)))))
  744. imports))
  745. (exports (map
  746. (match-lambda
  747. ((and export ($ <export> name kind id))
  748. (cond
  749. ((and (eq? kind 'func)
  750. (export-is-external? name)
  751. (and=>
  752. (lookup-func-type id)
  753. (lambda (type)
  754. (if (equal? type
  755. (lower-extern-func-type type))
  756. #f
  757. type))))
  758. => (lambda (type)
  759. (let ((id* (make-id
  760. (symbol-append id '-stringref))))
  761. (cons (lower-extern-func-export id* id type)
  762. (make-export name kind id*)))))
  763. (else
  764. (cons #f export)))))
  765. exports))
  766. (funcs (map visit-func funcs))
  767. (tables (map (match-lambda
  768. (($ <table> id type init)
  769. (make-table id (visit-table-type type)
  770. (and init (visit-init init)))))
  771. tables))
  772. (globals (map (match-lambda
  773. (($ <global> id ($ <global-type> mutable? vt) init)
  774. (let* ((vt (visit-val-type vt))
  775. (type (make-global-type mutable? vt)))
  776. (make-global id type (visit-init init)))))
  777. globals))
  778. (elems (map (match-lambda
  779. (($ <elem> id mode table type offset inits)
  780. (make-elem id mode table
  781. (visit-val-type type)
  782. (and=> offset visit-init)
  783. (map visit-init inits))))
  784. elems))
  785. (datas (map (match-lambda
  786. (($ <data> id mode mem offset init)
  787. (make-data id mode mem
  788. (and=> offset visit-init)
  789. init)))
  790. datas))
  791. (tags (map (match-lambda
  792. (($ <tag> id type)
  793. (make-tag id (visit-tag-type type))))
  794. tags)))
  795. (let* ((t (make-global-type #f (make-ref-type #f '$wtf8)))
  796. (strings (hash-map->list
  797. (lambda (str id)
  798. (make-global id t
  799. `((i32.const 0)
  800. (i32.const ,(bytevector-length
  801. (string->utf8 str)))
  802. (array.new_data $wtf8 ,id))))
  803. %strings))
  804. (wtf8 (hash-map->list
  805. (lambda (str id)
  806. (make-data id 'passive #f #f (string->utf8 str)))
  807. %strings)))
  808. (add-stdlib
  809. (make-wasm id
  810. types
  811. (map cdr imports)
  812. (append funcs
  813. (filter-map car imports)
  814. (filter-map car exports))
  815. tables
  816. memories
  817. (append strings globals)
  818. (map cdr exports)
  819. start
  820. elems
  821. (append wtf8 datas)
  822. tags
  823. '()
  824. custom)
  825. (wtf8-stdlib)))))))
  826. (define* (lower-stringrefs wasm #:key (strategy 'wtf8))
  827. (match strategy
  828. ('stringref wasm)
  829. ('wtf8 (lower-stringrefs/wtf8 wasm))
  830. (_ (error "unknown stringref lowering strategy" strategy))))