read.scm 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469
  1. ; -*- Mode: Scheme; Syntax: Scheme; Package: Scheme; -*-
  2. ; Copyright (c) 1993-2008 by Richard Kelsey and Jonathan Rees. See file COPYING.
  3. ; A little Scheme reader.
  4. ; Nonstandard things used:
  5. ; ASCII stuff: ascii-whitespaces
  6. ; (for dispatch table; portable definition in alt/ascii.scm)
  7. ; Unicode: char->scalar-value, scalar-value->char
  8. ; reverse-list->string -- ok to define as follows:
  9. ; (define (reverse-list->string l n)
  10. ; (list->string (reverse l)))
  11. ; make-immutable! -- ok to define as follows:
  12. ; (define (make-immutable! x) x)
  13. ; signal (only for use by reading-error; easily excised)
  14. (define (read . port-option)
  15. (let ((port (input-port-option port-option)))
  16. (let loop ()
  17. (let ((form (sub-read port)))
  18. (cond ((not (reader-token? form))
  19. form)
  20. ((eq? form close-paren)
  21. ;; Too many right parens.
  22. (warn "discarding extraneous right parenthesis" port)
  23. (loop))
  24. (else
  25. (reading-error port (cdr form))))))))
  26. (define (sub-read-carefully port)
  27. (let ((form (sub-read port)))
  28. (cond ((eof-object? form)
  29. (reading-error port "unexpected end of file"))
  30. ((reader-token? form) (reading-error port (cdr form)))
  31. (else form))))
  32. (define reader-token-marker (list 'reader-token))
  33. (define (make-reader-token message) (cons reader-token-marker message))
  34. (define (reader-token? form)
  35. (and (pair? form) (eq? (car form) reader-token-marker)))
  36. (define close-paren (make-reader-token "unexpected right parenthesis"))
  37. (define dot (make-reader-token "unexpected \" . \""))
  38. ; Main dispatch
  39. (define *dispatch-table-limit* 128)
  40. (define read-dispatch-vector
  41. (make-vector *dispatch-table-limit*
  42. (lambda (c port)
  43. (reading-error port "illegal character read" c))))
  44. (define read-terminating?-vector
  45. (make-vector *dispatch-table-limit* #t))
  46. (define (set-standard-syntax! char terminating? reader)
  47. (vector-set! read-dispatch-vector (char->scalar-value char) reader)
  48. (vector-set! read-terminating?-vector (char->scalar-value char) terminating?))
  49. (define (sub-read port)
  50. (let ((c (read-char port)))
  51. (if (eof-object? c)
  52. c
  53. (let ((scalar-value (char->scalar-value c)))
  54. (cond
  55. ((< scalar-value *dispatch-table-limit*)
  56. ((vector-ref read-dispatch-vector (char->scalar-value c))
  57. c port))
  58. ((char-alphabetic? c)
  59. (sub-read-constituent c port))
  60. (else
  61. (reading-error port "illegal character read" c)))))))
  62. (let ((sub-read-whitespace
  63. (lambda (c port)
  64. c ;ignored
  65. (sub-read port))))
  66. (for-each (lambda (c)
  67. (vector-set! read-dispatch-vector c sub-read-whitespace))
  68. ascii-whitespaces))
  69. (define (sub-read-constituent c port)
  70. (parse-token (sub-read-token c port) port))
  71. (for-each (lambda (c)
  72. (set-standard-syntax! c #f sub-read-constituent))
  73. (string->list
  74. (string-append "!$%&*+-./0123456789:<=>?@^_~ABCDEFGHIJKLM"
  75. "NOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz")))
  76. ; Usual read macros
  77. (define (set-standard-read-macro! c terminating? proc)
  78. (set-standard-syntax! c terminating? proc))
  79. (define (sub-read-list c port)
  80. (let ((form (sub-read port)))
  81. (if (eq? form dot)
  82. (reading-error port
  83. "missing car -- ( immediately followed by .")
  84. (let recur ((form form))
  85. (cond ((eof-object? form)
  86. (reading-error port
  87. "end of file inside list -- unbalanced parentheses"))
  88. ((eq? form close-paren) '())
  89. ((eq? form dot)
  90. (let* ((last-form (sub-read-carefully port))
  91. (another-form (sub-read port)))
  92. (if (eq? another-form close-paren)
  93. last-form
  94. (reading-error port
  95. "randomness after form after dot"
  96. another-form))))
  97. (else
  98. (cons form (recur (sub-read port)))))))))
  99. (set-standard-read-macro! #\( #t sub-read-list)
  100. (set-standard-read-macro! #\) #t
  101. (lambda (c port)
  102. c port
  103. close-paren))
  104. (set-standard-read-macro! #\' #t
  105. (lambda (c port)
  106. c
  107. (list 'quote (sub-read-carefully port))))
  108. (set-standard-read-macro! #\` #t
  109. (lambda (c port)
  110. c
  111. (list 'quasiquote (sub-read-carefully port))))
  112. (set-standard-read-macro! #\, #t
  113. (lambda (c port)
  114. c
  115. (let* ((next (peek-char port))
  116. ;; DO NOT beta-reduce!
  117. (keyword (cond ((eof-object? next)
  118. (reading-error port "end of file after ,"))
  119. ((char=? next #\@)
  120. (read-char port)
  121. 'unquote-splicing)
  122. (else 'unquote))))
  123. (list keyword
  124. (sub-read-carefully port)))))
  125. ; Don't use non-R5RS char literals to avoid bootstrap circularities
  126. (define *nul* (scalar-value->char 0))
  127. (define *alarm* (scalar-value->char 7))
  128. (define *backspace* (scalar-value->char 8))
  129. (define *tab* (scalar-value->char 9))
  130. (define *linefeed* (scalar-value->char 10))
  131. (define *vtab* (scalar-value->char 11))
  132. (define *page* (scalar-value->char 12))
  133. (define *return* (scalar-value->char 13))
  134. (define *escape* (scalar-value->char 27))
  135. (define *rubout* (scalar-value->char 127))
  136. (set-standard-read-macro! #\" #t
  137. (lambda (c port)
  138. c ;ignored
  139. (let loop ((l '()) (i 0))
  140. (let ((c (read-char port)))
  141. (cond ((eof-object? c)
  142. (reading-error port "end of file within a string"))
  143. ((char=? c #\\)
  144. (cond
  145. ((decode-escape port)
  146. => (lambda (e)
  147. (loop (cons e l) (+ i 1))))
  148. (else (loop l i))))
  149. ((char=? c #\")
  150. (reverse-list->string l i))
  151. (else
  152. (loop (cons c l) (+ i 1))))))))
  153. (define (decode-escape port)
  154. (let ((c (read-char port)))
  155. (if (eof-object? c)
  156. (reading-error port "end of file within a string"))
  157. (let ((scalar-value (char->scalar-value c)))
  158. (cond
  159. ((or (char=? c #\\) (char=? c #\"))
  160. c)
  161. ((char=? c #\newline)
  162. ;; SRFI 75; skip intra-line whitespace
  163. (let loop ()
  164. (let ((c (peek-char port)))
  165. (cond
  166. ((eof-object? c)
  167. (reading-error port "end of file within a string"))
  168. ((char-unicode-whitespace? c)
  169. (read-char port)
  170. (loop))
  171. (else #f)))))
  172. ;; SRFI 75
  173. ((char=? c #\a) *alarm*)
  174. ((char=? c #\b) *backspace*)
  175. ((char=? c #\t) *tab*)
  176. ((char=? c #\n) *linefeed*)
  177. ((char=? c #\v) *vtab*)
  178. ((char=? c #\f) *page*)
  179. ((char=? c #\r) *return*)
  180. ((char=? c #\e) *escape*)
  181. ((char=? c #\x)
  182. (let ((d (decode-hex-digits port char-semicolon? "string literal")))
  183. (read-char port) ; remove semicolon
  184. d))
  185. (else
  186. (reading-error port
  187. "invalid escaped character in string"
  188. c))))))
  189. (define (char-semicolon? c)
  190. (equal? c #\;))
  191. ; The \x syntax is shared between character and string literals
  192. ; This doesn't remove the delimiter from the port.
  193. (define (decode-hex-digits port delimiter? desc)
  194. (let loop ((rev-digits '()))
  195. (let ((c (peek-char port)))
  196. (cond
  197. ((delimiter? c)
  198. (scalar-value->char
  199. (string->number (list->string (reverse rev-digits)) 16)))
  200. ((eof-object? c)
  201. (reading-error
  202. port
  203. (string-append "premature end of a scalar-value literal within a " desc)))
  204. ((not (char-hex-digit? c))
  205. (reading-error port
  206. (string-append "invalid hex digit in a " desc)
  207. c))
  208. (else
  209. (read-char port)
  210. (loop (cons c rev-digits)))))))
  211. (define (char-hex-digit? c)
  212. (let ((scalar-value (char->scalar-value c)))
  213. (or (and (>= scalar-value 48) ; #\0
  214. (<= scalar-value 57)) ; #\9
  215. (and (>= scalar-value 65) ; #\A
  216. (<= scalar-value 70)) ; #\F
  217. (and (>= scalar-value 97) ; #\a
  218. (<= scalar-value 102))))) ; #\f
  219. (set-standard-read-macro! #\; #t
  220. (lambda (c port)
  221. c ;ignored
  222. (gobble-line port)
  223. (sub-read port)))
  224. (define (gobble-line port)
  225. (let loop ()
  226. (let ((c (read-char port)))
  227. (cond ((eof-object? c) c)
  228. ((char=? c #\newline) #f)
  229. (else (loop))))))
  230. (define *sharp-macros* '())
  231. (define (define-sharp-macro c proc)
  232. (set! *sharp-macros* (cons (cons c proc) *sharp-macros*)))
  233. (set-standard-read-macro! #\# #f
  234. (lambda (c port)
  235. c ;ignored
  236. (let* ((c (peek-char port))
  237. (c (if (eof-object? c)
  238. (reading-error port "end of file after #")
  239. (char-downcase c)))
  240. (probe (assq c *sharp-macros*)))
  241. (if probe
  242. ((cdr probe) c port)
  243. (reading-error port "unknown # syntax" c)))))
  244. (define-sharp-macro #\f
  245. (lambda (c port) (read-char port) #f))
  246. (define-sharp-macro #\t
  247. (lambda (c port) (read-char port) #t))
  248. ; These are from Matthew Flatt's Unicode proposal for R6RS
  249. ; See write.scm.
  250. ; Richard will hopefully provide a fancy version of this that provides
  251. ; all the names in the Unicode character database.
  252. (define *char-name-table*
  253. (list
  254. (cons 'space #\space)
  255. (cons 'newline #\newline)
  256. (cons 'nul *nul*)
  257. (cons 'alarm *alarm*)
  258. (cons 'backspace *backspace*)
  259. (cons 'tab *tab*)
  260. (cons 'linefeed *linefeed*)
  261. (cons 'vtab *vtab*)
  262. (cons 'page *page*)
  263. (cons 'return *return*)
  264. (cons 'escape *escape*)
  265. (cons 'rubout *rubout*)))
  266. (define-sharp-macro #\\
  267. (lambda (c port)
  268. (read-char port)
  269. (let ((c (peek-char port)))
  270. (cond ((eof-object? c)
  271. (reading-error port "end of file after #\\"))
  272. ((char=? #\x c)
  273. (read-char port)
  274. (if (delimiter? (peek-char port))
  275. c
  276. (decode-hex-digits port char-scalar-value-literal-delimiter? "char literal")))
  277. ((char-alphabetic? c)
  278. (let ((name (sub-read-carefully port)))
  279. (cond ((= (string-length (symbol->string name)) 1)
  280. c)
  281. ((assq name *char-name-table*)
  282. => cdr)
  283. (else
  284. (reading-error port "unknown #\\ name" name)))))
  285. (else
  286. (read-char port)
  287. c)))))
  288. (define (char-scalar-value-literal-delimiter? c)
  289. (or (eof-object? c)
  290. (delimiter? c)))
  291. (define-sharp-macro #\(
  292. (lambda (c port)
  293. (read-char port)
  294. (let ((elts (sub-read-list c port)))
  295. (if (proper-list? elts)
  296. (list->vector elts)
  297. (reading-error port "dot in #(...)")))))
  298. (define (proper-list? x)
  299. (cond ((null? x) #t)
  300. ((pair? x) (proper-list? (cdr x)))
  301. (else #f)))
  302. (let ((number-sharp-macro
  303. (lambda (c port)
  304. (let ((string (sub-read-token #\# port)))
  305. (or (string->number string)
  306. (reading-error port "unsupported number syntax" string))))))
  307. (for-each (lambda (c)
  308. (define-sharp-macro c number-sharp-macro))
  309. '(#\b #\o #\d #\x #\i #\e)))
  310. ; Tokens
  311. (define (sub-read-token c port)
  312. (let loop ((l (list (preferred-case c))) (n 1))
  313. (let ((c (peek-char port)))
  314. (cond
  315. ((eof-object? c)
  316. (reverse-list->string l n))
  317. ((char=? c #\\)
  318. (read-char port)
  319. (let ((c (peek-char port)))
  320. (cond
  321. ((or (eof-object? c)
  322. (not (char=? #\x c)))
  323. (reading-error port "invalid escape sequence in a symbol"
  324. c))
  325. (else
  326. (read-char port)
  327. (let ((d (decode-hex-digits port char-semicolon? "symbol literal")))
  328. (read-char port) ; remove semicolon
  329. (loop (cons d l) (+ n 1)))))))
  330. (else
  331. (let ((sv (char->scalar-value c)))
  332. (if (if (< sv *dispatch-table-limit*)
  333. (vector-ref read-terminating?-vector sv)
  334. (binary-search *non-symbol-constituents-above-127* sv))
  335. (reverse-list->string l n)
  336. (begin
  337. (read-char port)
  338. (loop (cons (preferred-case c) l)
  339. (+ n 1))))))))))
  340. (define (parse-token string port)
  341. (if (let ((c (string-ref string 0)))
  342. (or (char-numeric? c) (char=? c #\+) (char=? c #\-) (char=? c #\.)))
  343. (cond ((string->number string))
  344. ((member string strange-symbol-names)
  345. (string->symbol (make-immutable! string)))
  346. ((string=? string ".")
  347. dot)
  348. (else
  349. (reading-error port "unsupported number syntax" string)))
  350. (string->symbol (make-immutable! string))))
  351. (define strange-symbol-names
  352. '("+" "-" "..."
  353. "->" ;Only for JAR's thesis
  354. ))
  355. (define (delimiter? c)
  356. (or (char-unicode-whitespace? c)
  357. (char=? c #\))
  358. (char=? c #\()
  359. (char=? c #\")
  360. (char=? c #\;)))
  361. (define (char-unicode-whitespace? c)
  362. (binary-search *whitespaces* (char->scalar-value c)))
  363. ;--- This loses because the compiler won't in-line it.
  364. ; and it's in READ's inner loop.
  365. (define preferred-case
  366. (if (char=? (string-ref (symbol->string 't) 0) #\T)
  367. char-upcase
  368. char-downcase))
  369. ; For ASCII, we previously had this hand-hacked version,
  370. ; (define p-c-v (make-string ascii-limit #\0))
  371. ;
  372. ; (let ((p-c (if (char=? (string-ref (symbol->string 't) 0) #\T)
  373. ; char-upcase
  374. ; char-downcase)))
  375. ; (do ((i 0 (+ i 1)))
  376. ; ((>= i ascii-limit))
  377. ; (string-set! p-c-v i (p-c (ascii->char i)))))
  378. ;
  379. ; (define (preferred-case c)
  380. ; (string-ref p-c-v (char->ascii c)))
  381. ; Reader errors
  382. (define (reading-error port message . irritants)
  383. (apply signal 'read-error message
  384. (append irritants (list port))))
  385. ; returns index of value (must be number) in vector
  386. (define (binary-search vec val)
  387. (let ((size (vector-length vec)))
  388. (let loop ((low 0) ; inclusive
  389. (high size)) ; exclusive
  390. (cond
  391. ((< low (- high 1))
  392. (let* ((pos (quotient (+ low high) 2)) ; always in
  393. (at (vector-ref vec pos)))
  394. (cond
  395. ((= val at) pos)
  396. ((< val at)
  397. (loop low pos))
  398. (else
  399. (loop pos high)))))
  400. ((< low high)
  401. (if (= val (vector-ref vec low))
  402. low
  403. #f))
  404. (else #f)))))