parser.scm 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253
  1. (define (digit->number d)
  2. (case d
  3. ((#\0) 0)
  4. ((#\1) 1)
  5. ((#\2) 2)
  6. ((#\3) 3)
  7. ((#\4) 4)
  8. ((#\5) 5)
  9. ((#\6) 6)
  10. ((#\7) 7)
  11. ((#\8) 8)
  12. ((#\9) 9)
  13. (else 44)))
  14. (define (string->number s)
  15. (let ((l (string-length s)))
  16. (let loop ((n 0) (i 0))
  17. (if (= i l)
  18. n
  19. (let ((digit (string-ref s i)))
  20. (loop (+ (* 10 n) (digit->number digit)) (+ i 1)))))))
  21. (define (list->string chrs)
  22. (let ((l (length chrs)))
  23. (let ((s (make-string l #\?)))
  24. (let loop ((i 0) (chrs chrs))
  25. (if (null? chrs)
  26. s
  27. (begin
  28. (string-set! s i (car chrs))
  29. (loop (+ i 1) (cdr chrs))))))))
  30. ;;;;;;;;;;
  31. ;; tokenizer
  32. (define (tokenize queue port)
  33. (let ((ch (read-char port)))
  34. (if (eof-object? ch)
  35. '()
  36. (case ch
  37. ((#\space #\tab #\newline) (tokenize queue port))
  38. ((#\;) (state:comment queue port))
  39. ((#\() (state:open queue port))
  40. ((#\)) (state:close queue port))
  41. ((#\') (state:quote queue port))
  42. ((#\`) (state:quasiquote queue port))
  43. ((#\,) (state:unquote queue port))
  44. ((#\.) (state:dot queue port))
  45. ((#\#) (state:hash queue port))
  46. ((#\") (state:string queue port))
  47. (else (state:item queue port (cons ch '())))))))
  48. (define (numeric-char? ch)
  49. (case ch
  50. ((#\0 #\1 #\2 #\3 #\4 #\5 #\6 #\7 #\8 #\9) #t)
  51. (else #f)))
  52. (define (make-symbol-token l)
  53. (let ((r (reverse l)))
  54. (cond ((and (eq? #\- (car r)) (not (null? (cdr r))) (numeric-char? (cadr l)))
  55. (- 0 (string->number (list->string r))))
  56. ((numeric-char? (car r))
  57. (string->number (list->string r)))
  58. (else (string->symbol (list->string r))))))
  59. (define (state:item queue port acc)
  60. (let ((ch (peek-char port)))
  61. (if (eof-object? ch)
  62. (queue-push! queue (make-symbol-token acc))
  63. (case ch
  64. ((#\space #\tab #\newline #\;
  65. #\( #\) #\' #\` #\, #\. #\#)
  66. (queue-push! queue (make-symbol-token acc))
  67. (tokenize queue port))
  68. (else (state:item queue port (cons (read-char port) acc)))))))
  69. (define (state:comment queue port)
  70. (let ((ch (read-char port)))
  71. (if (eof-object? ch)
  72. '()
  73. (case ch
  74. ((#\newline) (tokenize queue port))
  75. (else (state:comment queue port))))))
  76. (define special:open (gensym "open"))
  77. (define (state:open queue port)
  78. (queue-push! queue special:open)
  79. (tokenize queue port))
  80. (define special:close (gensym "close"))
  81. (define (state:close queue port)
  82. (queue-push! queue special:close)
  83. (tokenize queue port))
  84. (define (state:string queue port)
  85. (queue-push! queue (list->string (string:char port)))
  86. (tokenize queue port))
  87. (define (string:char port)
  88. (let ((c (read-char port)))
  89. (if (eof-object? c)
  90. (error 'unescape:char "string ended too soon" 0)
  91. (case c
  92. ((#\\) (string:escape port))
  93. ((#\") '())
  94. (else (cons c (string:char port)))))))
  95. (define (string:escape port)
  96. (let ((c (read-char port)))
  97. (if (eof-object? c)
  98. (error 'unescape:esc "string ended too soon" 0)
  99. (cons c (string:char port)))))
  100. (define special:quote (gensym "quote"))
  101. (define (state:quote queue port)
  102. (queue-push! queue special:quote)
  103. (tokenize queue port))
  104. (define special:quasiquote (gensym "quasiquote"))
  105. (define (state:quasiquote queue port)
  106. (queue-push! queue special:quasiquote)
  107. (tokenize queue port))
  108. (define special:unquote (gensym "unquote"))
  109. (define (state:unquote queue port)
  110. (queue-push! queue special:unquote)
  111. (tokenize queue port))
  112. (define special:dot (gensym "dot"))
  113. (define (state:dot queue port)
  114. (queue-push! queue special:dot)
  115. (tokenize queue port))
  116. (define (state:hash queue port)
  117. (let ((ch (read-char port)))
  118. (if (eof-object? ch)
  119. '()
  120. (case ch
  121. ((#\t) (state:true queue port))
  122. ((#\f) (state:false queue port))
  123. ((#\\) (state:char queue port))
  124. ;((#\x) (state:hex port))
  125. (else (state:comment queue port))))))
  126. (define (state:true queue port)
  127. (queue-push! queue #t)
  128. (tokenize queue port))
  129. (define (state:false queue port)
  130. (queue-push! queue #f)
  131. (tokenize queue port))
  132. (define (state:char queue port)
  133. (let* ((ch (read-char port))
  134. (nxt (peek-char port)))
  135. (case nxt
  136. ((#\space #\tab #\newline #\;
  137. #\( #\) #\' #\` #\, #\. #\#)
  138. (queue-push! queue ch)
  139. (tokenize queue port))
  140. (else
  141. (state:long-char queue port (cons ch '()))))))
  142. (define (state:long-char queue port acc)
  143. (let ((ch (peek-char port)))
  144. (case ch
  145. ((#\space #\tab #\newline #\;
  146. #\( #\) #\' #\` #\, #\. #\#)
  147. (case (make-symbol-token acc)
  148. ((space)
  149. (queue-push! queue #\space)
  150. (tokenize queue port))
  151. ((tab)
  152. (queue-push! queue #\tab)
  153. (tokenize queue port))
  154. ((newline)
  155. (queue-push! queue #\newline)
  156. (tokenize queue port))
  157. (else (begin (print (make-symbol-token acc))
  158. (error 'unknown-long-char "unknown long char" acc)))))
  159. (else
  160. (state:long-char queue port (cons (read-char port) acc))))))
  161. (define (state:hex port)
  162. ;;
  163. 0
  164. )
  165. ;; parser
  166. (define (parse tokens)
  167. (if (null? (queue:top tokens))
  168. '()
  169. (let ((elt (parse-1 tokens)))
  170. (cons elt (parse tokens)))))
  171. (define (parse-1 tokens)
  172. (if (null? (queue:top tokens))
  173. (error 'parse-eof "parse eof" 0)
  174. (let ((tok (queue-pop! tokens)))
  175. (cond ((equal? tok special:open) (parse:open tokens))
  176. ((equal? tok special:close) (error 'parse-early-close "parse early tokens" 0))
  177. ((equal? tok special:quote) (list 'quote (parse-1 tokens)))
  178. ((equal? tok special:quasiquote) (list 'quasiquote (parse-1 tokens)))
  179. ((equal? tok special:unquote) (list 'unquote (parse-1 tokens)))
  180. (else tok)))))
  181. (define (queue-peek q)
  182. (car (queue:top q)))
  183. (define (parse:open tokens)
  184. (if (null? (queue:top tokens))
  185. (error 'parse:open-unexpected-eof "unexpected eof" 0)
  186. (let ((tok (queue-peek tokens)))
  187. (cond ((equal? tok special:close)
  188. (queue-pop! tokens)
  189. '())
  190. ((equal? tok special:dot)
  191. (queue-pop! tokens)
  192. (let ((r (parse-1 tokens)))
  193. (let ((next-tok (queue-pop! tokens)))
  194. (unless (equal? special:close next-tok)
  195. (print `(bad token was ,next-tok))
  196. (print `(parse-1 was ,r))
  197. (error 'didnt-close-after-dot "didnt close after dot" next-tok)))
  198. r))
  199. (else (let ((elt (parse-1 tokens)))
  200. (cons elt (parse:open tokens))))))))
  201. ;;;;;;;;;;
  202. (define (call-with-input-file f p)
  203. (let ((port (open-input-port f)))
  204. (let ((res (p port)))
  205. (close-port port)
  206. res)))
  207. (define (read-file filename)
  208. (call-with-input-file filename
  209. (lambda (port)
  210. (read-port port))))
  211. (define (read-port port)
  212. (let ((q (empty-queue)))
  213. (tokenize q port)
  214. ;; (print `(tokens ,(queue:top q)))
  215. (parse q)))