read-scheme-source.scm 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283
  1. ;;; read-scheme-source --- Read a file, recognizing scheme forms and comments
  2. ;; Copyright (C) 2001, 2006, 2011 Free Software Foundation, Inc.
  3. ;;
  4. ;; This program is free software; you can redistribute it and/or
  5. ;; modify it under the terms of the GNU Lesser General Public License
  6. ;; as published by the Free Software Foundation; either version 3, or
  7. ;; (at your option) any later version.
  8. ;;
  9. ;; This program is distributed in the hope that it will be useful,
  10. ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. ;; Lesser General Public License for more details.
  13. ;;
  14. ;; You should have received a copy of the GNU Lesser General Public
  15. ;; License along with this software; see the file COPYING.LESSER. If
  16. ;; not, write to the Free Software Foundation, Inc., 51 Franklin
  17. ;; Street, Fifth Floor, Boston, MA 02110-1301 USA
  18. ;;; Author: Thien-Thi Nguyen
  19. ;;; Commentary:
  20. ;; Usage: read-scheme-source FILE1 FILE2 ...
  21. ;;
  22. ;; This program parses each FILE and writes to stdout sexps that describe the
  23. ;; top-level structures of the file: scheme forms, single-line comments, and
  24. ;; hash-bang comments. You can further process these (to associate comments
  25. ;; w/ scheme forms as a kind of documentation, for example).
  26. ;;
  27. ;; The output sexps have one of these forms:
  28. ;;
  29. ;; (quote (filename FILENAME))
  30. ;;
  31. ;; (quote (comment :leading-semicolons N
  32. ;; :text LINE))
  33. ;;
  34. ;; (quote (whitespace :text LINE))
  35. ;;
  36. ;; (quote (hash-bang-comment :line LINUM
  37. ;; :line-count N
  38. ;; :text-list (LINE1 LINE2 ...)))
  39. ;;
  40. ;; (quote (following-form-properties :line LINUM
  41. ;; :line-count N)
  42. ;; :type TYPE
  43. ;; :signature SIGNATURE
  44. ;; :std-int-doc DOCSTRING))
  45. ;;
  46. ;; SEXP
  47. ;;
  48. ;; The first four are straightforward (both FILENAME and LINE are strings sans
  49. ;; newline, while LINUM and N are integers). The last two always go together,
  50. ;; in that order. SEXP is scheme code processed only by `read' and then
  51. ;; `write'.
  52. ;;
  53. ;; The :type field may be omitted if the form is not recognized. Otherwise,
  54. ;; TYPE may be one of: procedure, alias, define-module, variable.
  55. ;;
  56. ;; The :signature field may be omitted if the form is not a procedure.
  57. ;; Otherwise, SIGNATURE is a list showing the procedure's signature.
  58. ;;
  59. ;; If the type is `procedure' and the form has a standard internal docstring
  60. ;; (first body form a string), that is extracted in full -- including any
  61. ;; embedded newlines -- and recorded by field :std-int-doc.
  62. ;;
  63. ;;
  64. ;; Usage from a program: The output list of sexps can be retrieved by scheme
  65. ;; programs w/o having to capture stdout, like so:
  66. ;;
  67. ;; (use-modules (scripts read-scheme-source))
  68. ;; (define source-forms (read-scheme-source-silently "FILE1" "FILE2" ...))
  69. ;;
  70. ;; There are also two convenience procs exported for use by Scheme programs:
  71. ;;
  72. ;; (clump FORMS) --- filter FORMS combining contiguous comment forms that
  73. ;; have the same number of leading semicolons.
  74. ;;
  75. ;; (quoted? SYM FORM) --- see if FORM looks like: "(quote (SYM ...))", parse
  76. ;; the ":tags", and return alist of (TAG . VAL) elems.
  77. ;;
  78. ;; TODO: Add option "--clump-comments", maybe w/ different clumping styles.
  79. ;; Make `annotate!' extensible.
  80. ;;; Code:
  81. (define-module (scripts read-scheme-source)
  82. :use-module (ice-9 rdelim)
  83. :export (read-scheme-source
  84. read-scheme-source-silently
  85. quoted?
  86. clump))
  87. (define %include-in-guild-list #f)
  88. (define %summary "Print a parsed representation of a Scheme file.")
  89. ;; Try to figure out what FORM is and its various attributes.
  90. ;; Call proc NOTE! with key (a symbol) and value.
  91. ;;
  92. (define (annotate! form note!)
  93. (cond ((and (list? form)
  94. (< 2 (length form))
  95. (eq? 'define (car form))
  96. (pair? (cadr form))
  97. (symbol? (caadr form)))
  98. (note! ':type 'procedure)
  99. (note! ':signature (cadr form))
  100. (and (< 3 (length form))
  101. (string? (caddr form))
  102. (note! ':std-int-doc (caddr form))))
  103. ((and (list? form)
  104. (< 2 (length form))
  105. (eq? 'define (car form))
  106. (symbol? (cadr form))
  107. (list? (caddr form))
  108. (< 3 (length (caddr form)))
  109. (eq? 'lambda (car (caddr form)))
  110. (string? (caddr (caddr form))))
  111. (note! ':type 'procedure)
  112. (note! ':signature (cons (cadr form) (cadr (caddr form))))
  113. (note! ':std-int-doc (caddr (caddr form))))
  114. ((and (list? form)
  115. (= 3 (length form))
  116. (eq? 'define (car form))
  117. (symbol? (cadr form))
  118. (symbol? (caddr form)))
  119. (note! ':type 'alias))
  120. ((and (list? form)
  121. (eq? 'define-module (car form)))
  122. (note! ':type 'define-module))
  123. ;; Add other types here.
  124. (else (note! ':type 'variable))))
  125. ;; Process FILE, calling NB! on parsed top-level elements.
  126. ;; Recognized: #!-!# and regular comments in addition to normal forms.
  127. ;;
  128. (define (process file nb!)
  129. (nb! `'(filename ,file))
  130. (let ((hash-bang-rx (make-regexp "^#!"))
  131. (bang-hash-rx (make-regexp "^!#"))
  132. (all-comment-rx (make-regexp "^[ \t]*(;+)"))
  133. (all-whitespace-rx (make-regexp "^[ \t]*$"))
  134. (p (open-input-file file)))
  135. (let loop ((n (1+ (port-line p))) (line (read-line p)))
  136. (or (not n)
  137. (eof-object? line)
  138. (begin
  139. (cond ((regexp-exec hash-bang-rx line)
  140. (let loop ((line (read-line p))
  141. (text (list line)))
  142. (if (or (eof-object? line)
  143. (regexp-exec bang-hash-rx line))
  144. (nb! `'(hash-bang-comment
  145. :line ,n
  146. :line-count ,(1+ (length text))
  147. :text-list ,(reverse
  148. (cons line text))))
  149. (loop (read-line p)
  150. (cons line text)))))
  151. ((regexp-exec all-whitespace-rx line)
  152. (nb! `'(whitespace :text ,line)))
  153. ((regexp-exec all-comment-rx line)
  154. => (lambda (m)
  155. (nb! `'(comment
  156. :leading-semicolons
  157. ,(let ((m1 (vector-ref m 1)))
  158. (- (cdr m1) (car m1)))
  159. :text ,line))))
  160. (else
  161. (unread-string line p)
  162. (let* ((form (read p))
  163. (count (- (port-line p) n))
  164. (props (let* ((props '())
  165. (prop+ (lambda args
  166. (set! props
  167. (append props args)))))
  168. (annotate! form prop+)
  169. props)))
  170. (or (= count 1) ; ugh
  171. (begin
  172. (read-line p)
  173. (set! count (1+ count))))
  174. (nb! `'(following-form-properties
  175. :line ,n
  176. :line-count ,count
  177. ,@props))
  178. (nb! form))))
  179. (loop (1+ (port-line p)) (read-line p)))))))
  180. ;;; entry points
  181. (define (read-scheme-source-silently . files)
  182. "See commentary in module (scripts read-scheme-source)."
  183. (let* ((res '()))
  184. (for-each (lambda (file)
  185. (process file (lambda (e) (set! res (cons e res)))))
  186. files)
  187. (reverse res)))
  188. (define (read-scheme-source . files)
  189. "See commentary in module (scripts read-scheme-source)."
  190. (for-each (lambda (file)
  191. (process file (lambda (e) (write e) (newline))))
  192. files))
  193. ;; Recognize: (quote (SYM :TAG1 VAL1 :TAG2 VAL2 ...))
  194. ;; and return alist: ((TAG1 . VAL1) (TAG2 . VAL2) ...)
  195. ;; where the tags are symbols.
  196. ;;
  197. (define (quoted? sym form)
  198. (and (list? form)
  199. (= 2 (length form))
  200. (eq? 'quote (car form))
  201. (let ((inside (cadr form)))
  202. (and (list? inside)
  203. (< 0 (length inside))
  204. (eq? sym (car inside))
  205. (let loop ((ls (cdr inside)) (alist '()))
  206. (if (null? ls)
  207. alist ; retval
  208. (let ((first (car ls)))
  209. (or (symbol? first)
  210. (error "bad list!"))
  211. (loop (cddr ls)
  212. (acons (string->symbol
  213. (substring (symbol->string first) 1))
  214. (cadr ls)
  215. alist)))))))))
  216. ;; Filter FORMS, combining contiguous comment forms that have the same number
  217. ;; of leading semicolons. Do not include in them whitespace lines.
  218. ;; Whitespace lines outside of such comment groupings are ignored, as are
  219. ;; hash-bang comments. All other forms are passed through unchanged.
  220. ;;
  221. (define (clump forms)
  222. (let loop ((forms forms) (acc '()) (pass-this-one-through? #f))
  223. (if (null? forms)
  224. (reverse acc) ; retval
  225. (let ((form (car forms)))
  226. (cond (pass-this-one-through?
  227. (loop (cdr forms) (cons form acc) #f))
  228. ((quoted? 'following-form-properties form)
  229. (loop (cdr forms) (cons form acc) #t))
  230. ((quoted? 'whitespace form) ;;; ignore
  231. (loop (cdr forms) acc #f))
  232. ((quoted? 'hash-bang-comment form) ;;; ignore for now
  233. (loop (cdr forms) acc #f))
  234. ((quoted? 'comment form)
  235. => (lambda (alist)
  236. (let cloop ((inner-forms (cdr forms))
  237. (level (assq-ref alist 'leading-semicolons))
  238. (text (list (assq-ref alist 'text))))
  239. (let ((up (lambda ()
  240. (loop inner-forms
  241. (cons (cons level (reverse text))
  242. acc)
  243. #f))))
  244. (if (null? inner-forms)
  245. (up)
  246. (let ((inner-form (car inner-forms)))
  247. (cond ((quoted? 'comment inner-form)
  248. => (lambda (inner-alist)
  249. (let ((new-level
  250. (assq-ref
  251. inner-alist
  252. 'leading-semicolons)))
  253. (if (= new-level level)
  254. (cloop (cdr inner-forms)
  255. level
  256. (cons (assq-ref
  257. inner-alist
  258. 'text)
  259. text))
  260. (up)))))
  261. (else (up)))))))))
  262. (else (loop (cdr forms) (cons form acc) #f)))))))
  263. ;;; script entry point
  264. (define main read-scheme-source)
  265. ;;; read-scheme-source ends here