123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283 |
- ;;; read-scheme-source --- Read a file, recognizing scheme forms and comments
- ;; Copyright (C) 2001, 2006, 2011 Free Software Foundation, Inc.
- ;;
- ;; This program is free software; you can redistribute it and/or
- ;; modify it under the terms of the GNU Lesser General Public License
- ;; as published by the Free Software Foundation; either version 3, or
- ;; (at your option) any later version.
- ;;
- ;; This program is distributed in the hope that it will be useful,
- ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
- ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- ;; Lesser General Public License for more details.
- ;;
- ;; You should have received a copy of the GNU Lesser General Public
- ;; License along with this software; see the file COPYING.LESSER. If
- ;; not, write to the Free Software Foundation, Inc., 51 Franklin
- ;; Street, Fifth Floor, Boston, MA 02110-1301 USA
- ;;; Author: Thien-Thi Nguyen
- ;;; Commentary:
- ;; Usage: read-scheme-source FILE1 FILE2 ...
- ;;
- ;; This program parses each FILE and writes to stdout sexps that describe the
- ;; top-level structures of the file: scheme forms, single-line comments, and
- ;; hash-bang comments. You can further process these (to associate comments
- ;; w/ scheme forms as a kind of documentation, for example).
- ;;
- ;; The output sexps have one of these forms:
- ;;
- ;; (quote (filename FILENAME))
- ;;
- ;; (quote (comment :leading-semicolons N
- ;; :text LINE))
- ;;
- ;; (quote (whitespace :text LINE))
- ;;
- ;; (quote (hash-bang-comment :line LINUM
- ;; :line-count N
- ;; :text-list (LINE1 LINE2 ...)))
- ;;
- ;; (quote (following-form-properties :line LINUM
- ;; :line-count N)
- ;; :type TYPE
- ;; :signature SIGNATURE
- ;; :std-int-doc DOCSTRING))
- ;;
- ;; SEXP
- ;;
- ;; The first four are straightforward (both FILENAME and LINE are strings sans
- ;; newline, while LINUM and N are integers). The last two always go together,
- ;; in that order. SEXP is scheme code processed only by `read' and then
- ;; `write'.
- ;;
- ;; The :type field may be omitted if the form is not recognized. Otherwise,
- ;; TYPE may be one of: procedure, alias, define-module, variable.
- ;;
- ;; The :signature field may be omitted if the form is not a procedure.
- ;; Otherwise, SIGNATURE is a list showing the procedure's signature.
- ;;
- ;; If the type is `procedure' and the form has a standard internal docstring
- ;; (first body form a string), that is extracted in full -- including any
- ;; embedded newlines -- and recorded by field :std-int-doc.
- ;;
- ;;
- ;; Usage from a program: The output list of sexps can be retrieved by scheme
- ;; programs w/o having to capture stdout, like so:
- ;;
- ;; (use-modules (scripts read-scheme-source))
- ;; (define source-forms (read-scheme-source-silently "FILE1" "FILE2" ...))
- ;;
- ;; There are also two convenience procs exported for use by Scheme programs:
- ;;
- ;; (clump FORMS) --- filter FORMS combining contiguous comment forms that
- ;; have the same number of leading semicolons.
- ;;
- ;; (quoted? SYM FORM) --- see if FORM looks like: "(quote (SYM ...))", parse
- ;; the ":tags", and return alist of (TAG . VAL) elems.
- ;;
- ;; TODO: Add option "--clump-comments", maybe w/ different clumping styles.
- ;; Make `annotate!' extensible.
- ;;; Code:
- (define-module (scripts read-scheme-source)
- :use-module (ice-9 rdelim)
- :export (read-scheme-source
- read-scheme-source-silently
- quoted?
- clump))
- (define %include-in-guild-list #f)
- (define %summary "Print a parsed representation of a Scheme file.")
- ;; Try to figure out what FORM is and its various attributes.
- ;; Call proc NOTE! with key (a symbol) and value.
- ;;
- (define (annotate! form note!)
- (cond ((and (list? form)
- (< 2 (length form))
- (eq? 'define (car form))
- (pair? (cadr form))
- (symbol? (caadr form)))
- (note! ':type 'procedure)
- (note! ':signature (cadr form))
- (and (< 3 (length form))
- (string? (caddr form))
- (note! ':std-int-doc (caddr form))))
- ((and (list? form)
- (< 2 (length form))
- (eq? 'define (car form))
- (symbol? (cadr form))
- (list? (caddr form))
- (< 3 (length (caddr form)))
- (eq? 'lambda (car (caddr form)))
- (string? (caddr (caddr form))))
- (note! ':type 'procedure)
- (note! ':signature (cons (cadr form) (cadr (caddr form))))
- (note! ':std-int-doc (caddr (caddr form))))
- ((and (list? form)
- (= 3 (length form))
- (eq? 'define (car form))
- (symbol? (cadr form))
- (symbol? (caddr form)))
- (note! ':type 'alias))
- ((and (list? form)
- (eq? 'define-module (car form)))
- (note! ':type 'define-module))
- ;; Add other types here.
- (else (note! ':type 'variable))))
- ;; Process FILE, calling NB! on parsed top-level elements.
- ;; Recognized: #!-!# and regular comments in addition to normal forms.
- ;;
- (define (process file nb!)
- (nb! `'(filename ,file))
- (let ((hash-bang-rx (make-regexp "^#!"))
- (bang-hash-rx (make-regexp "^!#"))
- (all-comment-rx (make-regexp "^[ \t]*(;+)"))
- (all-whitespace-rx (make-regexp "^[ \t]*$"))
- (p (open-input-file file)))
- (let loop ((n (1+ (port-line p))) (line (read-line p)))
- (or (not n)
- (eof-object? line)
- (begin
- (cond ((regexp-exec hash-bang-rx line)
- (let loop ((line (read-line p))
- (text (list line)))
- (if (or (eof-object? line)
- (regexp-exec bang-hash-rx line))
- (nb! `'(hash-bang-comment
- :line ,n
- :line-count ,(1+ (length text))
- :text-list ,(reverse
- (cons line text))))
- (loop (read-line p)
- (cons line text)))))
- ((regexp-exec all-whitespace-rx line)
- (nb! `'(whitespace :text ,line)))
- ((regexp-exec all-comment-rx line)
- => (lambda (m)
- (nb! `'(comment
- :leading-semicolons
- ,(let ((m1 (vector-ref m 1)))
- (- (cdr m1) (car m1)))
- :text ,line))))
- (else
- (unread-string line p)
- (let* ((form (read p))
- (count (- (port-line p) n))
- (props (let* ((props '())
- (prop+ (lambda args
- (set! props
- (append props args)))))
- (annotate! form prop+)
- props)))
- (or (= count 1) ; ugh
- (begin
- (read-line p)
- (set! count (1+ count))))
- (nb! `'(following-form-properties
- :line ,n
- :line-count ,count
- ,@props))
- (nb! form))))
- (loop (1+ (port-line p)) (read-line p)))))))
- ;;; entry points
- (define (read-scheme-source-silently . files)
- "See commentary in module (scripts read-scheme-source)."
- (let* ((res '()))
- (for-each (lambda (file)
- (process file (lambda (e) (set! res (cons e res)))))
- files)
- (reverse res)))
- (define (read-scheme-source . files)
- "See commentary in module (scripts read-scheme-source)."
- (for-each (lambda (file)
- (process file (lambda (e) (write e) (newline))))
- files))
- ;; Recognize: (quote (SYM :TAG1 VAL1 :TAG2 VAL2 ...))
- ;; and return alist: ((TAG1 . VAL1) (TAG2 . VAL2) ...)
- ;; where the tags are symbols.
- ;;
- (define (quoted? sym form)
- (and (list? form)
- (= 2 (length form))
- (eq? 'quote (car form))
- (let ((inside (cadr form)))
- (and (list? inside)
- (< 0 (length inside))
- (eq? sym (car inside))
- (let loop ((ls (cdr inside)) (alist '()))
- (if (null? ls)
- alist ; retval
- (let ((first (car ls)))
- (or (symbol? first)
- (error "bad list!"))
- (loop (cddr ls)
- (acons (string->symbol
- (substring (symbol->string first) 1))
- (cadr ls)
- alist)))))))))
- ;; Filter FORMS, combining contiguous comment forms that have the same number
- ;; of leading semicolons. Do not include in them whitespace lines.
- ;; Whitespace lines outside of such comment groupings are ignored, as are
- ;; hash-bang comments. All other forms are passed through unchanged.
- ;;
- (define (clump forms)
- (let loop ((forms forms) (acc '()) (pass-this-one-through? #f))
- (if (null? forms)
- (reverse acc) ; retval
- (let ((form (car forms)))
- (cond (pass-this-one-through?
- (loop (cdr forms) (cons form acc) #f))
- ((quoted? 'following-form-properties form)
- (loop (cdr forms) (cons form acc) #t))
- ((quoted? 'whitespace form) ;;; ignore
- (loop (cdr forms) acc #f))
- ((quoted? 'hash-bang-comment form) ;;; ignore for now
- (loop (cdr forms) acc #f))
- ((quoted? 'comment form)
- => (lambda (alist)
- (let cloop ((inner-forms (cdr forms))
- (level (assq-ref alist 'leading-semicolons))
- (text (list (assq-ref alist 'text))))
- (let ((up (lambda ()
- (loop inner-forms
- (cons (cons level (reverse text))
- acc)
- #f))))
- (if (null? inner-forms)
- (up)
- (let ((inner-form (car inner-forms)))
- (cond ((quoted? 'comment inner-form)
- => (lambda (inner-alist)
- (let ((new-level
- (assq-ref
- inner-alist
- 'leading-semicolons)))
- (if (= new-level level)
- (cloop (cdr inner-forms)
- level
- (cons (assq-ref
- inner-alist
- 'text)
- text))
- (up)))))
- (else (up)))))))))
- (else (loop (cdr forms) (cons form acc) #f)))))))
- ;;; script entry point
- (define main read-scheme-source)
- ;;; read-scheme-source ends here
|