read-text-outline 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256
  1. #!/bin/sh
  2. # aside from this initial boilerplate, this is actually -*- scheme -*- code
  3. main='(module-ref (resolve-module '\''(scripts read-text-outline)) '\'main')'
  4. exec ${GUILE-guile} -l $0 -c "(apply $main (cdr (command-line)))" "$@"
  5. !#
  6. ;;; read-text-outline --- Read a text outline and display it as a sexp
  7. ;; Copyright (C) 2002, 2006 Free Software Foundation, Inc.
  8. ;;
  9. ;; This program is free software; you can redistribute it and/or
  10. ;; modify it under the terms of the GNU General Public License as
  11. ;; published by the Free Software Foundation; either version 2, or
  12. ;; (at your option) any later version.
  13. ;;
  14. ;; This program is distributed in the hope that it will be useful,
  15. ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  16. ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  17. ;; General Public License for more details.
  18. ;;
  19. ;; You should have received a copy of the GNU General Public License
  20. ;; along with this software; see the file COPYING. If not, write to
  21. ;; the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  22. ;; Boston, MA 02110-1301 USA
  23. ;;; Author: Thien-Thi Nguyen <ttn@gnu.org>
  24. ;;; Commentary:
  25. ;; Usage: read-text-outline OUTLINE
  26. ;;
  27. ;; Scan OUTLINE file and display a list of trees, the structure of
  28. ;; each reflecting the "levels" in OUTLINE. The recognized outline
  29. ;; format (used to indicate outline headings) is zero or more pairs of
  30. ;; leading spaces followed by "-". Something like:
  31. ;;
  32. ;; - a 0
  33. ;; - b 1
  34. ;; - c 2
  35. ;; - d 1
  36. ;; - e 0
  37. ;; - f 1
  38. ;; - g 2
  39. ;; - h 1
  40. ;;
  41. ;; In this example the levels are shown to the right. The output for
  42. ;; such a file would be the single line:
  43. ;;
  44. ;; (("a" ("b" "c") "d") ("e" ("f" "g") "h"))
  45. ;;
  46. ;; Basically, anything at the beginning of a list is a parent, and the
  47. ;; remaining elements of that list are its children.
  48. ;;
  49. ;;
  50. ;; Usage from a Scheme program: These two procs are exported:
  51. ;;
  52. ;; (read-text-outline . args) ; only first arg is used
  53. ;; (read-text-outline-silently port)
  54. ;; (make-text-outline-reader re specs)
  55. ;;
  56. ;; `make-text-outline-reader' returns a proc that reads from PORT and
  57. ;; returns a list of trees (similar to `read-text-outline-silently').
  58. ;;
  59. ;; RE is a regular expression (string) that is used to identify a header
  60. ;; line of the outline (as opposed to a whitespace line or intervening
  61. ;; text). RE must begin w/ a sub-expression to match the "level prefix"
  62. ;; of the line. You can use `level-submatch-number' in SPECS (explained
  63. ;; below) to specify a number other than 1, the default.
  64. ;;
  65. ;; Normally, the level of the line is taken directly as the length of
  66. ;; its level prefix. This often results in adjacent levels not mapping
  67. ;; to adjacent numbers, which confuses the tree-building portion of the
  68. ;; program, which expects top-level to be 0, first sub-level to be 1,
  69. ;; etc. You can use `level-substring-divisor' or `compute-level' in
  70. ;; SPECS to specify a constant scaling factor or specify a completely
  71. ;; alternative procedure, respectively.
  72. ;;
  73. ;; SPECS is an alist which may contain the following key/value pairs:
  74. ;;
  75. ;; - level-submatch-number NUMBER
  76. ;; - level-substring-divisor NUMBER
  77. ;; - compute-level PROC
  78. ;; - body-submatch-number NUMBER
  79. ;; - extra-fields ((FIELD-1 . SUBMATCH-1) (FIELD-2 . SUBMATCH-2) ...)
  80. ;;
  81. ;; The PROC value associated with key `compute-level' should take a
  82. ;; Scheme match structure (as returned by `regexp-exec') and return a
  83. ;; number, the normalized level for that line. If this is specified,
  84. ;; it takes precedence over other level-computation methods.
  85. ;;
  86. ;; Use `body-submatch-number' if RE specifies the whole body, or if you
  87. ;; want to make use of the extra fields parsing. The `extra-fields'
  88. ;; value is a sub-alist, whose keys name additional fields that are to
  89. ;; be recognized. These fields along with `level' are set as object
  90. ;; properties of the final string ("body") that is consed into the tree.
  91. ;; If a field name ends in "?" the field value is set to be #t if there
  92. ;; is a match and the result is not an empty string, and #f otherwise.
  93. ;;
  94. ;;
  95. ;; Bugs and caveats:
  96. ;;
  97. ;; (1) Only the first file specified on the command line is scanned.
  98. ;; (2) TAB characters at the beginnings of lines are not recognized.
  99. ;; (3) Outlines that "skip" levels signal an error. In other words,
  100. ;; this will fail:
  101. ;;
  102. ;; - a 0
  103. ;; - b 1
  104. ;; - c 3 <-- skipped 2 -- error!
  105. ;; - d 1
  106. ;;
  107. ;;
  108. ;; TODO: Determine what's the right thing to do for skips.
  109. ;; Handle TABs.
  110. ;; Make line format customizable via longopts.
  111. ;;; Code:
  112. (define-module (scripts read-text-outline)
  113. :export (read-text-outline
  114. read-text-outline-silently
  115. make-text-outline-reader)
  116. :use-module (ice-9 regex)
  117. :autoload (ice-9 rdelim) (read-line)
  118. :autoload (ice-9 getopt-long) (getopt-long))
  119. (define (?? symbol)
  120. (let ((name (symbol->string symbol)))
  121. (string=? "?" (substring name (1- (string-length name))))))
  122. (define (msub n)
  123. (lambda (m)
  124. (match:substring m n)))
  125. (define (??-predicates pair)
  126. (cons (car pair)
  127. (if (?? (car pair))
  128. (lambda (m)
  129. (not (string=? "" (match:substring m (cdr pair)))))
  130. (msub (cdr pair)))))
  131. (define (make-line-parser re specs)
  132. (let* ((rx (let ((fc (substring re 0 1)))
  133. (make-regexp (if (string=? "^" fc)
  134. re
  135. (string-append "^" re)))))
  136. (check (lambda (key)
  137. (assq-ref specs key)))
  138. (level-substring (msub (or (check 'level-submatch-number) 1)))
  139. (extract-level (cond ((check 'compute-level)
  140. => (lambda (proc)
  141. (lambda (m)
  142. (proc m))))
  143. ((check 'level-substring-divisor)
  144. => (lambda (n)
  145. (lambda (m)
  146. (/ (string-length (level-substring m))
  147. n))))
  148. (else
  149. (lambda (m)
  150. (string-length (level-substring m))))))
  151. (extract-body (cond ((check 'body-submatch-number)
  152. => msub)
  153. (else
  154. (lambda (m) (match:suffix m)))))
  155. (misc-props! (cond ((check 'extra-fields)
  156. => (lambda (alist)
  157. (let ((new (map ??-predicates alist)))
  158. (lambda (obj m)
  159. (for-each
  160. (lambda (pair)
  161. (set-object-property!
  162. obj (car pair)
  163. ((cdr pair) m)))
  164. new)))))
  165. (else
  166. (lambda (obj m) #t)))))
  167. ;; retval
  168. (lambda (line)
  169. (cond ((regexp-exec rx line)
  170. => (lambda (m)
  171. (let ((level (extract-level m))
  172. (body (extract-body m)))
  173. (set-object-property! body 'level level)
  174. (misc-props! body m)
  175. body)))
  176. (else #f)))))
  177. (define (make-text-outline-reader re specs)
  178. (let ((parse-line (make-line-parser re specs)))
  179. ;; retval
  180. (lambda (port)
  181. (let* ((all '(start))
  182. (pchain (list))) ; parents chain
  183. (let loop ((line (read-line port))
  184. (prev-level -1) ; how this relates to the first input
  185. ; level determines whether or not we
  186. ; start in "sibling" or "child" mode.
  187. ; in the end, `start' is ignored and
  188. ; it's much easier to ignore parents
  189. ; than siblings (sometimes). this is
  190. ; not to encourage ignorance, however.
  191. (tp all)) ; tail pointer
  192. (or (eof-object? line)
  193. (cond ((parse-line line)
  194. => (lambda (w)
  195. (let* ((words (list w))
  196. (level (object-property w 'level))
  197. (diff (- level prev-level)))
  198. (cond
  199. ;; sibling
  200. ((zero? diff)
  201. ;; just extend the chain
  202. (set-cdr! tp words))
  203. ;; child
  204. ((positive? diff)
  205. (or (= 1 diff)
  206. (error "unhandled diff not 1:" diff line))
  207. ;; parent may be contacted by uncle later (kids
  208. ;; these days!) so save its level
  209. (set-object-property! tp 'level prev-level)
  210. (set! pchain (cons tp pchain))
  211. ;; "push down" car into hierarchy
  212. (set-car! tp (cons (car tp) words)))
  213. ;; uncle
  214. ((negative? diff)
  215. ;; prune back to where levels match
  216. (do ((p pchain (cdr p)))
  217. ((= level (object-property (car p) 'level))
  218. (set! pchain p)))
  219. ;; resume at this level
  220. (set-cdr! (car pchain) words)
  221. (set! pchain (cdr pchain))))
  222. (loop (read-line port) level words))))
  223. (else (loop (read-line port) prev-level tp)))))
  224. (set! all (car all))
  225. (if (eq? 'start all)
  226. '() ; wasteland
  227. (cdr all))))))
  228. (define read-text-outline-silently
  229. (make-text-outline-reader "(([ ][ ])*)- *"
  230. '((level-substring-divisor . 2))))
  231. (define (read-text-outline . args)
  232. (write (read-text-outline-silently (open-file (car args) "r")))
  233. (newline)
  234. #t) ; exit val
  235. (define main read-text-outline)
  236. ;;; read-text-outline ends here