SXML-tree-trans.scm 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250
  1. ; XML/HTML processing in Scheme
  2. ; SXML expression tree transformers
  3. ;
  4. ; IMPORT
  5. ; A prelude appropriate for your Scheme system
  6. ; (myenv-bigloo.scm, myenv-mit.scm, etc.)
  7. ;
  8. ; EXPORT
  9. ; (provide SRV:send-reply
  10. ; post-order pre-post-order replace-range)
  11. ;
  12. ; See vSXML-tree-trans.scm for the validation code, which also
  13. ; serves as usage examples.
  14. ;
  15. ; $Id: SXML-tree-trans.scm,v 1.6 2003/04/25 19:16:15 oleg Exp $
  16. ; Output the 'fragments'
  17. ; The fragments are a list of strings, characters,
  18. ; numbers, thunks, #f, #t -- and other fragments.
  19. ; The function traverses the tree depth-first, writes out
  20. ; strings and characters, executes thunks, and ignores
  21. ; #f and '().
  22. ; The function returns #t if anything was written at all;
  23. ; otherwise the result is #f
  24. ; If #t occurs among the fragments, it is not written out
  25. ; but causes the result of SRV:send-reply to be #t
  26. (define (SRV:send-reply . fragments)
  27. (let loop ((fragments fragments) (result #f))
  28. (cond
  29. ((null? fragments) result)
  30. ((not (car fragments)) (loop (cdr fragments) result))
  31. ((null? (car fragments)) (loop (cdr fragments) result))
  32. ((eq? #t (car fragments)) (loop (cdr fragments) #t))
  33. ((pair? (car fragments))
  34. (loop (cdr fragments) (loop (car fragments) result)))
  35. ((procedure? (car fragments))
  36. ((car fragments))
  37. (loop (cdr fragments) #t))
  38. (else
  39. (display (car fragments))
  40. (loop (cdr fragments) #t)))))
  41. ;------------------------------------------------------------------------
  42. ; Traversal of an SXML tree or a grove:
  43. ; a <Node> or a <Nodelist>
  44. ;
  45. ; A <Node> and a <Nodelist> are mutually-recursive datatypes that
  46. ; underlie the SXML tree:
  47. ; <Node> ::= (name . <Nodelist>) | "text string"
  48. ; An (ordered) set of nodes is just a list of the constituent nodes:
  49. ; <Nodelist> ::= (<Node> ...)
  50. ; Nodelists, and Nodes other than text strings are both lists. A
  51. ; <Nodelist> however is either an empty list, or a list whose head is
  52. ; not a symbol (an atom in general). A symbol at the head of a node is
  53. ; either an XML name (in which case it's a tag of an XML element), or
  54. ; an administrative name such as '@'.
  55. ; See SXPath.scm and SSAX.scm for more information on SXML.
  56. ; Pre-Post-order traversal of a tree and creation of a new tree:
  57. ; pre-post-order:: <tree> x <bindings> -> <new-tree>
  58. ; where
  59. ; <bindings> ::= (<binding> ...)
  60. ; <binding> ::= (<trigger-symbol> *preorder* . <handler>) |
  61. ; (<trigger-symbol> *macro* . <handler>) |
  62. ; (<trigger-symbol> <new-bindings> . <handler>) |
  63. ; (<trigger-symbol> . <handler>)
  64. ; <trigger-symbol> ::= XMLname | *text* | *default*
  65. ; <handler> :: <trigger-symbol> x [<tree>] -> <new-tree>
  66. ;
  67. ; The pre-post-order function visits the nodes and nodelists
  68. ; pre-post-order (depth-first). For each <Node> of the form (name
  69. ; <Node> ...) it looks up an association with the given 'name' among
  70. ; its <bindings>. If failed, pre-post-order tries to locate a
  71. ; *default* binding. It's an error if the latter attempt fails as
  72. ; well. Having found a binding, the pre-post-order function first
  73. ; checks to see if the binding is of the form
  74. ; (<trigger-symbol> *preorder* . <handler>)
  75. ; If it is, the handler is 'applied' to the current node. Otherwise,
  76. ; the pre-post-order function first calls itself recursively for each
  77. ; child of the current node, with <new-bindings> prepended to the
  78. ; <bindings> in effect. The result of these calls is passed to the
  79. ; <handler> (along with the head of the current <Node>). To be more
  80. ; precise, the handler is _applied_ to the head of the current node
  81. ; and its processed children. The result of the handler, which should
  82. ; also be a <tree>, replaces the current <Node>. If the current <Node>
  83. ; is a text string or other atom, a special binding with a symbol
  84. ; *text* is looked up.
  85. ;
  86. ; A binding can also be of a form
  87. ; (<trigger-symbol> *macro* . <handler>)
  88. ; This is equivalent to *preorder* described above. However, the result
  89. ; is re-processed again, with the current stylesheet.
  90. (define (pre-post-order tree bindings)
  91. (let* ((default-binding (assq '*default* bindings))
  92. (text-binding (or (assq '*text* bindings) default-binding))
  93. (text-handler ; Cache default and text bindings
  94. (and text-binding
  95. (if (procedure? (cdr text-binding))
  96. (cdr text-binding) (cddr text-binding)))))
  97. (let loop ((tree tree))
  98. (cond
  99. ((null? tree) '())
  100. ((not (pair? tree))
  101. (let ((trigger '*text*))
  102. (if text-handler (text-handler trigger tree)
  103. (error "Unknown binding for " trigger " and no default"))))
  104. ((not (symbol? (car tree))) (map loop tree)) ; tree is a nodelist
  105. (else ; tree is an SXML node
  106. (let* ((trigger (car tree))
  107. (binding (or (assq trigger bindings) default-binding)))
  108. (cond
  109. ((not binding)
  110. (error "Unknown binding for " trigger " and no default"))
  111. ((not (pair? (cdr binding))) ; must be a procedure: handler
  112. (apply (cdr binding) trigger (map loop (cdr tree))))
  113. ((eq? '*preorder* (cadr binding))
  114. (apply (cddr binding) tree))
  115. ((eq? '*macro* (cadr binding))
  116. (loop (apply (cddr binding) tree)))
  117. (else ; (cadr binding) is a local binding
  118. (apply (cddr binding) trigger
  119. (pre-post-order (cdr tree) (append (cadr binding) bindings)))
  120. ))))))))
  121. ; post-order is a strict subset of pre-post-order without *preorder*
  122. ; (let alone *macro*) traversals.
  123. ; Now pre-post-order is actually faster than the old post-order.
  124. ; The function post-order is deprecated and is aliased below for
  125. ; backward compatibility.
  126. (define post-order pre-post-order)
  127. ;------------------------------------------------------------------------
  128. ; Extended tree fold
  129. ; tree = atom | (node-name tree ...)
  130. ;
  131. ; foldts fdown fup fhere seed (Leaf str) = fhere seed str
  132. ; foldts fdown fup fhere seed (Nd kids) =
  133. ; fup seed $ foldl (foldts fdown fup fhere) (fdown seed) kids
  134. ; procedure fhere: seed -> atom -> seed
  135. ; procedure fdown: seed -> node -> seed
  136. ; procedure fup: parent-seed -> last-kid-seed -> node -> seed
  137. ; foldts returns the final seed
  138. (define (foldts fdown fup fhere seed tree)
  139. (cond
  140. ((null? tree) seed)
  141. ((not (pair? tree)) ; An atom
  142. (fhere seed tree))
  143. (else
  144. (let loop ((kid-seed (fdown seed tree)) (kids (cdr tree)))
  145. (if (null? kids)
  146. (fup seed kid-seed tree)
  147. (loop (foldts fdown fup fhere kid-seed (car kids))
  148. (cdr kids)))))))
  149. ;------------------------------------------------------------------------
  150. ; Traverse a forest depth-first and cut/replace ranges of nodes.
  151. ;
  152. ; The nodes that define a range don't have to have the same immediate
  153. ; parent, don't have to be on the same level, and the end node of a
  154. ; range doesn't even have to exist. A replace-range procedure removes
  155. ; nodes from the beginning node of the range up to (but not including)
  156. ; the end node of the range. In addition, the beginning node of the
  157. ; range can be replaced by a node or a list of nodes. The range of
  158. ; nodes is cut while depth-first traversing the forest. If all
  159. ; branches of the node are cut a node is cut as well. The procedure
  160. ; can cut several non-overlapping ranges from a forest.
  161. ; replace-range:: BEG-PRED x END-PRED x FOREST -> FOREST
  162. ; where
  163. ; type FOREST = (NODE ...)
  164. ; type NODE = Atom | (Name . FOREST) | FOREST
  165. ;
  166. ; The range of nodes is specified by two predicates, beg-pred and end-pred.
  167. ; beg-pred:: NODE -> #f | FOREST
  168. ; end-pred:: NODE -> #f | FOREST
  169. ; The beg-pred predicate decides on the beginning of the range. The node
  170. ; for which the predicate yields non-#f marks the beginning of the range
  171. ; The non-#f value of the predicate replaces the node. The value can be a
  172. ; list of nodes. The replace-range procedure then traverses the tree and skips
  173. ; all the nodes, until the end-pred yields non-#f. The value of the end-pred
  174. ; replaces the end-range node. The new end node and its brothers will be
  175. ; re-scanned.
  176. ; The predicates are evaluated pre-order. We do not descend into a node that
  177. ; is marked as the beginning of the range.
  178. (define (replace-range beg-pred end-pred forest)
  179. ; loop forest keep? new-forest
  180. ; forest is the forest to traverse
  181. ; new-forest accumulates the nodes we will keep, in the reverse
  182. ; order
  183. ; If keep? is #t, keep the curr node if atomic. If the node is not atomic,
  184. ; traverse its children and keep those that are not in the skip range.
  185. ; If keep? is #f, skip the current node if atomic. Otherwise,
  186. ; traverse its children. If all children are skipped, skip the node
  187. ; as well.
  188. (define (loop forest keep? new-forest)
  189. (if (null? forest) (values (reverse new-forest) keep?)
  190. (let ((node (car forest)))
  191. (if keep?
  192. (cond ; accumulate mode
  193. ((beg-pred node) => ; see if the node starts the skip range
  194. (lambda (repl-branches) ; if so, skip/replace the node
  195. (loop (cdr forest) #f
  196. (append (reverse repl-branches) new-forest))))
  197. ((not (pair? node)) ; it's an atom, keep it
  198. (loop (cdr forest) keep? (cons node new-forest)))
  199. (else
  200. (let*-values
  201. (((node?) (symbol? (car node))) ; or is it a nodelist?
  202. ((new-kids keep?) ; traverse its children
  203. (loop (if node? (cdr node) node) #t '())))
  204. (loop (cdr forest) keep?
  205. (cons
  206. (if node? (cons (car node) new-kids) new-kids)
  207. new-forest)))))
  208. ; skip mode
  209. (cond
  210. ((end-pred node) => ; end the skip range
  211. (lambda (repl-branches) ; repl-branches will be re-scanned
  212. (loop (append repl-branches (cdr forest)) #t
  213. new-forest)))
  214. ((not (pair? node)) ; it's an atom, skip it
  215. (loop (cdr forest) keep? new-forest))
  216. (else
  217. (let*-values
  218. (((node?) (symbol? (car node))) ; or is it a nodelist?
  219. ((new-kids keep?) ; traverse its children
  220. (loop (if node? (cdr node) node) #f '())))
  221. (loop (cdr forest) keep?
  222. (if (or keep? (pair? new-kids))
  223. (cons
  224. (if node? (cons (car node) new-kids) new-kids)
  225. new-forest)
  226. new-forest) ; if all kids are skipped
  227. )))))))) ; skip the node too
  228. (let*-values (((new-forest keep?) (loop forest #t '())))
  229. new-forest))