dom.el 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253
  1. ;;; dom.el --- XML/HTML (etc.) DOM manipulation and searching functions -*- lexical-binding: t -*-
  2. ;; Copyright (C) 2014-2017 Free Software Foundation, Inc.
  3. ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
  4. ;; Keywords: xml, html
  5. ;; This file is part of GNU Emacs.
  6. ;; GNU Emacs is free software: you can redistribute it and/or modify
  7. ;; it under the terms of the GNU General Public License as published by
  8. ;; the Free Software Foundation, either version 3 of the License, or
  9. ;; (at your option) any later version.
  10. ;; GNU Emacs is distributed in the hope that it will be useful,
  11. ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. ;; GNU General Public License for more details.
  14. ;; You should have received a copy of the GNU General Public License
  15. ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
  16. ;;; Commentary:
  17. ;;; Code:
  18. (require 'cl-lib)
  19. (eval-when-compile (require 'subr-x))
  20. (defsubst dom-tag (node)
  21. "Return the NODE tag."
  22. ;; Called on a list of nodes. Use the first.
  23. (if (consp (car node))
  24. (caar node)
  25. (car node)))
  26. (defsubst dom-attributes (node)
  27. "Return the NODE attributes."
  28. ;; Called on a list of nodes. Use the first.
  29. (if (consp (car node))
  30. (cadr (car node))
  31. (cadr node)))
  32. (defsubst dom-children (node)
  33. "Return the NODE children."
  34. ;; Called on a list of nodes. Use the first.
  35. (if (consp (car node))
  36. (cddr (car node))
  37. (cddr node)))
  38. (defun dom-non-text-children (node)
  39. "Return all non-text-node children of NODE."
  40. (cl-loop for child in (dom-children node)
  41. unless (stringp child)
  42. collect child))
  43. (defun dom-set-attributes (node attributes)
  44. "Set the attributes of NODE to ATTRIBUTES."
  45. (setq node (dom-ensure-node node))
  46. (setcar (cdr node) attributes))
  47. (defun dom-set-attribute (node attribute value)
  48. "Set ATTRIBUTE in NODE to VALUE."
  49. (setq node (dom-ensure-node node))
  50. (let ((old (assoc attribute (cadr node))))
  51. (if old
  52. (setcdr old value)
  53. (setcar (cdr node) (nconc (cadr node) (list (cons attribute value)))))))
  54. (defmacro dom-attr (node attr)
  55. "Return the attribute ATTR from NODE.
  56. A typical attribute is `href'."
  57. `(cdr (assq ,attr (dom-attributes ,node))))
  58. (defun dom-text (node)
  59. "Return all the text bits in the current node concatenated."
  60. (mapconcat 'identity (cl-remove-if-not 'stringp (dom-children node)) " "))
  61. (defun dom-texts (node &optional separator)
  62. "Return all textual data under NODE concatenated with SEPARATOR in-between."
  63. (mapconcat
  64. 'identity
  65. (mapcar
  66. (lambda (elem)
  67. (if (stringp elem)
  68. elem
  69. (dom-texts elem separator)))
  70. (dom-children node))
  71. (or separator " ")))
  72. (defun dom-child-by-tag (dom tag)
  73. "Return the first child of DOM that is of type TAG."
  74. (assoc tag (dom-children dom)))
  75. (defun dom-by-tag (dom tag)
  76. "Return elements in DOM that is of type TAG.
  77. A name is a symbol like `td'."
  78. (let ((matches (cl-loop for child in (dom-children dom)
  79. for matches = (and (not (stringp child))
  80. (dom-by-tag child tag))
  81. when matches
  82. append matches)))
  83. (if (equal (dom-tag dom) tag)
  84. (cons dom matches)
  85. matches)))
  86. (defun dom-strings (dom)
  87. "Return elements in DOM that are strings."
  88. (cl-loop for child in (dom-children dom)
  89. if (stringp child)
  90. collect child
  91. else
  92. append (dom-strings child)))
  93. (defun dom-by-class (dom match)
  94. "Return elements in DOM that have a class name that matches regexp MATCH."
  95. (dom-elements dom 'class match))
  96. (defun dom-by-style (dom match)
  97. "Return elements in DOM that have a style that matches regexp MATCH."
  98. (dom-elements dom 'style match))
  99. (defun dom-by-id (dom match)
  100. "Return elements in DOM that have an ID that matches regexp MATCH."
  101. (dom-elements dom 'id match))
  102. (defun dom-elements (dom attribute match)
  103. "Find elements matching MATCH (a regexp) in ATTRIBUTE.
  104. ATTRIBUTE would typically be `class', `id' or the like."
  105. (let ((matches (cl-loop for child in (dom-children dom)
  106. for matches = (and (not (stringp child))
  107. (dom-elements child attribute
  108. match))
  109. when matches
  110. append matches))
  111. (attr (dom-attr dom attribute)))
  112. (if (and attr
  113. (string-match match attr))
  114. (cons dom matches)
  115. matches)))
  116. (defun dom-remove-node (dom node)
  117. "Remove NODE from DOM."
  118. ;; If we're removing the top level node, just return nil.
  119. (dolist (child (dom-children dom))
  120. (cond
  121. ((eq node child)
  122. (delq node dom))
  123. ((not (stringp child))
  124. (dom-remove-node child node)))))
  125. (defun dom-parent (dom node)
  126. "Return the parent of NODE in DOM."
  127. (if (memq node (dom-children dom))
  128. dom
  129. (let ((result nil))
  130. (dolist (elem (dom-children dom))
  131. (when (and (not result)
  132. (not (stringp elem)))
  133. (setq result (dom-parent elem node))))
  134. result)))
  135. (defun dom-previous-sibling (dom node)
  136. "Return the previous sibling of NODE in DOM."
  137. (when-let (parent (dom-parent dom node))
  138. (let ((siblings (dom-children parent))
  139. (previous nil))
  140. (while siblings
  141. (when (eq (cadr siblings) node)
  142. (setq previous (car siblings)))
  143. (pop siblings))
  144. previous)))
  145. (defun dom-node (tag &optional attributes &rest children)
  146. "Return a DOM node with TAG and ATTRIBUTES."
  147. (if children
  148. `(,tag ,attributes ,@children)
  149. (list tag attributes)))
  150. (defun dom-append-child (node child)
  151. "Append CHILD to the end of NODE's children."
  152. (setq node (dom-ensure-node node))
  153. (nconc node (list child)))
  154. (defun dom-add-child-before (node child &optional before)
  155. "Add CHILD to NODE's children before child BEFORE.
  156. If BEFORE is nil, make CHILD NODE's first child."
  157. (setq node (dom-ensure-node node))
  158. (let ((children (dom-children node)))
  159. (when (and before
  160. (not (memq before children)))
  161. (error "%s does not exist as a child" before))
  162. (let ((pos (if before
  163. (cl-position before children)
  164. 0)))
  165. (if (zerop pos)
  166. ;; First child.
  167. (setcdr (cdr node) (cons child (cddr node)))
  168. (setcdr (nthcdr (1- pos) children)
  169. (cons child (nthcdr pos children))))))
  170. node)
  171. (defun dom-ensure-node (node)
  172. "Ensure that NODE is a proper DOM node."
  173. ;; Add empty attributes, if none.
  174. (when (consp (car node))
  175. (setq node (car node)))
  176. (when (= (length node) 1)
  177. (setcdr node (list nil)))
  178. node)
  179. (defun dom-pp (dom &optional remove-empty)
  180. "Pretty-print DOM at point.
  181. If REMOVE-EMPTY, ignore textual nodes that contain just
  182. white-space."
  183. (let ((column (current-column)))
  184. (insert (format "(%S " (dom-tag dom)))
  185. (let* ((attr (dom-attributes dom))
  186. (times (length attr))
  187. (column (1+ (current-column))))
  188. (if (null attr)
  189. (insert "nil")
  190. (insert "(")
  191. (dolist (elem attr)
  192. (insert (format "(%S . %S)" (car elem) (cdr elem)))
  193. (if (zerop (cl-decf times))
  194. (insert ")")
  195. (insert "\n" (make-string column ? ))))))
  196. (let* ((children (if remove-empty
  197. (cl-remove-if
  198. (lambda (child)
  199. (and (stringp child)
  200. (string-match "\\`[\n\r\t  ]*\\'" child)))
  201. (dom-children dom))
  202. (dom-children dom)))
  203. (times (length children)))
  204. (if (null children)
  205. (insert ")")
  206. (insert "\n" (make-string (1+ column) ? ))
  207. (dolist (child children)
  208. (if (stringp child)
  209. (if (or (not remove-empty)
  210. (not (string-match "\\`[\n\r\t  ]*\\'" child)))
  211. (insert (format "%S" child)))
  212. (dom-pp child remove-empty))
  213. (if (zerop (cl-decf times))
  214. (insert ")")
  215. (insert "\n" (make-string (1+ column) ? ))))))))
  216. (provide 'dom)
  217. ;;; dom.el ends here