xmltok.el 58 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771
  1. ;;; xmltok.el --- XML tokenization -*- lexical-binding:t -*-
  2. ;; Copyright (C) 2003, 2007-2017 Free Software Foundation, Inc.
  3. ;; Author: James Clark
  4. ;; Keywords: wp, hypermedia, languages, XML
  5. ;; This file is part of GNU Emacs.
  6. ;; GNU Emacs is free software: you can redistribute it and/or modify
  7. ;; it under the terms of the GNU General Public License as published by
  8. ;; the Free Software Foundation, either version 3 of the License, or
  9. ;; (at your option) any later version.
  10. ;; GNU Emacs is distributed in the hope that it will be useful,
  11. ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. ;; GNU General Public License for more details.
  14. ;; You should have received a copy of the GNU General Public License
  15. ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
  16. ;;; Commentary:
  17. ;; This implements an XML 1.0 parser. It also implements the XML
  18. ;; Namespaces Recommendation. It is designed to be conforming, but it
  19. ;; works a bit differently from a normal XML parser. An XML document
  20. ;; consists of the prolog and an instance. The prolog is parsed as a
  21. ;; single unit using `xmltok-forward-prolog'. The instance is
  22. ;; considered as a sequence of tokens, where a token is something like
  23. ;; a start-tag, a comment, a chunk of data or a CDATA section. The
  24. ;; tokenization of the instance is stateless: the tokenization of one
  25. ;; part of the instance does not depend on tokenization of the
  26. ;; preceding part of the instance. This allows the instance to be
  27. ;; parsed incrementally. The main entry point is `xmltok-forward':
  28. ;; this can be called at any point in the instance provided it is
  29. ;; between tokens.
  30. ;;
  31. ;; This is a non-validating XML 1.0 processor. It does not resolve
  32. ;; parameter entities (including the external DTD subset) and it does
  33. ;; not resolve external general entities.
  34. ;;
  35. ;; It is non-conformant by design in the following respects.
  36. ;;
  37. ;; 1. It expects the client to detect aspects of well-formedness that
  38. ;; are not internal to a single token, specifically checking that
  39. ;; end-tags match start-tags and that the instance contains exactly
  40. ;; one element.
  41. ;;
  42. ;; 2. It expects the client to detect duplicate attributes. Detection
  43. ;; of duplicate attributes after expansion of namespace prefixes
  44. ;; requires the namespace processing state. Detection of duplicate
  45. ;; attributes before expansion of namespace prefixes does not, but is
  46. ;; redundant given that the client will do detection of duplicate
  47. ;; attributes after expansion of namespace prefixes.
  48. ;;
  49. ;; 3. It allows the client to recover from well-formedness errors.
  50. ;; This is essential for use in applications where the document is
  51. ;; being parsed during the editing process.
  52. ;;
  53. ;; 4. It does not support documents that do not conform to the lexical
  54. ;; requirements of the XML Namespaces Recommendation (e.g. a document
  55. ;; with a colon in an entity name).
  56. ;;
  57. ;; There are also a number of things that have not yet been
  58. ;; implemented that make it non-conformant.
  59. ;;
  60. ;; 1. It does not implement default attributes. ATTLIST declarations
  61. ;; are parsed, but no checking is done on the content of attribute
  62. ;; value literals specifying default attribute values, and default
  63. ;; attribute values are not reported to the client.
  64. ;;
  65. ;; 2. It does not implement internal entities containing elements. If
  66. ;; an internal entity is referenced and parsing its replacement text
  67. ;; yields one or more tags, then it will skip the reference and
  68. ;; report this to the client.
  69. ;;
  70. ;; 3. It does not check the syntax of public identifiers in the DTD.
  71. ;;
  72. ;; 4. It allows some non-ASCII characters in certain situations where
  73. ;; it should not. For example, it only enforces XML 1.0's
  74. ;; restrictions on name characters strictly for ASCII characters. The
  75. ;; problem here is XML's character model is based squarely on Unicode,
  76. ;; whereas Emacs's is not (as of version 21). It is not clear what
  77. ;; the right thing to do is.
  78. ;;; Code:
  79. (defvar xmltok-type nil)
  80. (defvar xmltok-start nil)
  81. (defvar xmltok-name-colon nil)
  82. (defvar xmltok-name-end nil)
  83. (defvar xmltok-replacement nil
  84. "String containing replacement for a character or entity reference.")
  85. (defvar xmltok-attributes nil
  86. "List containing attributes of last scanned element.
  87. Each member of the list is a vector representing an attribute, which
  88. can be accessed using the functions `xmltok-attribute-name-start',
  89. `xmltok-attribute-name-colon', `xmltok-attribute-name-end',
  90. `xmltok-attribute-value-start', `xmltok-attribute-value-end',
  91. `xmltok-attribute-raw-normalized-value', `xmltok-attribute-refs'.")
  92. (defvar xmltok-namespace-attributes nil
  93. "List containing namespace declarations of last scanned element.
  94. List has same format as `xmltok-attributes'.")
  95. (defvar xmltok-dtd nil
  96. "Information about the DTD used by `xmltok-forward'.
  97. `xmltok-forward-prolog' sets this up.
  98. It consists of an alist of general entity names vs definitions. The
  99. first member of the alist is t if references to entities not in the
  100. alist are well-formed \(e.g. because there's an external subset that
  101. wasn't parsed).
  102. Each general entity name is a string. The definition is either nil,
  103. a symbol, a string, a cons cell. If the definition is nil, then it
  104. means that it's an internal entity but the result of parsing it is
  105. unknown. If it is a symbol, then the symbol is either `unparsed',
  106. meaning the entity is an unparsed entity, `external', meaning the
  107. entity is or references an external entity, `element', meaning the
  108. entity includes one or more elements, or `not-well-formed', meaning
  109. the replacement text is not well-formed. If the definition is a
  110. string, then the replacement text of the entity is that string; this
  111. happens only during the parsing of the prolog. If the definition is
  112. a cons cell \(ER . AR), then ER specifies the string that results
  113. from referencing the entity in element content and AR is either nil,
  114. meaning the replacement text included a <, or a string which is the
  115. normalized attribute value.")
  116. (defvar xmltok-errors nil
  117. "List of errors detected by `xmltok-forward' and `xmltok-forward-prolog'.
  118. When `xmltok-forward' and `xmltok-forward-prolog' detect a
  119. well-formedness error, they will add an entry to the beginning of this
  120. list. Each entry is a vector [MESSAGE START END], where MESSAGE is a
  121. string giving the error message and START and END are integers
  122. indicating the position of the error.")
  123. (defmacro xmltok-save (&rest body)
  124. (declare (indent 0) (debug t))
  125. `(let (xmltok-type
  126. xmltok-start
  127. xmltok-name-colon
  128. xmltok-name-end
  129. xmltok-replacement
  130. xmltok-attributes
  131. xmltok-namespace-attributes
  132. xmltok-errors)
  133. ,@body))
  134. (defsubst xmltok-attribute-name-start (att)
  135. (aref att 0))
  136. (defsubst xmltok-attribute-name-colon (att)
  137. (aref att 1))
  138. (defsubst xmltok-attribute-name-end (att)
  139. (aref att 2))
  140. (defsubst xmltok-attribute-value-start (att)
  141. (aref att 3))
  142. (defsubst xmltok-attribute-value-end (att)
  143. (aref att 4))
  144. (defsubst xmltok-attribute-raw-normalized-value (att)
  145. "Return an object representing the normalized value of ATT.
  146. This can be t indicating that the normalized value is the same as
  147. the buffer substring from the start to the end of the value, or nil
  148. indicating that the value is not well-formed or a string."
  149. (aref att 5))
  150. (defsubst xmltok-attribute-refs (att)
  151. "Return a list of the entity and character references in ATT.
  152. Each member is a vector [TYPE START END] where TYPE is either char-ref
  153. or entity-ref and START and END are integers giving the start and end of
  154. the reference. Nested entity references are not included in the list."
  155. (aref att 6))
  156. (defun xmltok-attribute-prefix (att)
  157. (let ((colon (xmltok-attribute-name-colon att)))
  158. (and colon
  159. (buffer-substring-no-properties (xmltok-attribute-name-start att)
  160. colon))))
  161. (defun xmltok-attribute-local-name (att)
  162. (let ((colon (xmltok-attribute-name-colon att)))
  163. (buffer-substring-no-properties (if colon
  164. (1+ colon)
  165. (xmltok-attribute-name-start att))
  166. (xmltok-attribute-name-end att))))
  167. (defun xmltok-attribute-value (att)
  168. (let ((rnv (xmltok-attribute-raw-normalized-value att)))
  169. (and rnv
  170. (if (stringp rnv)
  171. rnv
  172. (buffer-substring-no-properties (xmltok-attribute-value-start att)
  173. (xmltok-attribute-value-end att))))))
  174. (defun xmltok-start-tag-prefix ()
  175. (and xmltok-name-colon
  176. (buffer-substring-no-properties (1+ xmltok-start)
  177. xmltok-name-colon)))
  178. (defun xmltok-start-tag-local-name ()
  179. (buffer-substring-no-properties (1+ (or xmltok-name-colon
  180. xmltok-start))
  181. xmltok-name-end))
  182. (defun xmltok-end-tag-prefix ()
  183. (and xmltok-name-colon
  184. (buffer-substring-no-properties (+ 2 xmltok-start)
  185. xmltok-name-colon)))
  186. (defun xmltok-end-tag-local-name ()
  187. (buffer-substring-no-properties (if xmltok-name-colon
  188. (1+ xmltok-name-colon)
  189. (+ 2 xmltok-start))
  190. xmltok-name-end))
  191. (defun xmltok-start-tag-qname ()
  192. (buffer-substring-no-properties (+ xmltok-start 1) xmltok-name-end))
  193. (defun xmltok-end-tag-qname ()
  194. (buffer-substring-no-properties (+ xmltok-start 2) xmltok-name-end))
  195. (defsubst xmltok-make-attribute (name-begin
  196. name-colon
  197. name-end
  198. &optional
  199. value-begin
  200. value-end
  201. raw-normalized-value)
  202. "Make an attribute.
  203. RAW-NORMALIZED-VALUE is nil if the value is not well-formed,
  204. t if the normalized value is the string between VALUE-BEGIN
  205. and VALUE-END, otherwise a STRING giving the value."
  206. (vector name-begin
  207. name-colon
  208. name-end
  209. value-begin
  210. value-end
  211. raw-normalized-value
  212. nil))
  213. (defsubst xmltok-error-message (err)
  214. (aref err 0))
  215. (defsubst xmltok-error-start (err)
  216. (aref err 1))
  217. (defsubst xmltok-error-end (err)
  218. (aref err 2))
  219. (defsubst xmltok-make-error (message start end)
  220. (vector message start end))
  221. (defun xmltok-add-error (message &optional start end)
  222. (push (xmltok-make-error message
  223. (or start xmltok-start)
  224. (or end (point)))
  225. xmltok-errors))
  226. (defun xmltok-forward ()
  227. (setq xmltok-start (point))
  228. (let* ((case-fold-search nil)
  229. (space-count (skip-chars-forward " \t\r\n"))
  230. (ch (char-after)))
  231. (cond ((eq ch ?\<)
  232. (cond ((> space-count 0)
  233. (setq xmltok-type 'space))
  234. (t
  235. (forward-char 1)
  236. (xmltok-scan-after-lt))))
  237. ((eq ch ?\&)
  238. (cond ((> space-count 0)
  239. (setq xmltok-type 'space))
  240. (t
  241. (forward-char 1)
  242. (xmltok-scan-after-amp 'xmltok-handle-entity))))
  243. ((re-search-forward "[<&]\\|\\(]]>\\)" nil t)
  244. (cond ((not (match-beginning 1))
  245. (goto-char (match-beginning 0))
  246. ;; must have got a non-space char
  247. (setq xmltok-type 'data))
  248. ((= (match-beginning 1) xmltok-start)
  249. (xmltok-add-error "Found `]]>' not closing a CDATA section")
  250. (setq xmltok-type 'not-well-formed))
  251. (t
  252. (goto-char (match-beginning 0))
  253. (setq xmltok-type
  254. (if (= (point) (+ xmltok-start space-count))
  255. 'space
  256. 'data)))))
  257. ((eq ch nil)
  258. (setq xmltok-type
  259. (if (> space-count 0)
  260. 'space
  261. nil)))
  262. (t
  263. (goto-char (point-max))
  264. (setq xmltok-type 'data)))))
  265. (eval-when-compile
  266. ;; A symbolic regexp is represented by a list whose CAR is the string
  267. ;; containing the regexp and whose cdr is a list of symbolic names
  268. ;; for the groups in the string.
  269. ;; Construct a symbolic regexp from a regexp.
  270. (defun xmltok-r (str)
  271. (cons str nil))
  272. ;; Concatenate zero of more regexps and symbolic regexps.
  273. (defun xmltok+ (&rest args)
  274. (let (strs names)
  275. (while args
  276. (let ((arg (car args)))
  277. (if (stringp arg)
  278. (setq strs (cons arg strs))
  279. (setq strs (cons (car arg) strs))
  280. (setq names (cons (cdr arg) names)))
  281. (setq args (cdr args))))
  282. (cons (apply 'concat (nreverse strs))
  283. (apply 'append (nreverse names))))))
  284. (eval-when-compile
  285. ;; Make a symbolic group named NAME from the regexp R.
  286. ;; R may be a symbolic regexp or an ordinary regexp.
  287. (defmacro xmltok-g (name &rest r)
  288. (let ((sym (make-symbol "r")))
  289. `(let ((,sym (xmltok+ ,@r)))
  290. (if (stringp ,sym)
  291. (cons (concat "\\(" ,sym "\\)") (cons ',name nil))
  292. (cons (concat "\\(" (car ,sym) "\\)") (cons ',name (cdr ,sym)))))))
  293. (defun xmltok-p (&rest r) (xmltok+ "\\(?:"
  294. (apply 'xmltok+ r)
  295. "\\)"))
  296. ;; Get the group index of ELEM in a LIST of symbols.
  297. (defun xmltok-get-index (elem list)
  298. (or elem
  299. (error "Missing group name"))
  300. (let ((found nil)
  301. (i 1))
  302. (while list
  303. (cond ((eq elem (car list))
  304. (setq found i)
  305. (setq list nil))
  306. (t
  307. (setq i (1+ i))
  308. (setq list (cdr list)))))
  309. (or found
  310. (error "Bad group name %s" elem))))
  311. ;; Define a macro SYM using a symbolic regexp R.
  312. ;; SYM can be called in three ways:
  313. ;; (SYM regexp)
  314. ;; expands to the regexp in R
  315. ;; (SYM start G)
  316. ;; expands to
  317. ;; (match-beginning N)
  318. ;; where N is the group index of G in R.
  319. ;; (SYM end G)
  320. ;; expands to
  321. ;; (match-end N)
  322. ;; where N is the group index of G in R.
  323. (defmacro xmltok-defregexp (sym r)
  324. `(defalias ',sym
  325. (let ((r ,r))
  326. `(macro lambda (action &optional group-name)
  327. (cond ((eq action 'regexp)
  328. ,(car r))
  329. ((or (eq action 'start) (eq action 'beginning))
  330. (list 'match-beginning (xmltok-get-index group-name
  331. ',(cdr r))))
  332. ((eq action 'end)
  333. (list 'match-end (xmltok-get-index group-name
  334. ',(cdr r))))
  335. ((eq action 'string)
  336. (list 'match-string
  337. (xmltok-get-index group-name ',(cdr r))))
  338. ((eq action 'string-no-properties)
  339. (list 'match-string-no-properties
  340. (xmltok-get-index group-name ',(cdr r))))
  341. (t (error "Invalid action: %s" action))))))))
  342. (eval-when-compile
  343. (let* ((or "\\|")
  344. (open "\\(?:")
  345. (close "\\)")
  346. (name-start-char "[_[:alpha:]]")
  347. (name-continue-not-start-char "[-.[:digit:]]")
  348. (name-continue-char "[-._[:alnum:]]")
  349. (* "*")
  350. (+ "+")
  351. (opt "?")
  352. (question "\\?")
  353. (s "[ \r\t\n]")
  354. (s+ (concat s +))
  355. (s* (concat s *))
  356. (ncname (concat name-start-char name-continue-char *))
  357. (entity-ref
  358. (xmltok+ (xmltok-g entity-name ncname)
  359. (xmltok-g entity-ref-close ";") opt))
  360. (decimal-ref
  361. (xmltok+ (xmltok-g decimal "[0-9]" +)
  362. (xmltok-g decimal-ref-close ";") opt))
  363. (hex-ref
  364. (xmltok+ "x" open
  365. (xmltok-g hex "[0-9a-fA-F]" +)
  366. (xmltok-g hex-ref-close ";") opt
  367. close opt))
  368. (char-ref
  369. (xmltok+ (xmltok-g number-sign "#")
  370. open decimal-ref or hex-ref close opt))
  371. (start-tag-close
  372. (xmltok+ open (xmltok-g start-tag-close s* ">")
  373. or open (xmltok-g empty-tag-slash s* "/")
  374. (xmltok-g empty-tag-close ">") opt close
  375. or (xmltok-g start-tag-s s+)
  376. close))
  377. (start-tag
  378. (xmltok+ (xmltok-g start-tag-name
  379. ncname (xmltok-g start-tag-colon ":" ncname) opt)
  380. start-tag-close opt))
  381. (end-tag
  382. (xmltok+ (xmltok-g end-tag-slash "/")
  383. open (xmltok-g end-tag-name
  384. ncname
  385. (xmltok-g end-tag-colon ":" ncname) opt)
  386. (xmltok-g end-tag-close s* ">") opt
  387. close opt))
  388. (comment
  389. (xmltok+ (xmltok-g markup-declaration "!")
  390. (xmltok-g comment-first-dash "-"
  391. (xmltok-g comment-open "-") opt) opt))
  392. (cdata-section
  393. (xmltok+ "!"
  394. (xmltok-g marked-section-open "\\[")
  395. open "C"
  396. open "D"
  397. open "A"
  398. open "T"
  399. open "A"
  400. (xmltok-g cdata-section-open "\\[" ) opt
  401. close opt ; A
  402. close opt ; T
  403. close opt ; A
  404. close opt ; D
  405. close opt)) ; C
  406. (processing-instruction
  407. (xmltok-g processing-instruction-question question)))
  408. (xmltok-defregexp xmltok-ncname (xmltok+ open ncname close))
  409. (xmltok-defregexp xmltok-after-amp
  410. (xmltok+ entity-ref or char-ref))
  411. (xmltok-defregexp xmltok-after-lt
  412. (xmltok+ start-tag
  413. or end-tag
  414. ;; cdata-section must come before comment
  415. ;; because we treat <! as a comment
  416. ;; and Emacs doesn't do fully greedy matching
  417. ;; by default
  418. or cdata-section
  419. or comment
  420. or processing-instruction))
  421. (xmltok-defregexp
  422. xmltok-attribute
  423. (let* ((lit1
  424. (xmltok+ "'"
  425. "[^<'&\r\n\t]*"
  426. (xmltok-g complex1 "[&\r\n\t][^<']*") opt
  427. "'"))
  428. (lit2 (cons (replace-regexp-in-string "'" "\"" (car lit1))
  429. '(complex2)))
  430. (literal (xmltok-g literal lit1 or lit2))
  431. (name (xmltok+ open (xmltok-g xmlns "xmlns") or ncname close
  432. (xmltok-g colon ":" ncname) opt)))
  433. (xmltok+ (xmltok-g name name)
  434. s* "="
  435. ;; If the literal isn't followed by what it should be,
  436. ;; then the closing delimiter is probably really the
  437. ;; opening delimiter of another literal, so don't
  438. ;; absorb the literal in this case.
  439. open s* literal start-tag-close close opt)))
  440. (xmltok-defregexp
  441. xmltok-xml-declaration
  442. (let* ((literal-content "[-._:a-zA-Z0-9]+")
  443. (literal
  444. (concat open "\"" literal-content "\""
  445. or "'" literal-content "'" close))
  446. (version-att
  447. (xmltok+ open
  448. s+ (xmltok-g version-name "version")
  449. s* "="
  450. s* (xmltok-g version-value literal)
  451. close opt))
  452. (encoding-att
  453. (xmltok+ open
  454. s+ (xmltok-g encoding-name "encoding")
  455. s* "="
  456. s* (xmltok-g encoding-value literal)
  457. close opt))
  458. (yes-no
  459. (concat open "yes" or "no" close))
  460. (standalone-att
  461. (xmltok+ open
  462. s+ (xmltok-g standalone-name "standalone")
  463. s* "="
  464. s* (xmltok-g standalone-value
  465. "\"" yes-no "\"" or "'" yes-no "'")
  466. close opt)))
  467. (xmltok+ "<" question "xml"
  468. version-att
  469. encoding-att
  470. standalone-att
  471. s* question ">")))
  472. (xmltok-defregexp
  473. xmltok-prolog
  474. (let* ((single-char (xmltok-g single-char "[[|,(\"'>]"))
  475. (internal-subset-close (xmltok-g internal-subset-close
  476. "][ \t\r\n]*>"))
  477. (starts-with-close-paren
  478. (xmltok-g close-paren
  479. ")"
  480. (xmltok-p
  481. (xmltok-g close-paren-occur "[+?]")
  482. or
  483. (xmltok-g close-paren-star "\\*"))
  484. opt))
  485. (starts-with-percent
  486. (xmltok-g percent
  487. "%" (xmltok-g param-entity-ref
  488. ncname
  489. (xmltok-g param-entity-ref-close
  490. ";") opt) opt))
  491. (starts-with-nmtoken-not-name
  492. (xmltok-g nmtoken
  493. (xmltok-p name-continue-not-start-char or ":")
  494. (xmltok-p name-continue-char or ":") *))
  495. (nmtoken-after-colon
  496. (xmltok+
  497. (xmltok-p name-continue-not-start-char or ":")
  498. (xmltok-p name-continue-char or ":") *
  499. or
  500. name-start-char
  501. name-continue-char *
  502. ":"
  503. (xmltok-p name-continue-char or ":") *))
  504. (after-ncname
  505. (xmltok+ (xmltok-g ncname-nmtoken
  506. ":" (xmltok-p nmtoken-after-colon))
  507. or (xmltok-p (xmltok-g colon ":" ncname)
  508. (xmltok-g colon-name-occur "[?+*]") opt)
  509. or (xmltok-g ncname-occur "[?+*]")
  510. or (xmltok-g ncname-colon ":")))
  511. (starts-with-name
  512. (xmltok-g name ncname (xmltok-p after-ncname) opt))
  513. (starts-with-hash
  514. (xmltok-g pound
  515. "#" (xmltok-g hash-name ncname)))
  516. (markup-declaration
  517. (xmltok-g markup-declaration
  518. "!" (xmltok-p (xmltok-g comment-first-dash "-"
  519. (xmltok-g comment-open "-") opt)
  520. or (xmltok-g named-markup-declaration
  521. ncname)) opt))
  522. (after-lt
  523. (xmltok+ markup-declaration
  524. or (xmltok-g processing-instruction-question
  525. question)
  526. or (xmltok-g instance-start
  527. ncname)))
  528. (starts-with-lt (xmltok-g less-than "<" (xmltok-p after-lt) opt)))
  529. (xmltok+ starts-with-lt
  530. or single-char
  531. or starts-with-close-paren
  532. or starts-with-percent
  533. or starts-with-name
  534. or starts-with-nmtoken-not-name
  535. or starts-with-hash
  536. or internal-subset-close)))))
  537. (defconst xmltok-ncname-regexp (xmltok-ncname regexp))
  538. (defun xmltok-scan-after-lt ()
  539. (cond ((not (looking-at (xmltok-after-lt regexp)))
  540. (xmltok-add-error "`<' that is not markup must be entered as `&lt;'")
  541. (setq xmltok-type 'not-well-formed))
  542. (t
  543. (goto-char (match-end 0))
  544. (cond ((xmltok-after-lt start start-tag-close)
  545. (setq xmltok-name-end
  546. (xmltok-after-lt end start-tag-name))
  547. (setq xmltok-name-colon
  548. (xmltok-after-lt start start-tag-colon))
  549. (setq xmltok-attributes nil)
  550. (setq xmltok-namespace-attributes nil)
  551. (setq xmltok-type 'start-tag))
  552. ((xmltok-after-lt start end-tag-close)
  553. (setq xmltok-name-end
  554. (xmltok-after-lt end end-tag-name))
  555. (setq xmltok-name-colon
  556. (xmltok-after-lt start end-tag-colon))
  557. (setq xmltok-type 'end-tag))
  558. ((xmltok-after-lt start start-tag-s)
  559. (setq xmltok-name-end
  560. (xmltok-after-lt end start-tag-name))
  561. (setq xmltok-name-colon
  562. (xmltok-after-lt start start-tag-colon))
  563. (setq xmltok-namespace-attributes nil)
  564. (setq xmltok-attributes nil)
  565. (xmltok-scan-attributes)
  566. xmltok-type)
  567. ((xmltok-after-lt start empty-tag-close)
  568. (setq xmltok-name-end
  569. (xmltok-after-lt end start-tag-name))
  570. (setq xmltok-name-colon
  571. (xmltok-after-lt start start-tag-colon))
  572. (setq xmltok-attributes nil)
  573. (setq xmltok-namespace-attributes nil)
  574. (setq xmltok-type 'empty-element))
  575. ((xmltok-after-lt start cdata-section-open)
  576. (setq xmltok-type
  577. (progn (search-forward "]]>" nil 'move)
  578. 'cdata-section)))
  579. ((xmltok-after-lt start processing-instruction-question)
  580. (xmltok-scan-after-processing-instruction-open))
  581. ((xmltok-after-lt start comment-open)
  582. (xmltok-scan-after-comment-open))
  583. ((xmltok-after-lt start empty-tag-slash)
  584. (setq xmltok-name-end
  585. (xmltok-after-lt end start-tag-name))
  586. (setq xmltok-name-colon
  587. (xmltok-after-lt start start-tag-colon))
  588. (setq xmltok-attributes nil)
  589. (setq xmltok-namespace-attributes nil)
  590. (xmltok-add-error "Expected `/>'" (1- (point)))
  591. (setq xmltok-type 'partial-empty-element))
  592. ((xmltok-after-lt start start-tag-name)
  593. (xmltok-add-error "Missing `>'"
  594. nil
  595. (1+ xmltok-start))
  596. (setq xmltok-name-end
  597. (xmltok-after-lt end start-tag-name))
  598. (setq xmltok-name-colon
  599. (xmltok-after-lt start start-tag-colon))
  600. (setq xmltok-namespace-attributes nil)
  601. (setq xmltok-attributes nil)
  602. (setq xmltok-type 'partial-start-tag))
  603. ((xmltok-after-lt start end-tag-name)
  604. (setq xmltok-name-end (xmltok-after-lt end end-tag-name))
  605. (setq xmltok-name-colon
  606. (xmltok-after-lt start end-tag-colon))
  607. (cond ((and (not xmltok-name-colon)
  608. (eq (char-after) ?:))
  609. (goto-char (1+ (point)))
  610. (xmltok-add-error "Expected name following `:'"
  611. (1- (point))))
  612. (t
  613. (xmltok-add-error "Missing `>'"
  614. nil
  615. (1+ xmltok-start))))
  616. (setq xmltok-type 'partial-end-tag))
  617. ((xmltok-after-lt start end-tag-slash)
  618. (xmltok-add-error "Expected name following `</'")
  619. (setq xmltok-name-end nil)
  620. (setq xmltok-name-colon nil)
  621. (setq xmltok-type 'partial-end-tag))
  622. ((xmltok-after-lt start marked-section-open)
  623. (xmltok-add-error "Expected `CDATA[' after `<!['"
  624. xmltok-start
  625. (+ 3 xmltok-start))
  626. (setq xmltok-type 'not-well-formed))
  627. ((xmltok-after-lt start comment-first-dash)
  628. (xmltok-add-error "Expected `-' after `<!-'"
  629. xmltok-start
  630. (+ 3 xmltok-start))
  631. (setq xmltok-type 'not-well-formed))
  632. ((xmltok-after-lt start markup-declaration)
  633. (xmltok-add-error "Expected `[CDATA[' or `--' after `<!'"
  634. xmltok-start
  635. (+ 2 xmltok-start))
  636. (setq xmltok-type 'not-well-formed))
  637. (t
  638. (xmltok-add-error "Not well-formed")
  639. (setq xmltok-type 'not-well-formed))))))
  640. ;; XXX This should be unified with
  641. ;; xmltok-scan-prolog-after-processing-instruction-open
  642. ;; XXX maybe should include rest of line (up to any <,>) in unclosed PI
  643. (defun xmltok-scan-after-processing-instruction-open ()
  644. (search-forward "?>" nil 'move)
  645. (cond ((not (save-excursion
  646. (goto-char (+ 2 xmltok-start))
  647. (and (looking-at (xmltok-ncname regexp))
  648. (setq xmltok-name-end (match-end 0)))))
  649. (setq xmltok-name-end (+ xmltok-start 2))
  650. (xmltok-add-error "<? not followed by name"
  651. (+ xmltok-start 2)
  652. (+ xmltok-start 3)))
  653. ((not (or (memq (char-after xmltok-name-end)
  654. '(?\n ?\t ?\r ? ))
  655. (= xmltok-name-end (- (point) 2))))
  656. (xmltok-add-error "Target not followed by whitespace"
  657. xmltok-name-end
  658. (1+ xmltok-name-end)))
  659. ((and (= xmltok-name-end (+ xmltok-start 5))
  660. (save-excursion
  661. (goto-char (+ xmltok-start 2))
  662. (let ((case-fold-search t))
  663. (looking-at "xml"))))
  664. (xmltok-add-error "Processing instruction target is xml"
  665. (+ xmltok-start 2)
  666. (+ xmltok-start 5))))
  667. (setq xmltok-type 'processing-instruction))
  668. (defun xmltok-scan-after-comment-open ()
  669. (while (and (re-search-forward "--\\(>\\)?" nil 'move)
  670. (not (match-end 1)))
  671. (xmltok-add-error "`--' not followed by `>'" (match-beginning 0)))
  672. (setq xmltok-type 'comment))
  673. (defun xmltok-scan-attributes ()
  674. (let ((recovering nil)
  675. (atts-needing-normalization nil))
  676. (while (cond ((or (looking-at (xmltok-attribute regexp))
  677. ;; use non-greedy group
  678. (when (looking-at (concat "[^<>\n]+?"
  679. (xmltok-attribute regexp)))
  680. (unless recovering
  681. (xmltok-add-error "Malformed attribute"
  682. (point)
  683. (save-excursion
  684. (goto-char (xmltok-attribute start
  685. name))
  686. (skip-chars-backward "\r\n\t ")
  687. (point))))
  688. t))
  689. (setq recovering nil)
  690. (goto-char (match-end 0))
  691. (let ((att (xmltok-add-attribute)))
  692. (when att
  693. (setq atts-needing-normalization
  694. (cons att atts-needing-normalization))))
  695. (cond ((xmltok-attribute start start-tag-s) t)
  696. ((xmltok-attribute start start-tag-close)
  697. (setq xmltok-type 'start-tag)
  698. nil)
  699. ((xmltok-attribute start empty-tag-close)
  700. (setq xmltok-type 'empty-element)
  701. nil)
  702. ((xmltok-attribute start empty-tag-slash)
  703. (setq xmltok-type 'partial-empty-element)
  704. (xmltok-add-error "Expected `/>'"
  705. (1- (point)))
  706. nil)
  707. ((looking-at "[ \t\r\n]*[\"']")
  708. (goto-char (match-end 0))
  709. (xmltok-add-error "Missing closing delimiter"
  710. (1- (point)))
  711. (setq recovering t)
  712. t)
  713. ((looking-at "[ \t]*\\([^ \t\r\n\"'=<>/]+\\)[ \t\r\n/>]")
  714. (goto-char (match-end 1))
  715. (xmltok-add-error "Attribute value not quoted"
  716. (match-beginning 1))
  717. (setq recovering t)
  718. t)
  719. (t
  720. (xmltok-add-error "Missing attribute value"
  721. (1- (point)))
  722. (setq recovering t)
  723. t)))
  724. ((looking-at "[^<>\n]*/>")
  725. (let ((start (point)))
  726. (goto-char (match-end 0))
  727. (unless recovering
  728. (xmltok-add-error "Malformed empty-element"
  729. start
  730. (- (point) 2))))
  731. (setq xmltok-type 'empty-element)
  732. nil)
  733. ((looking-at "[^<>\n]*>")
  734. (let ((start (point)))
  735. (goto-char (match-end 0))
  736. (unless recovering
  737. (xmltok-add-error "Malformed start-tag"
  738. start
  739. (1- (point)))))
  740. (setq xmltok-type 'start-tag)
  741. nil)
  742. (t
  743. (when recovering
  744. (skip-chars-forward "^<>\n"))
  745. (xmltok-add-error "Missing `>'"
  746. xmltok-start
  747. (1+ xmltok-start))
  748. (setq xmltok-type 'partial-start-tag)
  749. nil)))
  750. (while atts-needing-normalization
  751. (xmltok-normalize-attribute (car atts-needing-normalization))
  752. (setq atts-needing-normalization (cdr atts-needing-normalization))))
  753. (setq xmltok-attributes
  754. (nreverse xmltok-attributes))
  755. (setq xmltok-namespace-attributes
  756. (nreverse xmltok-namespace-attributes)))
  757. (defun xmltok-add-attribute ()
  758. "Return the attribute if it needs normalizing, otherwise nil."
  759. (let* ((needs-normalizing nil)
  760. (att
  761. (if (xmltok-attribute start literal)
  762. (progn
  763. (setq needs-normalizing
  764. (or (xmltok-attribute start complex1)
  765. (xmltok-attribute start complex2)))
  766. (xmltok-make-attribute (xmltok-attribute start name)
  767. (xmltok-attribute start colon)
  768. (xmltok-attribute end name)
  769. (1+ (xmltok-attribute start literal))
  770. (1- (xmltok-attribute end literal))
  771. (not needs-normalizing)))
  772. (xmltok-make-attribute (xmltok-attribute start name)
  773. (xmltok-attribute start colon)
  774. (xmltok-attribute end name)))))
  775. (if (xmltok-attribute start xmlns)
  776. (setq xmltok-namespace-attributes
  777. (cons att xmltok-namespace-attributes))
  778. (setq xmltok-attributes
  779. (cons att xmltok-attributes)))
  780. (and needs-normalizing
  781. att)))
  782. (defun xmltok-normalize-attribute (att)
  783. (let ((end (xmltok-attribute-value-end att))
  784. (well-formed t)
  785. (value-parts nil)
  786. (refs nil))
  787. (save-excursion
  788. (goto-char (xmltok-attribute-value-start att))
  789. (while (progn
  790. (let ((n (skip-chars-forward "^\r\t\n&" end)))
  791. (when (> n 0)
  792. (setq value-parts
  793. (cons (buffer-substring-no-properties (- (point) n)
  794. (point))
  795. value-parts))))
  796. (when (< (point) end)
  797. (goto-char (1+ (point)))
  798. (cond ((eq (char-before) ?\&)
  799. (let ((xmltok-start (1- (point)))
  800. xmltok-type xmltok-replacement)
  801. (xmltok-scan-after-amp
  802. (lambda (start end)
  803. (xmltok-handle-entity start end t)))
  804. (cond ((or (eq xmltok-type 'char-ref)
  805. (eq xmltok-type 'entity-ref))
  806. (setq refs
  807. (cons (vector xmltok-type
  808. xmltok-start
  809. (point))
  810. refs))
  811. (if xmltok-replacement
  812. (setq value-parts
  813. (cons xmltok-replacement
  814. value-parts))
  815. (setq well-formed nil)))
  816. (t (setq well-formed nil)))))
  817. (t (setq value-parts
  818. (cons " " value-parts)))))
  819. (< (point) end))))
  820. (when well-formed
  821. (aset att 5 (apply 'concat (nreverse value-parts))))
  822. (aset att 6 (nreverse refs))))
  823. (defun xmltok-scan-after-amp (entity-handler)
  824. (cond ((not (looking-at (xmltok-after-amp regexp)))
  825. (xmltok-add-error "`&' that is not markup must be entered as `&amp;'")
  826. (setq xmltok-type 'not-well-formed))
  827. (t
  828. (goto-char (match-end 0))
  829. (cond ((xmltok-after-amp start entity-ref-close)
  830. (funcall entity-handler
  831. (xmltok-after-amp start entity-name)
  832. (xmltok-after-amp end entity-name))
  833. (setq xmltok-type 'entity-ref))
  834. ((xmltok-after-amp start decimal-ref-close)
  835. (xmltok-scan-char-ref (xmltok-after-amp start decimal)
  836. (xmltok-after-amp end decimal)
  837. 10))
  838. ((xmltok-after-amp start hex-ref-close)
  839. (xmltok-scan-char-ref (xmltok-after-amp start hex)
  840. (xmltok-after-amp end hex)
  841. 16))
  842. ((xmltok-after-amp start number-sign)
  843. (xmltok-add-error "Missing character number")
  844. (setq xmltok-type 'not-well-formed))
  845. (t
  846. (xmltok-add-error "Missing closing `;'")
  847. (setq xmltok-type 'not-well-formed))))))
  848. (defconst xmltok-entity-error-messages
  849. '((unparsed . "Referenced entity is unparsed")
  850. (not-well-formed . "Referenced entity is not well-formed")
  851. (external nil . "Referenced entity is external")
  852. (element nil . "Referenced entity contains <")))
  853. (defun xmltok-handle-entity (start end &optional attributep)
  854. (let* ((name (buffer-substring-no-properties start end))
  855. (name-def (assoc name xmltok-dtd))
  856. (def (cdr name-def)))
  857. (cond ((setq xmltok-replacement (and (consp def)
  858. (if attributep
  859. (cdr def)
  860. (car def)))))
  861. ((null name-def)
  862. (unless (eq (car xmltok-dtd) t)
  863. (xmltok-add-error "Referenced entity has not been defined"
  864. start
  865. end)))
  866. ((and attributep (consp def))
  867. (xmltok-add-error "Referenced entity contains <"
  868. start
  869. end))
  870. (t
  871. (let ((err (cdr (assq def xmltok-entity-error-messages))))
  872. (when (consp err)
  873. (setq err (if attributep (cdr err) (car err))))
  874. (when err
  875. (xmltok-add-error err start end)))))))
  876. (defun xmltok-scan-char-ref (start end base)
  877. (setq xmltok-replacement
  878. (let ((n (string-to-number (buffer-substring-no-properties start end)
  879. base)))
  880. (cond ((and (integerp n) (xmltok-valid-char-p n))
  881. (setq n (xmltok-unicode-to-char n))
  882. (and n (string n)))
  883. (t
  884. (xmltok-add-error "Invalid character code" start end)
  885. nil))))
  886. (setq xmltok-type 'char-ref))
  887. (defun xmltok-char-number (start end)
  888. (let* ((base (if (eq (char-after (+ start 2)) ?x)
  889. 16
  890. 10))
  891. (n (string-to-number
  892. (buffer-substring-no-properties (+ start (if (= base 16) 3 2))
  893. (1- end))
  894. base)))
  895. (and (integerp n)
  896. (xmltok-valid-char-p n)
  897. n)))
  898. (defun xmltok-valid-char-p (n)
  899. "Return non-nil if N is the Unicode code of a valid XML character."
  900. (cond ((< n #x20) (memq n '(#xA #xD #x9)))
  901. ((< n #xD800) t)
  902. ((< n #xE000) nil)
  903. ((< n #xFFFE) t)
  904. (t (and (> n #xFFFF)
  905. (< n #x110000)))))
  906. (defun xmltok-unicode-to-char (n)
  907. "Return the character corresponding to Unicode scalar value N.
  908. Return nil if unsupported in Emacs."
  909. (decode-char 'ucs n))
  910. ;;; Prolog parsing
  911. (defvar xmltok-contains-doctype nil)
  912. (defvar xmltok-doctype-external-subset-flag nil)
  913. (defvar xmltok-internal-subset-start nil)
  914. (defvar xmltok-had-param-entity-ref nil)
  915. (defvar xmltok-prolog-regions nil)
  916. (defvar xmltok-standalone nil
  917. "Non-nil if there was an XML declaration specifying standalone=\"yes\".")
  918. (defvar xmltok-markup-declaration-doctype-flag nil)
  919. (defconst xmltok-predefined-entity-alist
  920. '(("lt" "<" . "<")
  921. ("gt" ">" . ">")
  922. ("amp" "&" . "&")
  923. ("apos" "'" . "'")
  924. ("quot" "\"" . "\"")))
  925. (defun xmltok-forward-prolog ()
  926. "Move forward to the end of the XML prolog.
  927. Returns a list of vectors [TYPE START END] where TYPE is a symbol and
  928. START and END are integers giving the start and end of the region of
  929. that type. TYPE can be one of xml-declaration,
  930. xml-declaration-attribute-name, xml-declaration-attribute-value,
  931. comment, processing-instruction-left, processing-instruction-right,
  932. markup-declaration-open, markup-declaration-close,
  933. internal-subset-open, internal-subset-close, hash-name, keyword,
  934. literal, encoding-name.
  935. Adds to `xmltok-errors' as appropriate."
  936. (let ((case-fold-search nil)
  937. xmltok-start
  938. xmltok-type
  939. xmltok-prolog-regions
  940. xmltok-contains-doctype
  941. xmltok-internal-subset-start
  942. xmltok-had-param-entity-ref
  943. xmltok-standalone
  944. xmltok-doctype-external-subset-flag
  945. xmltok-markup-declaration-doctype-flag)
  946. (setq xmltok-dtd xmltok-predefined-entity-alist)
  947. (xmltok-scan-xml-declaration)
  948. (xmltok-next-prolog-token)
  949. (while (condition-case nil
  950. (when (xmltok-parse-prolog-item)
  951. (xmltok-next-prolog-token))
  952. (xmltok-markup-declaration-parse-error
  953. (xmltok-skip-markup-declaration))))
  954. (when xmltok-internal-subset-start
  955. (xmltok-add-error "No closing ]"
  956. (1- xmltok-internal-subset-start)
  957. xmltok-internal-subset-start))
  958. (xmltok-parse-entities)
  959. (nreverse xmltok-prolog-regions)))
  960. (defconst xmltok-bad-xml-decl-regexp
  961. "[ \t\r\n]*<\\?xml\\(?:[ \t\r\n]\\|\\?>\\)")
  962. ;;;###autoload
  963. (defun xmltok-get-declared-encoding-position (&optional limit)
  964. "Return the position of the encoding in the XML declaration at point.
  965. If there is a well-formed XML declaration starting at point and it
  966. contains an encoding declaration, then return (START . END)
  967. where START and END are the positions of the start and the end
  968. of the encoding name; if there is no encoding declaration return
  969. the position where and encoding declaration could be inserted.
  970. If there is XML that is not well-formed that looks like an XML
  971. declaration, return nil. Otherwise, return t.
  972. If LIMIT is non-nil, then do not consider characters beyond LIMIT."
  973. (cond ((let ((case-fold-search nil))
  974. (and (looking-at (xmltok-xml-declaration regexp))
  975. (or (not limit) (<= (match-end 0) limit))))
  976. (let ((end (xmltok-xml-declaration end encoding-value)))
  977. (if end
  978. (cons (1+ (xmltok-xml-declaration start encoding-value))
  979. (1- end))
  980. (or (xmltok-xml-declaration end version-value)
  981. (+ (point) 5)))))
  982. ((not (let ((case-fold-search t))
  983. (looking-at xmltok-bad-xml-decl-regexp))))))
  984. (defun xmltok-scan-xml-declaration ()
  985. (when (looking-at (xmltok-xml-declaration regexp))
  986. (xmltok-add-prolog-region 'xml-declaration (point) (match-end 0))
  987. (goto-char (match-end 0))
  988. (when (xmltok-xml-declaration start version-name)
  989. (xmltok-add-prolog-region 'xml-declaration-attribute-name
  990. (xmltok-xml-declaration start version-name)
  991. (xmltok-xml-declaration end version-name))
  992. (let ((start (xmltok-xml-declaration start version-value))
  993. (end (xmltok-xml-declaration end version-value)))
  994. (xmltok-add-prolog-region 'xml-declaration-attribute-value
  995. start
  996. end)))
  997. ;; XXX need to check encoding name
  998. ;; Should start with letter, not contain colon
  999. (when (xmltok-xml-declaration start encoding-name)
  1000. (xmltok-add-prolog-region 'xml-declaration-attribute-name
  1001. (xmltok-xml-declaration start encoding-name)
  1002. (xmltok-xml-declaration end encoding-name))
  1003. (let ((start (xmltok-xml-declaration start encoding-value))
  1004. (end (xmltok-xml-declaration end encoding-value)))
  1005. (xmltok-add-prolog-region 'encoding-name
  1006. (1+ start)
  1007. (1- end))
  1008. (xmltok-add-prolog-region 'xml-declaration-attribute-value
  1009. start
  1010. end)))
  1011. (when (xmltok-xml-declaration start standalone-name)
  1012. (xmltok-add-prolog-region 'xml-declaration-attribute-name
  1013. (xmltok-xml-declaration start standalone-name)
  1014. (xmltok-xml-declaration end standalone-name))
  1015. (let ((start (xmltok-xml-declaration start standalone-value))
  1016. (end (xmltok-xml-declaration end standalone-value)))
  1017. (xmltok-add-prolog-region 'xml-declaration-attribute-value
  1018. start
  1019. end)
  1020. (setq xmltok-standalone
  1021. (string= (buffer-substring-no-properties (1+ start) (1- end))
  1022. "yes"))))
  1023. t))
  1024. (defconst xmltok-markup-declaration-alist
  1025. '(("ELEMENT" . xmltok-parse-element-declaration)
  1026. ("ATTLIST" . xmltok-parse-attlist-declaration)
  1027. ("ENTITY" . xmltok-parse-entity-declaration)
  1028. ("NOTATION" . xmltok-parse-notation-declaration)))
  1029. (defun xmltok-parse-prolog-item ()
  1030. (cond ((eq xmltok-type 'comment)
  1031. (xmltok-add-prolog-region 'comment
  1032. xmltok-start
  1033. (point))
  1034. t)
  1035. ((eq xmltok-type 'processing-instruction))
  1036. ((eq xmltok-type 'named-markup-declaration)
  1037. (setq xmltok-markup-declaration-doctype-flag nil)
  1038. (xmltok-add-prolog-region 'markup-declaration-open
  1039. xmltok-start
  1040. (point))
  1041. (let* ((name (buffer-substring-no-properties
  1042. (+ xmltok-start 2)
  1043. (point)))
  1044. (fun (cdr (assoc name xmltok-markup-declaration-alist))))
  1045. (cond (fun
  1046. (unless xmltok-internal-subset-start
  1047. (xmltok-add-error
  1048. "Declaration allowed only in internal subset"))
  1049. (funcall fun))
  1050. ((string= name "DOCTYPE")
  1051. (xmltok-parse-doctype))
  1052. (t
  1053. (xmltok-add-error "Unknown markup declaration"
  1054. (+ xmltok-start 2))
  1055. (xmltok-next-prolog-token)
  1056. (xmltok-markup-declaration-parse-error))))
  1057. t)
  1058. ((or (eq xmltok-type 'end-prolog)
  1059. (not xmltok-type))
  1060. nil)
  1061. ((eq xmltok-type 'internal-subset-close)
  1062. (xmltok-add-prolog-region 'internal-subset-close
  1063. xmltok-start
  1064. (1+ xmltok-start))
  1065. (xmltok-add-prolog-region 'markup-declaration-close
  1066. (1- (point))
  1067. (point))
  1068. (if xmltok-internal-subset-start
  1069. (setq xmltok-internal-subset-start nil)
  1070. (xmltok-add-error "]> outside internal subset"))
  1071. t)
  1072. ((eq xmltok-type 'param-entity-ref)
  1073. (if xmltok-internal-subset-start
  1074. (setq xmltok-had-param-entity-ref t)
  1075. (xmltok-add-error "Parameter entity reference outside document type declaration"))
  1076. t)
  1077. ;; If we don't do this, we can get thousands of errors when
  1078. ;; a plain text file is parsed.
  1079. ((not xmltok-internal-subset-start)
  1080. (when (let ((err (car xmltok-errors)))
  1081. (or (not err)
  1082. (<= (xmltok-error-end err) xmltok-start)))
  1083. (goto-char xmltok-start))
  1084. nil)
  1085. ((eq xmltok-type 'not-well-formed) t)
  1086. (t
  1087. (xmltok-add-error "Token allowed only inside markup declaration")
  1088. t)))
  1089. (defun xmltok-parse-doctype ()
  1090. (setq xmltok-markup-declaration-doctype-flag t)
  1091. (xmltok-next-prolog-token)
  1092. (when xmltok-internal-subset-start
  1093. (xmltok-add-error "DOCTYPE declaration not allowed in internal subset")
  1094. (xmltok-markup-declaration-parse-error))
  1095. (when xmltok-contains-doctype
  1096. (xmltok-add-error "Duplicate DOCTYPE declaration")
  1097. (xmltok-markup-declaration-parse-error))
  1098. (setq xmltok-contains-doctype t)
  1099. (xmltok-require-token 'name 'prefixed-name)
  1100. (xmltok-require-next-token "SYSTEM" "PUBLIC" ?\[ ?>)
  1101. (cond ((eq xmltok-type ?\[)
  1102. (setq xmltok-internal-subset-start (point)))
  1103. ((eq xmltok-type ?>))
  1104. (t
  1105. (setq xmltok-doctype-external-subset-flag t)
  1106. (xmltok-parse-external-id)
  1107. (xmltok-require-token ?\[ ?>)
  1108. (when (eq xmltok-type ?\[)
  1109. (setq xmltok-internal-subset-start (point))))))
  1110. (defun xmltok-parse-attlist-declaration ()
  1111. (xmltok-require-next-token 'prefixed-name 'name)
  1112. (while (progn
  1113. (xmltok-require-next-token ?> 'name 'prefixed-name)
  1114. (if (eq xmltok-type ?>)
  1115. nil
  1116. (xmltok-require-next-token ?\(
  1117. "CDATA"
  1118. "ID"
  1119. "IDREF"
  1120. "IDREFS"
  1121. "ENTITY"
  1122. "ENTITIES"
  1123. "NMTOKEN"
  1124. "NMTOKENS"
  1125. "NOTATION")
  1126. (cond ((eq xmltok-type ?\()
  1127. (xmltok-parse-nmtoken-group))
  1128. ((string= (xmltok-current-token-string)
  1129. "NOTATION")
  1130. (xmltok-require-next-token ?\()
  1131. (xmltok-parse-nmtoken-group)))
  1132. (xmltok-require-next-token "#IMPLIED"
  1133. "#REQUIRED"
  1134. "#FIXED"
  1135. 'literal)
  1136. (when (string= (xmltok-current-token-string) "#FIXED")
  1137. (xmltok-require-next-token 'literal))
  1138. t))))
  1139. (defun xmltok-parse-nmtoken-group ()
  1140. (while (progn
  1141. (xmltok-require-next-token 'nmtoken 'prefixed-name 'name)
  1142. (xmltok-require-next-token ?| ?\))
  1143. (eq xmltok-type ?|))))
  1144. (defun xmltok-parse-element-declaration ()
  1145. (xmltok-require-next-token 'name 'prefixed-name)
  1146. (xmltok-require-next-token "EMPTY" "ANY" ?\()
  1147. (when (eq xmltok-type ?\()
  1148. (xmltok-require-next-token "#PCDATA"
  1149. 'name
  1150. 'prefixed-name
  1151. 'name-occur
  1152. ?\()
  1153. (cond ((eq xmltok-type 'hash-name)
  1154. (xmltok-require-next-token ?| ?\) 'close-paren-star)
  1155. (while (eq xmltok-type ?|)
  1156. (xmltok-require-next-token 'name 'prefixed-name)
  1157. (xmltok-require-next-token 'close-paren-star ?|)))
  1158. (t (xmltok-parse-model-group))))
  1159. (xmltok-require-next-token ?>))
  1160. (defun xmltok-parse-model-group ()
  1161. (xmltok-parse-model-group-member)
  1162. (xmltok-require-next-token ?|
  1163. ?,
  1164. ?\)
  1165. 'close-paren-star
  1166. 'close-paren-occur)
  1167. (when (memq xmltok-type '(?, ?|))
  1168. (let ((connector xmltok-type))
  1169. (while (progn
  1170. (xmltok-next-prolog-token)
  1171. (xmltok-parse-model-group-member)
  1172. (xmltok-require-next-token connector
  1173. ?\)
  1174. 'close-paren-star
  1175. 'close-paren-occur)
  1176. (eq xmltok-type connector))))))
  1177. (defun xmltok-parse-model-group-member ()
  1178. (xmltok-require-token 'name
  1179. 'prefixed-name
  1180. 'name-occur
  1181. ?\()
  1182. (when (eq xmltok-type ?\()
  1183. (xmltok-next-prolog-token)
  1184. (xmltok-parse-model-group)))
  1185. (defun xmltok-parse-entity-declaration ()
  1186. (let (paramp name)
  1187. (xmltok-require-next-token 'name ?%)
  1188. (when (eq xmltok-type ?%)
  1189. (setq paramp t)
  1190. (xmltok-require-next-token 'name))
  1191. (setq name (xmltok-current-token-string))
  1192. (xmltok-require-next-token 'literal "SYSTEM" "PUBLIC")
  1193. (cond ((eq xmltok-type 'literal)
  1194. (let ((replacement (xmltok-parse-entity-value)))
  1195. (unless paramp
  1196. (xmltok-define-entity name replacement)))
  1197. (xmltok-require-next-token ?>))
  1198. (t
  1199. (xmltok-parse-external-id)
  1200. (if paramp
  1201. (xmltok-require-token ?>)
  1202. (xmltok-require-token ?> "NDATA")
  1203. (if (eq xmltok-type ?>)
  1204. (xmltok-define-entity name 'external)
  1205. (xmltok-require-next-token 'name)
  1206. (xmltok-require-next-token ?>)
  1207. (xmltok-define-entity name 'unparsed)))))))
  1208. (defun xmltok-define-entity (name value)
  1209. (when (and (or (not xmltok-had-param-entity-ref)
  1210. xmltok-standalone)
  1211. (not (assoc name xmltok-dtd)))
  1212. (setq xmltok-dtd
  1213. (cons (cons name value) xmltok-dtd))))
  1214. (defun xmltok-parse-entity-value ()
  1215. (let ((lim (1- (point)))
  1216. (well-formed t)
  1217. value-parts
  1218. start)
  1219. (save-excursion
  1220. (goto-char (1+ xmltok-start))
  1221. (setq start (point))
  1222. (while (progn
  1223. (skip-chars-forward "^%&" lim)
  1224. (when (< (point) lim)
  1225. (goto-char (1+ (point)))
  1226. (cond ((eq (char-before) ?%)
  1227. (xmltok-add-error "Parameter entity references are not allowed in the internal subset"
  1228. (1- (point))
  1229. (point))
  1230. (setq well-formed nil))
  1231. (t
  1232. (let ((xmltok-start (1- (point)))
  1233. xmltok-type xmltok-replacement)
  1234. (xmltok-scan-after-amp (lambda (_start _end)))
  1235. (cond ((eq xmltok-type 'char-ref)
  1236. (setq value-parts
  1237. (cons (buffer-substring-no-properties
  1238. start
  1239. xmltok-start)
  1240. value-parts))
  1241. (setq value-parts
  1242. (cons xmltok-replacement
  1243. value-parts))
  1244. (setq start (point)))
  1245. ((eq xmltok-type 'not-well-formed)
  1246. (setq well-formed nil))))))
  1247. t))))
  1248. (if (not well-formed)
  1249. nil
  1250. (apply 'concat
  1251. (nreverse (cons (buffer-substring-no-properties start lim)
  1252. value-parts))))))
  1253. (defun xmltok-parse-notation-declaration ()
  1254. (xmltok-require-next-token 'name)
  1255. (xmltok-require-next-token "SYSTEM" "PUBLIC")
  1256. (let ((publicp (string= (xmltok-current-token-string) "PUBLIC")))
  1257. (xmltok-require-next-token 'literal)
  1258. (cond (publicp
  1259. (xmltok-require-next-token 'literal ?>)
  1260. (unless (eq xmltok-type ?>)
  1261. (xmltok-require-next-token ?>)))
  1262. (t (xmltok-require-next-token ?>)))))
  1263. (defun xmltok-parse-external-id ()
  1264. (xmltok-require-token "SYSTEM" "PUBLIC")
  1265. (let ((publicp (string= (xmltok-current-token-string) "PUBLIC")))
  1266. (xmltok-require-next-token 'literal)
  1267. (when publicp
  1268. (xmltok-require-next-token 'literal)))
  1269. (xmltok-next-prolog-token))
  1270. (defun xmltok-require-next-token (&rest types)
  1271. (xmltok-next-prolog-token)
  1272. (apply 'xmltok-require-token types))
  1273. (defun xmltok-require-token (&rest types)
  1274. ;; XXX Generate a more helpful error message
  1275. (while (and (not (let ((type (car types)))
  1276. (if (stringp (car types))
  1277. (string= (xmltok-current-token-string) type)
  1278. (eq type xmltok-type))))
  1279. (setq types (cdr types))))
  1280. (unless types
  1281. (when (and xmltok-type
  1282. (not (eq xmltok-type 'not-well-formed)))
  1283. (xmltok-add-error "Unexpected token"))
  1284. (xmltok-markup-declaration-parse-error))
  1285. (let ((region-type (xmltok-prolog-region-type (car types))))
  1286. (when region-type
  1287. (xmltok-add-prolog-region region-type
  1288. xmltok-start
  1289. (point)))))
  1290. (defun xmltok-current-token-string ()
  1291. (buffer-substring-no-properties xmltok-start (point)))
  1292. (define-error 'xmltok-markup-declaration-parse-error
  1293. "Syntax error in markup declaration")
  1294. (defun xmltok-markup-declaration-parse-error ()
  1295. (signal 'xmltok-markup-declaration-parse-error nil))
  1296. (defun xmltok-skip-markup-declaration ()
  1297. (while (cond ((eq xmltok-type ?>)
  1298. (xmltok-next-prolog-token)
  1299. nil)
  1300. ((and xmltok-markup-declaration-doctype-flag
  1301. (eq xmltok-type ?\[))
  1302. (setq xmltok-internal-subset-start (point))
  1303. (xmltok-next-prolog-token)
  1304. nil)
  1305. ((memq xmltok-type '(nil
  1306. end-prolog
  1307. named-markup-declaration
  1308. comment
  1309. processing-instruction))
  1310. nil)
  1311. ((and xmltok-internal-subset-start
  1312. (eq xmltok-type 'internal-subset-close))
  1313. nil)
  1314. (t (xmltok-next-prolog-token) t)))
  1315. xmltok-type)
  1316. (defun xmltok-prolog-region-type (required)
  1317. (cond ((cdr (assq xmltok-type
  1318. '((literal . literal)
  1319. (?> . markup-declaration-close)
  1320. (?\[ . internal-subset-open)
  1321. (hash-name . hash-name)))))
  1322. ((and (stringp required) (eq xmltok-type 'name))
  1323. 'keyword)))
  1324. ;; Return new token type.
  1325. (defun xmltok-next-prolog-token ()
  1326. (skip-chars-forward " \t\r\n")
  1327. (setq xmltok-start (point))
  1328. (cond ((not (and (looking-at (xmltok-prolog regexp))
  1329. (goto-char (match-end 0))))
  1330. (let ((ch (char-after)))
  1331. (cond (ch
  1332. (goto-char (1+ (point)))
  1333. (xmltok-add-error "Illegal char in prolog")
  1334. (setq xmltok-type 'not-well-formed))
  1335. (t (setq xmltok-type nil)))))
  1336. ((or (xmltok-prolog start ncname-occur)
  1337. (xmltok-prolog start colon-name-occur))
  1338. (setq xmltok-name-end (1- (point)))
  1339. (setq xmltok-name-colon (xmltok-prolog start colon))
  1340. (setq xmltok-type 'name-occur))
  1341. ((xmltok-prolog start colon)
  1342. (setq xmltok-name-end (point))
  1343. (setq xmltok-name-colon (xmltok-prolog start colon))
  1344. (unless (looking-at "[ \t\r\n>),|[%]")
  1345. (xmltok-add-error "Missing space after name"))
  1346. (setq xmltok-type 'prefixed-name))
  1347. ((or (xmltok-prolog start ncname-nmtoken)
  1348. (xmltok-prolog start ncname-colon))
  1349. (unless (looking-at "[ \t\r\n>),|[%]")
  1350. (xmltok-add-error "Missing space after name token"))
  1351. (setq xmltok-type 'nmtoken))
  1352. ((xmltok-prolog start name)
  1353. (setq xmltok-name-end (point))
  1354. (setq xmltok-name-colon nil)
  1355. (unless (looking-at "[ \t\r\n>),|[%]")
  1356. (xmltok-add-error "Missing space after name"))
  1357. (setq xmltok-type 'name))
  1358. ((xmltok-prolog start hash-name)
  1359. (setq xmltok-name-end (point))
  1360. (unless (looking-at "[ \t\r\n>)|%]")
  1361. (xmltok-add-error "Missing space after name"))
  1362. (setq xmltok-type 'hash-name))
  1363. ((xmltok-prolog start processing-instruction-question)
  1364. (xmltok-scan-prolog-after-processing-instruction-open))
  1365. ((xmltok-prolog start comment-open)
  1366. ;; XXX if not-well-formed, ignore some stuff
  1367. (xmltok-scan-after-comment-open))
  1368. ((xmltok-prolog start named-markup-declaration)
  1369. (setq xmltok-type 'named-markup-declaration))
  1370. ((xmltok-prolog start instance-start)
  1371. (goto-char xmltok-start)
  1372. (setq xmltok-type 'end-prolog))
  1373. ((xmltok-prolog start close-paren-star)
  1374. (setq xmltok-type 'close-paren-star))
  1375. ((xmltok-prolog start close-paren-occur)
  1376. (setq xmltok-type 'close-paren-occur))
  1377. ((xmltok-prolog start close-paren)
  1378. (unless (looking-at "[ \t\r\n>,|)]")
  1379. (xmltok-add-error "Missing space after )"))
  1380. (setq xmltok-type ?\)))
  1381. ((xmltok-prolog start single-char)
  1382. (let ((ch (char-before)))
  1383. (cond ((memq ch '(?\" ?\'))
  1384. (xmltok-scan-prolog-literal))
  1385. (t (setq xmltok-type ch)))))
  1386. ((xmltok-prolog start percent)
  1387. (cond ((xmltok-prolog start param-entity-ref-close)
  1388. (setq xmltok-name-end (1- (point)))
  1389. (setq xmltok-type 'param-entity-ref))
  1390. ((xmltok-prolog start param-entity-ref)
  1391. (xmltok-add-error "Missing ;")
  1392. (setq xmltok-name-end (point))
  1393. (setq xmltok-type 'param-entity-ref))
  1394. ((looking-at "[ \t\r\n%]")
  1395. (setq xmltok-type ?%))
  1396. (t
  1397. (xmltok-add-error "Expected name after %")
  1398. (setq xmltok-type 'not-well-formed))))
  1399. ((xmltok-prolog start nmtoken)
  1400. (unless (looking-at "[ \t\r\n>),|[%]")
  1401. (xmltok-add-error "Missing space after name token"))
  1402. (setq xmltok-type 'nmtoken))
  1403. ((xmltok-prolog start internal-subset-close)
  1404. (setq xmltok-type 'internal-subset-close))
  1405. ((xmltok-prolog start pound)
  1406. (xmltok-add-error "Expected name after #")
  1407. (setq xmltok-type 'not-well-formed))
  1408. ((xmltok-prolog start markup-declaration)
  1409. (xmltok-add-error "Expected name or -- after <!")
  1410. (setq xmltok-type 'not-well-formed))
  1411. ((xmltok-prolog start comment-first-dash)
  1412. (xmltok-add-error "Expected <!--")
  1413. (setq xmltok-type 'not-well-formed))
  1414. ((xmltok-prolog start less-than)
  1415. (xmltok-add-error "Incomplete markup")
  1416. (setq xmltok-type 'not-well-formed))
  1417. (t (error "Unhandled token in prolog %s"
  1418. (match-string-no-properties 0)))))
  1419. (defun xmltok-scan-prolog-literal ()
  1420. (let* ((delim (string (char-before)))
  1421. (safe-end (save-excursion
  1422. (skip-chars-forward (concat "^<>[]" delim))
  1423. (point)))
  1424. (end (save-excursion
  1425. (goto-char safe-end)
  1426. (search-forward delim nil t))))
  1427. (cond ((or (not end)
  1428. (save-excursion
  1429. (goto-char end)
  1430. (looking-at "[ \t\r\n>%[]")))
  1431. (goto-char end))
  1432. ((eq (1+ safe-end) end)
  1433. (goto-char end)
  1434. (xmltok-add-error (format "Missing space after %s" delim)
  1435. safe-end)))
  1436. (setq xmltok-type 'literal)))
  1437. (defun xmltok-scan-prolog-after-processing-instruction-open ()
  1438. (search-forward "?>" nil 'move)
  1439. (let* ((end (point))
  1440. (target
  1441. (save-excursion
  1442. (goto-char (+ xmltok-start 2))
  1443. (and (looking-at (xmltok-ncname regexp))
  1444. (or (memq (char-after (match-end 0))
  1445. '(?\n ?\t ?\r ? ))
  1446. (= (match-end 0) (- end 2)))
  1447. (match-string-no-properties 0)))))
  1448. (cond ((not target)
  1449. (xmltok-add-error "\
  1450. Processing instruction does not start with a name"
  1451. (+ xmltok-start 2)
  1452. (+ xmltok-start 3)))
  1453. ((not (and (= (length target) 3)
  1454. (let ((case-fold-search t))
  1455. (string-match "xml" target)))))
  1456. ((= xmltok-start 1)
  1457. (xmltok-add-error "Invalid XML declaration"
  1458. xmltok-start
  1459. (point)))
  1460. ((save-excursion
  1461. (goto-char xmltok-start)
  1462. (looking-at (xmltok-xml-declaration regexp)))
  1463. (xmltok-add-error "XML declaration not at beginning of file"
  1464. xmltok-start
  1465. (point)))
  1466. (t
  1467. (xmltok-add-error "Processing instruction has target of xml"
  1468. (+ xmltok-start 2)
  1469. (+ xmltok-start 5))))
  1470. (xmltok-add-prolog-region 'processing-instruction-left
  1471. xmltok-start
  1472. (+ xmltok-start
  1473. 2
  1474. (if target
  1475. (length target)
  1476. 0)))
  1477. (xmltok-add-prolog-region 'processing-instruction-right
  1478. (if target
  1479. (save-excursion
  1480. (goto-char (+ xmltok-start
  1481. (length target)
  1482. 2))
  1483. (skip-chars-forward " \t\r\n")
  1484. (point))
  1485. (+ xmltok-start 2))
  1486. (point)))
  1487. (setq xmltok-type 'processing-instruction))
  1488. (defun xmltok-parse-entities ()
  1489. (let ((todo xmltok-dtd))
  1490. (when (and (or xmltok-had-param-entity-ref
  1491. xmltok-doctype-external-subset-flag)
  1492. (not xmltok-standalone))
  1493. (setq xmltok-dtd (cons t xmltok-dtd)))
  1494. (while todo
  1495. (xmltok-parse-entity (car todo))
  1496. (setq todo (cdr todo)))))
  1497. (defun xmltok-parse-entity (name-def)
  1498. (let ((def (cdr name-def))
  1499. ;; in case its value is buffer local
  1500. (xmltok-dtd xmltok-dtd)
  1501. buf)
  1502. (when (stringp def)
  1503. (if (string-match "\\`[^&<\t\r\n]*\\'" def)
  1504. (setcdr name-def (cons def def))
  1505. (setcdr name-def 'not-well-formed) ; avoid infinite expansion loops
  1506. (setq buf (get-buffer-create
  1507. (format " *Entity %s*" (car name-def))))
  1508. (with-current-buffer buf
  1509. (erase-buffer)
  1510. (insert def)
  1511. (goto-char (point-min))
  1512. (setcdr name-def
  1513. (xmltok-parse-entity-replacement)))
  1514. (kill-buffer buf)))))
  1515. (defun xmltok-parse-entity-replacement ()
  1516. (let ((def (cons "" "")))
  1517. (while (let* ((start (point))
  1518. (found (re-search-forward "[<&\t\r\n]\\|]]>" nil t))
  1519. (ch (and found (char-before)))
  1520. (str (buffer-substring-no-properties
  1521. start
  1522. (if found
  1523. (match-beginning 0)
  1524. (point-max)))))
  1525. (setq def
  1526. (xmltok-append-entity-def def
  1527. (cons str str)))
  1528. (cond ((not found) nil)
  1529. ((eq ch ?>)
  1530. (setq def 'not-well-formed)
  1531. nil)
  1532. ((eq ch ?<)
  1533. (xmltok-save
  1534. (setq xmltok-start (1- (point)))
  1535. (xmltok-scan-after-lt)
  1536. (setq def
  1537. (xmltok-append-entity-def
  1538. def
  1539. (cond ((memq xmltok-type
  1540. '(start-tag
  1541. end-tag
  1542. empty-element))
  1543. 'element)
  1544. ((memq xmltok-type
  1545. '(comment
  1546. processing-instruction))
  1547. (cons "" nil))
  1548. ((eq xmltok-type
  1549. 'cdata-section)
  1550. (cons (buffer-substring-no-properties
  1551. (+ xmltok-start 9)
  1552. (- (point) 3))
  1553. nil))
  1554. (t 'not-well-formed)))))
  1555. t)
  1556. ((eq ch ?&)
  1557. (let ((xmltok-start (1- (point)))
  1558. xmltok-type
  1559. xmltok-replacement
  1560. xmltok-errors)
  1561. (xmltok-scan-after-amp 'xmltok-handle-nested-entity)
  1562. (cond ((eq xmltok-type 'entity-ref)
  1563. (setq def
  1564. (xmltok-append-entity-def
  1565. def
  1566. xmltok-replacement)))
  1567. ((eq xmltok-type 'char-ref)
  1568. (setq def
  1569. (xmltok-append-entity-def
  1570. def
  1571. (if xmltok-replacement
  1572. (cons xmltok-replacement
  1573. xmltok-replacement)
  1574. (and xmltok-errors 'not-well-formed)))))
  1575. (t
  1576. (setq def 'not-well-formed))))
  1577. t)
  1578. (t
  1579. (setq def
  1580. (xmltok-append-entity-def
  1581. def
  1582. (cons (match-string-no-properties 0)
  1583. " ")))
  1584. t))))
  1585. def))
  1586. (defun xmltok-handle-nested-entity (start end)
  1587. (let* ((name-def (assoc (buffer-substring-no-properties start end)
  1588. xmltok-dtd))
  1589. (def (cdr name-def)))
  1590. (when (stringp def)
  1591. (xmltok-parse-entity name-def)
  1592. (setq def (cdr name-def)))
  1593. (setq xmltok-replacement
  1594. (cond ((null name-def)
  1595. (if (eq (car xmltok-dtd) t)
  1596. nil
  1597. 'not-well-formed))
  1598. ((eq def 'unparsed) 'not-well-formed)
  1599. (t def)))))
  1600. (defun xmltok-append-entity-def (d1 d2)
  1601. (cond ((consp d1)
  1602. (if (consp d2)
  1603. (cons (concat (car d1) (car d2))
  1604. (and (cdr d1)
  1605. (cdr d2)
  1606. (concat (cdr d1) (cdr d2))))
  1607. d2))
  1608. ((consp d2) d1)
  1609. (t
  1610. (let ((defs '(not-well-formed external element)))
  1611. (while (not (or (eq (car defs) d1)
  1612. (eq (car defs) d2)))
  1613. (setq defs (cdr defs)))
  1614. (car defs)))))
  1615. (defun xmltok-add-prolog-region (type start end)
  1616. (setq xmltok-prolog-regions
  1617. (cons (vector type start end)
  1618. xmltok-prolog-regions)))
  1619. (defun xmltok-merge-attributes ()
  1620. "Return a list merging `xmltok-attributes' and `xmltok-namespace-attributes'.
  1621. The members of the merged list are in order of occurrence in the
  1622. document. The list may share list structure with `xmltok-attributes'
  1623. and `xmltok-namespace-attributes'."
  1624. (cond ((not xmltok-namespace-attributes)
  1625. xmltok-attributes)
  1626. ((not xmltok-attributes)
  1627. xmltok-namespace-attributes)
  1628. (t
  1629. (let ((atts1 xmltok-attributes)
  1630. (atts2 xmltok-namespace-attributes)
  1631. merged)
  1632. (while (and atts1 atts2)
  1633. (cond ((< (xmltok-attribute-name-start (car atts1))
  1634. (xmltok-attribute-name-start (car atts2)))
  1635. (setq merged (cons (car atts1) merged))
  1636. (setq atts1 (cdr atts1)))
  1637. (t
  1638. (setq merged (cons (car atts2) merged))
  1639. (setq atts2 (cdr atts2)))))
  1640. (setq merged (nreverse merged))
  1641. (cond (atts1 (setq merged (nconc merged atts1)))
  1642. (atts2 (setq merged (nconc merged atts2))))
  1643. merged))))
  1644. ;;; Testing
  1645. (defun xmltok-forward-test ()
  1646. (interactive)
  1647. (if (xmltok-forward)
  1648. (message "Scanned %s" xmltok-type)
  1649. (message "Scanned nothing")))
  1650. (defun xmltok-next-prolog-token-test ()
  1651. (interactive)
  1652. (if (xmltok-next-prolog-token)
  1653. (message "Scanned %s"
  1654. (if (integerp xmltok-type)
  1655. (string xmltok-type)
  1656. xmltok-type))
  1657. (message "Scanned end of file")))
  1658. (provide 'xmltok)
  1659. ;;; xmltok.el ends here