rng-cmpct.el 28 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929
  1. ;;; rng-cmpct.el --- parsing of RELAX NG Compact Syntax schemas -*- lexical-binding:t -*-
  2. ;; Copyright (C) 2003, 2007-2017 Free Software Foundation, Inc.
  3. ;; Author: James Clark
  4. ;; Keywords: wp, hypermedia, languages, XML, RelaxNG
  5. ;; This file is part of GNU Emacs.
  6. ;; GNU Emacs is free software: you can redistribute it and/or modify
  7. ;; it under the terms of the GNU General Public License as published by
  8. ;; the Free Software Foundation, either version 3 of the License, or
  9. ;; (at your option) any later version.
  10. ;; GNU Emacs is distributed in the hope that it will be useful,
  11. ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. ;; GNU General Public License for more details.
  14. ;; You should have received a copy of the GNU General Public License
  15. ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
  16. ;;; Commentary:
  17. ;; This parses a RELAX NG Compact Syntax schema into the form
  18. ;; specified in rng-pttrn.el.
  19. ;;
  20. ;; RELAX NG Compact Syntax is specified by
  21. ;; http://relaxng.org/compact.html
  22. ;;
  23. ;; This file uses the prefix "rng-c-".
  24. ;;; Code:
  25. (require 'nxml-util)
  26. (require 'rng-util)
  27. (require 'rng-uri)
  28. (require 'rng-pttrn)
  29. ;;;###autoload
  30. (defun rng-c-load-schema (filename)
  31. "Load a schema in RELAX NG compact syntax from FILENAME.
  32. Return a pattern."
  33. (rng-c-parse-file filename))
  34. ;;; Error handling
  35. (define-error 'rng-c-incorrect-schema
  36. "Incorrect schema" '(rng-error nxml-file-parse-error))
  37. (defun rng-c-signal-incorrect-schema (filename pos message)
  38. (nxml-signal-file-parse-error filename
  39. pos
  40. message
  41. 'rng-c-incorrect-schema))
  42. ;;; Lexing
  43. (defconst rng-c-keywords
  44. '("attribute"
  45. "default"
  46. "datatypes"
  47. "div"
  48. "element"
  49. "empty"
  50. "external"
  51. "grammar"
  52. "include"
  53. "inherit"
  54. "list"
  55. "mixed"
  56. "namespace"
  57. "notAllowed"
  58. "parent"
  59. "start"
  60. "string"
  61. "text"
  62. "token")
  63. "List of strings that are keywords in the compact syntax.")
  64. (defconst rng-c-anchored-keyword-re
  65. (concat "\\`\\(" (regexp-opt rng-c-keywords) "\\)\\'")
  66. "Regular expression to match a keyword in the compact syntax.")
  67. (defvar rng-c-syntax-table nil
  68. "Syntax table for parsing the compact syntax.")
  69. (if rng-c-syntax-table
  70. ()
  71. (setq rng-c-syntax-table (make-syntax-table))
  72. (modify-syntax-entry ?# "<" rng-c-syntax-table)
  73. (modify-syntax-entry ?\n ">" rng-c-syntax-table)
  74. (modify-syntax-entry ?- "w" rng-c-syntax-table)
  75. (modify-syntax-entry ?. "w" rng-c-syntax-table)
  76. (modify-syntax-entry ?_ "w" rng-c-syntax-table)
  77. (modify-syntax-entry ?: "_" rng-c-syntax-table))
  78. (defconst rng-c-literal-1-re
  79. "'\\(''\\([^']\\|'[^']\\|''[^']\\)*''\\|[^'\n]*\\)'"
  80. "Regular expression to match a single-quoted literal.")
  81. (defconst rng-c-literal-2-re
  82. (replace-regexp-in-string "'" "\"" rng-c-literal-1-re)
  83. "Regular expression to match a double-quoted literal.")
  84. (defconst rng-c-ncname-re "\\w+")
  85. (defconst rng-c-anchored-ncname-re
  86. (concat "\\`" rng-c-ncname-re "\\'"))
  87. (defconst rng-c-token-re
  88. (concat "[&|]=" "\\|"
  89. "[][()|&,*+?{}~=-]" "\\|"
  90. rng-c-literal-1-re "\\|"
  91. rng-c-literal-2-re "\\|"
  92. rng-c-ncname-re "\\(:\\(\\*\\|" rng-c-ncname-re "\\)\\)?" "\\|"
  93. "\\\\" rng-c-ncname-re "\\|"
  94. ">>")
  95. "Regular expression to match a token in the compact syntax.")
  96. (defun rng-c-init-buffer ()
  97. (setq case-fold-search nil) ; automatically becomes buffer-local when set
  98. (set-buffer-multibyte t)
  99. (set-syntax-table rng-c-syntax-table))
  100. (defvar rng-c-current-token nil)
  101. (make-variable-buffer-local 'rng-c-current-token)
  102. (defun rng-c-advance ()
  103. (cond ((looking-at rng-c-token-re)
  104. (setq rng-c-current-token (match-string 0))
  105. (goto-char (match-end 0))
  106. (forward-comment (point-max)))
  107. ((= (point) (point-max))
  108. (setq rng-c-current-token ""))
  109. (t (rng-c-error "Invalid token"))))
  110. (defconst rng-c-anchored-datatype-name-re
  111. (concat "\\`" rng-c-ncname-re ":" rng-c-ncname-re "\\'"))
  112. (defsubst rng-c-current-token-keyword-p ()
  113. (string-match rng-c-anchored-keyword-re rng-c-current-token))
  114. (defsubst rng-c-current-token-prefixed-name-p ()
  115. (string-match rng-c-anchored-datatype-name-re rng-c-current-token))
  116. (defsubst rng-c-current-token-literal-p ()
  117. (string-match "\\`['\"]" rng-c-current-token))
  118. (defsubst rng-c-current-token-quoted-identifier-p ()
  119. (string-match "\\`\\\\" rng-c-current-token))
  120. (defsubst rng-c-current-token-ncname-p ()
  121. (string-match rng-c-anchored-ncname-re rng-c-current-token))
  122. (defsubst rng-c-current-token-ns-name-p ()
  123. (let ((len (length rng-c-current-token)))
  124. (and (> len 0)
  125. (= (aref rng-c-current-token (- len 1)) ?*))))
  126. ;;; Namespaces
  127. (defvar rng-c-inherit-namespace nil)
  128. (defvar rng-c-default-namespace nil)
  129. (defvar rng-c-default-namespace-declared nil)
  130. (defvar rng-c-namespace-decls nil
  131. "Alist of namespace declarations.")
  132. (defconst rng-c-no-namespace nil)
  133. (defun rng-c-declare-standard-namespaces ()
  134. (setq rng-c-namespace-decls
  135. (cons (cons "xml" nxml-xml-namespace-uri)
  136. rng-c-namespace-decls))
  137. (when (and (not rng-c-default-namespace-declared)
  138. rng-c-inherit-namespace)
  139. (setq rng-c-default-namespace rng-c-inherit-namespace)))
  140. (defun rng-c-expand-name (prefixed-name)
  141. (let ((i (string-match ":" prefixed-name)))
  142. (rng-make-name (rng-c-lookup-prefix (substring prefixed-name
  143. 0
  144. i))
  145. (substring prefixed-name (+ i 1)))))
  146. (defun rng-c-lookup-prefix (prefix)
  147. (let ((binding (assoc prefix rng-c-namespace-decls)))
  148. (or binding (rng-c-error "Undefined prefix %s" prefix))
  149. (cdr binding)))
  150. (defun rng-c-unqualified-namespace (attribute)
  151. (if attribute
  152. rng-c-no-namespace
  153. rng-c-default-namespace))
  154. (defun rng-c-make-context ()
  155. (cons rng-c-default-namespace rng-c-namespace-decls))
  156. ;;; Datatypes
  157. (defconst rng-string-datatype
  158. (rng-make-datatype rng-builtin-datatypes-uri "string"))
  159. (defconst rng-token-datatype
  160. (rng-make-datatype rng-builtin-datatypes-uri "token"))
  161. (defvar rng-c-datatype-decls nil
  162. "Alist of datatype declarations.
  163. Contains a list of pairs (PREFIX . URI) where PREFIX is a string
  164. and URI is a symbol.")
  165. (defun rng-c-declare-standard-datatypes ()
  166. (setq rng-c-datatype-decls
  167. (cons (cons "xsd" rng-xsd-datatypes-uri)
  168. rng-c-datatype-decls)))
  169. (defun rng-c-lookup-datatype-prefix (prefix)
  170. (let ((binding (assoc prefix rng-c-datatype-decls)))
  171. (or binding (rng-c-error "Undefined prefix %s" prefix))
  172. (cdr binding)))
  173. (defun rng-c-expand-datatype (prefixed-name)
  174. (let ((i (string-match ":" prefixed-name)))
  175. (rng-make-datatype
  176. (rng-c-lookup-datatype-prefix (substring prefixed-name 0 i))
  177. (substring prefixed-name (+ i 1)))))
  178. ;;; Grammars
  179. (defvar rng-c-current-grammar nil)
  180. (defvar rng-c-parent-grammar nil)
  181. (defun rng-c-make-grammar ()
  182. (make-hash-table :test 'equal))
  183. (defconst rng-c-about-override-slot 0)
  184. (defconst rng-c-about-combine-slot 1)
  185. (defun rng-c-lookup-create (name grammar)
  186. "Return a def object for NAME.
  187. A def object is a pair \(ABOUT . REF) where REF is returned by
  188. `rng-make-ref'.
  189. ABOUT is a two-element vector [OVERRIDE COMBINE].
  190. COMBINE is either nil, choice or interleave.
  191. OVERRIDE is either nil, require or t."
  192. (let ((def (gethash name grammar)))
  193. (if def
  194. def
  195. (progn
  196. (setq def (cons (vector nil nil) (rng-make-ref name)))
  197. (puthash name def grammar)
  198. def))))
  199. (defun rng-c-make-ref (name)
  200. (or rng-c-current-grammar
  201. (rng-c-error "Reference not in a grammar"))
  202. (cdr (rng-c-lookup-create name rng-c-current-grammar)))
  203. (defun rng-c-make-parent-ref (name)
  204. (or rng-c-parent-grammar
  205. (rng-c-error "Reference to non-existent parent grammar"))
  206. (cdr (rng-c-lookup-create name rng-c-parent-grammar)))
  207. (defvar rng-c-overrides nil
  208. "Contains a list of (NAME . DEF) pairs.")
  209. (defun rng-c-merge-combine (def combine name)
  210. (let* ((about (car def))
  211. (current-combine (aref about rng-c-about-combine-slot)))
  212. (if combine
  213. (if current-combine
  214. (or (eq combine current-combine)
  215. (rng-c-error "Inconsistent combine for %s" name))
  216. (aset about rng-c-about-combine-slot combine))
  217. current-combine)))
  218. (defun rng-c-prepare-define (name combine in-include)
  219. (let* ((def (rng-c-lookup-create name rng-c-current-grammar))
  220. (about (car def))
  221. (overridden (aref about rng-c-about-override-slot)))
  222. (and in-include
  223. (setq rng-c-overrides (cons (cons name def) rng-c-overrides)))
  224. (cond (overridden (and (eq overridden 'require)
  225. (aset about rng-c-about-override-slot t))
  226. nil)
  227. (t (setq combine (rng-c-merge-combine def combine name))
  228. (and (rng-ref-get (cdr def))
  229. (not combine)
  230. (rng-c-error "Duplicate definition of %s" name))
  231. def))))
  232. (defun rng-c-start-include (overrides)
  233. (mapcar (lambda (name-def)
  234. (let* ((def (cdr name-def))
  235. (about (car def))
  236. (save (aref about rng-c-about-override-slot)))
  237. (aset about rng-c-about-override-slot 'require)
  238. (cons save name-def)))
  239. overrides))
  240. (defun rng-c-end-include (overrides)
  241. (mapcar (lambda (o)
  242. (let* ((saved (car o))
  243. (name-def (cdr o))
  244. (name (car name-def))
  245. (def (cdr name-def))
  246. (about (car def)))
  247. (and (eq (aref about rng-c-about-override-slot) 'require)
  248. (rng-c-error "Definition of %s in include did not override definition in included file" name))
  249. (aset about rng-c-about-override-slot saved)))
  250. overrides))
  251. (defun rng-c-define (def value)
  252. (and def
  253. (let ((current-value (rng-ref-get (cdr def))))
  254. (rng-ref-set (cdr def)
  255. (if current-value
  256. (if (eq (aref (car def) rng-c-about-combine-slot)
  257. 'choice)
  258. (rng-make-choice (list current-value value))
  259. (rng-make-interleave (list current-value value)))
  260. value)))))
  261. (defun rng-c-finish-grammar ()
  262. (maphash (lambda (key def)
  263. (or (rng-ref-get (cdr def))
  264. (rng-c-error "Reference to undefined pattern %s" key)))
  265. rng-c-current-grammar)
  266. (rng-ref-get (cdr (or (gethash 'start rng-c-current-grammar)
  267. (rng-c-error "No definition of start")))))
  268. ;;; Parsing
  269. (defvar rng-c-escape-positions nil)
  270. (make-variable-buffer-local 'rng-c-escape-positions)
  271. (defvar rng-c-file-name nil)
  272. (make-variable-buffer-local 'rng-c-file-name)
  273. (defvar rng-c-file-index nil)
  274. (defun rng-c-parse-file (filename &optional context)
  275. (with-current-buffer (get-buffer-create (rng-c-buffer-name context))
  276. (erase-buffer)
  277. (rng-c-init-buffer)
  278. (setq rng-c-file-name
  279. (car (insert-file-contents filename)))
  280. (setq rng-c-escape-positions nil)
  281. (rng-c-process-escapes)
  282. (rng-c-parse-top-level context)))
  283. (defun rng-c-buffer-name (context)
  284. (concat " *RNC Input"
  285. (if context
  286. (concat "<"
  287. (number-to-string (setq rng-c-file-index
  288. (1+ rng-c-file-index)))
  289. ">*")
  290. (setq rng-c-file-index 1)
  291. "*")))
  292. (defun rng-c-process-escapes ()
  293. ;; Check for any NULs, since we will use NUL chars
  294. ;; for internal purposes.
  295. (let ((pos (search-forward "\C-@" nil t)))
  296. (and pos
  297. (rng-c-error "Nul character found (binary file?)")))
  298. (let ((offset 0))
  299. (while (re-search-forward "\\\\x+{\\([0-9a-fA-F]+\\)}"
  300. (point-max)
  301. t)
  302. (let* ((ch (decode-char 'ucs (string-to-number (match-string 1) 16))))
  303. (if (and ch (> ch 0))
  304. (let ((begin (match-beginning 0))
  305. (end (match-end 0)))
  306. (delete-region begin end)
  307. ;; Represent an escaped newline by nul, so
  308. ;; that we can distinguish it from a literal newline.
  309. ;; We will translate it back into a real newline later.
  310. (insert (if (eq ch ?\n) 0 ch))
  311. (setq offset (+ offset (- end begin 1)))
  312. (setq rng-c-escape-positions
  313. (cons (cons (point) offset)
  314. rng-c-escape-positions)))
  315. (rng-c-error "Invalid character escape")))))
  316. (goto-char 1))
  317. (defun rng-c-translate-position (pos)
  318. (let ((tem rng-c-escape-positions))
  319. (while (and tem
  320. (> (caar tem) pos))
  321. (setq tem (cdr tem)))
  322. (if tem
  323. (+ pos (cdar tem))
  324. pos)))
  325. (defun rng-c-error (&rest args)
  326. (rng-c-signal-incorrect-schema rng-c-file-name
  327. (rng-c-translate-position (point))
  328. (apply #'format-message args)))
  329. (defun rng-c-parse-top-level (context)
  330. (let ((rng-c-namespace-decls nil)
  331. (rng-c-default-namespace nil)
  332. (rng-c-datatype-decls nil))
  333. (goto-char (point-min))
  334. (forward-comment (point-max))
  335. (rng-c-advance)
  336. (rng-c-parse-decls)
  337. (let ((p (if (eq context 'include)
  338. (if (rng-c-implicit-grammar-p)
  339. (rng-c-parse-grammar-body "")
  340. (rng-c-parse-included-grammar))
  341. (if (rng-c-implicit-grammar-p)
  342. (rng-c-parse-implicit-grammar)
  343. (rng-c-parse-pattern)))))
  344. (or (string-equal rng-c-current-token "")
  345. (rng-c-error "Unexpected characters after pattern"))
  346. p)))
  347. (defun rng-c-parse-included-grammar ()
  348. (or (string-equal rng-c-current-token "grammar")
  349. (rng-c-error "Included schema is not a grammar"))
  350. (rng-c-advance)
  351. (rng-c-expect "{")
  352. (rng-c-parse-grammar-body "}"))
  353. (defun rng-c-implicit-grammar-p ()
  354. (or (and (or (rng-c-current-token-prefixed-name-p)
  355. (rng-c-current-token-quoted-identifier-p)
  356. (and (rng-c-current-token-ncname-p)
  357. (not (rng-c-current-token-keyword-p))))
  358. (looking-at "\\["))
  359. (and (string-equal rng-c-current-token "[")
  360. (rng-c-parse-lead-annotation)
  361. nil)
  362. (member rng-c-current-token '("div" "include" ""))
  363. (looking-at "[|&]?=")))
  364. (defun rng-c-parse-decls ()
  365. (setq rng-c-default-namespace-declared nil)
  366. (while (progn
  367. (let ((binding
  368. (assoc rng-c-current-token
  369. '(("namespace" . rng-c-parse-namespace)
  370. ("datatypes" . rng-c-parse-datatypes)
  371. ("default" . rng-c-parse-default)))))
  372. (if binding
  373. (progn
  374. (rng-c-advance)
  375. (funcall (cdr binding))
  376. t)
  377. nil))))
  378. (rng-c-declare-standard-datatypes)
  379. (rng-c-declare-standard-namespaces))
  380. (defun rng-c-parse-datatypes ()
  381. (let ((prefix (rng-c-parse-identifier-or-keyword)))
  382. (or (not (assoc prefix rng-c-datatype-decls))
  383. (rng-c-error "Duplicate datatypes declaration for prefix %s" prefix))
  384. (rng-c-expect "=")
  385. (setq rng-c-datatype-decls
  386. (cons (cons prefix
  387. (rng-make-datatypes-uri (rng-c-parse-literal)))
  388. rng-c-datatype-decls))))
  389. (defun rng-c-parse-namespace ()
  390. (rng-c-declare-namespace nil
  391. (rng-c-parse-identifier-or-keyword)))
  392. (defun rng-c-parse-default ()
  393. (rng-c-expect "namespace")
  394. (rng-c-declare-namespace t
  395. (if (string-equal rng-c-current-token "=")
  396. nil
  397. (rng-c-parse-identifier-or-keyword))))
  398. (defun rng-c-declare-namespace (declare-default prefix)
  399. (rng-c-expect "=")
  400. (let ((ns (cond ((string-equal rng-c-current-token "inherit")
  401. (rng-c-advance)
  402. rng-c-inherit-namespace)
  403. (t
  404. (nxml-make-namespace (rng-c-parse-literal))))))
  405. (and prefix
  406. (or (not (assoc prefix rng-c-namespace-decls))
  407. (rng-c-error "Duplicate namespace declaration for prefix %s"
  408. prefix))
  409. (setq rng-c-namespace-decls
  410. (cons (cons prefix ns) rng-c-namespace-decls)))
  411. (and declare-default
  412. (or (not rng-c-default-namespace-declared)
  413. (rng-c-error "Duplicate default namespace declaration"))
  414. (setq rng-c-default-namespace-declared t)
  415. (setq rng-c-default-namespace ns))))
  416. (defun rng-c-parse-implicit-grammar ()
  417. (let* ((rng-c-parent-grammar rng-c-current-grammar)
  418. (rng-c-current-grammar (rng-c-make-grammar)))
  419. (rng-c-parse-grammar-body "")
  420. (rng-c-finish-grammar)))
  421. (defun rng-c-parse-grammar-body (close-token &optional in-include)
  422. (while (not (string-equal rng-c-current-token close-token))
  423. (cond ((rng-c-current-token-keyword-p)
  424. (let ((kw (intern rng-c-current-token)))
  425. (cond ((eq kw 'start)
  426. (rng-c-parse-define 'start in-include))
  427. ((eq kw 'div)
  428. (rng-c-advance)
  429. (rng-c-parse-div in-include))
  430. ((eq kw 'include)
  431. (and in-include
  432. (rng-c-error "Nested include"))
  433. (rng-c-advance)
  434. (rng-c-parse-include))
  435. (t (rng-c-error "Invalid grammar keyword")))))
  436. ((rng-c-current-token-ncname-p)
  437. (if (looking-at "\\[")
  438. (rng-c-parse-annotation-element)
  439. (rng-c-parse-define rng-c-current-token
  440. in-include)))
  441. ((rng-c-current-token-quoted-identifier-p)
  442. (if (looking-at "\\[")
  443. (rng-c-parse-annotation-element)
  444. (rng-c-parse-define (substring rng-c-current-token 1)
  445. in-include)))
  446. ((rng-c-current-token-prefixed-name-p)
  447. (rng-c-parse-annotation-element))
  448. ((string-equal rng-c-current-token "[")
  449. (rng-c-parse-lead-annotation)
  450. (and (string-equal rng-c-current-token close-token)
  451. (rng-c-error "Missing annotation subject"))
  452. (and (looking-at "\\[")
  453. (rng-c-error "Leading annotation applied to annotation")))
  454. (t (rng-c-error "Invalid grammar content"))))
  455. (or (string-equal rng-c-current-token "")
  456. (rng-c-advance)))
  457. (defun rng-c-parse-div (in-include)
  458. (rng-c-expect "{")
  459. (rng-c-parse-grammar-body "}" in-include))
  460. (defun rng-c-parse-include ()
  461. (let* ((filename (rng-c-expand-file (rng-c-parse-literal)))
  462. (rng-c-inherit-namespace (rng-c-parse-opt-inherit))
  463. overrides)
  464. (cond ((string-equal rng-c-current-token "{")
  465. (rng-c-advance)
  466. (let ((rng-c-overrides nil))
  467. (rng-c-parse-grammar-body "}" t)
  468. (setq overrides rng-c-overrides))
  469. (setq overrides (rng-c-start-include overrides))
  470. (rng-c-parse-file filename 'include)
  471. (rng-c-end-include overrides))
  472. (t (rng-c-parse-file filename 'include)))))
  473. (defun rng-c-parse-define (name in-include)
  474. (rng-c-advance)
  475. (let ((assign (assoc rng-c-current-token
  476. '(("=" . nil)
  477. ("|=" . choice)
  478. ("&=" . interleave)))))
  479. (or assign
  480. (rng-c-error "Expected assignment operator"))
  481. (rng-c-advance)
  482. (let ((ref (rng-c-prepare-define name (cdr assign) in-include)))
  483. (rng-c-define ref (rng-c-parse-pattern)))))
  484. (defvar rng-c-had-except nil)
  485. (defun rng-c-parse-pattern ()
  486. (let* ((rng-c-had-except nil)
  487. (p (rng-c-parse-repeated))
  488. (op (assoc rng-c-current-token
  489. '(("|" . rng-make-choice)
  490. ("," . rng-make-group)
  491. ("&" . rng-make-interleave)))))
  492. (if op
  493. (if rng-c-had-except
  494. (rng-c-error "Parentheses required around pattern using -")
  495. (let* ((patterns (cons p nil))
  496. (tail patterns)
  497. (connector rng-c-current-token))
  498. (while (progn
  499. (rng-c-advance)
  500. (let ((newcdr (cons (rng-c-parse-repeated) nil)))
  501. (setcdr tail newcdr)
  502. (setq tail newcdr))
  503. (string-equal rng-c-current-token connector)))
  504. (funcall (cdr op) patterns)))
  505. p)))
  506. (defun rng-c-parse-repeated ()
  507. (let ((p (rng-c-parse-follow-annotations
  508. (rng-c-parse-primary)))
  509. (op (assoc rng-c-current-token
  510. '(("*" . rng-make-zero-or-more)
  511. ("+" . rng-make-one-or-more)
  512. ("?" . rng-make-optional)))))
  513. (if op
  514. (if rng-c-had-except
  515. (rng-c-error "Parentheses required around pattern using -")
  516. (rng-c-parse-follow-annotations
  517. (progn
  518. (rng-c-advance)
  519. (funcall (cdr op) p))))
  520. p)))
  521. (defun rng-c-parse-primary ()
  522. "Parse a primary expression.
  523. The current token must be the first token of the expression.
  524. After parsing the current token should be the token following
  525. the primary expression."
  526. (cond ((rng-c-current-token-keyword-p)
  527. (let ((parse-function (get (intern rng-c-current-token)
  528. 'rng-c-pattern)))
  529. (or parse-function
  530. (rng-c-error "Keyword %s does not introduce a pattern"
  531. rng-c-current-token))
  532. (rng-c-advance)
  533. (funcall parse-function)))
  534. ((rng-c-current-token-ncname-p)
  535. (rng-c-advance-with (rng-c-make-ref rng-c-current-token)))
  536. ((string-equal rng-c-current-token "(")
  537. (rng-c-advance)
  538. (let ((p (rng-c-parse-pattern)))
  539. (rng-c-expect ")")
  540. p))
  541. ((rng-c-current-token-prefixed-name-p)
  542. (let ((name (rng-c-expand-datatype rng-c-current-token)))
  543. (rng-c-advance)
  544. (rng-c-parse-data name)))
  545. ((rng-c-current-token-literal-p)
  546. (rng-make-value rng-token-datatype (rng-c-parse-literal) nil))
  547. ((rng-c-current-token-quoted-identifier-p)
  548. (rng-c-advance-with
  549. (rng-c-make-ref (substring rng-c-current-token 1))))
  550. ((string-equal rng-c-current-token "[")
  551. (rng-c-parse-lead-annotation)
  552. (rng-c-parse-primary))
  553. (t (rng-c-error "Invalid pattern"))))
  554. (defun rng-c-parse-parent ()
  555. (and (rng-c-current-token-keyword-p)
  556. (rng-c-error "Keyword following parent was not quoted"
  557. rng-c-current-token))
  558. (rng-c-make-parent-ref (rng-c-parse-identifier-or-keyword)))
  559. (defun rng-c-parse-literal ()
  560. (rng-c-fix-escaped-newlines
  561. (apply 'concat (rng-c-parse-literal-segments))))
  562. (defun rng-c-parse-literal-segments ()
  563. (let ((str (rng-c-parse-literal-segment)))
  564. (cons str
  565. (cond ((string-equal rng-c-current-token "~")
  566. (rng-c-advance)
  567. (rng-c-parse-literal-segments))
  568. (t nil)))))
  569. (defun rng-c-parse-literal-segment ()
  570. (or (rng-c-current-token-literal-p)
  571. (rng-c-error "Expected a literal"))
  572. (rng-c-advance-with
  573. (let ((n (if (and (>= (length rng-c-current-token) 6)
  574. (eq (aref rng-c-current-token 0)
  575. (aref rng-c-current-token 1)))
  576. 3
  577. 1)))
  578. (substring rng-c-current-token n (- n)))))
  579. (defun rng-c-fix-escaped-newlines (str)
  580. (subst-char-in-string ?\C-@ ?\n str))
  581. (defun rng-c-parse-identifier-or-keyword ()
  582. (cond ((rng-c-current-token-ncname-p)
  583. (rng-c-advance-with rng-c-current-token))
  584. ((rng-c-current-token-quoted-identifier-p)
  585. (rng-c-advance-with (substring rng-c-current-token 1)))
  586. (t (rng-c-error "Expected identifier or keyword"))))
  587. (put 'string 'rng-c-pattern 'rng-c-parse-string)
  588. (put 'token 'rng-c-pattern 'rng-c-parse-token)
  589. (put 'element 'rng-c-pattern 'rng-c-parse-element)
  590. (put 'attribute 'rng-c-pattern 'rng-c-parse-attribute)
  591. (put 'list 'rng-c-pattern 'rng-c-parse-list)
  592. (put 'mixed 'rng-c-pattern 'rng-c-parse-mixed)
  593. (put 'text 'rng-c-pattern 'rng-c-parse-text)
  594. (put 'empty 'rng-c-pattern 'rng-c-parse-empty)
  595. (put 'notAllowed 'rng-c-pattern 'rng-c-parse-not-allowed)
  596. (put 'grammar 'rng-c-pattern 'rng-c-parse-grammar)
  597. (put 'parent 'rng-c-pattern 'rng-c-parse-parent)
  598. (put 'external 'rng-c-pattern 'rng-c-parse-external)
  599. (defun rng-c-parse-element ()
  600. (let ((name-class (rng-c-parse-name-class nil)))
  601. (rng-c-expect "{")
  602. (let ((pattern (rng-c-parse-pattern)))
  603. (rng-c-expect "}")
  604. (rng-make-element name-class pattern))))
  605. (defun rng-c-parse-attribute ()
  606. (let ((name-class (rng-c-parse-name-class 'attribute)))
  607. (rng-c-expect "{")
  608. (let ((pattern (rng-c-parse-pattern)))
  609. (rng-c-expect "}")
  610. (rng-make-attribute name-class pattern))))
  611. (defun rng-c-parse-name-class (attribute)
  612. (let* ((rng-c-had-except nil)
  613. (name-class
  614. (rng-c-parse-follow-annotations
  615. (rng-c-parse-primary-name-class attribute))))
  616. (if (string-equal rng-c-current-token "|")
  617. (let* ((name-classes (cons name-class nil))
  618. (tail name-classes))
  619. (or (not rng-c-had-except)
  620. (rng-c-error "Parentheses required around name-class using - operator"))
  621. (while (progn
  622. (rng-c-advance)
  623. (let ((newcdr
  624. (cons (rng-c-parse-follow-annotations
  625. (rng-c-parse-primary-name-class attribute))
  626. nil)))
  627. (setcdr tail newcdr)
  628. (setq tail newcdr))
  629. (string-equal rng-c-current-token "|")))
  630. (rng-make-choice-name-class name-classes))
  631. name-class)))
  632. (defun rng-c-parse-primary-name-class (attribute)
  633. (cond ((rng-c-current-token-ncname-p)
  634. (rng-c-advance-with
  635. (rng-make-name-name-class
  636. (rng-make-name (rng-c-unqualified-namespace attribute)
  637. rng-c-current-token))))
  638. ((rng-c-current-token-prefixed-name-p)
  639. (rng-c-advance-with
  640. (rng-make-name-name-class
  641. (rng-c-expand-name rng-c-current-token))))
  642. ((string-equal rng-c-current-token "*")
  643. (let ((except (rng-c-parse-opt-except-name-class attribute)))
  644. (if except
  645. (rng-make-any-name-except-name-class except)
  646. (rng-make-any-name-name-class))))
  647. ((rng-c-current-token-ns-name-p)
  648. (let* ((ns
  649. (rng-c-lookup-prefix (substring rng-c-current-token
  650. 0
  651. -2)))
  652. (except (rng-c-parse-opt-except-name-class attribute)))
  653. (if except
  654. (rng-make-ns-name-except-name-class ns except)
  655. (rng-make-ns-name-name-class ns))))
  656. ((string-equal rng-c-current-token "(")
  657. (rng-c-advance)
  658. (let ((name-class (rng-c-parse-name-class attribute)))
  659. (rng-c-expect ")")
  660. name-class))
  661. ((rng-c-current-token-quoted-identifier-p)
  662. (rng-c-advance-with
  663. (rng-make-name-name-class
  664. (rng-make-name (rng-c-unqualified-namespace attribute)
  665. (substring rng-c-current-token 1)))))
  666. ((string-equal rng-c-current-token "[")
  667. (rng-c-parse-lead-annotation)
  668. (rng-c-parse-primary-name-class attribute))
  669. (t (rng-c-error "Bad name class"))))
  670. (defun rng-c-parse-opt-except-name-class (attribute)
  671. (rng-c-advance)
  672. (and (string-equal rng-c-current-token "-")
  673. (or (not rng-c-had-except)
  674. (rng-c-error "Parentheses required around name-class using - operator"))
  675. (setq rng-c-had-except t)
  676. (progn
  677. (rng-c-advance)
  678. (rng-c-parse-primary-name-class attribute))))
  679. (defun rng-c-parse-mixed ()
  680. (rng-c-expect "{")
  681. (let ((pattern (rng-make-mixed (rng-c-parse-pattern))))
  682. (rng-c-expect "}")
  683. pattern))
  684. (defun rng-c-parse-list ()
  685. (rng-c-expect "{")
  686. (let ((pattern (rng-make-list (rng-c-parse-pattern))))
  687. (rng-c-expect "}")
  688. pattern))
  689. (defun rng-c-parse-text ()
  690. (rng-make-text))
  691. (defun rng-c-parse-empty ()
  692. (rng-make-empty))
  693. (defun rng-c-parse-not-allowed ()
  694. (rng-make-not-allowed))
  695. (defun rng-c-parse-string ()
  696. (rng-c-parse-data rng-string-datatype))
  697. (defun rng-c-parse-token ()
  698. (rng-c-parse-data rng-token-datatype))
  699. (defun rng-c-parse-data (name)
  700. (if (rng-c-current-token-literal-p)
  701. (rng-make-value name
  702. (rng-c-parse-literal)
  703. (and (car name)
  704. (rng-c-make-context)))
  705. (let ((params (rng-c-parse-optional-params)))
  706. (if (string-equal rng-c-current-token "-")
  707. (progn
  708. (if rng-c-had-except
  709. (rng-c-error "Parentheses required around pattern using -")
  710. (setq rng-c-had-except t))
  711. (rng-c-advance)
  712. (rng-make-data-except name
  713. params
  714. (rng-c-parse-primary)))
  715. (rng-make-data name params)))))
  716. (defun rng-c-parse-optional-params ()
  717. (and (string-equal rng-c-current-token "{")
  718. (let* ((head (cons nil nil))
  719. (tail head))
  720. (rng-c-advance)
  721. (while (not (string-equal rng-c-current-token "}"))
  722. (and (string-equal rng-c-current-token "[")
  723. (rng-c-parse-lead-annotation))
  724. (let ((name (rng-c-parse-identifier-or-keyword)))
  725. (rng-c-expect "=")
  726. (let ((newcdr (cons (cons (intern name)
  727. (rng-c-parse-literal))
  728. nil)))
  729. (setcdr tail newcdr)
  730. (setq tail newcdr))))
  731. (rng-c-advance)
  732. (cdr head))))
  733. (defun rng-c-parse-external ()
  734. (let* ((filename (rng-c-expand-file (rng-c-parse-literal)))
  735. (rng-c-inherit-namespace (rng-c-parse-opt-inherit)))
  736. (rng-c-parse-file filename 'external)))
  737. (defun rng-c-expand-file (uri)
  738. (condition-case err
  739. (rng-uri-file-name (rng-uri-resolve uri
  740. (rng-file-name-uri rng-c-file-name)))
  741. (rng-uri-error
  742. (rng-c-error (cadr err)))))
  743. (defun rng-c-parse-opt-inherit ()
  744. (cond ((string-equal rng-c-current-token "inherit")
  745. (rng-c-advance)
  746. (rng-c-expect "=")
  747. (rng-c-lookup-prefix (rng-c-parse-identifier-or-keyword)))
  748. (t rng-c-default-namespace)))
  749. (defun rng-c-parse-grammar ()
  750. (rng-c-expect "{")
  751. (let* ((rng-c-parent-grammar rng-c-current-grammar)
  752. (rng-c-current-grammar (rng-c-make-grammar)))
  753. (rng-c-parse-grammar-body "}")
  754. (rng-c-finish-grammar)))
  755. (defun rng-c-parse-lead-annotation ()
  756. (rng-c-parse-annotation-body)
  757. (and (string-equal rng-c-current-token "[")
  758. (rng-c-error "Multiple leading annotations")))
  759. (defun rng-c-parse-follow-annotations (obj)
  760. (while (string-equal rng-c-current-token ">>")
  761. (rng-c-advance)
  762. (if (rng-c-current-token-prefixed-name-p)
  763. (rng-c-advance)
  764. (rng-c-parse-identifier-or-keyword))
  765. (rng-c-parse-annotation-body t))
  766. obj)
  767. (defun rng-c-parse-annotation-element ()
  768. (rng-c-advance)
  769. (rng-c-parse-annotation-body t))
  770. ;; XXX need stricter checking of attribute names
  771. ;; XXX don't allow attributes after text
  772. (defun rng-c-parse-annotation-body (&optional allow-text)
  773. "Current token is [. Parse up to matching ].
  774. Current token after parse is token following ]."
  775. (or (string-equal rng-c-current-token "[")
  776. (rng-c-error "Expected ["))
  777. (rng-c-advance)
  778. (while (not (string-equal rng-c-current-token "]"))
  779. (cond ((rng-c-current-token-literal-p)
  780. (or allow-text
  781. (rng-c-error "Out of place text within annotation"))
  782. (rng-c-parse-literal))
  783. (t
  784. (if (rng-c-current-token-prefixed-name-p)
  785. (rng-c-advance)
  786. (rng-c-parse-identifier-or-keyword))
  787. (cond ((string-equal rng-c-current-token "[")
  788. (rng-c-parse-annotation-body t))
  789. ((string-equal rng-c-current-token "=")
  790. (rng-c-advance)
  791. (rng-c-parse-literal))
  792. (t (rng-c-error "Expected = or ["))))))
  793. (rng-c-advance))
  794. (defun rng-c-advance-with (pattern)
  795. (rng-c-advance)
  796. pattern)
  797. (defun rng-c-expect (str)
  798. (or (string-equal rng-c-current-token str)
  799. (rng-c-error "Expected `%s' but got `%s'" str rng-c-current-token))
  800. (rng-c-advance))
  801. (provide 'rng-cmpct)
  802. ;;; rng-cmpct.el