123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740 |
- ;;; rng-match.el --- matching of RELAX NG patterns against XML events
- ;; Copyright (C) 2003, 2007-2012 Free Software Foundation, Inc.
- ;; Author: James Clark
- ;; Keywords: XML, RelaxNG
- ;; This file is part of GNU Emacs.
- ;; GNU Emacs is free software: you can redistribute it and/or modify
- ;; it under the terms of the GNU General Public License as published by
- ;; the Free Software Foundation, either version 3 of the License, or
- ;; (at your option) any later version.
- ;; GNU Emacs is distributed in the hope that it will be useful,
- ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
- ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- ;; GNU General Public License for more details.
- ;; You should have received a copy of the GNU General Public License
- ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
- ;;; Commentary:
- ;; This uses the algorithm described in
- ;; http://www.thaiopensource.com/relaxng/derivative.html
- ;;
- ;; The schema to be used is contained in the variable
- ;; rng-current-schema. It has the form described in the file
- ;; rng-pttrn.el.
- ;;
- ;;; Code:
- (require 'rng-pttrn)
- (require 'rng-util)
- (require 'rng-dt)
- (defvar rng-not-allowed-ipattern nil)
- (defvar rng-empty-ipattern nil)
- (defvar rng-text-ipattern nil)
- (defvar rng-compile-table nil)
- (defvar rng-being-compiled nil
- "Contains a list of ref patterns currently being compiled.
- Used to detect invalid recursive references.")
- (defvar rng-ipattern-table nil)
- (defvar rng-last-ipattern-index nil)
- (defvar rng-match-state nil
- "An ipattern representing the current state of validation.")
- ;;; Inline functions
- (defsubst rng-update-match-state (new-state)
- (if (and (eq new-state rng-not-allowed-ipattern)
- (not (eq rng-match-state rng-not-allowed-ipattern)))
- nil
- (setq rng-match-state new-state)
- t))
- ;;; Interned patterns
- (eval-when-compile
- (defun rng-ipattern-slot-accessor-name (slot-name)
- (intern (concat "rng-ipattern-get-"
- (symbol-name slot-name))))
- (defun rng-ipattern-slot-setter-name (slot-name)
- (intern (concat "rng-ipattern-set-"
- (symbol-name slot-name)))))
- (defmacro rng-ipattern-defslot (slot-name index)
- `(progn
- (defsubst ,(rng-ipattern-slot-accessor-name slot-name) (ipattern)
- (aref ipattern ,index))
- (defsubst ,(rng-ipattern-slot-setter-name slot-name) (ipattern value)
- (aset ipattern ,index value))))
- (rng-ipattern-defslot type 0)
- (rng-ipattern-defslot index 1)
- (rng-ipattern-defslot name-class 2)
- (rng-ipattern-defslot datatype 2)
- (rng-ipattern-defslot after 2)
- (rng-ipattern-defslot child 3)
- (rng-ipattern-defslot value-object 3)
- (rng-ipattern-defslot nullable 4)
- (rng-ipattern-defslot memo-text-typed 5)
- (rng-ipattern-defslot memo-map-start-tag-open-deriv 6)
- (rng-ipattern-defslot memo-map-start-attribute-deriv 7)
- (rng-ipattern-defslot memo-start-tag-close-deriv 8)
- (rng-ipattern-defslot memo-text-only-deriv 9)
- (rng-ipattern-defslot memo-mixed-text-deriv 10)
- (rng-ipattern-defslot memo-map-data-deriv 11)
- (rng-ipattern-defslot memo-end-tag-deriv 12)
- (defconst rng-memo-map-alist-max 10)
- (defsubst rng-memo-map-get (key mm)
- "Return the value associated with KEY in memo-map MM."
- (let ((found (assoc key mm)))
- (if found
- (cdr found)
- (and mm
- (let ((head (car mm)))
- (and (hash-table-p head)
- (gethash key head)))))))
- (defun rng-memo-map-add (key value mm &optional weakness)
- "Associate KEY with VALUE in memo-map MM and return the new memo-map.
- The new memo-map may or may not be a different object from MM.
- Alists are better for small maps. Hash tables are better for large
- maps. A memo-map therefore starts off as an alist and switches to a
- hash table for large memo-maps. A memo-map is always a list. An empty
- memo-map is represented by nil. A large memo-map is represented by a
- list containing just a hash-table. A small memo map is represented by
- a list whose cdr is an alist and whose car is the number of entries in
- the alist. The complete memo-map can be passed to `assoc' without
- problems: assoc ignores any members that are not cons cells. There is
- therefore minimal overhead in successful lookups on small lists
- \(which is the most common case)."
- (if (null mm)
- (list 1 (cons key value))
- (let ((head (car mm)))
- (cond ((hash-table-p head)
- (puthash key value head)
- mm)
- ((>= head rng-memo-map-alist-max)
- (let ((ht (make-hash-table :test 'equal
- :weakness weakness
- :size (* 2 rng-memo-map-alist-max))))
- (setq mm (cdr mm))
- (while mm
- (setq head (car mm))
- (puthash (car head) (cdr head) ht)
- (setq mm (cdr mm)))
- (cons ht nil)))
- (t (cons (1+ head)
- (cons (cons key value)
- (cdr mm))))))))
- (defsubst rng-make-ipattern (type index name-class child nullable)
- (vector type index name-class child nullable
- ;; 5 memo-text-typed
- 'unknown
- ;; 6 memo-map-start-tag-open-deriv
- nil
- ;; 7 memo-map-start-attribute-deriv
- nil
- ;; 8 memo-start-tag-close-deriv
- nil
- ;; 9 memo-text-only-deriv
- nil
- ;; 10 memo-mixed-text-deriv
- nil
- ;; 11 memo-map-data-deriv
- nil
- ;; 12 memo-end-tag-deriv
- nil))
- (defun rng-ipattern-maybe-init ()
- (unless rng-ipattern-table
- (setq rng-ipattern-table (make-hash-table :test 'equal))
- (setq rng-last-ipattern-index -1)))
- (defun rng-ipattern-clear ()
- (when rng-ipattern-table
- (clrhash rng-ipattern-table))
- (setq rng-last-ipattern-index -1))
- (defsubst rng-gen-ipattern-index ()
- (setq rng-last-ipattern-index (1+ rng-last-ipattern-index)))
- (defun rng-put-ipattern (key type name-class child nullable)
- (let ((ipattern
- (rng-make-ipattern type
- (rng-gen-ipattern-index)
- name-class
- child
- nullable)))
- (puthash key ipattern rng-ipattern-table)
- ipattern))
- (defun rng-get-ipattern (key)
- (gethash key rng-ipattern-table))
- (or rng-not-allowed-ipattern
- (setq rng-not-allowed-ipattern
- (rng-make-ipattern 'not-allowed -3 nil nil nil)))
- (or rng-empty-ipattern
- (setq rng-empty-ipattern
- (rng-make-ipattern 'empty -2 nil nil t)))
- (or rng-text-ipattern
- (setq rng-text-ipattern
- (rng-make-ipattern 'text -1 nil nil t)))
- (defconst rng-const-ipatterns
- (list rng-not-allowed-ipattern
- rng-empty-ipattern
- rng-text-ipattern))
- (defun rng-intern-after (child after)
- (if (eq child rng-not-allowed-ipattern)
- rng-not-allowed-ipattern
- (let ((key (list 'after
- (rng-ipattern-get-index child)
- (rng-ipattern-get-index after))))
- (or (rng-get-ipattern key)
- (rng-put-ipattern key
- 'after
- after
- child
- nil)))))
- (defun rng-intern-attribute (name-class ipattern)
- (if (eq ipattern rng-not-allowed-ipattern)
- rng-not-allowed-ipattern
- (let ((key (list 'attribute
- name-class
- (rng-ipattern-get-index ipattern))))
- (or (rng-get-ipattern key)
- (rng-put-ipattern key
- 'attribute
- name-class
- ipattern
- nil)))))
- (defun rng-intern-data (dt matches-anything)
- (let ((key (list 'data dt)))
- (or (rng-get-ipattern key)
- (let ((ipattern (rng-put-ipattern key
- 'data
- dt
- nil
- matches-anything)))
- (rng-ipattern-set-memo-text-typed ipattern
- (not matches-anything))
- ipattern))))
- (defun rng-intern-data-except (dt ipattern)
- (let ((key (list 'data-except dt ipattern)))
- (or (rng-get-ipattern key)
- (rng-put-ipattern key
- 'data-except
- dt
- ipattern
- nil))))
- (defun rng-intern-value (dt obj)
- (let ((key (list 'value dt obj)))
- (or (rng-get-ipattern key)
- (rng-put-ipattern key
- 'value
- dt
- obj
- nil))))
- (defun rng-intern-one-or-more (ipattern)
- (or (rng-intern-one-or-more-shortcut ipattern)
- (let ((key (cons 'one-or-more
- (list (rng-ipattern-get-index ipattern)))))
- (or (rng-get-ipattern key)
- (rng-put-ipattern key
- 'one-or-more
- nil
- ipattern
- (rng-ipattern-get-nullable ipattern))))))
- (defun rng-intern-one-or-more-shortcut (ipattern)
- (cond ((eq ipattern rng-not-allowed-ipattern)
- rng-not-allowed-ipattern)
- ((eq ipattern rng-empty-ipattern)
- rng-empty-ipattern)
- ((eq (rng-ipattern-get-type ipattern) 'one-or-more)
- ipattern)
- (t nil)))
- (defun rng-intern-list (ipattern)
- (if (eq ipattern rng-not-allowed-ipattern)
- rng-not-allowed-ipattern
- (let ((key (cons 'list
- (list (rng-ipattern-get-index ipattern)))))
- (or (rng-get-ipattern key)
- (rng-put-ipattern key
- 'list
- nil
- ipattern
- nil)))))
- (defun rng-intern-group (ipatterns)
- "Return an ipattern for the list of group members in IPATTERNS."
- (or (rng-intern-group-shortcut ipatterns)
- (let* ((tem (rng-normalize-group-list ipatterns))
- (normalized (cdr tem)))
- (or (rng-intern-group-shortcut normalized)
- (let ((key (cons 'group
- (mapcar 'rng-ipattern-get-index normalized))))
- (or (rng-get-ipattern key)
- (rng-put-ipattern key
- 'group
- nil
- normalized
- (car tem))))))))
- (defun rng-intern-group-shortcut (ipatterns)
- "Try to shortcut interning a group list.
- If successful, return the interned pattern. Otherwise return nil."
- (while (and ipatterns
- (eq (car ipatterns) rng-empty-ipattern))
- (setq ipatterns (cdr ipatterns)))
- (if ipatterns
- (let ((ret (car ipatterns)))
- (if (eq ret rng-not-allowed-ipattern)
- rng-not-allowed-ipattern
- (setq ipatterns (cdr ipatterns))
- (while (and ipatterns ret)
- (let ((tem (car ipatterns)))
- (cond ((eq tem rng-not-allowed-ipattern)
- (setq ret tem)
- (setq ipatterns nil))
- ((eq tem rng-empty-ipattern)
- (setq ipatterns (cdr ipatterns)))
- (t
- ;; Stop here rather than continuing
- ;; looking for not-allowed patterns.
- ;; We do a complete scan elsewhere.
- (setq ret nil)))))
- ret))
- rng-empty-ipattern))
- (defun rng-normalize-group-list (ipatterns)
- "Normalize a list containing members of a group.
- Expands nested groups, removes empty members, handles notAllowed.
- Returns a pair whose car says whether the list is nullable and whose
- cdr is the normalized list."
- (let ((nullable t)
- (result nil)
- member)
- (while ipatterns
- (setq member (car ipatterns))
- (setq ipatterns (cdr ipatterns))
- (when nullable
- (setq nullable (rng-ipattern-get-nullable member)))
- (cond ((eq (rng-ipattern-get-type member) 'group)
- (setq result
- (nconc (reverse (rng-ipattern-get-child member))
- result)))
- ((eq member rng-not-allowed-ipattern)
- (setq result (list rng-not-allowed-ipattern))
- (setq ipatterns nil))
- ((not (eq member rng-empty-ipattern))
- (setq result (cons member result)))))
- (cons nullable (nreverse result))))
- (defun rng-intern-interleave (ipatterns)
- (or (rng-intern-group-shortcut ipatterns)
- (let* ((tem (rng-normalize-interleave-list ipatterns))
- (normalized (cdr tem)))
- (or (rng-intern-group-shortcut normalized)
- (let ((key (cons 'interleave
- (mapcar 'rng-ipattern-get-index normalized))))
- (or (rng-get-ipattern key)
- (rng-put-ipattern key
- 'interleave
- nil
- normalized
- (car tem))))))))
- (defun rng-normalize-interleave-list (ipatterns)
- "Normalize a list containing members of an interleave.
- Expands nested groups, removes empty members, handles notAllowed.
- Returns a pair whose car says whether the list is nullable and whose
- cdr is the normalized list."
- (let ((nullable t)
- (result nil)
- member)
- (while ipatterns
- (setq member (car ipatterns))
- (setq ipatterns (cdr ipatterns))
- (when nullable
- (setq nullable (rng-ipattern-get-nullable member)))
- (cond ((eq (rng-ipattern-get-type member) 'interleave)
- (setq result
- (append (rng-ipattern-get-child member)
- result)))
- ((eq member rng-not-allowed-ipattern)
- (setq result (list rng-not-allowed-ipattern))
- (setq ipatterns nil))
- ((not (eq member rng-empty-ipattern))
- (setq result (cons member result)))))
- (cons nullable (sort result 'rng-compare-ipattern))))
- ;; Would be cleaner if this didn't modify IPATTERNS.
- (defun rng-intern-choice (ipatterns)
- "Return a choice ipattern for the list of choices in IPATTERNS.
- May alter IPATTERNS."
- (or (rng-intern-choice-shortcut ipatterns)
- (let* ((tem (rng-normalize-choice-list ipatterns))
- (normalized (cdr tem)))
- (or (rng-intern-choice-shortcut normalized)
- (rng-intern-choice1 normalized (car tem))))))
- (defun rng-intern-optional (ipattern)
- (cond ((rng-ipattern-get-nullable ipattern) ipattern)
- ((eq ipattern rng-not-allowed-ipattern) rng-empty-ipattern)
- (t (rng-intern-choice1
- ;; This is sorted since the empty pattern
- ;; is before everything except not allowed.
- ;; It cannot have a duplicate empty pattern,
- ;; since it is not nullable.
- (cons rng-empty-ipattern
- (if (eq (rng-ipattern-get-type ipattern) 'choice)
- (rng-ipattern-get-child ipattern)
- (list ipattern)))
- t))))
- (defun rng-intern-choice1 (normalized nullable)
- (let ((key (cons 'choice
- (mapcar 'rng-ipattern-get-index normalized))))
- (or (rng-get-ipattern key)
- (rng-put-ipattern key
- 'choice
- nil
- normalized
- nullable))))
- (defun rng-intern-choice-shortcut (ipatterns)
- "Try to shortcut interning a choice list.
- If successful, return the interned pattern. Otherwise return nil."
- (while (and ipatterns
- (eq (car ipatterns)
- rng-not-allowed-ipattern))
- (setq ipatterns (cdr ipatterns)))
- (if ipatterns
- (let ((ret (car ipatterns)))
- (setq ipatterns (cdr ipatterns))
- (while (and ipatterns ret)
- (or (eq (car ipatterns) rng-not-allowed-ipattern)
- (eq (car ipatterns) ret)
- (setq ret nil))
- (setq ipatterns (cdr ipatterns)))
- ret)
- rng-not-allowed-ipattern))
- (defun rng-normalize-choice-list (ipatterns)
- "Normalize a list of choices.
- Expands nested choices, removes not-allowed members, sorts by index
- and removes duplicates. Return a pair whose car says whether the
- list is nullable and whose cdr is the normalized list."
- (let ((sorted t)
- (nullable nil)
- (head (cons nil ipatterns)))
- (let ((tail head)
- (final-tail nil)
- (prev-index -100)
- (cur ipatterns)
- member)
- ;; the cdr of tail is always cur
- (while cur
- (setq member (car cur))
- (or nullable
- (setq nullable (rng-ipattern-get-nullable member)))
- (cond ((eq (rng-ipattern-get-type member) 'choice)
- (setq final-tail
- (append (rng-ipattern-get-child member)
- final-tail))
- (setq cur (cdr cur))
- (setq sorted nil)
- (setcdr tail cur))
- ((eq member rng-not-allowed-ipattern)
- (setq cur (cdr cur))
- (setcdr tail cur))
- (t
- (if (and sorted
- (let ((cur-index (rng-ipattern-get-index member)))
- (if (>= prev-index cur-index)
- (or (= prev-index cur-index) ; will remove it
- (setq sorted nil)) ; won't remove it
- (setq prev-index cur-index)
- ;; won't remove it
- nil)))
- (progn
- ;; remove it
- (setq cur (cdr cur))
- (setcdr tail cur))
- ;; don't remove it
- (setq tail cur)
- (setq cur (cdr cur))))))
- (setcdr tail final-tail))
- (setq head (cdr head))
- (cons nullable
- (if sorted
- head
- (rng-uniquify-eq (sort head 'rng-compare-ipattern))))))
- (defun rng-compare-ipattern (p1 p2)
- (< (rng-ipattern-get-index p1)
- (rng-ipattern-get-index p2)))
- ;;; Name classes
- (defsubst rng-name-class-contains (nc nm)
- (if (consp nc)
- (equal nm nc)
- (rng-name-class-contains1 nc nm)))
- (defun rng-name-class-contains1 (nc nm)
- (let ((type (aref nc 0)))
- (cond ((eq type 'any-name) t)
- ((eq type 'any-name-except)
- (not (rng-name-class-contains (aref nc 1) nm)))
- ((eq type 'ns-name)
- (eq (car nm) (aref nc 1)))
- ((eq type 'ns-name-except)
- (and (eq (car nm) (aref nc 1))
- (not (rng-name-class-contains (aref nc 2) nm))))
- ((eq type 'choice)
- (let ((choices (aref nc 1))
- (ret nil))
- (while choices
- (if (rng-name-class-contains (car choices) nm)
- (progn
- (setq choices nil)
- (setq ret t))
- (setq choices (cdr choices))))
- ret)))))
- (defun rng-name-class-possible-names (nc accum)
- "Return a list of possible names that nameclass NC can match.
- Each possible name should be returned as a (NAMESPACE . LOCAL-NAME)
- pair, where NAMESPACE is a symbol or nil and LOCAL-NAME is a string.
- NAMESPACE, if nil, matches the absent namespace. ACCUM is a list of
- names which should be appended to the returned list. The returned
- list may contain duplicates."
- (if (consp nc)
- (cons nc accum)
- (when (eq (aref nc 0) 'choice)
- (let ((members (aref nc 1)) member)
- (while members
- (setq member (car members))
- (setq accum
- (if (consp member)
- (cons member accum)
- (rng-name-class-possible-names member
- accum)))
- (setq members (cdr members)))))
- accum))
- ;;; Debugging utilities
- (defun rng-ipattern-to-string (ipattern)
- (let ((type (rng-ipattern-get-type ipattern)))
- (cond ((eq type 'after)
- (concat (rng-ipattern-to-string
- (rng-ipattern-get-child ipattern))
- " </> "
- (rng-ipattern-to-string
- (rng-ipattern-get-after ipattern))))
- ((eq type 'element)
- (concat "element "
- (rng-name-class-to-string
- (rng-ipattern-get-name-class ipattern))
- ;; we can get cycles with elements so don't print it out
- " {...}"))
- ((eq type 'attribute)
- (concat "attribute "
- (rng-name-class-to-string
- (rng-ipattern-get-name-class ipattern))
- " { "
- (rng-ipattern-to-string
- (rng-ipattern-get-child ipattern))
- " } "))
- ((eq type 'empty) "empty")
- ((eq type 'text) "text")
- ((eq type 'not-allowed) "notAllowed")
- ((eq type 'one-or-more)
- (concat (rng-ipattern-to-string
- (rng-ipattern-get-child ipattern))
- "+"))
- ((eq type 'choice)
- (concat "("
- (mapconcat 'rng-ipattern-to-string
- (rng-ipattern-get-child ipattern)
- " | ")
- ")"))
- ((eq type 'group)
- (concat "("
- (mapconcat 'rng-ipattern-to-string
- (rng-ipattern-get-child ipattern)
- ", ")
- ")"))
- ((eq type 'interleave)
- (concat "("
- (mapconcat 'rng-ipattern-to-string
- (rng-ipattern-get-child ipattern)
- " & ")
- ")"))
- (t (symbol-name type)))))
- (defun rng-name-class-to-string (nc)
- (if (consp nc)
- (cdr nc)
- (let ((type (aref nc 0)))
- (cond ((eq type 'choice)
- (mapconcat 'rng-name-class-to-string
- (aref nc 1)
- "|"))
- (t (concat (symbol-name type) "*"))))))
- ;;; Compiling
- (defun rng-compile-maybe-init ()
- (unless rng-compile-table
- (setq rng-compile-table (make-hash-table :test 'eq))))
- (defun rng-compile-clear ()
- (when rng-compile-table
- (clrhash rng-compile-table)))
- (defun rng-compile (pattern)
- (or (gethash pattern rng-compile-table)
- (let ((ipattern (apply (get (car pattern) 'rng-compile)
- (cdr pattern))))
- (puthash pattern ipattern rng-compile-table)
- ipattern)))
- (put 'empty 'rng-compile 'rng-compile-empty)
- (put 'text 'rng-compile 'rng-compile-text)
- (put 'not-allowed 'rng-compile 'rng-compile-not-allowed)
- (put 'element 'rng-compile 'rng-compile-element)
- (put 'attribute 'rng-compile 'rng-compile-attribute)
- (put 'choice 'rng-compile 'rng-compile-choice)
- (put 'optional 'rng-compile 'rng-compile-optional)
- (put 'group 'rng-compile 'rng-compile-group)
- (put 'interleave 'rng-compile 'rng-compile-interleave)
- (put 'ref 'rng-compile 'rng-compile-ref)
- (put 'one-or-more 'rng-compile 'rng-compile-one-or-more)
- (put 'zero-or-more 'rng-compile 'rng-compile-zero-or-more)
- (put 'mixed 'rng-compile 'rng-compile-mixed)
- (put 'data 'rng-compile 'rng-compile-data)
- (put 'data-except 'rng-compile 'rng-compile-data-except)
- (put 'value 'rng-compile 'rng-compile-value)
- (put 'list 'rng-compile 'rng-compile-list)
- (defun rng-compile-not-allowed () rng-not-allowed-ipattern)
- (defun rng-compile-empty () rng-empty-ipattern)
- (defun rng-compile-text () rng-text-ipattern)
- (defun rng-compile-element (name-class pattern)
- ;; don't intern
- (rng-make-ipattern 'element
- (rng-gen-ipattern-index)
- (rng-compile-name-class name-class)
- pattern ; compile lazily
- nil))
- (defun rng-element-get-child (element)
- (let ((tem (rng-ipattern-get-child element)))
- (if (vectorp tem)
- tem
- (rng-ipattern-set-child element (rng-compile tem)))))
- (defun rng-compile-attribute (name-class pattern)
- (rng-intern-attribute (rng-compile-name-class name-class)
- (rng-compile pattern)))
- (defun rng-compile-ref (pattern name)
- (and (memq pattern rng-being-compiled)
- (rng-compile-error "Reference loop on symbol %s" name))
- (setq rng-being-compiled
- (cons pattern rng-being-compiled))
- (unwind-protect
- (rng-compile pattern)
- (setq rng-being-compiled
- (cdr rng-being-compiled))))
- (defun rng-compile-one-or-more (pattern)
- (rng-intern-one-or-more (rng-compile pattern)))
- (defun rng-compile-zero-or-more (pattern)
- (rng-intern-optional
- (rng-intern-one-or-more (rng-compile pattern))))
- (defun rng-compile-optional (pattern)
- (rng-intern-optional (rng-compile pattern)))
- (defun rng-compile-mixed (pattern)
- (rng-intern-interleave (cons rng-text-ipattern
- (list (rng-compile pattern)))))
- (defun rng-compile-list (pattern)
- (rng-intern-list (rng-compile pattern)))
- (defun rng-compile-choice (&rest patterns)
- (rng-intern-choice (mapcar 'rng-compile patterns)))
- (defun rng-compile-group (&rest patterns)
- (rng-intern-group (mapcar 'rng-compile patterns)))
- (defun rng-compile-interleave (&rest patterns)
- (rng-intern-interleave (mapcar 'rng-compile patterns)))
- (defun rng-compile-dt (name params)
- (let ((rng-dt-error-reporter 'rng-compile-error))
- (funcall (let ((uri (car name)))
- (or (get uri 'rng-dt-compile)
- (rng-compile-error "Unknown datatype library %s" uri)))
- (cdr name)
- params)))
- (defun rng-compile-data (name params)
- (let ((dt (rng-compile-dt name params)))
- (rng-intern-data (cdr dt) (car dt))))
- (defun rng-compile-data-except (name params pattern)
- (rng-intern-data-except (cdr (rng-compile-dt name params))
- (rng-compile pattern)))
- (defun rng-compile-value (name str context)
- (let* ((dt (cdr (rng-compile-dt name '())))
- (rng-dt-namespace-context-getter (list 'identity context))
- (obj (rng-dt-make-value dt str)))
- (if obj
- (rng-intern-value dt obj)
- (rng-compile-error "Value %s is not a valid instance of the datatype %s"
- str
- name))))
- (defun rng-compile-name-class (nc)
- (let ((type (car nc)))
- (cond ((eq type 'name) (nth 1 nc))
- ((eq type 'any-name) [any-name])
- ((eq type 'any-name-except)
- (vector 'any-name-except
- (rng-compile-name-class (nth 1 nc))))
- ((eq type 'ns-name)
- (vector 'ns-name (nth 1 nc)))
- ((eq type 'ns-name-except)
- (vector 'ns-name-except
- (nth 1 nc)
- (rng-compile-name-class (nth 2 nc))))
- ((eq type 'choice)
- (vector 'choice
- (mapcar 'rng-compile-name-class (cdr nc))))
- (t (error "Bad name-class type %s" type)))))
- ;;; Searching patterns
- ;; We write this non-recursively to avoid hitting max-lisp-eval-depth
- ;; on large schemas.
- (defun rng-map-element-attribute (function pattern accum &rest args)
- (let ((searched (make-hash-table :test 'eq))
- type todo patterns)
- (while (progn
- (setq type (car pattern))
- (cond ((memq type '(element attribute))
- (setq accum
- (apply function
- (cons pattern
- (cons accum args))))
- (setq pattern (nth 2 pattern)))
- ((eq type 'ref)
- (setq pattern (nth 1 pattern))
- (if (gethash pattern searched)
- (setq pattern nil)
- (puthash pattern t searched)))
- ((memq type '(choice group interleave))
- (setq todo (cons (cdr pattern) todo))
- (setq pattern nil))
- ((memq type '(one-or-more
- zero-or-more
- optional
- mixed))
- (setq pattern (nth 1 pattern)))
- (t (setq pattern nil)))
- (cond (pattern)
- (patterns
- (setq pattern (car patterns))
- (setq patterns (cdr patterns))
- t)
- (todo
- (setq patterns (car todo))
- (setq todo (cdr todo))
- (setq pattern (car patterns))
- (setq patterns (cdr patterns))
- t))))
- accum))
- (defun rng-find-element-content-pattern (pattern accum name)
- (if (and (eq (car pattern) 'element)
- (rng-search-name name (nth 1 pattern)))
- (cons (rng-compile (nth 2 pattern)) accum)
- accum))
- (defun rng-search-name (name nc)
- (let ((type (car nc)))
- (cond ((eq type 'name)
- (equal (cadr nc) name))
- ((eq type 'choice)
- (let ((choices (cdr nc))
- (found nil))
- (while (and choices (not found))
- (if (rng-search-name name (car choices))
- (setq found t)
- (setq choices (cdr choices))))
- found))
- (t nil))))
- (defun rng-find-name-class-uris (nc accum)
- (let ((type (car nc)))
- (cond ((eq type 'name)
- (rng-accum-namespace-uri (car (nth 1 nc)) accum))
- ((memq type '(ns-name ns-name-except))
- (rng-accum-namespace-uri (nth 1 nc) accum))
- ((eq type 'choice)
- (let ((choices (cdr nc)))
- (while choices
- (setq accum
- (rng-find-name-class-uris (car choices) accum))
- (setq choices (cdr choices))))
- accum)
- (t accum))))
- (defun rng-accum-namespace-uri (ns accum)
- (if (and ns (not (memq ns accum)))
- (cons ns accum)
- accum))
- ;;; Derivatives
- (defun rng-ipattern-text-typed-p (ipattern)
- (let ((memo (rng-ipattern-get-memo-text-typed ipattern)))
- (if (eq memo 'unknown)
- (rng-ipattern-set-memo-text-typed
- ipattern
- (rng-ipattern-compute-text-typed-p ipattern))
- memo)))
- (defun rng-ipattern-compute-text-typed-p (ipattern)
- (let ((type (rng-ipattern-get-type ipattern)))
- (cond ((eq type 'choice)
- (let ((cur (rng-ipattern-get-child ipattern))
- (ret nil))
- (while (and cur (not ret))
- (if (rng-ipattern-text-typed-p (car cur))
- (setq ret t)
- (setq cur (cdr cur))))
- ret))
- ((eq type 'group)
- (let ((cur (rng-ipattern-get-child ipattern))
- (ret nil)
- member)
- (while (and cur (not ret))
- (setq member (car cur))
- (if (rng-ipattern-text-typed-p member)
- (setq ret t))
- (setq cur
- (and (rng-ipattern-get-nullable member)
- (cdr cur))))
- ret))
- ((eq type 'after)
- (rng-ipattern-text-typed-p (rng-ipattern-get-child ipattern)))
- (t (and (memq type '(value list data data-except)) t)))))
- (defun rng-start-tag-open-deriv (ipattern nm)
- (or (rng-memo-map-get
- nm
- (rng-ipattern-get-memo-map-start-tag-open-deriv ipattern))
- (rng-ipattern-memo-start-tag-open-deriv
- ipattern
- nm
- (rng-compute-start-tag-open-deriv ipattern nm))))
- (defun rng-ipattern-memo-start-tag-open-deriv (ipattern nm deriv)
- (or (memq ipattern rng-const-ipatterns)
- (rng-ipattern-set-memo-map-start-tag-open-deriv
- ipattern
- (rng-memo-map-add nm
- deriv
- (rng-ipattern-get-memo-map-start-tag-open-deriv
- ipattern))))
- deriv)
- (defun rng-compute-start-tag-open-deriv (ipattern nm)
- (let ((type (rng-ipattern-get-type ipattern)))
- (cond ((eq type 'choice)
- (rng-transform-choice `(lambda (p)
- (rng-start-tag-open-deriv p ',nm))
- ipattern))
- ((eq type 'element)
- (if (rng-name-class-contains
- (rng-ipattern-get-name-class ipattern)
- nm)
- (rng-intern-after (rng-element-get-child ipattern)
- rng-empty-ipattern)
- rng-not-allowed-ipattern))
- ((eq type 'group)
- (rng-transform-group-nullable
- `(lambda (p) (rng-start-tag-open-deriv p ',nm))
- 'rng-cons-group-after
- ipattern))
- ((eq type 'interleave)
- (rng-transform-interleave-single
- `(lambda (p) (rng-start-tag-open-deriv p ',nm))
- 'rng-subst-interleave-after
- ipattern))
- ((eq type 'one-or-more)
- (rng-apply-after
- `(lambda (p)
- (rng-intern-group (list p ,(rng-intern-optional ipattern))))
- (rng-start-tag-open-deriv (rng-ipattern-get-child ipattern)
- nm)))
- ((eq type 'after)
- (rng-apply-after
- `(lambda (p)
- (rng-intern-after p
- ,(rng-ipattern-get-after ipattern)))
- (rng-start-tag-open-deriv (rng-ipattern-get-child ipattern)
- nm)))
- (t rng-not-allowed-ipattern))))
- (defun rng-start-attribute-deriv (ipattern nm)
- (or (rng-memo-map-get
- nm
- (rng-ipattern-get-memo-map-start-attribute-deriv ipattern))
- (rng-ipattern-memo-start-attribute-deriv
- ipattern
- nm
- (rng-compute-start-attribute-deriv ipattern nm))))
- (defun rng-ipattern-memo-start-attribute-deriv (ipattern nm deriv)
- (or (memq ipattern rng-const-ipatterns)
- (rng-ipattern-set-memo-map-start-attribute-deriv
- ipattern
- (rng-memo-map-add
- nm
- deriv
- (rng-ipattern-get-memo-map-start-attribute-deriv ipattern))))
- deriv)
- (defun rng-compute-start-attribute-deriv (ipattern nm)
- (let ((type (rng-ipattern-get-type ipattern)))
- (cond ((eq type 'choice)
- (rng-transform-choice `(lambda (p)
- (rng-start-attribute-deriv p ',nm))
- ipattern))
- ((eq type 'attribute)
- (if (rng-name-class-contains
- (rng-ipattern-get-name-class ipattern)
- nm)
- (rng-intern-after (rng-ipattern-get-child ipattern)
- rng-empty-ipattern)
- rng-not-allowed-ipattern))
- ((eq type 'group)
- (rng-transform-interleave-single
- `(lambda (p) (rng-start-attribute-deriv p ',nm))
- 'rng-subst-group-after
- ipattern))
- ((eq type 'interleave)
- (rng-transform-interleave-single
- `(lambda (p) (rng-start-attribute-deriv p ',nm))
- 'rng-subst-interleave-after
- ipattern))
- ((eq type 'one-or-more)
- (rng-apply-after
- `(lambda (p)
- (rng-intern-group (list p ,(rng-intern-optional ipattern))))
- (rng-start-attribute-deriv (rng-ipattern-get-child ipattern)
- nm)))
- ((eq type 'after)
- (rng-apply-after
- `(lambda (p)
- (rng-intern-after p ,(rng-ipattern-get-after ipattern)))
- (rng-start-attribute-deriv (rng-ipattern-get-child ipattern)
- nm)))
- (t rng-not-allowed-ipattern))))
- (defun rng-cons-group-after (x y)
- (rng-apply-after `(lambda (p) (rng-intern-group (cons p ',y)))
- x))
- (defun rng-subst-group-after (new old list)
- (rng-apply-after `(lambda (p)
- (rng-intern-group (rng-substq p ,old ',list)))
- new))
- (defun rng-subst-interleave-after (new old list)
- (rng-apply-after `(lambda (p)
- (rng-intern-interleave (rng-substq p ,old ',list)))
- new))
- (defun rng-apply-after (f ipattern)
- (let ((type (rng-ipattern-get-type ipattern)))
- (cond ((eq type 'after)
- (rng-intern-after
- (rng-ipattern-get-child ipattern)
- (funcall f
- (rng-ipattern-get-after ipattern))))
- ((eq type 'choice)
- (rng-transform-choice `(lambda (x) (rng-apply-after ,f x))
- ipattern))
- (t rng-not-allowed-ipattern))))
- (defun rng-start-tag-close-deriv (ipattern)
- (or (rng-ipattern-get-memo-start-tag-close-deriv ipattern)
- (rng-ipattern-set-memo-start-tag-close-deriv
- ipattern
- (rng-compute-start-tag-close-deriv ipattern))))
- (defconst rng-transform-map
- '((choice . rng-transform-choice)
- (group . rng-transform-group)
- (interleave . rng-transform-interleave)
- (one-or-more . rng-transform-one-or-more)
- (after . rng-transform-after-child)))
- (defun rng-compute-start-tag-close-deriv (ipattern)
- (let* ((type (rng-ipattern-get-type ipattern)))
- (if (eq type 'attribute)
- rng-not-allowed-ipattern
- (let ((transform (assq type rng-transform-map)))
- (if transform
- (funcall (cdr transform)
- 'rng-start-tag-close-deriv
- ipattern)
- ipattern)))))
- (defun rng-ignore-attributes-deriv (ipattern)
- (let* ((type (rng-ipattern-get-type ipattern)))
- (if (eq type 'attribute)
- rng-empty-ipattern
- (let ((transform (assq type rng-transform-map)))
- (if transform
- (funcall (cdr transform)
- 'rng-ignore-attributes-deriv
- ipattern)
- ipattern)))))
- (defun rng-text-only-deriv (ipattern)
- (or (rng-ipattern-get-memo-text-only-deriv ipattern)
- (rng-ipattern-set-memo-text-only-deriv
- ipattern
- (rng-compute-text-only-deriv ipattern))))
- (defun rng-compute-text-only-deriv (ipattern)
- (let* ((type (rng-ipattern-get-type ipattern)))
- (if (eq type 'element)
- rng-not-allowed-ipattern
- (let ((transform (assq type
- '((choice . rng-transform-choice)
- (group . rng-transform-group)
- (interleave . rng-transform-interleave)
- (one-or-more . rng-transform-one-or-more)
- (after . rng-transform-after-child)))))
- (if transform
- (funcall (cdr transform)
- 'rng-text-only-deriv
- ipattern)
- ipattern)))))
- (defun rng-mixed-text-deriv (ipattern)
- (or (rng-ipattern-get-memo-mixed-text-deriv ipattern)
- (rng-ipattern-set-memo-mixed-text-deriv
- ipattern
- (rng-compute-mixed-text-deriv ipattern))))
- (defun rng-compute-mixed-text-deriv (ipattern)
- (let ((type (rng-ipattern-get-type ipattern)))
- (cond ((eq type 'text) ipattern)
- ((eq type 'after)
- (rng-transform-after-child 'rng-mixed-text-deriv
- ipattern))
- ((eq type 'choice)
- (rng-transform-choice 'rng-mixed-text-deriv
- ipattern))
- ((eq type 'one-or-more)
- (rng-intern-group
- (list (rng-mixed-text-deriv
- (rng-ipattern-get-child ipattern))
- (rng-intern-optional ipattern))))
- ((eq type 'group)
- (rng-transform-group-nullable
- 'rng-mixed-text-deriv
- (lambda (x y) (rng-intern-group (cons x y)))
- ipattern))
- ((eq type 'interleave)
- (rng-transform-interleave-single
- 'rng-mixed-text-deriv
- (lambda (new old list) (rng-intern-interleave
- (rng-substq new old list)))
- ipattern))
- ((and (eq type 'data)
- (not (rng-ipattern-get-memo-text-typed ipattern)))
- ipattern)
- (t rng-not-allowed-ipattern))))
- (defun rng-end-tag-deriv (ipattern)
- (or (rng-ipattern-get-memo-end-tag-deriv ipattern)
- (rng-ipattern-set-memo-end-tag-deriv
- ipattern
- (rng-compute-end-tag-deriv ipattern))))
- (defun rng-compute-end-tag-deriv (ipattern)
- (let ((type (rng-ipattern-get-type ipattern)))
- (cond ((eq type 'choice)
- (rng-intern-choice
- (mapcar 'rng-end-tag-deriv
- (rng-ipattern-get-child ipattern))))
- ((eq type 'after)
- (if (rng-ipattern-get-nullable
- (rng-ipattern-get-child ipattern))
- (rng-ipattern-get-after ipattern)
- rng-not-allowed-ipattern))
- (t rng-not-allowed-ipattern))))
- (defun rng-data-deriv (ipattern value)
- (or (rng-memo-map-get value
- (rng-ipattern-get-memo-map-data-deriv ipattern))
- (and (rng-memo-map-get
- (cons value (rng-namespace-context-get-no-trace))
- (rng-ipattern-get-memo-map-data-deriv ipattern))
- (rng-memo-map-get
- (cons value (apply (car rng-dt-namespace-context-getter)
- (cdr rng-dt-namespace-context-getter)))
- (rng-ipattern-get-memo-map-data-deriv ipattern)))
- (let* ((used-context (vector nil))
- (rng-dt-namespace-context-getter
- (cons 'rng-namespace-context-tracer
- (cons used-context
- rng-dt-namespace-context-getter)))
- (deriv (rng-compute-data-deriv ipattern value)))
- (rng-ipattern-memo-data-deriv ipattern
- value
- (aref used-context 0)
- deriv))))
- (defun rng-namespace-context-tracer (used getter &rest args)
- (let ((context (apply getter args)))
- (aset used 0 context)
- context))
- (defun rng-namespace-context-get-no-trace ()
- (let ((tem rng-dt-namespace-context-getter))
- (while (and tem (eq (car tem) 'rng-namespace-context-tracer))
- (setq tem (cddr tem)))
- (apply (car tem) (cdr tem))))
- (defconst rng-memo-data-deriv-max-length 80
- "Don't memoize data-derivs for values longer than this.")
- (defun rng-ipattern-memo-data-deriv (ipattern value context deriv)
- (or (memq ipattern rng-const-ipatterns)
- (> (length value) rng-memo-data-deriv-max-length)
- (rng-ipattern-set-memo-map-data-deriv
- ipattern
- (rng-memo-map-add (if context (cons value context) value)
- deriv
- (rng-ipattern-get-memo-map-data-deriv ipattern)
- t)))
- deriv)
- (defun rng-compute-data-deriv (ipattern value)
- (let ((type (rng-ipattern-get-type ipattern)))
- (cond ((eq type 'text) ipattern)
- ((eq type 'choice)
- (rng-transform-choice `(lambda (p) (rng-data-deriv p ,value))
- ipattern))
- ((eq type 'group)
- (rng-transform-group-nullable
- `(lambda (p) (rng-data-deriv p ,value))
- (lambda (x y) (rng-intern-group (cons x y)))
- ipattern))
- ((eq type 'one-or-more)
- (rng-intern-group (list (rng-data-deriv
- (rng-ipattern-get-child ipattern)
- value)
- (rng-intern-optional ipattern))))
- ((eq type 'after)
- (let ((child (rng-ipattern-get-child ipattern)))
- (if (or (rng-ipattern-get-nullable
- (rng-data-deriv child value))
- (and (rng-ipattern-get-nullable child)
- (rng-blank-p value)))
- (rng-ipattern-get-after ipattern)
- rng-not-allowed-ipattern)))
- ((eq type 'data)
- (if (rng-dt-make-value (rng-ipattern-get-datatype ipattern)
- value)
- rng-empty-ipattern
- rng-not-allowed-ipattern))
- ((eq type 'data-except)
- (if (and (rng-dt-make-value (rng-ipattern-get-datatype ipattern)
- value)
- (not (rng-ipattern-get-nullable
- (rng-data-deriv
- (rng-ipattern-get-child ipattern)
- value))))
- rng-empty-ipattern
- rng-not-allowed-ipattern))
- ((eq type 'value)
- (if (equal (rng-dt-make-value (rng-ipattern-get-datatype ipattern)
- value)
- (rng-ipattern-get-value-object ipattern))
- rng-empty-ipattern
- rng-not-allowed-ipattern))
- ((eq type 'list)
- (let ((tokens (split-string value))
- (state (rng-ipattern-get-child ipattern)))
- (while (and tokens
- (not (eq state rng-not-allowed-ipattern)))
- (setq state (rng-data-deriv state (car tokens)))
- (setq tokens (cdr tokens)))
- (if (rng-ipattern-get-nullable state)
- rng-empty-ipattern
- rng-not-allowed-ipattern)))
- ;; don't think interleave can occur
- ;; since we do text-only-deriv first
- (t rng-not-allowed-ipattern))))
- (defun rng-transform-multi (f ipattern interner)
- (let* ((members (rng-ipattern-get-child ipattern))
- (transformed (mapcar f members)))
- (if (rng-members-eq members transformed)
- ipattern
- (funcall interner transformed))))
- (defun rng-transform-choice (f ipattern)
- (rng-transform-multi f ipattern 'rng-intern-choice))
- (defun rng-transform-group (f ipattern)
- (rng-transform-multi f ipattern 'rng-intern-group))
- (defun rng-transform-interleave (f ipattern)
- (rng-transform-multi f ipattern 'rng-intern-interleave))
- (defun rng-transform-one-or-more (f ipattern)
- (let* ((child (rng-ipattern-get-child ipattern))
- (transformed (funcall f child)))
- (if (eq child transformed)
- ipattern
- (rng-intern-one-or-more transformed))))
- (defun rng-transform-after-child (f ipattern)
- (let* ((child (rng-ipattern-get-child ipattern))
- (transformed (funcall f child)))
- (if (eq child transformed)
- ipattern
- (rng-intern-after transformed
- (rng-ipattern-get-after ipattern)))))
- (defun rng-transform-interleave-single (f subster ipattern)
- (let ((children (rng-ipattern-get-child ipattern))
- found)
- (while (and children (not found))
- (let* ((child (car children))
- (transformed (funcall f child)))
- (if (eq transformed rng-not-allowed-ipattern)
- (setq children (cdr children))
- (setq found
- (funcall subster
- transformed
- child
- (rng-ipattern-get-child ipattern))))))
- (or found
- rng-not-allowed-ipattern)))
- (defun rng-transform-group-nullable (f conser ipattern)
- "Given a group x1,...,xn,y1,...,yn where the xs are all
- nullable and y1 isn't, return a choice
- (conser f(x1) x2,...,xm,y1,...,yn)
- |(conser f(x2) x3,...,xm,y1,...,yn)
- |...
- |(conser f(xm) y1,...,yn)
- |(conser f(y1) y2,...,yn)"
- (rng-intern-choice
- (rng-transform-group-nullable-gen-choices
- f
- conser
- (rng-ipattern-get-child ipattern))))
- (defun rng-transform-group-nullable-gen-choices (f conser members)
- (let ((head (car members))
- (tail (cdr members)))
- (if tail
- (cons (funcall conser (funcall f head) tail)
- (if (rng-ipattern-get-nullable head)
- (rng-transform-group-nullable-gen-choices f conser tail)
- nil))
- (list (funcall f head)))))
- (defun rng-members-eq (list1 list2)
- (while (and list1
- list2
- (eq (car list1) (car list2)))
- (setq list1 (cdr list1))
- (setq list2 (cdr list2)))
- (and (null list1) (null list2)))
- (defun rng-ipattern-after (ipattern)
- (let ((type (rng-ipattern-get-type ipattern)))
- (cond ((eq type 'choice)
- (rng-transform-choice 'rng-ipattern-after ipattern))
- ((eq type 'after)
- (rng-ipattern-get-after ipattern))
- ((eq type 'not-allowed)
- ipattern)
- (t (error "Internal error in rng-ipattern-after: unexpected type %s" type)))))
- (defun rng-unknown-start-tag-open-deriv (ipattern)
- (rng-intern-after (rng-compile rng-any-content) ipattern))
- (defun rng-ipattern-optionalize-elements (ipattern)
- (let* ((type (rng-ipattern-get-type ipattern))
- (transform (assq type rng-transform-map)))
- (cond (transform
- (funcall (cdr transform)
- 'rng-ipattern-optionalize-elements
- ipattern))
- ((eq type 'element)
- (rng-intern-optional ipattern))
- (t ipattern))))
- (defun rng-ipattern-empty-before-p (ipattern)
- (let ((type (rng-ipattern-get-type ipattern)))
- (cond ((eq type 'after)
- (eq (rng-ipattern-get-child ipattern) rng-empty-ipattern))
- ((eq type 'choice)
- (let ((members (rng-ipattern-get-child ipattern))
- (ret t))
- (while (and members ret)
- (or (rng-ipattern-empty-before-p (car members))
- (setq ret nil))
- (setq members (cdr members)))
- ret))
- (t nil))))
- (defun rng-ipattern-possible-start-tags (ipattern accum)
- (let ((type (rng-ipattern-get-type ipattern)))
- (cond ((eq type 'after)
- (rng-ipattern-possible-start-tags
- (rng-ipattern-get-child ipattern)
- accum))
- ((memq type '(choice interleave))
- (let ((members (rng-ipattern-get-child ipattern)))
- (while members
- (setq accum
- (rng-ipattern-possible-start-tags (car members)
- accum))
- (setq members (cdr members))))
- accum)
- ((eq type 'group)
- (let ((members (rng-ipattern-get-child ipattern)))
- (while members
- (setq accum
- (rng-ipattern-possible-start-tags (car members)
- accum))
- (setq members
- (and (rng-ipattern-get-nullable (car members))
- (cdr members)))))
- accum)
- ((eq type 'element)
- (if (eq (rng-element-get-child ipattern) rng-not-allowed-ipattern)
- accum
- (rng-name-class-possible-names
- (rng-ipattern-get-name-class ipattern)
- accum)))
- ((eq type 'one-or-more)
- (rng-ipattern-possible-start-tags
- (rng-ipattern-get-child ipattern)
- accum))
- (t accum))))
- (defun rng-ipattern-start-tag-possible-p (ipattern)
- (let ((type (rng-ipattern-get-type ipattern)))
- (cond ((memq type '(after one-or-more))
- (rng-ipattern-start-tag-possible-p
- (rng-ipattern-get-child ipattern)))
- ((memq type '(choice interleave))
- (let ((members (rng-ipattern-get-child ipattern))
- (possible nil))
- (while (and members (not possible))
- (setq possible
- (rng-ipattern-start-tag-possible-p (car members)))
- (setq members (cdr members)))
- possible))
- ((eq type 'group)
- (let ((members (rng-ipattern-get-child ipattern))
- (possible nil))
- (while (and members (not possible))
- (setq possible
- (rng-ipattern-start-tag-possible-p (car members)))
- (setq members
- (and (rng-ipattern-get-nullable (car members))
- (cdr members))))
- possible))
- ((eq type 'element)
- (not (eq (rng-element-get-child ipattern)
- rng-not-allowed-ipattern)))
- (t nil))))
- (defun rng-ipattern-possible-attributes (ipattern accum)
- (let ((type (rng-ipattern-get-type ipattern)))
- (cond ((eq type 'after)
- (rng-ipattern-possible-attributes (rng-ipattern-get-child ipattern)
- accum))
- ((memq type '(choice interleave group))
- (let ((members (rng-ipattern-get-child ipattern)))
- (while members
- (setq accum
- (rng-ipattern-possible-attributes (car members)
- accum))
- (setq members (cdr members))))
- accum)
- ((eq type 'attribute)
- (rng-name-class-possible-names
- (rng-ipattern-get-name-class ipattern)
- accum))
- ((eq type 'one-or-more)
- (rng-ipattern-possible-attributes
- (rng-ipattern-get-child ipattern)
- accum))
- (t accum))))
- (defun rng-ipattern-possible-values (ipattern accum)
- (let ((type (rng-ipattern-get-type ipattern)))
- (cond ((eq type 'after)
- (rng-ipattern-possible-values (rng-ipattern-get-child ipattern)
- accum))
- ((eq type 'choice)
- (let ((members (rng-ipattern-get-child ipattern)))
- (while members
- (setq accum
- (rng-ipattern-possible-values (car members)
- accum))
- (setq members (cdr members))))
- accum)
- ((eq type 'value)
- (let ((value-object (rng-ipattern-get-value-object ipattern)))
- (if (stringp value-object)
- (cons value-object accum)
- accum)))
- (t accum))))
- (defun rng-ipattern-required-element (ipattern)
- (let ((type (rng-ipattern-get-type ipattern)))
- (cond ((memq type '(after one-or-more))
- (rng-ipattern-required-element (rng-ipattern-get-child ipattern)))
- ((eq type 'choice)
- (let* ((members (rng-ipattern-get-child ipattern))
- (required (rng-ipattern-required-element (car members))))
- (while (and required
- (setq members (cdr members)))
- (unless (equal required
- (rng-ipattern-required-element (car members)))
- (setq required nil)))
- required))
- ((eq type 'group)
- (let ((members (rng-ipattern-get-child ipattern))
- required)
- (while (and (not (setq required
- (rng-ipattern-required-element
- (car members))))
- (rng-ipattern-get-nullable (car members))
- (setq members (cdr members))))
- required))
- ((eq type 'interleave)
- (let ((members (rng-ipattern-get-child ipattern))
- required)
- (while members
- (let ((tem (rng-ipattern-required-element (car members))))
- (cond ((not tem)
- (setq members (cdr members)))
- ((not required)
- (setq required tem)
- (setq members (cdr members)))
- ((equal required tem)
- (setq members (cdr members)))
- (t
- (setq required nil)
- (setq members nil)))))
- required))
- ((eq type 'element)
- (let ((nc (rng-ipattern-get-name-class ipattern)))
- (and (consp nc)
- (not (eq (rng-element-get-child ipattern)
- rng-not-allowed-ipattern))
- nc))))))
- (defun rng-ipattern-required-attributes (ipattern accum)
- (let ((type (rng-ipattern-get-type ipattern)))
- (cond ((eq type 'after)
- (rng-ipattern-required-attributes (rng-ipattern-get-child ipattern)
- accum))
- ((memq type '(interleave group))
- (let ((members (rng-ipattern-get-child ipattern)))
- (while members
- (setq accum
- (rng-ipattern-required-attributes (car members)
- accum))
- (setq members (cdr members))))
- accum)
- ((eq type 'choice)
- (let ((members (rng-ipattern-get-child ipattern))
- in-all in-this new-in-all)
- (setq in-all
- (rng-ipattern-required-attributes (car members)
- nil))
- (while (and in-all (setq members (cdr members)))
- (setq in-this
- (rng-ipattern-required-attributes (car members) nil))
- (setq new-in-all nil)
- (while in-this
- (when (member (car in-this) in-all)
- (setq new-in-all
- (cons (car in-this) new-in-all)))
- (setq in-this (cdr in-this)))
- (setq in-all new-in-all))
- (append in-all accum)))
- ((eq type 'attribute)
- (let ((nc (rng-ipattern-get-name-class ipattern)))
- (if (consp nc)
- (cons nc accum)
- accum)))
- ((eq type 'one-or-more)
- (rng-ipattern-required-attributes (rng-ipattern-get-child ipattern)
- accum))
- (t accum))))
- (defun rng-compile-error (&rest args)
- (signal 'rng-compile-error
- (list (apply 'format args))))
- (put 'rng-compile-error
- 'error-conditions
- '(error rng-error rng-compile-error))
- (put 'rng-compile-error
- 'error-message
- "Incorrect schema")
- ;;; External API
- (defsubst rng-match-state () rng-match-state)
- (defsubst rng-set-match-state (state)
- (setq rng-match-state state))
- (defsubst rng-match-state-equal (state)
- (eq state rng-match-state))
- (defun rng-schema-changed ()
- (rng-ipattern-clear)
- (rng-compile-clear))
- (defun rng-match-init-buffer ()
- (make-local-variable 'rng-compile-table)
- (make-local-variable 'rng-ipattern-table)
- (make-local-variable 'rng-last-ipattern-index))
- (defun rng-match-start-document ()
- (rng-ipattern-maybe-init)
- (rng-compile-maybe-init)
- (add-hook 'rng-schema-change-hook 'rng-schema-changed nil t)
- (setq rng-match-state (rng-compile rng-current-schema)))
- (defun rng-match-start-tag-open (name)
- (rng-update-match-state (rng-start-tag-open-deriv rng-match-state
- name)))
- (defun rng-match-attribute-name (name)
- (rng-update-match-state (rng-start-attribute-deriv rng-match-state
- name)))
- (defun rng-match-attribute-value (value)
- (rng-update-match-state (rng-data-deriv rng-match-state
- value)))
- (defun rng-match-element-value (value)
- (and (rng-update-match-state (rng-text-only-deriv rng-match-state))
- (rng-update-match-state (rng-data-deriv rng-match-state
- value))))
- (defun rng-match-start-tag-close ()
- (rng-update-match-state (rng-start-tag-close-deriv rng-match-state)))
- (defun rng-match-mixed-text ()
- (rng-update-match-state (rng-mixed-text-deriv rng-match-state)))
- (defun rng-match-end-tag ()
- (rng-update-match-state (rng-end-tag-deriv rng-match-state)))
- (defun rng-match-after ()
- (rng-update-match-state
- (rng-ipattern-after rng-match-state)))
- (defun rng-match-out-of-context-start-tag-open (name)
- (let* ((found (rng-map-element-attribute 'rng-find-element-content-pattern
- rng-current-schema
- nil
- name))
- (content-pattern (if found
- (rng-intern-choice found)
- rng-not-allowed-ipattern)))
- (rng-update-match-state
- (rng-intern-after content-pattern rng-match-state))))
- (defun rng-match-possible-namespace-uris ()
- "Return a list of all the namespace URIs used in the current schema.
- The absent URI is not included, so the result is always a list of symbols."
- (rng-map-element-attribute (lambda (pattern accum)
- (rng-find-name-class-uris (nth 1 pattern)
- accum))
- rng-current-schema
- nil))
- (defun rng-match-unknown-start-tag-open ()
- (rng-update-match-state
- (rng-unknown-start-tag-open-deriv rng-match-state)))
- (defun rng-match-optionalize-elements ()
- (rng-update-match-state
- (rng-ipattern-optionalize-elements rng-match-state)))
- (defun rng-match-ignore-attributes ()
- (rng-update-match-state
- (rng-ignore-attributes-deriv rng-match-state)))
- (defun rng-match-text-typed-p ()
- (rng-ipattern-text-typed-p rng-match-state))
- (defun rng-match-empty-content ()
- (if (rng-match-text-typed-p)
- (rng-match-element-value "")
- (rng-match-end-tag)))
- (defun rng-match-empty-before-p ()
- "Return non-nil if what can be matched before an end-tag is empty.
- In other words, return non-nil if the pattern for what can be matched
- for an end-tag is equivalent to empty."
- (rng-ipattern-empty-before-p rng-match-state))
- (defun rng-match-infer-start-tag-namespace (local-name)
- (let ((ncs (rng-ipattern-possible-start-tags rng-match-state nil))
- (nc nil)
- (ns nil))
- (while ncs
- (setq nc (car ncs))
- (if (and (equal (cdr nc) local-name)
- (symbolp (car nc)))
- (cond ((not ns)
- ;; first possible namespace
- (setq ns (car nc))
- (setq ncs (cdr ncs)))
- ((equal ns (car nc))
- ;; same as first namespace
- (setq ncs (cdr ncs)))
- (t
- ;; more than one possible namespace
- (setq ns nil)
- (setq ncs nil)))
- (setq ncs (cdr ncs))))
- ns))
- (defun rng-match-nullable-p ()
- (rng-ipattern-get-nullable rng-match-state))
- (defun rng-match-possible-start-tag-names ()
- "Return a list of possible names that would be valid for start-tags.
- Each possible name is returned as a (NAMESPACE . LOCAL-NAME) pair,
- where NAMESPACE is a symbol or nil (meaning the absent namespace) and
- LOCAL-NAME is a string. The returned list may contain duplicates."
- (rng-ipattern-possible-start-tags rng-match-state nil))
- ;; This is no longer used. It might be useful so leave it in for now.
- (defun rng-match-start-tag-possible-p ()
- "Return non-nil if a start-tag is possible."
- (rng-ipattern-start-tag-possible-p rng-match-state))
- (defun rng-match-possible-attribute-names ()
- "Return a list of possible names that would be valid for attributes.
- See the function `rng-match-possible-start-tag-names' for
- more information."
- (rng-ipattern-possible-attributes rng-match-state nil))
- (defun rng-match-possible-value-strings ()
- "Return a list of strings that would be valid as content.
- The list may contain duplicates. Typically, the list will not
- be exhaustive."
- (rng-ipattern-possible-values rng-match-state nil))
- (defun rng-match-required-element-name ()
- "Return the name of an element which must occur, or nil if none."
- (rng-ipattern-required-element rng-match-state))
- (defun rng-match-required-attribute-names ()
- "Return a list of names of attributes which must all occur."
- (rng-ipattern-required-attributes rng-match-state nil))
- (defmacro rng-match-save (&rest body)
- (let ((state (make-symbol "state")))
- `(let ((,state rng-match-state))
- (unwind-protect
- (progn ,@body)
- (setq rng-match-state ,state)))))
- (put 'rng-match-save 'lisp-indent-function 0)
- (def-edebug-spec rng-match-save t)
- (defmacro rng-match-with-schema (schema &rest body)
- `(let ((rng-current-schema ,schema)
- rng-match-state
- rng-compile-table
- rng-ipattern-table
- rng-last-ipattern-index)
- (rng-ipattern-maybe-init)
- (rng-compile-maybe-init)
- (setq rng-match-state (rng-compile rng-current-schema))
- ,@body))
- (put 'rng-match-with-schema 'lisp-indent-function 1)
- (def-edebug-spec rng-match-with-schema t)
- (provide 'rng-match)
- ;;; rng-match.el ends here
|