ebnf-dtd.el 41 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353
  1. ;;; ebnf-dtd.el --- parser for DTD (Document Type Definition for XML)
  2. ;; Copyright (C) 2001-2012 Free Software Foundation, Inc.
  3. ;; Author: Vinicius Jose Latorre <viniciusjl@ig.com.br>
  4. ;; Maintainer: Vinicius Jose Latorre <viniciusjl@ig.com.br>
  5. ;; Keywords: wp, ebnf, PostScript
  6. ;; Version: 1.1
  7. ;; Package: ebnf2ps
  8. ;; This file is part of GNU Emacs.
  9. ;; GNU Emacs is free software: you can redistribute it and/or modify
  10. ;; it under the terms of the GNU General Public License as published by
  11. ;; the Free Software Foundation, either version 3 of the License, or
  12. ;; (at your option) any later version.
  13. ;; GNU Emacs is distributed in the hope that it will be useful,
  14. ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16. ;; GNU General Public License for more details.
  17. ;; You should have received a copy of the GNU General Public License
  18. ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
  19. ;;; Commentary:
  20. ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  21. ;;
  22. ;;
  23. ;; This is part of ebnf2ps package.
  24. ;;
  25. ;; This package defines a parser for DTD (Document Type Definition for XML).
  26. ;;
  27. ;; See ebnf2ps.el for documentation.
  28. ;;
  29. ;;
  30. ;; DTD Syntax
  31. ;; ----------
  32. ;;
  33. ;; See the URLs:
  34. ;; `http://www.w3.org/TR/2004/REC-xml-20040204/'
  35. ;; (Extensible Markup Language (XML) 1.0 (Third Edition))
  36. ;; `http://www.w3.org/TR/html40/'
  37. ;; (HTML 4.01 Specification)
  38. ;; `http://www.w3.org/TR/NOTE-html-970421'
  39. ;; (HTML DTD with support for Style Sheets)
  40. ;;
  41. ;;
  42. ;; /* Document */
  43. ;;
  44. ;; document ::= prolog element Misc*
  45. ;; /* Note that *only* the prolog will be parsed */
  46. ;;
  47. ;;
  48. ;; /* Characters */
  49. ;;
  50. ;; Char ::= #x9 | #xA | #xD
  51. ;; | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
  52. ;; /* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. */
  53. ;;
  54. ;; /* NOTE:
  55. ;;
  56. ;; Document authors are encouraged to avoid "compatibility characters", as
  57. ;; defined in section 6.8 of [Unicode] (see also D21 in section 3.6 of
  58. ;; [Unicode3]). The characters defined in the following ranges are also
  59. ;; discouraged. They are either control characters or permanently undefined
  60. ;; Unicode characters:
  61. ;;
  62. ;; [#x7F-#x84], [#x86-#x9F], [#xFDD0-#xFDDF],
  63. ;; [#x1FFFE-#x1FFFF], [#x2FFFE-#x2FFFF], [#x3FFFE-#x3FFFF],
  64. ;; [#x4FFFE-#x4FFFF], [#x5FFFE-#x5FFFF], [#x6FFFE-#x6FFFF],
  65. ;; [#x7FFFE-#x7FFFF], [#x8FFFE-#x8FFFF], [#x9FFFE-#x9FFFF],
  66. ;; [#xAFFFE-#xAFFFF], [#xBFFFE-#xBFFFF], [#xCFFFE-#xCFFFF],
  67. ;; [#xDFFFE-#xDFFFF], [#xEFFFE-#xEFFFF], [#xFFFFE-#xFFFFF],
  68. ;; [#x10FFFE-#x10FFFF]. */
  69. ;;
  70. ;;
  71. ;; /* White Space */
  72. ;;
  73. ;; S ::= (#x20 | #x9 | #xD | #xA)+
  74. ;;
  75. ;;
  76. ;; /* Names and Tokens */
  77. ;;
  78. ;; NameChar ::= Letter | Digit | '.' | '-' | '_' | ':'
  79. ;; | CombiningChar | Extender
  80. ;;
  81. ;; Name ::= (Letter | '_' | ':') (NameChar)*
  82. ;;
  83. ;; Names ::= Name (#x20 Name)*
  84. ;;
  85. ;; Nmtoken ::= (NameChar)+
  86. ;;
  87. ;; Nmtokens ::= Nmtoken (#x20 Nmtoken)*
  88. ;;
  89. ;;
  90. ;; /* Literals */
  91. ;;
  92. ;; EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
  93. ;; | "'" ([^%&'] | PEReference | Reference)* "'"
  94. ;;
  95. ;; AttValue ::= '"' ([^<&"] | Reference)* '"'
  96. ;; | "'" ([^<&'] | Reference)* "'"
  97. ;;
  98. ;; SystemLiteral ::= ('"' [^"]* '"')
  99. ;; | ("'" [^']* "'")
  100. ;;
  101. ;; PubidLiteral ::= '"' PubidChar* '"'
  102. ;; | "'" (PubidChar - "'")* "'"
  103. ;;
  104. ;; PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
  105. ;;
  106. ;; /* NOTE:
  107. ;;
  108. ;; Although the EntityValue production allows the definition of a general
  109. ;; entity consisting of a single explicit < in the literal (e.g., <!ENTITY
  110. ;; mylt "<">), it is strongly advised to avoid this practice since any
  111. ;; reference to that entity will cause a well-formedness error. */
  112. ;;
  113. ;;
  114. ;; /* Character Data */
  115. ;;
  116. ;; CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
  117. ;;
  118. ;;
  119. ;; /* Comments */
  120. ;;
  121. ;; Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
  122. ;;
  123. ;;
  124. ;; /* Processing Instructions */
  125. ;;
  126. ;; PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
  127. ;;
  128. ;; PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
  129. ;;
  130. ;;
  131. ;; /* CDATA Sections */
  132. ;;
  133. ;; CDSect ::= CDStart CData CDEnd
  134. ;;
  135. ;; CDStart ::= '<![CDATA['
  136. ;;
  137. ;; CData ::= (Char* - (Char* ']]>' Char*))
  138. ;;
  139. ;; CDEnd ::= ']]>'
  140. ;;
  141. ;;
  142. ;; /* Prolog */
  143. ;;
  144. ;; prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
  145. ;;
  146. ;; XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
  147. ;;
  148. ;; VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
  149. ;;
  150. ;; Eq ::= S? '=' S?
  151. ;;
  152. ;; VersionNum ::= '1.0'
  153. ;;
  154. ;; Misc ::= Comment | PI | S
  155. ;;
  156. ;;
  157. ;; /* Document Type Definition */
  158. ;;
  159. ;; doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
  160. ;; ('[' intSubset ']' S?)? '>'
  161. ;; [VC: Root Element Type]
  162. ;; [WFC: External Subset]
  163. ;;
  164. ;; DeclSep ::= PEReference | S
  165. ;; [WFC: PE Between Declarations]
  166. ;;
  167. ;; intSubset ::= (markupdecl | DeclSep)*
  168. ;;
  169. ;; markupdecl ::= elementdecl | AttlistDecl | EntityDecl
  170. ;; | NotationDecl | PI | Comment
  171. ;; [VC: Proper Declaration/PE Nesting]
  172. ;; [WFC: PEs in Internal Subset]
  173. ;;
  174. ;;
  175. ;; /* External Subset */
  176. ;;
  177. ;; extSubset ::= TextDecl? extSubsetDecl
  178. ;;
  179. ;; extSubsetDecl ::= ( markupdecl | conditionalSect | DeclSep)*
  180. ;;
  181. ;;
  182. ;; /* Standalone Document Declaration */
  183. ;;
  184. ;; SDDecl ::= S 'standalone' Eq
  185. ;; (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
  186. ;; [VC: Standalone Document Declaration]
  187. ;;
  188. ;;
  189. ;; /* Element */
  190. ;;
  191. ;; element ::= EmptyElemTag | STag content ETag
  192. ;; [WFC: Element Type Match]
  193. ;; [VC: Element Valid]
  194. ;;
  195. ;;
  196. ;; /* Start-tag */
  197. ;;
  198. ;; STag ::= '<' Name (S Attribute)* S? '>'
  199. ;; [WFC: Unique Att Spec]
  200. ;;
  201. ;; Attribute ::= Name Eq AttValue
  202. ;; [VC: Attribute Value Type]
  203. ;; [WFC: No External Entity References]
  204. ;; [WFC: No < in Attribute Values]
  205. ;;
  206. ;;
  207. ;; /* End-tag */
  208. ;;
  209. ;; ETag ::= '</' Name S? '>'
  210. ;;
  211. ;;
  212. ;; /* Content of Elements */
  213. ;;
  214. ;; content ::= CharData?
  215. ;; ((element | Reference | CDSect | PI | Comment) CharData?)*
  216. ;;
  217. ;;
  218. ;; /* Tags for Empty Elements */
  219. ;;
  220. ;; EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
  221. ;; [WFC: Unique Att Spec]
  222. ;;
  223. ;;
  224. ;; /* Element Type Declaration */
  225. ;;
  226. ;; elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
  227. ;; [VC: Unique Element Type Declaration]
  228. ;;
  229. ;; contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
  230. ;;
  231. ;;
  232. ;; /* Element-content Models */
  233. ;;
  234. ;; children ::= (choice | seq) ('?' | '*' | '+')?
  235. ;;
  236. ;; cp ::= (Name | choice | seq) ('?' | '*' | '+')?
  237. ;;
  238. ;; choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'
  239. ;; [VC: Proper Group/PE Nesting]
  240. ;;
  241. ;; seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
  242. ;; [VC: Proper Group/PE Nesting]
  243. ;;
  244. ;;
  245. ;; /* Mixed-content Declaration */
  246. ;;
  247. ;; Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
  248. ;; | '(' S? '#PCDATA' S? ')'
  249. ;; [VC: Proper Group/PE Nesting]
  250. ;; [VC: No Duplicate Types]
  251. ;;
  252. ;;
  253. ;; /* Attribute-list Declaration */
  254. ;;
  255. ;; AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
  256. ;;
  257. ;; AttDef ::= S Name S AttType S DefaultDecl
  258. ;;
  259. ;;
  260. ;; /* Attribute Types */
  261. ;;
  262. ;; AttType ::= StringType | TokenizedType | EnumeratedType
  263. ;;
  264. ;; StringType ::= 'CDATA'
  265. ;;
  266. ;; TokenizedType ::= 'ID' [VC: ID]
  267. ;; [VC: One ID per Element Type]
  268. ;; [VC: ID Attribute Default]
  269. ;; | 'IDREF' [VC: IDREF]
  270. ;; | 'IDREFS' [VC: IDREF]
  271. ;; | 'ENTITY' [VC: Entity Name]
  272. ;; | 'ENTITIES' [VC: Entity Name]
  273. ;; | 'NMTOKEN' [VC: Name Token]
  274. ;; | 'NMTOKENS' [VC: Name Token]
  275. ;;
  276. ;;
  277. ;; /* Enumerated Attribute Types */
  278. ;;
  279. ;; EnumeratedType ::= NotationType | Enumeration
  280. ;;
  281. ;; NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
  282. ;; [VC: Notation Attributes]
  283. ;; [VC: One Notation Per Element Type]
  284. ;; [VC: No Notation on Empty Element]
  285. ;; [VC: No Duplicate Tokens]
  286. ;;
  287. ;; Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
  288. ;; [VC: Enumeration]
  289. ;; [VC: No Duplicate Tokens]
  290. ;;
  291. ;;
  292. ;; /* Attribute Defaults */
  293. ;;
  294. ;; DefaultDecl ::= '#REQUIRED' | '#IMPLIED'
  295. ;; | (('#FIXED' S)? AttValue)
  296. ;; [VC: Required Attribute]
  297. ;; [VC: Attribute Default Value Syntactically Correct]
  298. ;; [WFC: No < in Attribute Values]
  299. ;; [VC: Fixed Attribute Default]
  300. ;;
  301. ;;
  302. ;; /* Conditional Section */
  303. ;;
  304. ;; conditionalSect ::= includeSect | ignoreSect
  305. ;;
  306. ;; includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
  307. ;; [VC: Proper Conditional Section/PE Nesting]
  308. ;;
  309. ;; ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
  310. ;; [VC: Proper Conditional Section/PE Nesting]
  311. ;;
  312. ;; ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
  313. ;;
  314. ;; Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
  315. ;;
  316. ;;
  317. ;; /* Character Reference */
  318. ;;
  319. ;; CharRef ::= '&#' [0-9]+ ';'
  320. ;; | '&#x' [0-9a-fA-F]+ ';'
  321. ;; [WFC: Legal Character]
  322. ;;
  323. ;;
  324. ;; /* Entity Reference */
  325. ;;
  326. ;; Reference ::= EntityRef | CharRef
  327. ;;
  328. ;; EntityRef ::= '&' Name ';'
  329. ;; [WFC: Entity Declared]
  330. ;; [VC: Entity Declared]
  331. ;; [WFC: Parsed Entity]
  332. ;; [WFC: No Recursion]
  333. ;;
  334. ;; PEReference ::= '%' Name ';'
  335. ;; [VC: Entity Declared]
  336. ;; [WFC: No Recursion]
  337. ;; [WFC: In DTD]
  338. ;;
  339. ;;
  340. ;; /* Entity Declaration */
  341. ;;
  342. ;; EntityDecl ::= GEDecl | PEDecl
  343. ;;
  344. ;; GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
  345. ;;
  346. ;; PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
  347. ;;
  348. ;; EntityDef ::= EntityValue | (ExternalID NDataDecl?)
  349. ;;
  350. ;; PEDef ::= EntityValue | ExternalID
  351. ;;
  352. ;;
  353. ;; /* External Entity Declaration */
  354. ;;
  355. ;; ExternalID ::= 'SYSTEM' S SystemLiteral
  356. ;; | 'PUBLIC' S PubidLiteral S SystemLiteral
  357. ;;
  358. ;; NDataDecl ::= S 'NDATA' S Name
  359. ;; [VC: Notation Declared]
  360. ;;
  361. ;;
  362. ;; /* Text Declaration */
  363. ;;
  364. ;; TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
  365. ;;
  366. ;;
  367. ;; /* Well-Formed External Parsed Entity */
  368. ;;
  369. ;; extParsedEnt ::= TextDecl? content
  370. ;;
  371. ;;
  372. ;; /* Encoding Declaration */
  373. ;;
  374. ;; EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
  375. ;;
  376. ;; EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
  377. ;; /* Encoding name contains only Latin characters */
  378. ;;
  379. ;;
  380. ;; /* Notation Declarations */
  381. ;;
  382. ;; NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
  383. ;; [VC: Unique Notation Name]
  384. ;;
  385. ;; PublicID ::= 'PUBLIC' S PubidLiteral
  386. ;;
  387. ;;
  388. ;; /* Characters */
  389. ;;
  390. ;; Letter ::= BaseChar | Ideographic
  391. ;;
  392. ;; BaseChar ::= [#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6]
  393. ;; | [#x00D8-#x00F6] | [#x00F8-#x00FF] | [#x0100-#x0131]
  394. ;; | [#x0134-#x013E] | [#x0141-#x0148] | [#x014A-#x017E]
  395. ;; | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5]
  396. ;; | [#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1]
  397. ;; | #x0386 | [#x0388-#x038A] | #x038C
  398. ;; | [#x038E-#x03A1] | [#x03A3-#x03CE] | [#x03D0-#x03D6]
  399. ;; | #x03DA | #x03DC | #x03DE
  400. ;; | #x03E0 | [#x03E2-#x03F3] | [#x0401-#x040C]
  401. ;; | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481]
  402. ;; | [#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC]
  403. ;; | [#x04D0-#x04EB] | [#x04EE-#x04F5] | [#x04F8-#x04F9]
  404. ;; | [#x0531-#x0556] | #x0559 | [#x0561-#x0586]
  405. ;; | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A]
  406. ;; | [#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE]
  407. ;; | [#x06C0-#x06CE] | [#x06D0-#x06D3] | #x06D5
  408. ;; | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D
  409. ;; | [#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990]
  410. ;; | [#x0993-#x09A8] | [#x09AA-#x09B0] | #x09B2
  411. ;; | [#x09B6-#x09B9] | [#x09DC-#x09DD] | [#x09DF-#x09E1]
  412. ;; | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10]
  413. ;; | [#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33]
  414. ;; | [#x0A35-#x0A36] | [#x0A38-#x0A39] | [#x0A59-#x0A5C]
  415. ;; | #x0A5E | [#x0A72-#x0A74] | [#x0A85-#x0A8B]
  416. ;; | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8]
  417. ;; | [#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9]
  418. ;; | #x0ABD | #x0AE0 | [#x0B05-#x0B0C]
  419. ;; | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30]
  420. ;; | [#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D
  421. ;; | [#x0B5C-#x0B5D] | [#x0B5F-#x0B61] | [#x0B85-#x0B8A]
  422. ;; | [#x0B8E-#x0B90] | [#x0B92-#x0B95] | [#x0B99-#x0B9A]
  423. ;; | #x0B9C | [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4]
  424. ;; | [#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9]
  425. ;; | [#x0C05-#x0C0C] | [#x0C0E-#x0C10] | [#x0C12-#x0C28]
  426. ;; | [#x0C2A-#x0C33] | [#x0C35-#x0C39] | [#x0C60-#x0C61]
  427. ;; | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8]
  428. ;; | [#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE
  429. ;; | [#x0CE0-#x0CE1] | [#x0D05-#x0D0C] | [#x0D0E-#x0D10]
  430. ;; | [#x0D12-#x0D28] | [#x0D2A-#x0D39] | [#x0D60-#x0D61]
  431. ;; | [#x0E01-#x0E2E] | #x0E30 | [#x0E32-#x0E33]
  432. ;; | [#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84
  433. ;; | [#x0E87-#x0E88] | #x0E8A | #x0E8D
  434. ;; | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3]
  435. ;; | #x0EA5 | #x0EA7 | [#x0EAA-#x0EAB]
  436. ;; | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3]
  437. ;; | #x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47]
  438. ;; | [#x0F49-#x0F69] | [#x10A0-#x10C5] | [#x10D0-#x10F6]
  439. ;; | #x1100 | [#x1102-#x1103] | [#x1105-#x1107]
  440. ;; | #x1109 | [#x110B-#x110C] | [#x110E-#x1112]
  441. ;; | #x113C | #x113E | #x1140
  442. ;; | #x114C | #x114E | #x1150
  443. ;; | [#x1154-#x1155] | #x1159 | [#x115F-#x1161]
  444. ;; | #x1163 | #x1165 | #x1167
  445. ;; | #x1169 | [#x116D-#x116E] | [#x1172-#x1173]
  446. ;; | #x1175 | #x119E | #x11A8
  447. ;; | #x11AB | [#x11AE-#x11AF] | [#x11B7-#x11B8]
  448. ;; | #x11BA | [#x11BC-#x11C2] | #x11EB
  449. ;; | #x11F0 | #x11F9 | [#x1E00-#x1E9B]
  450. ;; | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D]
  451. ;; | [#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57]
  452. ;; | #x1F59 | #x1F5B | #x1F5D
  453. ;; | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC]
  454. ;; | #x1FBE | [#x1FC2-#x1FC4] | [#x1FC6-#x1FCC]
  455. ;; | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] | [#x1FE0-#x1FEC]
  456. ;; | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126
  457. ;; | [#x212A-#x212B] | #x212E | [#x2180-#x2182]
  458. ;; | [#x3041-#x3094] | [#x30A1-#x30FA] | [#x3105-#x312C]
  459. ;; | [#xAC00-#xD7A3]
  460. ;;
  461. ;; Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]
  462. ;;
  463. ;; CombiningChar ::= [#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486]
  464. ;; | [#x0591-#x05A1] | [#x05A3-#x05B9] | [#x05BB-#x05BD]
  465. ;; | #x05BF | [#x05C1-#x05C2] | #x05C4
  466. ;; | [#x064B-#x0652] | #x0670 | [#x06D6-#x06DC]
  467. ;; | [#x06DD-#x06DF] | [#x06E0-#x06E4] | [#x06E7-#x06E8]
  468. ;; | [#x06EA-#x06ED] | [#x0901-#x0903] | #x093C
  469. ;; | [#x093E-#x094C] | #x094D | [#x0951-#x0954]
  470. ;; | [#x0962-#x0963] | [#x0981-#x0983] | #x09BC
  471. ;; | #x09BE | #x09BF | [#x09C0-#x09C4]
  472. ;; | [#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7
  473. ;; | [#x09E2-#x09E3] | #x0A02 | #x0A3C
  474. ;; | #x0A3E | #x0A3F | [#x0A40-#x0A42]
  475. ;; | [#x0A47-#x0A48] | [#x0A4B-#x0A4D] | [#x0A70-#x0A71]
  476. ;; | [#x0A81-#x0A83] | #x0ABC | [#x0ABE-#x0AC5]
  477. ;; | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03]
  478. ;; | #x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48]
  479. ;; | [#x0B4B-#x0B4D] | [#x0B56-#x0B57] | [#x0B82-#x0B83]
  480. ;; | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] | [#x0BCA-#x0BCD]
  481. ;; | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44]
  482. ;; | [#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56]
  483. ;; | [#x0C82-#x0C83] | [#x0CBE-#x0CC4] | [#x0CC6-#x0CC8]
  484. ;; | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6] | [#x0D02-#x0D03]
  485. ;; | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D]
  486. ;; | #x0D57 | #x0E31 | [#x0E34-#x0E3A]
  487. ;; | [#x0E47-#x0E4E] | #x0EB1 | [#x0EB4-#x0EB9]
  488. ;; | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19]
  489. ;; | #x0F35 | #x0F37 | #x0F39
  490. ;; | #x0F3E | #x0F3F | [#x0F71-#x0F84]
  491. ;; | [#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97
  492. ;; | [#x0F99-#x0FAD] | [#x0FB1-#x0FB7] | #x0FB9
  493. ;; | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F]
  494. ;; | #x3099 | #x309A
  495. ;;
  496. ;; Digit ::= [#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9]
  497. ;; | [#x0966-#x096F] | [#x09E6-#x09EF] | [#x0A66-#x0A6F]
  498. ;; | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] | [#x0BE7-#x0BEF]
  499. ;; | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F]
  500. ;; | [#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29]
  501. ;;
  502. ;; Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6
  503. ;; | #x3005 | [#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]
  504. ;;
  505. ;;
  506. ;; NOTES
  507. ;; -----
  508. ;;
  509. ;; At the moment, only `<!ELEMENT' generates a syntactic chart. The
  510. ;; `<!ATTLIST', `<!NOTATION' and `<!ENTITY' are syntactically checked but they
  511. ;; don't generate a syntactic chart.
  512. ;;
  513. ;; Besides the syntax above, ebnf-dtd also accepts a `pure' dtd file. An
  514. ;; example of a `pure' dtd file is:
  515. ;;
  516. ;; <?xml version="1.0" encoding="UTF-8"?>
  517. ;; <!--
  518. ;; The main element.
  519. ;; -->
  520. ;; <!ELEMENT workflow (registers?, trigger-functions?, initial-actions,
  521. ;; steps, splits?, joins?)>
  522. ;; <!--
  523. ;; An action that can be executed (id must be unique among actions for
  524. ;; the enclosing step).
  525. ;; Used in: actions
  526. ;; -->
  527. ;; <!ELEMENT action (restrict-to, validators?, pre-functions?, results,
  528. ;; post-functions?)>
  529. ;; <!ATTLIST action
  530. ;; id CDATA #REQUIRED
  531. ;; name CDATA #REQUIRED
  532. ;; >
  533. ;;
  534. ;;
  535. ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  536. ;;; Code:
  537. (require 'ebnf-otz)
  ;; The parser routines in this file read this variable right after a
  ;; token has been scanned and treat it as the token's text (e.g. the
  ;; name of a processing instruction or the contents of a string).
  ;; NOTE(review): presumably assigned by the function `ebnf-dtd-lex';
  ;; confirm against the lexer definition.
  (defvar ebnf-dtd-lex nil
    "Value produced by the `ebnf-dtd-lex' function for the last token.")
  540. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  541. ;; Syntactic analyzer
  542. ;;; document ::= prolog element Misc*
  543. ;;; /* Note that *only* the prolog will be parsed */
  (defun ebnf-dtd-parser (start)
    "Parse the DTD prolog starting at buffer position START; return its rules."
    ;; Only the prolog is parsed (see the `document' production).
    ;;
    ;; The value is the list of rules collected from the declarations, most
    ;; recent first.  A rule is kept only when it is non-nil and
    ;; `ebnf-add-empty-rule-list' returns nil for it.
    ;;
    ;; Point is moved back to where it was on normal exit; when an error is
    ;; signalled point is left where parsing stopped.
    (let ((total (+ (- ebnf-limit start) 1)) ; size of the region to parse
          (bias (1- start))                  ; offset used for the progress %
          (origin (point))
          rule-list token rule the-end)
      (goto-char start)
      (setq token (ebnf-dtd-lex))
      (and (eq token 'end-of-input)
           (error "Empty DTD file"))
      ;; `ebnf-dtd-prolog' returns (FIRST-TOKEN . TERMINATOR).
      (setq token (ebnf-dtd-prolog token))
      (unless (eq (car token) 'end-prolog)
        (setq the-end (cdr token)
              token (car token))
        (while (not (eq token the-end))
          ;; `ebnf-dtd-intsubset' hands `end-subset' and `end-of-input' back
          ;; unchanged, so meeting the terminator we are *not* waiting for
          ;; would otherwise loop forever.  Report it instead.
          (cond ((eq token 'end-of-input)
                 (error "Unexpected end of input in DOCTYPE subset"))
                ((eq token 'end-subset)
                 (error "Unexpected end of DOCTYPE subset")))
          (ebnf-message-float
           "Parsing...%s%%"
           (/ (* (- (point) bias) 100.0) total))
          ;; Each step returns (NEXT-TOKEN . RULE); RULE may be nil.
          (setq token (ebnf-dtd-intsubset token)
                rule (cdr token)
                token (car token))
          (or (null rule)
              (ebnf-add-empty-rule-list rule)
              (setq rule-list (cons rule rule-list))))
        ;; A DOCTYPE subset must be followed by the closing `>'.
        (or (eq the-end 'end-of-input)
            (eq (ebnf-dtd-lex) 'end-decl)
            (error "Missing end of DOCTYPE"))
        ;; adjust message, 'cause *only* prolog will be parsed
        (ebnf-message-float "Parsing...%s%%" 100.0))
      (goto-char origin)
      rule-list))
  575. ;;; prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
  576. ;;;
  577. ;;; XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
  578. ;;;
  579. ;;; VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"')
  580. ;;;
  581. ;;; Eq ::= S? '=' S?
  582. ;;;
  583. ;;; VersionNum ::= '1.0'
  584. ;;;
  585. ;;; Misc ::= Comment | PI | S
  586. ;;;
  587. ;;; EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" )
  588. ;;;
  589. ;;; EncName ::= [A-Za-z] ([-A-Za-z0-9._])*
  590. ;;; /* Encoding name contains only Latin characters */
  591. ;;;
  592. ;;; SDDecl ::= S 'standalone' Eq
  593. ;;; (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
  594. ;;;
  595. ;;; doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
  596. ;;; ('[' intSubset ']' S?)? '>'
  (defun ebnf-dtd-prolog (token)
    "Parse the XML prolog starting at TOKEN; return (NEXT-TOKEN . END-TOKEN)."
    ;; The result is one of:
    ;;   (FIRST-SUBSET-TOKEN . end-subset)  DOCTYPE with an internal subset
    ;;   (end-prolog . end-subset)          DOCTYPE without subset, or nothing
    ;;                                      to parse
    ;;   (DECL-TOKEN . end-of-input)        `pure' DTD file
    (if (and (eq token 'begin-pi) (string= ebnf-dtd-lex "xml"))
        (progn
          ;; version = "1.0"
          (setq token (ebnf-dtd-attribute (ebnf-dtd-lex) 'version-attr
                                          "^1\\.0$" "XML version"))
          ;; ( encoding = "encoding name" )?
          (setq token (ebnf-dtd-attribute-optional
                       token 'encoding-attr
                       "^[A-Za-z][-A-Za-z0-9._]*$" "XML encoding"))
          ;; ( standalone = ( "yes" | "no" ) )?
          ;; Alternation in Emacs regexps is `\\|' and needs a group here; a
          ;; bare `|' is a literal character and would only accept "yes|no".
          (setq token (ebnf-dtd-attribute-optional
                       token 'standalone-attr
                       "^\\(yes\\|no\\)$" "XML standalone"))
          (or (eq token 'end-pi)
              (error "Missing end of XML processing instruction"))
          ;; The XML declaration is consumed: read on, skipping any
          ;; processing instructions.
          (setq token (ebnf-dtd-pi (ebnf-dtd-lex))))
      ;; XMLDecl is optional: TOKEN has not been consumed yet, so it must be
      ;; examined here rather than thrown away by reading another token.
      (setq token (ebnf-dtd-pi token)))
    (cond
     ;; DOCTYPE
     ((eq token 'doctype-decl)
      (or (eq (ebnf-dtd-lex) 'name)
          (error "Document type name is missing"))
      (cons (if (eq (ebnf-dtd-externalid) 'begin-subset)
                (ebnf-dtd-lex)
              'end-prolog)
            'end-subset))
     ;; `pure' DTD file: declarations run until the end of input
     ((memq token '(element-decl attlist-decl entity-decl notation-decl))
      (cons token 'end-of-input))
     (t
      '(end-prolog . end-subset))
     ))
  (defun ebnf-dtd-attribute (token attr match attr-name)
    "Parse the mandatory attribute ATTR whose name token is TOKEN."
    ;; Signals an error naming ATTR-NAME when TOKEN is not ATTR; otherwise the
    ;; value check against regexp MATCH and the return value are those of
    ;; `ebnf-dtd-attribute-optional'.
    (if (eq token attr)
        (ebnf-dtd-attribute-optional token attr match attr-name)
      (error "%s attribute is missing" attr-name)))
  (defun ebnf-dtd-attribute-optional (token attr match attr-name)
    "Parse the optional attribute ATTR when TOKEN is its name token."
    ;; When TOKEN is ATTR, the next two tokens must be `equal' and `string',
    ;; and the string text (in the variable `ebnf-dtd-lex') must match the
    ;; regexp MATCH; the token after the value is returned.  When TOKEN is not
    ;; ATTR nothing is read and TOKEN is returned unchanged.
    (when (eq token attr)
      (or (and (eq (ebnf-dtd-lex) 'equal)
               (eq (ebnf-dtd-lex) 'string)
               (string-match match ebnf-dtd-lex))
          ;; ATTR-NAME already carries the "XML" prefix (e.g. "XML version"),
          ;; so the message mirrors the one in `ebnf-dtd-attribute'.
          (error "%s attribute is invalid" attr-name))
      (setq token (ebnf-dtd-lex)))
    token)
  640. ;;; ExternalID ::= 'SYSTEM' S SystemLiteral
  641. ;;; | 'PUBLIC' S PubidLiteral S SystemLiteral
  (defun ebnf-dtd-externalid (&optional token)
    "Parse an ExternalID and return the token that follows it."
    ;; With a non-nil TOKEN the external id is mandatory and TOKEN is its
    ;; first token.  Without it a fresh token is read, and a token that is
    ;; neither `system' nor `public' is simply returned.
    (let ((required token)
          (tok (or token (ebnf-dtd-lex))))
      (cond ((eq tok 'public)
             (ebnf-dtd-pubidliteral)
             (ebnf-dtd-systemliteral))
            ((eq tok 'system)
             (ebnf-dtd-systemliteral))
            (required
             (error "Missing `SYSTEM' or `PUBLIC' in external id"))
            (t
             tok))))
  654. ;;; SystemLiteral ::= ('"' [^"]* '"')
  655. ;;; | ("'" [^']* "'")
  (defun ebnf-dtd-systemliteral ()
    "Read a SystemLiteral and return the token that follows it."
    ;; The literal must come from the lexer as a `string' token.
    (unless (eq (ebnf-dtd-lex) 'string)
      (error "System identifier is invalid"))
    (ebnf-dtd-lex))
  660. ;;; PubidLiteral ::= '"' PubidChar* '"'
  661. ;;; | "'" (PubidChar - "'")* "'"
  662. ;;;
  663. ;;; PubidChar ::= [-'()+,./:=?;!*#@$_%\n\r a-zA-Z0-9]
  (defun ebnf-dtd-pubidliteral ()
    "Read a PubidLiteral and check that it only contains PubidChar characters."
    ;; Signals an error when the next token is not a `string' or when its text
    ;; contains a character outside PubidChar.  No further token is read.
    ;;
    ;; The check is anchored with \\` and \\' (start/end of string).  `^' and
    ;; `$' also match around newlines, and newline is itself a PubidChar, so
    ;; with them one clean (or empty) line was enough to accept a multi-line
    ;; identifier that had illegal characters on another line.
    (or (and (eq (ebnf-dtd-lex) 'string)
             (string-match "\\`[-'()+,./:=?;!*#@$_%\n\r a-zA-Z0-9]*\\'"
                           ebnf-dtd-lex))
        (error "Public identifier is invalid")))
  669. ;;; PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
  670. ;;;
  671. ;;; PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
  (defun ebnf-dtd-pi (token)
    "Skip processing instructions starting at TOKEN; return the next token."
    ;; TOKEN is returned unchanged when it is not `begin-pi'.  A processing
    ;; instruction whose target is `xml' in any letter case is an error.
    (while (eq token 'begin-pi)
      (and (string-match "^[xX][mM][lL]$" ebnf-dtd-lex)
           (error "Processing instruction name can not be `XML'"))
      ;; Discard the instruction's content up to its `end-pi'.  Stop at the
      ;; end of input as well, otherwise an unterminated instruction would
      ;; keep this loop spinning.
      (while (not (eq (setq token (ebnf-dtd-lex)) 'end-pi))
        (and (eq token 'end-of-input)
             (error "Missing end of processing instruction")))
      (setq token (ebnf-dtd-lex)))
    token)
  679. ;;; doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
  680. ;;; ('[' intSubset ']' S?)? '>'
  681. ;;;
  682. ;;; intSubset ::= (markupdecl | DeclSep)*
  683. ;;;
  684. ;;; DeclSep ::= PEReference | S
  685. ;;;
  686. ;;; markupdecl ::= elementdecl | AttlistDecl | EntityDecl
  687. ;;; | NotationDecl | PI | Comment
  (defun ebnf-dtd-intsubset (token)
    "Parse one item of the internal subset that starts at TOKEN."
    ;; Returns a cons whose car is the next token.  For `end-subset' and
    ;; `end-of-input' the token itself is handed back with a nil cdr; a
    ;; parameter-entity reference also yields a nil cdr.  Declarations return
    ;; whatever their own parser returns.
    ;; PI - Processing Instruction
    (when (eq token 'begin-pi)
      (setq token (ebnf-dtd-pi token)))
    (cond
     ((eq token 'element-decl)
      (ebnf-dtd-elementdecl))           ; rule
     ((eq token 'attlist-decl)
      (ebnf-dtd-attlistdecl))           ; annotation
     ((eq token 'entity-decl)
      (ebnf-dtd-entitydecl))            ; annotation
     ((eq token 'notation-decl)
      (ebnf-dtd-notationdecl))          ; annotation
     ((eq token 'pe-ref)
      (list (ebnf-dtd-lex)))            ; annotation
     ((memq token '(end-subset end-of-input))
      (list token))
     (t
      (error "Invalid DOCTYPE element"))))
  708. ;;; elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
  709. ;;;
  710. ;;; contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
  711. ;;;
  712. ;;; Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
  713. ;;; | '(' S? '#PCDATA' S? ')'
  714. ;;;
  715. ;;; children ::= (choice | seq) ('?' | '*' | '+')?
  716. ;;;
  717. ;;; choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'
  718. ;;;
  719. ;;; seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
  720. ;;;
  721. ;;; cp ::= (Name | choice | seq) ('?' | '*' | '+')?
  (defun ebnf-dtd-elementdecl ()
    "Parse an `<!ELEMENT' declaration; return (NEXT-TOKEN . PRODUCTION)."
    ;; `ebnf-action' is captured for the production and then reset to nil.
    (let ((saved-action ebnf-action)
          elem-name tok spec)
      (setq ebnf-action nil)
      (unless (eq (ebnf-dtd-lex) 'name)
        (error "Invalid ELEMENT name"))
      (setq elem-name ebnf-dtd-lex
            tok (ebnf-dtd-lex))
      ;; SPEC is (TOKEN-AFTER-CONTENT . NODE) for the content specification.
      (setq spec
            (cond ((eq tok 'begin-group)
                   (setq tok (ebnf-dtd-lex))
                   (if (eq tok 'pcdata)
                       (ebnf-dtd-mixed)
                     (ebnf-dtd-children tok)))
                  ((memq tok '(empty any))
                   ;; The terminal is built from the current value of the
                   ;; variable `ebnf-dtd-lex' before the next token is read.
                   (let ((keyword (ebnf-make-terminal ebnf-dtd-lex)))
                     (cons (ebnf-dtd-lex) keyword)))
                  (t
                   (error "Invalid ELEMENT content"))))
      (unless (eq (car spec) 'end-decl)
        (error "Missing `>' in ELEMENT declaration"))
      (ebnf-eps-add-production elem-name)
      ;; Read the token after `>' first, then build the production.
      (let ((next (ebnf-dtd-lex)))
        (cons next
              (ebnf-make-production elem-name (cdr spec) saved-action)))))
  746. ;;; Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
  747. ;;; | '(' S? '#PCDATA' S? ')'
  (defun ebnf-dtd-mixed ()
    "Parse mixed content once `(' and `#PCDATA' have been read.
On entry the variable `ebnf-dtd-lex' holds the text of the `#PCDATA'
token.  Return the cons produced by `ebnf-token-alternative', whose car
is the token following the mixed content.

Every alternative, `#PCDATA' as well as each Name after `|', is wrapped
with `ebnf-make-terminal' so that all elements handed to
`ebnf-token-alternative' are nodes.  A trailing `*' is mandatory when at
least one `|' Name is present and optional otherwise, as in the Mixed
production of the grammar.
Signal an error on an invalid name or a missing `)' or `*'."
    (let* ((alternatives (list (ebnf-make-terminal ebnf-dtd-lex)))
           (token (ebnf-dtd-lex))
           (has-alternative (eq token 'alternative)))
      (while (eq token 'alternative)
        (or (eq (ebnf-dtd-lex) 'name)
            (error "Invalid name"))
        ;; Wrap the name in a terminal node, exactly like `#PCDATA' above;
        ;; pushing the bare string would mix strings and nodes.
        (setq alternatives (cons (ebnf-make-terminal ebnf-dtd-lex)
                                 alternatives)
              token (ebnf-dtd-lex)))
      (or (eq token 'end-group)
          (error "Missing `)'"))
      ;; Look at what follows `)'.
      (setq token (ebnf-dtd-lex))
      (cond ((eq token 'zero-or-more)
             ;; `)*' -- consume the star, valid with or without alternatives.
             (setq token (ebnf-dtd-lex)))
            (has-alternative
             (error "Missing `*'")))
      ;; NOTE(review): the repetition expressed by `)*' is not represented
      ;; in the returned node, as before -- confirm whether it should be.
      (ebnf-token-alternative alternatives (cons token nil))))
  763. ;;; children ::= (choice | seq) ('?' | '*' | '+')?
  (defun ebnf-dtd-children (token)
    "Parse element content of the children form.
TOKEN is the token already read after the opening `('.  The group is
parsed by `ebnf-dtd-choice-seq' and the result is handed to
`ebnf-dtd-operators', whose value is returned."
    (let ((group (ebnf-dtd-choice-seq token)))
      (ebnf-dtd-operators group)))
  766. ;;; choice ::= '(' S? cp ( S? '|' S? cp )+ S? ')'
  767. ;;;
  768. ;;; seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
  (defun ebnf-dtd-choice-seq (token)
    "Parse the inside of a parenthesized choice or sequence.
TOKEN is the first token after the opening `('.  Content particles are
read with `ebnf-dtd-cp' until something other than `|' or `,' follows.
Return the node for the group: an alternative for a choice, a sequence
for a seq, or the single particle itself.
Signal an error if the group is not closed by `)'."
    (let ((item (ebnf-dtd-cp token))    ; (TOKEN . NODE)
          parts node)
      (cond
       ;; choice
       ((eq (car item) 'alternative)
        (while (eq (car item) 'alternative)
          (setq parts (cons (cdr item) parts)
                item (ebnf-dtd-cp (ebnf-dtd-lex))))
        ;; `ebnf-token-alternative' returns a (TOKEN . NODE) pair, as its
        ;; use in `ebnf-dtd-mixed' shows.  Keep only NODE so that this
        ;; branch returns the same kind of value as the two below.
        (setq node (cdr (ebnf-token-alternative parts item))))
       ;; seq
       ((eq (car item) 'comma)
        (while (eq (car item) 'comma)
          (setq parts (cons (cdr item) parts)
                item (ebnf-dtd-cp (ebnf-dtd-lex))))
        (setq node (ebnf-token-sequence (cons (cdr item) parts))))
       ;; only one element
       (t
        (setq node (cdr item))))
      ;; ITEM is the last particle read; the token after it must be `)'.
      (or (eq (car item) 'end-group)
          (error "Missing `)' in ELEMENT content"))
      node))
  791. ;;; cp ::= (Name | choice | seq) ('?' | '*' | '+')?
  (defun ebnf-dtd-cp (token)
    "Parse one content particle whose first token is TOKEN.
A `name' token becomes a terminal built from the text in the variable
`ebnf-dtd-lex'; a `begin-group' token starts a nested choice or
sequence.  The particle is then passed to `ebnf-dtd-operators', whose
value is returned.  Any other TOKEN signals an error."
    (let ((particle
           (cond ((eq token 'begin-group)
                  (ebnf-dtd-choice-seq (ebnf-dtd-lex)))
                 ((eq token 'name)
                  (ebnf-make-terminal ebnf-dtd-lex))
                 (t
                  (error "Invalid element")))))
      (ebnf-dtd-operators particle)))
  800. ;;; elm ('?' | '*' | '+')?
  (defun ebnf-dtd-operators (elm)
    "Apply an optional `?', `*' or `+' suffix to ELM.
Read the next token.  If it is one of the three operators, read one
more token and return it consed onto ELM wrapped by the matching
constructor; otherwise return the token just read consed onto ELM
unchanged."
    (let* ((token (ebnf-dtd-lex))
           ;; operator token -> function that wraps the element
           (wrapper (cdr (assq token
                               '((optional     . ebnf-token-optional)
                                 (zero-or-more . ebnf-make-zero-or-more)
                                 (one-or-more  . ebnf-make-one-or-more))))))
      (if wrapper
          ;; The token after the operator is read before ELM is wrapped.
          (cons (ebnf-dtd-lex) (funcall wrapper elm))
        (cons token elm))))
  812. ;;; AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
  813. ;;;
  814. ;;; AttDef ::= S Name S AttType S DefaultDecl
  815. ;;;
  816. ;;; AttType ::= StringType | TokenizedType | EnumeratedType
  817. ;;;
  818. ;;; StringType ::= 'CDATA'
  819. ;;;
  820. ;;; TokenizedType ::= 'ID'
  821. ;;; | 'IDREF'
  822. ;;; | 'IDREFS'
  823. ;;; | 'ENTITY'
  824. ;;; | 'ENTITIES'
  825. ;;; | 'NMTOKEN'
  826. ;;; | 'NMTOKENS'
  827. ;;;
  828. ;;; EnumeratedType ::= NotationType | Enumeration
  829. ;;;
  830. ;;; NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
  831. ;;;
  832. ;;; Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
  833. ;;;
  834. ;;; DefaultDecl ::= '#REQUIRED'
  835. ;;; | '#IMPLIED'
  836. ;;; | (('#FIXED' S)? AttValue)
  837. ;;;
  838. ;;;
  839. ;;; AttValue ::= '"' ([^<&"] | Reference)* '"'
  840. ;;; | "'" ([^<&'] | Reference)* "'"
  841. ;;;
  842. ;;; Reference ::= EntityRef | CharRef
  843. ;;;
  844. ;;; EntityRef ::= '&' Name ';'
  845. ;;;
  846. ;;; CharRef ::= '&#' [0-9]+ ';'
  847. ;;; | '&#x' [0-9a-fA-F]+ ';'
  848. ;;; "^\\(&\\([A-Za-z_:][-A-Za-z0-9._:]*\\|#\\(x[0-9a-fA-F]+\\|[0-9]+\\)\\);\\|[^<&]\\)*$"
  (defun ebnf-dtd-attlistdecl ()
    "Parse and check the remainder of an ATTLIST declaration.
The caller has already read the `attlist-decl' token.  Each attribute
definition is checked for a valid type and default value; nothing is
built from it.  Return a cons whose car is the token read after the
closing `>' and whose cdr is nil.
Signal an error on the first malformed part."
    (unless (eq (ebnf-dtd-lex) 'name)
      (error "Invalid ATTLIST name"))
    (let ((token (ebnf-dtd-lex)))
      ;; One iteration per AttDef; each one starts with the attribute name.
      (while (eq token 'name)
        ;; AttType
        (let ((type (ebnf-dtd-lex)))
          (cond
           ;; StringType and TokenizedType keywords need no further check.
           ((memq type
                  '(cdata id idref idrefs entity entities nmtoken nmtokens)))
           ;; Enumeration
           ((eq type 'begin-group)
            (ebnf-dtd-namelist "enumeration" '(name name-char)))
           ;; NotationType
           ((eq type 'notation)
            (unless (eq (ebnf-dtd-lex) 'begin-group)
              (error "Missing `(' in NOTATION type in ATTLIST declaration"))
            (ebnf-dtd-namelist "NOTATION" '(name)))
           (t
            (error "Invalid type in ATTLIST declaration"))))
        ;; DefaultDecl
        (let ((decl (ebnf-dtd-lex)))
          (unless (memq decl '(required implied))
            ;; An optional #FIXED may precede the attribute value.
            (when (eq decl 'fixed)
              (setq decl (ebnf-dtd-lex)))
            (unless (and (eq decl 'string)
                         (string-match
                          "^\\(&\\([A-Za-z_:][-A-Za-z0-9._:]*\\|#\\(x[0-9a-fA-F]+\\|[0-9]+\\)\\);\\|[^<&]\\)*$"
                          ebnf-dtd-lex))
              (error "Invalid default value in ATTLIST declaration"))))
        (setq token (ebnf-dtd-lex)))
      (unless (eq token 'end-decl)
        (error "Missing `>' in end of ATTLIST"))
      (cons (ebnf-dtd-lex) nil)))
  (defun ebnf-dtd-namelist (type name-list)
    "Check a `|'-separated list of names up to the closing `)'.
NAME-LIST is the list of token symbols accepted as a list item.  TYPE
is a string used only in error messages.  Return t when the list ends
with `)'; signal an error otherwise."
    (let (separator)
      (while (progn
               (unless (memq (ebnf-dtd-lex) name-list)
                 (error "Invalid name in %s type in ATTLIST declaration" type))
               (setq separator (ebnf-dtd-lex))
               ;; keep going as long as items are separated by `|'
               (eq separator 'alternative)))
      (if (eq separator 'end-group)
          t
        (error "Missing `)' in %s type in ATTLIST declaration" type))))
  888. ;;; EntityDecl ::= GEDecl | PEDecl
  889. ;;;
  890. ;;; GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
  891. ;;;
  892. ;;; PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
  893. ;;;
  894. ;;; EntityDef ::= EntityValue | (ExternalID NDataDecl?)
  895. ;;;
  896. ;;; PEDef ::= EntityValue | ExternalID
  897. ;;;
  898. ;;; NDataDecl ::= S 'NDATA' S Name
  899. ;;;
  900. ;;;
  901. ;;; EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"'
  902. ;;; | "'" ([^%&'] | PEReference | Reference)* "'"
  903. ;;;
  904. ;;; PEReference ::= '%' Name ';'
  905. ;;;
  906. ;;; Reference ::= EntityRef | CharRef
  907. ;;;
  908. ;;; EntityRef ::= '&' Name ';'
  909. ;;;
  910. ;;; CharRef ::= '&#' [0-9]+ ';'
  911. ;;; | '&#x' [0-9a-fA-F]+ ';'
  912. ;;; "^\\(%[A-Za-z_:][-A-Za-z0-9._:]*;\\|&\\([A-Za-z_:][-A-Za-z0-9._:]*\\|#\\(x[0-9a-fA-F]+\\|[0-9]+\\)\\);\\|[^%&]\\)*$"
  (defun ebnf-dtd-entitydecl ()
    "Parse and check the remainder of an ENTITY declaration.
The caller has already read the `entity-decl' token.  A leading `%'
marks a parameter entity.  The definition is either a string, which is
checked against the EntityValue pattern, or an external identifier
handled by `ebnf-dtd-externalid'; for a general entity the latter may
be followed by NDATA and a name.  Return a cons whose car is the token
read after the closing `>' and whose cdr is nil.
Signal an error on the first malformed part."
    (let* ((first (ebnf-dtd-lex))
           (pedecl (eq first 'percent))
           ;; For a parameter entity the name comes after the `%'.
           (name-token (if pedecl (ebnf-dtd-lex) first))
           token)
      (unless (eq name-token 'name)
        (error "Invalid name of ENTITY"))
      (setq token (ebnf-dtd-lex))
      (cond
       ;; ExternalID, then NDataDecl for general entities only
       ((not (eq token 'string))
        (setq token (ebnf-dtd-externalid token))
        (when (and (eq token 'ndata) (not pedecl))
          (unless (eq (ebnf-dtd-lex) 'name)
            (error "Invalid NDATA name"))
          (setq token (ebnf-dtd-lex))))
       ;; EntityValue
       ((string-match
         "^\\(%[A-Za-z_:][-A-Za-z0-9._:]*;\\|&\\([A-Za-z_:][-A-Za-z0-9._:]*\\|#\\(x[0-9a-fA-F]+\\|[0-9]+\\)\\);\\|[^%&]\\)*$"
         ebnf-dtd-lex)
        (setq token (ebnf-dtd-lex)))
       (t
        (error "Invalid ENTITY definition")))
      (unless (eq token 'end-decl)
        (error "Missing `>' in end of ENTITY"))
      (cons (ebnf-dtd-lex) nil)))
  935. ;;; NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
  936. ;;;
  937. ;;; PublicID ::= 'PUBLIC' S PubidLiteral
  (defun ebnf-dtd-notationdecl ()
    "Parse and check the remainder of a NOTATION declaration.
The caller has already read the `notation-decl' token.  Return a cons
whose car is the token read after the closing `>' and whose cdr is nil.
Signal an error if the name or the closing `>' is missing."
    (unless (eq (ebnf-dtd-lex) 'name)
      (error "Invalid name NOTATION"))
    ;; The identifier parser returns the token that follows the identifier.
    (let ((after-id (ebnf-dtd-externalid-or-publicid)))
      (unless (eq after-id 'end-decl)
        (error "Missing `>' in end of NOTATION")))
    (cons (ebnf-dtd-lex) nil))
  944. ;;; ExternalID ::= 'SYSTEM' S SystemLiteral
  945. ;;; | 'PUBLIC' S PubidLiteral S SystemLiteral
  946. ;;;
  947. ;;; PublicID ::= 'PUBLIC' S PubidLiteral
  (defun ebnf-dtd-externalid-or-publicid ()
    "Parse an external identifier or a public identifier.
After SYSTEM, return the value of `ebnf-dtd-systemliteral'.  After
PUBLIC, call `ebnf-dtd-pubidliteral', then skip one further string
token if present, and return the token that follows.
Signal an error if neither keyword is found."
    (let ((keyword (ebnf-dtd-lex)))
      (cond
       ((eq keyword 'public)
        (ebnf-dtd-pubidliteral)
        ;; The system literal after the public identifier is optional here.
        (let ((next (ebnf-dtd-lex)))
          (if (eq next 'string)
              (ebnf-dtd-lex)
            next)))
       ((eq keyword 'system)
        (ebnf-dtd-systemliteral))
       (t
        (error "Missing `SYSTEM' or `PUBLIC'")))))
  959. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  960. ;; Lexical analyzer
  (defconst ebnf-dtd-token-table (make-vector 256 'error)
    "Vector used to map characters to a lexical token.")

  (defun ebnf-dtd-initialize ()
    "Initialize EBNF token table."
    ;; Every entry not assigned below (control characters and 8-bit
    ;; control characters among them) keeps the `error' token the table
    ;; was created with.
    ;;
    ;; Character ranges, both ends inclusive: (FIRST LAST TOKEN).
    (dolist (range '((?0  ?9  name-char)  ; digits
                     (?A  ?Z  name)       ; upper case letters
                     (?a  ?z  name)       ; lower case letters
                     (160 255 name)))     ; European 8-bit accented characters
      (let ((char (car range))
            (last (nth 1 range))
            (token (nth 2 range)))
        (while (<= char last)
          (aset ebnf-dtd-token-table char token)
          (setq char (1+ char)))))
    ;; Single characters: (CHAR TOKEN).
    (dolist (entry '(;; name characters
                     (?_  name)
                     (?:  name)
                     (?.  name-char)
                     (?-  name-char)
                     ;; space characters
                     (?\n space)          ; [NL] linefeed
                     (?\r space)          ; [CR] carriage return
                     (?\t space)          ; [HT] horizontal tab
                     (?\s space)          ; [SP] space
                     ;; other lexical characters
                     (?=  equal)
                     (?,  comma)
                     (?*  zero-or-more)
                     (?+  one-or-more)
                     (?|  alternative)
                     (?%  percent)
                     (?&  ampersand)
                     (?#  hash)
                     (?\? interrogation)
                     (?\" double-quote)
                     (?\' single-quote)
                     (?<  less-than)
                     (?>  end-decl)
                     (?\( begin-group)
                     (?\) end-group)
                     (?\[ begin-subset)))
      (aset ebnf-dtd-token-table (car entry) (nth 1 entry)))
    ;; Kept as the last form so the function still returns this value.
    (aset ebnf-dtd-token-table ?\] 'end-subset))
  1014. ;; replace the range "\240-\377" (see `ebnf-range-regexp').
  (defconst ebnf-dtd-name-chars
    (ebnf-range-regexp "-._:0-9A-Za-z" ?\240 ?\377)
    "Character set the lexer uses to read names and keywords.
It is passed to `ebnf-buffer-substring' by `ebnf-dtd-lex' and
`ebnf-dtd-name-ref'.")
  (defconst ebnf-dtd-decl-alist
    '(("ATTLIST"  . attlist-decl)
      ("DOCTYPE"  . doctype-decl)
      ("ELEMENT"  . element-decl)
      ("ENTITY"   . entity-decl)
      ("NOTATION" . notation-decl))
    "Alist mapping the keyword that follows `<!' to its lexical token.
`ebnf-dtd-lex' signals an error for a keyword not listed here.")
  (defconst ebnf-dtd-element-alist
    '(("#FIXED"    . fixed)
      ("#IMPLIED"  . implied)
      ("#PCDATA"   . pcdata)
      ("#REQUIRED" . required))
    "Alist mapping a keyword starting with `#' to its lexical token.
`ebnf-dtd-lex' signals an error for a `#' keyword not listed here.")
  (defconst ebnf-dtd-name-alist
    '(("ANY"        . any)
      ("CDATA"      . cdata)
      ("EMPTY"      . empty)
      ("ENTITIES"   . entities)
      ("ENTITY"     . entity)
      ("ID"         . id)
      ("IDREF"      . idref)
      ("IDREFS"     . idrefs)
      ("NDATA"      . ndata)
      ("NMTOKEN"    . nmtoken)
      ("NMTOKENS"   . nmtokens)
      ("NOTATION"   . notation)
      ("PUBLIC"     . public)
      ("SYSTEM"     . system)
      ("encoding"   . encoding-attr)
      ("standalone" . standalone-attr)
      ("version"    . version-attr))
    "Alist mapping reserved names to their lexical token.
`ebnf-dtd-lex' returns the associated token instead of `name' or
`name-char' when the text it has just read is listed here.")
  (defun ebnf-dtd-lex ()
    "Lexical analyzer for DTD.
Return a lexical token.
See documentation for variable `ebnf-dtd-lex'."
    (cond
     ((>= (point) ebnf-limit)
      'end-of-input)
     (t
      (let ((token nil)
            (skipping t))
        ;; Skip spaces and comments.  TOKEN ends up holding the class of
        ;; the first character that is neither.
        (while skipping
          (let ((char (following-char)))
            (if (> char 255)
                ;; outside the 256-entry token table
                (setq token 'error
                      skipping nil)
              (setq token (aref ebnf-dtd-token-table char))
              (setq skipping
                    (cond
                     ((eq token 'space)
                      (skip-chars-forward " \n\r\t" ebnf-limit)
                      (< (point) ebnf-limit))
                     ((and (eq token 'less-than)
                           (looking-at "<!--"))
                      ;; non-nil only if the comment was properly closed
                      (ebnf-dtd-skip-comment))
                     (t
                      nil))))))
        (cond
         ;; end of input
         ((>= (point) ebnf-limit)
          'end-of-input)
         ;; error
         ((eq token 'error)
          (error "Invalid character"))
         ;; beginning of declaration:
         ;; <?name, <!ATTLIST, <!DOCTYPE, <!ELEMENT, <!ENTITY, <!NOTATION
         ((eq token 'less-than)
          (forward-char)
          (let ((marker (following-char)))
            (cond
             ((= marker ?!)               ; <!
              (forward-char)
              (let ((keyword (ebnf-buffer-substring ebnf-dtd-name-chars)))
                (or (cdr (assoc keyword ebnf-dtd-decl-alist))
                    (error "Invalid declaration name `%s'" keyword))))
             ((= marker ?\?)              ; <?
              (forward-char)
              (setq ebnf-dtd-lex (ebnf-buffer-substring ebnf-dtd-name-chars))
              'begin-pi)
             (t                           ; <x
              (error "Invalid declaration `<%c'" marker)))))
         ;; name, namechar: a reserved name yields its own token
         ((memq token '(name name-char))
          (setq ebnf-dtd-lex (ebnf-buffer-substring ebnf-dtd-name-chars))
          (or (cdr (assoc ebnf-dtd-lex ebnf-dtd-name-alist))
              token))
         ;; ?, ?>
         ((eq token 'interrogation)
          (forward-char)
          (cond ((= (following-char) ?>)
                 (forward-char)
                 'end-pi)
                (t
                 'optional)))
         ;; #FIXED, #IMPLIED, #PCDATA, #REQUIRED
         ((eq token 'hash)
          (forward-char)
          (setq ebnf-dtd-lex
                (concat "#" (ebnf-buffer-substring ebnf-dtd-name-chars)))
          (or (cdr (assoc ebnf-dtd-lex ebnf-dtd-element-alist))
              (error "Invalid element `%s'" ebnf-dtd-lex)))
         ;; "string", 'string'
         ((memq token '(double-quote single-quote))
          (setq ebnf-dtd-lex
                (ebnf-dtd-string (if (eq token 'double-quote) ?\" ?\')))
          'string)
         ;; %, %name;
         ((eq token 'percent)
          (forward-char)
          (cond ((looking-at "[ \n\r\t]")
                 'percent)
                (t
                 (setq ebnf-dtd-lex (ebnf-dtd-name-ref "%"))
                 'pe-ref)))
         ;; &#...;, &#x...;, &name;
         ((eq token 'ampersand)
          (forward-char)
          (cond
           ;; &name;
           ((/= (following-char) ?#)
            (setq ebnf-dtd-lex (ebnf-dtd-name-ref "&"))
            'entity-ref)
           ;; &#...;, &#x...;
           (t
            (forward-char)
            (setq ebnf-dtd-lex
                  (cond ((= (following-char) ?x)
                         (forward-char)
                         (ebnf-dtd-char-ref "&#x" "0-9a-fA-F"))
                        (t
                         (ebnf-dtd-char-ref "&#" "0-9"))))
            'char-ref)))
         ;; miscellaneous: (, ), [, ], =, |, *, +, >, `,'
         (t
          (forward-char)
          token))))))
  (defun ebnf-dtd-name-ref (start)
    "Read a name reference and return it as a string.
START is the text already consumed (\"%\" or \"&\").  The name is read
with the character set `ebnf-dtd-name-chars'; see `ebnf-dtd-char-ref'."
    (ebnf-dtd-char-ref start ebnf-dtd-name-chars))

  (defun ebnf-dtd-char-ref (start chars)
    "Read the body of a reference and return the whole reference.
START is the text already consumed (for example \"&#x\").  CHARS is the
character set, as understood by `ebnf-buffer-substring', allowed in the
body.  The body must contain at least one character and must be
followed by `;', which is consumed.  Return START, the body and \";\"
concatenated.
Signal an error if the body is empty or `;' is missing."
    (let* ((body (ebnf-buffer-substring chars))
           (terminator (following-char)))
      ;; An empty body (`&;', `%;', `&#;' or `&#x;') is not a valid
      ;; reference: the grammar requires a Name or at least one digit.
      (when (or (string= body "")
                (/= terminator ?\;))
        (error "Invalid element `%s%s%c'" start body terminator))
      (forward-char)
      (format "%s%s;" start body)))
  1153. ;; replace the range "\240-\377" (see `ebnf-range-regexp').
  (defconst ebnf-dtd-double-string-chars
    (ebnf-range-regexp "\t -!#-~" ?\240 ?\377)
    "Character set skipped by `ebnf-dtd-string' inside a \"...\" string.
The double quote itself is left out, so skipping stops at the closing
delimiter.")

  (defconst ebnf-dtd-single-string-chars
    (ebnf-range-regexp "\t -&(-~" ?\240 ?\377)
    "Character set skipped by `ebnf-dtd-string' inside a '...' string.
The single quote itself is left out, so skipping stops at the closing
delimiter.")
  (defun ebnf-dtd-string (delim)
    "Read the quoted string at point and return its contents.
Point must be on the opening delimiter.  DELIM is the delimiter
character: a double quote selects `ebnf-dtd-double-string-chars', any
other value selects `ebnf-dtd-single-string-chars'.  Point is left
after the closing delimiter; the delimiters are not part of the value.
Signal an error if the closing delimiter is not found."
    (forward-char)                        ; skip the opening delimiter
    (let ((start (point)))
      (skip-chars-forward (cond ((= delim ?\") ebnf-dtd-double-string-chars)
                                (t ebnf-dtd-single-string-chars))
                          ebnf-limit)
      (unless (= (following-char) delim)
        (error "Missing string delimiter `%c'" delim))
      (let ((end (point)))
        (forward-char)                    ; skip the closing delimiter
        (buffer-substring-no-properties start end))))
  1173. ;; replace the range "\177-\237" (see `ebnf-range-regexp').
  (defconst ebnf-dtd-comment-chars
    (ebnf-range-regexp "^-\000-\010\013\014\016-\037" ?\177 ?\237)
    "Character set skipped by `ebnf-dtd-skip-comment' inside a comment.
The set is negated: skipping stops at `-' and at the listed control
characters.")

  (defconst ebnf-dtd-filename-chars
    (ebnf-range-regexp "^-\000-\037" ?\177 ?\237)
    "Character set read by `ebnf-dtd-eps-filename' as part of a file name.
The set is negated: reading stops at `-' and at the listed control
characters.")
  (defun ebnf-dtd-skip-comment ()
    "Skip the comment starting at point.
Point must be on the opening `<!--'.  The first character of the
comment text selects an action: while `ebnf-eps-executing' is non-nil,
`[', `]', `H' and `F' read a file name with `ebnf-dtd-eps-filename' and
pass it to the matching EPS function; otherwise `ebnf-action' is set
from `ebnf-comment-table'.

Return t with point after the closing `-->', or nil if `ebnf-limit' is
reached first.  Signal an error if the comment contains a character
that is not in `ebnf-dtd-comment-chars'."
    (forward-char 4)                      ; <!--
    (cond
     ;; open EPS file
     ((and ebnf-eps-executing (= (following-char) ?\[))
      (ebnf-eps-add-context (ebnf-dtd-eps-filename)))
     ;; close EPS file
     ((and ebnf-eps-executing (= (following-char) ?\]))
      (ebnf-eps-remove-context (ebnf-dtd-eps-filename)))
     ;; EPS header
     ((and ebnf-eps-executing (= (following-char) ?H))
      (ebnf-eps-header-comment (ebnf-dtd-eps-filename)))
     ;; EPS footer
     ((and ebnf-eps-executing (= (following-char) ?F))
      (ebnf-eps-footer-comment (ebnf-dtd-eps-filename)))
     ;; any other action in comment
     (t
      (setq ebnf-action (aref ebnf-comment-table (following-char)))))
    ;; Skip the comment text.  Dashes that do not start `-->' are part of
    ;; the text.  The loop must go on only while point is on such a dash:
    ;; stopping on any other character means that character is invalid,
    ;; and looping again would never move point.
    (while (progn
             (skip-chars-forward ebnf-dtd-comment-chars ebnf-limit)
             (and (< (point) ebnf-limit)
                  (= (following-char) ?-)
                  (not (looking-at "-->"))))
      (skip-chars-forward "-" ebnf-limit))
    ;; check for a valid end of comment
    (cond ((>= (point) ebnf-limit)
           nil)
          ((looking-at "-->")
           (forward-char 3)
           t)
          (t
           (error "Invalid character"))))
  (defun ebnf-dtd-eps-filename ()
    "Read the file name that follows the character at point.
Skip one character, then collect text made of characters from
`ebnf-dtd-filename-chars' together with any runs of `-' that do not
start `-->'.  Return the collected text as a string, leaving point
where reading stopped."
    (forward-char)
    (let ((pieces (list (ebnf-buffer-substring ebnf-dtd-filename-chars))))
      ;; Reading stops on `-' or on a control character such as \n, \t
      ;; or \r; only a dash that does not open `-->' belongs to the name.
      (while (and (< (point) ebnf-limit)
                  (= (following-char) ?-)
                  (not (looking-at "-->")))
        (setq pieces (cons (ebnf-buffer-substring "-") pieces))
        (setq pieces (cons (ebnf-buffer-substring ebnf-dtd-filename-chars)
                           pieces)))
      (apply #'concat (nreverse pieces))))
  1223. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  1224. (provide 'ebnf-dtd)
  1225. ;;; ebnf-dtd.el ends here