XMLParser.java 155 KB


  1. /* XMLParser.java --
  2. Copyright (C) 2005 Free Software Foundation, Inc.
  3. This file is part of GNU Classpath.
  4. GNU Classpath is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2, or (at your option)
  7. any later version.
  8. GNU Classpath is distributed in the hope that it will be useful, but
  9. WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with GNU Classpath; see the file COPYING. If not, write to the
  14. Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  15. 02110-1301 USA.
  16. Linking this library statically or dynamically with other modules is
  17. making a combined work based on this library. Thus, the terms and
  18. conditions of the GNU General Public License cover the whole
  19. combination.
  20. As a special exception, the copyright holders of this library give you
  21. permission to link this library with independent modules to produce an
  22. executable, regardless of the license terms of these independent
  23. modules, and to copy and distribute the resulting executable under
  24. terms of your choice, provided that you also meet, for each linked
  25. independent module, the terms and conditions of the license of that
  26. module. An independent module is a module which is not derived from
  27. or based on this library. If you modify this library, you may extend
  28. this exception to your version of the library, but you are not
  29. obligated to do so. If you do not wish to do so, delete this
  30. exception statement from your version.
  31. Partly derived from code which carried the following notice:
  32. Copyright (c) 1997, 1998 by Microstar Software Ltd.
  33. AElfred is free for both commercial and non-commercial use and
  34. redistribution, provided that Microstar's copyright and disclaimer are
  35. retained intact. You are free to modify AElfred for your own use and
  36. to redistribute AElfred with your modifications, provided that the
  37. modifications are clearly documented.
  38. This program is distributed in the hope that it will be useful, but
  39. WITHOUT ANY WARRANTY; without even the implied warranty of
  40. merchantability or fitness for a particular purpose. Please use it AT
  41. YOUR OWN RISK.
  42. */
  43. package gnu.xml.stream;
  44. import gnu.java.lang.CPStringBuilder;
  45. import java.io.BufferedInputStream;
  46. import java.io.EOFException;
  47. import java.io.File;
  48. import java.io.FileOutputStream;
  49. import java.io.FileWriter;
  50. import java.io.InputStream;
  51. import java.io.InputStreamReader;
  52. import java.io.IOException;
  53. import java.io.Reader;
  54. import java.io.StringReader;
  55. import java.io.UnsupportedEncodingException;
  56. import java.net.MalformedURLException;
  57. import java.net.URL;
  58. import java.util.ArrayList;
  59. import java.util.Collections;
  60. import java.util.HashSet;
  61. import java.util.Iterator;
  62. import java.util.LinkedHashMap;
  63. import java.util.LinkedList;
  64. import java.util.Map;
  65. import java.util.NoSuchElementException;
  66. import java.util.StringTokenizer;
  67. import javax.xml.XMLConstants;
  68. import javax.xml.namespace.NamespaceContext;
  69. import javax.xml.namespace.QName;
  70. import javax.xml.stream.Location;
  71. import javax.xml.stream.XMLInputFactory;
  72. import javax.xml.stream.XMLReporter;
  73. import javax.xml.stream.XMLResolver;
  74. import javax.xml.stream.XMLStreamConstants;
  75. import javax.xml.stream.XMLStreamException;
  76. import javax.xml.stream.XMLStreamReader;
  77. import gnu.java.net.CRLFInputStream;
  78. import gnu.classpath.debug.TeeInputStream;
  79. import gnu.classpath.debug.TeeReader;
  80. /**
  81. * An XML parser.
  82. * This parser supports the following additional StAX properties:
  83. * <table>
  84. * <tr><td>gnu.xml.stream.stringInterning</td>
  85. * <td>Boolean</td>
  86. * <td>Indicates whether markup strings will be interned</td></tr>
  87. * <tr><td>gnu.xml.stream.xmlBase</td>
  88. * <td>Boolean</td>
  89. * <td>Indicates whether XML Base processing will be performed</td></tr>
  90. * <tr><td>gnu.xml.stream.baseURI</td>
  91. * <td>String</td>
  92. * <td>Returns the base URI of the current event</td></tr>
  93. * </table>
  94. *
  95. * @see http://www.w3.org/TR/REC-xml/
  96. * @see http://www.w3.org/TR/xml11/
  97. * @see http://www.w3.org/TR/REC-xml-names
  98. * @see http://www.w3.org/TR/xml-names11
  99. * @see http://www.w3.org/TR/xmlbase/
  100. *
  101. * @author <a href='mailto:dog@gnu.org'>Chris Burdess</a>
  102. */
  103. public class XMLParser
  104. implements XMLStreamReader, NamespaceContext
  105. {
  106. // -- parser state machine states --
  107. private static final int INIT = 0; // start state
  108. private static final int PROLOG = 1; // in prolog
  109. private static final int CONTENT = 2; // in content
  110. private static final int EMPTY_ELEMENT = 3; // empty element state
  111. private static final int MISC = 4; // in Misc (after root element)
  112. // -- parameters for parsing literals --
  113. private final static int LIT_ENTITY_REF = 2;
  114. private final static int LIT_NORMALIZE = 4;
  115. private final static int LIT_ATTRIBUTE = 8;
  116. private final static int LIT_DISABLE_PE = 16;
  117. private final static int LIT_DISABLE_CREF = 32;
  118. private final static int LIT_DISABLE_EREF = 64;
  119. private final static int LIT_PUBID = 256;
  120. // -- types of attribute values --
  121. final static int ATTRIBUTE_DEFAULT_UNDECLARED = 30;
  122. final static int ATTRIBUTE_DEFAULT_SPECIFIED = 31;
  123. final static int ATTRIBUTE_DEFAULT_IMPLIED = 32;
  124. final static int ATTRIBUTE_DEFAULT_REQUIRED = 33;
  125. final static int ATTRIBUTE_DEFAULT_FIXED = 34;
  126. // -- additional event types --
  127. final static int START_ENTITY = 50;
  128. final static int END_ENTITY = 51;
  129. /**
  130. * The current input.
  131. */
  132. private Input input;
  133. /**
  134. * Stack of inputs representing XML general entities.
  135. * The input representing the XML input stream or reader is always the
  136. * first element in this stack.
  137. */
  138. private LinkedList inputStack = new LinkedList();
  139. /**
  140. * Stack of start-entity events to be reported.
  141. */
  142. private LinkedList startEntityStack = new LinkedList();
  143. /**
  144. * Stack of end-entity events to be reported.
  145. */
  146. private LinkedList endEntityStack = new LinkedList();
  147. /**
  148. * Current parser state within the main state machine.
  149. */
  150. private int state = INIT;
  151. /**
  152. * The (type of the) current event.
  153. */
  154. private int event;
  155. /**
  156. * The element name stack. The first element in this stack will be the
  157. * root element.
  158. */
  159. private LinkedList stack = new LinkedList();
  160. /**
  161. * Stack of namespace contexts. These are maps specifying prefix-to-URI
  162. * mappings. The first element in this stack is the most recent namespace
  163. * context (i.e. the other way around from the element name stack).
  164. */
  165. private LinkedList namespaces = new LinkedList();
  166. /**
  167. * The base-URI stack. This holds the base URI context for each element.
  168. * The first element in this stack is the most recent context (i.e. the
  169. * other way around from the element name stack).
  170. */
  171. private LinkedList bases = new LinkedList();
  172. /**
  173. * The list of attributes for the current element, in the order defined in
  174. * the XML stream.
  175. */
  176. private ArrayList attrs = new ArrayList();
  177. /**
  178. * Buffer for text and character data.
  179. */
  180. private StringBuffer buf = new StringBuffer();
  181. /**
  182. * Buffer for NMTOKEN strings (markup).
  183. */
  184. private StringBuffer nmtokenBuf = new StringBuffer();
  185. /**
  186. * Buffer for string literals. (e.g. attribute values)
  187. */
  188. private StringBuffer literalBuf = new StringBuffer();
  189. /**
  190. * Temporary Unicode character buffer used during character data reads.
  191. */
  192. private int[] tmpBuf = new int[1024];
  193. /**
  194. * The element content model for the current element.
  195. */
  196. private ContentModel currentContentModel;
  197. /**
  198. * The validation stack. This holds lists of the elements seen for each
  199. * element, in order to determine whether the names and order of these
  200. * elements match the content model for the element. The last entry in
  201. * this stack represents the current element.
  202. */
  203. private LinkedList validationStack;
  204. /**
  205. * These sets contain the IDs and the IDREFs seen in the document, to
  206. * ensure that IDs are unique and that each IDREF refers to an ID in the
  207. * document.
  208. */
  209. private HashSet ids, idrefs;
  210. /**
  211. * The target and data associated with the current processing instruction
  212. * event.
  213. */
  214. private String piTarget, piData;
  215. /**
  216. * The XML version declared in the XML declaration.
  217. */
  218. private String xmlVersion;
  219. /**
  220. * The encoding declared in the XML declaration.
  221. */
  222. private String xmlEncoding;
  223. /**
  224. * The standalone value declared in the XML declaration.
  225. */
  226. private Boolean xmlStandalone;
  227. /**
  228. * The document type definition.
  229. */
  230. Doctype doctype;
  231. /**
  232. * State variables for determining parameter-entity expansion.
  233. */
  234. private boolean expandPE, peIsError;
  235. /**
  236. * Whether this is a validating parser.
  237. */
  238. private final boolean validating;
  239. /**
  240. * Whether strings representing markup will be interned.
  241. */
  242. private final boolean stringInterning;
  243. /**
  244. * If true, CDATA sections will be merged with adjacent text nodes into a
  245. * single event.
  246. */
  247. private final boolean coalescing;
  248. /**
  249. * Whether to replace general entity references with their replacement
  250. * text automatically during parsing.
  251. * Otherwise entity-reference events will be issued.
  252. */
  253. private final boolean replaceERefs;
  254. /**
  255. * Whether to support external entities.
  256. */
  257. private final boolean externalEntities;
  258. /**
  259. * Whether to support DTDs.
  260. */
  261. private final boolean supportDTD;
  262. /**
  263. * Whether to support XML namespaces. If true, namespace information will
  264. * be available. Otherwise namespaces will simply be reported as ordinary
  265. * attributes.
  266. */
  267. private final boolean namespaceAware;
  268. /**
  269. * Whether to support XML Base. If true, URIs specified in xml:base
  270. * attributes will be honoured when resolving external entities.
  271. */
  272. private final boolean baseAware;
  273. /**
  274. * Whether to report extended event types (START_ENTITY and END_ENTITY)
  275. * in addition to the standard event types. Used by the SAX parser.
  276. */
  277. private final boolean extendedEventTypes;
  278. /**
  279. * The reporter to receive parsing warnings.
  280. */
  281. final XMLReporter reporter;
  282. /**
  283. * Callback interface for resolving external entities.
  284. */
  285. final XMLResolver resolver;
  // -- Markup strings used to test what kind of event comes next --

  private static final String TEST_START_ELEMENT = "<";
  private static final String TEST_END_ELEMENT = "</";
  private static final String TEST_COMMENT = "<!--";
  private static final String TEST_PI = "<?";
  private static final String TEST_CDATA = "<![CDATA[";
  private static final String TEST_XML_DECL = "<?xml";
  private static final String TEST_DOCTYPE_DECL = "<!DOCTYPE";
  private static final String TEST_ELEMENT_DECL = "<!ELEMENT";
  private static final String TEST_ATTLIST_DECL = "<!ATTLIST";
  private static final String TEST_ENTITY_DECL = "<!ENTITY";
  private static final String TEST_NOTATION_DECL = "<!NOTATION";
  private static final String TEST_KET = ">";
  private static final String TEST_END_COMMENT = "--";
  private static final String TEST_END_PI = "?>";
  private static final String TEST_END_CDATA = "]]>";

  /**
   * The general entities predefined by the XML specification, mapping
   * each entity name to its replacement text.
   */
  private static final LinkedHashMap PREDEFINED_ENTITIES = new LinkedHashMap();
  static
  {
    // Name/replacement pairs, registered in this order.
    final String[][] predefined =
      {
        { "amp", "&" },
        { "lt", "<" },
        { "gt", ">" },
        { "apos", "'" },
        { "quot", "\"" }
      };
    for (int k = 0; k < predefined.length; k++)
      PREDEFINED_ENTITIES.put(predefined[k][0], predefined[k][1]);
  }
  314. /**
  315. * Creates a new XML parser for the given input stream.
  316. * This constructor should be used where possible, as it allows the
  317. * encoding of the XML data to be correctly determined from the stream.
  318. * @param in the input stream
  319. * @param systemId the URL from which the input stream was retrieved
  320. * (necessary if there are external entities to be resolved)
  321. * @param validating if the parser is to be a validating parser
  322. * @param namespaceAware if the parser should support XML Namespaces
  323. * @param coalescing if CDATA sections should be merged into adjacent text
  324. * nodes
  325. * @param replaceERefs if entity references should be automatically
  326. * replaced by their replacement text (otherwise they will be reported as
  327. * entity-reference events)
  328. * @param externalEntities if external entities should be loaded
  329. * @param supportDTD if support for the XML DTD should be enabled
  330. * @param baseAware if the parser should support XML Base to resolve
  331. * external entities
  332. * @param stringInterning whether strings will be interned during parsing
  333. * @param reporter the reporter to receive warnings during processing
  334. * @param resolver the callback interface used to resolve external
  335. * entities
  336. */
  337. public XMLParser(InputStream in, String systemId,
  338. boolean validating,
  339. boolean namespaceAware,
  340. boolean coalescing,
  341. boolean replaceERefs,
  342. boolean externalEntities,
  343. boolean supportDTD,
  344. boolean baseAware,
  345. boolean stringInterning,
  346. boolean extendedEventTypes,
  347. XMLReporter reporter,
  348. XMLResolver resolver)
  349. {
  350. this.validating = validating;
  351. this.namespaceAware = namespaceAware;
  352. this.coalescing = coalescing;
  353. this.replaceERefs = replaceERefs;
  354. this.externalEntities = externalEntities;
  355. this.supportDTD = supportDTD;
  356. this.baseAware = baseAware;
  357. this.stringInterning = stringInterning;
  358. this.extendedEventTypes = extendedEventTypes;
  359. this.reporter = reporter;
  360. this.resolver = resolver;
  361. if (validating)
  362. {
  363. validationStack = new LinkedList();
  364. ids = new HashSet();
  365. idrefs = new HashSet();
  366. }
  367. String debug = System.getProperty("gnu.xml.debug.input");
  368. if (debug != null)
  369. {
  370. try
  371. {
  372. File file = File.createTempFile(debug, ".xml");
  373. in = new TeeInputStream(in, new FileOutputStream(file));
  374. }
  375. catch (IOException e)
  376. {
  377. RuntimeException e2 = new RuntimeException();
  378. e2.initCause(e);
  379. throw e2;
  380. }
  381. }
  382. systemId = canonicalize(systemId);
  383. pushInput(new Input(in, null, null, systemId, null, null, false, true));
  384. }
  385. /**
  386. * Creates a new XML parser for the given character stream.
  387. * This constructor is only available for compatibility with the JAXP
  388. * APIs, which permit XML to be parsed from a character stream. Because
  389. * the encoding specified by the character stream may conflict with that
  390. * specified in the XML declaration, this method should be avoided where
  391. * possible.
  392. * @param in the input stream
  393. * @param systemId the URL from which the input stream was retrieved
  394. * (necessary if there are external entities to be resolved)
  395. * @param validating if the parser is to be a validating parser
  396. * @param namespaceAware if the parser should support XML Namespaces
  397. * @param coalescing if CDATA sections should be merged into adjacent text
  398. * nodes
  399. * @param replaceERefs if entity references should be automatically
  400. * replaced by their replacement text (otherwise they will be reported as
  401. * entity-reference events)
  402. * @param externalEntities if external entities should be loaded
  403. * @param supportDTD if support for the XML DTD should be enabled
  404. * @param baseAware if the parser should support XML Base to resolve
  405. * external entities
  406. * @param stringInterning whether strings will be interned during parsing
  407. * @param reporter the reporter to receive warnings during processing
  408. * @param resolver the callback interface used to resolve external
  409. * entities
  410. */
  411. public XMLParser(Reader reader, String systemId,
  412. boolean validating,
  413. boolean namespaceAware,
  414. boolean coalescing,
  415. boolean replaceERefs,
  416. boolean externalEntities,
  417. boolean supportDTD,
  418. boolean baseAware,
  419. boolean stringInterning,
  420. boolean extendedEventTypes,
  421. XMLReporter reporter,
  422. XMLResolver resolver)
  423. {
  424. this.validating = validating;
  425. this.namespaceAware = namespaceAware;
  426. this.coalescing = coalescing;
  427. this.replaceERefs = replaceERefs;
  428. this.externalEntities = externalEntities;
  429. this.supportDTD = supportDTD;
  430. this.baseAware = baseAware;
  431. this.stringInterning = stringInterning;
  432. this.extendedEventTypes = extendedEventTypes;
  433. this.reporter = reporter;
  434. this.resolver = resolver;
  435. if (validating)
  436. {
  437. validationStack = new LinkedList();
  438. ids = new HashSet();
  439. idrefs = new HashSet();
  440. }
  441. String debug = System.getProperty("gnu.xml.debug.input");
  442. if (debug != null)
  443. {
  444. try
  445. {
  446. File file = File.createTempFile(debug, ".xml");
  447. reader = new TeeReader(reader, new FileWriter(file));
  448. }
  449. catch (IOException e)
  450. {
  451. RuntimeException e2 = new RuntimeException();
  452. e2.initCause(e);
  453. throw e2;
  454. }
  455. }
  456. systemId = canonicalize(systemId);
  457. pushInput(new Input(null, reader, null, systemId, null, null, false, true));
  458. }
  459. // -- NamespaceContext --
  460. public String getNamespaceURI(String prefix)
  461. {
  462. if (XMLConstants.XML_NS_PREFIX.equals(prefix))
  463. return XMLConstants.XML_NS_URI;
  464. if (XMLConstants.XMLNS_ATTRIBUTE.equals(prefix))
  465. return XMLConstants.XMLNS_ATTRIBUTE_NS_URI;
  466. for (Iterator i = namespaces.iterator(); i.hasNext(); )
  467. {
  468. LinkedHashMap ctx = (LinkedHashMap) i.next();
  469. String namespaceURI = (String) ctx.get(prefix);
  470. if (namespaceURI != null)
  471. return namespaceURI;
  472. }
  473. return null;
  474. }
  475. public String getPrefix(String namespaceURI)
  476. {
  477. if (XMLConstants.XML_NS_URI.equals(namespaceURI))
  478. return XMLConstants.XML_NS_PREFIX;
  479. if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(namespaceURI))
  480. return XMLConstants.XMLNS_ATTRIBUTE;
  481. for (Iterator i = namespaces.iterator(); i.hasNext(); )
  482. {
  483. LinkedHashMap ctx = (LinkedHashMap) i.next();
  484. if (ctx.containsValue(namespaceURI))
  485. {
  486. for (Iterator j = ctx.entrySet().iterator(); j.hasNext(); )
  487. {
  488. Map.Entry entry = (Map.Entry) i.next();
  489. String uri = (String) entry.getValue();
  490. if (uri.equals(namespaceURI))
  491. return (String) entry.getKey();
  492. }
  493. }
  494. }
  495. return null;
  496. }
  497. public Iterator getPrefixes(String namespaceURI)
  498. {
  499. if (XMLConstants.XML_NS_URI.equals(namespaceURI))
  500. return Collections.singleton(XMLConstants.XML_NS_PREFIX).iterator();
  501. if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(namespaceURI))
  502. return Collections.singleton(XMLConstants.XMLNS_ATTRIBUTE).iterator();
  503. LinkedList acc = new LinkedList();
  504. for (Iterator i = namespaces.iterator(); i.hasNext(); )
  505. {
  506. LinkedHashMap ctx = (LinkedHashMap) i.next();
  507. if (ctx.containsValue(namespaceURI))
  508. {
  509. for (Iterator j = ctx.entrySet().iterator(); j.hasNext(); )
  510. {
  511. Map.Entry entry = (Map.Entry) i.next();
  512. String uri = (String) entry.getValue();
  513. if (uri.equals(namespaceURI))
  514. acc.add(entry.getKey());
  515. }
  516. }
  517. }
  518. return acc.iterator();
  519. }
  520. // -- XMLStreamReader --
  521. public void close()
  522. throws XMLStreamException
  523. {
  524. stack = null;
  525. namespaces = null;
  526. bases = null;
  527. buf = null;
  528. attrs = null;
  529. doctype = null;
  530. inputStack = null;
  531. validationStack = null;
  532. ids = null;
  533. idrefs = null;
  534. }
  535. public NamespaceContext getNamespaceContext()
  536. {
  537. return this;
  538. }
  539. public int getAttributeCount()
  540. {
  541. return attrs.size();
  542. }
  543. public String getAttributeLocalName(int index)
  544. {
  545. Attribute a = (Attribute) attrs.get(index);
  546. return a.localName;
  547. }
  548. public String getAttributeNamespace(int index)
  549. {
  550. String prefix = getAttributePrefix(index);
  551. return getNamespaceURI(prefix);
  552. }
  553. public String getAttributePrefix(int index)
  554. {
  555. Attribute a = (Attribute) attrs.get(index);
  556. return a.prefix;
  557. }
  558. public QName getAttributeName(int index)
  559. {
  560. Attribute a = (Attribute) attrs.get(index);
  561. String namespaceURI = getNamespaceURI(a.prefix);
  562. return new QName(namespaceURI, a.localName, a.prefix);
  563. }
  564. public String getAttributeType(int index)
  565. {
  566. Attribute a = (Attribute) attrs.get(index);
  567. return a.type;
  568. }
  569. private String getAttributeType(String elementName, String attName)
  570. {
  571. if (doctype != null)
  572. {
  573. AttributeDecl att = doctype.getAttributeDecl(elementName, attName);
  574. if (att != null)
  575. return att.type;
  576. }
  577. return "CDATA";
  578. }
  579. public String getAttributeValue(int index)
  580. {
  581. Attribute a = (Attribute) attrs.get(index);
  582. return a.value;
  583. }
  584. public String getAttributeValue(String namespaceURI, String localName)
  585. {
  586. for (Iterator i = attrs.iterator(); i.hasNext(); )
  587. {
  588. Attribute a = (Attribute) i.next();
  589. if (a.localName.equals(localName))
  590. {
  591. String uri = getNamespaceURI(a.prefix);
  592. if ((uri == null && namespaceURI == null) ||
  593. (uri != null && uri.equals(namespaceURI)))
  594. return a.value;
  595. }
  596. }
  597. return null;
  598. }
  599. boolean isAttributeDeclared(int index)
  600. {
  601. if (doctype == null)
  602. return false;
  603. Attribute a = (Attribute) attrs.get(index);
  604. String qn = ("".equals(a.prefix)) ? a.localName :
  605. a.prefix + ":" + a.localName;
  606. String elementName = buf.toString();
  607. return doctype.isAttributeDeclared(elementName, qn);
  608. }
  609. public String getCharacterEncodingScheme()
  610. {
  611. return xmlEncoding;
  612. }
  613. public String getElementText()
  614. throws XMLStreamException
  615. {
  616. if (event != XMLStreamConstants.START_ELEMENT)
  617. throw new XMLStreamException("current event must be START_ELEMENT");
  618. CPStringBuilder elementText = new CPStringBuilder();
  619. int depth = stack.size();
  620. while (event != XMLStreamConstants.END_ELEMENT || stack.size() > depth)
  621. {
  622. switch (next())
  623. {
  624. case XMLStreamConstants.CHARACTERS:
  625. case XMLStreamConstants.SPACE:
  626. elementText.append(buf.toString());
  627. }
  628. }
  629. return elementText.toString();
  630. }
  631. public String getEncoding()
  632. {
  633. return (input.inputEncoding == null) ? "UTF-8" : input.inputEncoding;
  634. }
  635. public int getEventType()
  636. {
  637. return event;
  638. }
  639. public String getLocalName()
  640. {
  641. switch (event)
  642. {
  643. case XMLStreamConstants.START_ELEMENT:
  644. case XMLStreamConstants.END_ELEMENT:
  645. String qName = buf.toString();
  646. int ci = qName.indexOf(':');
  647. String localName = (ci == -1) ? qName : qName.substring(ci + 1);
  648. if (stringInterning)
  649. localName = localName.intern();
  650. return localName;
  651. default:
  652. return null;
  653. }
  654. }
  655. public Location getLocation()
  656. {
  657. return input;
  658. }
  659. public QName getName()
  660. {
  661. switch (event)
  662. {
  663. case XMLStreamConstants.START_ELEMENT:
  664. case XMLStreamConstants.END_ELEMENT:
  665. String qName = buf.toString();
  666. int ci = qName.indexOf(':');
  667. String localName = (ci == -1) ? qName : qName.substring(ci + 1);
  668. if (stringInterning)
  669. localName = localName.intern();
  670. String prefix = (ci == -1) ?
  671. (namespaceAware ? XMLConstants.DEFAULT_NS_PREFIX : null) :
  672. qName.substring(0, ci);
  673. if (stringInterning && prefix != null)
  674. prefix = prefix.intern();
  675. String namespaceURI = getNamespaceURI(prefix);
  676. return new QName(namespaceURI, localName, prefix);
  677. default:
  678. return null;
  679. }
  680. }
  681. public int getNamespaceCount()
  682. {
  683. if (!namespaceAware || namespaces.isEmpty())
  684. return 0;
  685. switch (event)
  686. {
  687. case XMLStreamConstants.START_ELEMENT:
  688. case XMLStreamConstants.END_ELEMENT:
  689. LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
  690. return ctx.size();
  691. default:
  692. return 0;
  693. }
  694. }
  695. public String getNamespacePrefix(int index)
  696. {
  697. LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
  698. int count = 0;
  699. for (Iterator i = ctx.keySet().iterator(); i.hasNext(); )
  700. {
  701. String prefix = (String) i.next();
  702. if (count++ == index)
  703. return prefix;
  704. }
  705. return null;
  706. }
  707. public String getNamespaceURI()
  708. {
  709. switch (event)
  710. {
  711. case XMLStreamConstants.START_ELEMENT:
  712. case XMLStreamConstants.END_ELEMENT:
  713. String qName = buf.toString();
  714. int ci = qName.indexOf(':');
  715. if (ci == -1)
  716. return null;
  717. String prefix = qName.substring(0, ci);
  718. return getNamespaceURI(prefix);
  719. default:
  720. return null;
  721. }
  722. }
  723. public String getNamespaceURI(int index)
  724. {
  725. LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
  726. int count = 0;
  727. for (Iterator i = ctx.values().iterator(); i.hasNext(); )
  728. {
  729. String uri = (String) i.next();
  730. if (count++ == index)
  731. return uri;
  732. }
  733. return null;
  734. }
  735. public String getPIData()
  736. {
  737. return piData;
  738. }
  739. public String getPITarget()
  740. {
  741. return piTarget;
  742. }
  743. public String getPrefix()
  744. {
  745. switch (event)
  746. {
  747. case XMLStreamConstants.START_ELEMENT:
  748. case XMLStreamConstants.END_ELEMENT:
  749. String qName = buf.toString();
  750. int ci = qName.indexOf(':');
  751. String prefix = (ci == -1) ?
  752. (namespaceAware ? XMLConstants.DEFAULT_NS_PREFIX : null) :
  753. qName.substring(0, ci);
  754. if (stringInterning && prefix != null)
  755. prefix = prefix.intern();
  756. return prefix;
  757. default:
  758. return null;
  759. }
  760. }
  761. public Object getProperty(String name)
  762. throws IllegalArgumentException
  763. {
  764. if (name == null)
  765. throw new IllegalArgumentException("name is null");
  766. if (XMLInputFactory.ALLOCATOR.equals(name))
  767. return null;
  768. if (XMLInputFactory.IS_COALESCING.equals(name))
  769. return coalescing ? Boolean.TRUE : Boolean.FALSE;
  770. if (XMLInputFactory.IS_NAMESPACE_AWARE.equals(name))
  771. return namespaceAware ? Boolean.TRUE : Boolean.FALSE;
  772. if (XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES.equals(name))
  773. return replaceERefs ? Boolean.TRUE : Boolean.FALSE;
  774. if (XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES.equals(name))
  775. return externalEntities ? Boolean.TRUE : Boolean.FALSE;
  776. if (XMLInputFactory.IS_VALIDATING.equals(name))
  777. return Boolean.FALSE;
  778. if (XMLInputFactory.REPORTER.equals(name))
  779. return reporter;
  780. if (XMLInputFactory.RESOLVER.equals(name))
  781. return resolver;
  782. if (XMLInputFactory.SUPPORT_DTD.equals(name))
  783. return supportDTD ? Boolean.TRUE : Boolean.FALSE;
  784. if ("gnu.xml.stream.stringInterning".equals(name))
  785. return stringInterning ? Boolean.TRUE : Boolean.FALSE;
  786. if ("gnu.xml.stream.xmlBase".equals(name))
  787. return baseAware ? Boolean.TRUE : Boolean.FALSE;
  788. if ("gnu.xml.stream.baseURI".equals(name))
  789. return getXMLBase();
  790. return null;
  791. }
  792. public String getText()
  793. {
  794. return buf.toString();
  795. }
  796. public char[] getTextCharacters()
  797. {
  798. return buf.toString().toCharArray();
  799. }
  800. public int getTextCharacters(int sourceStart, char[] target,
  801. int targetStart, int length)
  802. throws XMLStreamException
  803. {
  804. length = Math.min(sourceStart + buf.length(), length);
  805. int sourceEnd = sourceStart + length;
  806. buf.getChars(sourceStart, sourceEnd, target, targetStart);
  807. return length;
  808. }
  809. public int getTextLength()
  810. {
  811. return buf.length();
  812. }
  813. public int getTextStart()
  814. {
  815. return 0;
  816. }
  817. public String getVersion()
  818. {
  819. return (xmlVersion == null) ? "1.0" : xmlVersion;
  820. }
  821. public boolean hasName()
  822. {
  823. switch (event)
  824. {
  825. case XMLStreamConstants.START_ELEMENT:
  826. case XMLStreamConstants.END_ELEMENT:
  827. return true;
  828. default:
  829. return false;
  830. }
  831. }
  832. public boolean hasText()
  833. {
  834. switch (event)
  835. {
  836. case XMLStreamConstants.CHARACTERS:
  837. case XMLStreamConstants.SPACE:
  838. return true;
  839. default:
  840. return false;
  841. }
  842. }
  843. public boolean isAttributeSpecified(int index)
  844. {
  845. Attribute a = (Attribute) attrs.get(index);
  846. return a.specified;
  847. }
  848. public boolean isCharacters()
  849. {
  850. return (event == XMLStreamConstants.CHARACTERS);
  851. }
  852. public boolean isEndElement()
  853. {
  854. return (event == XMLStreamConstants.END_ELEMENT);
  855. }
  856. public boolean isStandalone()
  857. {
  858. return Boolean.TRUE.equals(xmlStandalone);
  859. }
  860. public boolean isStartElement()
  861. {
  862. return (event == XMLStreamConstants.START_ELEMENT);
  863. }
  864. public boolean isWhiteSpace()
  865. {
  866. return (event == XMLStreamConstants.SPACE);
  867. }
  868. public int nextTag()
  869. throws XMLStreamException
  870. {
  871. do
  872. {
  873. switch (next())
  874. {
  875. case XMLStreamConstants.START_ELEMENT:
  876. case XMLStreamConstants.END_ELEMENT:
  877. case XMLStreamConstants.CHARACTERS:
  878. case XMLStreamConstants.SPACE:
  879. case XMLStreamConstants.COMMENT:
  880. case XMLStreamConstants.PROCESSING_INSTRUCTION:
  881. break;
  882. default:
  883. throw new XMLStreamException("Unexpected event type: " + event);
  884. }
  885. }
  886. while (event != XMLStreamConstants.START_ELEMENT &&
  887. event != XMLStreamConstants.END_ELEMENT);
  888. return event;
  889. }
  890. public void require(int type, String namespaceURI, String localName)
  891. throws XMLStreamException
  892. {
  893. if (event != type)
  894. throw new XMLStreamException("Current event type is " + event);
  895. if (event == XMLStreamConstants.START_ELEMENT ||
  896. event == XMLStreamConstants.END_ELEMENT)
  897. {
  898. String ln = getLocalName();
  899. if (!ln.equals(localName))
  900. throw new XMLStreamException("Current local-name is " + ln);
  901. String uri = getNamespaceURI();
  902. if ((uri == null && namespaceURI != null) ||
  903. (uri != null && !uri.equals(namespaceURI)))
  904. throw new XMLStreamException("Current namespace URI is " + uri);
  905. }
  906. }
  907. public boolean standaloneSet()
  908. {
  909. return (xmlStandalone != null);
  910. }
  911. public boolean hasNext()
  912. throws XMLStreamException
  913. {
  914. return (event != XMLStreamConstants.END_DOCUMENT && event != -1);
  915. }
  916. public int next()
  917. throws XMLStreamException
  918. {
  919. if (event == XMLStreamConstants.END_ELEMENT)
  920. {
  921. // Pop namespace context
  922. if (namespaceAware && !namespaces.isEmpty())
  923. namespaces.removeFirst();
  924. // Pop base context
  925. if (baseAware && !bases.isEmpty())
  926. bases.removeFirst();
  927. }
  928. if (!startEntityStack.isEmpty())
  929. {
  930. String entityName = (String) startEntityStack.removeFirst();
  931. buf.setLength(0);
  932. buf.append(entityName);
  933. event = START_ENTITY;
  934. return extendedEventTypes ? event : next();
  935. }
  936. else if (!endEntityStack.isEmpty())
  937. {
  938. String entityName = (String) endEntityStack.removeFirst();
  939. buf.setLength(0);
  940. buf.append(entityName);
  941. event = END_ENTITY;
  942. return extendedEventTypes ? event : next();
  943. }
  944. try
  945. {
  946. if (!input.initialized)
  947. input.init();
  948. switch (state)
  949. {
  950. case CONTENT:
  951. if (tryRead(TEST_END_ELEMENT))
  952. {
  953. readEndElement();
  954. if (stack.isEmpty())
  955. state = MISC;
  956. event = XMLStreamConstants.END_ELEMENT;
  957. }
  958. else if (tryRead(TEST_COMMENT))
  959. {
  960. readComment(false);
  961. event = XMLStreamConstants.COMMENT;
  962. }
  963. else if (tryRead(TEST_PI))
  964. {
  965. readPI(false);
  966. event = XMLStreamConstants.PROCESSING_INSTRUCTION;
  967. }
  968. else if (tryRead(TEST_CDATA))
  969. {
  970. readCDSect();
  971. event = XMLStreamConstants.CDATA;
  972. }
  973. else if (tryRead(TEST_START_ELEMENT))
  974. {
  975. state = readStartElement();
  976. event = XMLStreamConstants.START_ELEMENT;
  977. }
  978. else
  979. {
  980. // Check for character reference or predefined entity
  981. mark(8);
  982. int c = readCh();
  983. if (c == 0x26) // '&'
  984. {
  985. c = readCh();
  986. if (c == 0x23) // '#'
  987. {
  988. reset();
  989. event = readCharData(null);
  990. }
  991. else
  992. {
  993. // entity reference
  994. reset();
  995. readCh(); // &
  996. readReference();
  997. String ref = buf.toString();
  998. String text = (String) PREDEFINED_ENTITIES.get(ref);
  999. if (text != null)
  1000. {
  1001. event = readCharData(text);
  1002. }
  1003. else if (replaceERefs && !isUnparsedEntity(ref))
  1004. {
  1005. // this will report a start-entity event
  1006. boolean external = false;
  1007. if (doctype != null)
  1008. {
  1009. Object entity = doctype.getEntity(ref);
  1010. if (entity instanceof ExternalIds)
  1011. external = true;
  1012. }
  1013. expandEntity(ref, false, external);
  1014. event = next();
  1015. }
  1016. else
  1017. {
  1018. event = XMLStreamConstants.ENTITY_REFERENCE;
  1019. }
  1020. }
  1021. }
  1022. else
  1023. {
  1024. reset();
  1025. event = readCharData(null);
  1026. if (validating && doctype != null)
  1027. validatePCData(buf.toString());
  1028. }
  1029. }
  1030. break;
  1031. case EMPTY_ELEMENT:
  1032. String elementName = (String) stack.removeLast();
  1033. buf.setLength(0);
  1034. buf.append(elementName);
  1035. state = stack.isEmpty() ? MISC : CONTENT;
  1036. event = XMLStreamConstants.END_ELEMENT;
  1037. if (validating && doctype != null)
  1038. endElementValidationHook();
  1039. break;
  1040. case INIT: // XMLDecl?
  1041. if (tryRead(TEST_XML_DECL))
  1042. readXMLDecl();
  1043. input.finalizeEncoding();
  1044. event = XMLStreamConstants.START_DOCUMENT;
  1045. state = PROLOG;
  1046. break;
  1047. case PROLOG: // Misc* (doctypedecl Misc*)?
  1048. skipWhitespace();
  1049. if (doctype == null && tryRead(TEST_DOCTYPE_DECL))
  1050. {
  1051. readDoctypeDecl();
  1052. event = XMLStreamConstants.DTD;
  1053. }
  1054. else if (tryRead(TEST_COMMENT))
  1055. {
  1056. readComment(false);
  1057. event = XMLStreamConstants.COMMENT;
  1058. }
  1059. else if (tryRead(TEST_PI))
  1060. {
  1061. readPI(false);
  1062. event = XMLStreamConstants.PROCESSING_INSTRUCTION;
  1063. }
  1064. else if (tryRead(TEST_START_ELEMENT))
  1065. {
  1066. state = readStartElement();
  1067. event = XMLStreamConstants.START_ELEMENT;
  1068. }
  1069. else
  1070. {
  1071. int c = readCh();
  1072. error("no root element: U+" + Integer.toHexString(c));
  1073. }
  1074. break;
  1075. case MISC: // Comment | PI | S
  1076. skipWhitespace();
  1077. if (tryRead(TEST_COMMENT))
  1078. {
  1079. readComment(false);
  1080. event = XMLStreamConstants.COMMENT;
  1081. }
  1082. else if (tryRead(TEST_PI))
  1083. {
  1084. readPI(false);
  1085. event = XMLStreamConstants.PROCESSING_INSTRUCTION;
  1086. }
  1087. else
  1088. {
  1089. if (event == XMLStreamConstants.END_DOCUMENT)
  1090. throw new NoSuchElementException();
  1091. int c = readCh();
  1092. if (c != -1)
  1093. error("Only comments and PIs may appear after " +
  1094. "the root element");
  1095. event = XMLStreamConstants.END_DOCUMENT;
  1096. }
  1097. break;
  1098. default:
  1099. event = -1;
  1100. }
  1101. return event;
  1102. }
  1103. catch (IOException e)
  1104. {
  1105. XMLStreamException e2 = new XMLStreamException();
  1106. e2.initCause(e);
  1107. throw e2;
  1108. }
  1109. }
  1110. // package private
  1111. /**
  1112. * Returns the current element name.
  1113. */
  1114. String getCurrentElement()
  1115. {
  1116. return (String) stack.getLast();
  1117. }
  1118. // private
  1119. private void mark(int limit)
  1120. throws IOException
  1121. {
  1122. input.mark(limit);
  1123. }
  1124. private void reset()
  1125. throws IOException
  1126. {
  1127. input.reset();
  1128. }
  1129. private int read()
  1130. throws IOException
  1131. {
  1132. return input.read();
  1133. }
  1134. private int read(int[] b, int off, int len)
  1135. throws IOException
  1136. {
  1137. return input.read(b, off, len);
  1138. }
  1139. /**
  1140. * Parsed character read.
  1141. */
  1142. private int readCh()
  1143. throws IOException, XMLStreamException
  1144. {
  1145. int c = read();
  1146. if (expandPE && c == 0x25) // '%'
  1147. {
  1148. if (peIsError)
  1149. error("PE reference within decl in internal subset.");
  1150. expandPEReference();
  1151. return readCh();
  1152. }
  1153. return c;
  1154. }
  1155. /**
  1156. * Reads the next character, ensuring it is the character specified.
  1157. * @param delim the character to match
  1158. * @exception XMLStreamException if the next character is not the
  1159. * specified one
  1160. */
  1161. private void require(char delim)
  1162. throws IOException, XMLStreamException
  1163. {
  1164. mark(1);
  1165. int c = readCh();
  1166. if (delim != c)
  1167. {
  1168. reset();
  1169. error("required character (got U+" + Integer.toHexString(c) + ")",
  1170. new Character(delim));
  1171. }
  1172. }
  1173. /**
  1174. * Reads the next few characters, ensuring they match the string specified.
  1175. * @param delim the string to match
  1176. * @exception XMLStreamException if the next characters do not match the
  1177. * specified string
  1178. */
  1179. private void require(String delim)
  1180. throws IOException, XMLStreamException
  1181. {
  1182. char[] chars = delim.toCharArray();
  1183. int len = chars.length;
  1184. mark(len);
  1185. int off = 0;
  1186. do
  1187. {
  1188. int l2 = read(tmpBuf, off, len - off);
  1189. if (l2 == -1)
  1190. {
  1191. reset();
  1192. error("EOF before required string", delim);
  1193. }
  1194. off += l2;
  1195. }
  1196. while (off < len);
  1197. for (int i = 0; i < chars.length; i++)
  1198. {
  1199. if (chars[i] != tmpBuf[i])
  1200. {
  1201. reset();
  1202. error("required string", delim);
  1203. }
  1204. }
  1205. }
  1206. /**
  1207. * Try to read a single character. On failure, reset the stream.
  1208. * @param delim the character to test
  1209. * @return true if the character matched delim, false otherwise.
  1210. */
  1211. private boolean tryRead(char delim)
  1212. throws IOException, XMLStreamException
  1213. {
  1214. mark(1);
  1215. int c = readCh();
  1216. if (delim != c)
  1217. {
  1218. reset();
  1219. return false;
  1220. }
  1221. return true;
  1222. }
  1223. /**
  1224. * Tries to read the specified characters.
  1225. * If successful, the stream is positioned after the last character,
  1226. * otherwise it is reset.
  1227. * @param test the string to test
  1228. * @return true if the characters matched the test string, false otherwise.
  1229. */
  1230. private boolean tryRead(String test)
  1231. throws IOException
  1232. {
  1233. char[] chars = test.toCharArray();
  1234. int len = chars.length;
  1235. mark(len);
  1236. int count = 0;
  1237. int l2 = read(tmpBuf, 0, len);
  1238. if (l2 == -1)
  1239. {
  1240. reset();
  1241. return false;
  1242. }
  1243. count += l2;
  1244. // check the characters we received first before doing additional reads
  1245. for (int i = 0; i < count; i++)
  1246. {
  1247. if (chars[i] != tmpBuf[i])
  1248. {
  1249. reset();
  1250. return false;
  1251. }
  1252. }
  1253. while (count < len)
  1254. {
  1255. // force read
  1256. int c = read();
  1257. if (c == -1)
  1258. {
  1259. reset();
  1260. return false;
  1261. }
  1262. tmpBuf[count] = (char) c;
  1263. // check each character as it is read
  1264. if (chars[count] != tmpBuf[count])
  1265. {
  1266. reset();
  1267. return false;
  1268. }
  1269. count++;
  1270. }
  1271. return true;
  1272. }
  1273. /**
  1274. * Reads characters until the specified test string is encountered.
  1275. * @param delim the string delimiting the end of the characters
  1276. */
  1277. private void readUntil(String delim)
  1278. throws IOException, XMLStreamException
  1279. {
  1280. int startLine = input.line;
  1281. try
  1282. {
  1283. while (!tryRead(delim))
  1284. {
  1285. int c = readCh();
  1286. if (c == -1)
  1287. throw new EOFException();
  1288. else if (input.xml11)
  1289. {
  1290. if (!isXML11Char(c) || isXML11RestrictedChar(c))
  1291. error("illegal XML 1.1 character",
  1292. "U+" + Integer.toHexString(c));
  1293. }
  1294. else if (!isChar(c))
  1295. error("illegal XML character",
  1296. "U+" + Integer.toHexString(c));
  1297. buf.append(Character.toChars(c));
  1298. }
  1299. }
  1300. catch (EOFException e)
  1301. {
  1302. error("end of input while looking for delimiter "+
  1303. "(started on line " + startLine + ')', delim);
  1304. }
  1305. }
  1306. /**
  1307. * Reads any whitespace characters.
  1308. * @return true if whitespace characters were read, false otherwise
  1309. */
  1310. private boolean tryWhitespace()
  1311. throws IOException, XMLStreamException
  1312. {
  1313. boolean white;
  1314. boolean ret = false;
  1315. do
  1316. {
  1317. mark(1);
  1318. int c = readCh();
  1319. while (c == -1 && inputStack.size() > 1)
  1320. {
  1321. popInput();
  1322. c = readCh();
  1323. }
  1324. white = (c == 0x20 || c == 0x09 || c == 0x0a || c == 0x0d);
  1325. if (white)
  1326. ret = true;
  1327. }
  1328. while (white);
  1329. reset();
  1330. return ret;
  1331. }
  1332. /**
  1333. * Skip over any whitespace characters.
  1334. */
  1335. private void skipWhitespace()
  1336. throws IOException, XMLStreamException
  1337. {
  1338. boolean white;
  1339. do
  1340. {
  1341. mark(1);
  1342. int c = readCh();
  1343. while (c == -1 && inputStack.size() > 1)
  1344. {
  1345. popInput();
  1346. c = readCh();
  1347. }
  1348. white = (c == 0x20 || c == 0x09 || c == 0x0a || c == 0x0d);
  1349. }
  1350. while (white);
  1351. reset();
  1352. }
  1353. /**
  1354. * Try to read as many whitespace characters as are available.
  1355. * @exception XMLStreamException if no whitespace characters were seen
  1356. */
  1357. private void requireWhitespace()
  1358. throws IOException, XMLStreamException
  1359. {
  1360. if (!tryWhitespace())
  1361. error("whitespace required");
  1362. }
  1363. /**
  1364. * Returns the current base URI for resolving external entities.
  1365. */
  1366. String getXMLBase()
  1367. {
  1368. if (baseAware)
  1369. {
  1370. for (Iterator i = bases.iterator(); i.hasNext(); )
  1371. {
  1372. String base = (String) i.next();
  1373. if (base != null)
  1374. return base;
  1375. }
  1376. }
  1377. return input.systemId;
  1378. }
  1379. /**
  1380. * Push the specified text input source.
  1381. */
  1382. private void pushInput(String name, String text, boolean report,
  1383. boolean normalize)
  1384. throws IOException, XMLStreamException
  1385. {
  1386. // Check for recursion
  1387. if (name != null && !"".equals(name))
  1388. {
  1389. for (Iterator i = inputStack.iterator(); i.hasNext(); )
  1390. {
  1391. Input ctx = (Input) i.next();
  1392. if (name.equals(ctx.name))
  1393. error("entities may not be self-recursive", name);
  1394. }
  1395. }
  1396. else
  1397. report = false;
  1398. pushInput(new Input(null, new StringReader(text), input.publicId,
  1399. input.systemId, name, input.inputEncoding, report,
  1400. normalize));
  1401. }
  1402. /**
  1403. * Push the specified external input source.
  1404. */
  1405. private void pushInput(String name, ExternalIds ids, boolean report,
  1406. boolean normalize)
  1407. throws IOException, XMLStreamException
  1408. {
  1409. if (!externalEntities)
  1410. return;
  1411. String url = canonicalize(absolutize(input.systemId, ids.systemId));
  1412. // Check for recursion
  1413. for (Iterator i = inputStack.iterator(); i.hasNext(); )
  1414. {
  1415. Input ctx = (Input) i.next();
  1416. if (url.equals(ctx.systemId))
  1417. error("entities may not be self-recursive", url);
  1418. if (name != null && !"".equals(name) && name.equals(ctx.name))
  1419. error("entities may not be self-recursive", name);
  1420. }
  1421. if (name == null || "".equals(name))
  1422. report = false;
  1423. InputStream in = null;
  1424. if (resolver != null)
  1425. {
  1426. Object obj = resolver.resolveEntity(ids.publicId, url, getXMLBase(),
  1427. null);
  1428. if (obj instanceof InputStream)
  1429. in = (InputStream) obj;
  1430. }
  1431. if (in == null)
  1432. in = resolve(url);
  1433. if (in == null)
  1434. error("unable to resolve external entity",
  1435. (ids.systemId != null) ? ids.systemId : ids.publicId);
  1436. pushInput(new Input(in, null, ids.publicId, url, name, null, report,
  1437. normalize));
  1438. input.init();
  1439. if (tryRead(TEST_XML_DECL))
  1440. readTextDecl();
  1441. input.finalizeEncoding();
  1442. }
  1443. /**
  1444. * Push the specified input source (general entity) onto the input stack.
  1445. */
  1446. private void pushInput(Input input)
  1447. {
  1448. if (input.report)
  1449. startEntityStack.addFirst(input.name);
  1450. inputStack.addLast(input);
  1451. if (this.input != null)
  1452. input.xml11 = this.input.xml11;
  1453. this.input = input;
  1454. }
  1455. /**
  1456. * Returns a canonicalized version of the specified URL.
  1457. * This is largely to work around a problem with the specification of
  1458. * file URLs.
  1459. */
  1460. static String canonicalize(String url)
  1461. {
  1462. if (url == null)
  1463. return null;
  1464. if (url.startsWith("file:") && !url.startsWith("file://"))
  1465. url = "file://" + url.substring(5);
  1466. return url;
  1467. }
  1468. /**
  1469. * "Absolutize" a URL. This resolves a relative URL into an absolute one.
  1470. * @param base the current base URL
  1471. * @param href the (absolute or relative) URL to resolve
  1472. */
  1473. public static String absolutize(String base, String href)
  1474. {
  1475. if (href == null)
  1476. return null;
  1477. int ci = href.indexOf(':');
  1478. if (ci > 1 && isURLScheme(href.substring(0, ci)))
  1479. {
  1480. // href is absolute already
  1481. return href;
  1482. }
  1483. if (base == null)
  1484. base = "";
  1485. else
  1486. {
  1487. int i = base.lastIndexOf('/');
  1488. if (i != -1)
  1489. base = base.substring(0, i + 1);
  1490. else
  1491. base = "";
  1492. }
  1493. if ("".equals(base))
  1494. {
  1495. // assume file URL relative to current directory
  1496. base = System.getProperty("user.dir");
  1497. if (base.charAt(0) == '/')
  1498. base = base.substring(1);
  1499. base = "file:///" + base.replace(File.separatorChar, '/');
  1500. if (!base.endsWith("/"))
  1501. base += "/";
  1502. }
  1503. // We can't use java.net.URL here to do the parsing, as it searches for
  1504. // a protocol handler. A protocol handler may not be registered for the
  1505. // URL scheme here. Do it manually.
  1506. //
  1507. // Set aside scheme and host portion of base URL
  1508. String basePrefix = null;
  1509. ci = base.indexOf(':');
  1510. if (ci > 1 && isURLScheme(base.substring(0, ci)))
  1511. {
  1512. if (base.length() > (ci + 3) &&
  1513. base.charAt(ci + 1) == '/' &&
  1514. base.charAt(ci + 2) == '/')
  1515. {
  1516. int si = base.indexOf('/', ci + 3);
  1517. if (si == -1)
  1518. base = null;
  1519. else
  1520. {
  1521. basePrefix = base.substring(0, si);
  1522. base = base.substring(si);
  1523. }
  1524. }
  1525. else
  1526. base = null;
  1527. }
  1528. if (base == null) // unknown or malformed base URL, use href
  1529. return href;
  1530. if (href.startsWith("/")) // absolute href pathname
  1531. return (basePrefix == null) ? href : basePrefix + href;
  1532. // relative href pathname
  1533. if (!base.endsWith("/"))
  1534. {
  1535. int lsi = base.lastIndexOf('/');
  1536. if (lsi == -1)
  1537. base = "/";
  1538. else
  1539. base = base.substring(0, lsi + 1);
  1540. }
  1541. while (href.startsWith("../") || href.startsWith("./"))
  1542. {
  1543. if (href.startsWith("../"))
  1544. {
  1545. // strip last path component from base
  1546. int lsi = base.lastIndexOf('/', base.length() - 2);
  1547. if (lsi > -1)
  1548. base = base.substring(0, lsi + 1);
  1549. href = href.substring(3); // strip ../ prefix
  1550. }
  1551. else
  1552. {
  1553. href = href.substring(2); // strip ./ prefix
  1554. }
  1555. }
  1556. return (basePrefix == null) ? base + href : basePrefix + base + href;
  1557. }
  1558. /**
  1559. * Indicates whether the specified characters match the scheme portion of
  1560. * a URL.
  1561. * @see RFC 1738 section 2.1
  1562. */
  1563. private static boolean isURLScheme(String text)
  1564. {
  1565. int len = text.length();
  1566. for (int i = 0; i < len; i++)
  1567. {
  1568. char c = text.charAt(i);
  1569. if (c == '+' || c == '.' || c == '-')
  1570. continue;
  1571. if (c < 65 || (c > 90 && c < 97) || c > 122)
  1572. return false;
  1573. }
  1574. return true;
  1575. }
  1576. /**
  1577. * Returns an input stream for the given URL.
  1578. */
  1579. static InputStream resolve(String url)
  1580. throws IOException
  1581. {
  1582. try
  1583. {
  1584. return new URL(url).openStream();
  1585. }
  1586. catch (MalformedURLException e)
  1587. {
  1588. return null;
  1589. }
  1590. catch (IOException e)
  1591. {
  1592. IOException e2 = new IOException("error resolving " + url);
  1593. e2.initCause(e);
  1594. throw e2;
  1595. }
  1596. }
  1597. /**
  1598. * Pops the current input source (general entity) off the stack.
  1599. */
  1600. private void popInput()
  1601. {
  1602. Input old = (Input) inputStack.removeLast();
  1603. if (old.report)
  1604. endEntityStack.addFirst(old.name);
  1605. input = (Input) inputStack.getLast();
  1606. }
  1607. /**
  1608. * Parse an entity text declaration.
  1609. */
  1610. private void readTextDecl()
  1611. throws IOException, XMLStreamException
  1612. {
  1613. final int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;
  1614. requireWhitespace();
  1615. if (tryRead("version"))
  1616. {
  1617. readEq();
  1618. String v = readLiteral(flags, false);
  1619. if ("1.0".equals(v))
  1620. input.xml11 = false;
  1621. else if ("1.1".equals(v))
  1622. {
  1623. Input i1 = (Input) inputStack.getFirst();
  1624. if (!i1.xml11)
  1625. error("external entity specifies later version number");
  1626. input.xml11 = true;
  1627. }
  1628. else
  1629. throw new XMLStreamException("illegal XML version: " + v);
  1630. requireWhitespace();
  1631. }
  1632. require("encoding");
  1633. readEq();
  1634. String enc = readLiteral(flags, false);
  1635. skipWhitespace();
  1636. require("?>");
  1637. input.setInputEncoding(enc);
  1638. }
  1639. /**
  1640. * Parse the XML declaration.
  1641. */
  1642. private void readXMLDecl()
  1643. throws IOException, XMLStreamException
  1644. {
  1645. final int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;
  1646. requireWhitespace();
  1647. require("version");
  1648. readEq();
  1649. xmlVersion = readLiteral(flags, false);
  1650. if ("1.0".equals(xmlVersion))
  1651. input.xml11 = false;
  1652. else if ("1.1".equals(xmlVersion))
  1653. input.xml11 = true;
  1654. else
  1655. throw new XMLStreamException("illegal XML version: " + xmlVersion);
  1656. boolean white = tryWhitespace();
  1657. if (tryRead("encoding"))
  1658. {
  1659. if (!white)
  1660. error("whitespace required before 'encoding='");
  1661. readEq();
  1662. xmlEncoding = readLiteral(flags, false);
  1663. white = tryWhitespace();
  1664. }
  1665. if (tryRead("standalone"))
  1666. {
  1667. if (!white)
  1668. error("whitespace required before 'standalone='");
  1669. readEq();
  1670. String standalone = readLiteral(flags, false);
  1671. if ("yes".equals(standalone))
  1672. xmlStandalone = Boolean.TRUE;
  1673. else if ("no".equals(standalone))
  1674. xmlStandalone = Boolean.FALSE;
  1675. else
  1676. error("standalone flag must be 'yes' or 'no'", standalone);
  1677. }
  1678. skipWhitespace();
  1679. require("?>");
  1680. if (xmlEncoding != null)
  1681. input.setInputEncoding(xmlEncoding);
  1682. }
  1683. /**
  1684. * Parse the DOCTYPE declaration.
  1685. */
  1686. private void readDoctypeDecl()
  1687. throws IOException, XMLStreamException
  1688. {
  1689. if (!supportDTD)
  1690. error("parser was configured not to support DTDs");
  1691. requireWhitespace();
  1692. String rootName = readNmtoken(true);
  1693. skipWhitespace();
  1694. ExternalIds ids = readExternalIds(false, true);
  1695. doctype =
  1696. this.new Doctype(rootName, ids.publicId, ids.systemId);
  1697. // Parse internal subset first
  1698. skipWhitespace();
  1699. if (tryRead('['))
  1700. {
  1701. while (true)
  1702. {
  1703. expandPE = true;
  1704. skipWhitespace();
  1705. expandPE = false;
  1706. if (tryRead(']'))
  1707. break;
  1708. else
  1709. readMarkupdecl(false);
  1710. }
  1711. }
  1712. skipWhitespace();
  1713. require('>');
  1714. // Parse external subset
  1715. if (ids.systemId != null && externalEntities)
  1716. {
  1717. pushInput("", ">", false, false);
  1718. pushInput("[dtd]", ids, true, true);
  1719. // loop until we get back to ">"
  1720. while (true)
  1721. {
  1722. expandPE = true;
  1723. skipWhitespace();
  1724. expandPE = false;
  1725. mark(1);
  1726. int c = readCh();
  1727. if (c == 0x3e) // '>'
  1728. break;
  1729. else if (c == -1)
  1730. popInput();
  1731. else
  1732. {
  1733. reset();
  1734. expandPE = true;
  1735. readMarkupdecl(true);
  1736. expandPE = true;
  1737. }
  1738. }
  1739. if (inputStack.size() != 2)
  1740. error("external subset has unmatched '>'");
  1741. popInput();
  1742. }
  1743. checkDoctype();
  1744. if (validating)
  1745. validateDoctype();
  1746. // Make rootName available for reading
  1747. buf.setLength(0);
  1748. buf.append(rootName);
  1749. }
  1750. /**
  1751. * Checks the well-formedness of the DTD.
  1752. */
  1753. private void checkDoctype()
  1754. throws XMLStreamException
  1755. {
  1756. // TODO check entity recursion
  1757. }
  1758. /**
  1759. * Parse the markupdecl production.
  1760. */
  1761. private void readMarkupdecl(boolean inExternalSubset)
  1762. throws IOException, XMLStreamException
  1763. {
  1764. boolean saved = expandPE;
  1765. mark(1);
  1766. require('<');
  1767. reset();
  1768. expandPE = false;
  1769. if (tryRead(TEST_ELEMENT_DECL))
  1770. {
  1771. expandPE = saved;
  1772. readElementDecl();
  1773. }
  1774. else if (tryRead(TEST_ATTLIST_DECL))
  1775. {
  1776. expandPE = saved;
  1777. readAttlistDecl();
  1778. }
  1779. else if (tryRead(TEST_ENTITY_DECL))
  1780. {
  1781. expandPE = saved;
  1782. readEntityDecl(inExternalSubset);
  1783. }
  1784. else if (tryRead(TEST_NOTATION_DECL))
  1785. {
  1786. expandPE = saved;
  1787. readNotationDecl(inExternalSubset);
  1788. }
  1789. else if (tryRead(TEST_PI))
  1790. {
  1791. readPI(true);
  1792. expandPE = saved;
  1793. }
  1794. else if (tryRead(TEST_COMMENT))
  1795. {
  1796. readComment(true);
  1797. expandPE = saved;
  1798. }
  1799. else if (tryRead("<!["))
  1800. {
  1801. // conditional section
  1802. expandPE = saved;
  1803. if (inputStack.size() < 2)
  1804. error("conditional sections illegal in internal subset");
  1805. skipWhitespace();
  1806. if (tryRead("INCLUDE"))
  1807. {
  1808. skipWhitespace();
  1809. require('[');
  1810. skipWhitespace();
  1811. while (!tryRead("]]>"))
  1812. {
  1813. readMarkupdecl(inExternalSubset);
  1814. skipWhitespace();
  1815. }
  1816. }
  1817. else if (tryRead("IGNORE"))
  1818. {
  1819. skipWhitespace();
  1820. require('[');
  1821. expandPE = false;
  1822. for (int nesting = 1; nesting > 0; )
  1823. {
  1824. int c = readCh();
  1825. switch (c)
  1826. {
  1827. case 0x3c: // '<'
  1828. if (tryRead("!["))
  1829. nesting++;
  1830. break;
  1831. case 0x5d: // ']'
  1832. if (tryRead("]>"))
  1833. nesting--;
  1834. break;
  1835. case -1:
  1836. throw new EOFException();
  1837. }
  1838. }
  1839. expandPE = saved;
  1840. }
  1841. else
  1842. error("conditional section must begin with INCLUDE or IGNORE");
  1843. }
  1844. else
  1845. error("expected markup declaration");
  1846. }
  1847. /**
  1848. * Parse the elementdecl production.
  1849. */
  1850. private void readElementDecl()
  1851. throws IOException, XMLStreamException
  1852. {
  1853. requireWhitespace();
  1854. boolean saved = expandPE;
  1855. expandPE = (inputStack.size() > 1);
  1856. String name = readNmtoken(true);
  1857. expandPE = saved;
  1858. requireWhitespace();
  1859. readContentspec(name);
  1860. skipWhitespace();
  1861. require('>');
  1862. }
  1863. /**
  1864. * Parse the contentspec production.
  1865. */
  1866. private void readContentspec(String elementName)
  1867. throws IOException, XMLStreamException
  1868. {
  1869. if (tryRead("EMPTY"))
  1870. doctype.addElementDecl(elementName, "EMPTY", new EmptyContentModel());
  1871. else if (tryRead("ANY"))
  1872. doctype.addElementDecl(elementName, "ANY", new AnyContentModel());
  1873. else
  1874. {
  1875. ContentModel model;
  1876. CPStringBuilder acc = new CPStringBuilder();
  1877. require('(');
  1878. acc.append('(');
  1879. skipWhitespace();
  1880. if (tryRead("#PCDATA"))
  1881. {
  1882. // mixed content
  1883. acc.append("#PCDATA");
  1884. MixedContentModel mm = new MixedContentModel();
  1885. model = mm;
  1886. skipWhitespace();
  1887. if (tryRead(')'))
  1888. {
  1889. acc.append(")");
  1890. if (tryRead('*'))
  1891. {
  1892. mm.min = 0;
  1893. mm.max = -1;
  1894. }
  1895. }
  1896. else
  1897. {
  1898. while (!tryRead(")"))
  1899. {
  1900. require('|');
  1901. acc.append('|');
  1902. skipWhitespace();
  1903. String name = readNmtoken(true);
  1904. acc.append(name);
  1905. mm.addName(name);
  1906. skipWhitespace();
  1907. }
  1908. require('*');
  1909. acc.append(")*");
  1910. mm.min = 0;
  1911. mm.max = -1;
  1912. }
  1913. }
  1914. else
  1915. model = readElements(acc);
  1916. doctype.addElementDecl(elementName, acc.toString(), model);
  1917. }
  1918. }
  1919. /**
  1920. * Parses an element content model.
  1921. */
  1922. private ElementContentModel readElements(CPStringBuilder acc)
  1923. throws IOException, XMLStreamException
  1924. {
  1925. int separator;
  1926. ElementContentModel model = new ElementContentModel();
  1927. // Parse first content particle
  1928. skipWhitespace();
  1929. model.addContentParticle(readContentParticle(acc));
  1930. // End or separator
  1931. skipWhitespace();
  1932. int c = readCh();
  1933. switch (c)
  1934. {
  1935. case 0x29: // ')'
  1936. acc.append(')');
  1937. mark(1);
  1938. c = readCh();
  1939. switch (c)
  1940. {
  1941. case 0x3f: // '?'
  1942. acc.append('?');
  1943. model.min = 0;
  1944. model.max = 1;
  1945. break;
  1946. case 0x2a: // '*'
  1947. acc.append('*');
  1948. model.min = 0;
  1949. model.max = -1;
  1950. break;
  1951. case 0x2b: // '+'
  1952. acc.append('+');
  1953. model.min = 1;
  1954. model.max = -1;
  1955. break;
  1956. default:
  1957. reset();
  1958. }
  1959. return model; // done
  1960. case 0x7c: // '|'
  1961. model.or = true;
  1962. // fall through
  1963. case 0x2c: // ','
  1964. separator = c;
  1965. acc.append(Character.toChars(c));
  1966. break;
  1967. default:
  1968. error("bad separator in content model",
  1969. "U+" + Integer.toHexString(c));
  1970. return model;
  1971. }
  1972. // Parse subsequent content particles
  1973. while (true)
  1974. {
  1975. skipWhitespace();
  1976. model.addContentParticle(readContentParticle(acc));
  1977. skipWhitespace();
  1978. c = readCh();
  1979. if (c == 0x29) // ')'
  1980. {
  1981. acc.append(')');
  1982. break;
  1983. }
  1984. else if (c != separator)
  1985. {
  1986. error("bad separator in content model",
  1987. "U+" + Integer.toHexString(c));
  1988. return model;
  1989. }
  1990. else
  1991. acc.append(c);
  1992. }
  1993. // Check for occurrence indicator
  1994. mark(1);
  1995. c = readCh();
  1996. switch (c)
  1997. {
  1998. case 0x3f: // '?'
  1999. acc.append('?');
  2000. model.min = 0;
  2001. model.max = 1;
  2002. break;
  2003. case 0x2a: // '*'
  2004. acc.append('*');
  2005. model.min = 0;
  2006. model.max = -1;
  2007. break;
  2008. case 0x2b: // '+'
  2009. acc.append('+');
  2010. model.min = 1;
  2011. model.max = -1;
  2012. break;
  2013. default:
  2014. reset();
  2015. }
  2016. return model;
  2017. }
  2018. /**
  2019. * Parse a cp production.
  2020. */
  2021. private ContentParticle readContentParticle(CPStringBuilder acc)
  2022. throws IOException, XMLStreamException
  2023. {
  2024. ContentParticle cp = new ContentParticle();
  2025. if (tryRead('('))
  2026. {
  2027. acc.append('(');
  2028. cp.content = readElements(acc);
  2029. }
  2030. else
  2031. {
  2032. String name = readNmtoken(true);
  2033. acc.append(name);
  2034. cp.content = name;
  2035. mark(1);
  2036. int c = readCh();
  2037. switch (c)
  2038. {
  2039. case 0x3f: // '?'
  2040. acc.append('?');
  2041. cp.min = 0;
  2042. cp.max = 1;
  2043. break;
  2044. case 0x2a: // '*'
  2045. acc.append('*');
  2046. cp.min = 0;
  2047. cp.max = -1;
  2048. break;
  2049. case 0x2b: // '+'
  2050. acc.append('+');
  2051. cp.min = 1;
  2052. cp.max = -1;
  2053. break;
  2054. default:
  2055. reset();
  2056. }
  2057. }
  2058. return cp;
  2059. }
  2060. /**
  2061. * Parse an attribute-list definition.
  2062. */
  2063. private void readAttlistDecl()
  2064. throws IOException, XMLStreamException
  2065. {
  2066. requireWhitespace();
  2067. boolean saved = expandPE;
  2068. expandPE = (inputStack.size() > 1);
  2069. String elementName = readNmtoken(true);
  2070. expandPE = saved;
  2071. boolean white = tryWhitespace();
  2072. while (!tryRead('>'))
  2073. {
  2074. if (!white)
  2075. error("whitespace required before attribute definition");
  2076. readAttDef(elementName);
  2077. white = tryWhitespace();
  2078. }
  2079. }
  2080. /**
  2081. * Parse a single attribute definition.
  2082. */
  2083. private void readAttDef(String elementName)
  2084. throws IOException, XMLStreamException
  2085. {
  2086. String name = readNmtoken(true);
  2087. requireWhitespace();
  2088. CPStringBuilder acc = new CPStringBuilder();
  2089. HashSet values = new HashSet();
  2090. String type = readAttType(acc, values);
  2091. if (validating)
  2092. {
  2093. if ("ID".equals(type))
  2094. {
  2095. // VC: One ID per Element Type
  2096. for (Iterator i = doctype.attlistIterator(elementName);
  2097. i.hasNext(); )
  2098. {
  2099. Map.Entry entry = (Map.Entry) i.next();
  2100. AttributeDecl decl = (AttributeDecl) entry.getValue();
  2101. if ("ID".equals(decl.type))
  2102. error("element types must not have more than one ID " +
  2103. "attribute");
  2104. }
  2105. }
  2106. else if ("NOTATION".equals(type))
  2107. {
  2108. // VC: One Notation Per Element Type
  2109. for (Iterator i = doctype.attlistIterator(elementName);
  2110. i.hasNext(); )
  2111. {
  2112. Map.Entry entry = (Map.Entry) i.next();
  2113. AttributeDecl decl = (AttributeDecl) entry.getValue();
  2114. if ("NOTATION".equals(decl.type))
  2115. error("element types must not have more than one NOTATION " +
  2116. "attribute");
  2117. }
  2118. // VC: No Notation on Empty Element
  2119. ContentModel model = doctype.getElementModel(elementName);
  2120. if (model != null && model.type == ContentModel.EMPTY)
  2121. error("attributes of type NOTATION must not be declared on an " +
  2122. "element declared EMPTY");
  2123. }
  2124. }
  2125. String enumer = null;
  2126. if ("ENUMERATION".equals(type) || "NOTATION".equals(type))
  2127. enumer = acc.toString();
  2128. else
  2129. values = null;
  2130. requireWhitespace();
  2131. readDefault(elementName, name, type, enumer, values);
  2132. }
  2133. /**
  2134. * Parse an attribute type.
  2135. */
  2136. private String readAttType(CPStringBuilder acc, HashSet values)
  2137. throws IOException, XMLStreamException
  2138. {
  2139. if (tryRead('('))
  2140. {
  2141. readEnumeration(false, acc, values);
  2142. return "ENUMERATION";
  2143. }
  2144. else
  2145. {
  2146. String typeString = readNmtoken(true);
  2147. if ("NOTATION".equals(typeString))
  2148. {
  2149. readNotationType(acc, values);
  2150. return typeString;
  2151. }
  2152. else if ("CDATA".equals(typeString) ||
  2153. "ID".equals(typeString) ||
  2154. "IDREF".equals(typeString) ||
  2155. "IDREFS".equals(typeString) ||
  2156. "ENTITY".equals(typeString) ||
  2157. "ENTITIES".equals(typeString) ||
  2158. "NMTOKEN".equals(typeString) ||
  2159. "NMTOKENS".equals(typeString))
  2160. return typeString;
  2161. else
  2162. {
  2163. error("illegal attribute type", typeString);
  2164. return null;
  2165. }
  2166. }
  2167. }
  2168. /**
  2169. * Parse an enumeration.
  2170. */
  2171. private void readEnumeration(boolean isNames, CPStringBuilder acc,
  2172. HashSet values)
  2173. throws IOException, XMLStreamException
  2174. {
  2175. acc.append('(');
  2176. // first token
  2177. skipWhitespace();
  2178. String token = readNmtoken(isNames);
  2179. acc.append(token);
  2180. values.add(token);
  2181. // subsequent tokens
  2182. skipWhitespace();
  2183. while (!tryRead(')'))
  2184. {
  2185. require('|');
  2186. acc.append('|');
  2187. skipWhitespace();
  2188. token = readNmtoken(isNames);
  2189. // VC: No Duplicate Tokens
  2190. if (validating && values.contains(token))
  2191. error("duplicate token", token);
  2192. acc.append(token);
  2193. values.add(token);
  2194. skipWhitespace();
  2195. }
  2196. acc.append(')');
  2197. }
  2198. /**
  2199. * Parse a notation type for an attribute.
  2200. */
  2201. private void readNotationType(CPStringBuilder acc, HashSet values)
  2202. throws IOException, XMLStreamException
  2203. {
  2204. requireWhitespace();
  2205. require('(');
  2206. readEnumeration(true, acc, values);
  2207. }
  2208. /**
  2209. * Parse the default value for an attribute.
  2210. */
  2211. private void readDefault(String elementName, String name,
  2212. String type, String enumeration, HashSet values)
  2213. throws IOException, XMLStreamException
  2214. {
  2215. int valueType = ATTRIBUTE_DEFAULT_SPECIFIED;
  2216. int flags = LIT_ATTRIBUTE;
  2217. String value = null, defaultType = null;
  2218. boolean saved = expandPE;
  2219. if (!"CDATA".equals(type))
  2220. flags |= LIT_NORMALIZE;
  2221. expandPE = false;
  2222. if (tryRead('#'))
  2223. {
  2224. if (tryRead("FIXED"))
  2225. {
  2226. defaultType = "#FIXED";
  2227. valueType = ATTRIBUTE_DEFAULT_FIXED;
  2228. requireWhitespace();
  2229. value = readLiteral(flags, false);
  2230. }
  2231. else if (tryRead("REQUIRED"))
  2232. {
  2233. defaultType = "#REQUIRED";
  2234. valueType = ATTRIBUTE_DEFAULT_REQUIRED;
  2235. }
  2236. else if (tryRead("IMPLIED"))
  2237. {
  2238. defaultType = "#IMPLIED";
  2239. valueType = ATTRIBUTE_DEFAULT_IMPLIED;
  2240. }
  2241. else
  2242. error("illegal keyword for attribute default value");
  2243. }
  2244. else
  2245. value = readLiteral(flags, false);
  2246. expandPE = saved;
  2247. if (validating)
  2248. {
  2249. if ("ID".equals(type))
  2250. {
  2251. // VC: Attribute Default Value Syntactically Correct
  2252. if (value != null && !isNmtoken(value, true))
  2253. error("default value must match Name production", value);
  2254. // VC: ID Attribute Default
  2255. if (valueType != ATTRIBUTE_DEFAULT_REQUIRED &&
  2256. valueType != ATTRIBUTE_DEFAULT_IMPLIED)
  2257. error("ID attributes must have a declared default of " +
  2258. "#IMPLIED or #REQUIRED");
  2259. }
  2260. else if (value != null)
  2261. {
  2262. // VC: Attribute Default Value Syntactically Correct
  2263. if ("IDREF".equals(type) || "ENTITY".equals(type))
  2264. {
  2265. if (!isNmtoken(value, true))
  2266. error("default value must match Name production", value);
  2267. }
  2268. else if ("IDREFS".equals(type) || "ENTITIES".equals(type))
  2269. {
  2270. StringTokenizer st = new StringTokenizer(value);
  2271. while (st.hasMoreTokens())
  2272. {
  2273. String token = st.nextToken();
  2274. if (!isNmtoken(token, true))
  2275. error("default value must match Name production", token);
  2276. }
  2277. }
  2278. else if ("NMTOKEN".equals(type) || "ENUMERATION".equals(type))
  2279. {
  2280. if (!isNmtoken(value, false))
  2281. error("default value must match Nmtoken production", value);
  2282. }
  2283. else if ("NMTOKENS".equals(type))
  2284. {
  2285. StringTokenizer st = new StringTokenizer(value);
  2286. while (st.hasMoreTokens())
  2287. {
  2288. String token = st.nextToken();
  2289. if (!isNmtoken(token, false))
  2290. error("default value must match Nmtoken production",
  2291. token);
  2292. }
  2293. }
  2294. }
  2295. }
  2296. // Register attribute def
  2297. AttributeDecl attribute =
  2298. new AttributeDecl(type, value, valueType, enumeration, values,
  2299. inputStack.size() != 1);
  2300. doctype.addAttributeDecl(elementName, name, attribute);
  2301. }
  2302. /**
  2303. * Parse the EntityDecl production.
  2304. */
  2305. private void readEntityDecl(boolean inExternalSubset)
  2306. throws IOException, XMLStreamException
  2307. {
  2308. int flags = 0;
  2309. // Check if parameter entity
  2310. boolean peFlag = false;
  2311. expandPE = false;
  2312. requireWhitespace();
  2313. if (tryRead('%'))
  2314. {
  2315. peFlag = true;
  2316. requireWhitespace();
  2317. }
  2318. expandPE = true;
  2319. // Read entity name
  2320. String name = readNmtoken(true);
  2321. if (name.indexOf(':') != -1)
  2322. error("illegal character ':' in entity name", name);
  2323. if (peFlag)
  2324. name = "%" + name;
  2325. requireWhitespace();
  2326. mark(1);
  2327. int c = readCh();
  2328. reset();
  2329. if (c == 0x22 || c == 0x27) // " | '
  2330. {
  2331. // Internal entity replacement text
  2332. String value = readLiteral(flags | LIT_DISABLE_EREF, true);
  2333. int ai = value.indexOf('&');
  2334. while (ai != -1)
  2335. {
  2336. int sci = value.indexOf(';', ai);
  2337. if (sci == -1)
  2338. error("malformed reference in entity value", value);
  2339. String ref = value.substring(ai + 1, sci);
  2340. int[] cp = UnicodeReader.toCodePointArray(ref);
  2341. if (cp.length == 0)
  2342. error("malformed reference in entity value", value);
  2343. if (cp[0] == 0x23) // #
  2344. {
  2345. if (cp.length == 1)
  2346. error("malformed reference in entity value", value);
  2347. if (cp[1] == 0x78) // 'x'
  2348. {
  2349. if (cp.length == 2)
  2350. error("malformed reference in entity value", value);
  2351. for (int i = 2; i < cp.length; i++)
  2352. {
  2353. int x = cp[i];
  2354. if (x < 0x30 ||
  2355. (x > 0x39 && x < 0x41) ||
  2356. (x > 0x46 && x < 0x61) ||
  2357. x > 0x66)
  2358. error("malformed character reference in entity value",
  2359. value);
  2360. }
  2361. }
  2362. else
  2363. {
  2364. for (int i = 1; i < cp.length; i++)
  2365. {
  2366. int x = cp[i];
  2367. if (x < 0x30 || x > 0x39)
  2368. error("malformed character reference in entity value",
  2369. value);
  2370. }
  2371. }
  2372. }
  2373. else
  2374. {
  2375. if (!isNameStartCharacter(cp[0], input.xml11))
  2376. error("malformed reference in entity value", value);
  2377. for (int i = 1; i < cp.length; i++)
  2378. {
  2379. if (!isNameCharacter(cp[i], input.xml11))
  2380. error("malformed reference in entity value", value);
  2381. }
  2382. }
  2383. ai = value.indexOf('&', sci);
  2384. }
  2385. doctype.addEntityDecl(name, value, inExternalSubset);
  2386. }
  2387. else
  2388. {
  2389. ExternalIds ids = readExternalIds(false, false);
  2390. // Check for NDATA
  2391. boolean white = tryWhitespace();
  2392. if (!peFlag && tryRead("NDATA"))
  2393. {
  2394. if (!white)
  2395. error("whitespace required before NDATA");
  2396. requireWhitespace();
  2397. ids.notationName = readNmtoken(true);
  2398. }
  2399. doctype.addEntityDecl(name, ids, inExternalSubset);
  2400. }
  2401. // finish
  2402. skipWhitespace();
  2403. require('>');
  2404. }
  2405. /**
  2406. * Parse the NotationDecl production.
  2407. */
  2408. private void readNotationDecl(boolean inExternalSubset)
  2409. throws IOException, XMLStreamException
  2410. {
  2411. requireWhitespace();
  2412. String notationName = readNmtoken(true);
  2413. if (notationName.indexOf(':') != -1)
  2414. error("illegal character ':' in notation name", notationName);
  2415. if (validating)
  2416. {
  2417. // VC: Unique Notation Name
  2418. ExternalIds notation = doctype.getNotation(notationName);
  2419. if (notation != null)
  2420. error("duplicate notation name", notationName);
  2421. }
  2422. requireWhitespace();
  2423. ExternalIds ids = readExternalIds(true, false);
  2424. ids.notationName = notationName;
  2425. doctype.addNotationDecl(notationName, ids, inExternalSubset);
  2426. skipWhitespace();
  2427. require('>');
  2428. }
  2429. /**
  2430. * Returns a tuple {publicId, systemId}.
  2431. */
  2432. private ExternalIds readExternalIds(boolean inNotation, boolean isSubset)
  2433. throws IOException, XMLStreamException
  2434. {
  2435. int c;
  2436. int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;
  2437. ExternalIds ids = new ExternalIds();
  2438. if (tryRead("PUBLIC"))
  2439. {
  2440. requireWhitespace();
  2441. ids.publicId = readLiteral(LIT_NORMALIZE | LIT_PUBID | flags, false);
  2442. if (inNotation)
  2443. {
  2444. skipWhitespace();
  2445. mark(1);
  2446. c = readCh();
  2447. reset();
  2448. if (c == 0x22 || c == 0x27) // " | '
  2449. {
  2450. String href = readLiteral(flags, false);
  2451. ids.systemId = absolutize(input.systemId, href);
  2452. }
  2453. }
  2454. else
  2455. {
  2456. requireWhitespace();
  2457. String href = readLiteral(flags, false);
  2458. ids.systemId = absolutize(input.systemId, href);
  2459. }
  2460. // Check valid URI characters
  2461. for (int i = 0; i < ids.publicId.length(); i++)
  2462. {
  2463. char d = ids.publicId.charAt(i);
  2464. if (d >= 'a' && d <= 'z')
  2465. continue;
  2466. if (d >= 'A' && d <= 'Z')
  2467. continue;
  2468. if (" \r\n0123456789-' ()+,./:=?;!*#@$_%".indexOf(d) != -1)
  2469. continue;
  2470. error("illegal PUBLIC id character",
  2471. "U+" + Integer.toHexString(d));
  2472. }
  2473. }
  2474. else if (tryRead("SYSTEM"))
  2475. {
  2476. requireWhitespace();
  2477. String href = readLiteral(flags, false);
  2478. ids.systemId = absolutize(input.systemId, href);
  2479. }
  2480. else if (!isSubset)
  2481. {
  2482. error("missing SYSTEM or PUBLIC keyword");
  2483. }
  2484. if (ids.systemId != null && !inNotation)
  2485. {
  2486. if (ids.systemId.indexOf('#') != -1)
  2487. error("SYSTEM id has a URI fragment", ids.systemId);
  2488. }
  2489. return ids;
  2490. }
  2491. /**
  2492. * Parse the start of an element.
  2493. * @return the state of the parser afterwards (EMPTY_ELEMENT or CONTENT)
  2494. */
  2495. private int readStartElement()
  2496. throws IOException, XMLStreamException
  2497. {
  2498. // Read element name
  2499. String elementName = readNmtoken(true);
  2500. attrs.clear();
  2501. // Push namespace context
  2502. if (namespaceAware)
  2503. {
  2504. if (elementName.charAt(0) == ':' ||
  2505. elementName.charAt(elementName.length() - 1) == ':')
  2506. error("not a QName", elementName);
  2507. namespaces.addFirst(new LinkedHashMap());
  2508. }
  2509. // Read element content
  2510. boolean white = tryWhitespace();
  2511. mark(1);
  2512. int c = readCh();
  2513. while (c != 0x2f && c != 0x3e) // '/' | '>'
  2514. {
  2515. // Read attribute
  2516. reset();
  2517. if (!white)
  2518. error("need whitespace between attributes");
  2519. readAttribute(elementName);
  2520. white = tryWhitespace();
  2521. mark(1);
  2522. c = readCh();
  2523. }
  2524. // supply defaulted attributes
  2525. if (doctype != null)
  2526. {
  2527. for (Iterator i = doctype.attlistIterator(elementName); i.hasNext(); )
  2528. {
  2529. Map.Entry entry = (Map.Entry) i.next();
  2530. String attName = (String) entry.getKey();
  2531. AttributeDecl decl = (AttributeDecl) entry.getValue();
  2532. if (validating)
  2533. {
  2534. switch (decl.valueType)
  2535. {
  2536. case ATTRIBUTE_DEFAULT_REQUIRED:
  2537. // VC: Required Attribute
  2538. if (decl.value == null && !attributeSpecified(attName))
  2539. error("value for " + attName + " attribute is required");
  2540. break;
  2541. case ATTRIBUTE_DEFAULT_FIXED:
  2542. // VC: Fixed Attribute Default
  2543. for (Iterator j = attrs.iterator(); j.hasNext(); )
  2544. {
  2545. Attribute a = (Attribute) j.next();
  2546. if (attName.equals(a.name) &&
  2547. !decl.value.equals(a.value))
  2548. error("value for " + attName + " attribute must be " +
  2549. decl.value);
  2550. }
  2551. break;
  2552. }
  2553. }
  2554. if (namespaceAware && attName.equals("xmlns"))
  2555. {
  2556. LinkedHashMap ctx =
  2557. (LinkedHashMap) namespaces.getFirst();
  2558. if (ctx.containsKey(XMLConstants.DEFAULT_NS_PREFIX))
  2559. continue; // namespace was specified
  2560. }
  2561. else if (namespaceAware && attName.startsWith("xmlns:"))
  2562. {
  2563. LinkedHashMap ctx =
  2564. (LinkedHashMap) namespaces.getFirst();
  2565. if (ctx.containsKey(attName.substring(6)))
  2566. continue; // namespace was specified
  2567. }
  2568. else if (attributeSpecified(attName))
  2569. continue;
  2570. if (decl.value == null)
  2571. continue;
  2572. // VC: Standalone Document Declaration
  2573. if (validating && decl.external && xmlStandalone == Boolean.TRUE)
  2574. error("standalone must be 'no' if attributes inherit values " +
  2575. "from externally declared markup declarations");
  2576. Attribute attr =
  2577. new Attribute(attName, decl.type, false, decl.value);
  2578. if (namespaceAware)
  2579. {
  2580. if (!addNamespace(attr))
  2581. attrs.add(attr);
  2582. }
  2583. else
  2584. attrs.add(attr);
  2585. }
  2586. }
  2587. if (baseAware)
  2588. {
  2589. String uri = getAttributeValue(XMLConstants.XML_NS_URI, "base");
  2590. String base = getXMLBase();
  2591. bases.addFirst(absolutize(base, uri));
  2592. }
  2593. if (namespaceAware)
  2594. {
  2595. // check prefix bindings
  2596. int ci = elementName.indexOf(':');
  2597. if (ci != -1)
  2598. {
  2599. String prefix = elementName.substring(0, ci);
  2600. String uri = getNamespaceURI(prefix);
  2601. if (uri == null)
  2602. error("unbound element prefix", prefix);
  2603. else if (input.xml11 && "".equals(uri))
  2604. error("XML 1.1 unbound element prefix", prefix);
  2605. }
  2606. for (Iterator i = attrs.iterator(); i.hasNext(); )
  2607. {
  2608. Attribute attr = (Attribute) i.next();
  2609. if (attr.prefix != null &&
  2610. !XMLConstants.XMLNS_ATTRIBUTE.equals(attr.prefix))
  2611. {
  2612. String uri = getNamespaceURI(attr.prefix);
  2613. if (uri == null)
  2614. error("unbound attribute prefix", attr.prefix);
  2615. else if (input.xml11 && "".equals(uri))
  2616. error("XML 1.1 unbound attribute prefix", attr.prefix);
  2617. }
  2618. }
  2619. }
  2620. if (validating && doctype != null)
  2621. {
  2622. validateStartElement(elementName);
  2623. currentContentModel = doctype.getElementModel(elementName);
  2624. if (currentContentModel == null)
  2625. error("no element declaration", elementName);
  2626. validationStack.add(new LinkedList());
  2627. }
  2628. // make element name available for read
  2629. buf.setLength(0);
  2630. buf.append(elementName);
  2631. // push element onto stack
  2632. stack.addLast(elementName);
  2633. switch (c)
  2634. {
  2635. case 0x3e: // '>'
  2636. return CONTENT;
  2637. case 0x2f: // '/'
  2638. require('>');
  2639. return EMPTY_ELEMENT;
  2640. }
  2641. return -1; // to satisfy compiler
  2642. }
  2643. /**
  2644. * Indicates whether the specified attribute name was specified for the
  2645. * current element.
  2646. */
  2647. private boolean attributeSpecified(String attName)
  2648. {
  2649. for (Iterator j = attrs.iterator(); j.hasNext(); )
  2650. {
  2651. Attribute a = (Attribute) j.next();
  2652. if (attName.equals(a.name))
  2653. return true;
  2654. }
  2655. return false;
  2656. }
  2657. /**
  2658. * Parse an attribute.
  2659. */
  2660. private void readAttribute(String elementName)
  2661. throws IOException, XMLStreamException
  2662. {
  2663. // Read attribute name
  2664. String attributeName = readNmtoken(true);
  2665. String type = getAttributeType(elementName, attributeName);
  2666. readEq();
  2667. // Read literal
  2668. final int flags = LIT_ATTRIBUTE | LIT_ENTITY_REF;
  2669. String value = (type == null || "CDATA".equals(type)) ?
  2670. readLiteral(flags, false) : readLiteral(flags | LIT_NORMALIZE, false);
  2671. // add attribute event
  2672. Attribute attr = this.new Attribute(attributeName, type, true, value);
  2673. if (namespaceAware)
  2674. {
  2675. if (attributeName.charAt(0) == ':' ||
  2676. attributeName.charAt(attributeName.length() - 1) == ':')
  2677. error("not a QName", attributeName);
  2678. else if (attributeName.equals("xmlns"))
  2679. {
  2680. LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
  2681. if (ctx.containsKey(XMLConstants.DEFAULT_NS_PREFIX))
  2682. error("duplicate default namespace");
  2683. }
  2684. else if (attributeName.startsWith("xmlns:"))
  2685. {
  2686. LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
  2687. if (ctx.containsKey(attributeName.substring(6)))
  2688. error("duplicate namespace", attributeName.substring(6));
  2689. }
  2690. else if (attrs.contains(attr))
  2691. error("duplicate attribute", attributeName);
  2692. }
  2693. else if (attrs.contains(attr))
  2694. error("duplicate attribute", attributeName);
  2695. if (validating && doctype != null)
  2696. {
  2697. // VC: Attribute Value Type
  2698. AttributeDecl decl =
  2699. doctype.getAttributeDecl(elementName, attributeName);
  2700. if (decl == null)
  2701. error("attribute must be declared", attributeName);
  2702. if ("ENUMERATION".equals(decl.type))
  2703. {
  2704. // VC: Enumeration
  2705. if (!decl.values.contains(value))
  2706. error("value does not match enumeration " + decl.enumeration,
  2707. value);
  2708. }
  2709. else if ("ID".equals(decl.type))
  2710. {
  2711. // VC: ID
  2712. if (!isNmtoken(value, true))
  2713. error("ID values must match the Name production");
  2714. if (ids.contains(value))
  2715. error("Duplicate ID", value);
  2716. ids.add(value);
  2717. }
  2718. else if ("IDREF".equals(decl.type) || "IDREFS".equals(decl.type))
  2719. {
  2720. StringTokenizer st = new StringTokenizer(value);
  2721. while (st.hasMoreTokens())
  2722. {
  2723. String token = st.nextToken();
  2724. // VC: IDREF
  2725. if (!isNmtoken(token, true))
  2726. error("IDREF values must match the Name production");
  2727. idrefs.add(token);
  2728. }
  2729. }
  2730. else if ("NMTOKEN".equals(decl.type) || "NMTOKENS".equals(decl.type))
  2731. {
  2732. StringTokenizer st = new StringTokenizer(value);
  2733. while (st.hasMoreTokens())
  2734. {
  2735. String token = st.nextToken();
  2736. // VC: Name Token
  2737. if (!isNmtoken(token, false))
  2738. error("NMTOKEN values must match the Nmtoken production");
  2739. }
  2740. }
  2741. else if ("ENTITY".equals(decl.type))
  2742. {
  2743. // VC: Entity Name
  2744. if (!isNmtoken(value, true))
  2745. error("ENTITY values must match the Name production");
  2746. Object entity = doctype.getEntity(value);
  2747. if (entity == null || !(entity instanceof ExternalIds) ||
  2748. ((ExternalIds) entity).notationName == null)
  2749. error("ENTITY values must match the name of an unparsed " +
  2750. "entity declared in the DTD");
  2751. }
  2752. else if ("NOTATION".equals(decl.type))
  2753. {
  2754. if (!decl.values.contains(value))
  2755. error("NOTATION values must match a declared notation name",
  2756. value);
  2757. // VC: Notation Attributes
  2758. ExternalIds notation = doctype.getNotation(value);
  2759. if (notation == null)
  2760. error("NOTATION values must match the name of a notation " +
  2761. "declared in the DTD", value);
  2762. }
  2763. }
  2764. if (namespaceAware)
  2765. {
  2766. if (!addNamespace(attr))
  2767. attrs.add(attr);
  2768. }
  2769. else
  2770. attrs.add(attr);
  2771. }
  2772. /**
  2773. * Determines whether the specified attribute is a namespace declaration,
  2774. * and adds it to the current namespace context if so. Returns false if
  2775. * the attribute is an ordinary attribute.
  2776. */
  2777. private boolean addNamespace(Attribute attr)
  2778. throws XMLStreamException
  2779. {
  2780. if ("xmlns".equals(attr.name))
  2781. {
  2782. LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
  2783. if (ctx.get(XMLConstants.DEFAULT_NS_PREFIX) != null)
  2784. error("Duplicate default namespace declaration");
  2785. if (XMLConstants.XML_NS_URI.equals(attr.value))
  2786. error("can't bind XML namespace");
  2787. ctx.put(XMLConstants.DEFAULT_NS_PREFIX, attr.value);
  2788. return true;
  2789. }
  2790. else if ("xmlns".equals(attr.prefix))
  2791. {
  2792. LinkedHashMap ctx = (LinkedHashMap) namespaces.getFirst();
  2793. if (ctx.get(attr.localName) != null)
  2794. error("Duplicate namespace declaration for prefix",
  2795. attr.localName);
  2796. if (XMLConstants.XML_NS_PREFIX.equals(attr.localName))
  2797. {
  2798. if (!XMLConstants.XML_NS_URI.equals(attr.value))
  2799. error("can't redeclare xml prefix");
  2800. else
  2801. return false; // treat as attribute
  2802. }
  2803. if (XMLConstants.XML_NS_URI.equals(attr.value))
  2804. error("can't bind non-xml prefix to XML namespace");
  2805. if (XMLConstants.XMLNS_ATTRIBUTE.equals(attr.localName))
  2806. error("can't redeclare xmlns prefix");
  2807. if (XMLConstants.XMLNS_ATTRIBUTE_NS_URI.equals(attr.value))
  2808. error("can't bind non-xmlns prefix to XML Namespace namespace");
  2809. if ("".equals(attr.value) && !input.xml11)
  2810. error("illegal use of 1.1-style prefix unbinding in 1.0 document");
  2811. ctx.put(attr.localName, attr.value);
  2812. return true;
  2813. }
  2814. return false;
  2815. }
  2816. /**
  2817. * Parse a closing tag.
  2818. */
  2819. private void readEndElement()
  2820. throws IOException, XMLStreamException
  2821. {
  2822. // pop element off stack
  2823. String expected = (String) stack.removeLast();
  2824. require(expected);
  2825. skipWhitespace();
  2826. require('>');
  2827. // Make element name available
  2828. buf.setLength(0);
  2829. buf.append(expected);
  2830. if (validating && doctype != null)
  2831. endElementValidationHook();
  2832. }
  2833. /**
  2834. * Validate the end of an element.
  2835. * Called on an end-element or empty element if validating.
  2836. */
  2837. private void endElementValidationHook()
  2838. throws XMLStreamException
  2839. {
  2840. validateEndElement();
  2841. validationStack.removeLast();
  2842. if (stack.isEmpty())
  2843. currentContentModel = null;
  2844. else
  2845. {
  2846. String parent = (String) stack.getLast();
  2847. currentContentModel = doctype.getElementModel(parent);
  2848. }
  2849. }
  2850. /**
  2851. * Parse a comment.
  2852. */
  2853. private void readComment(boolean inDTD)
  2854. throws IOException, XMLStreamException
  2855. {
  2856. boolean saved = expandPE;
  2857. expandPE = false;
  2858. buf.setLength(0);
  2859. readUntil(TEST_END_COMMENT);
  2860. require('>');
  2861. expandPE = saved;
  2862. if (inDTD)
  2863. doctype.addComment(buf.toString());
  2864. }
  2865. /**
  2866. * Parse a processing instruction.
  2867. */
  2868. private void readPI(boolean inDTD)
  2869. throws IOException, XMLStreamException
  2870. {
  2871. boolean saved = expandPE;
  2872. expandPE = false;
  2873. piTarget = readNmtoken(true);
  2874. if (piTarget.indexOf(':') != -1)
  2875. error("illegal character in PI target", new Character(':'));
  2876. if ("xml".equalsIgnoreCase(piTarget))
  2877. error("illegal PI target", piTarget);
  2878. if (tryRead(TEST_END_PI))
  2879. piData = null;
  2880. else
  2881. {
  2882. if (!tryWhitespace())
  2883. error("whitespace required between PI target and data");
  2884. buf.setLength(0);
  2885. readUntil(TEST_END_PI);
  2886. piData = buf.toString();
  2887. }
  2888. expandPE = saved;
  2889. if (inDTD)
  2890. doctype.addPI(piTarget, piData);
  2891. }
  2892. /**
  2893. * Parse an entity reference.
  2894. */
  2895. private void readReference()
  2896. throws IOException, XMLStreamException
  2897. {
  2898. buf.setLength(0);
  2899. String entityName = readNmtoken(true);
  2900. require(';');
  2901. buf.setLength(0);
  2902. buf.append(entityName);
  2903. }
  2904. /**
  2905. * Read an CDATA section.
  2906. */
  2907. private void readCDSect()
  2908. throws IOException, XMLStreamException
  2909. {
  2910. buf.setLength(0);
  2911. readUntil(TEST_END_CDATA);
  2912. }
  2913. /**
  2914. * Read character data.
  2915. * @return the type of text read (CHARACTERS or SPACE)
  2916. */
  2917. private int readCharData(String prefix)
  2918. throws IOException, XMLStreamException
  2919. {
  2920. boolean white = true;
  2921. buf.setLength(0);
  2922. if (prefix != null)
  2923. buf.append(prefix);
  2924. boolean done = false;
  2925. boolean entities = false;
  2926. while (!done)
  2927. {
  2928. // Block read
  2929. mark(tmpBuf.length);
  2930. int len = read(tmpBuf, 0, tmpBuf.length);
  2931. if (len == -1)
  2932. {
  2933. if (inputStack.size() > 1)
  2934. {
  2935. popInput();
  2936. // report end-entity
  2937. done = true;
  2938. }
  2939. else
  2940. throw new EOFException();
  2941. }
  2942. for (int i = 0; i < len && !done; i++)
  2943. {
  2944. int c = tmpBuf[i];
  2945. switch (c)
  2946. {
  2947. case 0x20:
  2948. case 0x09:
  2949. case 0x0a:
  2950. case 0x0d:
  2951. buf.append(Character.toChars(c));
  2952. break; // whitespace
  2953. case 0x26: // '&'
  2954. reset();
  2955. read(tmpBuf, 0, i);
  2956. // character reference?
  2957. mark(3);
  2958. c = readCh(); // &
  2959. c = readCh();
  2960. if (c == 0x23) // '#'
  2961. {
  2962. mark(1);
  2963. c = readCh();
  2964. boolean hex = (c == 0x78); // 'x'
  2965. if (!hex)
  2966. reset();
  2967. char[] ch = readCharacterRef(hex ? 16 : 10);
  2968. buf.append(ch, 0, ch.length);
  2969. for (int j = 0; j < ch.length; j++)
  2970. {
  2971. switch (ch[j])
  2972. {
  2973. case 0x20:
  2974. case 0x09:
  2975. case 0x0a:
  2976. case 0x0d:
  2977. break; // whitespace
  2978. default:
  2979. white = false;
  2980. }
  2981. }
  2982. }
  2983. else
  2984. {
  2985. // entity reference
  2986. reset();
  2987. c = readCh(); // &
  2988. String entityName = readNmtoken(true);
  2989. require(';');
  2990. String text =
  2991. (String) PREDEFINED_ENTITIES.get(entityName);
  2992. if (text != null)
  2993. buf.append(text);
  2994. else
  2995. {
  2996. pushInput("", "&" + entityName + ";", false, false);
  2997. done = true;
  2998. break;
  2999. }
  3000. }
  3001. // continue processing
  3002. i = -1;
  3003. mark(tmpBuf.length);
  3004. len = read(tmpBuf, 0, tmpBuf.length);
  3005. if (len == -1)
  3006. {
  3007. if (inputStack.size() > 1)
  3008. {
  3009. popInput();
  3010. done = true;
  3011. }
  3012. else
  3013. throw new EOFException();
  3014. }
  3015. entities = true;
  3016. break; // end of text sequence
  3017. case 0x3e: // '>'
  3018. int l = buf.length();
  3019. if (l > 1 &&
  3020. buf.charAt(l - 1) == ']' &&
  3021. buf.charAt(l - 2) == ']')
  3022. error("Character data may not contain unescaped ']]>'");
  3023. buf.append(Character.toChars(c));
  3024. break;
  3025. case 0x3c: // '<'
  3026. reset();
  3027. // read i characters
  3028. int count = 0, remaining = i;
  3029. do
  3030. {
  3031. int r = read(tmpBuf, 0, remaining);
  3032. count += r;
  3033. remaining -= r;
  3034. }
  3035. while (count < i);
  3036. i = len;
  3037. if (coalescing && tryRead(TEST_CDATA))
  3038. readUntil(TEST_END_CDATA); // read CDATA section into buf
  3039. else
  3040. done = true; // end of text sequence
  3041. break;
  3042. default:
  3043. if (input.xml11)
  3044. {
  3045. if (!isXML11Char(c) || isXML11RestrictedChar(c))
  3046. error("illegal XML 1.1 character",
  3047. "U+" + Integer.toHexString(c));
  3048. }
  3049. else if (!isChar(c))
  3050. error("illegal XML character",
  3051. "U+" + Integer.toHexString(c));
  3052. white = false;
  3053. buf.append(Character.toChars(c));
  3054. }
  3055. }
  3056. // if text buffer >= 2MB, return it as a chunk
  3057. // to avoid excessive memory use
  3058. if (buf.length() >= 2097152)
  3059. done = true;
  3060. }
  3061. if (entities)
  3062. normalizeCRLF(buf);
  3063. return white ? XMLStreamConstants.SPACE : XMLStreamConstants.CHARACTERS;
  3064. }
  3065. /**
  3066. * Expands the specified entity.
  3067. */
  3068. private void expandEntity(String name, boolean inAttr, boolean normalize)
  3069. throws IOException, XMLStreamException
  3070. {
  3071. if (doctype != null)
  3072. {
  3073. Object value = doctype.getEntity(name);
  3074. if (value != null)
  3075. {
  3076. if (xmlStandalone == Boolean.TRUE)
  3077. {
  3078. // VC: Standalone Document Declaration
  3079. if (doctype.isEntityExternal(name))
  3080. error("reference to external entity in standalone document");
  3081. else if (value instanceof ExternalIds)
  3082. {
  3083. ExternalIds ids = (ExternalIds) value;
  3084. if (ids.notationName != null &&
  3085. doctype.isNotationExternal(ids.notationName))
  3086. error("reference to external notation in " +
  3087. "standalone document");
  3088. }
  3089. }
  3090. if (value instanceof String)
  3091. {
  3092. String text = (String) value;
  3093. if (inAttr && text.indexOf('<') != -1)
  3094. error("< in attribute value");
  3095. pushInput(name, text, !inAttr, normalize);
  3096. }
  3097. else if (inAttr)
  3098. error("reference to external entity in attribute value", name);
  3099. else
  3100. pushInput(name, (ExternalIds) value, !inAttr, normalize);
  3101. return;
  3102. }
  3103. }
  3104. error("reference to undeclared entity", name);
  3105. }
  3106. /**
  3107. * Indicates whether the specified entity is unparsed.
  3108. */
  3109. private boolean isUnparsedEntity(String name)
  3110. {
  3111. if (doctype != null)
  3112. {
  3113. Object value = doctype.getEntity(name);
  3114. if (value != null && value instanceof ExternalIds)
  3115. return ((ExternalIds) value).notationName != null;
  3116. }
  3117. return false;
  3118. }
  3119. /**
  3120. * Read an equals sign.
  3121. */
  3122. private void readEq()
  3123. throws IOException, XMLStreamException
  3124. {
  3125. skipWhitespace();
  3126. require('=');
  3127. skipWhitespace();
  3128. }
  3129. /**
  3130. * Character read for reading literals.
  3131. * @param recognizePEs whether to recognize parameter-entity references
  3132. */
  3133. private int literalReadCh(boolean recognizePEs)
  3134. throws IOException, XMLStreamException
  3135. {
  3136. int c = recognizePEs ? readCh() : read();
  3137. while (c == -1)
  3138. {
  3139. if (inputStack.size() > 1)
  3140. {
  3141. inputStack.removeLast();
  3142. input = (Input) inputStack.getLast();
  3143. // Don't issue end-entity
  3144. c = recognizePEs ? readCh() : read();
  3145. }
  3146. else
  3147. throw new EOFException();
  3148. }
  3149. return c;
  3150. }
  3151. /**
  3152. * Read a string literal.
  3153. */
  3154. private String readLiteral(int flags, boolean recognizePEs)
  3155. throws IOException, XMLStreamException
  3156. {
  3157. boolean saved = expandPE;
  3158. int delim = readCh();
  3159. if (delim != 0x27 && delim != 0x22)
  3160. error("expected '\"' or \"'\"", "U+" + Integer.toHexString(delim));
  3161. literalBuf.setLength(0);
  3162. if ((flags & LIT_DISABLE_PE) != 0)
  3163. expandPE = false;
  3164. boolean entities = false;
  3165. int inputStackSize = inputStack.size();
  3166. do
  3167. {
  3168. int c = literalReadCh(recognizePEs);
  3169. if (c == delim && inputStackSize == inputStack.size())
  3170. break;
  3171. switch (c)
  3172. {
  3173. case 0x0a:
  3174. case 0x0d:
  3175. if ((flags & (LIT_ATTRIBUTE | LIT_PUBID)) != 0)
  3176. c = 0x20; // normalize to space
  3177. break;
  3178. case 0x09:
  3179. if ((flags & LIT_ATTRIBUTE) != 0)
  3180. c = 0x20; // normalize to space
  3181. break;
  3182. case 0x26: // '&'
  3183. mark(2);
  3184. c = readCh();
  3185. if (c == 0x23) // '#'
  3186. {
  3187. if ((flags & LIT_DISABLE_CREF) != 0)
  3188. {
  3189. reset();
  3190. c = 0x26; // '&'
  3191. }
  3192. else
  3193. {
  3194. mark(1);
  3195. c = readCh();
  3196. boolean hex = (c == 0x78); // 'x'
  3197. if (!hex)
  3198. reset();
  3199. char[] ref = readCharacterRef(hex ? 16 : 10);
  3200. for (int i = 0; i < ref.length; i++)
  3201. literalBuf.append(ref[i]);
  3202. entities = true;
  3203. continue;
  3204. }
  3205. }
  3206. else
  3207. {
  3208. if ((flags & LIT_DISABLE_EREF) != 0)
  3209. {
  3210. reset();
  3211. c = 0x26; // '&'
  3212. }
  3213. else
  3214. {
  3215. reset();
  3216. String entityName = readNmtoken(true);
  3217. require(';');
  3218. String text =
  3219. (String) PREDEFINED_ENTITIES.get(entityName);
  3220. if (text != null)
  3221. literalBuf.append(text);
  3222. else
  3223. expandEntity(entityName,
  3224. (flags & LIT_ATTRIBUTE) != 0,
  3225. true);
  3226. entities = true;
  3227. continue;
  3228. }
  3229. }
  3230. break;
  3231. case 0x3c: // '<'
  3232. if ((flags & LIT_ATTRIBUTE) != 0)
  3233. error("attribute values may not contain '<'");
  3234. break;
  3235. case -1:
  3236. if (inputStack.size() > 1)
  3237. {
  3238. popInput();
  3239. continue;
  3240. }
  3241. throw new EOFException();
  3242. default:
  3243. if ((c < 0x0020 || c > 0xfffd) ||
  3244. (c >= 0xd800 && c < 0xdc00) ||
  3245. (input.xml11 && (c >= 0x007f) &&
  3246. (c <= 0x009f) && (c != 0x0085)))
  3247. error("illegal character", "U+" + Integer.toHexString(c));
  3248. }
  3249. literalBuf.append(Character.toChars(c));
  3250. }
  3251. while (true);
  3252. expandPE = saved;
  3253. if (entities)
  3254. normalizeCRLF(literalBuf);
  3255. if ((flags & LIT_NORMALIZE) > 0)
  3256. literalBuf = normalize(literalBuf);
  3257. return literalBuf.toString();
  3258. }
  3259. /**
  3260. * Performs attribute-value normalization of the text buffer.
  3261. * This discards leading and trailing whitespace, and replaces sequences
  3262. * of whitespace with a single space.
  3263. */
  3264. private StringBuffer normalize(StringBuffer buf)
  3265. {
  3266. StringBuffer acc = new StringBuffer();
  3267. int len = buf.length();
  3268. int avState = 0;
  3269. for (int i = 0; i < len; i++)
  3270. {
  3271. char c = buf.charAt(i);
  3272. if (c == ' ')
  3273. avState = (avState == 0) ? 0 : 1;
  3274. else
  3275. {
  3276. if (avState == 1)
  3277. acc.append(' ');
  3278. acc.append(c);
  3279. avState = 2;
  3280. }
  3281. }
  3282. return acc;
  3283. }
  3284. /**
  3285. * Replace any CR/LF pairs in the buffer with LF.
  3286. * This may be necessary if combinations of CR or LF were declared as
  3287. * (character) entity references in the input.
  3288. */
  3289. private void normalizeCRLF(StringBuffer buf)
  3290. {
  3291. int len = buf.length() - 1;
  3292. for (int i = 0; i < len; i++)
  3293. {
  3294. char c = buf.charAt(i);
  3295. if (c == '\r' && buf.charAt(i + 1) == '\n')
  3296. {
  3297. buf.deleteCharAt(i--);
  3298. len--;
  3299. }
  3300. }
  3301. }
  3302. /**
  3303. * Parse and expand a parameter entity reference.
  3304. */
  3305. private void expandPEReference()
  3306. throws IOException, XMLStreamException
  3307. {
  3308. String name = readNmtoken(true, new StringBuffer());
  3309. require(';');
  3310. mark(1); // ensure we don't reset to before the semicolon
  3311. if (doctype != null)
  3312. {
  3313. String entityName = "%" + name;
  3314. Object entity = doctype.getEntity(entityName);
  3315. if (entity != null)
  3316. {
  3317. if (xmlStandalone == Boolean.TRUE)
  3318. {
  3319. if (doctype.isEntityExternal(entityName))
  3320. error("reference to external parameter entity in " +
  3321. "standalone document");
  3322. }
  3323. if (entity instanceof String)
  3324. {
  3325. pushInput(name, (String) entity, false, input.normalize);
  3326. //pushInput(name, " " + (String) entity + " ");
  3327. }
  3328. else
  3329. {
  3330. //pushInput("", " ");
  3331. pushInput(name, (ExternalIds) entity, false, input.normalize);
  3332. //pushInput("", " ");
  3333. }
  3334. }
  3335. else
  3336. error("reference to undeclared parameter entity", name);
  3337. }
  3338. else
  3339. error("reference to parameter entity without doctype", name);
  3340. }
  3341. /**
  3342. * Parse the digits in a character reference.
  3343. * @param base the base of the digits (10 or 16)
  3344. */
  3345. private char[] readCharacterRef(int base)
  3346. throws IOException, XMLStreamException
  3347. {
  3348. CPStringBuilder b = new CPStringBuilder();
  3349. for (int c = readCh(); c != 0x3b && c != -1; c = readCh())
  3350. b.append(Character.toChars(c));
  3351. try
  3352. {
  3353. int ord = Integer.parseInt(b.toString(), base);
  3354. if (input.xml11)
  3355. {
  3356. if (!isXML11Char(ord))
  3357. error("illegal XML 1.1 character reference " +
  3358. "U+" + Integer.toHexString(ord));
  3359. }
  3360. else
  3361. {
  3362. if ((ord < 0x20 && !(ord == 0x0a || ord == 0x09 || ord == 0x0d))
  3363. || (ord >= 0xd800 && ord <= 0xdfff)
  3364. || ord == 0xfffe || ord == 0xffff
  3365. || ord > 0x0010ffff)
  3366. error("illegal XML character reference " +
  3367. "U+" + Integer.toHexString(ord));
  3368. }
  3369. return Character.toChars(ord);
  3370. }
  3371. catch (NumberFormatException e)
  3372. {
  3373. error("illegal characters in character reference", b.toString());
  3374. return null;
  3375. }
  3376. }
  3377. /**
  3378. * Parses an NMTOKEN or Name production.
  3379. * @param isName if a Name, otherwise an NMTOKEN
  3380. */
  3381. private String readNmtoken(boolean isName)
  3382. throws IOException, XMLStreamException
  3383. {
  3384. return readNmtoken(isName, nmtokenBuf);
  3385. }
  3386. /**
  3387. * Parses an NMTOKEN or Name production using the specified buffer.
  3388. * @param isName if a Name, otherwise an NMTOKEN
  3389. * @param buf the character buffer to use
  3390. */
  3391. private String readNmtoken(boolean isName, StringBuffer buf)
  3392. throws IOException, XMLStreamException
  3393. {
  3394. buf.setLength(0);
  3395. int c = readCh();
  3396. if (isName)
  3397. {
  3398. if (!isNameStartCharacter(c, input.xml11))
  3399. error("not a name start character",
  3400. "U+" + Integer.toHexString(c));
  3401. }
  3402. else
  3403. {
  3404. if (!isNameCharacter(c, input.xml11))
  3405. error("not a name character",
  3406. "U+" + Integer.toHexString(c));
  3407. }
  3408. buf.append(Character.toChars(c));
  3409. do
  3410. {
  3411. mark(1);
  3412. c = readCh();
  3413. switch (c)
  3414. {
  3415. case 0x25: // '%'
  3416. case 0x3c: // '<'
  3417. case 0x3e: // '>'
  3418. case 0x26: // '&'
  3419. case 0x2c: // ','
  3420. case 0x7c: // '|'
  3421. case 0x2a: // '*'
  3422. case 0x2b: // '+'
  3423. case 0x3f: // '?'
  3424. case 0x29: // ')'
  3425. case 0x3d: // '='
  3426. case 0x27: // '\''
  3427. case 0x22: // '"'
  3428. case 0x5b: // '['
  3429. case 0x20: // ' '
  3430. case 0x09: // '\t'
  3431. case 0x0a: // '\n'
  3432. case 0x0d: // '\r'
  3433. case 0x3b: // ';'
  3434. case 0x2f: // '/'
  3435. case -1:
  3436. reset();
  3437. return intern(buf.toString());
  3438. default:
  3439. if (!isNameCharacter(c, input.xml11))
  3440. error("not a name character",
  3441. "U+" + Integer.toHexString(c));
  3442. else
  3443. buf.append(Character.toChars(c));
  3444. }
  3445. }
  3446. while (true);
  3447. }
  3448. /**
  3449. * Indicates whether the specified Unicode character is an XML 1.1 Char.
  3450. */
  3451. public static boolean isXML11Char(int c)
  3452. {
  3453. return ((c >= 0x0001 && c <= 0xD7FF) ||
  3454. (c >= 0xE000 && c < 0xFFFE) ||
  3455. (c >= 0x10000 && c <= 0x10FFFF));
  3456. }
  3457. /**
  3458. * Indicates whether the specified Unicode character is an XML 1.1
  3459. * RestrictedChar.
  3460. */
  3461. public static boolean isXML11RestrictedChar(int c)
  3462. {
  3463. return ((c >= 0x0001 && c <= 0x0008) ||
  3464. (c >= 0x000B && c <= 0x000C) ||
  3465. (c >= 0x000E && c <= 0x001F) ||
  3466. (c >= 0x007F && c <= 0x0084) ||
  3467. (c >= 0x0086 && c <= 0x009F));
  3468. }
  3469. /**
  3470. * Indicates whether the specified text matches the Name or Nmtoken
  3471. * production.
  3472. */
  3473. private boolean isNmtoken(String text, boolean isName)
  3474. {
  3475. try
  3476. {
  3477. int[] cp = UnicodeReader.toCodePointArray(text);
  3478. if (cp.length == 0)
  3479. return false;
  3480. if (isName)
  3481. {
  3482. if (!isNameStartCharacter(cp[0], input.xml11))
  3483. return false;
  3484. }
  3485. else
  3486. {
  3487. if (!isNameCharacter(cp[0], input.xml11))
  3488. return false;
  3489. }
  3490. for (int i = 1; i < cp.length; i++)
  3491. {
  3492. if (!isNameCharacter(cp[i], input.xml11))
  3493. return false;
  3494. }
  3495. return true;
  3496. }
  3497. catch (IOException e)
  3498. {
  3499. return false;
  3500. }
  3501. }
  3502. /**
  3503. * Indicates whether the specified Unicode character is a Name start
  3504. * character.
  3505. */
  3506. public static boolean isNameStartCharacter(int c, boolean xml11)
  3507. {
  3508. if (xml11)
  3509. return ((c >= 0x0041 && c <= 0x005a) ||
  3510. (c >= 0x0061 && c <= 0x007a) ||
  3511. c == 0x3a |
  3512. c == 0x5f |
  3513. (c >= 0xC0 && c <= 0xD6) ||
  3514. (c >= 0xD8 && c <= 0xF6) ||
  3515. (c >= 0xF8 && c <= 0x2FF) ||
  3516. (c >= 0x370 && c <= 0x37D) ||
  3517. (c >= 0x37F && c <= 0x1FFF) ||
  3518. (c >= 0x200C && c <= 0x200D) ||
  3519. (c >= 0x2070 && c <= 0x218F) ||
  3520. (c >= 0x2C00 && c <= 0x2FEF) ||
  3521. (c >= 0x3001 && c <= 0xD7FF) ||
  3522. (c >= 0xF900 && c <= 0xFDCF) ||
  3523. (c >= 0xFDF0 && c <= 0xFFFD) ||
  3524. (c >= 0x10000 && c <= 0xEFFFF));
  3525. else
  3526. return (c == 0x5f || c == 0x3a || isLetter(c));
  3527. }
  3528. /**
  3529. * Indicates whether the specified Unicode character is a Name non-initial
  3530. * character.
  3531. */
  3532. public static boolean isNameCharacter(int c, boolean xml11)
  3533. {
  3534. if (xml11)
  3535. return ((c >= 0x0041 && c <= 0x005a) ||
  3536. (c >= 0x0061 && c <= 0x007a) ||
  3537. (c >= 0x0030 && c <= 0x0039) ||
  3538. c == 0x3a |
  3539. c == 0x5f |
  3540. c == 0x2d |
  3541. c == 0x2e |
  3542. c == 0xB7 |
  3543. (c >= 0xC0 && c <= 0xD6) ||
  3544. (c >= 0xD8 && c <= 0xF6) ||
  3545. (c >= 0xF8 && c <= 0x2FF) ||
  3546. (c >= 0x300 && c <= 0x37D) ||
  3547. (c >= 0x37F && c <= 0x1FFF) ||
  3548. (c >= 0x200C && c <= 0x200D) ||
  3549. (c >= 0x203F && c <= 0x2040) ||
  3550. (c >= 0x2070 && c <= 0x218F) ||
  3551. (c >= 0x2C00 && c <= 0x2FEF) ||
  3552. (c >= 0x3001 && c <= 0xD7FF) ||
  3553. (c >= 0xF900 && c <= 0xFDCF) ||
  3554. (c >= 0xFDF0 && c <= 0xFFFD) ||
  3555. (c >= 0x10000 && c <= 0xEFFFF));
  3556. else
  3557. return (c == 0x2e || c == 0x2d || c == 0x5f || c == 0x3a ||
  3558. isLetter(c) || isDigit(c) ||
  3559. isCombiningChar(c) || isExtender(c));
  3560. }
  3561. /**
  3562. * Indicates whether the specified Unicode character matches the Letter
  3563. * production.
  3564. */
  3565. public static boolean isLetter(int c)
  3566. {
  3567. if ((c >= 0x0041 && c <= 0x005A) ||
  3568. (c >= 0x0061 && c <= 0x007A) ||
  3569. (c >= 0x00C0 && c <= 0x00D6) ||
  3570. (c >= 0x00D8 && c <= 0x00F6) ||
  3571. (c >= 0x00F8 && c <= 0x00FF) ||
  3572. (c >= 0x0100 && c <= 0x0131) ||
  3573. (c >= 0x0134 && c <= 0x013E) ||
  3574. (c >= 0x0141 && c <= 0x0148) ||
  3575. (c >= 0x014A && c <= 0x017E) ||
  3576. (c >= 0x0180 && c <= 0x01C3) ||
  3577. (c >= 0x01CD && c <= 0x01F0) ||
  3578. (c >= 0x01F4 && c <= 0x01F5) ||
  3579. (c >= 0x01FA && c <= 0x0217) ||
  3580. (c >= 0x0250 && c <= 0x02A8) ||
  3581. (c >= 0x02BB && c <= 0x02C1) ||
  3582. c == 0x0386 ||
  3583. (c >= 0x0388 && c <= 0x038A) ||
  3584. c == 0x038C ||
  3585. (c >= 0x038E && c <= 0x03A1) ||
  3586. (c >= 0x03A3 && c <= 0x03CE) ||
  3587. (c >= 0x03D0 && c <= 0x03D6) ||
  3588. c == 0x03DA ||
  3589. c == 0x03DC ||
  3590. c == 0x03DE ||
  3591. c == 0x03E0 ||
  3592. (c >= 0x03E2 && c <= 0x03F3) ||
  3593. (c >= 0x0401 && c <= 0x040C) ||
  3594. (c >= 0x040E && c <= 0x044F) ||
  3595. (c >= 0x0451 && c <= 0x045C) ||
  3596. (c >= 0x045E && c <= 0x0481) ||
  3597. (c >= 0x0490 && c <= 0x04C4) ||
  3598. (c >= 0x04C7 && c <= 0x04C8) ||
  3599. (c >= 0x04CB && c <= 0x04CC) ||
  3600. (c >= 0x04D0 && c <= 0x04EB) ||
  3601. (c >= 0x04EE && c <= 0x04F5) ||
  3602. (c >= 0x04F8 && c <= 0x04F9) ||
  3603. (c >= 0x0531 && c <= 0x0556) ||
  3604. c == 0x0559 ||
  3605. (c >= 0x0561 && c <= 0x0586) ||
  3606. (c >= 0x05D0 && c <= 0x05EA) ||
  3607. (c >= 0x05F0 && c <= 0x05F2) ||
  3608. (c >= 0x0621 && c <= 0x063A) ||
  3609. (c >= 0x0641 && c <= 0x064A) ||
  3610. (c >= 0x0671 && c <= 0x06B7) ||
  3611. (c >= 0x06BA && c <= 0x06BE) ||
  3612. (c >= 0x06C0 && c <= 0x06CE) ||
  3613. (c >= 0x06D0 && c <= 0x06D3) ||
  3614. c == 0x06D5 ||
  3615. (c >= 0x06E5 && c <= 0x06E6) ||
  3616. (c >= 0x0905 && c <= 0x0939) ||
  3617. c == 0x093D ||
  3618. (c >= 0x0958 && c <= 0x0961) ||
  3619. (c >= 0x0985 && c <= 0x098C) ||
  3620. (c >= 0x098F && c <= 0x0990) ||
  3621. (c >= 0x0993 && c <= 0x09A8) ||
  3622. (c >= 0x09AA && c <= 0x09B0) ||
  3623. c == 0x09B2 ||
  3624. (c >= 0x09B6 && c <= 0x09B9) ||
  3625. (c >= 0x09DC && c <= 0x09DD) ||
  3626. (c >= 0x09DF && c <= 0x09E1) ||
  3627. (c >= 0x09F0 && c <= 0x09F1) ||
  3628. (c >= 0x0A05 && c <= 0x0A0A) ||
  3629. (c >= 0x0A0F && c <= 0x0A10) ||
  3630. (c >= 0x0A13 && c <= 0x0A28) ||
  3631. (c >= 0x0A2A && c <= 0x0A30) ||
  3632. (c >= 0x0A32 && c <= 0x0A33) ||
  3633. (c >= 0x0A35 && c <= 0x0A36) ||
  3634. (c >= 0x0A38 && c <= 0x0A39) ||
  3635. (c >= 0x0A59 && c <= 0x0A5C) ||
  3636. c == 0x0A5E ||
  3637. (c >= 0x0A72 && c <= 0x0A74) ||
  3638. (c >= 0x0A85 && c <= 0x0A8B) ||
  3639. c == 0x0A8D ||
  3640. (c >= 0x0A8F && c <= 0x0A91) ||
  3641. (c >= 0x0A93 && c <= 0x0AA8) ||
  3642. (c >= 0x0AAA && c <= 0x0AB0) ||
  3643. (c >= 0x0AB2 && c <= 0x0AB3) ||
  3644. (c >= 0x0AB5 && c <= 0x0AB9) ||
  3645. c == 0x0ABD ||
  3646. c == 0x0AE0 ||
  3647. (c >= 0x0B05 && c <= 0x0B0C) ||
  3648. (c >= 0x0B0F && c <= 0x0B10) ||
  3649. (c >= 0x0B13 && c <= 0x0B28) ||
  3650. (c >= 0x0B2A && c <= 0x0B30) ||
  3651. (c >= 0x0B32 && c <= 0x0B33) ||
  3652. (c >= 0x0B36 && c <= 0x0B39) ||
  3653. c == 0x0B3D ||
  3654. (c >= 0x0B5C && c <= 0x0B5D) ||
  3655. (c >= 0x0B5F && c <= 0x0B61) ||
  3656. (c >= 0x0B85 && c <= 0x0B8A) ||
  3657. (c >= 0x0B8E && c <= 0x0B90) ||
  3658. (c >= 0x0B92 && c <= 0x0B95) ||
  3659. (c >= 0x0B99 && c <= 0x0B9A) ||
  3660. c == 0x0B9C ||
  3661. (c >= 0x0B9E && c <= 0x0B9F) ||
  3662. (c >= 0x0BA3 && c <= 0x0BA4) ||
  3663. (c >= 0x0BA8 && c <= 0x0BAA) ||
  3664. (c >= 0x0BAE && c <= 0x0BB5) ||
  3665. (c >= 0x0BB7 && c <= 0x0BB9) ||
  3666. (c >= 0x0C05 && c <= 0x0C0C) ||
  3667. (c >= 0x0C0E && c <= 0x0C10) ||
  3668. (c >= 0x0C12 && c <= 0x0C28) ||
  3669. (c >= 0x0C2A && c <= 0x0C33) ||
  3670. (c >= 0x0C35 && c <= 0x0C39) ||
  3671. (c >= 0x0C60 && c <= 0x0C61) ||
  3672. (c >= 0x0C85 && c <= 0x0C8C) ||
  3673. (c >= 0x0C8E && c <= 0x0C90) ||
  3674. (c >= 0x0C92 && c <= 0x0CA8) ||
  3675. (c >= 0x0CAA && c <= 0x0CB3) ||
  3676. (c >= 0x0CB5 && c <= 0x0CB9) ||
  3677. c == 0x0CDE ||
  3678. (c >= 0x0CE0 && c <= 0x0CE1) ||
  3679. (c >= 0x0D05 && c <= 0x0D0C) ||
  3680. (c >= 0x0D0E && c <= 0x0D10) ||
  3681. (c >= 0x0D12 && c <= 0x0D28) ||
  3682. (c >= 0x0D2A && c <= 0x0D39) ||
  3683. (c >= 0x0D60 && c <= 0x0D61) ||
  3684. (c >= 0x0E01 && c <= 0x0E2E) ||
  3685. c == 0x0E30 ||
  3686. (c >= 0x0E32 && c <= 0x0E33) ||
  3687. (c >= 0x0E40 && c <= 0x0E45) ||
  3688. (c >= 0x0E81 && c <= 0x0E82) ||
  3689. c == 0x0E84 ||
  3690. (c >= 0x0E87 && c <= 0x0E88) ||
  3691. c == 0x0E8A ||
  3692. c == 0x0E8D ||
  3693. (c >= 0x0E94 && c <= 0x0E97) ||
  3694. (c >= 0x0E99 && c <= 0x0E9F) ||
  3695. (c >= 0x0EA1 && c <= 0x0EA3) ||
  3696. c == 0x0EA5 ||
  3697. c == 0x0EA7 ||
  3698. (c >= 0x0EAA && c <= 0x0EAB) ||
  3699. (c >= 0x0EAD && c <= 0x0EAE) ||
  3700. c == 0x0EB0 ||
  3701. (c >= 0x0EB2 && c <= 0x0EB3) ||
  3702. c == 0x0EBD ||
  3703. (c >= 0x0EC0 && c <= 0x0EC4) ||
  3704. (c >= 0x0F40 && c <= 0x0F47) ||
  3705. (c >= 0x0F49 && c <= 0x0F69) ||
  3706. (c >= 0x10A0 && c <= 0x10C5) ||
  3707. (c >= 0x10D0 && c <= 0x10F6) ||
  3708. c == 0x1100 ||
  3709. (c >= 0x1102 && c <= 0x1103) ||
  3710. (c >= 0x1105 && c <= 0x1107) ||
  3711. c == 0x1109 ||
  3712. (c >= 0x110B && c <= 0x110C) ||
  3713. (c >= 0x110E && c <= 0x1112) ||
  3714. c == 0x113C ||
  3715. c == 0x113E ||
  3716. c == 0x1140 ||
  3717. c == 0x114C ||
  3718. c == 0x114E ||
  3719. c == 0x1150 ||
  3720. (c >= 0x1154 && c <= 0x1155) ||
  3721. c == 0x1159 ||
  3722. (c >= 0x115F && c <= 0x1161) ||
  3723. c == 0x1163 ||
  3724. c == 0x1165 ||
  3725. c == 0x1167 ||
  3726. c == 0x1169 ||
  3727. (c >= 0x116D && c <= 0x116E) ||
  3728. (c >= 0x1172 && c <= 0x1173) ||
  3729. c == 0x1175 ||
  3730. c == 0x119E ||
  3731. c == 0x11A8 ||
  3732. c == 0x11AB ||
  3733. (c >= 0x11AE && c <= 0x11AF) ||
  3734. (c >= 0x11B7 && c <= 0x11B8) ||
  3735. c == 0x11BA ||
  3736. (c >= 0x11BC && c <= 0x11C2) ||
  3737. c == 0x11EB ||
  3738. c == 0x11F0 ||
  3739. c == 0x11F9 ||
  3740. (c >= 0x1E00 && c <= 0x1E9B) ||
  3741. (c >= 0x1EA0 && c <= 0x1EF9) ||
  3742. (c >= 0x1F00 && c <= 0x1F15) ||
  3743. (c >= 0x1F18 && c <= 0x1F1D) ||
  3744. (c >= 0x1F20 && c <= 0x1F45) ||
  3745. (c >= 0x1F48 && c <= 0x1F4D) ||
  3746. (c >= 0x1F50 && c <= 0x1F57) ||
  3747. c == 0x1F59 ||
  3748. c == 0x1F5B ||
  3749. c == 0x1F5D ||
  3750. (c >= 0x1F5F && c <= 0x1F7D) ||
  3751. (c >= 0x1F80 && c <= 0x1FB4) ||
  3752. (c >= 0x1FB6 && c <= 0x1FBC) ||
  3753. c == 0x1FBE ||
  3754. (c >= 0x1FC2 && c <= 0x1FC4) ||
  3755. (c >= 0x1FC6 && c <= 0x1FCC) ||
  3756. (c >= 0x1FD0 && c <= 0x1FD3) ||
  3757. (c >= 0x1FD6 && c <= 0x1FDB) ||
  3758. (c >= 0x1FE0 && c <= 0x1FEC) ||
  3759. (c >= 0x1FF2 && c <= 0x1FF4) ||
  3760. (c >= 0x1FF6 && c <= 0x1FFC) ||
  3761. c == 0x2126 ||
  3762. (c >= 0x212A && c <= 0x212B) ||
  3763. c == 0x212E ||
  3764. (c >= 0x2180 && c <= 0x2182) ||
  3765. (c >= 0x3041 && c <= 0x3094) ||
  3766. (c >= 0x30A1 && c <= 0x30FA) ||
  3767. (c >= 0x3105 && c <= 0x312C) ||
  3768. (c >= 0xAC00 && c <= 0xD7A3))
  3769. return true; // BaseChar
  3770. if ((c >= 0x4e00 && c <= 0x9fa5) ||
  3771. c == 0x3007 ||
  3772. (c >= 0x3021 && c <= 0x3029))
  3773. return true; // Ideographic
  3774. return false;
  3775. }
  3776. /**
  3777. * Indicates whether the specified Unicode character matches the Digit
  3778. * production.
  3779. */
  3780. public static boolean isDigit(int c)
  3781. {
  3782. return ((c >= 0x0030 && c <= 0x0039) ||
  3783. (c >= 0x0660 && c <= 0x0669) ||
  3784. (c >= 0x06F0 && c <= 0x06F9) ||
  3785. (c >= 0x0966 && c <= 0x096F) ||
  3786. (c >= 0x09E6 && c <= 0x09EF) ||
  3787. (c >= 0x0A66 && c <= 0x0A6F) ||
  3788. (c >= 0x0AE6 && c <= 0x0AEF) ||
  3789. (c >= 0x0B66 && c <= 0x0B6F) ||
  3790. (c >= 0x0BE7 && c <= 0x0BEF) ||
  3791. (c >= 0x0C66 && c <= 0x0C6F) ||
  3792. (c >= 0x0CE6 && c <= 0x0CEF) ||
  3793. (c >= 0x0D66 && c <= 0x0D6F) ||
  3794. (c >= 0x0E50 && c <= 0x0E59) ||
  3795. (c >= 0x0ED0 && c <= 0x0ED9) ||
  3796. (c >= 0x0F20 && c <= 0x0F29));
  3797. }
  3798. /**
  3799. * Indicates whether the specified Unicode character matches the
  3800. * CombiningChar production.
  3801. */
  3802. public static boolean isCombiningChar(int c)
  3803. {
  3804. return ((c >= 0x0300 && c <= 0x0345) ||
  3805. (c >= 0x0360 && c <= 0x0361) ||
  3806. (c >= 0x0483 && c <= 0x0486) ||
  3807. (c >= 0x0591 && c <= 0x05A1) ||
  3808. (c >= 0x05A3 && c <= 0x05B9) ||
  3809. (c >= 0x05BB && c <= 0x05BD) ||
  3810. c == 0x05BF ||
  3811. (c >= 0x05C1 && c <= 0x05C2) ||
  3812. c == 0x05C4 ||
  3813. (c >= 0x064B && c <= 0x0652) ||
  3814. c == 0x0670 ||
  3815. (c >= 0x06D6 && c <= 0x06DC) ||
  3816. (c >= 0x06DD && c <= 0x06DF) ||
  3817. (c >= 0x06E0 && c <= 0x06E4) ||
  3818. (c >= 0x06E7 && c <= 0x06E8) ||
  3819. (c >= 0x06EA && c <= 0x06ED) ||
  3820. (c >= 0x0901 && c <= 0x0903) ||
  3821. c == 0x093C ||
  3822. (c >= 0x093E && c <= 0x094C) ||
  3823. c == 0x094D ||
  3824. (c >= 0x0951 && c <= 0x0954) ||
  3825. (c >= 0x0962 && c <= 0x0963) ||
  3826. (c >= 0x0981 && c <= 0x0983) ||
  3827. c == 0x09BC ||
  3828. c == 0x09BE ||
  3829. c == 0x09BF ||
  3830. (c >= 0x09C0 && c <= 0x09C4) ||
  3831. (c >= 0x09C7 && c <= 0x09C8) ||
  3832. (c >= 0x09CB && c <= 0x09CD) ||
  3833. c == 0x09D7 ||
  3834. (c >= 0x09E2 && c <= 0x09E3) ||
  3835. c == 0x0A02 ||
  3836. c == 0x0A3C ||
  3837. c == 0x0A3E ||
  3838. c == 0x0A3F ||
  3839. (c >= 0x0A40 && c <= 0x0A42) ||
  3840. (c >= 0x0A47 && c <= 0x0A48) ||
  3841. (c >= 0x0A4B && c <= 0x0A4D) ||
  3842. (c >= 0x0A70 && c <= 0x0A71) ||
  3843. (c >= 0x0A81 && c <= 0x0A83) ||
  3844. c == 0x0ABC ||
  3845. (c >= 0x0ABE && c <= 0x0AC5) ||
  3846. (c >= 0x0AC7 && c <= 0x0AC9) ||
  3847. (c >= 0x0ACB && c <= 0x0ACD) ||
  3848. (c >= 0x0B01 && c <= 0x0B03) ||
  3849. c == 0x0B3C ||
  3850. (c >= 0x0B3E && c <= 0x0B43) ||
  3851. (c >= 0x0B47 && c <= 0x0B48) ||
  3852. (c >= 0x0B4B && c <= 0x0B4D) ||
  3853. (c >= 0x0B56 && c <= 0x0B57) ||
  3854. (c >= 0x0B82 && c <= 0x0B83) ||
  3855. (c >= 0x0BBE && c <= 0x0BC2) ||
  3856. (c >= 0x0BC6 && c <= 0x0BC8) ||
  3857. (c >= 0x0BCA && c <= 0x0BCD) ||
  3858. c == 0x0BD7 ||
  3859. (c >= 0x0C01 && c <= 0x0C03) ||
  3860. (c >= 0x0C3E && c <= 0x0C44) ||
  3861. (c >= 0x0C46 && c <= 0x0C48) ||
  3862. (c >= 0x0C4A && c <= 0x0C4D) ||
  3863. (c >= 0x0C55 && c <= 0x0C56) ||
  3864. (c >= 0x0C82 && c <= 0x0C83) ||
  3865. (c >= 0x0CBE && c <= 0x0CC4) ||
  3866. (c >= 0x0CC6 && c <= 0x0CC8) ||
  3867. (c >= 0x0CCA && c <= 0x0CCD) ||
  3868. (c >= 0x0CD5 && c <= 0x0CD6) ||
  3869. (c >= 0x0D02 && c <= 0x0D03) ||
  3870. (c >= 0x0D3E && c <= 0x0D43) ||
  3871. (c >= 0x0D46 && c <= 0x0D48) ||
  3872. (c >= 0x0D4A && c <= 0x0D4D) ||
  3873. c == 0x0D57 ||
  3874. c == 0x0E31 ||
  3875. (c >= 0x0E34 && c <= 0x0E3A) ||
  3876. (c >= 0x0E47 && c <= 0x0E4E) ||
  3877. c == 0x0EB1 ||
  3878. (c >= 0x0EB4 && c <= 0x0EB9) ||
  3879. (c >= 0x0EBB && c <= 0x0EBC) ||
  3880. (c >= 0x0EC8 && c <= 0x0ECD) ||
  3881. (c >= 0x0F18 && c <= 0x0F19) ||
  3882. c == 0x0F35 ||
  3883. c == 0x0F37 ||
  3884. c == 0x0F39 ||
  3885. c == 0x0F3E ||
  3886. c == 0x0F3F ||
  3887. (c >= 0x0F71 && c <= 0x0F84) ||
  3888. (c >= 0x0F86 && c <= 0x0F8B) ||
  3889. (c >= 0x0F90 && c <= 0x0F95) ||
  3890. c == 0x0F97 ||
  3891. (c >= 0x0F99 && c <= 0x0FAD) ||
  3892. (c >= 0x0FB1 && c <= 0x0FB7) ||
  3893. c == 0x0FB9 ||
  3894. (c >= 0x20D0 && c <= 0x20DC) ||
  3895. c == 0x20E1 ||
  3896. (c >= 0x302A && c <= 0x302F) ||
  3897. c == 0x3099 ||
  3898. c == 0x309A);
  3899. }
  3900. /**
  3901. * Indicates whether the specified Unicode character matches the Extender
  3902. * production.
  3903. */
  3904. public static boolean isExtender(int c)
  3905. {
  3906. return (c == 0x00B7 ||
  3907. c == 0x02D0 ||
  3908. c == 0x02D1 ||
  3909. c == 0x0387 ||
  3910. c == 0x0640 ||
  3911. c == 0x0E46 ||
  3912. c == 0x0EC6 ||
  3913. c == 0x3005 ||
  3914. (c >= 0x3031 && c <= 0x3035) ||
  3915. (c >= 0x309D && c <= 0x309E) ||
  3916. (c >= 0x30FC && c <= 0x30FE));
  3917. }
  3918. /**
  3919. * Indicates whether the specified Unicode character matches the Char
  3920. * production.
  3921. */
  3922. public static boolean isChar(int c)
  3923. {
  3924. return (c >= 0x20 && c < 0xd800) ||
  3925. (c >= 0xe00 && c < 0xfffe) ||
  3926. (c >= 0x10000 && c < 0x110000) ||
  3927. c == 0xa || c == 0x9 || c == 0xd;
  3928. }
  3929. /**
  3930. * Interns the specified text or not, depending on the value of
  3931. * stringInterning.
  3932. */
  3933. private String intern(String text)
  3934. {
  3935. return stringInterning ? text.intern() : text;
  3936. }
  3937. /**
  3938. * Report a parsing error.
  3939. */
  3940. private void error(String message)
  3941. throws XMLStreamException
  3942. {
  3943. error(message, null);
  3944. }
  3945. /**
  3946. * Report a parsing error.
  3947. */
  3948. private void error(String message, Object info)
  3949. throws XMLStreamException
  3950. {
  3951. if (info != null)
  3952. {
  3953. if (info instanceof String)
  3954. message += ": \"" + ((String) info) + "\"";
  3955. else if (info instanceof Character)
  3956. message += ": '" + ((Character) info) + "'";
  3957. }
  3958. throw new XMLStreamException(message);
  3959. }
  3960. /**
  3961. * Perform validation of a start-element event.
  3962. */
  3963. private void validateStartElement(String elementName)
  3964. throws XMLStreamException
  3965. {
  3966. if (currentContentModel == null)
  3967. {
  3968. // root element
  3969. // VC: Root Element Type
  3970. if (!elementName.equals(doctype.rootName))
  3971. error("root element name must match name in DTD");
  3972. return;
  3973. }
  3974. // VC: Element Valid
  3975. switch (currentContentModel.type)
  3976. {
  3977. case ContentModel.EMPTY:
  3978. error("child element found in empty element", elementName);
  3979. break;
  3980. case ContentModel.ELEMENT:
  3981. LinkedList ctx = (LinkedList) validationStack.getLast();
  3982. ctx.add(elementName);
  3983. break;
  3984. case ContentModel.MIXED:
  3985. MixedContentModel mm = (MixedContentModel) currentContentModel;
  3986. if (!mm.containsName(elementName))
  3987. error("illegal element for content model", elementName);
  3988. break;
  3989. }
  3990. }
  3991. /**
  3992. * Perform validation of an end-element event.
  3993. */
  3994. private void validateEndElement()
  3995. throws XMLStreamException
  3996. {
  3997. if (currentContentModel == null)
  3998. {
  3999. // root element
  4000. // VC: IDREF
  4001. if (!idrefs.containsAll(ids))
  4002. error("IDREF values must match the value of some ID attribute");
  4003. return;
  4004. }
  4005. // VC: Element Valid
  4006. switch (currentContentModel.type)
  4007. {
  4008. case ContentModel.ELEMENT:
  4009. LinkedList ctx = (LinkedList) validationStack.getLast();
  4010. ElementContentModel ecm = (ElementContentModel) currentContentModel;
  4011. validateElementContent(ecm, ctx);
  4012. break;
  4013. }
  4014. }
  4015. /**
  4016. * Perform validation of character data.
  4017. */
  4018. private void validatePCData(String text)
  4019. throws XMLStreamException
  4020. {
  4021. // VC: Element Valid
  4022. switch (currentContentModel.type)
  4023. {
  4024. case ContentModel.EMPTY:
  4025. error("character data found in empty element", text);
  4026. break;
  4027. case ContentModel.ELEMENT:
  4028. boolean white = true;
  4029. int len = text.length();
  4030. for (int i = 0; i < len; i++)
  4031. {
  4032. char c = text.charAt(i);
  4033. if (c != ' ' && c != '\t' && c != '\n' && c != '\r')
  4034. {
  4035. white = false;
  4036. break;
  4037. }
  4038. }
  4039. if (!white)
  4040. error("character data found in element with element content", text);
  4041. else if (xmlStandalone == Boolean.TRUE && currentContentModel.external)
  4042. // VC: Standalone Document Declaration
  4043. error("whitespace in element content of externally declared " +
  4044. "element in standalone document");
  4045. break;
  4046. }
  4047. }
  4048. /**
  4049. * Validates the specified validation context (list of child elements)
  4050. * against the element content model for the current element.
  4051. */
  4052. private void validateElementContent(ElementContentModel model,
  4053. LinkedList children)
  4054. throws XMLStreamException
  4055. {
  4056. // Use regular expression
  4057. CPStringBuilder buf = new CPStringBuilder();
  4058. for (Iterator i = children.iterator(); i.hasNext(); )
  4059. {
  4060. buf.append((String) i.next());
  4061. buf.append(' ');
  4062. }
  4063. String c = buf.toString();
  4064. String regex = createRegularExpression(model);
  4065. if (!c.matches(regex))
  4066. error("element content "+model.text+" does not match expression "+regex, c);
  4067. }
  4068. /**
  4069. * Creates the regular expression used to validate an element content
  4070. * model.
  4071. */
  4072. private String createRegularExpression(ElementContentModel model)
  4073. {
  4074. if (model.regex == null)
  4075. {
  4076. CPStringBuilder buf = new CPStringBuilder();
  4077. buf.append('(');
  4078. for (Iterator i = model.contentParticles.iterator(); i.hasNext(); )
  4079. {
  4080. ContentParticle cp = (ContentParticle) i.next();
  4081. if (cp.content instanceof String)
  4082. {
  4083. buf.append('(');
  4084. buf.append((String) cp.content);
  4085. buf.append(' ');
  4086. buf.append(')');
  4087. if (cp.max == -1)
  4088. {
  4089. if (cp.min == 0)
  4090. buf.append('*');
  4091. else
  4092. buf.append('+');
  4093. }
  4094. else if (cp.min == 0)
  4095. buf.append('?');
  4096. }
  4097. else
  4098. {
  4099. ElementContentModel ecm = (ElementContentModel) cp.content;
  4100. buf.append(createRegularExpression(ecm));
  4101. }
  4102. if (model.or && i.hasNext())
  4103. buf.append('|');
  4104. }
  4105. buf.append(')');
  4106. if (model.max == -1)
  4107. {
  4108. if (model.min == 0)
  4109. buf.append('*');
  4110. else
  4111. buf.append('+');
  4112. }
  4113. else if (model.min == 0)
  4114. buf.append('?');
  4115. model.regex = buf.toString();
  4116. }
  4117. return model.regex;
  4118. }
  4119. /**
  4120. * Performs validation of a document type declaration event.
  4121. */
  4122. void validateDoctype()
  4123. throws XMLStreamException
  4124. {
  4125. for (Iterator i = doctype.entityIterator(); i.hasNext(); )
  4126. {
  4127. Map.Entry entry = (Map.Entry) i.next();
  4128. Object entity = entry.getValue();
  4129. if (entity instanceof ExternalIds)
  4130. {
  4131. ExternalIds ids = (ExternalIds) entity;
  4132. if (ids.notationName != null)
  4133. {
  4134. // VC: Notation Declared
  4135. ExternalIds notation = doctype.getNotation(ids.notationName);
  4136. if (notation == null)
  4137. error("Notation name must match the declared name of a " +
  4138. "notation", ids.notationName);
  4139. }
  4140. }
  4141. }
  4142. }
  4143. /**
  4144. * Simple test harness for reading an XML file.
  4145. * args[0] is the filename of the XML file
  4146. * If args[1] is "-x", enable XInclude processing
  4147. */
  4148. public static void main(String[] args)
  4149. throws Exception
  4150. {
  4151. boolean validating = false;
  4152. boolean namespaceAware = false;
  4153. boolean xIncludeAware = false;
  4154. int pos = 0;
  4155. while (pos < args.length && args[pos].startsWith("-"))
  4156. {
  4157. if ("-x".equals(args[pos]))
  4158. xIncludeAware = true;
  4159. else if ("-v".equals(args[pos]))
  4160. validating = true;
  4161. else if ("-n".equals(args[pos]))
  4162. namespaceAware = true;
  4163. pos++;
  4164. }
  4165. if (pos >= args.length)
  4166. {
  4167. System.out.println("Syntax: XMLParser [-n] [-v] [-x] <file> [<file2> [...]]");
  4168. System.out.println("\t-n: use namespace aware mode");
  4169. System.out.println("\t-v: use validating parser");
  4170. System.out.println("\t-x: use XInclude aware mode");
  4171. System.exit(2);
  4172. }
  4173. while (pos < args.length)
  4174. {
  4175. XMLParser p = new XMLParser(new java.io.FileInputStream(args[pos]),
  4176. absolutize(null, args[pos]),
  4177. validating, // validating
  4178. namespaceAware, // namespaceAware
  4179. true, // coalescing,
  4180. true, // replaceERefs
  4181. true, // externalEntities
  4182. true, // supportDTD
  4183. true, // baseAware
  4184. true, // stringInterning
  4185. true, // extendedEventTypes
  4186. null,
  4187. null);
  4188. XMLStreamReader reader = p;
  4189. if (xIncludeAware)
  4190. reader = new XIncludeFilter(p, args[pos], true, true, true);
  4191. try
  4192. {
  4193. int event;
  4194. //do
  4195. while (reader.hasNext())
  4196. {
  4197. event = reader.next();
  4198. Location loc = reader.getLocation();
  4199. System.out.print(loc.getLineNumber() + ":" +
  4200. loc.getColumnNumber() + " ");
  4201. switch (event)
  4202. {
  4203. case XMLStreamConstants.START_DOCUMENT:
  4204. System.out.println("START_DOCUMENT version=" +
  4205. reader.getVersion() +
  4206. " encoding=" +
  4207. reader.getEncoding());
  4208. break;
  4209. case XMLStreamConstants.END_DOCUMENT:
  4210. System.out.println("END_DOCUMENT");
  4211. break;
  4212. case XMLStreamConstants.START_ELEMENT:
  4213. System.out.println("START_ELEMENT " +
  4214. reader.getName());
  4215. int l = reader.getNamespaceCount();
  4216. for (int i = 0; i < l; i++)
  4217. System.out.println("\tnamespace " +
  4218. reader.getNamespacePrefix(i) + "='" +
  4219. reader.getNamespaceURI(i)+"'");
  4220. l = reader.getAttributeCount();
  4221. for (int i = 0; i < l; i++)
  4222. System.out.println("\tattribute " +
  4223. reader.getAttributeName(i) + "='" +
  4224. reader.getAttributeValue(i) + "'");
  4225. break;
  4226. case XMLStreamConstants.END_ELEMENT:
  4227. System.out.println("END_ELEMENT " + reader.getName());
  4228. break;
  4229. case XMLStreamConstants.CHARACTERS:
  4230. System.out.println("CHARACTERS '" +
  4231. encodeText(reader.getText()) + "'");
  4232. break;
  4233. case XMLStreamConstants.CDATA:
  4234. System.out.println("CDATA '" +
  4235. encodeText(reader.getText()) + "'");
  4236. break;
  4237. case XMLStreamConstants.SPACE:
  4238. System.out.println("SPACE '" +
  4239. encodeText(reader.getText()) + "'");
  4240. break;
  4241. case XMLStreamConstants.DTD:
  4242. System.out.println("DTD " + reader.getText());
  4243. break;
  4244. case XMLStreamConstants.ENTITY_REFERENCE:
  4245. System.out.println("ENTITY_REFERENCE " + reader.getText());
  4246. break;
  4247. case XMLStreamConstants.COMMENT:
  4248. System.out.println("COMMENT '" +
  4249. encodeText(reader.getText()) + "'");
  4250. break;
  4251. case XMLStreamConstants.PROCESSING_INSTRUCTION:
  4252. System.out.println("PROCESSING_INSTRUCTION " +
  4253. reader.getPITarget() + " " +
  4254. reader.getPIData());
  4255. break;
  4256. case START_ENTITY:
  4257. System.out.println("START_ENTITY " + reader.getText());
  4258. break;
  4259. case END_ENTITY:
  4260. System.out.println("END_ENTITY " + reader.getText());
  4261. break;
  4262. default:
  4263. System.out.println("Unknown event: " + event);
  4264. }
  4265. }
  4266. }
  4267. catch (XMLStreamException e)
  4268. {
  4269. Location l = reader.getLocation();
  4270. System.out.println("At line "+l.getLineNumber()+
  4271. ", column "+l.getColumnNumber()+
  4272. " of "+l.getSystemId());
  4273. throw e;
  4274. }
  4275. pos++;
  4276. }
  4277. }
  4278. /**
  4279. * Escapes control characters in the specified text. For debugging.
  4280. */
  4281. private static String encodeText(String text)
  4282. {
  4283. CPStringBuilder b = new CPStringBuilder();
  4284. int len = text.length();
  4285. for (int i = 0; i < len; i++)
  4286. {
  4287. char c = text.charAt(i);
  4288. switch (c)
  4289. {
  4290. case '\t':
  4291. b.append("\\t");
  4292. break;
  4293. case '\n':
  4294. b.append("\\n");
  4295. break;
  4296. case '\r':
  4297. b.append("\\r");
  4298. break;
  4299. default:
  4300. b.append(c);
  4301. }
  4302. }
  4303. return b.toString();
  4304. }
  4305. /**
  4306. * An attribute instance.
  4307. */
  4308. class Attribute
  4309. {
  4310. /**
  4311. * Attribute name.
  4312. */
  4313. final String name;
  4314. /**
  4315. * Attribute type as declared in the DTD, or CDATA otherwise.
  4316. */
  4317. final String type;
  4318. /**
  4319. * Whether the attribute was specified or defaulted.
  4320. */
  4321. final boolean specified;
  4322. /**
  4323. * The attribute value.
  4324. */
  4325. final String value;
  4326. /**
  4327. * The namespace prefix.
  4328. */
  4329. final String prefix;
  4330. /**
  4331. * The namespace local-name.
  4332. */
  4333. final String localName;
  4334. Attribute(String name, String type, boolean specified, String value)
  4335. {
  4336. this.name = name;
  4337. this.type = type;
  4338. this.specified = specified;
  4339. this.value = value;
  4340. int ci = name.indexOf(':');
  4341. if (ci == -1)
  4342. {
  4343. prefix = null;
  4344. localName = intern(name);
  4345. }
  4346. else
  4347. {
  4348. prefix = intern(name.substring(0, ci));
  4349. localName = intern(name.substring(ci + 1));
  4350. }
  4351. }
  4352. public boolean equals(Object other)
  4353. {
  4354. if (other instanceof Attribute)
  4355. {
  4356. Attribute a = (Attribute) other;
  4357. if (namespaceAware)
  4358. {
  4359. if (!a.localName.equals(localName))
  4360. return false;
  4361. String auri = getNamespaceURI(a.prefix);
  4362. String uri = getNamespaceURI(prefix);
  4363. if (uri == null && (auri == null ||
  4364. (input.xml11 && "".equals(auri))))
  4365. return true;
  4366. if (uri != null)
  4367. {
  4368. if ("".equals(uri) && input.xml11 && "".equals(auri))
  4369. return true;
  4370. return uri.equals(auri);
  4371. }
  4372. return false;
  4373. }
  4374. else
  4375. return a.name.equals(name);
  4376. }
  4377. return false;
  4378. }
  4379. public String toString()
  4380. {
  4381. CPStringBuilder buf = new CPStringBuilder(getClass().getName());
  4382. buf.append('[');
  4383. buf.append("name=");
  4384. buf.append(name);
  4385. if (value != null)
  4386. {
  4387. buf.append(",value=");
  4388. buf.append(value);
  4389. }
  4390. if (type != null)
  4391. {
  4392. buf.append(",type=");
  4393. buf.append(type);
  4394. }
  4395. if (specified)
  4396. buf.append(",specified");
  4397. buf.append(']');
  4398. return buf.toString();
  4399. }
  4400. }
  4401. /**
  4402. * Representation of a DTD.
  4403. */
  4404. class Doctype
  4405. {
  4406. /**
  4407. * Name of the root element.
  4408. */
  4409. final String rootName;
  4410. /**
  4411. * Public ID, if any, of external subset.
  4412. */
  4413. final String publicId;
  4414. /**
  4415. * System ID (URL), if any, of external subset.
  4416. */
  4417. final String systemId;
  4418. /**
  4419. * Map of element names to content models.
  4420. */
  4421. private final LinkedHashMap elements = new LinkedHashMap();
  4422. /**
  4423. * Map of element names to maps of attribute declarations.
  4424. */
  4425. private final LinkedHashMap attlists = new LinkedHashMap();
  4426. /**
  4427. * Map of entity names to entities (String or ExternalIds).
  4428. */
  4429. private final LinkedHashMap entities = new LinkedHashMap();
  4430. /**
  4431. * Map of notation names to ExternalIds.
  4432. */
  4433. private final LinkedHashMap notations = new LinkedHashMap();
  4434. /**
  4435. * Map of anonymous keys to comments.
  4436. */
  4437. private final LinkedHashMap comments = new LinkedHashMap();
  4438. /**
  4439. * Map of anonymous keys to processing instructions (String[2]
  4440. * containing {target, data}).
  4441. */
  4442. private final LinkedHashMap pis = new LinkedHashMap();
  4443. /**
  4444. * List of keys to all markup entries in the DTD.
  4445. */
  4446. private final LinkedList entries = new LinkedList();
  4447. /**
  4448. * Set of the entities defined in the external subset.
  4449. */
  4450. private final HashSet externalEntities = new HashSet();
  4451. /**
  4452. * Set of the notations defined in the external subset.
  4453. */
  4454. private final HashSet externalNotations = new HashSet();
  4455. /**
  4456. * Counter for making anonymous keys.
  4457. */
  4458. private int anon = 1;
  4459. /**
  4460. * Constructor.
  4461. */
  4462. Doctype(String rootName, String publicId, String systemId)
  4463. {
  4464. this.rootName = rootName;
  4465. this.publicId = publicId;
  4466. this.systemId = systemId;
  4467. }
  4468. /**
  4469. * Adds an element declaration.
  4470. * @param name the element name
  4471. * @param text the content model text
  4472. * @param model the parsed content model
  4473. */
  4474. void addElementDecl(String name, String text, ContentModel model)
  4475. {
  4476. if (elements.containsKey(name))
  4477. return;
  4478. model.text = text;
  4479. model.external = (inputStack.size() != 1);
  4480. elements.put(name, model);
  4481. entries.add("E" + name);
  4482. }
  4483. /**
  4484. * Adds an attribute declaration.
  4485. * @param ename the element name
  4486. * @param aname the attribute name
  4487. * @param decl the attribute declaration details
  4488. */
  4489. void addAttributeDecl(String ename, String aname, AttributeDecl decl)
  4490. {
  4491. LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename);
  4492. if (attlist == null)
  4493. {
  4494. attlist = new LinkedHashMap();
  4495. attlists.put(ename, attlist);
  4496. }
  4497. else if (attlist.containsKey(aname))
  4498. return;
  4499. attlist.put(aname, decl);
  4500. String key = "A" + ename;
  4501. if (!entries.contains(key))
  4502. entries.add(key);
  4503. }
  4504. /**
  4505. * Adds an entity declaration.
  4506. * @param name the entity name
  4507. * @param text the entity replacement text
  4508. * @param inExternalSubset if we are in the exernal subset
  4509. */
  4510. void addEntityDecl(String name, String text, boolean inExternalSubset)
  4511. {
  4512. if (entities.containsKey(name))
  4513. return;
  4514. entities.put(name, text);
  4515. entries.add("e" + name);
  4516. if (inExternalSubset)
  4517. externalEntities.add(name);
  4518. }
  4519. /**
  4520. * Adds an entity declaration.
  4521. * @param name the entity name
  4522. * @param ids the external IDs
  4523. * @param inExternalSubset if we are in the exernal subset
  4524. */
  4525. void addEntityDecl(String name, ExternalIds ids, boolean inExternalSubset)
  4526. {
  4527. if (entities.containsKey(name))
  4528. return;
  4529. entities.put(name, ids);
  4530. entries.add("e" + name);
  4531. if (inExternalSubset)
  4532. externalEntities.add(name);
  4533. }
  4534. /**
  4535. * Adds a notation declaration.
  4536. * @param name the notation name
  4537. * @param ids the external IDs
  4538. * @param inExternalSubset if we are in the exernal subset
  4539. */
  4540. void addNotationDecl(String name, ExternalIds ids, boolean inExternalSubset)
  4541. {
  4542. if (notations.containsKey(name))
  4543. return;
  4544. notations.put(name, ids);
  4545. entries.add("n" + name);
  4546. if (inExternalSubset)
  4547. externalNotations.add(name);
  4548. }
  4549. /**
  4550. * Adds a comment.
  4551. */
  4552. void addComment(String text)
  4553. {
  4554. String key = Integer.toString(anon++);
  4555. comments.put(key, text);
  4556. entries.add("c" + key);
  4557. }
  4558. /**
  4559. * Adds a processing instruction.
  4560. */
  4561. void addPI(String target, String data)
  4562. {
  4563. String key = Integer.toString(anon++);
  4564. pis.put(key, new String[] {target, data});
  4565. entries.add("p" + key);
  4566. }
  4567. /**
  4568. * Returns the content model for the specified element.
  4569. * @param name the element name
  4570. */
  4571. ContentModel getElementModel(String name)
  4572. {
  4573. return (ContentModel) elements.get(name);
  4574. }
  4575. /**
  4576. * Returns the attribute definition for the given attribute
  4577. * @param ename the element name
  4578. * @param aname the attribute name
  4579. */
  4580. AttributeDecl getAttributeDecl(String ename, String aname)
  4581. {
  4582. LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename);
  4583. return (attlist == null) ? null : (AttributeDecl) attlist.get(aname);
  4584. }
  4585. /**
  4586. * Indicates whether the specified attribute was declared in the DTD.
  4587. * @param ename the element name
  4588. * @param aname the attribute name
  4589. */
  4590. boolean isAttributeDeclared(String ename, String aname)
  4591. {
  4592. LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename);
  4593. return (attlist == null) ? false : attlist.containsKey(aname);
  4594. }
  4595. /**
  4596. * Returns an iterator over the entries in the attribute list for the
  4597. * given element.
  4598. * @param ename the element name
  4599. */
  4600. Iterator attlistIterator(String ename)
  4601. {
  4602. LinkedHashMap attlist = (LinkedHashMap) attlists.get(ename);
  4603. return (attlist == null) ? Collections.EMPTY_LIST.iterator() :
  4604. attlist.entrySet().iterator();
  4605. }
  4606. /**
  4607. * Returns the entity (String or ExternalIds) for the given entity name.
  4608. */
  4609. Object getEntity(String name)
  4610. {
  4611. return entities.get(name);
  4612. }
  4613. /**
  4614. * Indicates whether the specified entity was declared in the external
  4615. * subset.
  4616. */
  4617. boolean isEntityExternal(String name)
  4618. {
  4619. return externalEntities.contains(name);
  4620. }
  4621. /**
  4622. * Returns an iterator over the entity map entries.
  4623. */
  4624. Iterator entityIterator()
  4625. {
  4626. return entities.entrySet().iterator();
  4627. }
  4628. /**
  4629. * Returns the notation IDs for the given notation name.
  4630. */
  4631. ExternalIds getNotation(String name)
  4632. {
  4633. return (ExternalIds) notations.get(name);
  4634. }
  4635. /**
  4636. * Indicates whether the specified notation was declared in the external
  4637. * subset.
  4638. */
  4639. boolean isNotationExternal(String name)
  4640. {
  4641. return externalNotations.contains(name);
  4642. }
  4643. /**
  4644. * Returns the comment associated with the specified (anonymous) key.
  4645. */
  4646. String getComment(String key)
  4647. {
  4648. return (String) comments.get(key);
  4649. }
  4650. /**
  4651. * Returns the processing instruction associated with the specified
  4652. * (anonymous) key.
  4653. */
  4654. String[] getPI(String key)
  4655. {
  4656. return (String[]) pis.get(key);
  4657. }
  4658. /**
  4659. * Returns an iterator over the keys of the markup entries in this DTD,
  4660. * in the order declared.
  4661. */
  4662. Iterator entryIterator()
  4663. {
  4664. return entries.iterator();
  4665. }
  4666. }
  /**
   * Plain holder for an ExternalID together with an optional NDataDecl.
   * All fields are null until assigned.
   */
  class ExternalIds {

    /** The public ID. */
    String publicId;

    /** The system ID. */
    String systemId;

    /** The notation name declared with the NDATA keyword. */
    String notationName;

  }
  /**
   * Base class of the content models. A model carries a fixed type (one of
   * the constants below), occurrence bounds that default to exactly one,
   * and two attributes, text and external, that are left at their default
   * values (null and false) by this class.
   */
  abstract class ContentModel {

    static final int EMPTY = 0;
    static final int ANY = 1;
    static final int ELEMENT = 2;
    static final int MIXED = 3;

    /** Minimum number of occurrences. */
    int min = 1;

    /** Maximum number of occurrences. */
    int max = 1;

    /** The content model type: EMPTY, ANY, ELEMENT or MIXED. */
    final int type;

    /** The content model text. */
    String text;

    /** External flag, false unless set by other code. */
    boolean external;

    /**
     * @param type the content model type
     */
    ContentModel(int type) {
      this.type = type;
    }

  }
  4706. /**
  4707. * The EMPTY content model.
  4708. */
  4709. class EmptyContentModel
  4710. extends ContentModel
  4711. {
  4712. EmptyContentModel()
  4713. {
  4714. super(ContentModel.EMPTY);
  4715. min = 0;
  4716. max = 0;
  4717. }
  4718. }
  4719. /**
  4720. * The ANY content model.
  4721. */
  4722. class AnyContentModel
  4723. extends ContentModel
  4724. {
  4725. AnyContentModel()
  4726. {
  4727. super(ContentModel.ANY);
  4728. min = 0;
  4729. max = -1;
  4730. }
  4731. }
  4732. /**
  4733. * An element content model.
  4734. */
  4735. class ElementContentModel
  4736. extends ContentModel
  4737. {
  4738. LinkedList contentParticles;
  4739. boolean or;
  4740. String regex; // regular expression cache
  4741. ElementContentModel()
  4742. {
  4743. super(ContentModel.ELEMENT);
  4744. contentParticles = new LinkedList();
  4745. }
  4746. void addContentParticle(ContentParticle cp)
  4747. {
  4748. contentParticles.add(cp);
  4749. }
  4750. }
  /**
   * A single particle of an element content model, with occurrence bounds
   * that default to exactly one.
   */
  class ContentParticle {

    /** Minimum number of occurrences. */
    int min = 1;

    /** Maximum number of occurrences. */
    int max = 1;

    /** Name (String) or ElementContentModel. */
    Object content;

  }
  4757. /**
  4758. * A mixed content model.
  4759. */
  4760. class MixedContentModel
  4761. extends ContentModel
  4762. {
  4763. private HashSet names;
  4764. MixedContentModel()
  4765. {
  4766. super(ContentModel.MIXED);
  4767. names = new HashSet();
  4768. }
  4769. void addName(String name)
  4770. {
  4771. names.add(name);
  4772. }
  4773. boolean containsName(String name)
  4774. {
  4775. return names.contains(name);
  4776. }
  4777. }
  4778. /**
  4779. * An attribute definition.
  4780. */
  4781. class AttributeDecl
  4782. {
  4783. /**
  4784. * The attribute type (CDATA, ID, etc).
  4785. */
  4786. final String type;
  4787. /**
  4788. * The default value.
  4789. */
  4790. final String value;
  4791. /**
  4792. * The value type (#FIXED, #IMPLIED, etc).
  4793. */
  4794. final int valueType;
  4795. /**
  4796. * The enumeration text.
  4797. */
  4798. final String enumeration;
  4799. /**
  4800. * The enumeration tokens.
  4801. */
  4802. final HashSet values;
  4803. /**
  4804. * Whether this attribute declaration occurred in the external subset.
  4805. */
  4806. final boolean external;
  4807. AttributeDecl(String type, String value,
  4808. int valueType, String enumeration,
  4809. HashSet values, boolean external)
  4810. {
  4811. this.type = type;
  4812. this.value = value;
  4813. this.valueType = valueType;
  4814. this.enumeration = enumeration;
  4815. this.values = values;
  4816. this.external = external;
  4817. }
  4818. }
  4819. /**
  4820. * An XML input source.
  4821. */
  4822. static class Input
  4823. implements Location
  4824. {
  4825. int line = 1, markLine;
  4826. int column, markColumn;
  4827. int offset, markOffset;
  4828. final String publicId, systemId, name;
  4829. final boolean report; // report start- and end-entity
  4830. final boolean normalize; // normalize CR, etc to LF
  4831. InputStream in;
  4832. Reader reader;
  4833. UnicodeReader unicodeReader;
  4834. boolean initialized;
  4835. boolean encodingDetected;
  4836. String inputEncoding;
  4837. boolean xml11;
  4838. Input(InputStream in, Reader reader, String publicId, String systemId,
  4839. String name, String inputEncoding, boolean report,
  4840. boolean normalize)
  4841. {
  4842. if (inputEncoding == null)
  4843. inputEncoding = "UTF-8";
  4844. this.inputEncoding = inputEncoding;
  4845. this.publicId = publicId;
  4846. this.systemId = systemId;
  4847. this.name = name;
  4848. this.report = report;
  4849. this.normalize = normalize;
  4850. if (in != null)
  4851. {
  4852. if (reader != null)
  4853. throw new IllegalStateException("both byte and char streams "+
  4854. "specified");
  4855. if (normalize)
  4856. in = new CRLFInputStream(in);
  4857. in = new BufferedInputStream(in);
  4858. this.in = in;
  4859. }
  4860. else
  4861. {
  4862. this.reader = normalize ? new CRLFReader(reader) : reader;
  4863. unicodeReader = new UnicodeReader(this.reader);
  4864. }
  4865. initialized = false;
  4866. }
  4867. // -- Location --
  4868. public int getCharacterOffset()
  4869. {
  4870. return offset;
  4871. }
  4872. public int getColumnNumber()
  4873. {
  4874. return column;
  4875. }
  4876. public int getLineNumber()
  4877. {
  4878. return line;
  4879. }
  4880. public String getPublicId()
  4881. {
  4882. return publicId;
  4883. }
  4884. public String getSystemId()
  4885. {
  4886. return systemId;
  4887. }
  4888. void init()
  4889. throws IOException
  4890. {
  4891. if (initialized)
  4892. return;
  4893. if (in != null)
  4894. detectEncoding();
  4895. initialized = true;
  4896. }
  4897. void mark(int len)
  4898. throws IOException
  4899. {
  4900. markOffset = offset;
  4901. markLine = line;
  4902. markColumn = column;
  4903. if (unicodeReader != null)
  4904. unicodeReader.mark(len);
  4905. else
  4906. in.mark(len);
  4907. }
  4908. /**
  4909. * Character read.
  4910. */
  4911. int read()
  4912. throws IOException
  4913. {
  4914. offset++;
  4915. int ret = (unicodeReader != null) ? unicodeReader.read() : in.read();
  4916. if (normalize &&
  4917. (ret == 0x0d || (xml11 && (ret == 0x85 || ret == 0x2028))))
  4918. {
  4919. // Normalize CR etc to LF
  4920. ret = 0x0a;
  4921. }
  4922. // Locator handling
  4923. if (ret == 0x0a)
  4924. {
  4925. line++;
  4926. column = 0;
  4927. }
  4928. else
  4929. column++;
  4930. return ret;
  4931. }
  4932. /**
  4933. * Block read.
  4934. */
  4935. int read(int[] b, int off, int len)
  4936. throws IOException
  4937. {
  4938. int ret;
  4939. if (unicodeReader != null)
  4940. {
  4941. ret = unicodeReader.read(b, off, len);
  4942. }
  4943. else
  4944. {
  4945. byte[] b2 = new byte[len];
  4946. ret = in.read(b2, 0, len);
  4947. if (ret != -1)
  4948. {
  4949. String s = new String(b2, 0, ret, inputEncoding);
  4950. int[] c = UnicodeReader.toCodePointArray(s);
  4951. ret = c.length;
  4952. System.arraycopy(c, 0, b, off, ret);
  4953. }
  4954. }
  4955. if (ret != -1)
  4956. {
  4957. // Locator handling
  4958. for (int i = 0; i < ret; i++)
  4959. {
  4960. int c = b[off + i];
  4961. if (normalize &&
  4962. (c == 0x0d || (xml11 && (c == 0x85 || c == 0x2028))))
  4963. {
  4964. // Normalize CR etc to LF
  4965. c = 0x0a;
  4966. b[off + i] = c;
  4967. }
  4968. if (c == 0x0a)
  4969. {
  4970. line++;
  4971. column = 0;
  4972. }
  4973. else
  4974. column++;
  4975. }
  4976. }
  4977. return ret;
  4978. }
  4979. void reset()
  4980. throws IOException
  4981. {
  4982. if (unicodeReader != null)
  4983. unicodeReader.reset();
  4984. else
  4985. in.reset();
  4986. offset = markOffset;
  4987. line = markLine;
  4988. column = markColumn;
  4989. }
  4990. // Detection of input encoding
  4991. private static final int[] SIGNATURE_UCS_4_1234 =
  4992. new int[] { 0x00, 0x00, 0x00, 0x3c };
  4993. private static final int[] SIGNATURE_UCS_4_4321 =
  4994. new int[] { 0x3c, 0x00, 0x00, 0x00 };
  4995. private static final int[] SIGNATURE_UCS_4_2143 =
  4996. new int[] { 0x00, 0x00, 0x3c, 0x00 };
  4997. private static final int[] SIGNATURE_UCS_4_3412 =
  4998. new int[] { 0x00, 0x3c, 0x00, 0x00 };
  4999. private static final int[] SIGNATURE_UCS_2_12 =
  5000. new int[] { 0xfe, 0xff };
  5001. private static final int[] SIGNATURE_UCS_2_21 =
  5002. new int[] { 0xff, 0xfe };
  5003. private static final int[] SIGNATURE_UCS_2_12_NOBOM =
  5004. new int[] { 0x00, 0x3c, 0x00, 0x3f };
  5005. private static final int[] SIGNATURE_UCS_2_21_NOBOM =
  5006. new int[] { 0x3c, 0x00, 0x3f, 0x00 };
  5007. private static final int[] SIGNATURE_UTF_8 =
  5008. new int[] { 0x3c, 0x3f, 0x78, 0x6d };
  5009. private static final int[] SIGNATURE_UTF_8_BOM =
  5010. new int[] { 0xef, 0xbb, 0xbf };
  5011. /**
  5012. * Detect the input encoding.
  5013. */
  5014. private void detectEncoding()
  5015. throws IOException
  5016. {
  5017. int[] signature = new int[4];
  5018. in.mark(4);
  5019. for (int i = 0; i < 4; i++)
  5020. signature[i] = in.read();
  5021. in.reset();
  5022. // 4-byte encodings
  5023. if (equals(SIGNATURE_UCS_4_1234, signature))
  5024. {
  5025. in.read();
  5026. in.read();
  5027. in.read();
  5028. in.read();
  5029. setInputEncoding("UTF-32BE");
  5030. encodingDetected = true;
  5031. }
  5032. else if (equals(SIGNATURE_UCS_4_4321, signature))
  5033. {
  5034. in.read();
  5035. in.read();
  5036. in.read();
  5037. in.read();
  5038. setInputEncoding("UTF-32LE");
  5039. encodingDetected = true;
  5040. }
  5041. else if (equals(SIGNATURE_UCS_4_2143, signature) ||
  5042. equals(SIGNATURE_UCS_4_3412, signature))
  5043. throw new UnsupportedEncodingException("unsupported UCS-4 byte ordering");
  5044. // 2-byte encodings
  5045. else if (equals(SIGNATURE_UCS_2_12, signature))
  5046. {
  5047. in.read();
  5048. in.read();
  5049. setInputEncoding("UTF-16BE");
  5050. encodingDetected = true;
  5051. }
  5052. else if (equals(SIGNATURE_UCS_2_21, signature))
  5053. {
  5054. in.read();
  5055. in.read();
  5056. setInputEncoding("UTF-16LE");
  5057. encodingDetected = true;
  5058. }
  5059. else if (equals(SIGNATURE_UCS_2_12_NOBOM, signature))
  5060. {
  5061. //setInputEncoding("UTF-16BE");
  5062. throw new UnsupportedEncodingException("no byte-order mark for UCS-2 entity");
  5063. }
  5064. else if (equals(SIGNATURE_UCS_2_21_NOBOM, signature))
  5065. {
  5066. //setInputEncoding("UTF-16LE");
  5067. throw new UnsupportedEncodingException("no byte-order mark for UCS-2 entity");
  5068. }
  5069. // ASCII-derived encodings
  5070. else if (equals(SIGNATURE_UTF_8, signature))
  5071. {
  5072. // UTF-8 input encoding implied, TextDecl
  5073. }
  5074. else if (equals(SIGNATURE_UTF_8_BOM, signature))
  5075. {
  5076. in.read();
  5077. in.read();
  5078. in.read();
  5079. setInputEncoding("UTF-8");
  5080. encodingDetected = true;
  5081. }
  5082. }
  5083. private static boolean equals(int[] b1, int[] b2)
  5084. {
  5085. for (int i = 0; i < b1.length; i++)
  5086. {
  5087. if (b1[i] != b2[i])
  5088. return false;
  5089. }
  5090. return true;
  5091. }
  5092. void setInputEncoding(String encoding)
  5093. throws IOException
  5094. {
  5095. if (encoding.equals(inputEncoding))
  5096. return;
  5097. if ("UTF-16".equalsIgnoreCase(encoding) &&
  5098. inputEncoding.startsWith("UTF-16"))
  5099. return;
  5100. if (encodingDetected)
  5101. throw new UnsupportedEncodingException("document is not in its " +
  5102. "declared encoding " +
  5103. inputEncoding +
  5104. ": " + encoding);
  5105. inputEncoding = encoding;
  5106. finalizeEncoding();
  5107. }
  5108. void finalizeEncoding()
  5109. throws IOException
  5110. {
  5111. if (reader != null)
  5112. return;
  5113. reader = new BufferedReader(new InputStreamReader(in, inputEncoding));
  5114. unicodeReader = new UnicodeReader(reader);
  5115. mark(1);
  5116. }
  5117. }
  5118. }