IXmlPullParser.java 45 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117
  1. /* -*- c-basic-offset: 4; indent-tabs-mode: nil; -*- //------100-columns-wide------>|*/
  2. // for license please see accompanying XmlPull license file (available also at http://www.xmlpull.org/)
  3. package org.xmlpull.v1;
  4. import java.io.InputStream;
  5. import java.io.IOException;
  6. import java.io.Reader;
  7. /**
  8. * XML Pull Parser is an interface that defines parsing functionality provided
  9. * in <a href="http://www.xmlpull.org/">XMLPULL V1 API</a> (visit this website to
  10. * learn more about API and its implementations).
  11. *
  12. * <p>There are the following different
  13. * kinds of parser, depending on which features are set:<ul>
  14. * <li><b>non-validating</b> parser as defined in XML 1.0 spec when
  15. * FEATURE_PROCESS_DOCDECL is set to true
  16. * <li><b>validating parser</b> as defined in XML 1.0 spec when
  17. * FEATURE_VALIDATION is true (and that implies that FEATURE_PROCESS_DOCDECL is true)
  18. * <li>when FEATURE_PROCESS_DOCDECL is false (this is the default and,
  19. * if a different value is required, it must be changed before parsing is started)
  20. * then the parser behaves like an XML 1.0 compliant non-validating parser under the condition that
  21. * <em>no DOCDECL is present</em> in XML documents
  22. * (internal entities can still be defined with defineEntityReplacementText()).
  23. * This mode of operation is intended <b>for operation in constrained environments</b> such as J2ME.
  24. * </ul>
  25. *
  26. *
  27. * <p>There are two key methods: next() and nextToken(). While next() provides
  28. * access to high level parsing events, nextToken() allows access to lower
  29. * level tokens.
  30. *
  31. * <p>The current event state of the parser
  32. * can be determined by calling the
  33. * <a href="#getEventType()">getEventType()</a> method.
  34. * Initially, the parser is in the <a href="#START_DOCUMENT">START_DOCUMENT</a>
  35. * state.
  36. *
  37. * <p>The method <a href="#next()">next()</a> advances the parser to the
  38. * next event. The int value returned from next determines the current parser
  39. * state and is identical to the value returned from following calls to
  40. * getEventType ().
  41. *
  42. * <p>The following event types are seen by next()<dl>
  43. * <dt><a href="#START_TAG">START_TAG</a><dd> An XML start tag was read.
  44. * <dt><a href="#TEXT">TEXT</a><dd> Text content was read;
  45. * the text content can be retrieved using the getText() method.
  46. * (when in validating mode next() will not report ignorable whitespaces, use nextToken() instead)
  47. * <dt><a href="#END_TAG">END_TAG</a><dd> An end tag was read
  48. * <dt><a href="#END_DOCUMENT">END_DOCUMENT</a><dd> No more events are available
  49. * </dl>
  50. *
  51. * <p>After the first call to next() or nextToken() (or any other next*() method),
  52. * the user application can obtain the
  53. * XML version, standalone and encoding from the XML declaration
  54. * in the following ways:<ul>
  55. * <li><b>version</b>:
  56. * getProperty(&quot;<a href="http://xmlpull.org/v1/doc/properties.html#xmldecl-version">http://xmlpull.org/v1/doc/properties.html#xmldecl-version</a>&quot;)
  57. * returns String ("1.0") or null if XMLDecl was not read or if property is not supported
  58. * <li><b>standalone</b>:
  59. * getProperty(&quot;<a href="http://xmlpull.org/v1/doc/features.html#xmldecl-standalone">http://xmlpull.org/v1/doc/features.html#xmldecl-standalone</a>&quot;)
  60. * returns Boolean: null if there was no standalone declaration
  61. * or if property is not supported
  62. * otherwise returns Boolean(true) if standalone="yes" and Boolean(false) when standalone="no"
  63. * <li><b>encoding</b>: obtained from getInputEncoding()
  64. * null if stream had unknown encoding (not set in setInputStream)
  65. * and it was not declared in XMLDecl
  66. * </ul>
  67. *
  68. * A minimal example for using this API may look as follows:
  69. * <pre>
  70. * import java.io.IOException;
  71. * import java.io.StringReader;
  72. *
  73. * import org.xmlpull.v1.IXmlPullParser;
  74. * import org.xmlpull.v1.<a href="XmlPullParserException.html">XmlPullParserException</a>;
  75. * import org.xmlpull.v1.<a href="XmlPullParserFactory.html">XmlPullParserFactory</a>;
  76. *
  77. * public class SimpleXmlPullApp
  78. * {
  79. *
  80. * public static void main (String args[])
  81. * throws XmlPullParserException, IOException
  82. * {
  83. * XmlPullParserFactory factory = XmlPullParserFactory.newInstance();
  84. * factory.setNamespaceAware(true);
  85. * IXmlPullParser xpp = factory.newPullParser();
  86. *
  87. * xpp.<a href="#setInput">setInput</a>( new StringReader ( "&lt;foo>Hello World!&lt;/foo>" ) );
  88. * int eventType = xpp.getEventType();
  89. * while (eventType != IXmlPullParser.END_DOCUMENT) {
  90. * if(eventType == IXmlPullParser.START_DOCUMENT) {
  91. * System.out.println("Start document");
  92. * } else if(eventType == IXmlPullParser.END_DOCUMENT) {
  93. * System.out.println("End document");
  94. * } else if(eventType == IXmlPullParser.START_TAG) {
  95. * System.out.println("Start tag "+xpp.<a href="#getName()">getName()</a>);
  96. * } else if(eventType == IXmlPullParser.END_TAG) {
  97. * System.out.println("End tag "+xpp.getName());
  98. * } else if(eventType == IXmlPullParser.TEXT) {
  99. * System.out.println("Text "+xpp.<a href="#getText()">getText()</a>);
  100. * }
  101. * eventType = xpp.next();
  102. * }
  103. * }
  104. * }
  105. * </pre>
  106. *
  107. * <p>The above example will generate the following output:
  108. * <pre>
  109. * Start document
  110. * Start tag foo
  111. * Text Hello World!
  112. * End tag foo
  113. * </pre>
  114. *
  115. * <p>For more details on API usage, please refer to the
  116. * quick Introduction available at <a href="http://www.xmlpull.org">http://www.xmlpull.org</a>
  117. *
  118. * @see XmlPullParserFactory
  119. * @see #defineEntityReplacementText
  120. * @see #getName
  121. * @see #getNamespace
  122. * @see #getText
  123. * @see #next
  124. * @see #nextToken
  125. * @see #setInput
  126. * @see #FEATURE_PROCESS_DOCDECL
  127. * @see #FEATURE_VALIDATION
  128. * @see #START_DOCUMENT
  129. * @see #START_TAG
  130. * @see #TEXT
  131. * @see #END_TAG
  132. * @see #END_DOCUMENT
  133. *
  134. * @author <a href="http://www-ai.cs.uni-dortmund.de/PERSONAL/haustein.html">Stefan Haustein</a>
  135. * @author <a href="http://www.extreme.indiana.edu/~aslom/">Aleksander Slominski</a>
  136. */
  137. public interface IXmlPullParser {
  138. /** The namespace URI that represents the default namespace: the empty string (""). */
  139. String NO_NAMESPACE = "";
  140. // ----------------------------------------------------------------------------
  141. // EVENT TYPES as reported by next()
  142. /**
  143. * Signals that the parser is at the very beginning of the document
  144. * and nothing has been read yet.
  145. * This event type can only be observed by calling getEventType()
  146. * before the first call to next(), nextToken(), or nextTag().
  147. *
  148. * @see #next
  149. * @see #nextToken
  150. */
  151. int START_DOCUMENT = 0;
  152. /**
  153. * Logical end of the XML document. Returned from getEventType(), next()
  154. * and nextToken()
  155. * when the end of the input document has been reached.
  156. * <p><strong>NOTE:</strong> calling
  157. * <a href="#next()">next()</a> or <a href="#nextToken()">nextToken()</a>
  158. * again will result in an exception being thrown.
  159. *
  160. * @see #next
  161. * @see #nextToken
  162. */
  163. int END_DOCUMENT = 1;
  164. /**
  165. * Returned from getEventType(),
  166. * <a href="#next()">next()</a>, <a href="#nextToken()">nextToken()</a> when
  167. * a start tag was read.
  168. * The name of the start tag is available from getName(), its namespace and prefix are
  169. * available from getNamespace() and getPrefix()
  170. * if <a href='#FEATURE_PROCESS_NAMESPACES'>namespaces are enabled</a>.
  171. * See getAttribute* methods to retrieve element attributes.
  172. * See getNamespace* methods to retrieve newly declared namespaces.
  173. *
  174. * @see #next
  175. * @see #nextToken
  176. * @see #getName
  177. * @see #getPrefix
  178. * @see #getNamespace
  179. * @see #getAttributeCount
  180. * @see #getDepth
  181. * @see #getNamespaceCount
  182. * @see #getNamespaceUri
  183. * @see #FEATURE_PROCESS_NAMESPACES
  184. */
  185. int START_TAG = 2;
  186. /**
  187. * Returned from getEventType(), <a href="#next()">next()</a>, or
  188. * <a href="#nextToken()">nextToken()</a> when an end tag was read.
  189. * The name of the element is available from getName(), its
  190. * namespace and prefix are
  191. * available from getNamespace() and getPrefix().
  192. *
  193. * @see #next
  194. * @see #nextToken
  195. * @see #getName
  196. * @see #getPrefix
  197. * @see #getNamespace
  198. * @see #FEATURE_PROCESS_NAMESPACES
  199. */
  200. int END_TAG = 3;
  201. /**
  202. * Character data was read and is available by calling getText().
  203. * <p><strong>Please note:</strong> <a href="#next()">next()</a> will
  204. * accumulate multiple
  205. * events into one TEXT event, skipping IGNORABLE_WHITESPACE,
  206. * PROCESSING_INSTRUCTION and COMMENT events.
  207. * In contrast, <a href="#nextToken()">nextToken()</a> will stop reading
  208. * text when any other event is observed.
  209. * Also, when the state was reached by calling next(), the text value will
  210. * be normalized, whereas getText() will
  211. * return unnormalized content in the case of nextToken(). This allows
  212. * an exact roundtrip without changing line ends when examining low
  213. * level events, whereas for high level applications the text is
  214. * normalized appropriately.
  215. *
  216. * @see #next
  217. * @see #nextToken
  218. * @see #getText
  219. */
  220. int TEXT = 4;
  221. // ----------------------------------------------------------------------------
  222. // additional events exposed by lower level nextToken()
  223. /**
  224. * A CDATA section was just read;
  225. * this token is available only from calls to <a href="#nextToken()">nextToken()</a>.
  226. * A call to next() will accumulate various text events into a single event
  227. * of type TEXT. The text contained in the CDATA section is available
  228. * by calling getText().
  229. *
  230. * @see #nextToken
  231. * @see #getText
  232. */
  233. int CDSECT = 5;
  234. /**
  235. * An entity reference was just read;
  236. * this token is available from <a href="#nextToken()">nextToken()</a>
  237. * only. The entity name is available by calling getName(). If available,
  238. * the replacement text can be obtained by calling getText(); otherwise,
  239. * the user is responsible for resolving the entity reference.
  240. * This event type is never returned from next(); next() will
  241. * accumulate the replacement text and other text
  242. * events into a single TEXT event.
  243. *
  244. * @see #nextToken
  245. * @see #getText
  246. */
  247. int ENTITY_REF = 6;
  248. /**
  249. * Ignorable whitespace was just read.
  250. * This token is available only from <a href="#nextToken()">nextToken()</a>.
  251. * For non-validating
  252. * parsers, this event is only reported by nextToken() when outside
  253. * the root element.
  254. * Validating parsers may be able to detect ignorable whitespace at
  255. * other locations.
  256. * The ignorable whitespace string is available by calling getText().
  257. *
  258. * <p><strong>NOTE:</strong> this is different from calling the
  259. * isWhitespace() method, since text content
  260. * may be whitespace but not ignorable.
  261. *
  262. * <p>Ignorable whitespace is skipped by next() automatically; this event
  263. * type is never returned from next().
  264. *
  265. * @see #nextToken
  266. * @see #getText
  267. */
  268. int IGNORABLE_WHITESPACE = 7;
  269. /**
  270. * An XML processing instruction was just read. This
  271. * event type is available only via <a href="#nextToken()">nextToken()</a>.
  272. * getText() will return the text that is inside the processing instruction.
  273. * Calls to next() will skip processing instructions automatically.
  274. * @see #nextToken
  275. * @see #getText
  276. */
  277. int PROCESSING_INSTRUCTION = 8;
  278. /**
  279. * An XML comment was just read. This token is
  280. * available via <a href="#nextToken()">nextToken()</a> only;
  281. * calls to next() will skip comments automatically.
  282. * The content of the comment can be accessed using the getText()
  283. * method.
  284. *
  285. * @see #nextToken
  286. * @see #getText
  287. */
  288. int COMMENT = 9;
  289. /**
  290. * An XML document type declaration was just read. This token is
  291. * available from <a href="#nextToken()">nextToken()</a> only.
  292. * The unparsed text inside the doctype is available via
  293. * the getText() method. Calls to next() never return this event type.
  294. *
  295. * @see #nextToken
  296. * @see #getText
  297. */
  298. int DOCDECL = 10;
  299. /**
  300. * This array can be used to convert the event type integer constants
  301. * such as START_TAG or TEXT
  302. * to a string. For example, the value of TYPES[START_TAG] is
  303. * the string "START_TAG".
  304. *
  305. * <p>This array is intended for diagnostic output only. Relying
  306. * on the contents of the array may be dangerous since malicious
  307. * applications may alter the array, although it is final, due
  308. * to limitations of the Java language.
  309. */
  310. String [] TYPES = {
  311. "START_DOCUMENT",
  312. "END_DOCUMENT",
  313. "START_TAG",
  314. "END_TAG",
  315. "TEXT",
  316. "CDSECT",
  317. "ENTITY_REF",
  318. "IGNORABLE_WHITESPACE",
  319. "PROCESSING_INSTRUCTION",
  320. "COMMENT",
  321. "DOCDECL"
  322. };
  323. // ----------------------------------------------------------------------------
  324. // namespace related features
  325. /**
  326. * This feature determines whether the parser processes
  327. * namespaces. As for all features, the default value is false.
  328. * <p><strong>NOTE:</strong> The value cannot be changed during
  329. * parsing and must be set before parsing.
  330. *
  331. * @see #getFeature
  332. * @see #setFeature
  333. */
  334. String FEATURE_PROCESS_NAMESPACES =
  335. "http://xmlpull.org/v1/doc/features.html#process-namespaces";
  336. /**
  337. * This feature determines whether namespace attributes are
  338. * exposed via the attribute access methods. As for all features,
  339. * the default value is false. This feature cannot be changed
  340. * during parsing. Note that the feature URI fragment is <code>report-namespace-prefixes</code>.
  341. *
  342. * @see #getFeature
  343. * @see #setFeature
  344. */
  345. String FEATURE_REPORT_NAMESPACE_ATTRIBUTES =
  346. "http://xmlpull.org/v1/doc/features.html#report-namespace-prefixes";
  347. /**
  348. * This feature determines whether the document type declaration
  349. * is processed. If set to false,
  350. * the DOCDECL event type is reported by nextToken()
  351. * and ignored by next().
  352. *
  353. * <p>If this feature is activated, then the document type declaration
  354. * must be processed by the parser.
  355. *
  356. * <p><strong>Please note:</strong> If the document type declaration
  357. * was ignored, entity references may cause exceptions
  358. * later in the parsing process.
  359. * The default value of this feature is false. It cannot be changed
  360. * during parsing.
  361. *
  362. * @see #getFeature
  363. * @see #setFeature
  364. */
  365. String FEATURE_PROCESS_DOCDECL =
  366. "http://xmlpull.org/v1/doc/features.html#process-docdecl";
  367. /**
  368. * If this feature is activated, all validation errors as
  369. * defined in the XML 1.0 specification are reported.
  370. * This implies that FEATURE_PROCESS_DOCDECL is true and both the
  371. * internal and external document type declarations will be processed.
  372. * <p><strong>Please note:</strong> This feature cannot be changed
  373. * during parsing. The default value is false.
  374. *
  375. * @see #getFeature
  376. * @see #setFeature
  377. */
  378. String FEATURE_VALIDATION =
  379. "http://xmlpull.org/v1/doc/features.html#validation";
  380. /**
  381. * Use this call to change the general behaviour of the parser,
  382. * such as namespace processing or doctype declaration handling.
  383. * This method must be called before the first call to next() or
  384. * nextToken(). Otherwise, an exception is thrown.
  385. * <p>Example: call setFeature(FEATURE_PROCESS_NAMESPACES, true) in order
  386. * to switch on namespace processing. The initial settings correspond
  387. * to the properties requested from the XML Pull Parser factory.
  388. * If none were requested, all features are deactivated by default.
  389. *
  390. * @exception XmlPullParserException If the feature is not supported or cannot be set
  391. * @exception IllegalArgumentException If the string with the feature name is null
  392. */
  393. void setFeature(String name,
  394. boolean state) throws XmlPullParserException;
  395. /**
  396. * Returns the current value of the given feature.
  397. * <p><strong>Please note:</strong> unknown features are
  398. * <strong>always</strong> returned as false.
  399. *
  400. * @param name The name of the feature to be retrieved.
  401. * @return The value of the feature.
  402. * @exception IllegalArgumentException If the string with the feature name is null
  403. */
  404. boolean getFeature(String name);
  405. /**
  406. * Sets the value of a property.
  407. *
  408. * <p>The property name is any fully-qualified URI.
  409. *
  410. * @exception XmlPullParserException If the property is not supported or cannot be set
  411. * @exception IllegalArgumentException If the string with the property name is null
  412. */
  413. void setProperty(String name,
  414. Object value) throws XmlPullParserException;
  415. /**
  416. * Looks up the value of a property.
  417. *
  418. * <p>The property name is any fully-qualified URI.
  419. * <p><strong>NOTE:</strong> unknown properties are <strong>always</strong>
  420. * returned as null.
  421. *
  422. * @param name The name of the property to be retrieved.
  423. * @return The value of the named property, or null if the property is unknown.
  424. */
  425. Object getProperty(String name);
  426. /**
  427. * Sets the input source for the parser to the given reader and
  428. * resets the parser. The event type is set to the initial value
  429. * START_DOCUMENT.
  430. * Setting the reader to null will just stop parsing and
  431. * reset the parser state,
  432. * allowing the parser to free internal resources
  433. * such as parsing buffers.
  434. */
  435. void setInput(Reader in) throws XmlPullParserException;
  436. /**
  437. * Sets the input stream the parser is going to process.
  438. * This call resets the parser state and sets the event type
  439. * to the initial value START_DOCUMENT.
  440. *
  441. * <p><strong>NOTE:</strong> If an input encoding string is passed,
  442. * it MUST be used. Otherwise,
  443. * if inputEncoding is null, the parser SHOULD try to determine
  444. * the input encoding following the XML 1.0 specification (see below).
  445. * If encoding detection is supported then the following feature
  446. * <a href="http://xmlpull.org/v1/doc/features.html#detect-encoding">http://xmlpull.org/v1/doc/features.html#detect-encoding</a>
  447. * MUST be true, and otherwise it MUST be false.
  448. *
  449. * @param inputStream contains a raw byte input stream of possibly
  450. * unknown encoding (when inputEncoding is null).
  451. *
  452. * @param inputEncoding if not null it MUST be used as encoding for inputStream
  453. */
  454. void setInput(InputStream inputStream, String inputEncoding)
  455. throws XmlPullParserException;
  456. /**
  457. * Returns the input encoding if known, null otherwise.
  458. * If setInput(InputStream, inputEncoding) was called with an inputEncoding
  459. * value other than null, this value must be returned
  460. * from this method. Otherwise, if inputEncoding is null and
  461. * the parser supports the encoding detection feature
  462. * (http://xmlpull.org/v1/doc/features.html#detect-encoding),
  463. * it must return the detected encoding.
  464. * If setInput(Reader) was called, null is returned.
  465. * After the first call to next(), if an XML declaration was present, this method
  466. * will return the declared encoding.
  467. */
  468. String getInputEncoding();
  469. /**
  470. * Sets a new value for the entity replacement text as defined in
  471. * <a href="http://www.w3.org/TR/REC-xml#intern-replacement">XML 1.0 Section 4.5
  472. * Construction of Internal Entity Replacement Text</a>.
  473. * If FEATURE_PROCESS_DOCDECL or FEATURE_VALIDATION is set, calling this
  474. * function will result in an exception -- when processing of DOCDECL is
  475. * enabled, there is no need to set the entity replacement text manually.
  476. *
  477. * <p>The motivation for this function is to allow very small
  478. * implementations of XMLPULL that will work in J2ME environments.
  479. * Though these implementations may not be able to process the document type
  480. * declaration, they still can work with known DTDs by using this function.
  481. *
  482. * <p><b>Please note:</b> The given value is used literally as replacement text
  483. * and it corresponds to declaring an entity in the DTD that has all special characters
  484. * escaped: left angle bracket is replaced with &amp;lt;, ampersand with &amp;amp;
  485. * and so on.
  486. *
  487. * <p><b>Note:</b> The given value is the literal replacement text and must not
  488. * contain any other entity reference (if it contains any entity reference
  489. * there will be no further replacement).
  490. *
  491. * <p><b>Note:</b> The list of pre-defined entity names will
  492. * always contain standard XML entities such as
  493. * amp (&amp;amp;), lt (&amp;lt;), gt (&amp;gt;), quot (&amp;quot;), and apos (&amp;apos;).
  494. * Those cannot be redefined by this method!
  495. *
  496. * @see #setInput
  497. * @see #FEATURE_PROCESS_DOCDECL
  498. * @see #FEATURE_VALIDATION
  499. */
  500. void defineEntityReplacementText( String entityName,
  501. String replacementText ) throws XmlPullParserException;
  502. /**
  503. * Returns the number of elements in the namespace stack for the given
  504. * depth.
  505. * If namespaces are not enabled, 0 is returned.
  506. *
  507. * <p><b>NOTE:</b> when the parser is on END_TAG then it is allowed to call
  508. * this function with getDepth()+1 argument to retrieve the position of namespace
  509. * prefixes and URIs that were declared on the corresponding START_TAG.
  510. * <p><b>NOTE:</b> to retrieve the list of namespaces declared in the current element:<pre>
  511. * IXmlPullParser pp = ...
  512. * int nsStart = pp.getNamespaceCount(pp.getDepth()-1);
  513. * int nsEnd = pp.getNamespaceCount(pp.getDepth());
  514. * for (int i = nsStart; i &lt; nsEnd; i++) {
  515. * String prefix = pp.getNamespacePrefix(i);
  516. * String ns = pp.getNamespaceUri(i);
  517. * // ...
  518. * }
  519. * </pre>
  520. *
  521. * @see #getNamespacePrefix
  522. * @see #getNamespaceUri
  523. * @see #getNamespace()
  524. * @see #getNamespace(String)
  525. */
  526. int getNamespaceCount(int depth) throws XmlPullParserException;
  527. /**
  528. * Returns the namespace prefix for the given position
  529. * in the namespace stack.
  530. * Default namespace declaration (xmlns='...') will have null as prefix.
  531. * If the given index is out of range, an exception is thrown.
  532. * <p><b>Please note:</b> when the parser is on an END_TAG,
  533. * namespace prefixes that were declared
  534. * in the corresponding START_TAG are still accessible
  535. * although they are no longer in scope.
  536. */
  537. String getNamespacePrefix(int pos) throws XmlPullParserException;
  538. /**
  539. * Returns the namespace URI for the given position in the
  540. * namespace stack.
  541. * If the position is out of range, an exception is thrown.
  542. * <p><b>NOTE:</b> when the parser is on END_TAG then namespace prefixes that were declared
  543. * in the corresponding START_TAG are still accessible even though they are not in scope.
  544. */
  545. String getNamespaceUri(int pos) throws XmlPullParserException;
  546. /**
  547. * Returns the URI corresponding to the given prefix,
  548. * depending on the current state of the parser.
  549. *
  550. * <p>If the prefix was not declared in the current scope,
  551. * null is returned. The default namespace is included
  552. * in the namespace table and is available via
  553. * getNamespace (null).
  554. *
  555. * <p>This method is a convenience method for
  556. *
  557. * <pre>
  558. * for (int i = getNamespaceCount(getDepth ())-1; i >= 0; i--) {
  559. * if (getNamespacePrefix(i).equals( prefix )) {
  560. * return getNamespaceUri(i);
  561. * }
  562. * }
  563. * return null;
  564. * </pre>
  565. *
  566. * <p><strong>Please note:</strong> parser implementations
  567. * may provide more efficient lookup, e.g. using a Hashtable.
  568. * The 'xml' prefix is bound to "http://www.w3.org/XML/1998/namespace", as
  569. * defined in the
  570. * <a href="http://www.w3.org/TR/REC-xml-names/#ns-using">Namespaces in XML</a>
  571. * specification. Analogously, the 'xmlns' prefix is resolved to
  572. * <a href="http://www.w3.org/2000/xmlns/">http://www.w3.org/2000/xmlns/</a>.
  573. *
  574. * @see #getNamespaceCount
  575. * @see #getNamespacePrefix
  576. * @see #getNamespaceUri
  577. */
  578. String getNamespace (String prefix);
  579. // --------------------------------------------------------------------------
  580. // miscellaneous reporting methods
  581. /**
  582. * Returns the current depth of the element.
  583. * Outside the root element, the depth is 0. The
  584. * depth is incremented by 1 when a start tag is reached.
  585. * The depth is decremented AFTER the end tag
  586. * event was observed.
  587. *
  588. * <pre>
  589. * &lt;!-- outside --&gt; 0
  590. * &lt;root&gt; 1
  591. * sometext 1
  592. * &lt;foobar&gt; 2
  593. * &lt;/foobar&gt; 2
  594. * &lt;/root&gt; 1
  595. * &lt;!-- outside --&gt; 0
  596. * </pre>
  597. */
  598. int getDepth();
  599. /**
  600. * Returns a short text describing the current parser state, including
  601. * the position, a description of the current event, and the data
  602. * source if known.
  603. * This method is especially useful for providing meaningful
  604. * error messages and for debugging purposes.
  605. */
  606. String getPositionDescription ();
  607. /**
  608. * Returns the current line number, starting from 1.
  609. * When the parser does not know the current line number
  610. * or cannot determine it, -1 is returned (e.g. for WBXML).
  611. *
  612. * @return current line number or -1 if unknown.
  613. */
  614. int getLineNumber();
  615. /**
  616. * Returns the current column number, starting from 0.
  617. * When the parser does not know the current column number
  618. * or cannot determine it, -1 is returned (e.g. for WBXML).
  619. *
  620. * @return current column number or -1 if unknown.
  621. */
  622. int getColumnNumber();
  623. // --------------------------------------------------------------------------
  624. // TEXT related methods
  625. /**
  626. * Checks whether the current TEXT event contains only whitespace
  627. * characters.
  628. * For IGNORABLE_WHITESPACE, this is always true.
  629. * For TEXT and CDSECT, false is returned when the current event text
  630. * contains at least one non-whitespace character. For any other
  631. * event type an exception is thrown.
  632. *
  633. * <p><b>Please note:</b> non-validating parsers are not
  634. * able to distinguish whitespace and ignorable whitespace,
  635. * except for whitespace outside the root element. Ignorable
  636. * whitespace is reported as a separate event, which is exposed
  637. * via nextToken() only.
  638. * @return true if the text of the current event consists only of whitespace characters
  639. */
  640. boolean isWhitespace() throws XmlPullParserException;
  641. /**
  642. * Returns the text content of the current event as a String.
  643. * The value returned depends on the current event type;
  644. * for example, for a TEXT event it is the element content
  645. * (this is the typical case when next() is used).
  646. *
  647. * <p>See the description of nextToken() for a detailed description of
  648. * possible returned values for different types of events.
  649. *
  650. * <p><strong>NOTE:</strong> in case of ENTITY_REF, this method returns
  651. * the entity replacement text (or null if not available). This is
  652. * the only case where
  653. * getText() and getTextCharacters() return different values.
  654. *
  655. * @see #getEventType
  656. * @see #next
  657. * @see #nextToken
  658. */
  659. String getText ();
  660. /**
  661. * Returns the buffer that contains the text of the current event,
  662. * as well as the start offset and length relevant for the current
  663. * event. See getText(), next() and nextToken() for a description of possible returned values.
  664. *
  665. * <p><strong>Please note:</strong> this buffer must not
  666. * be modified and its content MAY change after a call to
  667. * next() or nextToken(). This method will always return the
  668. * same value as getText(), except for ENTITY_REF. In the case
  669. * of ENTITY_REF, getText() returns the replacement text and
  670. * this method returns the actual input buffer containing the
  671. * entity name.
  672. * If getText() returns null, this method returns null as well and
  673. * the values returned in the holder array MUST be -1 (both start
  674. * and length).
  675. *
  676. * @see #getText
  677. * @see #next
  678. * @see #nextToken
  679. *
  680. * @param holderForStartAndLength Must hold a 2-element int array
  681. * into which the start offset and length values will be written.
  682. * @return char buffer that contains the text of the current event
  683. * (null if the current event has no text associated).
  684. */
  685. char[] getTextCharacters(int [] holderForStartAndLength);
  686. // --------------------------------------------------------------------------
  687. // START_TAG / END_TAG shared methods
  688. /**
  689. * Returns the namespace URI of the current element.
  690. * The default namespace is represented as an empty string.
  691. * If namespaces are not enabled, an empty String ("") is always returned.
  692. * The current event must be START_TAG or END_TAG; otherwise, null is returned.
  693. *
  694. * @return namespace URI of the current element, "" for the default namespace or when namespaces are not enabled, or null if the current event is not START_TAG or END_TAG
  695. */
  696. String getNamespace ();
  697. /**
  698. * For START_TAG or END_TAG events, the (local) name of the current
  699. * element is returned when namespaces are enabled. When namespace
  700. * processing is disabled, the raw name is returned.
  701. * For ENTITY_REF events, the entity name is returned.
  702. * If the current event is not START_TAG, END_TAG, or ENTITY_REF, null is returned.
  703. * <p><b>Please note:</b> To reconstruct the raw element name
  704. * when namespaces are enabled and the prefix is not null,
  705. * you will need to add the prefix and a colon to the local name.
  706. *
  707. * @return element name or entity name, or null if the current event is not START_TAG, END_TAG or ENTITY_REF
  708. */
  709. String getName();
  710. /**
  711. * Returns the prefix of the current element.
  712. * If the element is in the default namespace (has no prefix), null is returned.
  713. * If namespaces are not enabled, or the current event is not START_TAG or END_TAG, null is returned.
  714. *
  715. * @return prefix of the current element, or null in the cases described above
  716. */
  717. String getPrefix();
  718. /**
  719. * Returns true if the current event is START_TAG and the tag
  720. * is degenerated (e.g. &lt;foobar/&gt;).
  721. *
  722. * @return true if the current START_TAG is a degenerated (empty element) tag
  723. * @throws XmlPullParserException if the parser is not on START_TAG
  724. */
  725. boolean isEmptyElementTag() throws XmlPullParserException;
  726. // --------------------------------------------------------------------------
  727. // START_TAG Attributes retrieval methods
  728. /**
  729. * Returns the number of attributes of the current start tag, or -1 if the current event type is not START_TAG.
  730. *
  731. * @return attribute count, or -1 if the current event type is not START_TAG
  732. * @see #getAttributeNamespace
  733. * @see #getAttributeName
  734. * @see #getAttributePrefix
  735. * @see #getAttributeValue
  736. */
  737. int getAttributeCount();
  738. /**
  739. * Returns the namespace URI of the attribute
  740. * with the given index (starts from 0).
  741. * Returns an empty string ("") if namespaces are not enabled
  742. * or the attribute has no namespace.
  743. * Throws an IndexOutOfBoundsException if the index is out of range
  744. * or the current event type is not START_TAG.
  745. *
  746. * <p><strong>NOTE:</strong> if FEATURE_REPORT_NAMESPACE_ATTRIBUTES is set
  747. * then namespace attributes (xmlns:ns='...') must be reported
  748. * with namespace
  749. * <a href="http://www.w3.org/2000/xmlns/">http://www.w3.org/2000/xmlns/</a>
  750. * (visit this URL for description!).
  751. * The default namespace attribute (xmlns="...") will be reported with empty namespace.
  752. * <p><strong>NOTE:</strong> The xml prefix is bound as defined in
  753. * <a href="http://www.w3.org/TR/REC-xml-names/#ns-using">Namespaces in XML</a>
  754. * specification to "http://www.w3.org/XML/1998/namespace".
  755. *
  756. * @param index zero-based index of the attribute
  757. * @return attribute namespace,
  758. * empty string ("") is returned if namespaces processing is not enabled or
  759. * namespaces processing is enabled but attribute has no namespace (it has no prefix).
  760. */
  761. String getAttributeNamespace (int index);
  762. /**
  763. * Returns the local name of the specified attribute
  764. * if namespaces are enabled, or just the attribute name if namespaces are disabled.
  765. * Throws an IndexOutOfBoundsException if the index is out of range
  766. * or the current event type is not START_TAG.
  767. *
  768. * @param index zero-based index of the attribute
  769. * @return attribute name (null is never returned)
  770. */
  771. String getAttributeName (int index);
  772. /**
  773. * Returns the prefix of the specified attribute.
  774. * Returns null if the attribute has no prefix.
  775. * If namespaces are disabled it will always return null.
  776. * Throws an IndexOutOfBoundsException if the index is out of range
  777. * or the current event type is not START_TAG.
  778. *
  779. * @param index zero-based index of the attribute
  780. * @return attribute prefix, or null if there is no prefix or namespaces processing is not enabled.
  781. */
  782. String getAttributePrefix(int index);
  783. /**
  784. * Returns the type of the specified attribute.
  785. * If the parser is non-validating it MUST return CDATA.
  786. *
  787. * @param index zero-based index of the attribute
  788. * @return attribute type (null is never returned)
  789. */
  790. String getAttributeType(int index);
  791. /**
  792. * Returns whether the specified attribute was not in the input but was declared in XML.
  793. * If the parser is non-validating it MUST always return false.
  794. * This information is part of the XML infoset.
  795. *
  796. * @param index zero-based index of the attribute
  797. * @return false if the attribute was in the input
  798. */
  799. boolean isAttributeDefault(int index);
  800. /**
  801. * Returns the value of the attribute at the given index.
  802. * Throws an IndexOutOfBoundsException if the index is out of range
  803. * or the current event type is not START_TAG.
  804. *
  805. * <p><strong>NOTE:</strong> attribute value must be normalized
  806. * (including entity replacement text if PROCESS_DOCDECL is false) as described in
  807. * <a href="http://www.w3.org/TR/REC-xml#AVNormalize">XML 1.0 section
  808. * 3.3.3 Attribute-Value Normalization</a>.
  809. *
  810. * @see #defineEntityReplacementText
  811. *
  812. * @param index zero-based index of the attribute
  813. * @return value of the attribute (null is never returned)
  814. */
  815. String getAttributeValue(int index);
  816. /**
  817. * Returns the value of the attribute identified by namespace URI and local name.
  818. * If namespaces are disabled, namespace must be null.
  819. * If the current event type is not START_TAG then an IndexOutOfBoundsException will be thrown.
  820. *
  821. * <p><strong>NOTE:</strong> attribute value must be normalized
  822. * (including entity replacement text if PROCESS_DOCDECL is false) as described in
  823. * <a href="http://www.w3.org/TR/REC-xml#AVNormalize">XML 1.0 section
  824. * 3.3.3 Attribute-Value Normalization</a>.
  825. *
  826. * @see #defineEntityReplacementText
  827. *
  828. * @param namespace Namespace of the attribute if namespaces are enabled, otherwise must be null
  829. * @param name If namespaces are enabled, the local name of the attribute, otherwise just the attribute name
  830. * @return value of the attribute, or null if an attribute with the given name does not exist
  831. */
  832. String getAttributeValue(String namespace,
  833. String name);
  834. // --------------------------------------------------------------------------
  835. // actual parsing methods
  836. /**
  837. * Returns the type of the current event (START_TAG, END_TAG, TEXT, etc.).
  838. *
  839. * @see #next()
  840. * @see #nextToken()
  841. */
  842. int getEventType()
  843. throws XmlPullParserException;
  844. /**
  845. * Gets the next parsing event - element content will be coalesced and only one
  846. * TEXT event must be returned for the whole element content
  847. * (comments and processing instructions will be ignored and entity references
  848. * must be expanded, or an exception must be thrown if an entity reference cannot be expanded).
  849. * If element content is empty (content is "") then no TEXT event will be reported.
  850. *
  851. * <p><b>NOTE:</b> an empty element (such as &lt;tag/>) will be reported
  852. * with two separate events: START_TAG, END_TAG - it must be so to preserve
  853. * parsing equivalency of an empty element to &lt;tag>&lt;/tag>.
  854. * (see isEmptyElementTag ())
  855. *
  856. * @see #isEmptyElementTag
  857. * @see #START_TAG
  858. * @see #TEXT
  859. * @see #END_TAG
  860. * @see #END_DOCUMENT
  861. */
  862. int next()
  863. throws XmlPullParserException, IOException;
  864. /**
  865. * This method works similarly to next() but will expose
  866. * additional event types (COMMENT, CDSECT, DOCDECL, ENTITY_REF, PROCESSING_INSTRUCTION, or
  867. * IGNORABLE_WHITESPACE) if they are available in input.
  868. *
  869. * <p>If special feature
  870. * <a href="http://xmlpull.org/v1/doc/features.html#xml-roundtrip">FEATURE_XML_ROUNDTRIP</a>
  871. * (identified by URI: http://xmlpull.org/v1/doc/features.html#xml-roundtrip)
  872. * is enabled it is possible to do XML document round trip, i.e. reproduce
  873. * exactly on output the XML input using getText():
  874. * returned content is always unnormalized (exactly as in input).
  875. * Otherwise returned content is end-of-line normalized as described in
  876. * <a href="http://www.w3.org/TR/REC-xml#sec-line-ends">XML 1.0 End-of-Line Handling</a>.
  877. * Also, when this feature is enabled, the exact content of START_TAG, END_TAG,
  878. * DOCDECL and PROCESSING_INSTRUCTION is available.
  879. *
  880. * <p>Here is the list of tokens that can be returned from nextToken()
  881. * and what getText() and getTextCharacters() return:<dl>
  882. * <dt>START_DOCUMENT<dd>null
  883. * <dt>END_DOCUMENT<dd>null
  884. * <dt>START_TAG<dd>null unless FEATURE_XML_ROUNDTRIP
  885. * is enabled and then returns XML tag, ex: &lt;tag attr='val'>
  886. * <dt>END_TAG<dd>null unless FEATURE_XML_ROUNDTRIP
  887. * is enabled and then returns XML tag, ex: &lt;/tag>
  888. * <dt>TEXT<dd>return element content.
  889. * <br>Note: element content may be delivered in multiple consecutive TEXT events.
  890. * <dt>IGNORABLE_WHITESPACE<dd>return characters that are determined to be ignorable white
  891. * space. If the FEATURE_XML_ROUNDTRIP is enabled all whitespace content outside root
  892. * element will always be reported as IGNORABLE_WHITESPACE, otherwise reporting is optional.
  893. * <br>Note: element content may be delivered in multiple consecutive IGNORABLE_WHITESPACE events.
  894. * <dt>CDSECT<dd>
  895. * return text <em>inside</em> CDATA
  896. * (ex. 'fo&lt;o' from &lt;![CDATA[fo&lt;o]]>)
  897. * <dt>PROCESSING_INSTRUCTION<dd>
  898. * if FEATURE_XML_ROUNDTRIP is true
  899. * return exact PI content ex: 'pi foo' from &lt;?pi foo?>
  900. * otherwise it may be exact PI content or concatenation of PI target,
  901. * space and data so for example for
  902. * &lt;?target data?> string &quot;target data&quot; may
  903. * be returned if FEATURE_XML_ROUNDTRIP is false.
  904. * <dt>COMMENT<dd>return comment content ex. 'foo bar' from &lt;!--foo bar-->
  905. * <dt>ENTITY_REF<dd>getText() MUST return entity replacement text if PROCESS_DOCDECL is false
  906. * otherwise getText() MAY return null,
  907. * additionally getTextCharacters() MUST return entity name
  908. * (for example 'entity_name' for &amp;entity_name;).
  909. * <br><b>NOTE:</b> this is the only place where values returned from getText() and
  910. * getTextCharacters() <b>are different</b>
  911. * <br><b>NOTE:</b> it is the user's responsibility to resolve the entity reference
  912. * if PROCESS_DOCDECL is false and there is no entity replacement text set in
  913. * defineEntityReplacementText() method (getText() will be null)
  914. * <br><b>NOTE:</b> character entities (ex. &amp;#32;) and standard entities such as
  915. * &amp;amp; &amp;lt; &amp;gt; &amp;quot; &amp;apos; are reported as well
  916. * and are <b>not</b> reported as TEXT tokens but as ENTITY_REF tokens!
  917. * This requirement is added to allow roundtrip of XML documents!
  918. * <dt>DOCDECL<dd>
  919. * if FEATURE_XML_ROUNDTRIP is true or PROCESS_DOCDECL is false
  920. * then return what is inside of DOCDECL for example it returns:<pre>
  921. * &quot; titlepage SYSTEM "http://www.foo.bar/dtds/typo.dtd"
  922. * [&lt;!ENTITY % active.links "INCLUDE">]&quot;</pre>
  923. * <p>for input document that contained:<pre>
  924. * &lt;!DOCTYPE titlepage SYSTEM "http://www.foo.bar/dtds/typo.dtd"
  925. * [&lt;!ENTITY % active.links "INCLUDE">]></pre>
  926. * otherwise if FEATURE_XML_ROUNDTRIP is false and PROCESS_DOCDECL is true
  927. * then what is returned is undefined (it may be even null)
  928. * </dd>
  929. * </dl>
  930. *
  931. * <p><strong>NOTE:</strong> there is no guarantee that there will be only one TEXT or
  932. * IGNORABLE_WHITESPACE event from nextToken() as the parser may choose to deliver element content in
  933. * multiple tokens (dividing element content into chunks)
  934. *
  935. * <p><strong>NOTE:</strong> whether returned text of token is end-of-line normalized
  936. * depends on FEATURE_XML_ROUNDTRIP.
  937. *
  938. * <p><strong>NOTE:</strong> XMLDecl (&lt;?xml ...?&gt;) is not reported but its content
  939. * is available through optional properties (see class description above).
  940. *
  941. * @see #next
  942. * @see #START_TAG
  943. * @see #TEXT
  944. * @see #END_TAG
  945. * @see #END_DOCUMENT
  946. * @see #COMMENT
  947. * @see #DOCDECL
  948. * @see #PROCESSING_INSTRUCTION
  949. * @see #ENTITY_REF
  950. * @see #IGNORABLE_WHITESPACE
  951. */
  952. int nextToken()
  953. throws XmlPullParserException, IOException;
  954. //-----------------------------------------------------------------------------
  955. // utility methods to make XML parsing easier ...
  956. /**
  957. * Tests if the current event is of the given type and if the
  958. * namespace and name match. null will match any namespace
  959. * and any name. If the test is not passed, an exception is
  960. * thrown. The exception text indicates the parser position,
  961. * the expected event and the current event that does not meet the
  962. * requirement.
  963. *
  964. * <p>Essentially it does this
  965. * <pre>
  966. * if (type != getEventType()
  967. * || (namespace != null &amp;&amp; !namespace.equals( getNamespace () ) )
  968. * || (name != null &amp;&amp; !name.equals( getName() ) ) )
  969. * throw new XmlPullParserException( "expected "+ TYPES[ type ]+getPositionDescription());
  970. * </pre>
  971. */
  972. void require(int type, String namespace, String name)
  973. throws XmlPullParserException, IOException;
  974. /**
  975. * If the current event is START_TAG, then if the next event is TEXT the element content is returned,
  976. * or if the next event is END_TAG an empty string is returned; otherwise an exception is thrown.
  977. * After calling this function successfully the parser will be positioned on END_TAG.
  978. *
  979. * <p>The motivation for this function is to allow consistent parsing of both
  980. * empty elements and elements that have non-empty content, for example for input: <ol>
  981. * <li>&lt;tag&gt;foo&lt;/tag&gt;
  982. * <li>&lt;tag&gt;&lt;/tag&gt; (which is equivalent to &lt;tag/&gt;)
  983. * </ol>both inputs can be parsed with the same code:
  984. * <pre>
  985. * p.nextTag();
  986. * p.require(p.START_TAG, "", "tag");
  987. * String content = p.nextText();
  988. * p.require(p.END_TAG, "", "tag");
  989. * </pre>
  990. * This function together with nextTag makes it very easy to parse XML that has
  991. * no mixed content.
  992. *
  993. *
  994. * <p>Essentially it does this
  995. * <pre>
  996. * if(getEventType() != START_TAG) {
  997. * throw new XmlPullParserException(
  998. * "parser must be on START_TAG to read next text", this, null);
  999. * }
  1000. * int eventType = next();
  1001. * if(eventType == TEXT) {
  1002. * String result = getText();
  1003. * eventType = next();
  1004. * if(eventType != END_TAG) {
  1005. * throw new XmlPullParserException(
  1006. * "event TEXT it must be immediately followed by END_TAG", this, null);
  1007. * }
  1008. * return result;
  1009. * } else if(eventType == END_TAG) {
  1010. * return "";
  1011. * } else {
  1012. * throw new XmlPullParserException(
  1013. * "parser must be on START_TAG or TEXT to read text", this, null);
  1014. * }
  1015. * </pre>
  1016. */
  1017. String nextText() throws XmlPullParserException, IOException;
  1018. /**
  1019. * Calls next() and returns the event if it is START_TAG or END_TAG,
  1020. * otherwise throws an exception.
  1021. * It will skip whitespace TEXT before the actual tag, if any.
  1022. *
  1023. * <p>Essentially it does this
  1024. * <pre>
  1025. * int eventType = next();
  1026. * if(eventType == TEXT &amp;&amp; isWhitespace()) { // skip whitespace
  1027. * eventType = next();
  1028. * }
  1029. * if (eventType != START_TAG &amp;&amp; eventType != END_TAG) {
  1030. * throw new XmlPullParserException("expected start or end tag", this, null);
  1031. * }
  1032. * return eventType;
  1033. * </pre>
  1034. */
  1035. int nextTag() throws XmlPullParserException, IOException;
  1036. }