12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117 |
- /* -*- c-basic-offset: 4; indent-tabs-mode: nil; -*- //------100-columns-wide------>|*/
- // for license please see accompanying XmlPull license file (available also at http://www.xmlpull.org/)
- package org.xmlpull.v1;
- import java.io.InputStream;
- import java.io.IOException;
- import java.io.Reader;
- /**
- * XML Pull Parser is an interface that defines parsing functionality provided
- * in <a href="http://www.xmlpull.org/">XMLPULL V1 API</a> (visit this website to
- * learn more about API and its implementations).
- *
- * <p>There are the following different
- * kinds of parser depending on which features are set:<ul>
- * <li><b>non-validating</b> parser as defined in XML 1.0 spec when
- * FEATURE_PROCESS_DOCDECL is set to true
- * <li><b>validating parser</b> as defined in XML 1.0 spec when
- * FEATURE_VALIDATION is true (and that implies that FEATURE_PROCESS_DOCDECL is true)
- * <li>when FEATURE_PROCESS_DOCDECL is false (this is the default; if a
- * different value is required, it must be changed before parsing is started)
- * then the parser behaves like an XML 1.0 compliant non-validating parser under the condition that
- * <em>no DOCDECL is present</em> in XML documents
- * (internal entities can still be defined with defineEntityReplacementText()).
- * This mode of operation is intended <b>for operation in constrained environments</b> such as J2ME.
- * </ul>
- *
- *
- * <p>There are two key methods: next() and nextToken(). While next() provides
- * access to high level parsing events, nextToken() allows access to lower
- * level tokens.
- *
- * <p>The current event state of the parser
- * can be determined by calling the
- * <a href="#getEventType()">getEventType()</a> method.
- * Initially, the parser is in the <a href="#START_DOCUMENT">START_DOCUMENT</a>
- * state.
- *
- * <p>The method <a href="#next()">next()</a> advances the parser to the
- * next event. The int value returned from next determines the current parser
- * state and is identical to the value returned from following calls to
- * getEventType ().
- *
- * <p>The following event types are seen by next()<dl>
- * <dt><a href="#START_TAG">START_TAG</a><dd> An XML start tag was read.
- * <dt><a href="#TEXT">TEXT</a><dd> Text content was read;
- * the text content can be retrieved using the getText() method.
- * (when in validating mode next() will not report ignorable whitespaces, use nextToken() instead)
- * <dt><a href="#END_TAG">END_TAG</a><dd> An end tag was read
- * <dt><a href="#END_DOCUMENT">END_DOCUMENT</a><dd> No more events are available
- * </dl>
- *
- * <p>after first next() or nextToken() (or any other next*() method)
- * is called user application can obtain
- * XML version, standalone and encoding from XML declaration
- * in following ways:<ul>
- * <li><b>version</b>:
- * getProperty("<a href="http://xmlpull.org/v1/doc/properties.html#xmldecl-version">http://xmlpull.org/v1/doc/properties.html#xmldecl-version</a>")
- * returns String ("1.0") or null if XMLDecl was not read or if property is not supported
- * <li><b>standalone</b>:
- * getProperty("<a href="http://xmlpull.org/v1/doc/features.html#xmldecl-standalone">http://xmlpull.org/v1/doc/features.html#xmldecl-standalone</a>")
- * returns Boolean: null if there was no standalone declaration
- * or if property is not supported
- * otherwise returns Boolean(true) if standalone="yes" and Boolean(false) when standalone="no"
- * <li><b>encoding</b>: obtained from getInputEncoding()
- * null if stream had unknown encoding (not set in setInputStream)
- * and it was not declared in XMLDecl
- * </ul>
- *
- * A minimal example for using this API may look as follows:
- * <pre>
- * import java.io.IOException;
- * import java.io.StringReader;
- *
- * import org.xmlpull.v1.IXmlPullParser;
- * import org.xmlpull.v1.<a href="XmlPullParserException.html">XmlPullParserException</a>;
- * import org.xmlpull.v1.<a href="XmlPullParserFactory.html">XmlPullParserFactory</a>;
- *
- * public class SimpleXmlPullApp
- * {
- *
- * public static void main (String args[])
- * throws XmlPullParserException, IOException
- * {
- * XmlPullParserFactory factory = XmlPullParserFactory.newInstance();
- * factory.setNamespaceAware(true);
- * IXmlPullParser xpp = factory.newPullParser();
- *
- * xpp.<a href="#setInput">setInput</a>( new StringReader ( "&lt;foo&gt;Hello World!&lt;/foo&gt;" ) );
- * int eventType = xpp.getEventType();
- * while (eventType != IXmlPullParser.END_DOCUMENT) {
- * if(eventType == IXmlPullParser.START_DOCUMENT) {
- * System.out.println("Start document");
- * } else if(eventType == IXmlPullParser.END_DOCUMENT) {
- * System.out.println("End document");
- * } else if(eventType == IXmlPullParser.START_TAG) {
- * System.out.println("Start tag "+xpp.<a href="#getName()">getName()</a>);
- * } else if(eventType == IXmlPullParser.END_TAG) {
- * System.out.println("End tag "+xpp.getName());
- * } else if(eventType == IXmlPullParser.TEXT) {
- * System.out.println("Text "+xpp.<a href="#getText()">getText()</a>);
- * }
- * eventType = xpp.next();
- * }
- * }
- * }
- * </pre>
- *
- * <p>The above example will generate the following output:
- * <pre>
- * Start document
- * Start tag foo
- * Text Hello World!
- * End tag foo
- * </pre>
- *
- * <p>For more details on API usage, please refer to the
- * quick Introduction available at <a href="http://www.xmlpull.org">http://www.xmlpull.org</a>
- *
- * @see XmlPullParserFactory
- * @see #defineEntityReplacementText
- * @see #getName
- * @see #getNamespace
- * @see #getText
- * @see #next
- * @see #nextToken
- * @see #setInput
- * @see #FEATURE_PROCESS_DOCDECL
- * @see #FEATURE_VALIDATION
- * @see #START_DOCUMENT
- * @see #START_TAG
- * @see #TEXT
- * @see #END_TAG
- * @see #END_DOCUMENT
- *
- * @author <a href="http://www-ai.cs.uni-dortmund.de/PERSONAL/haustein.html">Stefan Haustein</a>
- * @author <a href="http://www.extreme.indiana.edu/~aslom/">Aleksander Slominski</a>
- */
- public interface IXmlPullParser {
- /** This constant represents the default namespace (the empty string ""). */
- String NO_NAMESPACE = "";
- // ----------------------------------------------------------------------------
- // EVENT TYPES as reported by next()
- /**
- * Signals that the parser is at the very beginning of the document
- * and nothing was read yet.
- * This event type can only be observed by calling getEventType()
- * before the first call to next(), nextToken(), or nextTag().
- *
- * @see #next
- * @see #nextToken
- */
- int START_DOCUMENT = 0;
- /**
- * Logical end of the XML document. Returned from getEventType(), next()
- * and nextToken()
- * when the end of the input document has been reached.
- * <p><strong>NOTE:</strong> calling
- * <a href="#next()">next()</a> or <a href="#nextToken()">nextToken()</a>
- * again will result in an exception being thrown.
- *
- * @see #next
- * @see #nextToken
- */
- int END_DOCUMENT = 1;
- /**
- * Returned from getEventType(),
- * <a href="#next()">next()</a>, <a href="#nextToken()">nextToken()</a> when
- * a start tag was read.
- * The name of the start tag is available from getName(); its namespace and prefix are
- * available from getNamespace() and getPrefix()
- * if <a href='#FEATURE_PROCESS_NAMESPACES'>namespaces are enabled</a>.
- * See getAttribute* methods to retrieve element attributes.
- * See getNamespace* methods to retrieve newly declared namespaces.
- *
- * @see #next
- * @see #nextToken
- * @see #getName
- * @see #getPrefix
- * @see #getNamespace
- * @see #getAttributeCount
- * @see #getDepth
- * @see #getNamespaceCount
- * @see #FEATURE_PROCESS_NAMESPACES
- */
- int START_TAG = 2;
- /**
- * Returned from getEventType(), <a href="#next()">next()</a>, or
- * <a href="#nextToken()">nextToken()</a> when an end tag was read.
- * The name of the element is available from getName(); its
- * namespace and prefix are
- * available from getNamespace() and getPrefix().
- *
- * @see #next
- * @see #nextToken
- * @see #getName
- * @see #getPrefix
- * @see #getNamespace
- * @see #FEATURE_PROCESS_NAMESPACES
- */
- int END_TAG = 3;
- /**
- * Character data was read and is available by calling getText().
- * <p><strong>Please note:</strong> <a href="#next()">next()</a> will
- * accumulate multiple
- * events into one TEXT event, skipping IGNORABLE_WHITESPACE,
- * PROCESSING_INSTRUCTION and COMMENT events.
- * In contrast, <a href="#nextToken()">nextToken()</a> will stop reading
- * text when any other event is observed.
- * Also, when the state was reached by calling next(), the text value will
- * be normalized, whereas getText() will
- * return unnormalized content in the case of nextToken(). This allows
- * an exact roundtrip without changing line ends when examining low
- * level events, whereas for high level applications the text is
- * normalized appropriately.
- *
- * @see #next
- * @see #nextToken
- * @see #getText
- */
- int TEXT = 4;
- // ----------------------------------------------------------------------------
- // additional events exposed by lower level nextToken()
- /**
- * A CDATA section was just read;
- * this token is available only from calls to <a href="#nextToken()">nextToken()</a>.
- * A call to next() will accumulate various text events into a single event
- * of type TEXT. The text contained in the CDATA section is available
- * by calling getText().
- *
- * @see #nextToken
- * @see #getText
- */
- int CDSECT = 5;
- /**
- * An entity reference was just read;
- * this token is available from <a href="#nextToken()">nextToken()</a>
- * only. The entity name is available by calling getName(). If available,
- * the replacement text can be obtained by calling getText(); otherwise,
- * the user is responsible for resolving the entity reference.
- * This event type is never returned from next(); next() will
- * accumulate the replacement text and other text
- * events to a single TEXT event.
- *
- * @see #nextToken
- * @see #getName
- * @see #getText
- */
- int ENTITY_REF = 6;
- /**
- * Ignorable whitespace was just read.
- * This token is available only from <a href="#nextToken()">nextToken()</a>.
- * For non-validating
- * parsers, this event is only reported by nextToken() when outside
- * the root element.
- * Validating parsers may be able to detect ignorable whitespace at
- * other locations.
- * The ignorable whitespace string is available by calling getText().
- *
- * <p><strong>NOTE:</strong> this is different from calling the
- * isWhitespace() method, since text content
- * may be whitespace but not ignorable.
- *
- * <p>Ignorable whitespace is skipped by next() automatically; this event
- * type is never returned from next().
- *
- * @see #nextToken
- * @see #getText
- * @see #isWhitespace
- */
- int IGNORABLE_WHITESPACE = 7;
- /**
- * An XML processing instruction was just read. This
- * event type is available only via <a href="#nextToken()">nextToken()</a>.
- * getText() will return the text that is inside the processing instruction.
- * Calls to next() will skip processing instructions automatically.
- *
- * @see #nextToken
- * @see #getText
- */
- int PROCESSING_INSTRUCTION = 8;
- /**
- * An XML comment was just read. This token is
- * available via <a href="#nextToken()">nextToken()</a> only;
- * calls to next() will skip comments automatically.
- * The content of the comment can be accessed using the getText()
- * method.
- *
- * @see #nextToken
- * @see #getText
- */
- int COMMENT = 9;
- /**
- * An XML document type declaration was just read. This token is
- * available from <a href="#nextToken()">nextToken()</a> only;
- * it is ignored by next().
- * The unparsed text inside the doctype is available via
- * the getText() method.
- *
- * @see #nextToken
- * @see #getText
- * @see #FEATURE_PROCESS_DOCDECL
- */
- int DOCDECL = 10;
- /**
- * This array can be used to convert the event type integer constants
- * such as START_TAG or TEXT
- * to a string. For example, the value of TYPES[START_TAG] is
- * the string "START_TAG". The entry at index i is the name of the
- * event type constant whose value is i (START_DOCUMENT = 0 through
- * DOCDECL = 10).
- *
- * <p>This array is intended for diagnostic output only. Relying
- * on the contents of the array may be dangerous since malicious
- * applications may alter the array, although it is final, due
- * to limitations of the Java language.
- */
- String [] TYPES = {
- "START_DOCUMENT",
- "END_DOCUMENT",
- "START_TAG",
- "END_TAG",
- "TEXT",
- "CDSECT",
- "ENTITY_REF",
- "IGNORABLE_WHITESPACE",
- "PROCESSING_INSTRUCTION",
- "COMMENT",
- "DOCDECL"
- };
- // ----------------------------------------------------------------------------
- // namespace related features
- /**
- * This feature determines whether the parser processes
- * namespaces. As for all features, the default value is false.
- * <p><strong>NOTE:</strong> The value cannot be changed during
- * parsing and must be set before parsing.
- *
- * @see #getFeature
- * @see #setFeature
- */
- String FEATURE_PROCESS_NAMESPACES =
- "http://xmlpull.org/v1/doc/features.html#process-namespaces";
- /**
- * This feature determines whether namespace attributes are
- * exposed via the attribute access methods. Like all features,
- * the default value is false. This feature cannot be changed
- * during parsing.
- *
- * @see #getFeature
- * @see #setFeature
- * @see #getAttributeNamespace
- */
- String FEATURE_REPORT_NAMESPACE_ATTRIBUTES =
- "http://xmlpull.org/v1/doc/features.html#report-namespace-prefixes";
- /**
- * This feature determines whether the document declaration
- * is processed. If set to false,
- * the DOCDECL event type is reported by nextToken()
- * and ignored by next().
- *
- * <p>If this feature is activated, then the document declaration
- * must be processed by the parser.
- *
- * <p><strong>Please note:</strong> If the document type declaration
- * was ignored, entity references may cause exceptions
- * later in the parsing process.
- * The default value of this feature is false. It cannot be changed
- * during parsing.
- *
- * @see #getFeature
- * @see #setFeature
- * @see #DOCDECL
- */
- String FEATURE_PROCESS_DOCDECL =
- "http://xmlpull.org/v1/doc/features.html#process-docdecl";
- /**
- * If this feature is activated, all validation errors as
- * defined in the XML 1.0 specification are reported.
- * This implies that FEATURE_PROCESS_DOCDECL is true and that both the
- * internal and external document type declaration will be processed.
- * <p><strong>Please note:</strong> This feature cannot be changed
- * during parsing. The default value is false.
- *
- * @see #getFeature
- * @see #setFeature
- * @see #FEATURE_PROCESS_DOCDECL
- */
- String FEATURE_VALIDATION =
- "http://xmlpull.org/v1/doc/features.html#validation";
- /**
- * Use this call to change the general behaviour of the parser,
- * such as namespace processing or doctype declaration handling.
- * This method must be called before the first call to next or
- * nextToken. Otherwise, an exception is thrown.
- * <p>Example: call setFeature(FEATURE_PROCESS_NAMESPACES, true) in order
- * to switch on namespace processing. The initial settings correspond
- * to the properties requested from the XML Pull Parser factory.
- * If none were requested, all features are deactivated by default.
- *
- * @param name The name of the feature to be set.
- * @param state The new value of the feature.
- * @exception XmlPullParserException If the feature is not supported or can not be set
- * @exception IllegalArgumentException If string with the feature name is null
- */
- void setFeature(String name,
- boolean state) throws XmlPullParserException;
- /**
- * Returns the current value of the given feature.
- * <p><strong>Please note:</strong> unknown features are
- * <strong>always</strong> returned as false.
- *
- * @param name The name of feature to be retrieved.
- * @return The value of the feature (false for unknown features).
- * @exception IllegalArgumentException If string with the feature name is null
- */
- boolean getFeature(String name);
- /**
- * Set the value of a property.
- *
- * The property name is any fully-qualified URI.
- *
- * @param name The name of the property to be set.
- * @param value The new value of the property.
- * @exception XmlPullParserException If the property is not supported or can not be set
- * @exception IllegalArgumentException If string with the property name is null
- */
- void setProperty(String name,
- Object value) throws XmlPullParserException;
- /**
- * Look up the value of a property.
- *
- * The property name is any fully-qualified URI.
- * <p><strong>NOTE:</strong> unknown properties are <strong>always</strong>
- * returned as null.
- *
- * @param name The name of property to be retrieved.
- * @return The value of named property (null for unknown properties).
- */
- Object getProperty(String name);
- /**
- * Sets the input source for the parser to the given reader and
- * resets the parser. The event type is set to the initial value
- * START_DOCUMENT.
- * Setting the reader to null will just stop parsing and
- * reset parser state,
- * allowing the parser to free internal resources
- * such as parsing buffers.
- *
- * @param in the reader to parse from, or null to stop parsing
- * and reset the parser state
- */
- void setInput(Reader in) throws XmlPullParserException;
- /**
- * Sets the input stream the parser is going to process.
- * This call resets the parser state and sets the event type
- * to the initial value START_DOCUMENT.
- *
- * <p><strong>NOTE:</strong> If an input encoding string is passed,
- * it MUST be used. Otherwise,
- * if inputEncoding is null, the parser SHOULD try to determine
- * input encoding following XML 1.0 specification (see below).
- * If encoding detection is supported then following feature
- * <a href="http://xmlpull.org/v1/doc/features.html#detect-encoding">http://xmlpull.org/v1/doc/features.html#detect-encoding</a>
- * MUST be true and otherwise it must be false.
- *
- * @param inputStream contains a raw byte input stream of possibly
- * unknown encoding (when inputEncoding is null).
- *
- * @param inputEncoding if not null it MUST be used as encoding for inputStream
- *
- * @see #getInputEncoding
- */
- void setInput(InputStream inputStream, String inputEncoding)
- throws XmlPullParserException;
- /**
- * Returns the input encoding if known, null otherwise.
- * If setInput(InputStream, inputEncoding) was called with an inputEncoding
- * value other than null, this value must be returned
- * from this method. Otherwise, if inputEncoding is null and
- * the parser supports the encoding detection feature
- * (http://xmlpull.org/v1/doc/features.html#detect-encoding),
- * it must return the detected encoding.
- * If setInput(Reader) was called, null is returned.
- * After the first call to next, if an XML declaration was present, this method
- * will return the encoding declared.
- *
- * @return the input encoding if known, null otherwise.
- */
- String getInputEncoding();
- /**
- * Set new value for entity replacement text as defined in
- * <a href="http://www.w3.org/TR/REC-xml#intern-replacement">XML 1.0 Section 4.5
- * Construction of Internal Entity Replacement Text</a>.
- * If FEATURE_PROCESS_DOCDECL or FEATURE_VALIDATION are set, calling this
- * function will result in an exception -- when processing of DOCDECL is
- * enabled, there is no need to define the entity replacement text manually.
- *
- * <p>The motivation for this function is to allow very small
- * implementations of XMLPULL that will work in J2ME environments.
- * Though these implementations may not be able to process the document type
- * declaration, they still can work with known DTDs by using this function.
- *
- * <p><b>Please note:</b> The given value is used literally as replacement text
- * and it corresponds to declaring entity in DTD that has all special characters
- * escaped: left angle bracket is replaced with &amp;lt;, ampersand with &amp;amp;
- * and so on.
- *
- * <p><b>Note:</b> The given value is the literal replacement text and must not
- * contain any other entity reference (if it contains any entity reference
- * there will be no further replacement).
- *
- * <p><b>Note:</b> The list of pre-defined entity names will
- * always contain standard XML entities such as
- * amp (&amp;), lt (&lt;), gt (&gt;), quot (&quot;), and apos (&apos;).
- * Those cannot be redefined by this method!
- *
- * @param entityName the name of the entity to define
- * @param replacementText the literal replacement text for the entity
- *
- * @see #setInput
- * @see #FEATURE_PROCESS_DOCDECL
- * @see #FEATURE_VALIDATION
- */
- void defineEntityReplacementText( String entityName,
- String replacementText ) throws XmlPullParserException;
- /**
- * Returns the number of elements in the namespace stack for the given
- * depth.
- * If namespaces are not enabled, 0 is returned.
- *
- * <p><b>NOTE:</b> when parser is on END_TAG then it is allowed to call
- * this function with getDepth()+1 argument to retrieve position of namespace
- * prefixes and URIs that were declared on corresponding START_TAG.
- * <p><b>NOTE:</b> to retrieve list of namespaces declared in current element:<pre>
- * IXmlPullParser pp = ...
- * int nsStart = pp.getNamespaceCount(pp.getDepth()-1);
- * int nsEnd = pp.getNamespaceCount(pp.getDepth());
- * for (int i = nsStart; i &lt; nsEnd; i++) {
- * String prefix = pp.getNamespacePrefix(i);
- * String ns = pp.getNamespaceUri(i);
- * // ...
- * }
- * </pre>
- *
- * @param depth the element depth for which the namespace stack size is requested
- * @return the number of elements in the namespace stack for the given depth,
- * or 0 if namespaces are not enabled
- *
- * @see #getNamespacePrefix
- * @see #getNamespaceUri
- * @see #getNamespace()
- * @see #getNamespace(String)
- */
- int getNamespaceCount(int depth) throws XmlPullParserException;
- /**
- * Returns the namespace prefix for the given position
- * in the namespace stack.
- * Default namespace declaration (xmlns='...') will have null as prefix.
- * If the given index is out of range, an exception is thrown.
- * <p><b>Please note:</b> when the parser is on an END_TAG,
- * namespace prefixes that were declared
- * in the corresponding START_TAG are still accessible
- * although they are no longer in scope.
- *
- * @param pos position in the namespace stack
- * @return the prefix at the given position, or null for a default
- * namespace declaration
- */
- String getNamespacePrefix(int pos) throws XmlPullParserException;
- /**
- * Returns the namespace URI for the given position in the
- * namespace stack.
- * If the position is out of range, an exception is thrown.
- * <p><b>NOTE:</b> when parser is on END_TAG then namespace prefixes that were declared
- * in corresponding START_TAG are still accessible even though they are not in scope.
- *
- * @param pos position in the namespace stack
- * @return the namespace URI at the given position
- */
- String getNamespaceUri(int pos) throws XmlPullParserException;
- /**
- * Returns the URI corresponding to the given prefix,
- * depending on current state of the parser.
- *
- * <p>If the prefix was not declared in the current scope,
- * null is returned. The default namespace is included
- * in the namespace table and is available via
- * getNamespace (null).
- *
- * <p>This method is a convenience method for
- *
- * <pre>
- * for (int i = getNamespaceCount(getDepth ())-1; i >= 0; i--) {
- * if (getNamespacePrefix(i).equals( prefix )) {
- * return getNamespaceUri(i);
- * }
- * }
- * return null;
- * </pre>
- *
- * <p><strong>Please note:</strong> the loop above is a sketch only:
- * the default namespace declaration is reported with a null prefix
- * by getNamespacePrefix(), so a null-safe comparison is needed for
- * the lookup to work for getNamespace (null).
- *
- * <p><strong>Please note:</strong> parser implementations
- * may provide more efficient lookup, e.g. using a Hashtable.
- * The 'xml' prefix is bound to "http://www.w3.org/XML/1998/namespace", as
- * defined in the
- * <a href="http://www.w3.org/TR/REC-xml-names/#ns-using">Namespaces in XML</a>
- * specification. Analogously, the 'xmlns' prefix is resolved to
- * <a href="http://www.w3.org/2000/xmlns/">http://www.w3.org/2000/xmlns/</a>
- *
- * @param prefix the namespace prefix to look up, or null for the default namespace
- * @return the URI bound to the prefix, or null if the prefix was not
- * declared in the current scope
- *
- * @see #getNamespaceCount
- * @see #getNamespacePrefix
- * @see #getNamespaceUri
- */
- String getNamespace (String prefix);
- // --------------------------------------------------------------------------
- // miscellaneous reporting methods
- /**
- * Returns the current depth of the element.
- * Outside the root element, the depth is 0. The
- * depth is incremented by 1 when a start tag is reached.
- * The depth is decremented AFTER the end tag
- * event was observed.
- *
- * <pre>
- * &lt;!-- outside --&gt; 0
- * &lt;root&gt; 1
- * sometext 1
- * &lt;foobar&gt; 2
- * &lt;/foobar&gt; 2
- * &lt;/root&gt; 1
- * &lt;!-- outside --&gt; 0
- * </pre>
- *
- * @return the current element depth (0 outside the root element)
- */
- int getDepth();
- /**
- * Returns a short text describing the current parser state, including
- * the position, a
- * description of the current event and the data source if known.
- * This method is especially useful to provide meaningful
- * error messages and for debugging purposes.
- *
- * @return a short description of the current parser state
- */
- String getPositionDescription ();
- /**
- * Returns the current line number, starting from 1.
- * When the parser does not know the current line number
- * or cannot determine it, -1 is returned (e.g. for WBXML).
- *
- * @return current line number or -1 if unknown.
- */
- int getLineNumber();
- /**
- * Returns the current column number, starting from 0.
- * When the parser does not know the current column number
- * or cannot determine it, -1 is returned (e.g. for WBXML).
- *
- * @return current column number or -1 if unknown.
- */
- int getColumnNumber();
- // --------------------------------------------------------------------------
- // TEXT related methods
- /**
- * Checks whether the current TEXT event contains only whitespace
- * characters.
- * For IGNORABLE_WHITESPACE, this is always true.
- * For TEXT and CDSECT, false is returned when the current event text
- * contains at least one non-white space character. For any other
- * event type an exception is thrown.
- *
- * <p><b>Please note:</b> non-validating parsers are not
- * able to distinguish whitespace and ignorable whitespace,
- * except for whitespace outside the root element. Ignorable
- * whitespace is reported as separate event, which is exposed
- * via nextToken only.
- *
- * @return true if the text of the current event consists only of
- * whitespace characters
- * @exception XmlPullParserException if the current event is not TEXT,
- * CDSECT or IGNORABLE_WHITESPACE
- */
- boolean isWhitespace() throws XmlPullParserException;
- /**
- * Returns the text content of the current event as String.
- * The value returned depends on current event type,
- * for example for TEXT event it is element content
- * (this is typical case when next() is used).
- *
- * See description of nextToken() for detailed description of
- * possible returned values for different types of events.
- *
- * <p><strong>NOTE:</strong> in case of ENTITY_REF, this method returns
- * the entity replacement text (or null if not available). This is
- * the only case where
- * getText() and getTextCharacters() return different values.
- *
- * @return the text content of the current event
- *
- * @see #getEventType
- * @see #next
- * @see #nextToken
- */
- String getText ();
- /**
- * Returns the buffer that contains the text of the current event,
- * as well as the start offset and length relevant for the current
- * event. See getText(), next() and nextToken() for description of possible returned values.
- *
- * <p><strong>Please note:</strong> this buffer must not
- * be modified and its content MAY change after a call to
- * next() or nextToken(). This method will always return the
- * same value as getText(), except for ENTITY_REF. In the case
- * of ENTITY_REF, getText() returns the replacement text and
- * this method returns the actual input buffer containing the
- * entity name.
- * If getText() returns null, this method returns null as well and
- * the values returned in the holder array MUST be -1 (both start
- * and length).
- *
- * @see #getText
- * @see #next
- * @see #nextToken
- *
- * @param holderForStartAndLength Must hold a 2-element int array
- * into which the start offset and length values will be written.
- * @return char buffer that contains the text of the current event
- * (null if the current event has no text associated).
- */
- char[] getTextCharacters(int [] holderForStartAndLength);
- // --------------------------------------------------------------------------
- // START_TAG / END_TAG shared methods
- /**
- * Returns the namespace URI of the current element.
- * The default namespace is represented
- * as empty string.
- * If namespaces are not enabled, an empty String ("") is always returned.
- * The current event must be START_TAG or END_TAG; otherwise,
- * null is returned.
- *
- * @return the namespace URI of the current element, or null if the
- * current event is neither START_TAG nor END_TAG
- */
- String getNamespace ();
- /**
- * For START_TAG or END_TAG events, the (local) name of the current
- * element is returned when namespaces are enabled. When namespace
- * processing is disabled, the raw name is returned.
- * For ENTITY_REF events, the entity name is returned.
- * If the current event is not START_TAG, END_TAG, or ENTITY_REF,
- * null is returned.
- * <p><b>Please note:</b> To reconstruct the raw element name
- * when namespaces are enabled and the prefix is not null,
- * you will need to add the prefix and a colon to localName.
- *
- * @return the element or entity name, or null if the current event
- * is not START_TAG, END_TAG, or ENTITY_REF
- */
- String getName();
- /**
- * Returns the prefix of the current element.
- * If the element is in the default namespace (has no prefix),
- * null is returned.
- * If namespaces are not enabled, or the current event
- * is not START_TAG or END_TAG, null is returned.
- *
- * @return the prefix of the current element, or null in the cases
- * described above
- */
- String getPrefix();
- /**
- * Returns true if the current event is START_TAG and the tag
- * is degenerated
- * (e.g. &lt;foobar/&gt;).
- * <p><b>NOTE:</b> if the parser is not on START_TAG, an exception
- * will be thrown.
- *
- * @return true if the current start tag is an empty element tag
- * @exception XmlPullParserException if the parser is not on START_TAG
- */
- boolean isEmptyElementTag() throws XmlPullParserException;
- // --------------------------------------------------------------------------
- // START_TAG Attributes retrieval methods
- /**
- * Returns the number of attributes of the current start tag, or
- * -1 if the current event type is not START_TAG.
- *
- * @return the attribute count of the current start tag, or -1 if the
- * current event type is not START_TAG
- *
- * @see #getAttributeNamespace
- * @see #getAttributeName
- * @see #getAttributePrefix
- * @see #getAttributeValue
- */
- int getAttributeCount();
- /**
- * Returns the namespace URI of the attribute
- * with the given index (starts from 0).
- * Returns an empty string ("") if namespaces are not enabled
- * or the attribute has no namespace.
- * Throws an IndexOutOfBoundsException if the index is out of range
- * or the current event type is not START_TAG.
- *
- * <p><strong>NOTE:</strong> if FEATURE_REPORT_NAMESPACE_ATTRIBUTES is set
- * then namespace attributes (xmlns:ns='...') must be reported
- * with namespace
- * <a href="http://www.w3.org/2000/xmlns/">http://www.w3.org/2000/xmlns/</a>
- * (visit this URL for description!).
- * The default namespace attribute (xmlns="...") will be reported with empty namespace.
- * <p><strong>NOTE:</strong> The xml prefix is bound, as defined in the
- * <a href="http://www.w3.org/TR/REC-xml-names/#ns-using">Namespaces in XML</a>
- * specification, to "http://www.w3.org/XML/1998/namespace".
- *
- * @param index zero-based index of the attribute
- * @return attribute namespace;
- * an empty string ("") is returned if namespaces processing is not enabled or
- * namespaces processing is enabled but the attribute has no namespace (it has no prefix)
- * @throws IndexOutOfBoundsException if the index is out of range
- * or the current event type is not START_TAG
- */
- String getAttributeNamespace (int index);
- /**
- * Returns the local name of the specified attribute
- * if namespaces are enabled, or just the attribute name if namespaces are disabled.
- * Throws an IndexOutOfBoundsException if the index is out of range
- * or the current event type is not START_TAG.
- *
- * @param index zero-based index of the attribute
- * @return attribute name (null is never returned)
- * @throws IndexOutOfBoundsException if the index is out of range
- * or the current event type is not START_TAG
- */
- String getAttributeName (int index);
- /**
- * Returns the prefix of the specified attribute.
- * Returns null if the attribute has no prefix.
- * If namespaces are disabled it will always return null.
- * Throws an IndexOutOfBoundsException if the index is out of range
- * or the current event type is not START_TAG.
- *
- * @param index zero-based index of the attribute
- * @return attribute prefix, or null if the attribute has no prefix
- * or namespaces processing is not enabled
- * @throws IndexOutOfBoundsException if the index is out of range
- * or the current event type is not START_TAG
- */
- String getAttributePrefix(int index);
- /**
- * Returns the type of the specified attribute.
- * If the parser is non-validating it MUST return CDATA.
- *
- * @param index zero-based index of the attribute
- * @return attribute type (null is never returned)
- */
- String getAttributeType(int index);
- /**
- * Returns true if the specified attribute was not in the input
- * but was declared in XML.
- * If the parser is non-validating it MUST always return false.
- * This information is part of the XML infoset.
- *
- * @param index zero-based index of the attribute
- * @return false if the attribute was in the input
- */
- boolean isAttributeDefault(int index);
- /**
- * Returns the value of the attribute with the given index.
- * Throws an IndexOutOfBoundsException if the index is out of range
- * or the current event type is not START_TAG.
- *
- * <p><strong>NOTE:</strong> attribute value must be normalized
- * (including entity replacement text if PROCESS_DOCDECL is false) as described in
- * <a href="http://www.w3.org/TR/REC-xml#AVNormalize">XML 1.0 section
- * 3.3.3 Attribute-Value Normalization</a>.
- *
- * @param index zero-based index of the attribute
- * @return value of the attribute (null is never returned)
- * @throws IndexOutOfBoundsException if the index is out of range
- * or the current event type is not START_TAG
- * @see #defineEntityReplacementText
- */
- String getAttributeValue(int index);
- /**
- * Returns the value of the attribute identified by namespace URI and local name.
- * If namespaces are disabled, namespace must be null.
- * If the current event type is not START_TAG then an IndexOutOfBoundsException will be thrown.
- *
- * <p><strong>NOTE:</strong> attribute value must be normalized
- * (including entity replacement text if PROCESS_DOCDECL is false) as described in
- * <a href="http://www.w3.org/TR/REC-xml#AVNormalize">XML 1.0 section
- * 3.3.3 Attribute-Value Normalization</a>.
- *
- * @param namespace namespace of the attribute if namespaces are enabled, otherwise must be null
- * @param name local name of the attribute if namespaces are enabled, otherwise just the attribute name
- * @return value of the attribute, or null if an attribute with the given name does not exist
- * @throws IndexOutOfBoundsException if the current event type is not START_TAG
- * @see #defineEntityReplacementText
- */
- String getAttributeValue(String namespace,
- String name);
- // --------------------------------------------------------------------------
- // actual parsing methods
- /**
- * Returns the type of the current event (START_TAG, END_TAG, TEXT, etc.).
- *
- * @return the type of the current event
- * @see #next()
- * @see #nextToken()
- */
- int getEventType()
- throws XmlPullParserException;
- /**
- * Get next parsing event - element content will be coalesced and only one
- * TEXT event must be returned for whole element content
- * (comments and processing instructions will be ignored and entity references
- * must be expanded, or an exception must be thrown if an entity reference cannot be expanded).
- * If element content is empty (content is "") then no TEXT event will be reported.
- *
- * <p><b>NOTE:</b> an empty element (such as &lt;tag/&gt;) will be reported
- * with two separate events: START_TAG, END_TAG - it must be so to preserve
- * parsing equivalency of an empty element to &lt;tag&gt;&lt;/tag&gt;.
- * (see isEmptyElementTag ())
- *
- * @return the type of the next parsing event
- * @see #isEmptyElementTag
- * @see #START_TAG
- * @see #TEXT
- * @see #END_TAG
- * @see #END_DOCUMENT
- */
- int next()
- throws XmlPullParserException, IOException;
- /**
- * This method works similarly to next() but will expose
- * additional event types (COMMENT, CDSECT, DOCDECL, ENTITY_REF, PROCESSING_INSTRUCTION, or
- * IGNORABLE_WHITESPACE) if they are available in input.
- *
- * <p>If special feature
- * <a href="http://xmlpull.org/v1/doc/features.html#xml-roundtrip">FEATURE_XML_ROUNDTRIP</a>
- * (identified by URI: http://xmlpull.org/v1/doc/features.html#xml-roundtrip)
- * is enabled it is possible to do XML document round trip, i.e. reproduce
- * exactly on output the XML input using getText():
- * returned content is always unnormalized (exactly as in input).
- * Otherwise returned content is end-of-line normalized as described in
- * <a href="http://www.w3.org/TR/REC-xml#sec-line-ends">XML 1.0 End-of-Line Handling</a>.
- * Also, when this feature is enabled the exact content of START_TAG, END_TAG,
- * DOCDECL and PROCESSING_INSTRUCTION is available.
- *
- * <p>Here is the list of tokens that can be returned from nextToken()
- * and what getText() and getTextCharacters() return:<dl>
- * <dt>START_DOCUMENT<dd>null
- * <dt>END_DOCUMENT<dd>null
- * <dt>START_TAG<dd>null unless FEATURE_XML_ROUNDTRIP
- * is enabled and then returns XML tag, ex: &lt;tag attr='val'&gt;
- * <dt>END_TAG<dd>null unless FEATURE_XML_ROUNDTRIP
- * is enabled and then returns XML tag, ex: &lt;/tag&gt;
- * <dt>TEXT<dd>return element content.
- * <br>Note: element content may be delivered in multiple consecutive TEXT events.
- * <dt>IGNORABLE_WHITESPACE<dd>return characters that are determined to be ignorable white
- * space. If FEATURE_XML_ROUNDTRIP is enabled all whitespace content outside the root
- * element will always be reported as IGNORABLE_WHITESPACE, otherwise reporting is optional.
- * <br>Note: element content may be delivered in multiple consecutive IGNORABLE_WHITESPACE events.
- * <dt>CDSECT<dd>
- * return text <em>inside</em> CDATA
- * (ex. 'fo&lt;o' from &lt;![CDATA[fo&lt;o]]&gt;)
- * <dt>PROCESSING_INSTRUCTION<dd>
- * if FEATURE_XML_ROUNDTRIP is true
- * return exact PI content ex: 'pi foo' from &lt;?pi foo?&gt;
- * otherwise it may be exact PI content or concatenation of PI target,
- * space and data so for example for
- * &lt;?target data?&gt; string "target data" may
- * be returned if FEATURE_XML_ROUNDTRIP is false.
- * <dt>COMMENT<dd>return comment content ex. 'foo bar' from &lt;!--foo bar--&gt;
- * <dt>ENTITY_REF<dd>getText() MUST return entity replacement text if PROCESS_DOCDECL is false
- * otherwise getText() MAY return null,
- * additionally getTextCharacters() MUST return entity name
- * (for example 'entity_name' for &amp;entity_name;).
- * <br><b>NOTE:</b> this is the only place where the values returned from getText() and
- * getTextCharacters() <b>are different</b>
- * <br><b>NOTE:</b> it is the user's responsibility to resolve the entity reference
- * if PROCESS_DOCDECL is false and there is no entity replacement text set in
- * defineEntityReplacementText() method (getText() will be null)
- * <br><b>NOTE:</b> character entities (ex. &amp;#32;) and standard entities such as
- * &amp;amp; &amp;lt; &amp;gt; &amp;quot; &amp;apos; are reported as well
- * and are <b>not</b> reported as TEXT tokens but as ENTITY_REF tokens!
- * This requirement is added to allow roundtrip of XML documents!
- * <dt>DOCDECL<dd>
- * if FEATURE_XML_ROUNDTRIP is true or PROCESS_DOCDECL is false
- * then return what is inside of DOCDECL, for example it returns:<pre>
- * " titlepage SYSTEM "http://www.foo.bar/dtds/typo.dtd"
- * [&lt;!ENTITY % active.links "INCLUDE"&gt;]"</pre>
- * <p>for input document that contained:<pre>
- * &lt;!DOCTYPE titlepage SYSTEM "http://www.foo.bar/dtds/typo.dtd"
- * [&lt;!ENTITY % active.links "INCLUDE"&gt;]&gt;</pre>
- * otherwise if FEATURE_XML_ROUNDTRIP is false and PROCESS_DOCDECL is true
- * then what is returned is undefined (it may be even null)
- * </dd>
- * </dl>
- *
- * <p><strong>NOTE:</strong> there is no guarantee that there will be only one TEXT or
- * IGNORABLE_WHITESPACE event from nextToken() as the parser may choose to deliver element content in
- * multiple tokens (dividing element content into chunks).
- *
- * <p><strong>NOTE:</strong> whether the returned text of a token is end-of-line normalized
- * depends on FEATURE_XML_ROUNDTRIP.
- *
- * <p><strong>NOTE:</strong> XMLDecl (&lt;?xml ...?&gt;) is not reported but its content
- * is available through optional properties (see class description above).
- *
- * @return the type of the next token
- * @see #next
- * @see #START_TAG
- * @see #TEXT
- * @see #END_TAG
- * @see #END_DOCUMENT
- * @see #COMMENT
- * @see #DOCDECL
- * @see #PROCESSING_INSTRUCTION
- * @see #ENTITY_REF
- * @see #IGNORABLE_WHITESPACE
- */
- int nextToken()
- throws XmlPullParserException, IOException;
- //-----------------------------------------------------------------------------
- // utility methods to make XML parsing easier ...
- /**
- * Test if the current event is of the given type and if the
- * namespace and name do match. null will match any namespace
- * and any name. If the test is not passed, an exception is
- * thrown. The exception text indicates the parser position,
- * the expected event and the current event that is not meeting the
- * requirement.
- *
- * <p>Essentially it does this
- * <pre>
- * if (type != getEventType()
- * || (namespace != null &amp;&amp; !namespace.equals( getNamespace () ) )
- * || (name != null &amp;&amp; !name.equals( getName() ) ) )
- * throw new XmlPullParserException( "expected "+ TYPES[ type ]+getPositionDescription());
- * </pre>
- *
- * @param type the expected event type
- * @param namespace the expected namespace, or null to match any namespace
- * @param name the expected name, or null to match any name
- * @throws XmlPullParserException if the current event does not match
- */
- void require(int type, String namespace, String name)
- throws XmlPullParserException, IOException;
- /**
- * If the current event is START_TAG then if the next event is TEXT then element content is returned,
- * or if the next event is END_TAG then an empty string is returned; otherwise an exception is thrown.
- * After calling this function successfully the parser will be positioned on END_TAG.
- *
- * <p>The motivation for this function is to allow consistent parsing of both
- * empty elements and elements that have non-empty content, for example for input: <ol>
- * <li>&lt;tag&gt;foo&lt;/tag&gt;
- * <li>&lt;tag&gt;&lt;/tag&gt; (which is equivalent to &lt;tag/&gt;)
- * </ol>
- * both inputs can be parsed with the same code:
- * <pre>
- * p.nextTag()
- * p.require(p.START_TAG, "", "tag");
- * String content = p.nextText();
- * p.require(p.END_TAG, "", "tag");
- * </pre>
- * This function together with nextTag makes it very easy to parse XML that has
- * no mixed content.
- *
- *
- * <p>Essentially it does this
- * <pre>
- * if(getEventType() != START_TAG) {
- * throw new XmlPullParserException(
- * "parser must be on START_TAG to read next text", this, null);
- * }
- * int eventType = next();
- * if(eventType == TEXT) {
- * String result = getText();
- * eventType = next();
- * if(eventType != END_TAG) {
- * throw new XmlPullParserException(
- * "event TEXT it must be immediately followed by END_TAG", this, null);
- * }
- * return result;
- * } else if(eventType == END_TAG) {
- * return "";
- * } else {
- * throw new XmlPullParserException(
- * "parser must be on START_TAG or TEXT to read text", this, null);
- * }
- * </pre>
- *
- * @return the element content, or an empty string if the element is empty
- * @throws XmlPullParserException if the parser is not on START_TAG, or the
- * START_TAG is not followed by TEXT and END_TAG or directly by END_TAG
- * @see #nextTag
- * @see #require
- */
- String nextText() throws XmlPullParserException, IOException;
- /**
- * Call next() and return the event if it is START_TAG or END_TAG,
- * otherwise throw an exception.
- * It will skip whitespace TEXT before the actual tag, if any.
- *
- * <p>Essentially it does this
- * <pre>
- * int eventType = next();
- * if(eventType == TEXT &amp;&amp; isWhitespace()) { // skip whitespace
- * eventType = next();
- * }
- * if (eventType != START_TAG &amp;&amp; eventType != END_TAG) {
- * throw new XmlPullParserException("expected start or end tag", this, null);
- * }
- * return eventType;
- * </pre>
- *
- * @return START_TAG or END_TAG
- * @throws XmlPullParserException if the next event (after optional
- * whitespace TEXT) is neither START_TAG nor END_TAG
- * @see #next
- */
- int nextTag() throws XmlPullParserException, IOException;
- }
|