XMLWriter.java 68 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932
  1. /* XMLWriter.java --
  2. Copyright (C) 1999,2000,2001 Free Software Foundation, Inc.
  3. This file is part of GNU Classpath.
  4. GNU Classpath is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2, or (at your option)
  7. any later version.
  8. GNU Classpath is distributed in the hope that it will be useful, but
  9. WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with GNU Classpath; see the file COPYING. If not, write to the
  14. Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  15. 02110-1301 USA.
  16. Linking this library statically or dynamically with other modules is
  17. making a combined work based on this library. Thus, the terms and
  18. conditions of the GNU General Public License cover the whole
  19. combination.
  20. As a special exception, the copyright holders of this library give you
  21. permission to link this library with independent modules to produce an
  22. executable, regardless of the license terms of these independent
  23. modules, and to copy and distribute the resulting executable under
  24. terms of your choice, provided that you also meet, for each linked
  25. independent module, the terms and conditions of the license of that
  26. module. An independent module is a module which is not derived from
  27. or based on this library. If you modify this library, you may extend
  28. this exception to your version of the library, but you are not
  29. obligated to do so. If you do not wish to do so, delete this
  30. exception statement from your version. */
  31. package gnu.xml.util;
  32. import gnu.java.lang.CPStringBuilder;
  33. import java.io.BufferedWriter;
  34. import java.io.CharConversionException;
  35. import java.io.IOException;
  36. import java.io.OutputStream;
  37. import java.io.OutputStreamWriter;
  38. import java.io.Writer;
  39. import java.util.Stack;
  40. import org.xml.sax.*;
  41. import org.xml.sax.ext.*;
  42. import org.xml.sax.helpers.*;
  43. /**
  44. * This class is a SAX handler which writes all its input as a well formed
  45. * XML or XHTML document. If driven using SAX2 events, this output may
  46. * include a recreated document type declaration, subject to limitations
  47. * of SAX (no internal subset exposed) or DOM (the important declarations,
  48. * with their documentation, are discarded).
  49. *
  50. * <p> By default, text is generated "as-is", but some optional modes
  51. * are supported. Pretty-printing is supported, to make life easier
  52. * for people reading the output. XHTML (1.0) output can be made
  53. * particularly pretty; all the built-in character entities are known.
  54. * Canonical XML can also be generated, assuming the input is properly
  55. * formed.
  56. *
  57. * <hr>
  58. *
  59. * <p> Some of the methods on this class are intended for applications to
  60. * use directly, rather than as pure SAX2 event callbacks. Some of those
  61. * methods access the JavaBeans properties (used to tweak output formats,
  62. * for example canonicalization and pretty printing). Subclasses
  63. * are expected to add new behaviors, not to modify current behavior, so
  64. * many such methods are final.</p>
  65. *
  66. * <p> The <em>write*()</em> methods may be slightly simpler for some
  67. * applications to use than direct callbacks. For example, they support
  68. * a simple policy for encoding data items as the content of a single element.
  69. *
  70. * <p> To reuse an XMLWriter you must provide it with a new Writer, since
  71. * this handler closes the writer it was given as part of its endDocument()
  72. * handling. (XML documents have an end of input, and the way to encode
  73. * that on a stream is to close it.) </p>
  74. *
  75. * <hr>
  76. *
  77. * <p> Note that any relative URIs in the source document, as found in
  78. * entity and notation declarations, ought to have been fully resolved by
  79. * the parser providing events to this handler. This means that the
  80. * output text should only have fully resolved URIs, which may not be
  81. * the desired behavior in cases where later binding is desired. </p>
  82. *
  83. * <p> <em>Note that due to SAX2 defaults, you may need to manually
  84. * ensure that the input events are XML-conformant with respect to namespace
  85. * prefixes and declarations. {@link gnu.xml.pipeline.NSFilter} is
  86. * one solution to this problem, in the context of processing pipelines.</em>
  87. * Something as simple as connecting this handler to a parser might not
  88. * generate the correct output. Another workaround is to ensure that the
  89. * <em>namespace-prefixes</em> feature is always set to true, if you're
  90. * hooking this directly up to some XMLReader implementation.
  91. *
  92. * @see gnu.xml.pipeline.TextConsumer
  93. *
  94. * @author David Brownell
  95. *
  96. * @deprecated Please use the javax.xml.stream APIs instead
  97. */
  98. public class XMLWriter
  99. implements ContentHandler, LexicalHandler, DTDHandler, DeclHandler
  100. {
  101. // text prints/escapes differently depending on context
  102. // CTX_ENTITY ... entity literal value
  103. // CTX_ATTRIBUTE ... attribute literal value
  104. // CTX_CONTENT ... content of an element
  105. // CTX_UNPARSED ... CDATA, comment, PI, names, etc
  106. // CTX_NAME ... name or nmtoken, no escapes possible
  107. private static final int CTX_ENTITY = 1;
  108. private static final int CTX_ATTRIBUTE = 2;
  109. private static final int CTX_CONTENT = 3;
  110. private static final int CTX_UNPARSED = 4;
  111. private static final int CTX_NAME = 5;
  112. // FIXME: names (element, attribute, PI, notation, etc) are not
  113. // currently written out with range checks (escapeChars).
  114. // In non-XHTML, some names can't be directly written; panic!
  115. private static String sysEOL;
  116. static {
  117. try {
  118. sysEOL = System.getProperty ("line.separator", "\n");
  119. // don't use the system's EOL if it's illegal XML.
  120. if (!isLineEnd (sysEOL))
  121. sysEOL = "\n";
  122. } catch (SecurityException e) {
  123. sysEOL = "\n";
  124. }
  125. }
  126. private static boolean isLineEnd (String eol)
  127. {
  128. return "\n".equals (eol)
  129. || "\r".equals (eol)
  130. || "\r\n".equals (eol);
  131. }
  132. private Writer out;
  133. private boolean inCDATA;
  134. private int elementNestLevel;
  135. private String eol = sysEOL;
  136. private short dangerMask;
  137. private CPStringBuilder stringBuf;
  138. private Locator locator;
  139. private ErrorHandler errHandler;
  140. private boolean expandingEntities = false;
  141. private int entityNestLevel;
  142. private boolean xhtml;
  143. private boolean startedDoctype;
  144. private String encoding;
  145. private boolean canonical;
  146. private boolean inDoctype;
  147. private boolean inEpilogue;
  148. // pretty printing controls
  149. private boolean prettyPrinting;
  150. private int column;
  151. private boolean noWrap;
  152. private Stack space = new Stack ();
  153. // this is not a hard'n'fast rule -- longer lines are OK,
  154. // but are to be avoided. Here, prettyprinting is more to
  155. // show structure "cleanly" than to be precise about it.
  156. // better to have ragged layout than one line 24Kb long.
  157. private static final int lineLength = 75;
  158. /**
  159. * Constructs this handler with System.out used to write SAX events
  160. * using the UTF-8 encoding. Avoid using this except when you know
  161. * it's safe to close System.out at the end of the document.
  162. */
  public XMLWriter () throws IOException
  {
    // hand System.out to the OutputStream constructor
    this (System.out);
  }
  165. /**
  166. * Constructs a handler which writes all input to the output stream
  167. * in the UTF-8 encoding, and closes it when endDocument is called.
  168. * (Yes it's annoying that this throws an exception -- but there's
  169. * really no way around it, since it's barely possible a JDK may
  170. * exist somewhere that doesn't know how to emit UTF-8.)
  171. */
  public XMLWriter (OutputStream out)
  throws IOException
  {
    // "UTF8" is the Java-style name for UTF-8; wrapping the stream is
    // what can raise the declared IOException
    this (new OutputStreamWriter (out, "UTF8"));
  }
  176. /**
  177. * Constructs a handler which writes all input to the writer, and then
  178. * closes the writer when the document ends. If an XML declaration is
  179. * written onto the output, and this class can determine the name of
  180. * the character encoding for this writer, that encoding name will be
  181. * included in the XML declaration.
  182. *
  183. * <P> See the description of the constructor which takes an encoding
  184. * name for important information about selection of encodings.
  185. *
  186. * @param writer XML text is written to this writer.
  187. */
  public XMLWriter (Writer writer)
  {
    // no explicit encoding name: pass null on to the two-argument form
    this (writer, null);
  }
  192. /**
  193. * Constructs a handler which writes all input to the writer, and then
  194. * closes the writer when the document ends. If an XML declaration is
  195. * written onto the output, this class will use the specified encoding
  196. * name in that declaration. If no encoding name is specified, no
  197. * encoding name will be declared unless this class can otherwise
  198. * determine the name of the character encoding for this writer.
  199. *
  200. * <P> At this time, only the UTF-8 ("UTF8") and UTF-16 ("Unicode")
  201. * output encodings are fully lossless with respect to XML data. If you
  202. * use any other encoding you risk having your data be silently mangled
  203. * on output, as the standard Java character encoding subsystem silently
  204. * maps non-encodable characters to a question mark ("?") and will not
  205. * report such errors to applications.
  206. *
  207. * <p> For a few other encodings the risk can be reduced. If the writer is
  208. * a java.io.OutputStreamWriter, and uses either the ISO-8859-1 ("8859_1",
  209. * "ISO8859_1", etc) or US-ASCII ("ASCII") encodings, content which
  210. * can't be encoded in those encodings will be written safely. Where
  211. * relevant, the XHTML entity names will be used; otherwise, numeric
  212. * character references will be emitted.
  213. *
  214. * <P> However, there remain a number of cases where substituting such
  215. * entity or character references is not an option. Such references are
  216. * not usable within a DTD, comment, PI, or CDATA section. Neither may
  217. * they be used when element, attribute, entity, or notation names have
  218. * the problematic characters.
  219. *
  220. * @param writer XML text is written to this writer.
  221. * @param encoding if non-null, and an XML declaration is written,
  222. * this is the name that will be used for the character encoding.
  223. */
  public XMLWriter (Writer writer, String encoding)
  {
    // all of the setup is shared with setWriter()
    setWriter (writer, encoding);
  }
  228. private void setEncoding (String encoding)
  229. {
  230. if (encoding == null && out instanceof OutputStreamWriter)
  231. encoding = ((OutputStreamWriter)out).getEncoding ();
  232. if (encoding != null) {
  233. encoding = encoding.toUpperCase ();
  234. // Use official encoding names where we know them,
  235. // avoiding the Java-only names. When using common
  236. // encodings where we can easily tell if characters
  237. // are out of range, we'll escape out-of-range
  238. // characters using character refs for safety.
  239. // I _think_ these are all the main synonyms for these!
  240. if ("UTF8".equals (encoding)) {
  241. encoding = "UTF-8";
  242. } else if ("US-ASCII".equals (encoding)
  243. || "ASCII".equals (encoding)) {
  244. dangerMask = (short) 0xff80;
  245. encoding = "US-ASCII";
  246. } else if ("ISO-8859-1".equals (encoding)
  247. || "8859_1".equals (encoding)
  248. || "ISO8859_1".equals (encoding)) {
  249. dangerMask = (short) 0xff00;
  250. encoding = "ISO-8859-1";
  251. } else if ("UNICODE".equals (encoding)
  252. || "UNICODE-BIG".equals (encoding)
  253. || "UNICODE-LITTLE".equals (encoding)) {
  254. encoding = "UTF-16";
  255. // TODO: UTF-16BE, UTF-16LE ... no BOM; what
  256. // release of JDK supports those Unicode names?
  257. }
  258. if (dangerMask != 0)
  259. stringBuf = new CPStringBuilder ();
  260. }
  261. this.encoding = encoding;
  262. }
  263. /**
  264. * Resets the handler to write a new text document.
  265. *
  266. * @param writer XML text is written to this writer.
  267. * @param encoding if non-null, and an XML declaration is written,
  268. * this is the name that will be used for the character encoding.
  269. *
  270. * @exception IllegalStateException if the current
  271. * document hasn't yet ended (with {@link #endDocument})
  272. */
  273. final public void setWriter (Writer writer, String encoding)
  274. {
  275. if (out != null)
  276. throw new IllegalStateException (
  277. "can't change stream in mid course");
  278. out = writer;
  279. if (out != null)
  280. setEncoding (encoding);
  281. if (!(out instanceof BufferedWriter))
  282. out = new BufferedWriter (out);
  283. space.push ("default");
  284. }
  285. /**
  286. * Assigns the line ending style to be used on output.
  287. * @param eolString null to use the system default; else
  288. * "\n", "\r", or "\r\n".
  289. */
  290. final public void setEOL (String eolString)
  291. {
  292. if (eolString == null)
  293. eol = sysEOL;
  294. else if (!isLineEnd (eolString))
  295. eol = eolString;
  296. else
  297. throw new IllegalArgumentException (eolString);
  298. }
  299. /**
  300. * Assigns the error handler to be used to present most fatal
  301. * errors.
  302. */
  303. public void setErrorHandler (ErrorHandler handler)
  304. {
  305. errHandler = handler;
  306. }
  307. /**
  308. * Used internally and by subclasses, this encapsulates the logic
  309. * involved in reporting fatal errors. It uses locator information
  310. * for good diagnostics, if available, and gives the application's
  311. * ErrorHandler the opportunity to handle the error before throwing
  312. * an exception.
  313. */
  314. protected void fatal (String message, Exception e)
  315. throws SAXException
  316. {
  317. SAXParseException x;
  318. if (locator == null)
  319. x = new SAXParseException (message, null, null, -1, -1, e);
  320. else
  321. x = new SAXParseException (message, locator, e);
  322. if (errHandler != null)
  323. errHandler.fatalError (x);
  324. throw x;
  325. }
  326. // JavaBeans properties
  327. /**
  328. * Controls whether the output should attempt to follow the "transitional"
  329. * XHTML rules so that it meets the "HTML Compatibility Guidelines"
  330. * appendix in the XHTML specification. A "transitional" Document Type
  331. * Declaration (DTD) is placed near the beginning of the output document,
  332. * instead of whatever DTD would otherwise have been placed there, and
  333. * XHTML empty elements are printed specially. When writing text in
  334. * US-ASCII or ISO-8859-1 encodings, the predefined XHTML internal
  335. * entity names are used (in preference to character references) when
  336. * writing content characters which can't be expressed in those encodings.
  337. *
  338. * <p> When this option is enabled, it is the caller's responsibility
  339. * to ensure that the input is otherwise valid as XHTML. Things to
  340. * be careful of in all cases, as described in the appendix referenced
  341. * above, include: <ul>
  342. *
  343. * <li> Element and attribute names must be in lower case, both
  344. * in the document and in any CSS style sheet.
  345. * <li> All XML constructs must be valid as defined by the XHTML
  346. * "transitional" DTD (including all familiar constructs,
  347. * even deprecated ones).
  348. * <li> The root element must be "html".
  349. * <li> Elements that must be empty (such as <em>&lt;br&gt;</em>)
  350. * must have no content.
  351. * <li> Use both <em>lang</em> and <em>xml:lang</em> attributes
  352. * when specifying language.
  353. * <li> Similarly, use both <em>id</em> and <em>name</em> attributes
  354. * when defining elements that may be referred to through
  355. * URI fragment identifiers ... and make sure that the
  356. * value is a legal NMTOKEN, since not all such HTML 4.0
  357. * identifiers are valid in XML.
  358. * <li> Be careful with character encodings; make sure you provide
  359. * a <em>&lt;meta http-equiv="Content-type"
  360. * content="text/xml;charset=..." /&gt;</em> element in
  361. * the HTML "head" element, naming the same encoding
  362. * used to create this handler. Also, if that encoding
  363. * is anything other than US-ASCII, make sure that if
  364. * the document is given a MIME content type, it has
  365. * a <em>charset=...</em> attribute with that encoding.
  366. * </ul>
  367. *
  368. * <p> Additionally, some of the oldest browsers have additional
  369. * quirks, to address with guidelines such as: <ul>
  370. *
  371. * <li> Processing instructions may be rendered, so avoid them.
  372. * (Similarly for an XML declaration.)
  373. * <li> Embedded style sheets and scripts should not contain XML
  374. * markup delimiters: &amp;, &lt;, and ]]&gt; are trouble.
  375. * <li> Attribute values should not have line breaks or multiple
  376. * consecutive white space characters.
  377. * <li> Use no more than one of the deprecated (transitional)
  378. * <em>&lt;isindex&gt;</em> elements.
  379. * <li> Some boolean attributes (such as <em>compact, checked,
  380. * disabled, readonly, selected,</em> and more) confuse
  381. * some browsers, since they only understand minimized
  382. * versions which are illegal in XML.
  383. * </ul>
  384. *
  385. * <p> Also, some characteristics of the resulting output may be
  386. * a function of whether the document is later given a MIME
  387. * content type of <em>text/html</em> rather than one indicating
  388. * XML (<em>application/xml</em> or <em>text/xml</em>). Worse,
  389. * some browsers ignore MIME content types and prefer to rely on URI
  390. * name suffixes -- so an "index.xml" could always be XML, never
  391. * XHTML, no matter its MIME type.
  392. */
  393. final public void setXhtml (boolean value)
  394. {
  395. if (locator != null)
  396. throw new IllegalStateException ("started parsing");
  397. xhtml = value;
  398. if (xhtml)
  399. canonical = false;
  400. }
  401. /**
  402. * Returns true if the output attempts to echo the input following
  403. * "transitional" XHTML rules and matching the "HTML Compatibility
  404. * Guidelines" so that an HTML version 3 browser can read the output
  405. * as HTML; returns false (the default) otherwise.
  406. */
  407. final public boolean isXhtml ()
  408. {
  409. return xhtml;
  410. }
  411. /**
  412. * Controls whether the output text contains references to
  413. * entities (the default), or instead contains the expanded
  414. * values of those entities.
  415. */
  416. final public void setExpandingEntities (boolean value)
  417. {
  418. if (locator != null)
  419. throw new IllegalStateException ("started parsing");
  420. expandingEntities = value;
  421. if (!expandingEntities)
  422. canonical = false;
  423. }
  424. /**
  425. * Returns true if the output will have no entity references;
  426. * returns false (the default) otherwise.
  427. */
  428. final public boolean isExpandingEntities ()
  429. {
  430. return expandingEntities;
  431. }
  432. /**
  433. * Controls pretty-printing, which by default is not enabled
  434. * (and currently is most useful for XHTML output).
  435. * Pretty printing enables structural indentation, sorting of attributes
  436. * by name, line wrapping, and potentially other mechanisms for making
  437. * output more or less readable.
  438. *
  439. * <p> At this writing, structural indentation and line wrapping are
  440. * enabled when pretty printing is enabled and the <em>xml:space</em>
  441. * attribute has the value <em>default</em> (its other legal value is
  442. * <em>preserve</em>, as defined in the XML specification). The three
  443. * XHTML element types which use another value are recognized by their
  444. * names (namespaces are ignored).
  445. *
  446. * <p> Also, for the record, the "pretty" aspect of printing here
  447. * is more to provide basic structure on outputs that would otherwise
  448. * risk being a single long line of text. For now, expect the
  449. * structure to be ragged ... unless you'd like to submit a patch
  450. * to make this be more strictly formatted!
  451. *
  452. * @exception IllegalStateException thrown if this method is invoked
  453. * after output has begun.
  454. */
  455. final public void setPrettyPrinting (boolean value)
  456. {
  457. if (locator != null)
  458. throw new IllegalStateException ("started parsing");
  459. prettyPrinting = value;
  460. if (prettyPrinting)
  461. canonical = false;
  462. }
  463. /**
  464. * Returns value of flag controlling pretty printing.
  465. */
  466. final public boolean isPrettyPrinting ()
  467. {
  468. return prettyPrinting;
  469. }
  470. /**
  471. * Sets the output style to be canonicalized. Input events must
  472. * meet requirements that are slightly more stringent than the
  473. * basic well-formedness ones, and include: <ul>
  474. *
  475. * <li> Namespace prefixes must not have been changed from those
  476. * in the original document. (This may only be ensured by setting
  477. * the SAX2 XMLReader <em>namespace-prefixes</em> feature flag;
  478. * by default, it is cleared.)
  479. *
  480. * <li> Redundant namespace declaration attributes have been
  481. * removed. (If an ancestor element defines a namespace prefix
  482. * and that declaration hasn't been overridden, an element must
  483. * not redeclare it.)
  484. *
  485. * <li> If comments are not to be included in the canonical output,
  486. * they must first be removed from the input event stream; this is
  487. * <em>Canonical XML with comments</em> by default.
  488. *
  489. * <li> If the input character encoding was not UCS-based, the
  490. * character data must have been normalized using Unicode
  491. * Normalization Form C. (UTF-8 and UTF-16 are UCS-based.)
  492. *
  493. * <li> Attribute values must have been normalized, as is done
  494. * by any conformant XML processor which processes all external
  495. * parameter entities.
  496. *
  497. * <li> Similarly, attribute value defaulting has been performed.
  498. *
  499. * </ul>
  500. *
  501. * <p> Note that fragments of XML documents, as specified by an XPath
  502. * node set, may be canonicalized. In such cases, elements may need
  503. * some fixup (for <em>xml:*</em> attributes and application-specific
  504. * context).
  505. *
  506. * @exception IllegalArgumentException if the output encoding
  507. * is anything other than UTF-8.
  508. */
  509. final public void setCanonical (boolean value)
  510. {
  511. if (value && !"UTF-8".equals (encoding))
  512. throw new IllegalArgumentException ("encoding != UTF-8");
  513. canonical = value;
  514. if (canonical) {
  515. prettyPrinting = xhtml = false;
  516. expandingEntities = true;
  517. eol = "\n";
  518. }
  519. }
  520. /**
  521. * Returns value of flag controlling canonical output.
  522. */
  523. final public boolean isCanonical ()
  524. {
  525. return canonical;
  526. }
  527. /**
  528. * Flushes the output stream. When this handler is used in long lived
  529. * pipelines, it can be important to flush buffered state, for example
  530. * so that it can reach the disk as part of a state checkpoint.
  531. */
  532. final public void flush ()
  533. throws IOException
  534. {
  535. if (out != null)
  536. out.flush ();
  537. }
  538. // convenience routines
  539. // FIXME: probably want a subclass that holds a lot of these...
  540. // and maybe more!
  541. /**
  542. * Writes the string as if characters() had been called on the contents
  543. * of the string. This is particularly useful when applications act as
  544. * producers and write data directly to event consumers.
  545. */
  546. final public void write (String data)
  547. throws SAXException
  548. {
  549. char buf [] = data.toCharArray ();
  550. characters (buf, 0, buf.length);
  551. }
  552. /**
  553. * Writes an element that has content consisting of a single string.
  554. * @see #writeEmptyElement
  555. * @see #startElement
  556. */
  557. public void writeElement (
  558. String uri,
  559. String localName,
  560. String qName,
  561. Attributes atts,
  562. String content
  563. ) throws SAXException
  564. {
  565. if (content == null || content.length () == 0) {
  566. writeEmptyElement (uri, localName, qName, atts);
  567. return;
  568. }
  569. startElement (uri, localName, qName, atts);
  570. char chars [] = content.toCharArray ();
  571. characters (chars, 0, chars.length);
  572. endElement (uri, localName, qName);
  573. }
  574. /**
  575. * Writes an element that has content consisting of a single integer,
  576. * encoded as a decimal string.
  577. * @see #writeEmptyElement
  578. * @see #startElement
  579. */
  580. public void writeElement (
  581. String uri,
  582. String localName,
  583. String qName,
  584. Attributes atts,
  585. int content
  586. ) throws SAXException
  587. {
  588. writeElement (uri, localName, qName, atts, Integer.toString (content));
  589. }
  590. // SAX1 ContentHandler
  591. /** <b>SAX1</b>: provides parser status information */
  592. final public void setDocumentLocator (Locator l)
  593. {
  594. locator = l;
  595. }
  596. // URL for dtd that validates against all normal HTML constructs
  597. private static final String xhtmlFullDTD =
  598. "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd";
  599. /**
  600. * <b>SAX1</b>: indicates the beginning of a document parse.
  601. * If you're writing (well formed) fragments of XML, neither
  602. * this nor endDocument should be called.
  603. */
  604. // NOT final
  605. public void startDocument ()
  606. throws SAXException
  607. {
  608. try {
  609. if (out == null)
  610. throw new IllegalStateException (
  611. "null Writer given to XMLWriter");
  612. // Not all parsers provide the locator we want; this also
  613. // flags whether events are being sent to this object yet.
  614. // We could only have this one call if we only printed whole
  615. // documents ... but we also print fragments, so most of the
  616. // callbacks here replicate this test.
  617. if (locator == null)
  618. locator = new LocatorImpl ();
  619. // Unless the data is in US-ASCII or we're canonicalizing, write
  620. // the XML declaration if we know the encoding. US-ASCII won't
  621. // normally get mangled by web server confusion about the
  622. // character encodings used. Plus, it's an easy way to
  623. // ensure we can write ASCII that's unlikely to confuse
  624. // elderly HTML parsers.
  625. if (!canonical
  626. && dangerMask != (short) 0xff80
  627. && encoding != null) {
  628. rawWrite ("<?xml version='1.0'");
  629. rawWrite (" encoding='" + encoding + "'");
  630. rawWrite ("?>");
  631. newline ();
  632. }
  633. if (xhtml) {
  634. rawWrite ("<!DOCTYPE html PUBLIC");
  635. newline ();
  636. rawWrite (" '-//W3C//DTD XHTML 1.0 Transitional//EN'");
  637. newline ();
  638. rawWrite (" '");
  639. // NOTE: URL (above) matches the REC
  640. rawWrite (xhtmlFullDTD);
  641. rawWrite ("'>");
  642. newline ();
  643. newline ();
  644. // fake the rest of the handler into ignoring
  645. // everything until the root element, so any
  646. // XHTML DTD comments, PIs, etc are ignored
  647. startedDoctype = true;
  648. }
  649. entityNestLevel = 0;
  650. } catch (IOException e) {
  651. fatal ("can't write", e);
  652. }
  653. }
  654. /**
  655. * <b>SAX1</b>: indicates the completion of a parse.
  656. * Note that all complete SAX event streams make this call, even
  657. * if an error is reported during a parse.
  658. */
  659. // NOT final
  660. public void endDocument ()
  661. throws SAXException
  662. {
  663. try {
  664. if (!canonical) {
  665. newline ();
  666. newline ();
  667. }
  668. out.close ();
  669. out = null;
  670. locator = null;
  671. } catch (IOException e) {
  672. fatal ("can't write", e);
  673. }
  674. }
  675. // XHTML elements declared as EMPTY print differently
  676. final private static boolean isEmptyElementTag (String tag)
  677. {
  678. switch (tag.charAt (0)) {
  679. case 'a': return "area".equals (tag);
  680. case 'b': return "base".equals (tag)
  681. || "basefont".equals (tag)
  682. || "br".equals (tag);
  683. case 'c': return "col".equals (tag);
  684. case 'f': return "frame".equals (tag);
  685. case 'h': return "hr".equals (tag);
  686. case 'i': return "img".equals (tag)
  687. || "input".equals (tag)
  688. || "isindex".equals (tag);
  689. case 'l': return "link".equals (tag);
  690. case 'm': return "meta".equals (tag);
  691. case 'p': return "param".equals (tag);
  692. }
  693. return false;
  694. }
  695. private static boolean indentBefore (String tag)
  696. {
  697. // basically indent before block content
  698. // and within structure like tables, lists
  699. switch (tag.charAt (0)) {
  700. case 'a': return "applet".equals (tag);
  701. case 'b': return "body".equals (tag)
  702. || "blockquote".equals (tag);
  703. case 'c': return "center".equals (tag);
  704. case 'f': return "frame".equals (tag)
  705. || "frameset".equals (tag);
  706. case 'h': return "head".equals (tag);
  707. case 'm': return "meta".equals (tag);
  708. case 'o': return "object".equals (tag);
  709. case 'p': return "param".equals (tag)
  710. || "pre".equals (tag);
  711. case 's': return "style".equals (tag);
  712. case 't': return "title".equals (tag)
  713. || "td".equals (tag)
  714. || "th".equals (tag);
  715. }
  716. // ... but not inline elements like "em", "b", "font"
  717. return false;
  718. }
  719. private static boolean spaceBefore (String tag)
  720. {
  721. // blank line AND INDENT before certain structural content
  722. switch (tag.charAt (0)) {
  723. case 'h': return "h1".equals (tag)
  724. || "h2".equals (tag)
  725. || "h3".equals (tag)
  726. || "h4".equals (tag)
  727. || "h5".equals (tag)
  728. || "h6".equals (tag)
  729. || "hr".equals (tag);
  730. case 'l': return "li".equals (tag);
  731. case 'o': return "ol".equals (tag);
  732. case 'p': return "p".equals (tag);
  733. case 't': return "table".equals (tag)
  734. || "tr".equals (tag);
  735. case 'u': return "ul".equals (tag);
  736. }
  737. return false;
  738. }
  739. // XHTML DTDs say these three have xml:space="preserve"
  740. private static boolean spacePreserve (String tag)
  741. {
  742. return "pre".equals (tag)
  743. || "style".equals (tag)
  744. || "script".equals (tag);
  745. }
  746. /**
  747. * <b>SAX2</b>: ignored.
  748. */
  749. final public void startPrefixMapping (String prefix, String uri)
  750. {}
  751. /**
  752. * <b>SAX2</b>: ignored.
  753. */
  754. final public void endPrefixMapping (String prefix)
  755. {}
  756. private void writeStartTag (
  757. String name,
  758. Attributes atts,
  759. boolean isEmpty
  760. ) throws SAXException, IOException
  761. {
  762. rawWrite ('<');
  763. rawWrite (name);
  764. // write out attributes ... sorting is particularly useful
  765. // with output that's been heavily defaulted.
  766. if (atts != null && atts.getLength () != 0) {
  767. // Set up to write, with optional sorting
  768. int indices [] = new int [atts.getLength ()];
  769. for (int i= 0; i < indices.length; i++)
  770. indices [i] = i;
  771. // optionally sort
  772. // FIXME: canon xml demands xmlns nodes go first,
  773. // and sorting by URI first (empty first) then localname
  774. // it should maybe use a different sort
  775. if (canonical || prettyPrinting) {
  776. // insertion sort by attribute name
  777. for (int i = 1; i < indices.length; i++) {
  778. int n = indices [i], j;
  779. String s = atts.getQName (n);
  780. for (j = i - 1; j >= 0; j--) {
  781. if (s.compareTo (atts.getQName (indices [j]))
  782. >= 0)
  783. break;
  784. indices [j + 1] = indices [j];
  785. }
  786. indices [j + 1] = n;
  787. }
  788. }
  789. // write, sorted or no
  790. for (int i= 0; i < indices.length; i++) {
  791. String s = atts.getQName (indices [i]);
  792. if (s == null || "".equals (s))
  793. throw new IllegalArgumentException ("no XML name");
  794. rawWrite (" ");
  795. rawWrite (s);
  796. rawWrite ("=");
  797. writeQuotedValue (atts.getValue (indices [i]),
  798. CTX_ATTRIBUTE);
  799. }
  800. }
  801. if (isEmpty)
  802. rawWrite (" /");
  803. rawWrite ('>');
  804. }
  805. /**
  806. * <b>SAX2</b>: indicates the start of an element.
  807. * When XHTML is in use, avoid attribute values with
  808. * line breaks or multiple whitespace characters, since
  809. * not all user agents handle them correctly.
  810. */
  811. final public void startElement (
  812. String uri,
  813. String localName,
  814. String qName,
  815. Attributes atts
  816. ) throws SAXException
  817. {
  818. startedDoctype = false;
  819. if (locator == null)
  820. locator = new LocatorImpl ();
  821. if (qName == null || "".equals (qName))
  822. throw new IllegalArgumentException ("no XML name");
  823. try {
  824. if (entityNestLevel != 0)
  825. return;
  826. if (prettyPrinting) {
  827. String whitespace = null;
  828. if (xhtml && spacePreserve (qName))
  829. whitespace = "preserve";
  830. else if (atts != null)
  831. whitespace = atts.getValue ("xml:space");
  832. if (whitespace == null)
  833. whitespace = (String) space.peek ();
  834. space.push (whitespace);
  835. if ("default".equals (whitespace)) {
  836. if (xhtml) {
  837. if (spaceBefore (qName)) {
  838. newline ();
  839. doIndent ();
  840. } else if (indentBefore (qName))
  841. doIndent ();
  842. // else it's inlined, modulo line length
  843. // FIXME: incrementing element nest level
  844. // for inlined elements causes ugliness
  845. } else
  846. doIndent ();
  847. }
  848. }
  849. elementNestLevel++;
  850. writeStartTag (qName, atts, xhtml && isEmptyElementTag (qName));
  851. if (xhtml) {
  852. // FIXME: if this is an XHTML "pre" element, turn
  853. // off automatic wrapping.
  854. }
  855. } catch (IOException e) {
  856. fatal ("can't write", e);
  857. }
  858. }
  859. /**
  860. * Writes an empty element.
  861. * @see #startElement
  862. */
  863. public void writeEmptyElement (
  864. String uri,
  865. String localName,
  866. String qName,
  867. Attributes atts
  868. ) throws SAXException
  869. {
  870. if (canonical) {
  871. startElement (uri, localName, qName, atts);
  872. endElement (uri, localName, qName);
  873. } else {
  874. try {
  875. writeStartTag (qName, atts, true);
  876. } catch (IOException e) {
  877. fatal ("can't write", e);
  878. }
  879. }
  880. }
  881. /** <b>SAX2</b>: indicates the end of an element */
  882. final public void endElement (String uri, String localName, String qName)
  883. throws SAXException
  884. {
  885. if (qName == null || "".equals (qName))
  886. throw new IllegalArgumentException ("no XML name");
  887. try {
  888. elementNestLevel--;
  889. if (entityNestLevel != 0)
  890. return;
  891. if (xhtml && isEmptyElementTag (qName))
  892. return;
  893. rawWrite ("</");
  894. rawWrite (qName);
  895. rawWrite ('>');
  896. if (prettyPrinting) {
  897. if (!space.empty ())
  898. space.pop ();
  899. else
  900. fatal ("stack discipline", null);
  901. }
  902. if (elementNestLevel == 0)
  903. inEpilogue = true;
  904. } catch (IOException e) {
  905. fatal ("can't write", e);
  906. }
  907. }
  908. /** <b>SAX1</b>: reports content characters */
  909. final public void characters (char ch [], int start, int length)
  910. throws SAXException
  911. {
  912. if (locator == null)
  913. locator = new LocatorImpl ();
  914. try {
  915. if (entityNestLevel != 0)
  916. return;
  917. if (inCDATA) {
  918. escapeChars (ch, start, length, CTX_UNPARSED);
  919. } else {
  920. escapeChars (ch, start, length, CTX_CONTENT);
  921. }
  922. } catch (IOException e) {
  923. fatal ("can't write", e);
  924. }
  925. }
  926. /** <b>SAX1</b>: reports ignorable whitespace */
  927. final public void ignorableWhitespace (char ch [], int start, int length)
  928. throws SAXException
  929. {
  930. if (locator == null)
  931. locator = new LocatorImpl ();
  932. try {
  933. if (entityNestLevel != 0)
  934. return;
  935. // don't forget to map NL to CRLF, CR, etc
  936. escapeChars (ch, start, length, CTX_CONTENT);
  937. } catch (IOException e) {
  938. fatal ("can't write", e);
  939. }
  940. }
  941. /**
  942. * <b>SAX1</b>: reports a PI.
  943. * This doesn't check for illegal target names, such as "xml" or "XML",
  944. * or namespace-incompatible ones like "big:dog"; the caller is
  945. * responsible for ensuring those names are legal.
  946. */
  947. final public void processingInstruction (String target, String data)
  948. throws SAXException
  949. {
  950. if (locator == null)
  951. locator = new LocatorImpl ();
  952. // don't print internal subset for XHTML
  953. if (xhtml && startedDoctype)
  954. return;
  955. // ancient HTML browsers might render these ... their loss.
  956. // to prevent: "if (xhtml) return;".
  957. try {
  958. if (entityNestLevel != 0)
  959. return;
  960. if (canonical && inEpilogue)
  961. newline ();
  962. rawWrite ("<?");
  963. rawWrite (target);
  964. rawWrite (' ');
  965. escapeChars (data.toCharArray (), -1, -1, CTX_UNPARSED);
  966. rawWrite ("?>");
  967. if (elementNestLevel == 0 && !(canonical && inEpilogue))
  968. newline ();
  969. } catch (IOException e) {
  970. fatal ("can't write", e);
  971. }
  972. }
  973. /** <b>SAX1</b>: indicates a non-expanded entity reference */
  974. public void skippedEntity (String name)
  975. throws SAXException
  976. {
  977. try {
  978. rawWrite ("&");
  979. rawWrite (name);
  980. rawWrite (";");
  981. } catch (IOException e) {
  982. fatal ("can't write", e);
  983. }
  984. }
  985. // SAX2 LexicalHandler
  986. /** <b>SAX2</b>: called before parsing CDATA characters */
  987. final public void startCDATA ()
  988. throws SAXException
  989. {
  990. if (locator == null)
  991. locator = new LocatorImpl ();
  992. if (canonical)
  993. return;
  994. try {
  995. inCDATA = true;
  996. if (entityNestLevel == 0)
  997. rawWrite ("<![CDATA[");
  998. } catch (IOException e) {
  999. fatal ("can't write", e);
  1000. }
  1001. }
  1002. /** <b>SAX2</b>: called after parsing CDATA characters */
  1003. final public void endCDATA ()
  1004. throws SAXException
  1005. {
  1006. if (canonical)
  1007. return;
  1008. try {
  1009. inCDATA = false;
  1010. if (entityNestLevel == 0)
  1011. rawWrite ("]]>");
  1012. } catch (IOException e) {
  1013. fatal ("can't write", e);
  1014. }
  1015. }
  1016. /**
  1017. * <b>SAX2</b>: called when the doctype is partially parsed
  1018. * Note that this, like other doctype related calls, is ignored
  1019. * when XHTML is in use.
  1020. */
  1021. final public void startDTD (String name, String publicId, String systemId)
  1022. throws SAXException
  1023. {
  1024. if (locator == null)
  1025. locator = new LocatorImpl ();
  1026. if (xhtml)
  1027. return;
  1028. try {
  1029. inDoctype = startedDoctype = true;
  1030. if (canonical)
  1031. return;
  1032. rawWrite ("<!DOCTYPE ");
  1033. rawWrite (name);
  1034. rawWrite (' ');
  1035. if (!expandingEntities) {
  1036. if (publicId != null)
  1037. rawWrite ("PUBLIC '" + publicId + "' '" + systemId + "' ");
  1038. else if (systemId != null)
  1039. rawWrite ("SYSTEM '" + systemId + "' ");
  1040. }
  1041. rawWrite ('[');
  1042. newline ();
  1043. } catch (IOException e) {
  1044. fatal ("can't write", e);
  1045. }
  1046. }
  1047. /** <b>SAX2</b>: called after the doctype is parsed */
  1048. final public void endDTD ()
  1049. throws SAXException
  1050. {
  1051. inDoctype = false;
  1052. if (canonical || xhtml)
  1053. return;
  1054. try {
  1055. rawWrite ("]>");
  1056. newline ();
  1057. } catch (IOException e) {
  1058. fatal ("can't write", e);
  1059. }
  1060. }
  1061. /**
  1062. * <b>SAX2</b>: called before parsing a general entity in content
  1063. */
  1064. final public void startEntity (String name)
  1065. throws SAXException
  1066. {
  1067. try {
  1068. boolean writeEOL = true;
  1069. // Predefined XHTML entities (for characters) will get
  1070. // mapped back later.
  1071. if (xhtml || expandingEntities)
  1072. return;
  1073. entityNestLevel++;
  1074. if (name.equals ("[dtd]"))
  1075. return;
  1076. if (entityNestLevel != 1)
  1077. return;
  1078. if (!name.startsWith ("%")) {
  1079. writeEOL = false;
  1080. rawWrite ('&');
  1081. }
  1082. rawWrite (name);
  1083. rawWrite (';');
  1084. if (writeEOL)
  1085. newline ();
  1086. } catch (IOException e) {
  1087. fatal ("can't write", e);
  1088. }
  1089. }
  1090. /**
  1091. * <b>SAX2</b>: called after parsing a general entity in content
  1092. */
  1093. final public void endEntity (String name)
  1094. throws SAXException
  1095. {
  1096. if (xhtml || expandingEntities)
  1097. return;
  1098. entityNestLevel--;
  1099. }
  1100. /**
  1101. * <b>SAX2</b>: called when comments are parsed.
  1102. * When XHTML is used, the old HTML tradition of using comments
  1103. * to for inline CSS, or for JavaScript code is discouraged.
  1104. * This is because XML processors are encouraged to discard, on
  1105. * the grounds that comments are for users (and perhaps text
  1106. * editors) not programs. Instead, use external scripts
  1107. */
  1108. final public void comment (char ch [], int start, int length)
  1109. throws SAXException
  1110. {
  1111. if (locator == null)
  1112. locator = new LocatorImpl ();
  1113. // don't print internal subset for XHTML
  1114. if (xhtml && startedDoctype)
  1115. return;
  1116. // don't print comment in doctype for canon xml
  1117. if (canonical && inDoctype)
  1118. return;
  1119. try {
  1120. boolean indent;
  1121. if (prettyPrinting && space.empty ())
  1122. fatal ("stack discipline", null);
  1123. indent = prettyPrinting && "default".equals (space.peek ());
  1124. if (entityNestLevel != 0)
  1125. return;
  1126. if (indent)
  1127. doIndent ();
  1128. if (canonical && inEpilogue)
  1129. newline ();
  1130. rawWrite ("<!--");
  1131. escapeChars (ch, start, length, CTX_UNPARSED);
  1132. rawWrite ("-->");
  1133. if (indent)
  1134. doIndent ();
  1135. if (elementNestLevel == 0 && !(canonical && inEpilogue))
  1136. newline ();
  1137. } catch (IOException e) {
  1138. fatal ("can't write", e);
  1139. }
  1140. }
  1141. // SAX1 DTDHandler
  1142. /** <b>SAX1</b>: called on notation declarations */
  1143. final public void notationDecl (String name,
  1144. String publicId, String systemId)
  1145. throws SAXException
  1146. {
  1147. if (xhtml)
  1148. return;
  1149. try {
  1150. // At this time, only SAX2 callbacks start these.
  1151. if (!startedDoctype)
  1152. return;
  1153. if (entityNestLevel != 0)
  1154. return;
  1155. rawWrite ("<!NOTATION " + name + " ");
  1156. if (publicId != null)
  1157. rawWrite ("PUBLIC \"" + publicId + '"');
  1158. else
  1159. rawWrite ("SYSTEM ");
  1160. if (systemId != null)
  1161. rawWrite ('"' + systemId + '"');
  1162. rawWrite (">");
  1163. newline ();
  1164. } catch (IOException e) {
  1165. fatal ("can't write", e);
  1166. }
  1167. }
  1168. /** <b>SAX1</b>: called on unparsed entity declarations */
  1169. final public void unparsedEntityDecl (String name,
  1170. String publicId, String systemId,
  1171. String notationName)
  1172. throws SAXException
  1173. {
  1174. if (xhtml)
  1175. return;
  1176. try {
  1177. // At this time, only SAX2 callbacks start these.
  1178. if (!startedDoctype) {
  1179. // FIXME: write to temporary buffer, and make the start
  1180. // of the root element write these declarations.
  1181. return;
  1182. }
  1183. if (entityNestLevel != 0)
  1184. return;
  1185. rawWrite ("<!ENTITY " + name + " ");
  1186. if (publicId != null)
  1187. rawWrite ("PUBLIC \"" + publicId + '"');
  1188. else
  1189. rawWrite ("SYSTEM ");
  1190. rawWrite ('"' + systemId + '"');
  1191. rawWrite (" NDATA " + notationName + ">");
  1192. newline ();
  1193. } catch (IOException e) {
  1194. fatal ("can't write", e);
  1195. }
  1196. }
  1197. // SAX2 DeclHandler
  1198. /** <b>SAX2</b>: called on attribute declarations */
  1199. final public void attributeDecl (String eName, String aName,
  1200. String type, String mode, String value)
  1201. throws SAXException
  1202. {
  1203. if (xhtml)
  1204. return;
  1205. try {
  1206. // At this time, only SAX2 callbacks start these.
  1207. if (!startedDoctype)
  1208. return;
  1209. if (entityNestLevel != 0)
  1210. return;
  1211. rawWrite ("<!ATTLIST " + eName + ' ' + aName + ' ');
  1212. rawWrite (type);
  1213. rawWrite (' ');
  1214. if (mode != null)
  1215. rawWrite (mode + ' ');
  1216. if (value != null)
  1217. writeQuotedValue (value, CTX_ATTRIBUTE);
  1218. rawWrite ('>');
  1219. newline ();
  1220. } catch (IOException e) {
  1221. fatal ("can't write", e);
  1222. }
  1223. }
  1224. /** <b>SAX2</b>: called on element declarations */
  1225. final public void elementDecl (String name, String model)
  1226. throws SAXException
  1227. {
  1228. if (xhtml)
  1229. return;
  1230. try {
  1231. // At this time, only SAX2 callbacks start these.
  1232. if (!startedDoctype)
  1233. return;
  1234. if (entityNestLevel != 0)
  1235. return;
  1236. rawWrite ("<!ELEMENT " + name + ' ' + model + '>');
  1237. newline ();
  1238. } catch (IOException e) {
  1239. fatal ("can't write", e);
  1240. }
  1241. }
  1242. /** <b>SAX2</b>: called on external entity declarations */
  1243. final public void externalEntityDecl (
  1244. String name,
  1245. String publicId,
  1246. String systemId)
  1247. throws SAXException
  1248. {
  1249. if (xhtml)
  1250. return;
  1251. try {
  1252. // At this time, only SAX2 callbacks start these.
  1253. if (!startedDoctype)
  1254. return;
  1255. if (entityNestLevel != 0)
  1256. return;
  1257. rawWrite ("<!ENTITY ");
  1258. if (name.startsWith ("%")) {
  1259. rawWrite ("% ");
  1260. rawWrite (name.substring (1));
  1261. } else
  1262. rawWrite (name);
  1263. if (publicId != null)
  1264. rawWrite (" PUBLIC \"" + publicId + '"');
  1265. else
  1266. rawWrite (" SYSTEM ");
  1267. rawWrite ('"' + systemId + "\">");
  1268. newline ();
  1269. } catch (IOException e) {
  1270. fatal ("can't write", e);
  1271. }
  1272. }
  1273. /** <b>SAX2</b>: called on internal entity declarations */
  1274. final public void internalEntityDecl (String name, String value)
  1275. throws SAXException
  1276. {
  1277. if (xhtml)
  1278. return;
  1279. try {
  1280. // At this time, only SAX2 callbacks start these.
  1281. if (!startedDoctype)
  1282. return;
  1283. if (entityNestLevel != 0)
  1284. return;
  1285. rawWrite ("<!ENTITY ");
  1286. if (name.startsWith ("%")) {
  1287. rawWrite ("% ");
  1288. rawWrite (name.substring (1));
  1289. } else
  1290. rawWrite (name);
  1291. rawWrite (' ');
  1292. writeQuotedValue (value, CTX_ENTITY);
  1293. rawWrite ('>');
  1294. newline ();
  1295. } catch (IOException e) {
  1296. fatal ("can't write", e);
  1297. }
  1298. }
  1299. private void writeQuotedValue (String value, int code)
  1300. throws SAXException, IOException
  1301. {
  1302. char buf [] = value.toCharArray ();
  1303. int off = 0, len = buf.length;
  1304. // we can't add line breaks to attribute/entity/... values
  1305. noWrap = true;
  1306. rawWrite ('"');
  1307. escapeChars (buf, off, len, code);
  1308. rawWrite ('"');
  1309. noWrap = false;
  1310. }
  1311. // From "HTMLlat1x.ent" ... names of entities for ISO-8859-1
  1312. // (Latin/1) characters, all codes: 160-255 (0xA0-0xFF).
  1313. // Codes 128-159 have no assigned values.
  1314. private static final String HTMLlat1x [] = {
  1315. // 160
  1316. "nbsp", "iexcl", "cent", "pound", "curren",
  1317. "yen", "brvbar", "sect", "uml", "copy",
  1318. // 170
  1319. "ordf", "laquo", "not", "shy", "reg",
  1320. "macr", "deg", "plusmn", "sup2", "sup3",
  1321. // 180
  1322. "acute", "micro", "para", "middot", "cedil",
  1323. "sup1", "ordm", "raquo", "frac14", "frac12",
  1324. // 190
  1325. "frac34", "iquest", "Agrave", "Aacute", "Acirc",
  1326. "Atilde", "Auml", "Aring", "AElig", "Ccedil",
  1327. // 200
  1328. "Egrave", "Eacute", "Ecirc", "Euml", "Igrave",
  1329. "Iacute", "Icirc", "Iuml", "ETH", "Ntilde",
  1330. // 210
  1331. "Ograve", "Oacute", "Ocirc", "Otilde", "Ouml",
  1332. "times", "Oslash", "Ugrave", "Uacute", "Ucirc",
  1333. // 220
  1334. "Uuml", "Yacute", "THORN", "szlig", "agrave",
  1335. "aacute", "acirc", "atilde", "auml", "aring",
  1336. // 230
  1337. "aelig", "ccedil", "egrave", "eacute", "ecirc",
  1338. "euml", "igrave", "iacute", "icirc", "iuml",
  1339. // 240
  1340. "eth", "ntilde", "ograve", "oacute", "ocirc",
  1341. "otilde", "ouml", "divide", "oslash", "ugrave",
  1342. // 250
  1343. "uacute", "ucirc", "uuml", "yacute", "thorn",
  1344. "yuml"
  1345. };
  1346. // From "HTMLsymbolx.ent" ... some of the symbols that
  1347. // we can conveniently handle. Entities for the Greek.
  1348. // alphabet (upper and lower cases) are compact.
  1349. private static final String HTMLsymbolx_GR [] = {
  1350. // 913
  1351. "Alpha", "Beta", "Gamma", "Delta", "Epsilon",
  1352. "Zeta", "Eta", "Theta", "Iota", "Kappa",
  1353. // 923
  1354. "Lambda", "Mu", "Nu", "Xi", "Omicron",
  1355. "Pi", "Rho", null, "Sigma", "Tau",
  1356. // 933
  1357. "Upsilon", "Phi", "Chi", "Psi", "Omega"
  1358. };
  1359. private static final String HTMLsymbolx_gr [] = {
  1360. // 945
  1361. "alpha", "beta", "gamma", "delta", "epsilon",
  1362. "zeta", "eta", "theta", "iota", "kappa",
  1363. // 955
  1364. "lambda", "mu", "nu", "xi", "omicron",
  1365. "pi", "rho", "sigmaf", "sigma", "tau",
  1366. // 965
  1367. "upsilon", "phi", "chi", "psi", "omega"
  1368. };
  1369. // General routine to write text and substitute predefined
  1370. // entities (XML, and a special case for XHTML) as needed.
  1371. private void escapeChars (char buf [], int off, int len, int code)
  1372. throws SAXException, IOException
  1373. {
  1374. int first = 0;
  1375. if (off < 0) {
  1376. off = 0;
  1377. len = buf.length;
  1378. }
  1379. for (int i = 0; i < len; i++) {
  1380. String esc;
  1381. char c = buf [off + i];
  1382. switch (c) {
  1383. // Note that CTX_ATTRIBUTE isn't explicitly tested here;
  1384. // all syntax delimiters are escaped in CTX_ATTRIBUTE,
  1385. // otherwise it's similar to CTX_CONTENT
  1386. // ampersand flags entity references; entity replacement
  1387. // text has unexpanded references, other text doesn't.
  1388. case '&':
  1389. if (code == CTX_ENTITY || code == CTX_UNPARSED)
  1390. continue;
  1391. esc = "amp";
  1392. break;
  1393. // attributes and text may NOT have literal '<', but
  1394. // entities may have markup constructs
  1395. case '<':
  1396. if (code == CTX_ENTITY || code == CTX_UNPARSED)
  1397. continue;
  1398. esc = "lt";
  1399. break;
  1400. // as above re markup constructs; but otherwise
  1401. // except when canonicalizing, this is for consistency
  1402. case '>':
  1403. if (code == CTX_ENTITY || code == CTX_UNPARSED)
  1404. continue;
  1405. esc = "gt";
  1406. break;
  1407. case '\'':
  1408. if (code == CTX_CONTENT || code == CTX_UNPARSED)
  1409. continue;
  1410. if (canonical)
  1411. continue;
  1412. esc = "apos";
  1413. break;
  1414. // needed when printing quoted attribute/entity values
  1415. case '"':
  1416. if (code == CTX_CONTENT || code == CTX_UNPARSED)
  1417. continue;
  1418. esc = "quot";
  1419. break;
  1420. // make line ends work per host OS convention
  1421. case '\n':
  1422. esc = eol;
  1423. break;
  1424. //
  1425. // No other characters NEED special treatment ... except
  1426. // for encoding-specific issues, like whether the character
  1427. // can really be represented in that encoding.
  1428. //
  1429. default:
  1430. //
  1431. // There are characters we can never write safely; getting
  1432. // them is an error.
  1433. //
  1434. // (a) They're never legal in XML ... detected by range
  1435. // checks, and (eventually) by remerging surrogate
  1436. // pairs on output. (Easy error for apps to prevent.)
  1437. //
  1438. // (b) This encoding can't represent them, and we
  1439. // can't make reference substitution (e.g. inside
  1440. // CDATA sections, names, PI data, etc). (Hard for
  1441. // apps to prevent, except by using UTF-8 or UTF-16
  1442. // as their output encoding.)
  1443. //
  1444. // We know a very little bit about what characters
  1445. // the US-ASCII and ISO-8859-1 encodings support. For
  1446. // other encodings we can't detect the second type of
  1447. // error at all. (Never an issue for UTF-8 or UTF-16.)
  1448. //
  1449. // FIXME: CR in CDATA is an error; in text, turn to a char ref
  1450. // FIXME: CR/LF/TAB in attributes should become char refs
  1451. if ((c > 0xfffd)
  1452. || ((c < 0x0020) && !((c == 0x0009)
  1453. || (c == 0x000A) || (c == 0x000D)))
  1454. || (((c & dangerMask) != 0)
  1455. && (code == CTX_UNPARSED))) {
  1456. // if case (b) in CDATA, we might end the section,
  1457. // write a reference, then restart ... possible
  1458. // in one DOM L3 draft.
  1459. throw new CharConversionException (
  1460. "Illegal or non-writable character: U+"
  1461. + Integer.toHexString (c));
  1462. }
  1463. //
  1464. // If the output encoding represents the character
  1465. // directly, let it do so! Else we'll escape it.
  1466. //
  1467. if ((c & dangerMask) == 0)
  1468. continue;
  1469. esc = null;
  1470. // Avoid numeric refs where symbolic ones exist, as
  1471. // symbolic ones make more sense to humans reading!
  1472. if (xhtml) {
  1473. // all the HTMLlat1x.ent entities
  1474. // (all the "ISO-8859-1" characters)
  1475. if (c >= 160 && c <= 255)
  1476. esc = HTMLlat1x [c - 160];
  1477. // not quite half the HTMLsymbolx.ent entities
  1478. else if (c >= 913 && c <= 937)
  1479. esc = HTMLsymbolx_GR [c - 913];
  1480. else if (c >= 945 && c <= 969)
  1481. esc = HTMLsymbolx_gr [c - 945];
  1482. else switch (c) {
  1483. // all of the HTMLspecialx.ent entities
  1484. case 338: esc = "OElig"; break;
  1485. case 339: esc = "oelig"; break;
  1486. case 352: esc = "Scaron"; break;
  1487. case 353: esc = "scaron"; break;
  1488. case 376: esc = "Yuml"; break;
  1489. case 710: esc = "circ"; break;
  1490. case 732: esc = "tilde"; break;
  1491. case 8194: esc = "ensp"; break;
  1492. case 8195: esc = "emsp"; break;
  1493. case 8201: esc = "thinsp"; break;
  1494. case 8204: esc = "zwnj"; break;
  1495. case 8205: esc = "zwj"; break;
  1496. case 8206: esc = "lrm"; break;
  1497. case 8207: esc = "rlm"; break;
  1498. case 8211: esc = "ndash"; break;
  1499. case 8212: esc = "mdash"; break;
  1500. case 8216: esc = "lsquo"; break;
  1501. case 8217: esc = "rsquo"; break;
  1502. case 8218: esc = "sbquo"; break;
  1503. case 8220: esc = "ldquo"; break;
  1504. case 8221: esc = "rdquo"; break;
  1505. case 8222: esc = "bdquo"; break;
  1506. case 8224: esc = "dagger"; break;
  1507. case 8225: esc = "Dagger"; break;
  1508. case 8240: esc = "permil"; break;
  1509. case 8249: esc = "lsaquo"; break;
  1510. case 8250: esc = "rsaquo"; break;
  1511. case 8364: esc = "euro"; break;
  1512. // the other HTMLsymbox.ent entities
  1513. case 402: esc = "fnof"; break;
  1514. case 977: esc = "thetasym"; break;
  1515. case 978: esc = "upsih"; break;
  1516. case 982: esc = "piv"; break;
  1517. case 8226: esc = "bull"; break;
  1518. case 8230: esc = "hellip"; break;
  1519. case 8242: esc = "prime"; break;
  1520. case 8243: esc = "Prime"; break;
  1521. case 8254: esc = "oline"; break;
  1522. case 8260: esc = "frasl"; break;
  1523. case 8472: esc = "weierp"; break;
  1524. case 8465: esc = "image"; break;
  1525. case 8476: esc = "real"; break;
  1526. case 8482: esc = "trade"; break;
  1527. case 8501: esc = "alefsym"; break;
  1528. case 8592: esc = "larr"; break;
  1529. case 8593: esc = "uarr"; break;
  1530. case 8594: esc = "rarr"; break;
  1531. case 8595: esc = "darr"; break;
  1532. case 8596: esc = "harr"; break;
  1533. case 8629: esc = "crarr"; break;
  1534. case 8656: esc = "lArr"; break;
  1535. case 8657: esc = "uArr"; break;
  1536. case 8658: esc = "rArr"; break;
  1537. case 8659: esc = "dArr"; break;
  1538. case 8660: esc = "hArr"; break;
  1539. case 8704: esc = "forall"; break;
  1540. case 8706: esc = "part"; break;
  1541. case 8707: esc = "exist"; break;
  1542. case 8709: esc = "empty"; break;
  1543. case 8711: esc = "nabla"; break;
  1544. case 8712: esc = "isin"; break;
  1545. case 8713: esc = "notin"; break;
  1546. case 8715: esc = "ni"; break;
  1547. case 8719: esc = "prod"; break;
  1548. case 8721: esc = "sum"; break;
  1549. case 8722: esc = "minus"; break;
  1550. case 8727: esc = "lowast"; break;
  1551. case 8730: esc = "radic"; break;
  1552. case 8733: esc = "prop"; break;
  1553. case 8734: esc = "infin"; break;
  1554. case 8736: esc = "ang"; break;
  1555. case 8743: esc = "and"; break;
  1556. case 8744: esc = "or"; break;
  1557. case 8745: esc = "cap"; break;
  1558. case 8746: esc = "cup"; break;
  1559. case 8747: esc = "int"; break;
  1560. case 8756: esc = "there4"; break;
  1561. case 8764: esc = "sim"; break;
  1562. case 8773: esc = "cong"; break;
  1563. case 8776: esc = "asymp"; break;
  1564. case 8800: esc = "ne"; break;
  1565. case 8801: esc = "equiv"; break;
  1566. case 8804: esc = "le"; break;
  1567. case 8805: esc = "ge"; break;
  1568. case 8834: esc = "sub"; break;
  1569. case 8835: esc = "sup"; break;
  1570. case 8836: esc = "nsub"; break;
  1571. case 8838: esc = "sube"; break;
  1572. case 8839: esc = "supe"; break;
  1573. case 8853: esc = "oplus"; break;
  1574. case 8855: esc = "otimes"; break;
  1575. case 8869: esc = "perp"; break;
  1576. case 8901: esc = "sdot"; break;
  1577. case 8968: esc = "lceil"; break;
  1578. case 8969: esc = "rceil"; break;
  1579. case 8970: esc = "lfloor"; break;
  1580. case 8971: esc = "rfloor"; break;
  1581. case 9001: esc = "lang"; break;
  1582. case 9002: esc = "rang"; break;
  1583. case 9674: esc = "loz"; break;
  1584. case 9824: esc = "spades"; break;
  1585. case 9827: esc = "clubs"; break;
  1586. case 9829: esc = "hearts"; break;
  1587. case 9830: esc = "diams"; break;
  1588. }
  1589. }
  1590. // else escape with numeric char refs
  1591. if (esc == null) {
  1592. stringBuf.setLength (0);
  1593. stringBuf.append ("#x");
  1594. stringBuf.append (Integer.toHexString (c).toUpperCase ());
  1595. esc = stringBuf.toString ();
  1596. // FIXME: We don't write surrogate pairs correctly.
  1597. // They should work as one ref per character, since
  1598. // each pair is one character. For reading back into
  1599. // Unicode, it matters beginning in Unicode 3.1 ...
  1600. }
  1601. break;
  1602. }
  1603. if (i != first)
  1604. rawWrite (buf, off + first, i - first);
  1605. first = i + 1;
  1606. if (esc == eol)
  1607. newline ();
  1608. else {
  1609. rawWrite ('&');
  1610. rawWrite (esc);
  1611. rawWrite (';');
  1612. }
  1613. }
  1614. if (first < len)
  1615. rawWrite (buf, off + first, len - first);
  1616. }
  1617. private void newline ()
  1618. throws SAXException, IOException
  1619. {
  1620. out.write (eol);
  1621. column = 0;
  1622. }
  1623. private void doIndent ()
  1624. throws SAXException, IOException
  1625. {
  1626. int space = elementNestLevel * 2;
  1627. newline ();
  1628. column = space;
  1629. // track tabs only at line starts
  1630. while (space > 8) {
  1631. out.write ("\t");
  1632. space -= 8;
  1633. }
  1634. while (space > 0) {
  1635. out.write (" ");
  1636. space -= 2;
  1637. }
  1638. }
  1639. private void rawWrite (char c)
  1640. throws IOException
  1641. {
  1642. out.write (c);
  1643. column++;
  1644. }
  1645. private void rawWrite (String s)
  1646. throws SAXException, IOException
  1647. {
  1648. if (prettyPrinting && "default".equals (space.peek ())) {
  1649. char data [] = s.toCharArray ();
  1650. rawWrite (data, 0, data.length);
  1651. } else {
  1652. out.write (s);
  1653. column += s.length ();
  1654. }
  1655. }
  1656. // NOTE: if xhtml, the REC gives some rules about whitespace
  1657. // which we could follow ... notably, many places where conformant
  1658. // agents "must" consolidate/normalize whitespace. Line ends can
  1659. // be removed there, etc. This may not be the right place to do
  1660. // such mappings though.
  1661. // Line buffering may help clarify algorithms and improve results.
  1662. // It's likely xml:space needs more attention.
  1663. private void rawWrite (char buf [], int offset, int length)
  1664. throws SAXException, IOException
  1665. {
  1666. boolean wrap;
  1667. if (prettyPrinting && space.empty ())
  1668. fatal ("stack discipline", null);
  1669. wrap = prettyPrinting && "default".equals (space.peek ());
  1670. if (!wrap) {
  1671. out.write (buf, offset, length);
  1672. column += length;
  1673. return;
  1674. }
  1675. // we're pretty printing and want to fill lines out only
  1676. // to the desired line length.
  1677. while (length > 0) {
  1678. int target = lineLength - column;
  1679. boolean wrote = false;
  1680. // Do we even have a problem?
  1681. if (target > length || noWrap) {
  1682. out.write (buf, offset, length);
  1683. column += length;
  1684. return;
  1685. }
  1686. // break the line at a space character, trying to fill
  1687. // as much of the line as possible.
  1688. char c;
  1689. for (int i = target - 1; i >= 0; i--) {
  1690. if ((c = buf [offset + i]) == ' ' || c == '\t') {
  1691. i++;
  1692. out.write (buf, offset, i);
  1693. doIndent ();
  1694. offset += i;
  1695. length -= i;
  1696. wrote = true;
  1697. break;
  1698. }
  1699. }
  1700. if (wrote)
  1701. continue;
  1702. // no space character permitting break before target
  1703. // line length is filled. So, take the next one.
  1704. if (target < 0)
  1705. target = 0;
  1706. for (int i = target; i < length; i++)
  1707. if ((c = buf [offset + i]) == ' ' || c == '\t') {
  1708. i++;
  1709. out.write (buf, offset, i);
  1710. doIndent ();
  1711. offset += i;
  1712. length -= i;
  1713. wrote = true;
  1714. break;
  1715. }
  1716. if (wrote)
  1717. continue;
  1718. // no such luck.
  1719. out.write (buf, offset, length);
  1720. column += length;
  1721. break;
  1722. }
  1723. }
  1724. }