DoParse.java 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303
  1. /* DoParse.java --
  2. Copyright (C) 1999,2000,2001 Free Software Foundation, Inc.
  3. This file is part of GNU Classpath.
  4. GNU Classpath is free software; you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation; either version 2, or (at your option)
  7. any later version.
  8. GNU Classpath is distributed in the hope that it will be useful, but
  9. WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with GNU Classpath; see the file COPYING. If not, write to the
  14. Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  15. 02110-1301 USA.
  16. Linking this library statically or dynamically with other modules is
  17. making a combined work based on this library. Thus, the terms and
  18. conditions of the GNU General Public License cover the whole
  19. combination.
  20. As a special exception, the copyright holders of this library give you
  21. permission to link this library with independent modules to produce an
  22. executable, regardless of the license terms of these independent
  23. modules, and to copy and distribute the resulting executable under
  24. terms of your choice, provided that you also meet, for each linked
  25. independent module, the terms and conditions of the license of that
  26. module. An independent module is a module which is not derived from
  27. or based on this library. If you modify this library, you may extend
  28. this exception to your version of the library, but you are not
  29. obligated to do so. If you do not wish to do so, delete this
  30. exception statement from your version. */
  31. package gnu.xml.util;
  32. import gnu.java.lang.CPStringBuilder;
  33. import java.io.IOException;
  34. import org.xml.sax.ErrorHandler;
  35. import org.xml.sax.InputSource;
  36. import org.xml.sax.SAXException;
  37. import org.xml.sax.SAXParseException;
  38. import org.xml.sax.XMLReader;
  39. import org.xml.sax.helpers.XMLReaderFactory;
  40. import gnu.xml.pipeline.EventConsumer;
  41. import gnu.xml.pipeline.EventFilter;
  42. import gnu.xml.pipeline.NSFilter;
  43. import gnu.xml.pipeline.PipelineFactory;
  44. import gnu.xml.pipeline.TeeConsumer;
  45. import gnu.xml.pipeline.ValidationConsumer;
  46. import gnu.xml.pipeline.WellFormednessFilter;
  47. /**
  48. * This class provides a driver which may be invoked from the command line
  49. * to process a document using a SAX2 parser and a specified XML processing
  50. * pipeline.
  51. * This facilitates some common types of command line tools, such as parsing an
  52. * XML document in order test it for well formedness or validity.
  53. *
  54. * <p>The SAX2 XMLReaderFactory should return a SAX2 XML parser which
  55. * supports both of the standardized extension handlers (for declaration
  56. * and lexical events). That parser will be used to produce events.
  57. *
  58. * <p>The first parameter to the command gives the name of the document that
  59. * will be given to that processor. If it is a file name, it is converted
  60. * to a URL first.
  61. *
  62. * <p>The second parameter describes a simple processing pipeline, and will
  63. * be used as input to {@link gnu.xml.pipeline.PipelineFactory}
  64. * methods which identify the processing to be done. Examples of such a
  65. * pipeline include <pre>
  66. *
  67. * nsfix | validate <em>to validate the input document </em>
  68. * nsfix | write ( stdout ) <em>to echo the file as XML text</em>
  69. * dom | nsfix | write ( stdout ) <em>parse into DOM, print the result</em>
  70. * </pre>
  71. *
  72. * <p> Relatively complex pipelines can be described on the command line, but
  73. * not all interesting ones will require as little configuration as can be done
  74. * in that way. Put filters like "nsfix", perhaps followed by "validate",
  75. * at the front of the pipeline so they can be optimized out if a parser
  76. * supports those modes natively.
  77. *
  78. * <p> If the parsing is aborted for any reason, the JVM will exit with a
  79. * failure code. If a validating parse was done then both validation and
  80. * well formedness errors will cause a failure. A non-validating parse
  81. * will report failure on well formedness errors.
  82. *
  83. * @see gnu.xml.pipeline.PipelineFactory
  84. *
  85. * @author David Brownell
  86. */
  87. final public class DoParse
  88. {
  89. private DoParse () { /* no instances allowed */ }
  90. // first reported nonrecoverable error
  91. private static SAXParseException fatal;
  92. // error categories
  93. private static int errorCount;
  94. private static int fatalCount;
  95. /**
  96. * Command line invoker for this class; pass a filename or URL
  97. * as the first argument, and a pipeline description as the second.
  98. * Make sure to use filters to condition the input to stages that
  99. * require it; an <em>nsfix</em> filter will be a common requirement,
  100. * to restore syntax that SAX2 parsers delete by default. Some
  101. * conditioning filters may be eliminated by setting parser options.
  102. * (For example, "nsfix" can set the "namespace-prefixes" feature to
  103. * a non-default value of "true". In the same way, "validate" can set
  104. * the "validation" feature to "true".)
  105. */
  106. public static void main (String argv [])
  107. throws IOException
  108. {
  109. int exitStatus = 1;
  110. if (argv.length != 2) {
  111. System.err.println ("Usage: DoParse [filename|URL] pipeline-spec");
  112. System.err.println ("Example pipeline specs:");
  113. System.err.println (" 'nsfix | validate'");
  114. System.err.println (
  115. " ... restore namespace syntax, validate");
  116. System.err.println (" 'nsfix | write ( stdout )'");
  117. System.err.println (
  118. " ... restore namespace syntax, write to stdout as XML"
  119. );
  120. System.exit (1);
  121. }
  122. try {
  123. //
  124. // Get input source for specified document (or try ;-)
  125. //
  126. argv [0] = Resolver.getURL (argv [0]);
  127. InputSource input = new InputSource (argv [0]);
  128. //
  129. // Get the producer, using the system default parser (which
  130. // can be overridden for this particular invocation).
  131. //
  132. // And the pipeline, using commandline options.
  133. //
  134. XMLReader producer;
  135. EventConsumer consumer;
  136. producer = XMLReaderFactory.createXMLReader ();
  137. //
  138. // XXX pipeline factory now has a pre-tokenized input
  139. // method, use it ... that way at least some params
  140. // can be written using quotes (have spaces, ...)
  141. //
  142. consumer = PipelineFactory.createPipeline (argv [1]);
  143. //
  144. // XXX want commandline option for tweaking error handler.
  145. // Want to be able to present warnings.
  146. //
  147. producer.setErrorHandler (new MyErrorHandler ());
  148. // XXX need facility enabling resolving to local DTDs
  149. //
  150. // Parse. The pipeline may get optimized a bit, so we
  151. // can't always fail cleanly for validation without taking
  152. // a look at the filter stages.
  153. //
  154. EventFilter.bind (producer, consumer);
  155. producer.parse (input);
  156. try {
  157. if (producer.getFeature (
  158. "http://org.xml/sax/features/validation"))
  159. exitStatus = ((errorCount + fatalCount) > 0) ? 1 : 0;
  160. else if (fatalCount == 0)
  161. exitStatus = 0;
  162. } catch (SAXException e) {
  163. if (hasValidator (consumer))
  164. exitStatus = ((errorCount + fatalCount) > 0) ? 1 : 0;
  165. else if (fatalCount == 0)
  166. exitStatus = 0;
  167. }
  168. } catch (java.net.MalformedURLException e) {
  169. System.err.println ("** Malformed URL: " + e.getMessage ());
  170. System.err.println ("Is '" + argv [0] + "' a non-existent file?");
  171. e.printStackTrace ();
  172. // e.g. FNF
  173. } catch (SAXParseException e) {
  174. if (e != fatal) {
  175. System.err.print (printParseException ("Parsing Aborted", e));
  176. e.printStackTrace ();
  177. if (e.getException () != null) {
  178. System.err.println ("++ Wrapped exception:");
  179. e.getException ().printStackTrace ();
  180. }
  181. }
  182. } catch (SAXException e) {
  183. Exception x = e;
  184. if (e.getException () != null)
  185. x = e.getException ();
  186. x.printStackTrace ();
  187. } catch (Throwable t) {
  188. t.printStackTrace ();
  189. }
  190. System.exit (exitStatus);
  191. }
  192. // returns true if saw a validator (before end or unrecognized node)
  193. // false otherwise
  194. private static boolean hasValidator (EventConsumer e)
  195. {
  196. if (e == null)
  197. return false;
  198. if (e instanceof ValidationConsumer)
  199. return true;
  200. if (e instanceof TeeConsumer) {
  201. TeeConsumer t = (TeeConsumer) e;
  202. return hasValidator (t.getFirst ())
  203. || hasValidator (t.getRest ());
  204. }
  205. if (e instanceof WellFormednessFilter
  206. || e instanceof NSFilter
  207. )
  208. return hasValidator (((EventFilter)e).getNext ());
  209. // else ... gee, we can't know. Assume not.
  210. return false;
  211. }
  212. static class MyErrorHandler implements ErrorHandler
  213. {
  214. // dump validation errors, but continue
  215. public void error (SAXParseException e)
  216. throws SAXParseException
  217. {
  218. errorCount++;
  219. System.err.print (printParseException ("Error", e));
  220. }
  221. public void warning (SAXParseException e)
  222. throws SAXParseException
  223. {
  224. // System.err.print (printParseException ("Warning", e));
  225. }
  226. // try to continue fatal errors, in case a parser reports more
  227. public void fatalError (SAXParseException e)
  228. throws SAXParseException
  229. {
  230. fatalCount++;
  231. if (fatal == null)
  232. fatal = e;
  233. System.err.print (printParseException ("Nonrecoverable Error", e));
  234. }
  235. }
  236. static private String printParseException (
  237. String label,
  238. SAXParseException e
  239. ) {
  240. CPStringBuilder buf = new CPStringBuilder ();
  241. int temp;
  242. buf.append ("** ");
  243. buf.append (label);
  244. buf.append (": ");
  245. buf.append (e.getMessage ());
  246. buf.append ('\n');
  247. if (e.getSystemId () != null) {
  248. buf.append (" URI: ");
  249. buf.append (e.getSystemId ());
  250. buf.append ('\n');
  251. }
  252. if ((temp = e.getLineNumber ()) != -1) {
  253. buf.append (" line: ");
  254. buf.append (temp);
  255. buf.append ('\n');
  256. }
  257. if ((temp = e.getColumnNumber ()) != -1) {
  258. buf.append (" char: ");
  259. buf.append (temp);
  260. buf.append ('\n');
  261. }
  262. return buf.toString ();
  263. }
  264. }