HTMLPurifier.php 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293
  1. <?php
  2. /*! @mainpage
  3. *
  4. * HTML Purifier is an HTML filter that will take an arbitrary snippet of
  5. * HTML and rigorously test, validate and filter it into a version that
  6. * is safe for output onto webpages. It achieves this by:
  7. *
  8. * -# Lexing (parsing into tokens) the document,
  9. * -# Executing various strategies on the tokens:
  10. * -# Removing all elements not in the whitelist,
  11. * -# Making the tokens well-formed,
  12. * -# Fixing the nesting of the nodes, and
  13. * -# Validating attributes of the nodes; and
  14. * -# Generating HTML from the purified tokens.
  15. *
  16. * However, most users will only need to interface with the HTMLPurifier
  17. * and HTMLPurifier_Config classes.
  18. */
  19. /*
  20. HTML Purifier 4.7.0 - Standards Compliant HTML Filtering
  21. Copyright (C) 2006-2008 Edward Z. Yang
  22. This library is free software; you can redistribute it and/or
  23. modify it under the terms of the GNU Lesser General Public
  24. License as published by the Free Software Foundation; either
  25. version 2.1 of the License, or (at your option) any later version.
  26. This library is distributed in the hope that it will be useful,
  27. but WITHOUT ANY WARRANTY; without even the implied warranty of
  28. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  29. Lesser General Public License for more details.
  30. You should have received a copy of the GNU Lesser General Public
  31. License along with this library; if not, write to the Free Software
  32. Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  33. */
  34. /**
  35. * Facade that coordinates HTML Purifier's subsystems in order to purify HTML.
  36. *
  37. * @note There are several points in which configuration can be specified
  38. * for HTML Purifier. The precedence of these (from lowest to
  39. * highest) is as follows:
  40. * -# Instance: new HTMLPurifier($config)
  41. * -# Invocation: purify($html, $config)
  42. * These configurations are entirely independent of each other and
  43. * are *not* merged (this behavior may change in the future).
  44. *
  45. * @todo We need an easier way to inject strategies using the configuration
  46. * object.
  47. */
  class HTMLPurifier
  {
      /**
       * Version of HTML Purifier.
       * @type string
       */
      public $version = '4.7.0';

      /**
       * Constant with version of HTML Purifier.
       */
      const VERSION = '4.7.0';

      /**
       * Global configuration object, used by purify() whenever no
       * per-call configuration is supplied.
       * @type HTMLPurifier_Config
       */
      public $config;

      /**
       * Array of extra filter objects to run on HTML,
       * for backwards compatibility. Populated only through the
       * deprecated addFilter() method.
       * @type HTMLPurifier_Filter[]
       */
      private $filters = array();

      /**
       * Single instance of HTML Purifier, managed by instance().
       * @type HTMLPurifier
       */
      private static $instance;

      /**
       * Strategy that turns un-purified tokens into purified tokens.
       * @type HTMLPurifier_Strategy_Core
       */
      protected $strategy;

      /**
       * Generator used by the most recent purify() call; it is rebuilt on
       * every call because it depends on that call's config and context.
       * @type HTMLPurifier_Generator
       */
      protected $generator;

      /**
       * Resultant context of last run purification.
       * Is an array of contexts (keyed like the input array, and nested
       * where the input was nested) if the last called method was
       * purifyArray().
       * @type HTMLPurifier_Context|array
       */
      public $context;

      /**
       * Initializes the purifier.
       *
       * @param HTMLPurifier_Config|mixed $config Optional HTMLPurifier_Config
       *          object for all instances of the purifier, if omitted, a default
       *          configuration is supplied (which can be overridden on a
       *          per-use basis).
       *          The parameter can also be any type that
       *          HTMLPurifier_Config::create() supports.
       */
      public function __construct($config = null)
      {
          $this->config = HTMLPurifier_Config::create($config);
          $this->strategy = new HTMLPurifier_Strategy_Core();
      }

      /**
       * Adds a filter to process the output. First come first serve.
       *
       * Always raises an E_USER_WARNING, but the filter is still registered
       * and will run after the configuration-defined filters.
       *
       * @deprecated Use configuration directives in the Filter namespace
       *             or Filter.Custom instead.
       *
       * @param HTMLPurifier_Filter $filter HTMLPurifier_Filter object
       */
      public function addFilter($filter)
      {
          trigger_error(
              'HTMLPurifier->addFilter() is deprecated, use configuration directives' .
              ' in the Filter namespace or Filter.Custom',
              E_USER_WARNING
          );
          $this->filters[] = $filter;
      }

      /**
       * Filters an HTML snippet/document to be XSS-free and standards-compliant.
       *
       * @param string $html String of HTML to purify
       * @param HTMLPurifier_Config|mixed $config Config object for this operation,
       *          if omitted, defaults to the config object specified during this
       *          object's construction. The parameter can also be any type
       *          that HTMLPurifier_Config::create() supports. It replaces the
       *          instance configuration for this call; the two are not merged.
       *
       * @return string Purified HTML
       */
      public function purify($html, $config = null)
      {
          // :TODO: make the config merge in, instead of replace
          $config = $config ? HTMLPurifier_Config::create($config) : $this->config;

          // implementation is partially environment dependent, partially
          // configuration dependent
          $lexer = HTMLPurifier_Lexer::create($config);

          $context = new HTMLPurifier_Context();

          // setup HTML generator
          $this->generator = new HTMLPurifier_Generator($config, $context);
          $context->register('Generator', $this->generator);

          // set up global context variables
          if ($config->get('Core.CollectErrors')) {
              // may get moved out if other facilities use it
              $language_factory = HTMLPurifier_LanguageFactory::instance();
              $language = $language_factory->create($config, $context);
              $context->register('Locale', $language);

              $error_collector = new HTMLPurifier_ErrorCollector($context);
              $context->register('ErrorCollector', $error_collector);
          }

          // setup id_accumulator context, necessary due to the fact that
          // AttrValidator can be called from many places
          $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context);
          $context->register('IDAccumulator', $id_accumulator);

          $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context);

          // setup filters
          $filters = $this->buildFilters($config);

          // maybe prepare(), but later

          // pre-filters run in registration order...
          foreach ($filters as $filter) {
              $html = $filter->preFilter($html, $config, $context);
          }

          // purified HTML: lex the un-purified HTML into tokens, run the
          // strategy over the tokens, then generate HTML from the result
          $tokens = $lexer->tokenizeHTML($html, $config, $context);
          $tokens = $this->strategy->execute($tokens, $config, $context);
          $html = $this->generator->generateFromTokens($tokens);

          // ...and post-filters run in the reverse of that order
          foreach (array_reverse($filters) as $filter) {
              $html = $filter->postFilter($html, $config, $context);
          }

          $html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context);

          // objects are handles, so a plain assignment (no reference
          // binding) is enough to expose this run's context
          $this->context = $context;
          return $html;
      }

      /**
       * Assembles the list of filters to apply during one purify() call.
       *
       * The order is: filters enabled through boolean Filter.* directives,
       * then the objects listed in Filter.Custom, then filters registered
       * through the deprecated addFilter() method.
       *
       * @param HTMLPurifier_Config $config Configuration for this operation
       *
       * @return HTMLPurifier_Filter[] Sequentially indexed list of filters
       */
      private function buildFilters($config)
      {
          $filter_flags = $config->getBatch('Filter');

          // guard against the directive being absent from the batch
          $custom_filters = isset($filter_flags['Custom'])
              ? $filter_flags['Custom']
              : array();
          unset($filter_flags['Custom']);

          $filters = array();
          foreach ($filter_flags as $filter => $flag) {
              // skip disabled filters, and dotted keys, which cannot form
              // a HTMLPurifier_Filter_* class name
              if (!$flag || strpos($filter, '.') !== false) {
                  continue;
              }
              $class = "HTMLPurifier_Filter_$filter";
              $filters[] = new $class;
          }

          foreach ($custom_filters as $filter) {
              // maybe "HTMLPurifier_Filter_$filter", but be consistent with AutoFormat
              $filters[] = $filter;
          }

          return array_merge($filters, $this->filters);
      }

      /**
       * Filters an array of HTML snippets.
       *
       * Keys are preserved. Values that are themselves arrays are purified
       * recursively, so nested structures keep their shape. Afterwards,
       * $this->context holds an array of contexts with the same keys
       * (and nesting) as the input.
       *
       * @param array $array_of_html Array of html snippets (may be nested)
       * @param HTMLPurifier_Config|mixed $config Optional config object for
       *          this operation. See HTMLPurifier::purify() for more details.
       *
       * @return array Array of purified HTML
       */
      public function purifyArray($array_of_html, $config = null)
      {
          $context_array = array();
          $purified = array();
          foreach ($array_of_html as $key => $value) {
              if (is_array($value)) {
                  // the recursive call leaves the nested context array
                  // in $this->context
                  $purified[$key] = $this->purifyArray($value, $config);
              } else {
                  $purified[$key] = $this->purify($value, $config);
              }
              $context_array[$key] = $this->context;
          }
          $this->context = $context_array;
          return $purified;
      }

      /**
       * Singleton for enforcing just one HTML Purifier in your system
       *
       * Passing a truthy $prototype always replaces the stored instance;
       * otherwise the stored instance is created on first use and reused.
       *
       * @param HTMLPurifier|HTMLPurifier_Config $prototype Optional prototype
       *                   HTMLPurifier instance to overload singleton with,
       *                   or HTMLPurifier_Config instance to configure the
       *                   generated version with.
       *
       * @return HTMLPurifier
       */
      public static function instance($prototype = null)
      {
          if (!self::$instance || $prototype) {
              if ($prototype instanceof HTMLPurifier) {
                  self::$instance = $prototype;
              } elseif ($prototype) {
                  self::$instance = new HTMLPurifier($prototype);
              } else {
                  self::$instance = new HTMLPurifier();
              }
          }
          return self::$instance;
      }

      /**
       * Singleton for enforcing just one HTML Purifier in your system
       *
       * @param HTMLPurifier|HTMLPurifier_Config $prototype Optional prototype
       *                   HTMLPurifier instance to overload singleton with,
       *                   or HTMLPurifier_Config instance to configure the
       *                   generated version with.
       *
       * @return HTMLPurifier
       * @note Backwards compatibility, see instance()
       */
      public static function getInstance($prototype = null)
      {
          return self::instance($prototype);
      }
  }
  259. // vim: et sw=4 sts=4