<?php
  /**
   * Represents an XHTML 1.1 module, with information on elements, tags
   * and attributes.
   * @note Even though this is technically XHTML 1.1, it is also used for
   *       regular HTML parsing. We are using modularization as a convenient
   *       way to represent the internals of HTMLDefinition, and our
   *       implementation is by no means conforming and does not directly
   *       use the normative DTDs or XML schemas.
   * @note The public variables in a module should almost directly
   *       correspond to the variables in HTMLPurifier_HTMLDefinition.
   *       However, the prefix info carries no special meaning in these
   *       objects (include it anyway if that's the correspondence though).
   * @todo Consider making some member functions protected
   */
  class HTMLPurifier_HTMLModule
  {
      // -- Overloadable ----------------------------------------------------

      /**
       * Short unique string identifier of the module.
       * @type string
       */
      public $name;

      /**
       * Informally, a list of elements this module changes.
       * Not used in any significant way.
       * @type array
       */
      public $elements = array();

      /**
       * Associative array of element names to element definitions.
       * Some definitions may be incomplete, to be merged in later
       * with the full definition.
       * @type array
       */
      public $info = array();

      /**
       * Associative array of content set names to content set additions.
       * This is commonly used to, say, add an A element to the Inline
       * content set. This corresponds to an internal variable $content_sets
       * and NOT info_content_sets member variable of HTMLDefinition.
       * @type array
       */
      public $content_sets = array();

      /**
       * Associative array of attribute collection names to attribute
       * collection additions. More rarely used for adding attributes to
       * the global collections. Example is the StyleAttribute module adding
       * the style attribute to the Core. Corresponds to HTMLDefinition's
       * attr_collections->info, since the object's data is only info,
       * with extra behavior associated with it.
       * @type array
       */
      public $attr_collections = array();

      /**
       * Associative array of deprecated tag name to HTMLPurifier_TagTransform.
       * @type array
       */
      public $info_tag_transform = array();

      /**
       * List of HTMLPurifier_AttrTransform to be performed before validation.
       * @type array
       */
      public $info_attr_transform_pre = array();

      /**
       * List of HTMLPurifier_AttrTransform to be performed after validation.
       * @type array
       */
      public $info_attr_transform_post = array();

      /**
       * List of HTMLPurifier_Injector to be performed during well-formedness fixing.
       * An injector will only be invoked if all of its prerequisites are met;
       * if an injector fails setup, there will be no error; it will simply be
       * silently disabled.
       * @type array
       */
      public $info_injector = array();

      /**
       * Boolean flag that indicates whether or not getChildDef is implemented.
       * For optimization reasons: may save a call to a function. Be sure
       * to set it if you do implement getChildDef(), otherwise it will have
       * no effect!
       * @type bool
       */
      public $defines_child_def = false;

      /**
       * Boolean flag whether or not this module is safe. If it is not safe, all
       * of its members are unsafe. Modules are safe by default (this might be
       * slightly dangerous, but it doesn't make much sense to force HTML Purifier,
       * which is based off of safe HTML, to explicitly say, "This is safe," even
       * though there are modules which are "unsafe")
       *
       * @type bool
       * @note Previously, safety could be applied at an element level granularity.
       *       We've removed this ability, so in order to add "unsafe" elements
       *       or attributes, a dedicated module with this property set to false
       *       must be used.
       */
      public $safe = true;

      /**
       * Retrieves a proper HTMLPurifier_ChildDef subclass based on
       * content_model and content_model_type member variables of
       * the HTMLPurifier_ElementDef class. There is a similar function
       * in HTMLPurifier_HTMLDefinition.
       *
       * This base implementation is a stub that always returns false;
       * modules that override it should also set $defines_child_def.
       *
       * @param HTMLPurifier_ElementDef $def
       * @return HTMLPurifier_ChildDef|false Child definition, or false when
       *         this module does not provide one (always the case here)
       */
      public function getChildDef($def)
      {
          return false;
      }

      // -- Convenience -----------------------------------------------------

      /**
       * Convenience function that sets up a new element.
       *
       * Appends the name to $elements, optionally registers it in a content
       * set, and stores a freshly created definition in $info[$element]
       * (replacing any definition already stored under that name).
       *
       * @param string $element Name of element to add
       * @param string|bool $type What content set should element be registered to?
       *              Set as false to skip this step.
       * @param string|HTMLPurifier_ChildDef $contents Allowed children in form of:
       *              "$content_model_type: $content_model"
       * @param array|string $attr_includes What attribute collections to register to
       *              element?
       * @param array $attr What unique attributes does the element define?
       * @see HTMLPurifier_ElementDef:: for in-depth descriptions of these parameters.
       * @return HTMLPurifier_ElementDef Created element definition object, so you
       *         can set advanced parameters
       */
      public function addElement($element, $type, $contents, $attr_includes = array(), $attr = array())
      {
          $this->elements[] = $element;
          // parse content_model
          list($content_model_type, $content_model) = $this->parseContents($contents);
          // merge in attribute inclusions (stored under $attr[0])
          $this->mergeInAttrIncludes($attr, $attr_includes);
          // add element to content sets
          if ($type) {
              $this->addElementToContentSet($element, $type);
          }
          // create element
          $this->info[$element] = HTMLPurifier_ElementDef::create(
              $content_model,
              $content_model_type,
              $attr
          );
          // literal object $contents means direct child manipulation
          if (!is_string($contents)) {
              $this->info[$element]->child = $contents;
          }
          return $this->info[$element];
      }

      /**
       * Convenience function that creates a totally blank, non-standalone
       * element.
       *
       * If a definition for the element already exists in this module, an
       * error is triggered and the existing definition is returned untouched.
       *
       * @param string $element Name of element to create
       * @return HTMLPurifier_ElementDef Created (or pre-existing) element
       */
      public function addBlankElement($element)
      {
          if (!isset($this->info[$element])) {
              $this->elements[] = $element;
              $this->info[$element] = new HTMLPurifier_ElementDef();
              $this->info[$element]->standalone = false;
          } else {
              trigger_error("Definition for $element already exists in module, cannot redefine");
          }
          return $this->info[$element];
      }

      /**
       * Convenience function that registers an element to a content set.
       *
       * Content set entries are accumulated as a ' | '-separated string.
       *
       * @param string $element Element to register
       * @param string $type Name content set (warning: case sensitive, usually upper-case
       *        first letter)
       */
      public function addElementToContentSet($element, $type)
      {
          if (!isset($this->content_sets[$type])) {
              $this->content_sets[$type] = '';
          } else {
              $this->content_sets[$type] .= ' | ';
          }
          $this->content_sets[$type] .= $element;
      }

      /**
       * Convenience function that transforms single-string contents
       * into separate content model and content model type.
       *
       * The shorthands 'Empty', 'Inline' and 'Flow' are expanded directly.
       * Any other string is split on its FIRST colon only; the type is
       * trimmed and lower-cased, the model is trimmed. A string without a
       * colon yields an empty content model.
       *
       * @param string $contents Allowed children in form of:
       *                  "$content_model_type: $content_model"
       * @return array Two-element array: (content model type, content model)
       * @note If contents is an object, an array of two nulls will be
       *       returned, and the caller needs to take the original $contents
       *       and use it directly.
       */
      public function parseContents($contents)
      {
          if (!is_string($contents)) {
              return array(null, null);
          } // defer
          switch ($contents) {
              // check for shorthand content model forms
              case 'Empty':
                  return array('empty', '');
              case 'Inline':
                  return array('optional', 'Inline | #PCDATA');
              case 'Flow':
                  return array('optional', 'Flow | #PCDATA');
          }
          // Limit the split to two pieces so a colon inside the content model
          // is preserved, and pad so a missing colon gives '' instead of
          // reading an undefined offset.
          $parts = array_pad(explode(':', $contents, 2), 2, '');
          $content_model_type = strtolower(trim($parts[0]));
          $content_model = trim($parts[1]);
          return array($content_model_type, $content_model);
      }

      /**
       * Convenience function that merges a list of attribute includes into
       * an attribute array.
       *
       * The includes are normalized to an array (an empty non-array value
       * becomes an empty array, any other non-array value is wrapped) and
       * stored under key 0 of $attr, replacing whatever was there.
       *
       * @param array $attr Reference to attr array to modify
       * @param array|string $attr_includes Array of includes / string include to merge in
       */
      public function mergeInAttrIncludes(&$attr, $attr_includes)
      {
          if (!is_array($attr_includes)) {
              if (empty($attr_includes)) {
                  $attr_includes = array();
              } else {
                  $attr_includes = array($attr_includes);
              }
          }
          $attr[0] = $attr_includes;
      }

      /**
       * Convenience function that generates a lookup table with boolean
       * true as value.
       *
       * @param array|string $list List of values to turn into a lookup
       * @note You can also pass an arbitrary number of arguments in
       *       place of the regular argument; this form is used whenever
       *       the first argument is a string.
       * @note Null values are skipped.
       * @return array array equivalent of list
       */
      public function makeLookup($list)
      {
          if (is_string($list)) {
              // variadic form: every argument is a lookup key
              $list = func_get_args();
          }
          $ret = array();
          foreach ($list as $value) {
              if (is_null($value)) {
                  continue;
              }
              $ret[$value] = true;
          }
          return $ret;
      }

      /**
       * Lazy load construction of the module after determining whether
       * or not it's needed, and also when a finalized configuration object
       * is available. The base implementation does nothing.
       *
       * @param HTMLPurifier_Config $config
       */
      public function setup($config)
      {
      }
  }
  // vim: et sw=4 sts=4