Table.php 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225
  1. <?php
  2. /**
  3. * Definition for tables. The general idea is to extract out all of the
  4. * essential bits, and then reconstruct it later.
  5. *
  6. * This is a bit confusing, because the DTDs and the W3C
  7. * validators seem to disagree on the appropriate definition. The
  8. * DTD claims:
  9. *
  10. * (CAPTION?, (COL*|COLGROUP*), THEAD?, TFOOT?, TBODY+)
  11. *
  12. * But actually, the HTML4 spec then has this to say:
  13. *
  14. * The TBODY start tag is always required except when the table
  15. * contains only one table body and no table head or foot sections.
  16. * The TBODY end tag may always be safely omitted.
  17. *
  18. * So the DTD is kind of wrong. The validator is, unfortunately, kind
  19. * of on crack.
  20. *
  21. * The definition changed again in XHTML1.1; and in my opinion, this
  22. * formulation makes the most sense.
  23. *
  24. * caption?, ( col* | colgroup* ), (( thead?, tfoot?, tbody+ ) | ( tr+ ))
  25. *
  26. * Essentially, we have two modes: thead/tfoot/tbody mode, and tr mode.
  27. * If we encounter a thead, tfoot or tbody, we are placed in the former
  28. * mode, and we *must* wrap any stray tr segments with a tbody. But if
  29. * we don't run into any of them, just having tr tags is OK.
  30. */
  class HTMLPurifier_ChildDef_Table extends HTMLPurifier_ChildDef
  {
      /**
       * Tables may not be empty: validateChildren() returns false when
       * there are no children or no row content.
       * @type bool
       */
      public $allow_empty = false;

      /**
       * Identifier for this kind of child definition.
       * @type string
       */
      public $type = 'table';

      /**
       * Names of the child elements this definition recognizes
       * (element name => true).
       * @type array
       */
      public $elements = array(
          'tr' => true,
          'tbody' => true,
          'thead' => true,
          'tfoot' => true,
          'caption' => true,
          'colgroup' => true,
          'col' => true
      );

      public function __construct()
      {
      }

      /**
       * Rebuilds a table's children in canonical order: leading
       * whitespace/comments, caption, col/colgroup, thead, tfoot, and
       * finally the row content (tbody/tr).
       *
       * Whitespace text and comment nodes are kept and travel with the
       * element they followed. Non-whitespace text and elements not
       * handled below are dropped. A duplicate caption is dropped; a
       * duplicate thead/tfoot is renamed to tbody (the node itself is
       * modified) and treated as row content. Once any thead, tfoot or
       * tbody has been seen, runs of bare tr elements are wrapped in a
       * newly created tbody.
       *
       * @param array $children
       * @param HTMLPurifier_Config $config Not used by this method.
       * @param HTMLPurifier_Context $context Not used by this method.
       * @return array|bool Reordered list of child nodes, or false when
       *         $children is empty or contains no tr/tbody content
       *         (presumably telling the caller to discard the table;
       *         confirm against HTMLPurifier_ChildDef).
       */
      public function validateChildren($children, $config, $context)
      {
          if (empty($children)) {
              return false;
          }

          // only one of these elements is allowed in a table
          $caption = false;
          $thead = false;
          $tfoot = false;

          // whitespace
          $initial_ws = array();
          $after_caption_ws = array();
          $after_thead_ws = array();
          $after_tfoot_ws = array();

          // as many of these as you want
          $cols = array();
          $content = array();

          $tbody_mode = false; // if true, then we need to wrap any stray
                               // <tr>s with a <tbody>.

          // $ws_accum always aliases the list that belongs to the most
          // recently accepted element, so trailing whitespace/comments
          // stay attached to it.
          $ws_accum =& $initial_ws;

          foreach ($children as $node) {
              // Comments are identified by type, before ->name is read.
              if ($node instanceof HTMLPurifier_Node_Comment) {
                  $ws_accum[] = $node;
                  continue;
              }
              switch ($node->name) {
                  case 'tbody':
                      $tbody_mode = true;
                      // fall through
                  case 'tr':
                      $content[] = $node;
                      $ws_accum =& $content;
                      break;
                  case 'caption':
                      // there can only be one caption!
                      if ($caption !== false) {
                          break;
                      }
                      $caption = $node;
                      $ws_accum =& $after_caption_ws;
                      break;
                  case 'thead':
                      $tbody_mode = true;
                      // XXX This breaks rendering properties with
                      // Firefox, which never floats a <thead> to
                      // the top. Ever.  (Our scheme will float the
                      // first <thead> to the top.)  So maybe
                      // <thead>s that are not first should be
                      // turned into <tbody>? Very tricky, indeed.
                      if ($thead === false) {
                          $thead = $node;
                          $ws_accum =& $after_thead_ws;
                      } else {
                          // Oops, there's a second one! What
                          // should we do?  Current behavior is to
                          // transmutate the first and last entries into
                          // tbody tags, and then put into content.
                          // Maybe a better idea is to *attach
                          // it* to the existing thead or tfoot?
                          // We don't do this, because Firefox
                          // doesn't float an extra tfoot to the
                          // bottom like it does for the first one.
                          $node->name = 'tbody';
                          $content[] = $node;
                          $ws_accum =& $content;
                      }
                      break;
                  case 'tfoot':
                      // see above for some caveats
                      $tbody_mode = true;
                      if ($tfoot === false) {
                          $tfoot = $node;
                          $ws_accum =& $after_tfoot_ws;
                      } else {
                          $node->name = 'tbody';
                          $content[] = $node;
                          $ws_accum =& $content;
                      }
                      break;
                  case 'colgroup':
                  case 'col':
                      $cols[] = $node;
                      $ws_accum =& $cols;
                      break;
                  case '#PCDATA':
                      // How is whitespace handled? We treat it as sticky to
                      // the *end* of the previous element. So all of the
                      // nonsense we have worked on is to keep things
                      // together.
                      if (!empty($node->is_whitespace)) {
                          $ws_accum[] = $node;
                      }
                      break;
              }
          }

          if (empty($content)) {
              return false;
          }

          $ret = $initial_ws;
          if ($caption !== false) {
              $ret[] = $caption;
              $ret = array_merge($ret, $after_caption_ws);
          }
          // $cols is always an array (possibly empty), so it can be
          // merged unconditionally.
          $ret = array_merge($ret, $cols);
          if ($thead !== false) {
              $ret[] = $thead;
              $ret = array_merge($ret, $after_thead_ws);
          }
          if ($tfoot !== false) {
              $ret[] = $tfoot;
              $ret = array_merge($ret, $after_tfoot_ws);
          }

          if ($tbody_mode) {
              // we have to shuffle tr into tbody
              $current_tr_tbody = null;

              foreach ($content as $node) {
                  // $content can hold comment nodes collected by the
                  // first pass. As there, detect them by type instead of
                  // by ->name, and place them exactly like whitespace so
                  // they are not lost in tbody mode.
                  if ($node instanceof HTMLPurifier_Node_Comment) {
                      if ($current_tr_tbody === null) {
                          $ret[] = $node;
                      } else {
                          $current_tr_tbody->children[] = $node;
                      }
                      continue;
                  }
                  switch ($node->name) {
                      case 'tbody':
                          // An explicit tbody ends any synthetic one.
                          $current_tr_tbody = null;
                          $ret[] = $node;
                          break;
                      case 'tr':
                          if ($current_tr_tbody === null) {
                              $current_tr_tbody = new HTMLPurifier_Node_Element('tbody');
                              $ret[] = $current_tr_tbody;
                          }
                          $current_tr_tbody->children[] = $node;
                          break;
                      case '#PCDATA':
                          //assert($node->is_whitespace);
                          if ($current_tr_tbody === null) {
                              $ret[] = $node;
                          } else {
                              $current_tr_tbody->children[] = $node;
                          }
                          break;
                  }
              }
          } else {
              $ret = array_merge($ret, $content);
          }

          return $ret;
      }
  }
  206. // vim: et sw=4 sts=4