RaggettWrapper.php 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101
  1. <?php
  2. namespace MediaWiki\Tidy;
  3. use ParserOutput;
  4. use Parser;
  5. /**
  6. * Class used to hide mw:editsection tokens from Tidy so that it doesn't break them
  7. * or break on them. This is a bit of a hack for now, but hopefully in the future
  8. * we may create a real postprocessor or something that will replace this.
  9. * It's called wrapper because for now it basically takes over MWTidy::tidy's task
  10. * of wrapping the text in a xhtml block
  11. *
  12. * This re-uses some of the parser's UNIQ tricks, though some of it is private so it's
  13. * duplicated. Perhaps we should create an abstract marker hiding class.
  14. *
  15. * @ingroup Parser
  16. */
  17. class RaggettWrapper {
  18. /**
  19. * @var array
  20. */
  21. protected $mTokens;
  22. /**
  23. * @var int
  24. */
  25. protected $mMarkerIndex;
  26. /**
  27. * @param string $text
  28. * @return string
  29. */
  30. public function getWrapped( $text ) {
  31. $this->mTokens = [];
  32. $this->mMarkerIndex = 0;
  33. // Replace <mw:editsection> elements with placeholders
  34. $wrappedtext = preg_replace_callback( ParserOutput::EDITSECTION_REGEX,
  35. [ $this, 'replaceCallback' ], $text );
  36. // ...and <mw:toc> markers
  37. $wrappedtext = preg_replace_callback( '/\<\\/?mw:toc\>/',
  38. [ $this, 'replaceCallback' ], $wrappedtext );
  39. // ... and <math> tags
  40. $wrappedtext = preg_replace_callback( '/\<math(.*?)\<\\/math\>/s',
  41. [ $this, 'replaceCallback' ], $wrappedtext );
  42. // Modify inline Microdata <link> and <meta> elements so they say <html-link> and <html-meta> so
  43. // we can trick Tidy into not stripping them out by including them in tidy's new-empty-tags config
  44. $wrappedtext = preg_replace( '!<(link|meta)([^>]*?)(/{0,1}>)!', '<html-$1$2$3', $wrappedtext );
  45. // Similar for inline <style> tags, but those aren't empty.
  46. $wrappedtext = preg_replace_callback( '!<style([^>]*)>(.*?)</style>!s', function ( $m ) {
  47. return '<html-style' . $m[1] . '>'
  48. . $this->replaceCallback( [ $m[2] ] )
  49. . '</html-style>';
  50. }, $wrappedtext );
  51. // Preserve empty li elements (T49673) by abusing Tidy's datafld hack
  52. // The whitespace class is as in TY_(InitMap)
  53. $wrappedtext = preg_replace( "!<li>([ \r\n\t\f]*)</li>!",
  54. '<li datafld="" class="mw-empty-elt">\1</li>', $wrappedtext );
  55. // Wrap the whole thing in a doctype and body for Tidy.
  56. $wrappedtext = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"' .
  57. ' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html>' .
  58. '<head><title>test</title></head><body>' . $wrappedtext . '</body></html>';
  59. return $wrappedtext;
  60. }
  61. /**
  62. * @param array $m
  63. * @return string
  64. */
  65. private function replaceCallback( array $m ) {
  66. $marker = Parser::MARKER_PREFIX . "-item-{$this->mMarkerIndex}" . Parser::MARKER_SUFFIX;
  67. $this->mMarkerIndex++;
  68. $this->mTokens[$marker] = $m[0];
  69. return $marker;
  70. }
  71. /**
  72. * @param string $text
  73. * @return string
  74. */
  75. public function postprocess( $text ) {
  76. // Revert <html-{link,meta,style}> back to <{link,meta,style}>
  77. $text = preg_replace( '!<html-(link|meta)([^>]*?)(/{0,1}>)!', '<$1$2$3', $text );
  78. $text = preg_replace( '!<(/?)html-(style)([^>]*)>!', '<$1$2$3>', $text );
  79. // Remove datafld
  80. $text = str_replace( '<li datafld=""', '<li', $text );
  81. // Restore the contents of placeholder tokens
  82. $text = strtr( $text, $this->mTokens );
  83. return $text;
  84. }
  85. }