RemexDriver.php 1.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758
  1. <?php
  2. namespace MediaWiki\Tidy;
  3. use RemexHtml\Serializer\Serializer;
  4. use RemexHtml\Tokenizer\Tokenizer;
  5. use RemexHtml\TreeBuilder\Dispatcher;
  6. use RemexHtml\TreeBuilder\TreeBuilder;
  7. use RemexHtml\TreeBuilder\TreeMutationTracer;
  /**
   * Tidy driver that pushes text through a RemexHtml tokenizer, tree builder
   * and serializer chain and returns the serializer's result.
   */
  class RemexDriver extends TidyDriverBase {
      /** @var mixed Value of the 'treeMutationTrace' config key; truthy enables tracing */
      private $trace;

      /** @var mixed Value of the 'pwrap' config key; truthy routes output through RemexCompatMunger */
      private $pwrap;

      /**
       * @param array $config Driver configuration. Keys read by this class:
       *   - treeMutationTrace: when truthy, tree mutations are reported through
       *     wfDebug(). Defaults to false.
       *   - pwrap: when truthy, a RemexCompatMunger is placed in front of the
       *     serializer. Defaults to true.
       *   The array, with those defaults filled in, is forwarded to the parent
       *   constructor.
       */
      public function __construct( array $config ) {
          // Array union keeps every caller-supplied value and only fills in missing keys.
          $config += [
              'treeMutationTrace' => false,
              'pwrap' => true
          ];
          $this->trace = $config['treeMutationTrace'];
          $this->pwrap = $config['pwrap'];
          parent::__construct( $config );
      }

      /**
       * Run $text through the RemexHtml pipeline, parsed as a fragment of an
       * HTML <body> element.
       *
       * No native parameter or return types are declared here because the
       * parent method's signature is not visible from this file.
       *
       * @param mixed $text Input handed to the tokenizer (presumably an HTML
       *   string; confirm against TidyDriverBase).
       * @return mixed Whatever Serializer::getResult() returns (presumably the
       *   serialized HTML string).
       */
      public function tidy( $text ) {
          $formatter = new RemexCompatFormatter();
          $serializer = new Serializer( $formatter );

          // Each stage wraps the next one; optional stages are skipped by
          // handing the downstream handler through unchanged.
          $munger = $this->pwrap
              ? new RemexCompatMunger( $serializer )
              : $serializer;

          if ( $this->trace ) {
              // The callback does not use $this, so it is declared static.
              $tracer = new TreeMutationTracer( $munger, static function ( $msg ) {
                  wfDebug( "RemexHtml: $msg" );
              } );
          } else {
              $tracer = $munger;
          }

          $treeBuilder = new TreeBuilder( $tracer, [
              'ignoreErrors' => true,
              'ignoreNulls' => true,
          ] );
          $dispatcher = new Dispatcher( $treeBuilder );
          $tokenizer = new Tokenizer( $dispatcher, $text, [
              'ignoreErrors' => true,
              'ignoreCharRefs' => true,
              'ignoreNulls' => true,
              'skipPreprocess' => true,
          ] );

          // Fragment mode: the context element is an HTML-namespace <body>.
          $tokenizer->execute( [
              'fragmentNamespace' => \RemexHtml\HTMLData::NS_HTML,
              'fragmentName' => 'body'
          ] );

          // The result is read from the serializer itself, even when a munger
          // or tracer sits in front of it.
          return $serializer->getResult();
      }
  }