TextSlotDiffRenderer.php 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256
  1. <?php
  2. /**
  3. * Renders a slot diff by doing a text diff on the native representation.
  4. *
  5. * This program is free software; you can redistribute it and/or modify
  6. * it under the terms of the GNU General Public License as published by
  7. * the Free Software Foundation; either version 2 of the License, or
  8. * (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. * GNU General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU General Public License along
  16. * with this program; if not, write to the Free Software Foundation, Inc.,
  17. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18. * http://www.gnu.org/copyleft/gpl.html
  19. *
  20. * @file
  21. * @ingroup DifferenceEngine
  22. */
  23. use MediaWiki\Shell\Shell;
  24. use Wikimedia\Assert\Assert;
  25. /**
  26. * Renders a slot diff by doing a text diff on the native representation.
  27. *
  28. * If you want to use this without content objects (to call getTextDiff() on some
  29. * non-content-related texts), obtain an instance with
  30. * ContentHandler::getForModelID( CONTENT_MODEL_TEXT )
  31. * ->getSlotDiffRenderer( RequestContext::getMain() )
  32. *
  33. * @ingroup DifferenceEngine
  34. */
  35. class TextSlotDiffRenderer extends SlotDiffRenderer {
  36. /** Use the PHP diff implementation (DiffEngine). */
  37. const ENGINE_PHP = 'php';
  38. /** Use the wikidiff2 PHP module. */
  39. const ENGINE_WIKIDIFF2 = 'wikidiff2';
  40. /** Use an external executable. */
  41. const ENGINE_EXTERNAL = 'external';
  42. /** @var IBufferingStatsdDataFactory|null */
  43. private $statsdDataFactory;
  44. /** @var Language|null The language this content is in. */
  45. private $language;
  46. /** @var string One of the ENGINE_* constants. */
  47. private $engine = self::ENGINE_PHP;
  48. /** @var string Path to an executable to be used as the diff engine. */
  49. private $externalEngine;
  50. /**
  51. * Convenience helper to use getTextDiff without an instance.
  52. * @param string $oldText
  53. * @param string $newText
  54. * @return string
  55. */
  56. public static function diff( $oldText, $newText ) {
  57. /** @var TextSlotDiffRenderer $slotDiffRenderer */
  58. $slotDiffRenderer = ContentHandler::getForModelID( CONTENT_MODEL_TEXT )
  59. ->getSlotDiffRenderer( RequestContext::getMain() );
  60. '@phan-var TextSlotDiffRenderer $slotDiffRenderer';
  61. return $slotDiffRenderer->getTextDiff( $oldText, $newText );
  62. }
  63. public function setStatsdDataFactory( IBufferingStatsdDataFactory $statsdDataFactory ) {
  64. $this->statsdDataFactory = $statsdDataFactory;
  65. }
  66. public function setLanguage( Language $language ) {
  67. $this->language = $language;
  68. }
  69. /**
  70. * Set which diff engine to use.
  71. * @param string $type One of the ENGINE_* constants.
  72. * @param string|null $executable Path to an external exectable, only when type is ENGINE_EXTERNAL.
  73. */
  74. public function setEngine( $type, $executable = null ) {
  75. $engines = [ self::ENGINE_PHP, self::ENGINE_WIKIDIFF2, self::ENGINE_EXTERNAL ];
  76. Assert::parameter( in_array( $type, $engines, true ), '$type',
  77. 'must be one of the TextSlotDiffRenderer::ENGINE_* constants' );
  78. if ( $type === self::ENGINE_EXTERNAL ) {
  79. Assert::parameter( is_string( $executable ) && is_executable( $executable ), '$executable',
  80. 'must be a path to a valid executable' );
  81. } else {
  82. Assert::parameter( is_null( $executable ), '$executable',
  83. 'must not be set unless $type is ENGINE_EXTERNAL' );
  84. }
  85. $this->engine = $type;
  86. $this->externalEngine = $executable;
  87. }
  88. /** @inheritDoc */
  89. public function getDiff( Content $oldContent = null, Content $newContent = null ) {
  90. $this->normalizeContents( $oldContent, $newContent, TextContent::class );
  91. $oldText = $oldContent->serialize();
  92. $newText = $newContent->serialize();
  93. return $this->getTextDiff( $oldText, $newText );
  94. }
  95. /**
  96. * Diff the text representations of two content objects (or just two pieces of text in general).
  97. * @param string $oldText
  98. * @param string $newText
  99. * @return string HTML, one or more <tr> tags.
  100. */
  101. public function getTextDiff( $oldText, $newText ) {
  102. Assert::parameterType( 'string', $oldText, '$oldText' );
  103. Assert::parameterType( 'string', $newText, '$newText' );
  104. $diff = function () use ( $oldText, $newText ) {
  105. $time = microtime( true );
  106. $result = $this->getTextDiffInternal( $oldText, $newText );
  107. $time = intval( ( microtime( true ) - $time ) * 1000 );
  108. if ( $this->statsdDataFactory ) {
  109. $this->statsdDataFactory->timing( 'diff_time', $time );
  110. }
  111. // TODO reimplement this using T142313
  112. /*
  113. // Log requests slower than 99th percentile
  114. if ( $time > 100 && $this->mOldPage && $this->mNewPage ) {
  115. wfDebugLog( 'diff',
  116. "$time ms diff: {$this->mOldid} -> {$this->mNewid} {$this->mNewPage}" );
  117. }
  118. */
  119. return $result;
  120. };
  121. /**
  122. * @param Status $status
  123. * @throws FatalError
  124. */
  125. $error = function ( $status ) {
  126. throw new FatalError( $status->getWikiText() );
  127. };
  128. // Use PoolCounter if the diff looks like it can be expensive
  129. if ( strlen( $oldText ) + strlen( $newText ) > 20000 ) {
  130. $work = new PoolCounterWorkViaCallback( 'diff',
  131. md5( $oldText ) . md5( $newText ),
  132. [ 'doWork' => $diff, 'error' => $error ]
  133. );
  134. return $work->execute();
  135. }
  136. return $diff();
  137. }
  138. /**
  139. * Diff the text representations of two content objects (or just two pieces of text in general).
  140. * This does the actual diffing, getTextDiff() wraps it with logging and resource limiting.
  141. * @param string $oldText
  142. * @param string $newText
  143. * @return string
  144. * @throws Exception
  145. */
  146. protected function getTextDiffInternal( $oldText, $newText ) {
  147. // TODO move most of this into three parallel implementations of a text diff generator
  148. // class, choose which one to use via dependecy injection
  149. $oldText = str_replace( "\r\n", "\n", $oldText );
  150. $newText = str_replace( "\r\n", "\n", $newText );
  151. // Better external diff engine, the 2 may some day be dropped
  152. // This one does the escaping and segmenting itself
  153. if ( $this->engine === self::ENGINE_WIKIDIFF2 ) {
  154. $wikidiff2Version = phpversion( 'wikidiff2' );
  155. if (
  156. $wikidiff2Version !== false &&
  157. version_compare( $wikidiff2Version, '1.5.0', '>=' ) &&
  158. version_compare( $wikidiff2Version, '1.8.0', '<' )
  159. ) {
  160. $text = wikidiff2_do_diff(
  161. $oldText,
  162. $newText,
  163. 2,
  164. 0
  165. );
  166. } else {
  167. // Don't pass the 4th parameter introduced in version 1.5.0 and removed in version 1.8.0
  168. $text = wikidiff2_do_diff(
  169. $oldText,
  170. $newText,
  171. 2
  172. );
  173. }
  174. return $text;
  175. } elseif ( $this->engine === self::ENGINE_EXTERNAL ) {
  176. # Diff via the shell
  177. $tmpDir = wfTempDir();
  178. $tempName1 = tempnam( $tmpDir, 'diff_' );
  179. $tempName2 = tempnam( $tmpDir, 'diff_' );
  180. $tempFile1 = fopen( $tempName1, "w" );
  181. if ( !$tempFile1 ) {
  182. return false;
  183. }
  184. $tempFile2 = fopen( $tempName2, "w" );
  185. if ( !$tempFile2 ) {
  186. return false;
  187. }
  188. fwrite( $tempFile1, $oldText );
  189. fwrite( $tempFile2, $newText );
  190. fclose( $tempFile1 );
  191. fclose( $tempFile2 );
  192. $cmd = [ $this->externalEngine, $tempName1, $tempName2 ];
  193. $result = Shell::command( $cmd )
  194. ->execute();
  195. $exitCode = $result->getExitCode();
  196. if ( $exitCode !== 0 ) {
  197. throw new Exception( "External diff command returned code {$exitCode}. Stderr: "
  198. . wfEscapeWikiText( $result->getStderr() )
  199. );
  200. }
  201. $difftext = $result->getStdout();
  202. unlink( $tempName1 );
  203. unlink( $tempName2 );
  204. return $difftext;
  205. } elseif ( $this->engine === self::ENGINE_PHP ) {
  206. if ( $this->language ) {
  207. $oldText = $this->language->segmentForDiff( $oldText );
  208. $newText = $this->language->segmentForDiff( $newText );
  209. }
  210. $ota = explode( "\n", $oldText );
  211. $nta = explode( "\n", $newText );
  212. $diffs = new Diff( $ota, $nta );
  213. $formatter = new TableDiffFormatter();
  214. $difftext = $formatter->format( $diffs );
  215. if ( $this->language ) {
  216. $difftext = $this->language->unsegmentForDiff( $difftext );
  217. }
  218. return $difftext;
  219. }
  220. throw new LogicException( 'Invalid engine: ' . $this->engine );
  221. }
  222. }