WordLevelDiff.php 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140
  1. <?php
  2. /**
  3. * Copyright © 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org>
  4. * You may copy this code freely under the conditions of the GPL.
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License as published by
  8. * the Free Software Foundation; either version 2 of the License, or
  9. * (at your option) any later version.
  10. *
  11. * This program is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. * GNU General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU General Public License along
  17. * with this program; if not, write to the Free Software Foundation, Inc.,
  18. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  19. * http://www.gnu.org/copyleft/gpl.html
  20. *
  21. * @file
  22. * @ingroup DifferenceEngine
  23. * @defgroup DifferenceEngine DifferenceEngine
  24. */
  25. use MediaWiki\Diff\ComplexityException;
  26. use MediaWiki\Diff\WordAccumulator;
  /**
   * Performs a word-level diff on several lines.
   *
   * The input lines are tokenised into words. The parent diff is computed on
   * the tokens without their optional trailing whitespace character, and the
   * resulting edits are then re-populated with the full tokens, so that the
   * whitespace is still present in the output of orig() and closing().
   *
   * @ingroup DifferenceEngine
   */
  class WordLevelDiff extends \Diff {
      /**
       * @inheritDoc
       */
      protected $bailoutComplexity = 40000000; // Roughly 6K x 6K words changed

      /**
       * Tokenise both sides, diff the stripped tokens and map the edits back
       * onto the unstripped tokens.
       *
       * @param string[] $linesBefore
       * @param string[] $linesAfter
       */
      public function __construct( $linesBefore, $linesAfter ) {
          list( $wordsBefore, $wordsBeforeStripped ) = $this->split( $linesBefore );
          list( $wordsAfter, $wordsAfterStripped ) = $this->split( $linesAfter );

          try {
              parent::__construct( $wordsBeforeStripped, $wordsAfterStripped );
          } catch ( ComplexityException $ex ) {
              // Too hard to diff, just show whole paragraph(s) as changed.
              // The fallback edit has to hold word lists rather than line lists:
              // the loop below sizes every slice by the number of elements in the
              // edit, so a line-based edit would keep only as many words as there
              // are lines and silently drop the rest of the text.
              $this->edits = [ new DiffOpChange( $wordsBeforeStripped, $wordsAfterStripped ) ];
          }

          // Swap the stripped tokens held by each edit for the matching run of
          // full tokens. $beforePos / $afterPos track how far into each word
          // list the previous edits have already consumed.
          $beforePos = 0;
          $afterPos = 0;
          foreach ( $this->edits as $edit ) {
              if ( is_array( $edit->orig ) ) {
                  $edit->orig = array_slice( $wordsBefore, $beforePos, count( $edit->orig ) );
                  $beforePos += count( $edit->orig );
              }

              if ( is_array( $edit->closing ) ) {
                  $edit->closing = array_slice( $wordsAfter, $afterPos, count( $edit->closing ) );
                  $afterPos += count( $edit->closing );
              }
          }
      }

      /**
       * Split lines into words.
       *
       * Two parallel lists are produced: the full matches (a token plus at most
       * one trailing non-newline whitespace character) and the bare tokens. A
       * "\n" entry is inserted into both lists between consecutive lines.
       *
       * @param string[] $lines
       *
       * @return array[] Two-element array: [ full words, stripped words ]
       */
      private function split( $lines ) {
          $words = [];
          $stripped = [];

          $first = true;
          foreach ( $lines as $line ) {
              // Separate lines with a newline token, but do not emit one before
              // the first line.
              if ( $first ) {
                  $first = false;
              } else {
                  $words[] = "\n";
                  $stripped[] = "\n";
              }

              // A token is a run of non-newline whitespace, a run of word
              // characters (ASCII alphanumerics, underscore, bytes 0x80-0xff),
              // or any single character.
              // NOTE(review): "(?!< \n)" reads like a mistyped negative
              // lookbehind "(?<! \n)". As written it is a negative lookahead for
              // "<\n" placed in front of a whitespace class, so it appears to
              // have no effect. Left untouched to keep tokenisation unchanged;
              // confirm the intent before altering it.
              $m = [];
              if ( preg_match_all( '/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
                  $line, $m )
              ) {
                  // $m[0] holds the full matches, $m[1] the first capture group;
                  // both are indexed identically.
                  foreach ( $m[0] as $idx => $word ) {
                      $words[] = $word;
                      $stripped[] = $m[1][$idx];
                  }
              }
          }

          return [ $words, $stripped ];
      }

      /**
       * Build the "before" side: copied words are added untagged, every other
       * edit with a non-empty orig is added with the 'del' tag.
       *
       * @return string[]
       */
      public function orig() {
          $orig = new WordAccumulator;

          foreach ( $this->edits as $edit ) {
              if ( $edit->type == 'copy' ) {
                  $orig->addWords( $edit->orig );
              } elseif ( $edit->orig ) {
                  $orig->addWords( $edit->orig, 'del' );
              }
          }

          return $orig->getLines();
      }

      /**
       * Build the "after" side: copied words are added untagged, every other
       * edit with a non-empty closing is added with the 'ins' tag.
       *
       * @return string[]
       */
      public function closing() {
          $closing = new WordAccumulator;

          foreach ( $this->edits as $edit ) {
              if ( $edit->type == 'copy' ) {
                  $closing->addWords( $edit->closing );
              } elseif ( $edit->closing ) {
                  $closing->addWords( $edit->closing, 'ins' );
              }
          }

          return $closing->getLines();
      }
  }