HTMLDiff.php 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010
  1. <?php
  2. /** Copyright (C) 2008 Guy Van den Broeck <guy@guyvdb.eu>
  3. *
  4. * This program is free software; you can redistribute it and/or modify
  5. * it under the terms of the GNU General Public License as published by
  6. * the Free Software Foundation; either version 2 of the License, or
  7. * (at your option) any later version.
  8. *
  9. * This program is distributed in the hope that it will be useful,
  10. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. * GNU General Public License for more details.
  13. *
  14. * You should have received a copy of the GNU General Public License
  15. * along with this program; if not, write to the Free Software
  16. * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  17. * or see http://www.gnu.org/
  18. *
  19. * @ingroup DifferenceEngine
  20. */
  21. /**
  22. * When detecting the last common parent of two nodes, all results are stored as
  23. * a LastCommonParentResult.
  24. */
  25. class LastCommonParentResult {
  26. // Parent
  27. public $parent;
  28. // Splitting
  29. public $splittingNeeded = false;
  30. // Depth
  31. public $lastCommonParentDepth = -1;
  32. // Index
  33. public $indexInLastCommonParent = -1;
  34. }
  35. class Modification{
  36. const NONE = 1;
  37. const REMOVED = 2;
  38. const ADDED = 4;
  39. const CHANGED = 8;
  40. public $type;
  41. public $id = -1;
  42. public $firstOfID = false;
  43. public $changes;
  44. function __construct($type) {
  45. $this->type = $type;
  46. }
  47. public static function typeToString($type) {
  48. switch($type) {
  49. case self::NONE: return 'none';
  50. case self::REMOVED: return 'removed';
  51. case self::ADDED: return 'added';
  52. case self::CHANGED: return 'changed';
  53. }
  54. }
  55. }
  56. class DomTreeBuilder {
  57. public $textNodes = array();
  58. public $bodyNode;
  59. private $currentParent;
  60. private $newWord = '';
  61. protected $bodyStarted = false;
  62. protected $bodyEnded = false;
  63. private $whiteSpaceBeforeThis = false;
  64. private $lastSibling;
  65. private $notInPre = true;
  66. function __construct() {
  67. $this->bodyNode = $this->currentParent = new BodyNode();
  68. $this->lastSibling = new DummyNode();
  69. }
  70. /**
  71. * Must be called manually
  72. */
  73. public function endDocument() {
  74. $this->endWord();
  75. HTMLDiffer::diffDebug( count($this->textNodes) . " text nodes in document.\n" );
  76. }
  77. public function startElement($parser, $name, /*array*/ $attributes) {
  78. if (strcasecmp($name, 'body') != 0) {
  79. HTMLDiffer::diffDebug( "Starting $name node.\n" );
  80. $this->endWord();
  81. $newNode = new TagNode($this->currentParent, $name, $attributes);
  82. $this->currentParent->children[] = $newNode;
  83. $this->currentParent = $newNode;
  84. $this->lastSibling = new DummyNode();
  85. if ($this->whiteSpaceBeforeThis && !in_array(strtolower($this->currentParent->qName),TagNode::$blocks)) {
  86. $this->currentParent->whiteBefore = true;
  87. }
  88. $this->whiteSpaceBeforeThis = false;
  89. if(strcasecmp($name, 'pre') == 0) {
  90. $this->notInPre = false;
  91. }
  92. }
  93. }
  94. public function endElement($parser, $name) {
  95. if(strcasecmp($name, 'body') != 0) {
  96. HTMLDiffer::diffDebug( "Ending $name node.\n");
  97. if (0 == strcasecmp($name,'img')) {
  98. // Insert a dummy leaf for the image
  99. $img = new ImageNode($this->currentParent, $this->currentParent->attributes);
  100. $this->currentParent->children[] = $img;
  101. $img->whiteBefore = $this->whiteSpaceBeforeThis;
  102. $this->lastSibling = $img;
  103. $this->textNodes[] = $img;
  104. }
  105. $this->endWord();
  106. if (!in_array(strtolower($this->currentParent->qName),TagNode::$blocks)) {
  107. $this->lastSibling = $this->currentParent;
  108. } else {
  109. $this->lastSibling = new DummyNode();
  110. }
  111. $this->currentParent = $this->currentParent->parent;
  112. $this->whiteSpaceBeforeThis = false;
  113. if (!$this->notInPre && strcasecmp($name, 'pre') == 0) {
  114. $this->notInPre = true;
  115. }
  116. } else {
  117. $this->endDocument();
  118. }
  119. }
  120. const regex = '/([\s\.\,\"\\\'\(\)\?\:\;\!\{\}\-\+\*\=\_\[\]\&\|\$]{1})/';
  121. const whitespace = '/^[\s]{1}$/';
  122. const delimiter = '/^[\s\.\,\"\\\'\(\)\?\:\;\!\{\}\-\+\*\=\_\[\]\&\|\$]{1}$/';
  123. public function characters($parser, $data) {
  124. $matches = preg_split(self::regex, $data, -1, PREG_SPLIT_DELIM_CAPTURE);
  125. foreach($matches as &$word) {
  126. if (preg_match(self::whitespace, $word) && $this->notInPre) {
  127. $this->endWord();
  128. $this->lastSibling->whiteAfter = true;
  129. $this->whiteSpaceBeforeThis = true;
  130. } else if (preg_match(self::delimiter, $word)) {
  131. $this->endWord();
  132. $textNode = new TextNode($this->currentParent, $word);
  133. $this->currentParent->children[] = $textNode;
  134. $textNode->whiteBefore = $this->whiteSpaceBeforeThis;
  135. $this->whiteSpaceBeforeThis = false;
  136. $this->lastSibling = $textNode;
  137. $this->textNodes[] = $textNode;
  138. } else {
  139. $this->newWord .= $word;
  140. }
  141. }
  142. }
  143. private function endWord() {
  144. if ($this->newWord !== '') {
  145. $node = new TextNode($this->currentParent, $this->newWord);
  146. $this->currentParent->children[] = $node;
  147. $node->whiteBefore = $this->whiteSpaceBeforeThis;
  148. $this->whiteSpaceBeforeThis = false;
  149. $this->lastSibling = $node;
  150. $this->textNodes[] = $node;
  151. $this->newWord = "";
  152. }
  153. }
  154. public function getDiffLines() {
  155. return array_map(array('TextNode','toDiffLine'), $this->textNodes);
  156. }
  157. }
  158. class TextNodeDiffer {
  159. private $textNodes;
  160. public $bodyNode;
  161. private $oldTextNodes;
  162. private $oldBodyNode;
  163. private $newID = 0;
  164. private $changedID = 0;
  165. private $changedIDUsed = false;
  166. // used to remove the whitespace between a red and green block
  167. private $whiteAfterLastChangedPart = false;
  168. private $deletedID = 0;
  169. function __construct(DomTreeBuilder $tree, DomTreeBuilder $oldTree) {
  170. $this->textNodes = $tree->textNodes;
  171. $this->bodyNode = $tree->bodyNode;
  172. $this->oldTextNodes = $oldTree->textNodes;
  173. $this->oldBodyNode = $oldTree->bodyNode;
  174. }
  175. public function markAsNew($start, $end) {
  176. if ($end <= $start) {
  177. return;
  178. }
  179. if ($this->whiteAfterLastChangedPart) {
  180. $this->textNodes[$start]->whiteBefore = false;
  181. }
  182. for ($i = $start; $i < $end; ++$i) {
  183. $mod = new Modification(Modification::ADDED);
  184. $mod->id = $this->newID;
  185. $this->textNodes[$i]->modification = $mod;
  186. }
  187. if ($start < $end) {
  188. $this->textNodes[$start]->modification->firstOfID = true;
  189. }
  190. ++$this->newID;
  191. }
  192. public function handlePossibleChangedPart($leftstart, $leftend, $rightstart, $rightend) {
  193. $i = $rightstart;
  194. $j = $leftstart;
  195. if ($this->changedIDUsed) {
  196. ++$this->changedID;
  197. $this->changedIDUsed = false;
  198. }
  199. $changes;
  200. while ($i < $rightend) {
  201. $acthis = new AncestorComparator($this->textNodes[$i]->getParentTree());
  202. $acother = new AncestorComparator($this->oldTextNodes[$j]->getParentTree());
  203. $result = $acthis->getResult($acother);
  204. unset($acthis, $acother);
  205. if ( $result ) {
  206. $mod = new Modification(Modification::CHANGED);
  207. if (!$this->changedIDUsed) {
  208. $mod->firstOfID = true;
  209. } else if (!is_null( $result ) && $result !== $this->changes) {
  210. ++$this->changedID;
  211. $mod->firstOfID = true;
  212. }
  213. $mod->changes = $result;
  214. $mod->id = $this->changedID;
  215. $this->textNodes[$i]->modification = $mod;
  216. $this->changes = $result;
  217. $this->changedIDUsed = true;
  218. } else if ($this->changedIDUsed) {
  219. ++$this->changedID;
  220. $this->changedIDUsed = false;
  221. }
  222. ++$i;
  223. ++$j;
  224. }
  225. }
  226. public function markAsDeleted($start, $end, $before) {
  227. if ($end <= $start) {
  228. return;
  229. }
  230. if ($before > 0 && $this->textNodes[$before - 1]->whiteAfter) {
  231. $this->whiteAfterLastChangedPart = true;
  232. } else {
  233. $this->whiteAfterLastChangedPart = false;
  234. }
  235. for ($i = $start; $i < $end; ++$i) {
  236. $mod = new Modification(Modification::REMOVED);
  237. $mod->id = $this->deletedID;
  238. // oldTextNodes is used here because we're going to move its deleted
  239. // elements to this tree!
  240. $this->oldTextNodes[$i]->modification = $mod;
  241. }
  242. $this->oldTextNodes[$start]->modification->firstOfID = true;
  243. $root = $this->oldTextNodes[$start]->getLastCommonParent($this->oldTextNodes[$end-1])->parent;
  244. $junk1 = $junk2 = null;
  245. $deletedNodes = $root->getMinimalDeletedSet($this->deletedID, $junk1, $junk2);
  246. HTMLDiffer::diffDebug( "Minimal set of deleted nodes of size " . count($deletedNodes) . "\n" );
  247. // Set prevLeaf to the leaf after which the old HTML needs to be
  248. // inserted
  249. if ($before > 0) {
  250. $prevLeaf = $this->textNodes[$before - 1];
  251. }
  252. // Set nextLeaf to the leaf before which the old HTML needs to be
  253. // inserted
  254. if ($before < count($this->textNodes)) {
  255. $nextLeaf = $this->textNodes[$before];
  256. }
  257. while (count($deletedNodes) > 0) {
  258. if (isset($prevLeaf)) {
  259. $prevResult = $prevLeaf->getLastCommonParent($deletedNodes[0]);
  260. } else {
  261. $prevResult = new LastCommonParentResult();
  262. $prevResult->parent = $this->bodyNode;
  263. $prevResult->indexInLastCommonParent = -1;
  264. }
  265. if (isset($nextleaf)) {
  266. $nextResult = $nextLeaf->getLastCommonParent($deletedNodes[count($deletedNodes) - 1]);
  267. } else {
  268. $nextResult = new LastCommonParentResult();
  269. $nextResult->parent = $this->bodyNode;
  270. $nextResult->indexInLastCommonParent = $this->bodyNode->getNbChildren();
  271. }
  272. if ($prevResult->lastCommonParentDepth == $nextResult->lastCommonParentDepth) {
  273. // We need some metric to choose which way to add-...
  274. if ($deletedNodes[0]->parent === $deletedNodes[count($deletedNodes) - 1]->parent
  275. && $prevResult->parent === $nextResult->parent) {
  276. // The difference is not in the parent
  277. $prevResult->lastCommonParentDepth = $prevResult->lastCommonParentDepth + 1;
  278. } else {
  279. // The difference is in the parent, so compare them
  280. // now THIS is tricky
  281. $distancePrev = $deletedNodes[0]->parent->getMatchRatio($prevResult->parent);
  282. $distanceNext = $deletedNodes[count($deletedNodes) - 1]->parent->getMatchRatio($nextResult->parent);
  283. if ($distancePrev <= $distanceNext) {
  284. $prevResult->lastCommonParentDepth = $prevResult->lastCommonParentDepth + 1;
  285. } else {
  286. $nextResult->lastCommonParentDepth = $nextResult->lastCommonParentDepth + 1;
  287. }
  288. }
  289. }
  290. if ($prevResult->lastCommonParentDepth > $nextResult->lastCommonParentDepth) {
  291. // Inserting at the front
  292. if ($prevResult->splittingNeeded) {
  293. $prevLeaf->parent->splitUntil($prevResult->parent, $prevLeaf, true);
  294. }
  295. $prevLeaf = $deletedNodes[0]->copyTree();
  296. unset($deletedNodes[0]);
  297. $deletedNodes = array_values($deletedNodes);
  298. $prevLeaf->setParent($prevResult->parent);
  299. $prevResult->parent->addChildAbsolute($prevLeaf,$prevResult->indexInLastCommonParent + 1);
  300. } else if ($prevResult->lastCommonParentDepth < $nextResult->lastCommonParentDepth) {
  301. // Inserting at the back
  302. if ($nextResult->splittingNeeded) {
  303. $splitOccured = $nextLeaf->parent->splitUntil($nextResult->parent, $nextLeaf, false);
  304. if ($splitOccured) {
  305. // The place where to insert is shifted one place to the
  306. // right
  307. $nextResult->indexInLastCommonParent = $nextResult->indexInLastCommonParent + 1;
  308. }
  309. }
  310. $nextLeaf = $deletedNodes[count(deletedNodes) - 1]->copyTree();
  311. unset($deletedNodes[count(deletedNodes) - 1]);
  312. $deletedNodes = array_values($deletedNodes);
  313. $nextLeaf->setParent($nextResult->parent);
  314. $nextResult->parent->addChildAbsolute($nextLeaf,$nextResult->indexInLastCommonParent);
  315. }
  316. }
  317. ++$this->deletedID;
  318. }
  319. public function expandWhiteSpace() {
  320. $this->bodyNode->expandWhiteSpace();
  321. }
  322. public function lengthNew(){
  323. return count($this->textNodes);
  324. }
  325. public function lengthOld(){
  326. return count($this->oldTextNodes);
  327. }
  328. }
  329. class HTMLDiffer {
  330. private $output;
  331. private static $debug = '';
  332. function __construct($output) {
  333. $this->output = $output;
  334. }
  335. function htmlDiff($from, $to) {
  336. wfProfileIn( __METHOD__ );
  337. // Create an XML parser
  338. $xml_parser = xml_parser_create('');
  339. $domfrom = new DomTreeBuilder();
  340. // Set the functions to handle opening and closing tags
  341. xml_set_element_handler($xml_parser, array($domfrom, "startElement"), array($domfrom, "endElement"));
  342. // Set the function to handle blocks of character data
  343. xml_set_character_data_handler($xml_parser, array($domfrom, "characters"));
  344. HTMLDiffer::diffDebug( "Parsing " . strlen($from) . " characters worth of HTML\n" );
  345. if (!xml_parse($xml_parser, '<?xml version="1.0" encoding="UTF-8"?>'.Sanitizer::hackDocType().'<body>', false)
  346. || !xml_parse($xml_parser, $from, false)
  347. || !xml_parse($xml_parser, '</body>', true)){
  348. $error = xml_error_string(xml_get_error_code($xml_parser));
  349. $line = xml_get_current_line_number($xml_parser);
  350. HTMLDiffer::diffDebug( "XML error: $error at line $line\n" );
  351. }
  352. xml_parser_free($xml_parser);
  353. unset($from);
  354. $xml_parser = xml_parser_create('');
  355. $domto = new DomTreeBuilder();
  356. // Set the functions to handle opening and closing tags
  357. xml_set_element_handler($xml_parser, array($domto, "startElement"), array($domto, "endElement"));
  358. // Set the function to handle blocks of character data
  359. xml_set_character_data_handler($xml_parser, array($domto, "characters"));
  360. HTMLDiffer::diffDebug( "Parsing " . strlen($to) . " characters worth of HTML\n" );
  361. if (!xml_parse($xml_parser, '<?xml version="1.0" encoding="UTF-8"?>'.Sanitizer::hackDocType().'<body>', false)
  362. || !xml_parse($xml_parser, $to, false)
  363. || !xml_parse($xml_parser, '</body>', true)){
  364. $error = xml_error_string(xml_get_error_code($xml_parser));
  365. $line = xml_get_current_line_number($xml_parser);
  366. HTMLDiffer::diffDebug( "XML error: $error at line $line\n" );
  367. }
  368. xml_parser_free($xml_parser);
  369. unset($to);
  370. $diffengine = new WikiDiff3();
  371. $differences = $this->preProcess($diffengine->diff_range($domfrom->getDiffLines(), $domto->getDiffLines()));
  372. unset($xml_parser, $diffengine);
  373. $domdiffer = new TextNodeDiffer($domto, $domfrom);
  374. $currentIndexLeft = 0;
  375. $currentIndexRight = 0;
  376. foreach ($differences as &$d) {
  377. if ($d->leftstart > $currentIndexLeft) {
  378. $domdiffer->handlePossibleChangedPart($currentIndexLeft, $d->leftstart,
  379. $currentIndexRight, $d->rightstart);
  380. }
  381. if ($d->leftlength > 0) {
  382. $domdiffer->markAsDeleted($d->leftstart, $d->leftend, $d->rightstart);
  383. }
  384. $domdiffer->markAsNew($d->rightstart, $d->rightend);
  385. $currentIndexLeft = $d->leftend;
  386. $currentIndexRight = $d->rightend;
  387. }
  388. $oldLength = $domdiffer->lengthOld();
  389. if ($currentIndexLeft < $oldLength) {
  390. $domdiffer->handlePossibleChangedPart($currentIndexLeft, $oldLength, $currentIndexRight, $domdiffer->lengthNew());
  391. }
  392. $domdiffer->expandWhiteSpace();
  393. $output = new HTMLOutput('htmldiff', $this->output);
  394. $output->parse($domdiffer->bodyNode);
  395. wfProfileOut( __METHOD__ );
  396. }
  397. private function preProcess(/*array*/ $differences) {
  398. $newRanges = array();
  399. $nbDifferences = count($differences);
  400. for ($i = 0; $i < $nbDifferences; ++$i) {
  401. $leftStart = $differences[$i]->leftstart;
  402. $leftEnd = $differences[$i]->leftend;
  403. $rightStart = $differences[$i]->rightstart;
  404. $rightEnd = $differences[$i]->rightend;
  405. $leftLength = $leftEnd - $leftStart;
  406. $rightLength = $rightEnd - $rightStart;
  407. while ($i + 1 < $nbDifferences && self::score($leftLength,
  408. $differences[$i + 1]->leftlength,
  409. $rightLength,
  410. $differences[$i + 1]->rightlength)
  411. > ($differences[$i + 1]->leftstart - $leftEnd)) {
  412. $leftEnd = $differences[$i + 1]->leftend;
  413. $rightEnd = $differences[$i + 1]->rightend;
  414. $leftLength = $leftEnd - $leftStart;
  415. $rightLength = $rightEnd - $rightStart;
  416. ++$i;
  417. }
  418. $newRanges[] = new RangeDifference($leftStart, $leftEnd, $rightStart, $rightEnd);
  419. }
  420. return $newRanges;
  421. }
  422. /**
  423. * Heuristic to merge differences for readability.
  424. */
  425. public static function score($ll, $nll, $rl, $nrl) {
  426. if (($ll == 0 && $nll == 0)
  427. || ($rl == 0 && $nrl == 0)) {
  428. return 0;
  429. }
  430. $numbers = array($ll, $nll, $rl, $nrl);
  431. $d = 0;
  432. foreach ($numbers as &$number) {
  433. while ($number > 3) {
  434. $d += 3;
  435. $number -= 3;
  436. $number *= 0.5;
  437. }
  438. $d += $number;
  439. }
  440. return $d / (1.5 * count($numbers));
  441. }
  442. /**
  443. * Add to debug output
  444. * @param string $str Debug output
  445. */
  446. public static function diffDebug( $str ) {
  447. self :: $debug .= $str;
  448. }
  449. /**
  450. * Get debug output
  451. * @return string
  452. */
  453. public static function getDebugOutput() {
  454. return self :: $debug;
  455. }
  456. }
  457. class TextOnlyComparator {
  458. public $leafs = array();
  459. function _construct(TagNode $tree) {
  460. $this->addRecursive($tree);
  461. $this->leafs = array_map(array('TextNode','toDiffLine'), $this->leafs);
  462. }
  463. private function addRecursive(TagNode $tree) {
  464. foreach ($tree->children as &$child) {
  465. if ($child instanceof TagNode) {
  466. $this->addRecursive($child);
  467. } else if ($child instanceof TextNode) {
  468. $this->leafs[] = $node;
  469. }
  470. }
  471. }
  472. public function getMatchRatio(TextOnlyComparator $other) {
  473. $nbOthers = count($other->leafs);
  474. $nbThis = count($this->leafs);
  475. if($nbOthers == 0 || $nbThis == 0){
  476. return -log(0);
  477. }
  478. $diffengine = new WikiDiff3(25000, 1.35);
  479. $diffengine->diff($this->leafs, $other->leafs);
  480. $lcsLength = $diffengine->getLcsLength();
  481. $distanceThis = $nbThis-$lcsLength;
  482. return (2.0 - $lcsLength/$nbOthers - $lcsLength/$nbThis) / 2.0;
  483. }
  484. }
  485. /**
  486. * A comparator used when calculating the difference in ancestry of two Nodes.
  487. */
  488. class AncestorComparator {
  489. public $ancestors;
  490. public $ancestorsText;
  491. function __construct(/*array*/ $ancestors) {
  492. $this->ancestors = $ancestors;
  493. $this->ancestorsText = array_map(array('TagNode','toDiffLine'), $ancestors);
  494. }
  495. public $compareTxt = "";
  496. public function getResult(AncestorComparator $other) {
  497. $diffengine = new WikiDiff3(10000, 1.35);
  498. $differences = $diffengine->diff_range($other->ancestorsText,$this->ancestorsText);
  499. if (count($differences) == 0){
  500. return null;
  501. }
  502. $changeTxt = new ChangeTextGenerator($this, $other);
  503. return $changeTxt->getChanged($differences)->toString();;
  504. }
  505. }
  506. class ChangeTextGenerator {
  507. private $ancestorComparator;
  508. private $other;
  509. private $factory;
  510. function __construct(AncestorComparator $ancestorComparator, AncestorComparator $other) {
  511. $this->ancestorComparator = $ancestorComparator;
  512. $this->other = $other;
  513. $this->factory = new TagToStringFactory();
  514. }
  515. public function getChanged(/*array*/ $differences) {
  516. $txt = new ChangeText;
  517. $rootlistopened = false;
  518. if (count($differences) > 1) {
  519. $txt->addHtml('<ul class="changelist">');
  520. $rootlistopened = true;
  521. }
  522. $nbDifferences = count($differences);
  523. for ($j = 0; $j < $nbDifferences; ++$j) {
  524. $d = $differences[$j];
  525. $lvl1listopened = false;
  526. if ($rootlistopened) {
  527. $txt->addHtml('<li>');
  528. }
  529. if ($d->leftlength + $d->rightlength > 1) {
  530. $txt->addHtml('<ul class="changelist">');
  531. $lvl1listopened = true;
  532. }
  533. // left are the old ones
  534. for ($i = $d->leftstart; $i < $d->leftend; ++$i) {
  535. if ($lvl1listopened){
  536. $txt->addHtml('<li>');
  537. }
  538. // add a bullet for a old tag
  539. $this->addTagOld($txt, $this->other->ancestors[$i]);
  540. if ($lvl1listopened){
  541. $txt->addHtml('</li>');
  542. }
  543. }
  544. // right are the new ones
  545. for ($i = $d->rightstart; $i < $d->rightend; ++$i) {
  546. if ($lvl1listopened){
  547. $txt->addHtml('<li>');
  548. }
  549. // add a bullet for a new tag
  550. $this->addTagNew($txt, $this->ancestorComparator->ancestors[$i]);
  551. if ($lvl1listopened){
  552. $txt->addHtml('</li>');
  553. }
  554. }
  555. if ($lvl1listopened) {
  556. $txt->addHtml('</ul>');
  557. }
  558. if ($rootlistopened) {
  559. $txt->addHtml('</li>');
  560. }
  561. }
  562. if ($rootlistopened) {
  563. $txt->addHtml('</ul>');
  564. }
  565. return $txt;
  566. }
  567. private function addTagOld(ChangeText $txt, TagNode $ancestor) {
  568. $this->factory->create($ancestor)->getRemovedDescription($txt);
  569. }
  570. private function addTagNew(ChangeText $txt, TagNode $ancestor) {
  571. $this->factory->create($ancestor)->getAddedDescription($txt);
  572. }
  573. }
  574. class ChangeText {
  575. private $txt = "";
  576. public function addHtml($s) {
  577. $this->txt .= $s;
  578. }
  579. public function toString() {
  580. return $this->txt;
  581. }
  582. }
  583. class TagToStringFactory {
  584. private static $containerTags = array('html', 'body', 'p', 'blockquote',
  585. 'h1', 'h2', 'h3', 'h4', 'h5', 'pre', 'div', 'ul', 'ol', 'li',
  586. 'table', 'tbody', 'tr', 'td', 'th', 'br', 'hr', 'code', 'dl',
  587. 'dt', 'dd', 'input', 'form', 'img', 'span', 'a');
  588. private static $styleTags = array('i', 'b', 'strong', 'em', 'font',
  589. 'big', 'del', 'tt', 'sub', 'sup', 'strike');
  590. const MOVED = 1;
  591. const STYLE = 2;
  592. const UNKNOWN = 4;
  593. public function create(TagNode $node) {
  594. $sem = $this->getChangeSemantic($node->qName);
  595. if (strcasecmp($node->qName,'a') == 0) {
  596. return new AnchorToString($node, $sem);
  597. }
  598. if (strcasecmp($node->qName,'img') == 0) {
  599. return new NoContentTagToString($node, $sem);
  600. }
  601. return new TagToString($node, $sem);
  602. }
  603. protected function getChangeSemantic($qname) {
  604. if (in_array(strtolower($qname),self::$containerTags)) {
  605. return self::MOVED;
  606. }
  607. if (in_array(strtolower($qname),self::$styleTags)) {
  608. return self::STYLE;
  609. }
  610. return self::UNKNOWN;
  611. }
  612. }
  613. class TagToString {
  614. protected $node;
  615. protected $sem;
  616. function __construct(TagNode $node, $sem) {
  617. $this->node = $node;
  618. $this->sem = $sem;
  619. }
  620. public function getRemovedDescription(ChangeText $txt) {
  621. $tagDescription = wfMsgExt('diff-' . $this->node->qName, 'parseinline' );
  622. if( wfEmptyMsg( 'diff-' . $this->node->qName, $tagDescription ) ){
  623. $tagDescription = "&lt;" . $this->node->qName . "&gt;";
  624. }
  625. if ($this->sem == TagToStringFactory::MOVED) {
  626. $txt->addHtml( wfMsgExt( 'diff-movedoutof', 'parseinline', $tagDescription ) );
  627. } else if ($this->sem == TagToStringFactory::STYLE) {
  628. $txt->addHtml( wfMsgExt( 'diff-styleremoved' , 'parseinline', $tagDescription ) );
  629. } else {
  630. $txt->addHtml( wfMsgExt( 'diff-removed' , 'parseinline', $tagDescription ) );
  631. }
  632. $this->addAttributes($txt, $this->node->attributes);
  633. $txt->addHtml('.');
  634. }
  635. public function getAddedDescription(ChangeText $txt) {
  636. $tagDescription = wfMsgExt('diff-' . $this->node->qName, 'parseinline' );
  637. if( wfEmptyMsg( 'diff-' . $this->node->qName, $tagDescription ) ){
  638. $tagDescription = "&lt;" . $this->node->qName . "&gt;";
  639. }
  640. if ($this->sem == TagToStringFactory::MOVED) {
  641. $txt->addHtml( wfMsgExt( 'diff-movedto' , 'parseinline', $tagDescription) );
  642. } else if ($this->sem == TagToStringFactory::STYLE) {
  643. $txt->addHtml( wfMsgExt( 'diff-styleadded', 'parseinline', $tagDescription ) );
  644. } else {
  645. $txt->addHtml( wfMsgExt( 'diff-added', 'parseinline', $tagDescription ) );
  646. }
  647. $this->addAttributes($txt, $this->node->attributes);
  648. $txt->addHtml('.');
  649. }
  650. protected function addAttributes(ChangeText $txt, array $attributes) {
  651. if (count($attributes) < 1) {
  652. return;
  653. }
  654. $firstOne = true;
  655. $nbAttributes_min_1 = count($attributes)-1;
  656. $keys = array_keys($attributes);
  657. for ($i=0;$i<$nbAttributes_min_1;$i++) {
  658. $key = $keys[$i];
  659. $attr = $attributes[$key];
  660. if($firstOne) {
  661. $firstOne = false;
  662. $txt->addHtml( wfMsgExt('diff-with', 'escapenoentities', $this->translateArgument($key), htmlspecialchars($attr) ) );
  663. continue;
  664. }
  665. $txt->addHtml( wfMsgExt( 'comma-separator', 'escapenoentities' ) .
  666. wfMsgExt( 'diff-with-additional', 'escapenoentities',
  667. $this->translateArgument( $key ), htmlspecialchars( $attr ) )
  668. );
  669. }
  670. if ($nbAttributes_min_1 > 0) {
  671. $txt->addHtml( wfMsgExt( 'diff-with-final', 'escapenoentities',
  672. $this->translateArgument($keys[$nbAttributes_min_1]),
  673. htmlspecialchars($attributes[$keys[$nbAttributes_min_1]]) ) );
  674. }
  675. }
  676. protected function translateArgument($name) {
  677. $translation = wfMsgExt('diff-' . $name, 'parseinline' );
  678. if ( wfEmptyMsg( 'diff-' . $name, $translation ) ) {
  679. $translation = "&lt;" . $name . "&gt;";;
  680. }
  681. return htmlspecialchars( $translation );
  682. }
  683. }
  684. class NoContentTagToString extends TagToString {
  685. function __construct(TagNode $node, $sem) {
  686. parent::__construct($node, $sem);
  687. }
  688. public function getAddedDescription(ChangeText $txt) {
  689. $tagDescription = wfMsgExt('diff-' . $this->node->qName, 'parseinline' );
  690. if( wfEmptyMsg( 'diff-' . $this->node->qName, $tagDescription ) ){
  691. $tagDescription = "&lt;" . $this->node->qName . "&gt;";
  692. }
  693. $txt->addHtml( wfMsgExt('diff-changedto', 'parseinline', $tagDescription ) );
  694. $this->addAttributes($txt, $this->node->attributes);
  695. $txt->addHtml('.');
  696. }
  697. public function getRemovedDescription(ChangeText $txt) {
  698. $tagDescription = wfMsgExt('diff-' . $this->node->qName, 'parseinline' );
  699. if( wfEmptyMsg( 'diff-' . $this->node->qName, $tagDescription ) ){
  700. $tagDescription = "&lt;" . $this->node->qName . "&gt;";
  701. }
  702. $txt->addHtml( wfMsgExt('diff-changedfrom', 'parseinline', $tagDescription ) );
  703. $this->addAttributes($txt, $this->node->attributes);
  704. $txt->addHtml('.');
  705. }
  706. }
  707. class AnchorToString extends TagToString {
  708. function __construct(TagNode $node, $sem) {
  709. parent::__construct($node, $sem);
  710. }
  711. protected function addAttributes(ChangeText $txt, array $attributes) {
  712. if (array_key_exists('href', $attributes)) {
  713. $txt->addHtml(' ' . wfMsgExt( 'diff-withdestination', 'parseinline', htmlspecialchars($attributes['href']) ) );
  714. unset($attributes['href']);
  715. }
  716. parent::addAttributes($txt, $attributes);
  717. }
  718. }
  719. /**
  720. * Takes a branch root and creates an HTML file for it.
  721. */
  722. class HTMLOutput{
  723. private $prefix;
  724. private $handler;
  725. function __construct($prefix, $handler) {
  726. $this->prefix = $prefix;
  727. $this->handler = $handler;
  728. }
  729. public function parse(TagNode $node) {
  730. $handler = &$this->handler;
  731. if (strcasecmp($node->qName, 'img') != 0 && strcasecmp($node->qName, 'body') != 0) {
  732. $handler->startElement($node->qName, $node->attributes);
  733. }
  734. $newStarted = false;
  735. $remStarted = false;
  736. $changeStarted = false;
  737. $changeTXT = '';
  738. foreach ($node->children as &$child) {
  739. if ($child instanceof TagNode) {
  740. if ($newStarted) {
  741. $handler->endElement('span');
  742. $newStarted = false;
  743. } else if ($changeStarted) {
  744. $handler->endElement('span');
  745. $changeStarted = false;
  746. } else if ($remStarted) {
  747. $handler->endElement('span');
  748. $remStarted = false;
  749. }
  750. $this->parse($child);
  751. } else if ($child instanceof TextNode) {
  752. $mod = $child->modification;
  753. if ($newStarted && ($mod->type != Modification::ADDED || $mod->firstOfID)) {
  754. $handler->endElement('span');
  755. $newStarted = false;
  756. } else if ($changeStarted && ($mod->type != Modification::CHANGED
  757. || $mod->changes != $changeTXT || $mod->firstOfID)) {
  758. $handler->endElement('span');
  759. $changeStarted = false;
  760. } else if ($remStarted && ($mod->type != Modification::REMOVED || $mod ->firstOfID)) {
  761. $handler->endElement('span');
  762. $remStarted = false;
  763. }
  764. // no else because a removed part can just be closed and a new
  765. // part can start
  766. if (!$newStarted && $mod->type == Modification::ADDED) {
  767. $attrs = array('class' => 'diff-html-added');
  768. if ($mod->firstOfID) {
  769. $attrs['id'] = "added-{$this->prefix}-{$mod->id}";
  770. }
  771. $handler->startElement('span', $attrs);
  772. $newStarted = true;
  773. } else if (!$changeStarted && $mod->type == Modification::CHANGED) {
  774. $attrs = array('class' => 'diff-html-changed');
  775. if ($mod->firstOfID) {
  776. $attrs['id'] = "changed-{$this->prefix}-{$mod->id}";
  777. }
  778. $handler->startElement('span', $attrs);
  779. //tooltip
  780. $handler->startElement('span', array('class' => 'tip'));
  781. $handler->html($mod->changes);
  782. $handler->endElement('span');
  783. $changeStarted = true;
  784. $changeTXT = $mod->changes;
  785. } else if (!$remStarted && $mod->type == Modification::REMOVED) {
  786. $attrs = array('class'=>'diff-html-removed');
  787. if ($mod->firstOfID) {
  788. $attrs['id'] = "removed-{$this->prefix}-{$mod->id}";
  789. }
  790. $handler->startElement('span', $attrs);
  791. $remStarted = true;
  792. }
  793. $chars = $child->text;
  794. if ($child instanceof ImageNode) {
  795. $this->writeImage($child);
  796. } else {
  797. $handler->characters($chars);
  798. }
  799. }
  800. }
  801. if ($newStarted) {
  802. $handler->endElement('span');
  803. $newStarted = false;
  804. } else if ($changeStarted) {
  805. $handler->endElement('span');
  806. $changeStarted = false;
  807. } else if ($remStarted) {
  808. $handler->endElement('span');
  809. $remStarted = false;
  810. }
  811. if (strcasecmp($node->qName, 'img') != 0
  812. && strcasecmp($node->qName, 'body') != 0) {
  813. $handler->endElement($node->qName);
  814. }
  815. }
  816. private function writeImage(ImageNode $imgNode) {
  817. $attrs = $imgNode->attributes;
  818. $this->handler->startElement('img', $attrs);
  819. $this->handler->endElement('img');
  820. }
  821. }
  822. class DelegatingContentHandler {
  823. private $delegate;
  824. function __construct($delegate) {
  825. $this->delegate = $delegate;
  826. }
  827. function startElement($qname, /*array*/ $arguments) {
  828. $this->delegate->addHtml(Xml::openElement($qname, $arguments));
  829. }
  830. function endElement($qname){
  831. $this->delegate->addHtml(Xml::closeElement($qname));
  832. }
  833. function characters($chars){
  834. $this->delegate->addHtml(htmlspecialchars($chars));
  835. }
  836. function html($html){
  837. $this->delegate->addHtml($html);
  838. }
  839. }