juce_TextDiff.cpp 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285
  1. /*
  2. ==============================================================================
  3. This file is part of the juce_core module of the JUCE library.
  4. Copyright (c) 2015 - ROLI Ltd.
  5. Permission to use, copy, modify, and/or distribute this software for any purpose with
  6. or without fee is hereby granted, provided that the above copyright notice and this
  7. permission notice appear in all copies.
  8. THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
  9. TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN
  10. NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
  11. DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
  12. IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
  13. CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  14. ------------------------------------------------------------------------------
  15. NOTE! This permissive ISC license applies ONLY to files within the juce_core module!
  16. All other JUCE modules are covered by a dual GPL/commercial license, so if you are
  17. using any other modules, be sure to check that you also comply with their license.
  18. For more details, visit www.juce.com
  19. ==============================================================================
  20. */
  21. struct TextDiffHelpers
  22. {
  23. enum { minLengthToMatch = 3,
  24. maxComplexity = 16 * 1024 * 1024 };
  25. struct StringRegion
  26. {
  27. StringRegion (const String& s) noexcept
  28. : text (s.getCharPointer()), start (0), length (s.length()) {}
  29. StringRegion (const String::CharPointerType t, int s, int len) noexcept
  30. : text (t), start (s), length (len) {}
  31. void incrementStart() noexcept { ++text; ++start; --length; }
  32. String::CharPointerType text;
  33. int start, length;
  34. };
  35. static void addInsertion (TextDiff& td, const String::CharPointerType text, int index, int length)
  36. {
  37. TextDiff::Change c;
  38. c.insertedText = String (text, (size_t) length);
  39. c.start = index;
  40. c.length = 0;
  41. td.changes.add (c);
  42. }
  43. static void addDeletion (TextDiff& td, int index, int length)
  44. {
  45. TextDiff::Change c;
  46. c.start = index;
  47. c.length = length;
  48. td.changes.add (c);
  49. }
  50. static void diffSkippingCommonStart (TextDiff& td, StringRegion a, StringRegion b)
  51. {
  52. for (;;)
  53. {
  54. const juce_wchar ca = *a.text;
  55. const juce_wchar cb = *b.text;
  56. if (ca != cb || ca == 0)
  57. break;
  58. a.incrementStart();
  59. b.incrementStart();
  60. }
  61. diffRecursively (td, a, b);
  62. }
  63. static void diffRecursively (TextDiff& td, StringRegion a, StringRegion b)
  64. {
  65. int indexA = 0, indexB = 0;
  66. const int len = findLongestCommonSubstring (a.text, a.length, indexA,
  67. b.text, b.length, indexB);
  68. if (len >= minLengthToMatch)
  69. {
  70. if (indexA > 0 && indexB > 0)
  71. diffSkippingCommonStart (td, StringRegion (a.text, a.start, indexA),
  72. StringRegion (b.text, b.start, indexB));
  73. else if (indexA > 0)
  74. addDeletion (td, b.start, indexA);
  75. else if (indexB > 0)
  76. addInsertion (td, b.text, b.start, indexB);
  77. diffRecursively (td, StringRegion (a.text + (indexA + len), a.start + indexA + len, a.length - indexA - len),
  78. StringRegion (b.text + (indexB + len), b.start + indexB + len, b.length - indexB - len));
  79. }
  80. else
  81. {
  82. if (a.length > 0) addDeletion (td, b.start, a.length);
  83. if (b.length > 0) addInsertion (td, b.text, b.start, b.length);
  84. }
  85. }
  86. static int findLongestCommonSubstring (String::CharPointerType a, const int lenA, int& indexInA,
  87. String::CharPointerType b, const int lenB, int& indexInB) noexcept
  88. {
  89. if (lenA == 0 || lenB == 0)
  90. return 0;
  91. if (lenA * lenB > maxComplexity)
  92. return findCommonSuffix (a, lenA, indexInA,
  93. b, lenB, indexInB);
  94. const size_t scratchSpace = sizeof (int) * (2 + 2 * (size_t) lenB);
  95. if (scratchSpace < 4096)
  96. {
  97. int* scratch = (int*) alloca (scratchSpace);
  98. return findLongestCommonSubstring (a, lenA, indexInA, b, lenB, indexInB, scratchSpace, scratch);
  99. }
  100. HeapBlock<int> scratch (scratchSpace);
  101. return findLongestCommonSubstring (a, lenA, indexInA, b, lenB, indexInB, scratchSpace, scratch);
  102. }
  103. static int findLongestCommonSubstring (String::CharPointerType a, const int lenA, int& indexInA,
  104. String::CharPointerType b, const int lenB, int& indexInB,
  105. const size_t scratchSpace, int* const lines) noexcept
  106. {
  107. zeromem (lines, scratchSpace);
  108. int* l0 = lines;
  109. int* l1 = l0 + lenB + 1;
  110. int loopsWithoutImprovement = 0;
  111. int bestLength = 0;
  112. for (int i = 0; i < lenA; ++i)
  113. {
  114. const juce_wchar ca = a.getAndAdvance();
  115. String::CharPointerType b2 (b);
  116. for (int j = 0; j < lenB; ++j)
  117. {
  118. if (ca != b2.getAndAdvance())
  119. {
  120. l1[j + 1] = 0;
  121. }
  122. else
  123. {
  124. const int len = l0[j] + 1;
  125. l1[j + 1] = len;
  126. if (len > bestLength)
  127. {
  128. loopsWithoutImprovement = 0;
  129. bestLength = len;
  130. indexInA = i;
  131. indexInB = j;
  132. }
  133. }
  134. }
  135. if (++loopsWithoutImprovement > 100)
  136. break;
  137. std::swap (l0, l1);
  138. }
  139. indexInA -= bestLength - 1;
  140. indexInB -= bestLength - 1;
  141. return bestLength;
  142. }
  143. static int findCommonSuffix (String::CharPointerType a, const int lenA, int& indexInA,
  144. String::CharPointerType b, const int lenB, int& indexInB) noexcept
  145. {
  146. int length = 0;
  147. a += lenA - 1;
  148. b += lenB - 1;
  149. while (length < lenA && length < lenB && *a == *b)
  150. {
  151. --a;
  152. --b;
  153. ++length;
  154. }
  155. indexInA = lenA - length;
  156. indexInB = lenB - length;
  157. return length;
  158. }
  159. };
  160. TextDiff::TextDiff (const String& original, const String& target)
  161. {
  162. TextDiffHelpers::diffSkippingCommonStart (*this, original, target);
  163. }
  164. String TextDiff::appliedTo (String text) const
  165. {
  166. for (int i = 0; i < changes.size(); ++i)
  167. text = changes.getReference(i).appliedTo (text);
  168. return text;
  169. }
  170. bool TextDiff::Change::isDeletion() const noexcept
  171. {
  172. return insertedText.isEmpty();
  173. }
  174. String TextDiff::Change::appliedTo (const String& text) const noexcept
  175. {
  176. return text.replaceSection (start, length, insertedText);
  177. }
  178. //==============================================================================
  179. //==============================================================================
  180. #if JUCE_UNIT_TESTS
  181. class DiffTests : public UnitTest
  182. {
  183. public:
  184. DiffTests() : UnitTest ("TextDiff class") {}
  185. static String createString (Random& r)
  186. {
  187. juce_wchar buffer[500] = { 0 };
  188. for (int i = r.nextInt (numElementsInArray (buffer) - 1); --i >= 0;)
  189. {
  190. if (r.nextInt (10) == 0)
  191. {
  192. do
  193. {
  194. buffer[i] = (juce_wchar) (1 + r.nextInt (0x10ffff - 1));
  195. }
  196. while (! CharPointer_UTF16::canRepresent (buffer[i]));
  197. }
  198. else
  199. buffer[i] = (juce_wchar) ('a' + r.nextInt (3));
  200. }
  201. return CharPointer_UTF32 (buffer);
  202. }
  203. void testDiff (const String& a, const String& b)
  204. {
  205. TextDiff diff (a, b);
  206. const String result (diff.appliedTo (a));
  207. expectEquals (result, b);
  208. }
  209. void runTest() override
  210. {
  211. beginTest ("TextDiff");
  212. Random r = getRandom();
  213. testDiff (String::empty, String::empty);
  214. testDiff ("x", String::empty);
  215. testDiff (String::empty, "x");
  216. testDiff ("x", "x");
  217. testDiff ("x", "y");
  218. testDiff ("xxx", "x");
  219. testDiff ("x", "xxx");
  220. for (int i = 1000; --i >= 0;)
  221. {
  222. String s (createString (r));
  223. testDiff (s, createString (r));
  224. testDiff (s + createString (r), s + createString (r));
  225. }
  226. }
  227. };
  228. static DiffTests diffTests;
  229. #endif