OpenShot Audio Library | OpenShotAudio  0.3.1
juce_TextDiff.cpp
1 /*
2  ==============================================================================
3 
4  This file is part of the JUCE library.
5  Copyright (c) 2017 - ROLI Ltd.
6 
7  JUCE is an open source library subject to commercial or open-source
8  licensing.
9 
10  The code included in this file is provided under the terms of the ISC license
11  http://www.isc.org/downloads/software-support-policy/isc-license. Permission
12  To use, copy, modify, and/or distribute this software for any purpose with or
13  without fee is hereby granted provided that the above copyright notice and
14  this permission notice appear in all copies.
15 
16  JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
17  EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
18  DISCLAIMED.
19 
20  ==============================================================================
21 */
22 
23 namespace juce
24 {
25 
26 struct TextDiffHelpers
27 {
28  enum { minLengthToMatch = 3,
29  maxComplexity = 16 * 1024 * 1024 };
30 
31  struct StringRegion
32  {
33  StringRegion (const String& s) noexcept
34  : text (s.getCharPointer()), start (0), length (s.length()) {}
35 
36  StringRegion (String::CharPointerType t, int s, int len) noexcept
37  : text (t), start (s), length (len) {}
38 
39  void incrementStart() noexcept { ++text; ++start; --length; }
40 
41  String::CharPointerType text;
42  int start, length;
43  };
44 
45  static void addInsertion (TextDiff& td, String::CharPointerType text, int index, int length)
46  {
47  TextDiff::Change c;
48  c.insertedText = String (text, (size_t) length);
49  c.start = index;
50  c.length = 0;
51  td.changes.add (c);
52  }
53 
54  static void addDeletion (TextDiff& td, int index, int length)
55  {
56  TextDiff::Change c;
57  c.start = index;
58  c.length = length;
59  td.changes.add (c);
60  }
61 
62  static void diffSkippingCommonStart (TextDiff& td, StringRegion a, StringRegion b)
63  {
64  for (;;)
65  {
66  auto ca = *a.text;
67  auto cb = *b.text;
68 
69  if (ca != cb || ca == 0)
70  break;
71 
72  a.incrementStart();
73  b.incrementStart();
74  }
75 
76  diffRecursively (td, a, b);
77  }
78 
79  static void diffRecursively (TextDiff& td, StringRegion a, StringRegion b)
80  {
81  int indexA = 0, indexB = 0;
82  auto len = findLongestCommonSubstring (a.text, a.length, indexA,
83  b.text, b.length, indexB);
84 
85  if (len >= minLengthToMatch)
86  {
87  if (indexA > 0 && indexB > 0)
88  diffSkippingCommonStart (td, StringRegion (a.text, a.start, indexA),
89  StringRegion (b.text, b.start, indexB));
90  else if (indexA > 0)
91  addDeletion (td, b.start, indexA);
92  else if (indexB > 0)
93  addInsertion (td, b.text, b.start, indexB);
94 
95  diffRecursively (td, StringRegion (a.text + (indexA + len), a.start + indexA + len, a.length - indexA - len),
96  StringRegion (b.text + (indexB + len), b.start + indexB + len, b.length - indexB - len));
97  }
98  else
99  {
100  if (a.length > 0) addDeletion (td, b.start, a.length);
101  if (b.length > 0) addInsertion (td, b.text, b.start, b.length);
102  }
103  }
104 
105  static int findLongestCommonSubstring (String::CharPointerType a, const int lenA, int& indexInA,
106  String::CharPointerType b, const int lenB, int& indexInB) noexcept
107  {
108  if (lenA == 0 || lenB == 0)
109  return 0;
110 
111  if (lenA * lenB > maxComplexity)
112  return findCommonSuffix (a, lenA, indexInA,
113  b, lenB, indexInB);
114 
115  auto scratchSpace = sizeof (int) * (2 + 2 * (size_t) lenB);
116 
117  if (scratchSpace < 4096)
118  {
119  auto* scratch = (int*) alloca (scratchSpace);
120  return findLongestCommonSubstring (a, lenA, indexInA, b, lenB, indexInB, scratchSpace, scratch);
121  }
122 
123  HeapBlock<int> scratch (scratchSpace);
124  return findLongestCommonSubstring (a, lenA, indexInA, b, lenB, indexInB, scratchSpace, scratch);
125  }
126 
127  static int findLongestCommonSubstring (String::CharPointerType a, const int lenA, int& indexInA,
128  String::CharPointerType b, const int lenB, int& indexInB,
129  const size_t scratchSpace, int* const lines) noexcept
130  {
131  zeromem (lines, scratchSpace);
132 
133  auto* l0 = lines;
134  auto* l1 = l0 + lenB + 1;
135 
136  int loopsWithoutImprovement = 0;
137  int bestLength = 0;
138 
139  for (int i = 0; i < lenA; ++i)
140  {
141  auto ca = a.getAndAdvance();
142  auto b2 = b;
143 
144  for (int j = 0; j < lenB; ++j)
145  {
146  if (ca != b2.getAndAdvance())
147  {
148  l1[j + 1] = 0;
149  }
150  else
151  {
152  auto len = l0[j] + 1;
153  l1[j + 1] = len;
154 
155  if (len > bestLength)
156  {
157  loopsWithoutImprovement = 0;
158  bestLength = len;
159  indexInA = i;
160  indexInB = j;
161  }
162  }
163  }
164 
165  if (++loopsWithoutImprovement > 100)
166  break;
167 
168  std::swap (l0, l1);
169  }
170 
171  indexInA -= bestLength - 1;
172  indexInB -= bestLength - 1;
173  return bestLength;
174  }
175 
176  static int findCommonSuffix (String::CharPointerType a, int lenA, int& indexInA,
177  String::CharPointerType b, int lenB, int& indexInB) noexcept
178  {
179  int length = 0;
180  a += lenA - 1;
181  b += lenB - 1;
182 
183  while (length < lenA && length < lenB && *a == *b)
184  {
185  --a;
186  --b;
187  ++length;
188  }
189 
190  indexInA = lenA - length;
191  indexInB = lenB - length;
192  return length;
193  }
194 };
195 
196 TextDiff::TextDiff (const String& original, const String& target)
197 {
198  TextDiffHelpers::diffSkippingCommonStart (*this, original, target);
199 }
200 
202 {
203  for (auto& c : changes)
204  text = c.appliedTo (text);
205 
206  return text;
207 }
208 
209 bool TextDiff::Change::isDeletion() const noexcept
210 {
211  return insertedText.isEmpty();
212 }
213 
214 String TextDiff::Change::appliedTo (const String& text) const noexcept
215 {
216  return text.replaceSection (start, length, insertedText);
217 }
218 
219 
220 //==============================================================================
221 //==============================================================================
222 #if JUCE_UNIT_TESTS
223 
224 class DiffTests : public UnitTest
225 {
226 public:
227  DiffTests()
228  : UnitTest ("TextDiff class", UnitTestCategories::text)
229  {}
230 
231  static String createString (Random& r)
232  {
233  juce_wchar buffer[500] = { 0 };
234 
235  for (int i = r.nextInt (numElementsInArray (buffer) - 1); --i >= 0;)
236  {
237  if (r.nextInt (10) == 0)
238  {
239  do
240  {
241  buffer[i] = (juce_wchar) (1 + r.nextInt (0x10ffff - 1));
242  }
243  while (! CharPointer_UTF16::canRepresent (buffer[i]));
244  }
245  else
246  buffer[i] = (juce_wchar) ('a' + r.nextInt (3));
247  }
248 
249  return CharPointer_UTF32 (buffer);
250  }
251 
252  void testDiff (const String& a, const String& b)
253  {
254  TextDiff diff (a, b);
255  auto result = diff.appliedTo (a);
256  expectEquals (result, b);
257  }
258 
259  void runTest() override
260  {
261  beginTest ("TextDiff");
262 
263  auto r = getRandom();
264 
265  testDiff (String(), String());
266  testDiff ("x", String());
267  testDiff (String(), "x");
268  testDiff ("x", "x");
269  testDiff ("x", "y");
270  testDiff ("xxx", "x");
271  testDiff ("x", "xxx");
272 
273  for (int i = 1000; --i >= 0;)
274  {
275  auto s = createString (r);
276  testDiff (s, createString (r));
277  testDiff (s + createString (r), s + createString (r));
278  }
279  }
280 };
281 
282 static DiffTests diffTests;
283 
284 #endif
285 
286 } // namespace juce
TextDiff(const String &original, const String &target)
int nextInt() noexcept
Definition: juce_Random.cpp:78
static bool canRepresent(juce_wchar character) noexcept
String replaceSection(int startIndex, int numCharactersToReplace, StringRef stringToInsert) const
String appliedTo(const String &original) const noexcept
bool isDeletion() const noexcept
String appliedTo(String text) const