OpenShot Library | libopenshot-audio  0.1.9
juce_TextDiff.cpp
1 /*
2  ==============================================================================
3 
4  This file is part of the JUCE library.
5  Copyright (c) 2017 - ROLI Ltd.
6 
7  JUCE is an open source library subject to commercial or open-source
8  licensing.
9 
10  The code included in this file is provided under the terms of the ISC license
11  http://www.isc.org/downloads/software-support-policy/isc-license. Permission
12  To use, copy, modify, and/or distribute this software for any purpose with or
13  without fee is hereby granted provided that the above copyright notice and
14  this permission notice appear in all copies.
15 
16  JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
17  EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
18  DISCLAIMED.
19 
20  ==============================================================================
21 */
22 
23 namespace juce
24 {
25 
27 {
28  enum { minLengthToMatch = 3,
29  maxComplexity = 16 * 1024 * 1024 };
30 
31  struct StringRegion
32  {
33  StringRegion (const String& s) noexcept
34  : text (s.getCharPointer()), start (0), length (s.length()) {}
35 
36  StringRegion (String::CharPointerType t, int s, int len) noexcept
37  : text (t), start (s), length (len) {}
38 
39  void incrementStart() noexcept { ++text; ++start; --length; }
40 
42  int start, length;
43  };
44 
45  static void addInsertion (TextDiff& td, String::CharPointerType text, int index, int length)
46  {
48  c.insertedText = String (text, (size_t) length);
49  c.start = index;
50  c.length = 0;
51  td.changes.add (c);
52  }
53 
54  static void addDeletion (TextDiff& td, int index, int length)
55  {
57  c.start = index;
58  c.length = length;
59  td.changes.add (c);
60  }
61 
62  static void diffSkippingCommonStart (TextDiff& td, StringRegion a, StringRegion b)
63  {
64  for (;;)
65  {
66  auto ca = *a.text;
67  auto cb = *b.text;
68 
69  if (ca != cb || ca == 0)
70  break;
71 
72  a.incrementStart();
73  b.incrementStart();
74  }
75 
76  diffRecursively (td, a, b);
77  }
78 
79  static void diffRecursively (TextDiff& td, StringRegion a, StringRegion b)
80  {
81  int indexA = 0, indexB = 0;
82  auto len = findLongestCommonSubstring (a.text, a.length, indexA,
83  b.text, b.length, indexB);
84 
85  if (len >= minLengthToMatch)
86  {
87  if (indexA > 0 && indexB > 0)
88  diffSkippingCommonStart (td, StringRegion (a.text, a.start, indexA),
89  StringRegion (b.text, b.start, indexB));
90  else if (indexA > 0)
91  addDeletion (td, b.start, indexA);
92  else if (indexB > 0)
93  addInsertion (td, b.text, b.start, indexB);
94 
95  diffRecursively (td, StringRegion (a.text + (indexA + len), a.start + indexA + len, a.length - indexA - len),
96  StringRegion (b.text + (indexB + len), b.start + indexB + len, b.length - indexB - len));
97  }
98  else
99  {
100  if (a.length > 0) addDeletion (td, b.start, a.length);
101  if (b.length > 0) addInsertion (td, b.text, b.start, b.length);
102  }
103  }
104 
105  static int findLongestCommonSubstring (String::CharPointerType a, const int lenA, int& indexInA,
106  String::CharPointerType b, const int lenB, int& indexInB) noexcept
107  {
108  if (lenA == 0 || lenB == 0)
109  return 0;
110 
111  if (lenA * lenB > maxComplexity)
112  return findCommonSuffix (a, lenA, indexInA,
113  b, lenB, indexInB);
114 
115  auto scratchSpace = sizeof (int) * (2 + 2 * (size_t) lenB);
116 
117  if (scratchSpace < 4096)
118  {
119  auto* scratch = (int*) alloca (scratchSpace);
120  return findLongestCommonSubstring (a, lenA, indexInA, b, lenB, indexInB, scratchSpace, scratch);
121  }
122 
123  HeapBlock<int> scratch (scratchSpace);
124  return findLongestCommonSubstring (a, lenA, indexInA, b, lenB, indexInB, scratchSpace, scratch);
125  }
126 
127  static int findLongestCommonSubstring (String::CharPointerType a, const int lenA, int& indexInA,
128  String::CharPointerType b, const int lenB, int& indexInB,
129  const size_t scratchSpace, int* const lines) noexcept
130  {
131  zeromem (lines, scratchSpace);
132 
133  auto* l0 = lines;
134  auto* l1 = l0 + lenB + 1;
135 
136  int loopsWithoutImprovement = 0;
137  int bestLength = 0;
138 
139  for (int i = 0; i < lenA; ++i)
140  {
141  auto ca = a.getAndAdvance();
142  auto b2 = b;
143 
144  for (int j = 0; j < lenB; ++j)
145  {
146  if (ca != b2.getAndAdvance())
147  {
148  l1[j + 1] = 0;
149  }
150  else
151  {
152  auto len = l0[j] + 1;
153  l1[j + 1] = len;
154 
155  if (len > bestLength)
156  {
157  loopsWithoutImprovement = 0;
158  bestLength = len;
159  indexInA = i;
160  indexInB = j;
161  }
162  }
163  }
164 
165  if (++loopsWithoutImprovement > 100)
166  break;
167 
168  std::swap (l0, l1);
169  }
170 
171  indexInA -= bestLength - 1;
172  indexInB -= bestLength - 1;
173  return bestLength;
174  }
175 
176  static int findCommonSuffix (String::CharPointerType a, int lenA, int& indexInA,
177  String::CharPointerType b, int lenB, int& indexInB) noexcept
178  {
179  int length = 0;
180  a += lenA - 1;
181  b += lenB - 1;
182 
183  while (length < lenA && length < lenB && *a == *b)
184  {
185  --a;
186  --b;
187  ++length;
188  }
189 
190  indexInA = lenA - length;
191  indexInB = lenB - length;
192  return length;
193  }
194 };
195 
196 TextDiff::TextDiff (const String& original, const String& target)
197 {
198  TextDiffHelpers::diffSkippingCommonStart (*this, original, target);
199 }
200 
202 {
203  for (auto& c : changes)
204  text = c.appliedTo (text);
205 
206  return text;
207 }
208 
209 bool TextDiff::Change::isDeletion() const noexcept
210 {
211  return insertedText.isEmpty();
212 }
213 
214 String TextDiff::Change::appliedTo (const String& text) const noexcept
215 {
216  return text.replaceSection (start, length, insertedText);
217 }
218 
219 //==============================================================================
220 //==============================================================================
221 #if JUCE_UNIT_TESTS
222 
223 class DiffTests : public UnitTest
224 {
225 public:
226  DiffTests() : UnitTest ("TextDiff class", "Text") {}
227 
228  static String createString (Random& r)
229  {
230  juce_wchar buffer[500] = { 0 };
231 
232  for (int i = r.nextInt (numElementsInArray (buffer) - 1); --i >= 0;)
233  {
234  if (r.nextInt (10) == 0)
235  {
236  do
237  {
238  buffer[i] = (juce_wchar) (1 + r.nextInt (0x10ffff - 1));
239  }
240  while (! CharPointer_UTF16::canRepresent (buffer[i]));
241  }
242  else
243  buffer[i] = (juce_wchar) ('a' + r.nextInt (3));
244  }
245 
246  return CharPointer_UTF32 (buffer);
247  }
248 
249  void testDiff (const String& a, const String& b)
250  {
251  TextDiff diff (a, b);
252  auto result = diff.appliedTo (a);
253  expectEquals (result, b);
254  }
255 
256  void runTest() override
257  {
258  beginTest ("TextDiff");
259 
260  auto r = getRandom();
261 
262  testDiff (String(), String());
263  testDiff ("x", String());
264  testDiff (String(), "x");
265  testDiff ("x", "x");
266  testDiff ("x", "y");
267  testDiff ("xxx", "x");
268  testDiff ("x", "xxx");
269 
270  for (int i = 1000; --i >= 0;)
271  {
272  auto s = createString (r);
273  testDiff (s, createString (r));
274  testDiff (s + createString (r), s + createString (r));
275  }
276  }
277 };
278 
279 static DiffTests diffTests;
280 
281 #endif
282 
283 } // namespace juce
TextDiff(const String &original, const String &target)
Creates a set of diffs for converting the original string into the target.
int nextInt() noexcept
Returns the next random 32 bit integer.
Definition: juce_Random.cpp:78
static bool canRepresent(juce_wchar character) noexcept
Returns true if the given unicode character can be represented in this encoding.
CharPointerType getCharPointer() const noexcept
Returns the character pointer currently being used to store this string.
Definition: juce_String.h:1200
The JUCE String class!
Definition: juce_String.h:42
String insertedText
If this change is a deletion, this string will be empty; otherwise, it&#39;ll be the text that should be ...
Definition: juce_TextDiff.h:59
int length
If this change is a deletion, this specifies the number of characters to delete.
Definition: juce_TextDiff.h:62
This is a base class for classes that perform a unit test.
Definition: juce_UnitTest.h:73
Calculates and applies a sequence of changes to convert one text string into another.
Definition: juce_TextDiff.h:40
Wraps a pointer to a null-terminated UTF-32 character string, and provides various methods to operate...
String appliedTo(const String &original) const noexcept
Returns the result of applying this change to a string.
juce_wchar getAndAdvance() noexcept
Returns the character that this pointer is currently pointing to, and then advances the pointer to po...
int start
Specifies the character index in a string at which text should be inserted or deleted.
Definition: juce_TextDiff.h:61
bool isDeletion() const noexcept
Returns true if this change is a deletion, or false for an insertion.
Describes a change, which can be either an insertion or deletion.
Definition: juce_TextDiff.h:57
String appliedTo(String text) const
Applies this sequence of changes to the original string, producing the target string that was specifi...
A random number generator.
Definition: juce_Random.h:38
int length() const noexcept
Returns the number of characters in the string.
Array< Change > changes
The list of changes required to perform the transformation.
Definition: juce_TextDiff.h:75
Wraps a pointer to a null-terminated UTF-8 character string, and provides various methods to operate ...