OpenShot Audio Library | OpenShotAudio  0.3.1
juce_CharPointer_UTF8.h
1 /*
2  ==============================================================================
3 
4  This file is part of the JUCE library.
5  Copyright (c) 2017 - ROLI Ltd.
6 
7  JUCE is an open source library subject to commercial or open-source
8  licensing.
9 
10  The code included in this file is provided under the terms of the ISC license
11  http://www.isc.org/downloads/software-support-policy/isc-license. Permission
12  To use, copy, modify, and/or distribute this software for any purpose with or
13  without fee is hereby granted provided that the above copyright notice and
14  this permission notice appear in all copies.
15 
16  JUCE IS PROVIDED "AS IS" WITHOUT ANY WARRANTY, AND ALL WARRANTIES, WHETHER
17  EXPRESSED OR IMPLIED, INCLUDING MERCHANTABILITY AND FITNESS FOR PURPOSE, ARE
18  DISCLAIMED.
19 
20  ==============================================================================
21 */
22 
23 namespace juce
24 {
25 
26 //==============================================================================
34 class CharPointer_UTF8 final
35 {
36 public:
37  using CharType = char;
38 
39  inline explicit CharPointer_UTF8 (const CharType* rawPointer) noexcept
40  : data (const_cast<CharType*> (rawPointer))
41  {
42  }
43 
44  inline CharPointer_UTF8 (const CharPointer_UTF8& other) = default;
45 
46  inline CharPointer_UTF8 operator= (CharPointer_UTF8 other) noexcept
47  {
48  data = other.data;
49  return *this;
50  }
51 
52  inline CharPointer_UTF8 operator= (const CharType* text) noexcept
53  {
54  data = const_cast<CharType*> (text);
55  return *this;
56  }
57 
59  inline bool operator== (CharPointer_UTF8 other) const noexcept { return data == other.data; }
60  inline bool operator!= (CharPointer_UTF8 other) const noexcept { return data != other.data; }
61  inline bool operator<= (CharPointer_UTF8 other) const noexcept { return data <= other.data; }
62  inline bool operator< (CharPointer_UTF8 other) const noexcept { return data < other.data; }
63  inline bool operator>= (CharPointer_UTF8 other) const noexcept { return data >= other.data; }
64  inline bool operator> (CharPointer_UTF8 other) const noexcept { return data > other.data; }
65 
67  inline CharType* getAddress() const noexcept { return data; }
68 
70  inline operator const CharType*() const noexcept { return data; }
71 
73  inline bool isEmpty() const noexcept { return *data == 0; }
74 
76  inline bool isNotEmpty() const noexcept { return *data != 0; }
77 
79  juce_wchar operator*() const noexcept
80  {
81  auto byte = (signed char) *data;
82 
83  if (byte >= 0)
84  return (juce_wchar) (uint8) byte;
85 
86  uint32 n = (uint32) (uint8) byte;
87  uint32 mask = 0x7f;
88  uint32 bit = 0x40;
89  int numExtraValues = 0;
90 
91  while ((n & bit) != 0 && bit > 0x8)
92  {
93  mask >>= 1;
94  ++numExtraValues;
95  bit >>= 1;
96  }
97 
98  n &= mask;
99 
100  for (int i = 1; i <= numExtraValues; ++i)
101  {
102  auto nextByte = (uint32) (uint8) data[i];
103 
104  if ((nextByte & 0xc0) != 0x80)
105  break;
106 
107  n <<= 6;
108  n |= (nextByte & 0x3f);
109  }
110 
111  return (juce_wchar) n;
112  }
113 
116  {
117  jassert (*data != 0); // trying to advance past the end of the string?
118  auto n = (signed char) *data++;
119 
120  if (n < 0)
121  {
122  uint8 bit = 0x40;
123 
124  while ((static_cast<uint8> (n) & bit) != 0 && bit > 0x8)
125  {
126  ++data;
127  bit >>= 1;
128  }
129  }
130 
131  return *this;
132  }
133 
136  {
137  int count = 0;
138 
139  while ((*--data & 0xc0) == 0x80 && ++count < 4)
140  {}
141 
142  return *this;
143  }
144 
147  juce_wchar getAndAdvance() noexcept
148  {
149  auto byte = (signed char) *data++;
150 
151  if (byte >= 0)
152  return (juce_wchar) (uint8) byte;
153 
154  uint32 n = (uint32) (uint8) byte;
155  uint32 mask = 0x7f;
156  uint32 bit = 0x40;
157  int numExtraValues = 0;
158 
159  while ((n & bit) != 0 && bit > 0x8)
160  {
161  mask >>= 1;
162  ++numExtraValues;
163  bit >>= 1;
164  }
165 
166  n &= mask;
167 
168  while (--numExtraValues >= 0)
169  {
170  auto nextByte = (uint32) (uint8) *data;
171 
172  if ((nextByte & 0xc0) != 0x80)
173  break;
174 
175  ++data;
176  n <<= 6;
177  n |= (nextByte & 0x3f);
178  }
179 
180  return (juce_wchar) n;
181  }
182 
185  {
186  CharPointer_UTF8 temp (*this);
187  ++*this;
188  return temp;
189  }
190 
192  void operator+= (int numToSkip) noexcept
193  {
194  if (numToSkip < 0)
195  {
196  while (++numToSkip <= 0)
197  --*this;
198  }
199  else
200  {
201  while (--numToSkip >= 0)
202  ++*this;
203  }
204  }
205 
207  void operator-= (int numToSkip) noexcept
208  {
209  operator+= (-numToSkip);
210  }
211 
213  juce_wchar operator[] (int characterIndex) const noexcept
214  {
215  auto p (*this);
216  p += characterIndex;
217  return *p;
218  }
219 
221  CharPointer_UTF8 operator+ (int numToSkip) const noexcept
222  {
223  auto p (*this);
224  p += numToSkip;
225  return p;
226  }
227 
229  CharPointer_UTF8 operator- (int numToSkip) const noexcept
230  {
231  auto p (*this);
232  p += -numToSkip;
233  return p;
234  }
235 
237  size_t length() const noexcept
238  {
239  auto* d = data;
240  size_t count = 0;
241 
242  for (;;)
243  {
244  auto n = (uint32) (uint8) *d++;
245 
246  if ((n & 0x80) != 0)
247  {
248  while ((*d & 0xc0) == 0x80)
249  ++d;
250  }
251  else if (n == 0)
252  break;
253 
254  ++count;
255  }
256 
257  return count;
258  }
259 
261  size_t lengthUpTo (const size_t maxCharsToCount) const noexcept
262  {
263  return CharacterFunctions::lengthUpTo (*this, maxCharsToCount);
264  }
265 
267  size_t lengthUpTo (const CharPointer_UTF8 end) const noexcept
268  {
269  return CharacterFunctions::lengthUpTo (*this, end);
270  }
271 
275  size_t sizeInBytes() const noexcept
276  {
277  jassert (data != nullptr);
278  return strlen (data) + 1;
279  }
280 
284  static size_t getBytesRequiredFor (const juce_wchar charToWrite) noexcept
285  {
286  size_t num = 1;
287  auto c = (uint32) charToWrite;
288 
289  if (c >= 0x80)
290  {
291  ++num;
292  if (c >= 0x800)
293  {
294  ++num;
295  if (c >= 0x10000)
296  ++num;
297  }
298  }
299 
300  return num;
301  }
302 
307  template <class CharPointer>
308  static size_t getBytesRequiredFor (CharPointer text) noexcept
309  {
310  size_t count = 0;
311 
312  while (auto n = text.getAndAdvance())
313  count += getBytesRequiredFor (n);
314 
315  return count;
316  }
317 
320  {
321  return CharPointer_UTF8 (data + strlen (data));
322  }
323 
325  void write (const juce_wchar charToWrite) noexcept
326  {
327  auto c = (uint32) charToWrite;
328 
329  if (c >= 0x80)
330  {
331  int numExtraBytes = 1;
332  if (c >= 0x800)
333  {
334  ++numExtraBytes;
335  if (c >= 0x10000)
336  ++numExtraBytes;
337  }
338 
339  *data++ = (CharType) ((uint32) (0xff << (7 - numExtraBytes)) | (c >> (numExtraBytes * 6)));
340 
341  while (--numExtraBytes >= 0)
342  *data++ = (CharType) (0x80 | (0x3f & (c >> (numExtraBytes * 6))));
343  }
344  else
345  {
346  *data++ = (CharType) c;
347  }
348  }
349 
351  inline void writeNull() const noexcept
352  {
353  *data = 0;
354  }
355 
357  template <typename CharPointer>
358  void writeAll (const CharPointer src) noexcept
359  {
360  CharacterFunctions::copyAll (*this, src);
361  }
362 
364  void writeAll (const CharPointer_UTF8 src) noexcept
365  {
366  auto* s = src.data;
367 
368  while ((*data = *s) != 0)
369  {
370  ++data;
371  ++s;
372  }
373  }
374 
379  template <typename CharPointer>
380  size_t writeWithDestByteLimit (const CharPointer src, const size_t maxDestBytes) noexcept
381  {
382  return CharacterFunctions::copyWithDestByteLimit (*this, src, maxDestBytes);
383  }
384 
389  template <typename CharPointer>
390  void writeWithCharLimit (const CharPointer src, const int maxChars) noexcept
391  {
392  CharacterFunctions::copyWithCharLimit (*this, src, maxChars);
393  }
394 
396  template <typename CharPointer>
397  int compare (const CharPointer other) const noexcept
398  {
399  return CharacterFunctions::compare (*this, other);
400  }
401 
403  template <typename CharPointer>
404  int compareUpTo (const CharPointer other, const int maxChars) const noexcept
405  {
406  return CharacterFunctions::compareUpTo (*this, other, maxChars);
407  }
408 
410  template <typename CharPointer>
411  int compareIgnoreCase (const CharPointer other) const noexcept
412  {
413  return CharacterFunctions::compareIgnoreCase (*this, other);
414  }
415 
417  int compareIgnoreCase (const CharPointer_UTF8 other) const noexcept
418  {
419  return CharacterFunctions::compareIgnoreCase (*this, other);
420  }
421 
423  template <typename CharPointer>
424  int compareIgnoreCaseUpTo (const CharPointer other, const int maxChars) const noexcept
425  {
426  return CharacterFunctions::compareIgnoreCaseUpTo (*this, other, maxChars);
427  }
428 
430  template <typename CharPointer>
431  int indexOf (const CharPointer stringToFind) const noexcept
432  {
433  return CharacterFunctions::indexOf (*this, stringToFind);
434  }
435 
437  int indexOf (const juce_wchar charToFind) const noexcept
438  {
439  return CharacterFunctions::indexOfChar (*this, charToFind);
440  }
441 
443  int indexOf (const juce_wchar charToFind, const bool ignoreCase) const noexcept
444  {
445  return ignoreCase ? CharacterFunctions::indexOfCharIgnoreCase (*this, charToFind)
446  : CharacterFunctions::indexOfChar (*this, charToFind);
447  }
448 
450  bool isWhitespace() const noexcept { const CharType c = *data; return c == ' ' || (c <= 13 && c >= 9); }
452  bool isDigit() const noexcept { const CharType c = *data; return c >= '0' && c <= '9'; }
454  bool isLetter() const noexcept { return CharacterFunctions::isLetter (operator*()) != 0; }
456  bool isLetterOrDigit() const noexcept { return CharacterFunctions::isLetterOrDigit (operator*()) != 0; }
458  bool isUpperCase() const noexcept { return CharacterFunctions::isUpperCase (operator*()) != 0; }
460  bool isLowerCase() const noexcept { return CharacterFunctions::isLowerCase (operator*()) != 0; }
461 
463  juce_wchar toUpperCase() const noexcept { return CharacterFunctions::toUpperCase (operator*()); }
465  juce_wchar toLowerCase() const noexcept { return CharacterFunctions::toLowerCase (operator*()); }
466 
468  int getIntValue32() const noexcept { return atoi (data); }
469 
471  int64 getIntValue64() const noexcept
472  {
473  #if JUCE_WINDOWS && ! JUCE_MINGW
474  return _atoi64 (data);
475  #else
476  return atoll (data);
477  #endif
478  }
479 
481  double getDoubleValue() const noexcept { return CharacterFunctions::getDoubleValue (*this); }
482 
485 
487  static bool canRepresent (juce_wchar character) noexcept
488  {
489  return ((uint32) character) < (uint32) 0x10ffff;
490  }
491 
493  static bool isValidString (const CharType* dataToTest, int maxBytesToRead)
494  {
495  while (--maxBytesToRead >= 0 && *dataToTest != 0)
496  {
497  auto byte = (signed char) *dataToTest++;
498 
499  if (byte < 0)
500  {
501  int bit = 0x40;
502  int numExtraValues = 0;
503 
504  while ((byte & bit) != 0)
505  {
506  if (bit < 8)
507  return false;
508 
509  ++numExtraValues;
510  bit >>= 1;
511 
512  if (bit == 8 && (numExtraValues > maxBytesToRead
513  || *CharPointer_UTF8 (dataToTest - 1) > 0x10ffff))
514  return false;
515  }
516 
517  if (numExtraValues == 0)
518  return false;
519 
520  maxBytesToRead -= numExtraValues;
521  if (maxBytesToRead < 0)
522  return false;
523 
524  while (--numExtraValues >= 0)
525  if ((*dataToTest++ & 0xc0) != 0x80)
526  return false;
527  }
528  }
529 
530  return true;
531  }
532 
535  {
536  return CharPointer_UTF8 (reinterpret_cast<Atomic<CharType*>&> (data).exchange (newValue.data));
537  }
538 
540  enum
541  {
542  byteOrderMark1 = 0xef,
543  byteOrderMark2 = 0xbb,
544  byteOrderMark3 = 0xbf
545  };
546 
550  static bool isByteOrderMark (const void* possibleByteOrder) noexcept
551  {
552  jassert (possibleByteOrder != nullptr);
553  auto c = static_cast<const uint8*> (possibleByteOrder);
554 
555  return c[0] == (uint8) byteOrderMark1
556  && c[1] == (uint8) byteOrderMark2
557  && c[2] == (uint8) byteOrderMark3;
558  }
559 
560 private:
561  CharType* data;
562 };
563 
564 } // namespace juce
static size_t getBytesRequiredFor(const juce_wchar charToWrite) noexcept
bool isLetter() const noexcept
juce_wchar toUpperCase() const noexcept
int indexOf(const juce_wchar charToFind) const noexcept
static double getDoubleValue(CharPointerType text) noexcept
static juce_wchar toUpperCase(juce_wchar character) noexcept
void writeWithCharLimit(const CharPointer src, const int maxChars) noexcept
bool isLetterOrDigit() const noexcept
bool isDigit() const noexcept
CharPointer_UTF8 & operator++() noexcept
size_t length() const noexcept
size_t lengthUpTo(const CharPointer_UTF8 end) const noexcept
int compareIgnoreCase(const CharPointer_UTF8 other) const noexcept
double getDoubleValue() const noexcept
static int indexOfCharIgnoreCase(Type text, juce_wchar charToFind) noexcept
static void copyAll(DestCharPointerType &dest, SrcCharPointerType src) noexcept
juce_wchar operator*() const noexcept
bool isLowerCase() const noexcept
static int compareIgnoreCase(juce_wchar char1, juce_wchar char2) noexcept
void operator-=(int numToSkip) noexcept
bool isNotEmpty() const noexcept
bool isEmpty() const noexcept
static Type findEndOfWhitespace(Type text) noexcept
size_t lengthUpTo(const size_t maxCharsToCount) const noexcept
size_t writeWithDestByteLimit(const CharPointer src, const size_t maxDestBytes) noexcept
static bool isValidString(const CharType *dataToTest, int maxBytesToRead)
CharPointer_UTF8 operator--() noexcept
CharPointer_UTF8 findEndOfWhitespace() const noexcept
CharPointer_UTF8 operator+(int numToSkip) const noexcept
static int indexOfChar(Type text, const juce_wchar charToFind) noexcept
void writeAll(const CharPointer src) noexcept
bool isWhitespace() const noexcept
static int compareIgnoreCaseUpTo(CharPointerType1 s1, CharPointerType2 s2, int maxChars) noexcept
juce_wchar toLowerCase() const noexcept
int indexOf(const CharPointer stringToFind) const noexcept
static bool isUpperCase(juce_wchar character) noexcept
static size_t lengthUpTo(CharPointerType text, const size_t maxCharsToCount) noexcept
static size_t getBytesRequiredFor(CharPointer text) noexcept
bool operator==(CharPointer_UTF8 other) const noexcept
static size_t copyWithDestByteLimit(DestCharPointerType &dest, SrcCharPointerType src, size_t maxBytesToWrite) noexcept
bool isUpperCase() const noexcept
juce_wchar getAndAdvance() noexcept
CharType * getAddress() const noexcept
int compareIgnoreCase(const CharPointer other) const noexcept
CharPointer_UTF8 findTerminatingNull() const noexcept
int compareUpTo(const CharPointer other, const int maxChars) const noexcept
static juce_wchar toLowerCase(juce_wchar character) noexcept
int64 getIntValue64() const noexcept
static bool isByteOrderMark(const void *possibleByteOrder) noexcept
static bool isLetter(char character) noexcept
static bool canRepresent(juce_wchar character) noexcept
CharPointer_UTF8 atomicSwap(const CharPointer_UTF8 newValue)
void operator+=(int numToSkip) noexcept
CharPointer_UTF8 operator-(int numToSkip) const noexcept
int compareIgnoreCaseUpTo(const CharPointer other, const int maxChars) const noexcept
static int indexOf(CharPointerType1 textToSearch, const CharPointerType2 substringToLookFor) noexcept
int compare(const CharPointer other) const noexcept
void writeNull() const noexcept
int indexOf(const juce_wchar charToFind, const bool ignoreCase) const noexcept
int getIntValue32() const noexcept
static bool isLowerCase(juce_wchar character) noexcept
static bool isLetterOrDigit(char character) noexcept
static int compareUpTo(CharPointerType1 s1, CharPointerType2 s2, int maxChars) noexcept
static void copyWithCharLimit(DestCharPointerType &dest, SrcCharPointerType src, int maxChars) noexcept
void writeAll(const CharPointer_UTF8 src) noexcept
static int compare(juce_wchar char1, juce_wchar char2) noexcept
size_t sizeInBytes() const noexcept
juce_wchar operator[](int characterIndex) const noexcept
void write(const juce_wchar charToWrite) noexcept