43 std::unique_ptr<XmlElement> parseXML (
const String& textToParse)
48 std::unique_ptr<XmlElement> parseXML (
const File& fileToParse)
55 inputSource.reset (newSource);
60 ignoreEmptyTextElements = shouldBeIgnored;
63 namespace XmlIdentifierChars
65 static bool isIdentifierCharSlow (juce_wchar c) noexcept
68 || c ==
'_' || c ==
'-' || c ==
':' || c ==
'.';
71 static bool isIdentifierChar (juce_wchar c) noexcept
73 static const uint32 legalChars[] = { 0, 0x7ff6000, 0x87fffffe, 0x7fffffe, 0 };
75 return ((
int) c < (
int) numElementsInArray (legalChars) * 32) ? ((legalChars [c >> 5] & (1 << (c & 31))) != 0)
76 : isIdentifierCharSlow (c);
95 while (isIdentifierChar (*p))
104 if (originalText.isEmpty() && inputSource !=
nullptr)
106 std::unique_ptr<InputStream> in (inputSource->createInputStream());
113 #if JUCE_STRING_UTF_TYPE == 8 117 auto* text =
static_cast<const char*
> (data.
getData());
139 return parseDocumentElement (originalText.getCharPointer(), onlyReadOuterDocumentElement);
147 void XmlDocument::setLastError (
const String& desc,
const bool carryOn)
150 errorOccurred = ! carryOn;
153 String XmlDocument::getFileContents (
const String& filename)
const 155 if (inputSource !=
nullptr)
157 std::unique_ptr<InputStream> in (inputSource->createInputStreamFor (filename.
trim().
unquoted()));
160 return in->readEntireStreamAsString();
166 juce_wchar XmlDocument::readNextChar() noexcept
168 auto c = input.getAndAdvance();
180 const bool onlyReadOuterDocumentElement)
183 errorOccurred =
false;
185 needToLoadDTD =
true;
189 lastError =
"not enough input";
191 else if (! parseHeader())
193 lastError =
"malformed header";
195 else if (! parseDTD())
197 lastError =
"malformed DTD";
202 std::unique_ptr<XmlElement> result (readNextElement (! onlyReadOuterDocumentElement));
205 return result.release();
211 bool XmlDocument::parseHeader()
213 skipNextWhiteSpace();
219 if (headerEnd.isEmpty())
223 auto encoding =
String (input, headerEnd)
237 jassert (encoding.isEmpty() || encoding.startsWithIgnoreCase (
"utf-"));
240 input = headerEnd + 2;
241 skipNextWhiteSpace();
247 bool XmlDocument::parseDTD()
252 auto dtdStart = input;
254 for (
int n = 1; n > 0;)
256 auto c = readNextChar();
267 dtdText =
String (dtdStart, input - 1).
trim();
273 void XmlDocument::skipNextWhiteSpace()
277 input = input.findEndOfWhitespace();
294 if (closeComment < 0)
300 input += closeComment + 3;
309 if (closeBracket < 0)
315 input += closeBracket + 2;
324 void XmlDocument::readQuotedString (
String& result)
326 auto quote = readNextChar();
330 auto c = readNextChar();
347 auto character = *input;
349 if (character == quote)
356 if (character ==
'&')
364 setLastError (
"unmatched quotes",
false);
375 XmlElement* XmlDocument::readNextElement (
const bool alsoParseSubElements)
378 skipNextWhiteSpace();
386 auto endOfToken = XmlIdentifierChars::findEndOfToken (input);
388 if (endOfToken == input)
391 skipNextWhiteSpace();
392 endOfToken = XmlIdentifierChars::findEndOfToken (input);
394 if (endOfToken == input)
396 setLastError (
"tag name missing",
false);
408 skipNextWhiteSpace();
412 if (c ==
'/' && input[1] ==
'>')
423 if (alsoParseSubElements)
424 readChildElements (*node);
430 if (XmlIdentifierChars::isIdentifierChar (c))
432 auto attNameEnd = XmlIdentifierChars::findEndOfToken (input);
434 if (attNameEnd != input)
436 auto attNameStart = input;
438 skipNextWhiteSpace();
440 if (readNextChar() ==
'=')
442 skipNextWhiteSpace();
443 auto nextChar = *input;
445 if (nextChar ==
'"' || nextChar ==
'\'')
447 auto* newAtt =
new XmlElement::XmlAttributeNode (attNameStart, attNameEnd);
448 readQuotedString (newAtt->value);
449 attributeAppender.
append (newAtt);
455 setLastError (
"expected '=' after attribute '" 456 +
String (attNameStart, attNameEnd) +
"'",
false);
464 setLastError (
"illegal character found in " + node->
getTagName() +
": '" + c +
"'",
false);
474 void XmlDocument::readChildElements (
XmlElement& parent)
480 auto preWhitespaceInput = input;
481 skipNextWhiteSpace();
485 setLastError (
"unmatched tags",
false);
496 auto closeTag = input.indexOf ((juce_wchar)
'>');
499 input += closeTag + 1;
507 auto inputStart = input;
515 setLastError (
"unterminated CDATA section",
false);
520 if (c0 ==
']' && input[1] ==
']' && input[2] ==
'>')
533 if (
auto* n = readNextElement (
true))
541 input = preWhitespaceInput;
543 bool contentShouldBeUsed = ! ignoreEmptyTextElements;
551 if (input[1] ==
'!' && input[2] ==
'-' && input[3] ==
'-')
556 if (closeComment < 0)
558 setLastError (
"unterminated comment",
false);
563 input += closeComment + 3;
572 setLastError (
"unmatched tags",
false);
584 auto oldInput = input;
585 auto oldOutOfData = outOfData;
590 while (
auto* n = readNextElement (
true))
594 outOfData = oldOutOfData;
598 textElementContent << entity;
606 auto nextChar = *input;
608 if (nextChar ==
'\r')
612 if (input[1] ==
'\n')
616 if (nextChar ==
'<' || nextChar ==
'&')
621 setLastError (
"unmatched tags",
false);
632 if (contentShouldBeUsed)
638 void XmlDocument::readEntity (
String& result)
668 else if (*input ==
'#')
673 if (*input ==
'x' || *input ==
'X')
678 while (input[0] !=
';')
682 if (hexValue < 0 || ++numChars > 8)
684 setLastError (
"illegal escape sequence",
true);
688 charCode = (charCode << 4) | hexValue;
694 else if (input[0] >=
'0' && input[0] <=
'9')
698 while (input[0] !=
';')
702 setLastError (
"illegal escape sequence",
true);
706 charCode = charCode * 10 + ((int) input[0] -
'0');
714 setLastError (
"illegal escape sequence",
true);
719 result << (juce_wchar) charCode;
723 auto entityNameStart = input;
724 auto closingSemiColon = input.
indexOf ((juce_wchar)
';');
726 if (closingSemiColon < 0)
733 input += closingSemiColon + 1;
734 result += expandExternalEntity (
String (entityNameStart, (
size_t) closingSemiColon));
751 if (char1 ==
'x' || char1 ==
'X')
754 if (char1 >=
'0' && char1 <=
'9')
757 setLastError (
"illegal escape sequence",
false);
761 return expandExternalEntity (ent);
764 String XmlDocument::expandExternalEntity (
const String& entity)
768 if (dtdText.isNotEmpty())
771 tokenisedDTD.addTokens (dtdText,
true);
773 if (tokenisedDTD[tokenisedDTD.size() - 2].equalsIgnoreCase (
"system")
774 && tokenisedDTD[tokenisedDTD.size() - 1].isQuotedString())
776 auto fn = tokenisedDTD[tokenisedDTD.size() - 1];
778 tokenisedDTD.
clear();
779 tokenisedDTD.addTokens (getFileContents (fn),
true);
783 tokenisedDTD.clear();
784 auto openBracket = dtdText.indexOfChar (
'[');
788 auto closeBracket = dtdText.lastIndexOfChar (
']');
790 if (closeBracket > openBracket)
791 tokenisedDTD.addTokens (dtdText.substring (openBracket + 1,
792 closeBracket),
true);
796 for (
int i = tokenisedDTD.size(); --i >= 0;)
798 if (tokenisedDTD[i].startsWithChar (
'%')
799 && tokenisedDTD[i].endsWithChar (
';'))
801 auto parsed = getParameterEntity (tokenisedDTD[i].substring (1, tokenisedDTD[i].length() - 1));
805 tokenisedDTD.remove (i);
807 for (
int j = newToks.
size(); --j >= 0;)
808 tokenisedDTD.insert (i, newToks[j]);
813 needToLoadDTD =
false;
816 for (
int i = 0; i < tokenisedDTD.size(); ++i)
818 if (tokenisedDTD[i] == entity)
820 if (tokenisedDTD[i - 1].equalsIgnoreCase (
"<!entity"))
827 while (ampersand >= 0)
829 auto semiColon = ent.
indexOf (i + 1,
";");
833 setLastError (
"entity without terminating semi-colon",
false);
837 auto resolved = expandEntity (ent.
substring (i + 1, semiColon));
851 setLastError (
"unknown entity",
true);
855 String XmlDocument::getParameterEntity (
const String& entity)
857 for (
int i = 0; i < tokenisedDTD.size(); ++i)
859 if (tokenisedDTD[i] == entity
860 && tokenisedDTD [i - 1] ==
"%" 861 && tokenisedDTD [i - 2].equalsIgnoreCase (
"<!entity"))
866 return getFileContents (tokenisedDTD [i + 2].trimCharactersAtEnd (
">"));
Wraps a pointer to a null-terminated ASCII character string, and provides various methods to operate ...
Parses a text-based XML document and creates an XmlElement object from it.
void setEmptyTextElementsIgnored(bool shouldBeIgnored) noexcept
Sets a flag to change the treatment of empty text elements.
String fromFirstOccurrenceOf(StringRef substringToStartFrom, bool includeSubStringInResult, bool ignoreCase) const
Returns a section of the string starting from a given substring.
virtual bool writeByte(char byte)
Writes a single byte to the stream.
static XmlElement * createTextElement(const String &text)
Creates a text element that can be added to a parent element.
void setInputSource(InputSource *newSource) noexcept
Sets an input source object to use for parsing documents that reference external entities.
void append(ObjectType *const newItem) noexcept
Appends an item to the list.
int64 writeFromInputStream(InputStream &, int64 maxNumBytesToWrite) override
Reads data from an input stream and writes it to this stream.
String trimCharactersAtEnd(StringRef charactersToTrim) const
Returns a copy of this string, having removed a specified set of characters from its end...
Used to build a tree of elements representing an XML document.
CharPointerType getCharPointer() const noexcept
Returns the character pointer currently being used to store this string.
bool isEmpty() const noexcept
Returns true if this pointer is pointing to a null character.
A special array for holding a list of strings.
~XmlDocument()
Destructor.
static bool isWhitespace(char character) noexcept
Checks whether a character is whitespace.
int addTokens(StringRef stringToTokenise, bool preserveQuotedStrings)
Breaks up a string into tokens and adds them to this array.
bool equalsIgnoreCase(const String &other) const noexcept
Case-insensitive comparison with another string.
const String & getLastParseError() const noexcept
Returns the parsing error that occurred the last time getDocumentElement was called.
String substring(int startIndex, int endIndex) const
Returns a subsection of the string.
static CharPointerType1 find(CharPointerType1 textToSearch, const CharPointerType2 substringToLookFor) noexcept
Returns a pointer to the first occurrence of a substring in a string.
String unquoted() const
Removes quotation marks from around the string (if there are any).
const void * getData() const noexcept
Returns a pointer to the data that has been written to the stream.
static int getHexDigitValue(juce_wchar digit) noexcept
Returns 0 to 15 for '0' to 'F', or -1 for characters that aren't a legal hex digit.
Represents a local file or directory.
int indexOf(StringRef textToLookFor) const noexcept
Searches for a substring within this string.
size_t getDataSize() const noexcept
Returns the number of bytes of data that have been written to the stream.
static bool isByteOrderMark(const void *possibleByteOrder) noexcept
Returns true if the first three bytes in this pointer are the UTF8 byte-order mark (BOM)...
String upToFirstOccurrenceOf(StringRef substringToEndWith, bool includeSubStringInResult, bool ignoreCase) const
Returns the start of this string, up to the first occurrence of a substring.
bool startsWithChar(juce_wchar character) const noexcept
Tests whether the string begins with a particular character.
static bool isByteOrderMarkLittleEndian(const void *possibleByteOrder) noexcept
Returns true if the first pair of bytes in this pointer are the UTF16 byte-order mark (little endian)...
static XmlElement * parse(const File &file)
A handy static method that parses a file.
Allows efficient repeated insertions into a list.
void clear() noexcept
Resets this string to be empty.
int getIntValue() const noexcept
Reads the value of the string as a decimal number (up to 32 bits in size).
static String charToString(juce_wchar character)
Creates a string from a single character.
int size() const noexcept
Returns the number of strings in the array.
static bool isByteOrderMarkBigEndian(const void *possibleByteOrder) noexcept
Returns true if the first pair of bytes in this pointer are the UTF16 byte-order mark (big endian)...
bool appendUTF8Char(juce_wchar character)
Appends the UTF-8 bytes for a Unicode character.
String toUTF8() const
Returns a String created from the (UTF8) data that has been written to the stream.
bool containsNonWhitespaceChars() const noexcept
Returns true if this string contains any non-whitespace characters.
Writes data to an internal memory buffer, which grows as required.
int indexOfChar(juce_wchar characterToLookFor) const noexcept
Searches for a character inside this string.
String trim() const
Returns a copy of this string with any whitespace characters removed from the start and end...
static bool isLetterOrDigit(char character) noexcept
Checks whether a character is alphabetic or numeric.
String toString() const
Attempts to detect the encoding of the data and convert it to a string.
void appendCharPointer(CharPointerType startOfTextToAppend, CharPointerType endOfTextToAppend)
Appends a string to the end of this one.
XmlDocument(const String &documentText)
Creates an XmlDocument from the xml text.
XmlElement * getDocumentElement(bool onlyReadOuterDocumentElement=false)
Creates an XmlElement object to represent the main document node.
static int compareUpTo(CharPointerType1 s1, CharPointerType2 s2, int maxChars) noexcept
Compares two null-terminated character strings, up to a given number of characters.
const String & getTagName() const noexcept
Returns this element's tag type name.
int getHexValue32() const noexcept
Parses the string as a hexadecimal number.
Wraps a pointer to a null-terminated UTF-8 character string, and provides various methods to operate ...