38 return XmlDocument (textToParse).getDocumentElement();
// Convenience wrapper: parses XML held in a string.
// NOTE(review): the statement forming the body of this overload is not visible in this listing;
// presumably it mirrors the File overload below - confirm.
41std::unique_ptr<XmlElement> parseXML (
const String& textToParse)
// Convenience wrapper: builds a temporary XmlDocument from the file and returns whatever
// getDocumentElement() produces for it.
46std::unique_ptr<XmlElement> parseXML (
const File& file)
48 return XmlDocument (file).getDocumentElement();
// Convenience wrapper: builds a temporary XmlDocument from the text and forwards requiredTag
// to getDocumentElementIfTagMatches(), returning its result unchanged.
51std::unique_ptr<XmlElement> parseXMLIfTagMatches (
const String& textToParse, StringRef requiredTag)
53 return XmlDocument (textToParse).getDocumentElementIfTagMatches (requiredTag);
// Same as the overload above, but the temporary XmlDocument is built from a file.
56std::unique_ptr<XmlElement> parseXMLIfTagMatches (
const File& file, StringRef requiredTag)
58 return XmlDocument (file).getDocumentElementIfTagMatches (requiredTag);
63 inputSource.reset (newSource);
68 ignoreEmptyTextElements = shouldBeIgnored;
// Helpers that decide which characters may appear in an XML identifier. Elsewhere in this file
// findEndOfToken() is used to delimit tag names and attribute names.
71namespace XmlIdentifierChars
// General-case test. The visible part of the expression accepts the punctuation characters
// underscore, hyphen, colon and full stop; the leading operand of the expression is not
// visible in this listing.
73 static bool isIdentifierCharSlow (juce_wchar c)
noexcept
76 || c ==
'_' || c ==
'-' || c ==
':' || c ==
'.';
// Fast test backed by a bitmap: legalChars holds one bit per character code, 32 codes per
// uint32 word, so word (c >> 5) and bit (c & 31) select the entry for c.
79 static bool isIdentifierChar (juce_wchar c)
noexcept
81 static const uint32 legalChars[] = { 0, 0x7ff6000, 0x87fffffe, 0x7fffffe, 0 };
// Codes that fall inside the table are answered from it; everything else goes to the slow test.
// NOTE(review): 1 << (c & 31) shifts a signed int, and a shift count of 31 reaches the sign
// bit; an unsigned literal (1u) would sidestep that - worth confirming against the toolchain.
83 return ((
int) c < (
int) numElementsInArray (legalChars) * 32) ? ((legalChars [c >> 5] & (uint32) (1 << (c & 31))) != 0)
84 : isIdentifierCharSlow (c);
// Scans forward from p for as long as the character under p is an identifier character.
// Presumably returns the position of the first non-identifier character: callers in this file
// compare the result with the start position to detect an empty token.
101 static String::CharPointerType findEndOfToken (String::CharPointerType p)
noexcept
103 while (isIdentifierChar (*p))
112 if (originalText.
isEmpty() && inputSource !=
nullptr)
114 std::unique_ptr<InputStream> in (inputSource->createInputStream());
121 #if JUCE_STRING_UTF_TYPE == 8
125 auto* text =
static_cast<const char*
> (data.
getData());
138 return parseDocumentElement (String::CharPointerType (text), onlyReadOuterDocumentElement);
147 return parseDocumentElement (originalText.
getCharPointer(), onlyReadOuterDocumentElement);
153 if (xml->hasTagName (requiredTag))
// Records that a parse problem was encountered.
//   desc    - description of the problem (how it is stored is not visible in this listing).
//   carryOn - errorOccurred is assigned the inverse of this flag, so passing true leaves
//             errorOccurred false and passing false sets it.
// NOTE(review): because this is a plain assignment, a call with carryOn == true made after an
// earlier call with carryOn == false resets errorOccurred to false - confirm this is intended.
164void XmlDocument::setLastError (
const String& desc,
const bool carryOn)
167 errorOccurred = ! carryOn;
// Loads the text of a file referenced by name from the document being parsed.
// Only does so when an input source has been set: the source is asked for a stream for the
// name (after trimming whitespace and removing surrounding quotes) and the whole stream is
// returned as a string. What is returned when there is no input source is not visible here.
170String XmlDocument::getFileContents (
const String& filename)
const
172 if (inputSource !=
nullptr)
174 std::unique_ptr<InputStream> in (inputSource->createInputStreamFor (filename.trim().unquoted()));
177 return in->readEntireStreamAsString();
// Fetches the character at the current input position and moves the position past it
// (via getAndAdvance). Any end-of-input handling that follows is not visible in this listing.
183juce_wchar XmlDocument::readNextChar() noexcept
185 auto c = input.getAndAdvance();
// Top-level parse routine: runs the header, DTD and element stages over textToParse.
//   textToParse                  - start of the text to parse.
//   onlyReadOuterDocumentElement - when true the root element is read without its children
//                                  (the flag is inverted and passed to readNextElement).
// Failure of a stage is reported through lastError using the literal messages below.
196std::unique_ptr<XmlElement> XmlDocument::parseDocumentElement (String::CharPointerType textToParse,
197 bool onlyReadOuterDocumentElement)
// Reset per-parse state: clear the error flag and mark the DTD as not yet tokenised.
200 errorOccurred =
false;
202 needToLoadDTD =
true;
// The stages are tried in order; the first one that fails sets the matching message.
204 if (textToParse.isEmpty())
206 lastError =
"not enough input";
208 else if (! parseHeader())
210 lastError =
"malformed header";
212 else if (! parseDTD())
214 lastError =
"malformed DTD";
// readNextElement returns a raw pointer; ownership is taken here by the unique_ptr.
219 std::unique_ptr<XmlElement> result (readNextElement (! onlyReadOuterDocumentElement));
// Handles the XML declaration at the start of the document.
// Visible steps: skip leading whitespace, locate headerEnd, pull the encoding name out of the
// header text, assert (debug only, via jassert) that the encoding is either absent or begins
// with utf- ignoring case, then move input to headerEnd + 2 and skip whitespace again.
// The encoding is taken as the text between the first pair of double quotes that follows the
// word encoding and an equals sign.
// NOTE(review): only double quotes are searched for, so a single-quoted encoding value would
// presumably yield an empty string and pass the assertion unchecked - confirm.
// NOTE(review): headerEnd + 2 presumably steps over the two-character declaration terminator;
// the search that produces headerEnd is not visible in this listing.
228bool XmlDocument::parseHeader()
230 skipNextWhiteSpace();
236 if (headerEnd.isEmpty())
240 auto encoding = String (input, headerEnd)
241 .fromFirstOccurrenceOf (
"encoding",
false,
true)
242 .fromFirstOccurrenceOf (
"=",
false,
false)
243 .fromFirstOccurrenceOf (
"\"",
false,
false)
244 .upToFirstOccurrenceOf (
"\"",
false,
false)
254 jassert (encoding.isEmpty() || encoding.startsWithIgnoreCase (
"utf-"));
257 input = headerEnd + 2;
258 skipNextWhiteSpace();
// Captures the text of the document type declaration into dtdText.
// The start position is remembered, characters are consumed one at a time while the counter n
// (initially 1) stays above zero, and the span from the remembered start up to one character
// before the final input position is stored after trimming.
// NOTE(review): n looks like a nesting-depth counter for angle brackets, but the statements
// that change it are not visible in this listing - confirm.
264bool XmlDocument::parseDTD()
269 auto dtdStart = input;
271 for (
int n = 1; n > 0;)
273 auto c = readNextChar();
284 dtdText = String (dtdStart, input - 1).
trim();
// Advances input past material that carries no element content.
// Visible steps: whitespace is skipped; a search for the comment terminator moves input to just
// after it (index + 3); a search for the two-character terminator of a processing instruction
// moves input to just after it (index + 2). A negative search result is checked for in both
// cases; what is done then, and the tests that decide which search runs, are not visible here.
290void XmlDocument::skipNextWhiteSpace()
294 input.incrementToEndOfWhitespace();
309 auto closeComment = input.indexOf (CharPointer_ASCII (
"-->"));
311 if (closeComment < 0)
317 input += closeComment + 3;
324 auto closeBracket = input.indexOf (CharPointer_ASCII (
"?>"));
326 if (closeBracket < 0)
332 input += closeBracket + 2;
// Reads a quoted value from input and appends it to result.
// The first character read is remembered as the quote delimiter. Text is appended in runs:
// the pending run (start up to input) is flushed when the delimiter is seen again and when an
// ampersand is met. An unmatched-quotes error is raised as fatal (carryOn == false).
// NOTE(review): the condition that triggers the error and the handling that follows the
// ampersand flush (presumably entity expansion) are not visible in this listing.
341void XmlDocument::readQuotedString (String& result)
343 auto quote = readNextChar();
347 auto c = readNextChar();
364 auto character = *input;
366 if (character == quote)
368 result.appendCharPointer (start, input);
373 if (character ==
'&')
375 result.appendCharPointer (start, input);
381 setLastError (
"unmatched quotes",
false);
// Parses one element (tag name plus attributes) starting at the current input position.
//   alsoParseSubElements - when true, readChildElements() is called for the new element.
// Returns a raw pointer that starts out null. The element is created with new, so the caller
// takes ownership: in this file the result is either wrapped in a unique_ptr or appended to a
// parent child list.
//
// Visible flow:
//   * The tag name is the identifier token at input. If that token is empty, whitespace is
//     skipped and the scan is retried once; if it is still empty a fatal tag-name-missing error
//     is raised.
//   * Attributes are collected through an Appender on the element attribute list. Each one is
//     an identifier token, optional whitespace, an equals sign, optional whitespace, and a
//     value that must open with a double or single quote; the value is read by
//     readQuotedString() straight into the new attribute node.
//   * A slash followed by a closing angle bracket is tested for, presumably marking an element
//     with no children.
//   * Two fatal errors are raised in the attribute loop: one naming the attribute that lacked an
//     equals sign, and one naming the tag together with the offending character.
// NOTE(review): the cleanup performed on node after a fatal error is not visible in this
// listing - confirm the element allocated with new is released on those paths.
392XmlElement* XmlDocument::readNextElement (
const bool alsoParseSubElements)
394 XmlElement* node =
nullptr;
395 skipNextWhiteSpace();
403 auto endOfToken = XmlIdentifierChars::findEndOfToken (input);
405 if (endOfToken == input)
408 skipNextWhiteSpace();
409 endOfToken = XmlIdentifierChars::findEndOfToken (input);
411 if (endOfToken == input)
413 setLastError (
"tag name missing",
false);
418 node =
new XmlElement (input, endOfToken);
420 LinkedListPointer<XmlElement::XmlAttributeNode>::Appender attributeAppender (node->attributes);
425 skipNextWhiteSpace();
429 if (c ==
'/' && input[1] ==
'>')
440 if (alsoParseSubElements)
441 readChildElements (*node);
447 if (XmlIdentifierChars::isIdentifierChar (c))
449 auto attNameEnd = XmlIdentifierChars::findEndOfToken (input);
451 if (attNameEnd != input)
453 auto attNameStart = input;
455 skipNextWhiteSpace();
457 if (readNextChar() ==
'=')
459 skipNextWhiteSpace();
460 auto nextChar = *input;
462 if (nextChar ==
'"' || nextChar ==
'\'')
464 auto* newAtt =
new XmlElement::XmlAttributeNode (attNameStart, attNameEnd);
465 readQuotedString (newAtt->value);
466 attributeAppender.append (newAtt);
472 setLastError (
"expected '=' after attribute '"
473 + String (attNameStart, attNameEnd) +
"'",
false);
481 setLastError (
"illegal character found in " + node->getTagName() +
": '" + c +
"'",
false);
// Reads everything nested inside parent (child elements, CDATA sections and text runs) and
// appends the results to the child list of parent through an Appender.
//
// Visible flow for each item:
//   * The position before whitespace skipping is saved so that a text run can be re-read from
//     there with its leading whitespace intact (input is rewound to preWhitespaceInput).
//   * Fatal unmatched-tags errors are raised at three points; their conditions are not visible
//     in this listing (presumably input running out before the closing tag).
//   * A search for the closing angle bracket advances input past it (closeTag + 1), presumably
//     when the closing tag of parent has been reached.
//   * A CDATA section is scanned until the three-character terminator tested below; running
//     out of input raises a fatal unterminated-CDATA error.
//   * Otherwise a nested element is read with readNextElement (true) and appended when non-null.
//   * Text runs are accumulated in textElementContent. contentShouldBeUsed starts as the
//     inverse of ignoreEmptyTextElements and is switched on once an expanded entity contains
//     non-whitespace; the statements that switch it on for ordinary characters are not visible.
//   * Inside a text run a comment is skipped up to and including its terminator (index + 3),
//     with a fatal unterminated-comment error if no terminator is found.
//   * An expanded entity whose text starts with an opening angle bracket and has at least one
//     more character is parsed as markup: input and outOfData are saved, input is pointed at
//     the entity text, elements are read until readNextElement returns null, and outOfData is
//     restored. Any other entity text is appended to the text run.
//   * A carriage return is given special treatment together with a following line feed -
//     presumably line-ending normalisation; the resulting statements are not visible here.
//   * Other characters are written to the text run as UTF-8.
// NOTE(review): restoring input from oldInput after the nested entity parse is not visible in
// this listing - confirm it happens next to the outOfData restore.
491void XmlDocument::readChildElements (XmlElement& parent)
493 LinkedListPointer<XmlElement>::Appender childAppender (parent.firstChildElement);
497 auto preWhitespaceInput = input;
498 skipNextWhiteSpace();
502 setLastError (
"unmatched tags",
false);
513 auto closeTag = input.indexOf ((juce_wchar)
'>');
516 input += closeTag + 1;
524 auto inputStart = input;
532 setLastError (
"unterminated CDATA section",
false);
537 if (c0 ==
']' && input[1] ==
']' && input[2] ==
'>')
550 if (
auto* n = readNextElement (
true))
551 childAppender.append (n);
558 input = preWhitespaceInput;
559 MemoryOutputStream textElementContent;
560 bool contentShouldBeUsed = ! ignoreEmptyTextElements;
568 if (input[1] ==
'!' && input[2] ==
'-' && input[3] ==
'-')
571 auto closeComment = input.indexOf (CharPointer_ASCII (
"-->"));
573 if (closeComment < 0)
575 setLastError (
"unterminated comment",
false);
580 input += closeComment + 3;
589 setLastError (
"unmatched tags",
false);
599 if (entity.startsWithChar (
'<') && entity [1] != 0)
601 auto oldInput = input;
602 auto oldOutOfData = outOfData;
604 input = entity.getCharPointer();
607 while (
auto* n = readNextElement (
true))
608 childAppender.append (n);
611 outOfData = oldOutOfData;
615 textElementContent << entity;
616 contentShouldBeUsed = contentShouldBeUsed || entity.containsNonWhitespaceChars();
623 auto nextChar = *input;
625 if (nextChar ==
'\r')
629 if (input[1] ==
'\n')
633 if (nextChar ==
'<' || nextChar ==
'&')
638 setLastError (
"unmatched tags",
false);
643 textElementContent.appendUTF8Char (nextChar);
649 if (contentShouldBeUsed)
// Expands the entity reference at the current input position and appends the expansion to
// result. The comparisons below start at the entity name, so input is evidently already past
// the leading ampersand when they run.
//
// Visible cases, tried in order:
//   * The five predefined names amp, quot, apos, lt and gt, each matched together with its
//     terminating semicolon.
//   * A numeric reference introduced by a hash sign. After x or X the digits are hexadecimal:
//     they are folded into charCode four bits at a time until a semicolon, and a non-hex digit
//     or more than 8 digits raises a non-fatal illegal-escape-sequence error. Otherwise a
//     leading decimal digit starts a base-10 accumulation that ends at a semicolon; other
//     problems raise non-fatal unexpected-end-of-input or illegal-escape-sequence errors.
//     The resulting code is appended to result as a single juce_wchar.
//   * Anything else is treated as a named entity: the name runs up to the next semicolon,
//     input is moved past that semicolon, and the name is resolved by expandExternalEntity().
//     A missing semicolon (negative index) is checked for; its handling is not visible here.
// All errors raised in this function pass carryOn == true.
// NOTE(review): the predefined names are compared ignoring case, which is more lenient than
// the XML specification (entity names are case-sensitive) - confirm the leniency is intended.
// NOTE(review): no digit-count limit is visible on the decimal path, unlike the 8-digit limit
// on the hex path; confirm the range check that is not visible here covers long digit strings.
655void XmlDocument::readEntity (String& result)
660 if (input.compareIgnoreCaseUpTo (CharPointer_ASCII (
"amp;"), 4) == 0)
665 else if (input.compareIgnoreCaseUpTo (CharPointer_ASCII (
"quot;"), 5) == 0)
670 else if (input.compareIgnoreCaseUpTo (CharPointer_ASCII (
"apos;"), 5) == 0)
675 else if (input.compareIgnoreCaseUpTo (CharPointer_ASCII (
"lt;"), 3) == 0)
680 else if (input.compareIgnoreCaseUpTo (CharPointer_ASCII (
"gt;"), 3) == 0)
685 else if (*input ==
'#')
687 int64_t charCode = 0;
690 if (*input ==
'x' || *input ==
'X')
695 while (input[0] !=
';')
699 if (hexValue < 0 || ++numChars > 8)
701 setLastError (
"illegal escape sequence",
true);
705 charCode = (charCode << 4) | hexValue;
711 else if (input[0] >=
'0' && input[0] <=
'9')
717 const auto firstChar = input[0];
721 setLastError (
"unexpected end of input",
true);
725 if (firstChar ==
';')
730 setLastError (
"illegal escape sequence",
true);
734 charCode = charCode * 10 + ((int) firstChar -
'0');
742 setLastError (
"illegal escape sequence",
true);
747 result << (juce_wchar) charCode;
751 auto entityNameStart = input;
752 auto closingSemiColon = input.indexOf ((juce_wchar)
';');
754 if (closingSemiColon < 0)
761 input += closingSemiColon + 1;
762 result += expandExternalEntity (String (entityNameStart, (
size_t) closingSemiColon));
// Expands an entity given by name (ent) and returns the expansion.
// The character held in char1 selects the path: x or X for a hexadecimal form, a decimal digit
// for a decimal form, and a fatal illegal-escape-sequence error (carryOn == false) after those
// two tests. The fall-through path hands ent to expandExternalEntity().
// NOTE(review): how char1 is obtained, any handling of predefined names, and the values
// returned on the numeric paths are not visible in this listing.
// NOTE(review): readEntity() reports the same message with carryOn == true while this function
// uses false - confirm the difference in severity is deliberate.
767String XmlDocument::expandEntity (
const String& ent)
779 if (char1 ==
'x' || char1 ==
'X')
782 if (char1 >=
'0' && char1 <=
'9')
785 setLastError (
"illegal escape sequence",
false);
789 return expandExternalEntity (ent);
// Resolves an entity declared in the DTD and returns its replacement text.
//
// Visible flow:
//   1. DTD tokenising (needToLoadDTD is cleared once this is done, so the work is evidently
//      meant to happen once per parse). If the last two tokens are the word system (any case)
//      followed by a quoted string, the quoted string is used as a file name and the token list
//      is rebuilt from the contents of that file. Otherwise the list is cleared and rebuilt
//      from a bracketed section of the DTD text, used only when closeBracket > openBracket.
//   2. Parameter entities: walking the tokens backwards, each token that starts with a percent
//      sign and ends with a semicolon is resolved by getParameterEntity(), and the tokens of
//      the result are inserted at index i in reverse order so they end up in original order.
//   3. Lookup: walking forwards, a token equal to entity whose predecessor is the entity
//      declaration keyword (any case) takes its value from the following token, with trailing
//      closing angle brackets, whitespace and quotes removed. Entity references nested inside
//      that value are then expanded through expandEntity().
//   4. If nothing matches, a non-fatal unknown-entity error is raised.
//
// NOTE(review): in the nested-expansion loop the search for the semicolon and the substring
// passed to expandEntity() both start at i + 1, where i is the index into tokenisedDTD, not a
// position inside ent. ampersand + 1 looks like the intended start - likely a defect; confirm.
// NOTE(review): the next ampersand is searched for from semiColon + 1, but semiColon was
// computed before ent was rebuilt with the resolved text, so the offset may no longer line up.
// NOTE(review): tokenisedDTD is indexed with size() - 2, size() - 1 and i - 1, which can be
// negative; this relies on the container tolerating out-of-range indices - confirm.
792String XmlDocument::expandExternalEntity (
const String& entity)
801 if (tokenisedDTD[tokenisedDTD.
size() - 2].equalsIgnoreCase (
"system")
802 && tokenisedDTD[tokenisedDTD.
size() - 1].isQuotedString())
804 auto fn = tokenisedDTD[tokenisedDTD.
size() - 1];
806 tokenisedDTD.
clear();
807 tokenisedDTD.
addTokens (getFileContents (fn),
true);
811 tokenisedDTD.
clear();
818 if (closeBracket > openBracket)
820 closeBracket),
true);
824 for (
int i = tokenisedDTD.
size(); --i >= 0;)
826 if (tokenisedDTD[i].startsWithChar (
'%')
827 && tokenisedDTD[i].endsWithChar (
';'))
829 auto parsed = getParameterEntity (tokenisedDTD[i].substring (1, tokenisedDTD[i].length() - 1));
831 newToks.addTokens (parsed,
true);
835 for (
int j = newToks.size(); --j >= 0;)
836 tokenisedDTD.
insert (i, newToks[j]);
841 needToLoadDTD =
false;
844 for (
int i = 0; i < tokenisedDTD.
size(); ++i)
846 if (tokenisedDTD[i] == entity)
848 if (tokenisedDTD[i - 1].equalsIgnoreCase (
"<!entity"))
850 auto ent = tokenisedDTD [i + 1].trimCharactersAtEnd (
">").
trim().unquoted();
853 auto ampersand = ent.indexOfChar (
'&');
855 while (ampersand >= 0)
857 auto semiColon = ent.indexOf (i + 1,
";");
861 setLastError (
"entity without terminating semi-colon",
false);
865 auto resolved = expandEntity (ent.substring (i + 1, semiColon));
867 ent = ent.substring (0, ampersand)
869 + ent.substring (semiColon + 1);
871 ampersand = ent.indexOfChar (semiColon + 1,
'&');
879 setLastError (
"unknown entity",
true);
// Looks up a parameter entity in the tokenised DTD and returns its replacement text.
// A declaration is recognised as three consecutive tokens: the entity declaration keyword
// (any case), a lone percent sign, and a token equal to entity. The value is the token after
// the name with trailing closing angle brackets removed. If that value is the word system
// (any case), the token after it is treated as a file name (again with trailing closing angle
// brackets removed) and the file contents are returned; otherwise the value is returned
// trimmed and unquoted.
// NOTE(review): the value returned when no declaration matches is not visible in this listing.
// NOTE(review): for i of 0 or 1 the indices i - 1 and i - 2 are negative; this relies on the
// container tolerating out-of-range indices - confirm.
883String XmlDocument::getParameterEntity (
const String& entity)
885 for (
int i = 0; i < tokenisedDTD.
size(); ++i)
887 if (tokenisedDTD[i] == entity
888 && tokenisedDTD [i - 1] ==
"%"
889 && tokenisedDTD [i - 2].equalsIgnoreCase (
"<!entity"))
891 auto ent = tokenisedDTD [i + 1].trimCharactersAtEnd (
">");
893 if (ent.equalsIgnoreCase (
"system"))
894 return getFileContents (tokenisedDTD [i + 2].trimCharactersAtEnd (
">"));
896 return ent.trim().unquoted();
// Signature listing only: each line below is a bare function signature with no body and no
// terminating semicolon. Several of the names (for example getCharPointer, indexOfChar,
// substring, trimCharactersAtEnd, addTokens, insert, size, getDocumentElement and
// getDocumentElementIfTagMatches) are called by the parser code earlier in this file.
// NOTE(review): the owning class of each signature is not shown here - confirm against the
// corresponding headers before relying on any of them.
static bool isByteOrderMarkBigEndian(const void *possibleByteOrder) noexcept
static bool isByteOrderMarkLittleEndian(const void *possibleByteOrder) noexcept
static bool isByteOrderMark(const void *possibleByteOrder) noexcept
static bool isWhitespace(char character) noexcept
static int getHexDigitValue(juce_wchar digit) noexcept
static bool isLetterOrDigit(char character) noexcept
static CharPointerType1 find(CharPointerType1 textToSearch, const CharPointerType2 substringToLookFor) noexcept
static int compareUpTo(CharPointerType1 s1, CharPointerType2 s2, int maxChars) noexcept
const void * getData() const noexcept
size_t getDataSize() const noexcept
int64 writeFromInputStream(InputStream &, int64 maxNumBytesToWrite) override
virtual bool writeByte(char byte)
void insert(int index, String stringToAdd)
int size() const noexcept
int addTokens(StringRef stringToTokenise, bool preserveQuotedStrings)
CharPointerType getCharPointer() const noexcept
int indexOfChar(juce_wchar characterToLookFor) const noexcept
bool isEmpty() const noexcept
int lastIndexOfChar(juce_wchar character) const noexcept
String trimCharactersAtEnd(StringRef charactersToTrim) const
static String charToString(juce_wchar character)
String substring(int startIndex, int endIndex) const
bool isNotEmpty() const noexcept
const String & getLastParseError() const noexcept
std::unique_ptr< XmlElement > getDocumentElementIfTagMatches(StringRef requiredTag)
std::unique_ptr< XmlElement > getDocumentElement(bool onlyReadOuterDocumentElement=false)
XmlDocument(const String &documentText)
static std::unique_ptr< XmlElement > parse(const File &file)
void setInputSource(InputSource *newSource) noexcept
void setEmptyTextElementsIgnored(bool shouldBeIgnored) noexcept
static XmlElement * createTextElement(const String &text)