12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885 |
- /*
- ==============================================================================
- This file is part of the juce_core module of the JUCE library.
- Copyright (c) 2015 - ROLI Ltd.
- Permission to use, copy, modify, and/or distribute this software for any purpose with
- or without fee is hereby granted, provided that the above copyright notice and this
- permission notice appear in all copies.
- THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
- TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN
- NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
- DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
- IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- ------------------------------------------------------------------------------
- NOTE! This permissive ISC license applies ONLY to files within the juce_core module!
- All other JUCE modules are covered by a dual GPL/commercial license, so if you are
- using any other modules, be sure to check that you also comply with their license.
- For more details, visit www.juce.com
- ==============================================================================
- */
- XmlDocument::XmlDocument (const String& documentText)
- : originalText (documentText),
- input (nullptr),
- outOfData (false),
- errorOccurred (false),
- needToLoadDTD (false),
- ignoreEmptyTextElements (true)
- {
- }
- XmlDocument::XmlDocument (const File& file)
- : input (nullptr),
- outOfData (false),
- errorOccurred (false),
- needToLoadDTD (false),
- ignoreEmptyTextElements (true),
- inputSource (new FileInputSource (file))
- {
- }
- XmlDocument::~XmlDocument()
- {
- }
- XmlElement* XmlDocument::parse (const File& file)
- {
- XmlDocument doc (file);
- return doc.getDocumentElement();
- }
- XmlElement* XmlDocument::parse (const String& xmlData)
- {
- XmlDocument doc (xmlData);
- return doc.getDocumentElement();
- }
- void XmlDocument::setInputSource (InputSource* const newSource) noexcept
- {
- inputSource = newSource;
- }
- void XmlDocument::setEmptyTextElementsIgnored (const bool shouldBeIgnored) noexcept
- {
- ignoreEmptyTextElements = shouldBeIgnored;
- }
- namespace XmlIdentifierChars
- {
- static bool isIdentifierCharSlow (const juce_wchar c) noexcept
- {
- return CharacterFunctions::isLetterOrDigit (c)
- || c == '_' || c == '-' || c == ':' || c == '.';
- }
- static bool isIdentifierChar (const juce_wchar c) noexcept
- {
- static const uint32 legalChars[] = { 0, 0x7ff6000, 0x87fffffe, 0x7fffffe, 0 };
- return ((int) c < (int) numElementsInArray (legalChars) * 32) ? ((legalChars [c >> 5] & (1 << (c & 31))) != 0)
- : isIdentifierCharSlow (c);
- }
- /*static void generateIdentifierCharConstants()
- {
- uint32 n[8] = { 0 };
- for (int i = 0; i < 256; ++i)
- if (isIdentifierCharSlow (i))
- n[i >> 5] |= (1 << (i & 31));
- String s;
- for (int i = 0; i < 8; ++i)
- s << "0x" << String::toHexString ((int) n[i]) << ", ";
- DBG (s);
- }*/
- static String::CharPointerType findEndOfToken (String::CharPointerType p)
- {
- while (isIdentifierChar (*p))
- ++p;
- return p;
- }
- }
// Loads the document text if necessary and returns whatever
// parseDocumentElement() produces for it (nullptr on failure; see
// getLastParseError()).
//
// onlyReadOuterDocumentElement: when true, child elements are not parsed and
// only the first 8192 bytes are requested from the input stream.
- XmlElement* XmlDocument::getDocumentElement (const bool onlyReadOuterDocumentElement)
- {
// Only read from the input source when no text was supplied directly.
- if (originalText.isEmpty() && inputSource != nullptr)
- {
- ScopedPointer<InputStream> in (inputSource->createInputStream());
- if (in != nullptr)
- {
- MemoryOutputStream data;
// -1 is presumably "no limit" for writeFromInputStream - TODO confirm
- data.writeFromInputStream (*in, onlyReadOuterDocumentElement ? 8192 : -1);
- #if JUCE_STRING_UTF_TYPE == 8
// With 2 bytes or fewer nothing is stored, so the final call below sees
// empty text and reports "not enough input".
- if (data.getDataSize() > 2)
- {
// null-terminate the buffer so it can be walked as a character pointer
- data.writeByte (0);
- const char* text = static_cast<const char*> (data.getData());
// UTF-16 input (detected by its byte-order mark) has to be converted to a String first
- if (CharPointer_UTF16::isByteOrderMarkBigEndian (text)
- || CharPointer_UTF16::isByteOrderMarkLittleEndian (text))
- {
- originalText = data.toString();
- }
- else
- {
// step over the 3-byte UTF-8 byte-order mark, if present
- if (CharPointer_UTF8::isByteOrderMark (text))
- text += 3;
- // parse the input buffer directly to avoid copying it all to a string..
// ('data' is still in scope for the whole of this call, so 'text' stays valid)
- return parseDocumentElement (String::CharPointerType (text), onlyReadOuterDocumentElement);
- }
- }
- #else
- originalText = data.toString();
- #endif
- }
- }
- return parseDocumentElement (originalText.getCharPointer(), onlyReadOuterDocumentElement);
- }
- const String& XmlDocument::getLastParseError() const noexcept
- {
- return lastError;
- }
- void XmlDocument::setLastError (const String& desc, const bool carryOn)
- {
- lastError = desc;
- errorOccurred = ! carryOn;
- }
- String XmlDocument::getFileContents (const String& filename) const
- {
- if (inputSource != nullptr)
- {
- const ScopedPointer<InputStream> in (inputSource->createInputStreamFor (filename.trim().unquoted()));
- if (in != nullptr)
- return in->readEntireStreamAsString();
- }
- return String();
- }
- juce_wchar XmlDocument::readNextChar() noexcept
- {
- const juce_wchar c = input.getAndAdvance();
- if (c == 0)
- {
- outOfData = true;
- --input;
- }
- return c;
- }
- XmlElement* XmlDocument::parseDocumentElement (String::CharPointerType textToParse,
- const bool onlyReadOuterDocumentElement)
- {
- input = textToParse;
- errorOccurred = false;
- outOfData = false;
- needToLoadDTD = true;
- if (textToParse.isEmpty())
- {
- lastError = "not enough input";
- }
- else if (! parseHeader())
- {
- lastError = "malformed header";
- }
- else if (! parseDTD())
- {
- lastError = "malformed DTD";
- }
- else
- {
- lastError.clear();
- ScopedPointer<XmlElement> result (readNextElement (! onlyReadOuterDocumentElement));
- if (! errorOccurred)
- return result.release();
- }
- return nullptr;
- }
- bool XmlDocument::parseHeader()
- {
- skipNextWhiteSpace();
- if (CharacterFunctions::compareUpTo (input, CharPointer_ASCII ("<?xml"), 5) == 0)
- {
- const String::CharPointerType headerEnd (CharacterFunctions::find (input, CharPointer_ASCII ("?>")));
- if (headerEnd.isEmpty())
- return false;
- #if JUCE_DEBUG
- const String encoding (String (input, headerEnd)
- .fromFirstOccurrenceOf ("encoding", false, true)
- .fromFirstOccurrenceOf ("=", false, false)
- .fromFirstOccurrenceOf ("\"", false, false)
- .upToFirstOccurrenceOf ("\"", false, false).trim());
- /* If you load an XML document with a non-UTF encoding type, it may have been
- loaded wrongly.. Since all the files are read via the normal juce file streams,
- they're treated as UTF-8, so by the time it gets to the parser, the encoding will
- have been lost. Best plan is to stick to utf-8 or if you have specific files to
- read, use your own code to convert them to a unicode String, and pass that to the
- XML parser.
- */
- jassert (encoding.isEmpty() || encoding.startsWithIgnoreCase ("utf-"));
- #endif
- input = headerEnd + 2;
- skipNextWhiteSpace();
- }
- return true;
- }
- bool XmlDocument::parseDTD()
- {
- if (CharacterFunctions::compareUpTo (input, CharPointer_ASCII ("<!DOCTYPE"), 9) == 0)
- {
- input += 9;
- const String::CharPointerType dtdStart (input);
- for (int n = 1; n > 0;)
- {
- const juce_wchar c = readNextChar();
- if (outOfData)
- return false;
- if (c == '<')
- ++n;
- else if (c == '>')
- --n;
- }
- dtdText = String (dtdStart, input - 1).trim();
- }
- return true;
- }
- void XmlDocument::skipNextWhiteSpace()
- {
- for (;;)
- {
- input = input.findEndOfWhitespace();
- if (input.isEmpty())
- {
- outOfData = true;
- break;
- }
- if (*input == '<')
- {
- if (input[1] == '!'
- && input[2] == '-'
- && input[3] == '-')
- {
- input += 4;
- const int closeComment = input.indexOf (CharPointer_ASCII ("-->"));
- if (closeComment < 0)
- {
- outOfData = true;
- break;
- }
- input += closeComment + 3;
- continue;
- }
- if (input[1] == '?')
- {
- input += 2;
- const int closeBracket = input.indexOf (CharPointer_ASCII ("?>"));
- if (closeBracket < 0)
- {
- outOfData = true;
- break;
- }
- input += closeBracket + 2;
- continue;
- }
- }
- break;
- }
- }
- void XmlDocument::readQuotedString (String& result)
- {
- const juce_wchar quote = readNextChar();
- while (! outOfData)
- {
- const juce_wchar c = readNextChar();
- if (c == quote)
- break;
- --input;
- if (c == '&')
- {
- readEntity (result);
- }
- else
- {
- const String::CharPointerType start (input);
- for (;;)
- {
- const juce_wchar character = *input;
- if (character == quote)
- {
- result.appendCharPointer (start, input);
- ++input;
- return;
- }
- else if (character == '&')
- {
- result.appendCharPointer (start, input);
- break;
- }
- else if (character == 0)
- {
- setLastError ("unmatched quotes", false);
- outOfData = true;
- break;
- }
- ++input;
- }
- }
- }
- }
// Parses one element starting at the current input position.
//
// alsoParseSubElements: when true, the content after an opening tag's '>' is
// parsed with readChildElements(); when false parsing stops right after '>'.
//
// Returns a newly allocated element, or nullptr if the input is exhausted,
// does not start with '<', or has no tag name. A partially built element can
// also be returned after an error, so callers must check errorOccurred.
- XmlElement* XmlDocument::readNextElement (const bool alsoParseSubElements)
- {
- XmlElement* node = nullptr;
- skipNextWhiteSpace();
- if (outOfData)
- return nullptr;
- if (*input == '<')
- {
- ++input;
- String::CharPointerType endOfToken (XmlIdentifierChars::findEndOfToken (input));
- if (endOfToken == input)
- {
- // no tag name - but allow for a gap after the '<' before giving an error
- skipNextWhiteSpace();
- endOfToken = XmlIdentifierChars::findEndOfToken (input);
- if (endOfToken == input)
- {
- setLastError ("tag name missing", false);
// node is still nullptr at this point
- return node;
- }
- }
// the tag name is the run of identifier characters [input, endOfToken)
- node = new XmlElement (input, endOfToken);
- input = endOfToken;
- LinkedListPointer<XmlElement::XmlAttributeNode>::Appender attributeAppender (node->attributes);
- // look for attributes
- for (;;)
- {
- skipNextWhiteSpace();
- const juce_wchar c = *input;
- // empty tag..
- if (c == '/' && input[1] == '>')
- {
- input += 2;
- break;
- }
- // parse the guts of the element..
- if (c == '>')
- {
- ++input;
- if (alsoParseSubElements)
- readChildElements (*node);
- break;
- }
- // get an attribute..
- if (XmlIdentifierChars::isIdentifierChar (c))
- {
- String::CharPointerType attNameEnd (XmlIdentifierChars::findEndOfToken (input));
- if (attNameEnd != input)
- {
- const String::CharPointerType attNameStart (input);
- input = attNameEnd;
- skipNextWhiteSpace();
// readNextChar() consumes the character that is expected to be '='
- if (readNextChar() == '=')
- {
- skipNextWhiteSpace();
- const juce_wchar nextChar = *input;
- if (nextChar == '"' || nextChar == '\'')
- {
- XmlElement::XmlAttributeNode* const newAtt
- = new XmlElement::XmlAttributeNode (attNameStart, attNameEnd);
- readQuotedString (newAtt->value);
- attributeAppender.append (newAtt);
- continue;
- }
// an '=' that is not followed by a quote falls through to the 'break'
// below without recording any error
- }
- else
- {
- setLastError ("expected '=' after attribute '"
- + String (attNameStart, attNameEnd) + "'", false);
- return node;
- }
- }
- }
- else
- {
// running out of data is not reported here
- if (! outOfData)
- setLastError ("illegal character found in " + node->getTagName() + ": '" + c + "'", false);
- }
- break;
- }
- }
- return node;
- }
// Parses the content of an element (everything after its opening tag's '>')
// and appends the resulting child elements, CDATA sections and text blocks to
// 'parent', stopping after the closing tag.
//
// The closing tag's name is not compared with the parent's: any "</...>" ends
// the content. Running out of input records an error such as "unmatched tags".
- void XmlDocument::readChildElements (XmlElement& parent)
- {
- LinkedListPointer<XmlElement>::Appender childAppender (parent.firstChildElement);
- for (;;)
- {
// remembered so that a text block can include its leading whitespace
- const String::CharPointerType preWhitespaceInput (input);
- skipNextWhiteSpace();
- if (outOfData)
- {
- setLastError ("unmatched tags", false);
- break;
- }
- if (*input == '<')
- {
- const juce_wchar c1 = input[1];
- if (c1 == '/')
- {
- // our close tag..
- const int closeTag = input.indexOf ((juce_wchar) '>');
// if no '>' is found the input is left where it is and no error is set
- if (closeTag >= 0)
- input += closeTag + 1;
- break;
- }
- if (c1 == '!' && CharacterFunctions::compareUpTo (input + 2, CharPointer_ASCII ("[CDATA["), 7) == 0)
- {
// 9 == length of "<![CDATA["
- input += 9;
- const String::CharPointerType inputStart (input);
- for (;;)
- {
- const juce_wchar c0 = *input;
- if (c0 == 0)
- {
- setLastError ("unterminated CDATA section", false);
- outOfData = true;
- break;
- }
- else if (c0 == ']'
- && input[1] == ']'
- && input[2] == '>')
- {
// the CDATA content is added verbatim, with no entity expansion
- childAppender.append (XmlElement::createTextElement (String (inputStart, input)));
- input += 3;
- break;
- }
- ++input;
- }
- }
- else
- {
- // this is some other element, so parse and add it..
- if (XmlElement* const n = readNextElement (true))
- childAppender.append (n);
- else
- break;
- }
- }
- else // must be a character block
- {
- input = preWhitespaceInput; // roll back to include the leading whitespace
- MemoryOutputStream textElementContent;
// when empty text is ignored, the block is only kept once some
// non-whitespace content has been seen
- bool contentShouldBeUsed = ! ignoreEmptyTextElements;
- for (;;)
- {
- const juce_wchar c = *input;
- if (c == '<')
- {
// comments inside a text block are skipped without ending the block
- if (input[1] == '!' && input[2] == '-' && input[3] == '-')
- {
- input += 4;
- const int closeComment = input.indexOf (CharPointer_ASCII ("-->"));
- if (closeComment < 0)
- {
- setLastError ("unterminated comment", false);
- outOfData = true;
- return;
- }
- input += closeComment + 3;
- continue;
- }
- break;
- }
- if (c == 0)
- {
- setLastError ("unmatched tags", false);
- outOfData = true;
- return;
- }
- if (c == '&')
- {
- String entity;
- readEntity (entity);
// an expansion that starts with '<' and is longer than one character is
// parsed as markup: the parser is pointed at the expanded text, its
// elements are appended, and the previous position is then restored
- if (entity.startsWithChar ('<') && entity [1] != 0)
- {
- const String::CharPointerType oldInput (input);
- const bool oldOutOfData = outOfData;
- input = entity.getCharPointer();
- outOfData = false;
- while (XmlElement* n = readNextElement (true))
- childAppender.append (n);
- input = oldInput;
- outOfData = oldOutOfData;
- }
- else
- {
- textElementContent << entity;
- contentShouldBeUsed = contentShouldBeUsed || entity.containsNonWhitespaceChars();
- }
- }
- else
- {
// copy plain characters up to the next '<' or '&'
- for (;;)
- {
- const juce_wchar nextChar = *input;
- if (nextChar == '<' || nextChar == '&')
- break;
- if (nextChar == 0)
- {
- setLastError ("unmatched tags", false);
- outOfData = true;
- return;
- }
- textElementContent.appendUTF8Char (nextChar);
- contentShouldBeUsed = contentShouldBeUsed || ! CharacterFunctions::isWhitespace (nextChar);
- ++input;
- }
- }
- }
- if (contentShouldBeUsed)
- childAppender.append (XmlElement::createTextElement (textElementContent.toUTF8()));
- }
- }
- }
- void XmlDocument::readEntity (String& result)
- {
- // skip over the ampersand
- ++input;
- if (input.compareIgnoreCaseUpTo (CharPointer_ASCII ("amp;"), 4) == 0)
- {
- input += 4;
- result += '&';
- }
- else if (input.compareIgnoreCaseUpTo (CharPointer_ASCII ("quot;"), 5) == 0)
- {
- input += 5;
- result += '"';
- }
- else if (input.compareIgnoreCaseUpTo (CharPointer_ASCII ("apos;"), 5) == 0)
- {
- input += 5;
- result += '\'';
- }
- else if (input.compareIgnoreCaseUpTo (CharPointer_ASCII ("lt;"), 3) == 0)
- {
- input += 3;
- result += '<';
- }
- else if (input.compareIgnoreCaseUpTo (CharPointer_ASCII ("gt;"), 3) == 0)
- {
- input += 3;
- result += '>';
- }
- else if (*input == '#')
- {
- int charCode = 0;
- ++input;
- if (*input == 'x' || *input == 'X')
- {
- ++input;
- int numChars = 0;
- while (input[0] != ';')
- {
- const int hexValue = CharacterFunctions::getHexDigitValue (input[0]);
- if (hexValue < 0 || ++numChars > 8)
- {
- setLastError ("illegal escape sequence", true);
- break;
- }
- charCode = (charCode << 4) | hexValue;
- ++input;
- }
- ++input;
- }
- else if (input[0] >= '0' && input[0] <= '9')
- {
- int numChars = 0;
- while (input[0] != ';')
- {
- if (++numChars > 12)
- {
- setLastError ("illegal escape sequence", true);
- break;
- }
- charCode = charCode * 10 + ((int) input[0] - '0');
- ++input;
- }
- ++input;
- }
- else
- {
- setLastError ("illegal escape sequence", true);
- result += '&';
- return;
- }
- result << (juce_wchar) charCode;
- }
- else
- {
- const String::CharPointerType entityNameStart (input);
- const int closingSemiColon = input.indexOf ((juce_wchar) ';');
- if (closingSemiColon < 0)
- {
- outOfData = true;
- result += '&';
- }
- else
- {
- input += closingSemiColon + 1;
- result += expandExternalEntity (String (entityNameStart, (size_t) closingSemiColon));
- }
- }
- }
- String XmlDocument::expandEntity (const String& ent)
- {
- if (ent.equalsIgnoreCase ("amp")) return String::charToString ('&');
- if (ent.equalsIgnoreCase ("quot")) return String::charToString ('"');
- if (ent.equalsIgnoreCase ("apos")) return String::charToString ('\'');
- if (ent.equalsIgnoreCase ("lt")) return String::charToString ('<');
- if (ent.equalsIgnoreCase ("gt")) return String::charToString ('>');
- if (ent[0] == '#')
- {
- const juce_wchar char1 = ent[1];
- if (char1 == 'x' || char1 == 'X')
- return String::charToString (static_cast<juce_wchar> (ent.substring (2).getHexValue32()));
- if (char1 >= '0' && char1 <= '9')
- return String::charToString (static_cast<juce_wchar> (ent.substring (1).getIntValue()));
- setLastError ("illegal escape sequence", false);
- return String::charToString ('&');
- }
- return expandExternalEntity (ent);
- }
- String XmlDocument::expandExternalEntity (const String& entity)
- {
- if (needToLoadDTD)
- {
- if (dtdText.isNotEmpty())
- {
- dtdText = dtdText.trimCharactersAtEnd (">");
- tokenisedDTD.addTokens (dtdText, true);
- if (tokenisedDTD [tokenisedDTD.size() - 2].equalsIgnoreCase ("system")
- && tokenisedDTD [tokenisedDTD.size() - 1].isQuotedString())
- {
- const String fn (tokenisedDTD [tokenisedDTD.size() - 1]);
- tokenisedDTD.clear();
- tokenisedDTD.addTokens (getFileContents (fn), true);
- }
- else
- {
- tokenisedDTD.clear();
- const int openBracket = dtdText.indexOfChar ('[');
- if (openBracket > 0)
- {
- const int closeBracket = dtdText.lastIndexOfChar (']');
- if (closeBracket > openBracket)
- tokenisedDTD.addTokens (dtdText.substring (openBracket + 1,
- closeBracket), true);
- }
- }
- for (int i = tokenisedDTD.size(); --i >= 0;)
- {
- if (tokenisedDTD[i].startsWithChar ('%')
- && tokenisedDTD[i].endsWithChar (';'))
- {
- const String parsed (getParameterEntity (tokenisedDTD[i].substring (1, tokenisedDTD[i].length() - 1)));
- StringArray newToks;
- newToks.addTokens (parsed, true);
- tokenisedDTD.remove (i);
- for (int j = newToks.size(); --j >= 0;)
- tokenisedDTD.insert (i, newToks[j]);
- }
- }
- }
- needToLoadDTD = false;
- }
- for (int i = 0; i < tokenisedDTD.size(); ++i)
- {
- if (tokenisedDTD[i] == entity)
- {
- if (tokenisedDTD[i - 1].equalsIgnoreCase ("<!entity"))
- {
- String ent (tokenisedDTD [i + 1].trimCharactersAtEnd (">").trim().unquoted());
- // check for sub-entities..
- int ampersand = ent.indexOfChar ('&');
- while (ampersand >= 0)
- {
- const int semiColon = ent.indexOf (i + 1, ";");
- if (semiColon < 0)
- {
- setLastError ("entity without terminating semi-colon", false);
- break;
- }
- const String resolved (expandEntity (ent.substring (i + 1, semiColon)));
- ent = ent.substring (0, ampersand)
- + resolved
- + ent.substring (semiColon + 1);
- ampersand = ent.indexOfChar (semiColon + 1, '&');
- }
- return ent;
- }
- }
- }
- setLastError ("unknown entity", true);
- return entity;
- }
- String XmlDocument::getParameterEntity (const String& entity)
- {
- for (int i = 0; i < tokenisedDTD.size(); ++i)
- {
- if (tokenisedDTD[i] == entity
- && tokenisedDTD [i - 1] == "%"
- && tokenisedDTD [i - 2].equalsIgnoreCase ("<!entity"))
- {
- const String ent (tokenisedDTD [i + 1].trimCharactersAtEnd (">"));
- if (ent.equalsIgnoreCase ("system"))
- return getFileContents (tokenisedDTD [i + 2].trimCharactersAtEnd (">"));
- return ent.trim().unquoted();
- }
- }
- return entity;
- }
|