From a0ecfe0b28f70c7952868b951076ef2e33f6d143 Mon Sep 17 00:00:00 2001 From: Guenter Obiltschnig Date: Tue, 31 Oct 2017 13:09:59 +0100 Subject: [PATCH] merge XMLStreamParser from develop --- XML/Makefile | 6 +- XML/include/Poco/XML/Content.h | 63 ++ XML/include/Poco/XML/QName.h | 135 +++ XML/include/Poco/XML/ValueTraits.h | 104 ++ XML/include/Poco/XML/XMLStreamParser.h | 630 ++++++++++++ .../Poco/XML/XMLStreamParserException.h | 58 ++ XML/src/ParserEngine.cpp | 2 +- XML/src/QName.cpp | 74 ++ XML/src/ValueTraits.cpp | 41 + XML/src/XMLStreamParser.cpp | 941 ++++++++++++++++++ XML/src/XMLStreamParserException.cpp | 90 ++ XML/testsuite/Makefile | 3 +- XML/testsuite/src/XMLStreamParserTest.cpp | 505 ++++++++++ XML/testsuite/src/XMLStreamParserTest.h | 40 + XML/testsuite/src/XMLTestSuite.cpp | 3 +- 15 files changed, 2689 insertions(+), 6 deletions(-) create mode 100644 XML/include/Poco/XML/Content.h create mode 100644 XML/include/Poco/XML/QName.h create mode 100644 XML/include/Poco/XML/ValueTraits.h create mode 100644 XML/include/Poco/XML/XMLStreamParser.h create mode 100644 XML/include/Poco/XML/XMLStreamParserException.h create mode 100644 XML/src/QName.cpp create mode 100644 XML/src/ValueTraits.cpp create mode 100644 XML/src/XMLStreamParser.cpp create mode 100644 XML/src/XMLStreamParserException.cpp create mode 100644 XML/testsuite/src/XMLStreamParserTest.cpp create mode 100644 XML/testsuite/src/XMLStreamParserTest.h diff --git a/XML/Makefile b/XML/Makefile index 92a085f2b..508bc4ddd 100644 --- a/XML/Makefile +++ b/XML/Makefile @@ -17,10 +17,10 @@ objects = AbstractContainerNode AbstractNode Attr AttrMap Attributes \ EntityResolverImpl ErrorHandler Event EventDispatcher EventException \ EventListener EventTarget InputSource LexicalHandler Locator LocatorImpl \ MutationEvent Name NamePool NamedNodeMap NamespaceStrategy \ - NamespaceSupport Node NodeFilter NodeIterator NodeList Notation \ - ParserEngine ProcessingInstruction SAXException SAXParser Text \ + 
NamespaceSupport NodeAppender Node NodeFilter NodeIterator NodeList Notation \ + ParserEngine ProcessingInstruction QName SAXException SAXParser Text \ TreeWalker WhitespaceFilter XMLException XMLFilter XMLFilterImpl XMLReader \ - XMLString XMLWriter NodeAppender + XMLString XMLWriter XMLStreamParser XMLStreamParserException ValueTraits expat_objects = xmlparse xmlrole xmltok diff --git a/XML/include/Poco/XML/Content.h b/XML/include/Poco/XML/Content.h new file mode 100644 index 000000000..8b2b31234 --- /dev/null +++ b/XML/include/Poco/XML/Content.h @@ -0,0 +1,63 @@ +// +// Content.h +// +// Library: XML +// Package: XML +// Module: Content +// +// Definition of the Content enum. +// +// Copyright (c) 2015, Applied Informatics Software Engineering GmbH. +// and Contributors. +// +// Based on libstudxml (http://www.codesynthesis.com/projects/libstudxml/). +// Copyright (c) 2009-2013 Code Synthesis Tools CC. +// +// SPDX-License-Identifier: BSL-1.0 +// + + +#ifndef XML_Content_INCLUDED +#define XML_Content_INCLUDED + + +namespace Poco { +namespace XML { + + +struct Content + /// XML content model. C++11 enum class emulated for C++98. + /// + /// element characters whitespaces notes + /// Empty no no ignored + /// Simple no yes preserved content accumulated + /// Complex yes no ignored + /// Mixed yes yes preserved +{ + enum value + { + Empty, + Simple, + Complex, + Mixed + }; + + Content(value v) + : _v(v) + { + } + + operator value() const + { + return _v; + } + +private: + value _v; +}; + + +} } // namespace Poco::XML + + +#endif // XML_Content_INCLUDED diff --git a/XML/include/Poco/XML/QName.h b/XML/include/Poco/XML/QName.h new file mode 100644 index 000000000..6a6a246e1 --- /dev/null +++ b/XML/include/Poco/XML/QName.h @@ -0,0 +1,135 @@ +// +// QName.h +// +// Library: XML +// Package: XML +// Module: QName +// +// Definition of the QName class. +// +// Copyright (c) 2015, Applied Informatics Software Engineering GmbH. +// and Contributors. 
+// +// Based on libstudxml (http://www.codesynthesis.com/projects/libstudxml/). +// Copyright (c) 2009-2013 Code Synthesis Tools CC. +// +// SPDX-License-Identifier: BSL-1.0 +// + + +#ifndef XML_QName_INCLUDED +#define XML_QName_INCLUDED + + +#include "Poco/XML/XML.h" +#include +#include + + +namespace Poco { +namespace XML { + + +class XML_API QName + /// This class represents a qualified XML name in the stream parser. + /// + /// Note that the optional prefix is just "syntactic sugar". In + /// particular, it is ignored by the comparison operators and the + /// std::ostream insertion operator. +{ +public: + QName(); + QName(const std::string& name); + QName(const std::string& ns, const std::string& name); + QName(const std::string& ns, const std::string& name, const std::string& prefix); + + const std::string& namespaceURI() const; + /// Returns the namespace URI part of the name. + + const std::string& localName() const; + /// Returns the local part of the name. + + const std::string& prefix() const; + /// Returns the namespace prefix of the name. + + std::string& namespaceURI(); + /// Returns the namespace URI part of the name. + + std::string& localName(); + /// Returns the local part of the name. + + std::string& prefix(); + /// Returns the namespace prefix of the name. + + std::string toString() const; + /// Returns a printable representation in the [<namespace>#]<name> form. 
+ +public: + friend bool operator < (const QName& x, const QName& y) + { + return x._ns < y._ns || (x._ns == y._ns && x._name < y._name); + } + + friend bool operator == (const QName& x, const QName& y) + { + return x._ns == y._ns && x._name == y._name; + } + + friend bool operator != (const QName& x, const QName& y) + { + return !(x == y); + } + +private: + std::string _ns; + std::string _name; + std::string _prefix; +}; + + +// +// inlines +// +inline const std::string& QName::namespaceURI() const +{ + return _ns; +} + + +inline const std::string& QName::localName() const +{ + return _name; +} + + +inline const std::string& QName::prefix() const +{ + return _prefix; +} + + +inline std::string& QName::namespaceURI() +{ + return _ns; +} + + +inline std::string& QName::localName() +{ + return _name; +} + + +inline std::string& QName::prefix() +{ + return _prefix; +} + + +XML_API std::ostream& operator << (std::ostream&, const QName&); + + +} } // namespace Poco::XML + + +#endif // XML_QName_INCLUDED diff --git a/XML/include/Poco/XML/ValueTraits.h b/XML/include/Poco/XML/ValueTraits.h new file mode 100644 index 000000000..46631c939 --- /dev/null +++ b/XML/include/Poco/XML/ValueTraits.h @@ -0,0 +1,104 @@ +// +// ValueTraits.h +// +// Library: XML +// Package: XML +// Module: ValueTraits +// +// Definition of the ValueTraits templates. +// +// Copyright (c) 2015, Applied Informatics Software Engineering GmbH. +// and Contributors. +// +// Based on libstudxml (http://www.codesynthesis.com/projects/libstudxml/). +// Copyright (c) 2009-2013 Code Synthesis Tools CC. 
+// +// SPDX-License-Identifier: BSL-1.0 +// + + +#ifndef XML_ValueTraits_INCLUDED +#define XML_ValueTraits_INCLUDED + + +#include "XMLStreamParserException.h" +#include +#include +#include +#include + + +namespace Poco { +namespace XML { + + +class XMLStreamParser; +class XMLStreamSerializer; + + +template +struct DefaultValueTraits +{ + static T + parse(std::string, const XMLStreamParser&); + + static std::string + serialize(const T&, const XMLStreamSerializer&); +}; + + +template <> +struct XML_API DefaultValueTraits +{ + static bool + parse(std::string, const XMLStreamParser&); + + static std::string serialize(bool v, const XMLStreamSerializer&) + { + return v ? "true" : "false"; + } +}; + + +template <> +struct XML_API DefaultValueTraits +{ + static std::string parse(std::string s, const XMLStreamParser&) + { + return s; + } + + static std::string serialize(const std::string& v, const XMLStreamSerializer&) + { + return v; + } +}; + + +template +struct ValueTraits: DefaultValueTraits +{ +}; + + +template +struct ValueTraits : DefaultValueTraits +{ +}; + + +template +T DefaultValueTraits::parse(std::string s, const XMLStreamParser& p) +{ + T r; + std::istringstream is(s); + if (!(is >> r && is.eof())) + throw XMLStreamParserException(p, "invalid value '" + s + "'"); + return r; +} + + +} } // namespace Poco::XML + + +#endif // XML_ValueTraits_INCLUDED diff --git a/XML/include/Poco/XML/XMLStreamParser.h b/XML/include/Poco/XML/XMLStreamParser.h new file mode 100644 index 000000000..580a3e8f0 --- /dev/null +++ b/XML/include/Poco/XML/XMLStreamParser.h @@ -0,0 +1,630 @@ +// +// XMLStreamParser.h +// +// Library: XML +// Package: XML +// Module: XMLStreamParser +// +// Definition of the XMLStreamParser class. +// +// Copyright (c) 2015, Applied Informatics Software Engineering GmbH. +// and Contributors. +// +// Based on libstudxml (http://www.codesynthesis.com/projects/libstudxml/). +// Copyright (c) 2009-2013 Code Synthesis Tools CC. 
+// +// SPDX-License-Identifier: BSL-1.0 +// + + +#ifndef XML_XMLStreamParser_INCLUDED +#define XML_XMLStreamParser_INCLUDED + + +// We only support UTF-8 expat. +#ifdef XML_UNICODE +#error UTF-16 expat (XML_UNICODE defined) is not supported +#endif + + +#include "Poco/XML/QName.h" +#include "Poco/XML/ValueTraits.h" +#include "Poco/XML/Content.h" +#include +#include +#include +#include +#include +#include + + +namespace Poco { +namespace XML { + + +class XML_API XMLStreamParser + /// The streaming XML pull parser and streaming XML serializer. The parser + /// is a conforming, non-validating XML 1.0 implementation (see Implementation Notes + /// for details). The application character encoding (that is, the encoding used + /// in the application's memory) for both parser and serializer is UTF-8. + /// The output encoding of the serializer is UTF-8 as well. The parser supports + /// UTF-8, UTF-16, ISO-8859-1, and US-ASCII input encodings. + /// + /// Attribute map: + /// + /// Attribute map lookup. If attribute is not found, then the version + /// without the default value throws an appropriate parsing exception + /// while the version with the default value returns that value. + /// + /// Note also that there is no attribute(ns, name) version since it + /// would conflict with attribute(name, dv) (qualified attributes + /// are not very common). + /// + /// Attribute map is valid throughout at the "element level" until + /// end_element and not just during EV_START_ELEMENT. As a special case, + /// the map is still valid after peek() that returned end_element until + /// this end_element event is retrieved with next(). 
+ /// + /// Using parser: + /// + /// XMLStreamParser p(ifs, argv[1]); + /// for (XMLStreamParser::EventType e: p) + /// { + /// switch (e) + /// { + /// case XMLStreamParser::EV_START_ELEMENT: + /// cerr << p.line () << ':' << p.column () << ": start " << p.localName () << endl; + /// break; + /// case XMLStreamParser::EV_END_ELEMENT: + /// cerr << p.line () << ':' << p.column () << ": end " << p.localName () << endl; + /// break; + /// case XMLStreamParser::EV_START_ATTRIBUTE: + /// ... + /// case XMLStreamParser::EV_END_ATTRIBUTE: + /// ... + /// case XMLStreamParser::EV_CHARACTERS: + /// ... + /// } + /// } +{ +public: + enum EventType + /// Parsing events. + { + EV_START_ELEMENT, + EV_END_ELEMENT, + EV_START_ATTRIBUTE, + EV_END_ATTRIBUTE, + EV_CHARACTERS, + EV_START_NAMESPACE_DECL, + EV_END_NAMESPACE_DECL, + EV_EOF + }; + + typedef unsigned short FeatureType; + /// If both RECEIVE_ATTRIBUTES_EVENT and RECEIVE_ATTRIBUTE_MAP are + /// specified, then RECEIVE_ATTRIBUTES_EVENT is assumed. + + static const FeatureType RECEIVE_ELEMENTS = 0x0001; + static const FeatureType RECEIVE_CHARACTERS = 0x0002; + static const FeatureType RECEIVE_ATTRIBUTE_MAP = 0x0004; + static const FeatureType RECEIVE_ATTRIBUTES_EVENT = 0x0008; + static const FeatureType RECEIVE_NAMESPACE_DECLS = 0x0010; + static const FeatureType RECEIVE_DEFAULT = RECEIVE_ELEMENTS | RECEIVE_CHARACTERS | RECEIVE_ATTRIBUTE_MAP; + + struct XML_API AttributeValueType + { + std::string value; + mutable bool handled; + }; + + typedef std::map AttributeMapType; + + struct XML_API Iterator + // C++11 range-based for support. Generally, the iterator interface + // doesn't make much sense for the XMLStreamParser so for now we have an + // implementation that is just enough for the range-based for. 
+ { + typedef EventType value_type; + + Iterator(XMLStreamParser* p = 0, EventType e = EV_EOF): + _parser(p), + _e(e) + { + } + + value_type operator * () const + { + return _e; + } + + Iterator& operator ++ () + { + _e = _parser->next(); + return *this; + } + + bool operator == (Iterator y) const + /// Comparison only makes sense when comparing to end (eof). + { + return _e == EV_EOF && y._e == EV_EOF; + } + + bool operator != (Iterator y) const + /// Comparison only makes sense when comparing to end (eof). + { + return !(*this == y); + } + + private: + XMLStreamParser* _parser; + EventType _e; + }; + + Iterator begin() + { + return Iterator(this, next()); + } + + Iterator end() + { + return Iterator(this, EV_EOF); + } + + XMLStreamParser(std::istream&, const std::string& inputName, FeatureType = RECEIVE_DEFAULT); + /// The parser constructor takes three arguments: the stream to parse, + /// input name that is used in diagnostics to identify the document being + /// parsed, and the list of events we want the parser to report. + /// + /// Parse std::istream. Input name is used in diagnostics to identify + /// the document being parsed. + /// + /// If stream exceptions are enabled then std::ios_base::failure + /// exception is used to report io errors (badbit and failbit). + /// Otherwise, those are reported as the parsing exception. + + XMLStreamParser(const void* data, std::size_t size, const std::string& inputName, FeatureType = RECEIVE_DEFAULT); + /// Parse memory buffer that contains the whole document. Input name + /// is used in diagnostics to identify the document being parsed. + + ~XMLStreamParser(); + /// Destroys the XMLStreamParser. + + EventType next(); + /// Call the next() function when we are ready to handle the next piece of XML. + + void nextExpect(EventType); + /// Get the next event and make sure that it's what's expected. If it + /// is not, then throw an appropriate parsing exception. 
+ + void nextExpect(EventType, const std::string& name); + void nextExpect(EventType, const QName& qname); + void nextExpect(EventType, const std::string& ns, const std::string& name); + + EventType peek(); + EventType event(); + /// Return the event that was last returned by the call to next() or peek(). + + const std::string& inputName() const; + const QName& getQName() const; + const std::string& namespaceURI() const; + const std::string& localName() const; + const std::string& prefix() const; + std::string& value(); + const std::string& value() const; + template T value() const; + Poco::UInt64 line() const; + Poco::UInt64 column() const; + const std::string& attribute(const std::string& name) const; + template + T attribute(const std::string& name) const; + std::string attribute(const std::string& name, const std::string& deflt) const; + template + T attribute(const std::string& name, const T& deflt) const; + const std::string& attribute(const QName& qname) const; + template + T attribute(const QName& qname) const; + std::string attribute(const QName& qname, const std::string& deflt) const; + template + T attribute(const QName& qname, const T& deflt) const; + bool attributePresent(const std::string& name) const; + bool attributePresent(const QName& qname) const; + const AttributeMapType& attributeMap() const; + + void content(Content); + Content content() const; + + void nextExpect(EventType, const std::string& name, Content); + void nextExpect(EventType, const QName& qname, Content); + void nextExpect(EventType, const std::string& ns, const std::string& name, Content); + + // Helpers for parsing elements with simple content. The first two + // functions assume that EV_START_ELEMENT has already been parsed. The + // rest parse the complete element, from start to end. + // + // Note also that as with attribute(), there is no (namespace,name) + // overload since it would conflict with (name,deflt). 
+ std::string element(); + + template + T element(); + std::string element(const std::string& name); + std::string element(const QName& qname); + template + T element(const std::string& name); + template + T element(const QName& qname); + std::string element(const std::string& name, const std::string& deflt); + std::string element(const QName& qname, const std::string& deflt); + template + T element(const std::string& name, const T& deflt); + template + T element(const QName& qname, const T& deflt); + +private: + XMLStreamParser(const XMLStreamParser&); + XMLStreamParser& operator = (const XMLStreamParser&); + + static void XMLCALL handleStartElement(void*, const XML_Char*, const XML_Char**); + static void XMLCALL handleEndElement(void*, const XML_Char*); + static void XMLCALL handleCharacters(void*, const XML_Char*, int); + static void XMLCALL handleStartNamespaceDecl(void*, const XML_Char*, const XML_Char*); + static void XMLCALL handleEndNamespaceDecl(void*, const XML_Char*); + + void init(); + EventType nextImpl(bool peek); + EventType nextBody(); + void handleError(); + + // If _size is 0, then data is std::istream. Otherwise, it is a buffer. + union + { + std::istream* is; + const void* buf; + } + _data; + + std::size_t _size; + const std::string _inputName; + FeatureType _feature; + XML_Parser _parser; + std::size_t _depth; + bool _accumulateContent; // Whether we are accumulating character content. + enum { state_next, state_peek } _parserState; + EventType _currentEvent; + EventType _queue; + QName _qname; + std::string _value; + const QName* _qualifiedName; + std::string* _pvalue; + Poco::UInt64 _line; + Poco::UInt64 _column; + + struct AttributeType + { + QName qname; + std::string value; + }; + + typedef std::vector attributes; + attributes _attributes; + attributes::size_type _currentAttributeIndex; // Index of the current attribute. 
+ + typedef std::vector NamespaceDecls; + NamespaceDecls _startNamespace; + NamespaceDecls::size_type _startNamespaceIndex;// Index of the current decl. + NamespaceDecls _endNamespace; + NamespaceDecls::size_type _endNamespaceIndex;// Index of the current decl. + + struct ElementEntry + { + ElementEntry(std::size_t d, Content c = Content::Mixed): + depth(d), + content(c), + attributesUnhandled(0) + { + } + + std::size_t depth; + Content content; + AttributeMapType attributeMap; + mutable AttributeMapType::size_type attributesUnhandled; + }; + + typedef std::vector ElementState; + std::vector _elementState; + + const AttributeMapType _emptyAttrMap; + + const ElementEntry* getElement() const; + const ElementEntry* getElementImpl() const; + void popElement(); +}; + + +XML_API std::ostream& operator << (std::ostream&, XMLStreamParser::EventType); + + +// +// inlines +// +inline XMLStreamParser::EventType XMLStreamParser::event() + // Return the event that was last returned by the call to next() or peek(). 
+{ + return _currentEvent; +} + + +inline const std::string& XMLStreamParser::inputName() const +{ + return _inputName; +} + + +inline const QName& XMLStreamParser::getQName() const +{ + return *_qualifiedName; +} + + +inline const std::string& XMLStreamParser::namespaceURI() const +{ + return _qualifiedName->namespaceURI(); +} + + +inline const std::string& XMLStreamParser::localName() const +{ + return _qualifiedName->localName(); +} + + +inline const std::string& XMLStreamParser::prefix() const +{ + return _qualifiedName->prefix(); +} + + +inline std::string& XMLStreamParser::value() +{ + return *_pvalue; +} + + +inline const std::string& XMLStreamParser::value() const +{ + return *_pvalue; +} + + +inline Poco::UInt64 XMLStreamParser::line() const +{ + return _line; +} + + +inline Poco::UInt64 XMLStreamParser::column() const +{ + return _column; +} + + +inline XMLStreamParser::EventType XMLStreamParser::peek() +{ + if (_parserState == state_peek) + return _currentEvent; + else + { + EventType e(nextImpl(true)); + _parserState = state_peek; // Set it after the call to nextImpl(). 
+ return e; + } +} + + +template +inline T XMLStreamParser::value() const +{ + return ValueTraits < T > ::parse(value(), *this); +} + + +inline const std::string& XMLStreamParser::attribute(const std::string& n) const +{ + return attribute(QName(n)); +} + + +template +inline T XMLStreamParser::attribute(const std::string& n) const +{ + return attribute < T > (QName(n)); +} + + +inline std::string XMLStreamParser::attribute(const std::string& n, const std::string& dv) const +{ + return attribute(QName(n), dv); +} + + +template +inline T XMLStreamParser::attribute(const std::string& n, const T& dv) const +{ + return attribute < T > (QName(n), dv); +} + + +template +inline T XMLStreamParser::attribute(const QName& qn) const +{ + return ValueTraits < T > ::parse(attribute(qn), *this); +} + + +inline bool XMLStreamParser::attributePresent(const std::string& n) const +{ + return attributePresent(QName(n)); +} + + +inline const XMLStreamParser::AttributeMapType& XMLStreamParser::attributeMap() const +{ + if (const ElementEntry* e = getElement()) + { + e->attributesUnhandled = 0; // Assume all handled. 
+ return e->attributeMap; + } + + return _emptyAttrMap; +} + + +inline void XMLStreamParser::nextExpect(EventType e, const QName& qn) +{ + nextExpect(e, qn.namespaceURI(), qn.localName()); +} + + +inline void XMLStreamParser::nextExpect(EventType e, const std::string& n) +{ + nextExpect(e, std::string(), n); +} + + +inline void XMLStreamParser::nextExpect(EventType e, const QName& qn, Content c) +{ + nextExpect(e, qn); + poco_assert(e == EV_START_ELEMENT); + content(c); +} + + +inline void XMLStreamParser::nextExpect(EventType e, const std::string& n, Content c) +{ + nextExpect(e, std::string(), n); + poco_assert(e == EV_START_ELEMENT); + content(c); +} + + +inline void XMLStreamParser::nextExpect(EventType e, const std::string& ns, const std::string& n, Content c) +{ + nextExpect(e, ns, n); + poco_assert(e == EV_START_ELEMENT); + content(c); +} + + +template +inline T XMLStreamParser::element() +{ + return ValueTraits < T > ::parse(element(), *this); +} + + +inline std::string XMLStreamParser::element(const std::string& n) +{ + nextExpect(EV_START_ELEMENT, n); + return element(); +} + + +inline std::string XMLStreamParser::element(const QName& qn) +{ + nextExpect(EV_START_ELEMENT, qn); + return element(); +} + + +template +inline T XMLStreamParser::element(const std::string& n) +{ + return ValueTraits < T > ::parse(element(n), *this); +} + + +template +inline T XMLStreamParser::element(const QName& qn) +{ + return ValueTraits < T > ::parse(element(qn), *this); +} + + +inline std::string XMLStreamParser::element(const std::string& n, const std::string& dv) +{ + return element(QName(n), dv); +} + + +template +inline T XMLStreamParser::element(const std::string& n, const T& dv) +{ + return element < T > (QName(n), dv); +} + + +inline void XMLStreamParser::content(Content c) +{ + poco_assert(_parserState == state_next); + + if (!_elementState.empty() && _elementState.back().depth == _depth) + _elementState.back().content = c; + else + 
_elementState.push_back(ElementEntry(_depth, c)); +} + + +inline Content XMLStreamParser::content() const +{ + poco_assert(_parserState == state_next); + + return !_elementState.empty() && _elementState.back().depth == _depth ? _elementState.back().content : Content(Content::Mixed); +} + + +inline const XMLStreamParser::ElementEntry* XMLStreamParser::getElement() const +{ + return _elementState.empty() ? 0 : getElementImpl(); +} + + +template +T XMLStreamParser::attribute(const QName& qn, const T& dv) const +{ + if (const ElementEntry* e = getElement()) + { + AttributeMapType::const_iterator i(e->attributeMap.find(qn)); + + if (i != e->attributeMap.end()) + { + if (!i->second.handled) + { + i->second.handled = true; + e->attributesUnhandled--; + } + return ValueTraits < T > ::parse(i->second.value, *this); + } + } + + return dv; +} + + +template +T XMLStreamParser::element(const QName& qn, const T& dv) +{ + if (peek() == EV_START_ELEMENT && getQName() == qn) + { + next(); + return element(); + } + + return dv; +} + + +} } // namespace Poco::XML + + +#endif // XML_XMLStreamParser_INCLUDED diff --git a/XML/include/Poco/XML/XMLStreamParserException.h b/XML/include/Poco/XML/XMLStreamParserException.h new file mode 100644 index 000000000..249b1dbae --- /dev/null +++ b/XML/include/Poco/XML/XMLStreamParserException.h @@ -0,0 +1,58 @@ +// +// XMLStreamParserException.h +// +// Library: XML +// Package: XML +// Module: XMLStreamParserException +// +// Definition of the XMLStreamParserException class. +// +// Copyright (c) 2015, Applied Informatics Software Engineering GmbH. +// and Contributors. 
+// +// SPDX-License-Identifier: BSL-1.0 +// + + +#ifndef XML_XMLStreamParserException_INCLUDED +#define XML_XMLStreamParserException_INCLUDED + + +#include + + +namespace Poco { +namespace XML { + + +class XMLStreamParser; + + +class XML_API XMLStreamParserException: public Poco::XML::XMLException +{ +public: + XMLStreamParserException(const std::string& name, Poco::UInt64 line, Poco::UInt64 column, const std::string& description); + XMLStreamParserException(const XMLStreamParser&, const std::string& description); + virtual ~XMLStreamParserException() throw (); + + const char* name() const throw(); + Poco::UInt64 line() const; + Poco::UInt64 column() const; + const std::string& description() const; + virtual const char* what() const throw (); + +private: + void init(); + + std::string _name; + Poco::UInt64 _line; + Poco::UInt64 _column; + std::string _description; + std::string _what; +}; + + +} } // namespace Poco::XML + + +#endif // XML_XMLStreamParserException_INCLUDED diff --git a/XML/src/ParserEngine.cpp b/XML/src/ParserEngine.cpp index bc71422c4..f0e3b1c52 100644 --- a/XML/src/ParserEngine.cpp +++ b/XML/src/ParserEngine.cpp @@ -246,7 +246,7 @@ void ParserEngine::parse(const char* pBuffer, std::size_t size) std::size_t processed = 0; while (processed < size) { - const int bufferSize = processed + PARSE_BUFFER_SIZE < size ? PARSE_BUFFER_SIZE : size - processed; + const int bufferSize = processed + PARSE_BUFFER_SIZE < size ? PARSE_BUFFER_SIZE : static_cast(size - processed); if (!XML_Parse(_parser, pBuffer + processed, bufferSize, 0)) handleError(XML_GetErrorCode(_parser)); processed += bufferSize; diff --git a/XML/src/QName.cpp b/XML/src/QName.cpp new file mode 100644 index 000000000..01b7e13fa --- /dev/null +++ b/XML/src/QName.cpp @@ -0,0 +1,74 @@ +// +// QName.cpp +// +// $Id$ +// +// Library: XML +// Package: XML +// Module: QName +// +// Copyright (c) 2015, Applied Informatics Software Engineering GmbH. +// and Contributors. 
+// +// Based on libstudxml (http://www.codesynthesis.com/projects/libstudxml/). +// Copyright (c) 2009-2013 Code Synthesis Tools CC. +// +// SPDX-License-Identifier: BSL-1.0 +// + + +#include "Poco/XML/QName.h" +#include + + +namespace Poco { +namespace XML { + + +QName::QName() +{ +} + + +QName::QName(const std::string& name) : + _name(name) +{ +} + + +QName::QName(const std::string& ns, const std::string& name) : + _ns(ns), + _name(name) +{ +} + + +QName::QName(const std::string& ns, const std::string& name, const std::string& prefix) : + _ns(ns), + _name(name), + _prefix(prefix) +{ +} + + +std::string QName::toString() const +{ + std::string r; + if (!_ns.empty()) + { + r += _ns; + r += '#'; + } + + r += _name; + return r; +} + + +std::ostream& operator << (std::ostream& os, const QName& qn) +{ + return os << qn.toString(); +} + + +} } // namespace Poco::XML diff --git a/XML/src/ValueTraits.cpp b/XML/src/ValueTraits.cpp new file mode 100644 index 000000000..2e0d48198 --- /dev/null +++ b/XML/src/ValueTraits.cpp @@ -0,0 +1,41 @@ +// +// ValueTraits.cpp +// +// $Id$ +// +// Library: XML +// Package: XML +// Module: ValueTraits +// +// Definition of the ValueTraits templates. +// +// Copyright (c) 2015, Applied Informatics Software Engineering GmbH. +// and Contributors. +// +// Based on libstudxml (http://www.codesynthesis.com/projects/libstudxml/). +// Copyright (c) 2009-2013 Code Synthesis Tools CC. 
+// +// SPDX-License-Identifier: BSL-1.0 +// + + +#include "Poco/XML/XMLStreamParser.h" +#include "Poco/XML/XMLStreamParserException.h" + + +namespace Poco { +namespace XML { + + +bool DefaultValueTraits::parse(std::string s, const XMLStreamParser& p) +{ + if (s == "true" || s == "1" || s == "True" || s == "TRUE") + return true; + else if (s == "false" || s == "0" || s == "False" || s == "FALSE") + return false; + else + throw XMLStreamParserException(p, "invalid bool value '" + s + "'"); +} + + +} } // namespace Poco::XML diff --git a/XML/src/XMLStreamParser.cpp b/XML/src/XMLStreamParser.cpp new file mode 100644 index 000000000..befe62a92 --- /dev/null +++ b/XML/src/XMLStreamParser.cpp @@ -0,0 +1,941 @@ +// +// XMLStreamParser.cpp +// +// $Id$ +// +// Library: XML +// Package: XML +// Module: XMLStreamParser +// +// Copyright (c) 2015, Applied Informatics Software Engineering GmbH. +// and Contributors. +// +// Based on libstudxml (http://www.codesynthesis.com/projects/libstudxml/). +// Copyright (c) 2009-2013 Code Synthesis Tools CC. +// +// SPDX-License-Identifier: BSL-1.0 +// + + +#include "Poco/XML/XMLStreamParser.h" +#include +#include +#include +#include +#include + + +namespace Poco { +namespace XML { + + +struct StreamExceptionController +{ + StreamExceptionController(std::istream& is): + _istr(is), + _oldState(_istr.exceptions()) + { + _istr.exceptions(_oldState & ~std::istream::failbit); + } + + ~StreamExceptionController() + { + std::istream::iostate s = _istr.rdstate(); + s &= ~std::istream::failbit; + + // If our error state (sans failbit) intersects with the + // exception state then that means we have an active + // exception and changing error/exception state will + // cause another to be thrown. + if (!(_oldState & s)) + { + // Clear failbit if it was caused by eof. 
+ // + if (_istr.fail() && _istr.eof()) + _istr.clear(s); + + _istr.exceptions(_oldState); + } + } + +private: + StreamExceptionController(const StreamExceptionController&); + StreamExceptionController& operator = (const StreamExceptionController&); + +private: + std::istream& _istr; + std::istream::iostate _oldState; +}; + + +static const char* parserEventStrings[] = +{ + "start element", + "end element", + "start attribute", + "end attribute", + "characters", + "start namespace declaration", + "end namespace declaration", + "end of file" +}; + + +std::ostream& operator << (std::ostream& os, XMLStreamParser::EventType e) +{ + return os << parserEventStrings[e]; +} + + +XMLStreamParser::XMLStreamParser(std::istream& is, const std::string& iname, FeatureType f): + _size(0), + _inputName(iname), + _feature(f) +{ + _data.is = &is; + init(); +} + + +XMLStreamParser::XMLStreamParser(const void* data, std::size_t size, const std::string& iname, FeatureType f): + _size(size), + _inputName(iname), + _feature(f) +{ + poco_assert(data != 0 && size != 0); + + _data.buf = data; + init(); +} + + +XMLStreamParser::~XMLStreamParser() +{ + if (_parser) XML_ParserFree(_parser); +} + + +void XMLStreamParser::init() +{ + _depth = 0; + _parserState = state_next; + _currentEvent = EV_EOF; + _queue = EV_EOF; + + _qualifiedName = &_qname; + _pvalue = &_value; + + _line = 0; + _column = 0; + + _currentAttributeIndex = 0; + _startNamespaceIndex = 0; + _endNamespaceIndex = 0; + + if ((_feature & RECEIVE_ATTRIBUTE_MAP) != 0 && (_feature & RECEIVE_ATTRIBUTES_EVENT) != 0) + _feature &= ~RECEIVE_ATTRIBUTE_MAP; + + // Allocate the XMLStreamParser. Make sure nothing else can throw after + // this call since otherwise we will leak it. + // + _parser = XML_ParserCreateNS(0, XML_Char(' ')); + + if (_parser == 0) + throw std::bad_alloc(); + + // Get prefixes in addition to namespaces and local names. + // + XML_SetReturnNSTriplet(_parser, true); + + // Set handlers. 
+ // + XML_SetUserData(_parser, this); + + if ((_feature & RECEIVE_ELEMENTS) != 0) + { + XML_SetStartElementHandler(_parser, &handleStartElement); + XML_SetEndElementHandler(_parser, &handleEndElement); + } + + if ((_feature & RECEIVE_CHARACTERS) != 0) + XML_SetCharacterDataHandler(_parser, &handleCharacters); + + if ((_feature & RECEIVE_NAMESPACE_DECLS) != 0) + XML_SetNamespaceDeclHandler(_parser, &handleStartNamespaceDecl, &handleEndNamespaceDecl); +} + + +void XMLStreamParser::handleError() +{ + XML_Error e(XML_GetErrorCode(_parser)); + + if (e == XML_ERROR_ABORTED) + { + // For now we only abort the XMLStreamParser in the handleCharacters() and + // handleStartElement() handlers. + // + switch (content()) + { + case Content::Empty: + throw XMLStreamParserException(*this, "characters in empty content"); + case Content::Simple: + throw XMLStreamParserException(*this, "element in simple content"); + case Content::Complex: + throw XMLStreamParserException(*this, "characters in complex content"); + default: + poco_assert(false); + } + } + else + throw XMLStreamParserException(_inputName, XML_GetCurrentLineNumber(_parser), XML_GetCurrentColumnNumber(_parser), XML_ErrorString(e)); +} + + +XMLStreamParser::EventType XMLStreamParser::next() +{ + if (_parserState == state_next) + return nextImpl(false); + else + { + // If we previously peeked at start/end_element, then adjust + // state accordingly. 
+ // + switch (_currentEvent) + { + case EV_END_ELEMENT: + { + if (!_elementState.empty() && _elementState.back().depth == _depth) + popElement(); + + _depth--; + break; + } + case EV_START_ELEMENT: + { + _depth++; + break; + } + default: + break; + } + + _parserState = state_next; + return _currentEvent; + } +} + + +const std::string& XMLStreamParser::attribute(const QName& qn) const +{ + if (const ElementEntry* e = getElement()) + { + AttributeMapType::const_iterator i(e->attributeMap.find(qn)); + + if (i != e->attributeMap.end()) + { + if (!i->second.handled) + { + i->second.handled = true; + e->attributesUnhandled--; + } + return i->second.value; + } + } + + throw XMLStreamParserException(*this, "attribute '" + qn.toString() + "' expected"); +} + + +std::string XMLStreamParser::attribute(const QName& qn, const std::string& dv) const +{ + if (const ElementEntry* e = getElement()) + { + AttributeMapType::const_iterator i(e->attributeMap.find(qn)); + + if (i != e->attributeMap.end()) + { + if (!i->second.handled) + { + i->second.handled = true; + e->attributesUnhandled--; + } + return i->second.value; + } + } + + return dv; +} + + +bool XMLStreamParser::attributePresent(const QName& qn) const +{ + if (const ElementEntry* e = getElement()) + { + AttributeMapType::const_iterator i(e->attributeMap.find(qn)); + + if (i != e->attributeMap.end()) + { + if (!i->second.handled) + { + i->second.handled = true; + e->attributesUnhandled--; + } + return true; + } + } + + return false; +} + + +void XMLStreamParser::nextExpect(EventType e) +{ + if (next() != e) + throw XMLStreamParserException(*this, std::string(parserEventStrings[e]) + " expected"); +} + + +void XMLStreamParser::nextExpect(EventType e, const std::string& ns, const std::string& n) +{ + if (next() != e || namespaceURI() != ns || localName() != n) + throw XMLStreamParserException(*this, std::string(parserEventStrings[e]) + " '" + QName(ns, n).toString() + "' expected"); +} + + +std::string 
XMLStreamParser::element() +{ + content(Content::Simple); + std::string r; + + // The content of the element can be empty in which case there + // will be no characters event. + // + EventType e(next()); + if (e == EV_CHARACTERS) + { + r.swap(value()); + e = next(); + } + + // We cannot really get anything other than end_element since + // the simple content validation won't allow it. + // + poco_assert(e == EV_END_ELEMENT); + + return r; +} + + +std::string XMLStreamParser::element(const QName& qn, const std::string& dv) +{ + if (peek() == EV_START_ELEMENT && getQName() == qn) + { + next(); + return element(); + } + + return dv; +} + + +const XMLStreamParser::ElementEntry* XMLStreamParser::getElementImpl() const +{ + // The handleStartElement() Expat handler may have already provisioned + // an entry in the element stack. In this case, we need to get the + // one before it, if any. + // + const ElementEntry* r(0); + ElementState::size_type n(_elementState.size() - 1); + + if (_elementState[n].depth == _depth) + r = &_elementState[n]; + else if (n != 0 && _elementState[n].depth > _depth) + { + n--; + if (_elementState[n].depth == _depth) + r = &_elementState[n]; + } + + return r; +} + + +void XMLStreamParser::popElement() +{ + // Make sure there are no unhandled attributes left. + // + const ElementEntry& e(_elementState.back()); + if (e.attributesUnhandled != 0) + { + // Find the first unhandled attribute and report it. + // + for (AttributeMapType::const_iterator i(e.attributeMap.begin()); i != e.attributeMap.end(); ++i) + { + if (!i->second.handled) + throw XMLStreamParserException(*this, "unexpected attribute '" + i->first.toString() + "'"); + } + poco_assert(false); + } + + _elementState.pop_back(); +} + + +XMLStreamParser::EventType XMLStreamParser::nextImpl(bool peek) +{ + EventType e(nextBody()); + + // Content-specific processing. Note that we handle characters in the + // handleCharacters() Expat handler for two reasons. 
Firstly, it is faster + // to ignore the whitespaces at the source. Secondly, this allows us + // to distinguish between element and attribute characters. We can + // move this processing to the handler because the characters event + // is never queued. + // + switch (e) + { + case EV_END_ELEMENT: + { + // If this is a peek, then avoid popping the stack just yet. + // This way, the attribute map will still be valid until we + // call next(). + // + if (!peek) + { + if (!_elementState.empty() && _elementState.back().depth == _depth) + popElement(); + + _depth--; + } + break; + } + case EV_START_ELEMENT: + { + if (const ElementEntry* pEntry = getElement()) + { + switch (pEntry->content) + { + case Content::Empty: + throw XMLStreamParserException(*this, "element in empty content"); + case Content::Simple: + throw XMLStreamParserException(*this, "element in simple content"); + default: + break; + } + } + + // If this is a peek, then delay adjusting the depth. + // + if (!peek) + _depth++; + + break; + } + default: + break; + } + + return e; +} + + +XMLStreamParser::EventType XMLStreamParser::nextBody() +{ + // See if we have any start namespace declarations we need to return. + // + if (_startNamespaceIndex < _startNamespace.size()) + { + // Based on the previous event determine what's the next one must be. + // + switch (_currentEvent) + { + case EV_START_NAMESPACE_DECL: + { + if (++_startNamespaceIndex == _startNamespace.size()) + { + _startNamespaceIndex = 0; + _startNamespace.clear(); + _qualifiedName = &_qname; + break; // No more declarations. + } + // Fall through. + } + case EV_START_ELEMENT: + { + _currentEvent = EV_START_NAMESPACE_DECL; + _qualifiedName = &_startNamespace[_startNamespaceIndex]; + return _currentEvent; + } + default: + { + poco_assert(false); + return _currentEvent = EV_EOF; + } + } + } + + // See if we have any attributes we need to return as events. 
+ // + if (_currentAttributeIndex < _attributes.size()) + { + // Based on the previous event determine what's the next one must be. + // + switch (_currentEvent) + { + case EV_START_ATTRIBUTE: + { + _currentEvent = EV_CHARACTERS; + _pvalue = &_attributes[_currentAttributeIndex].value; + return _currentEvent; + } + case EV_CHARACTERS: + { + _currentEvent = EV_END_ATTRIBUTE; // Name is already set. + return _currentEvent; + } + case EV_END_ATTRIBUTE: + { + if (++_currentAttributeIndex == _attributes.size()) + { + _currentAttributeIndex = 0; + _attributes.clear(); + _qualifiedName = &_qname; + _pvalue = &_value; + break; // No more attributes. + } + // Fall through. + } + case EV_START_ELEMENT: + case EV_START_NAMESPACE_DECL: + { + _currentEvent = EV_START_ATTRIBUTE; + _qualifiedName = &_attributes[_currentAttributeIndex].qname; + return _currentEvent; + } + default: + { + poco_assert(false); + return _currentEvent = EV_EOF; + } + } + } + + // See if we have any end namespace declarations we need to return. + // + if (_endNamespaceIndex < _endNamespace.size()) + { + // Based on the previous event determine what's the next one must be. + // + switch (_currentEvent) + { + case EV_END_NAMESPACE_DECL: + { + if (++_endNamespaceIndex == _endNamespace.size()) + { + _endNamespaceIndex = 0; + _endNamespace.clear(); + _qualifiedName = &_qname; + break; // No more declarations. + } + // Fall through. + } + // The end namespace declaration comes before the end element + // which means it can follow pretty much any other event. + // + default: + { + _currentEvent = EV_END_NAMESPACE_DECL; + _qualifiedName = &_endNamespace[_endNamespaceIndex]; + return _currentEvent; + } + } + } + + // Check the queue. + // + if (_queue != EV_EOF) + { + _currentEvent = _queue; + _queue = EV_EOF; + + _line = XML_GetCurrentLineNumber(_parser); + _column = XML_GetCurrentColumnNumber(_parser); + + return _currentEvent; + } + + // Reset the character accumulation flag. 
+ // + _accumulateContent = false; + + XML_ParsingStatus ps; + XML_GetParsingStatus(_parser, &ps); + + switch (ps.parsing) + { + case XML_INITIALIZED: + { + // As if we finished the previous chunk. + break; + } + case XML_PARSING: + { + poco_assert(false); + return _currentEvent = EV_EOF; + } + case XML_FINISHED: + { + return _currentEvent = EV_EOF; + } + case XML_SUSPENDED: + { + switch (XML_ResumeParser(_parser)) + { + case XML_STATUS_SUSPENDED: + { + // If the XMLStreamParser is again in the suspended state, then + // that means we have the next event. + // + return _currentEvent; + } + case XML_STATUS_OK: + { + // Otherwise, we need to get and parse the next chunk of data + // unless this was the last chunk, in which case this is eof. + // + if (ps.finalBuffer) + return _currentEvent = EV_EOF; + + break; + } + case XML_STATUS_ERROR: + handleError(); + } + break; + } + } + + // Get and parse the next chunk of data until we get the next event + // or reach eof. + // + if (!_accumulateContent) + _currentEvent = EV_EOF; + + XML_Status s; + do + { + if (_size != 0) + { + s = XML_Parse(_parser, static_cast(_data.buf), static_cast(_size), true); + + if (s == XML_STATUS_ERROR) + handleError(); + + break; + } + else + { + const size_t cap(4096); + + char* b(static_cast(XML_GetBuffer(_parser, cap))); + if (b == 0) + throw std::bad_alloc(); + + // Temporarily unset the exception failbit. Also clear the fail bit + // when we reset the old state if it was caused by eof. + // + std::istream& is(*_data.is); + { + StreamExceptionController sec(is); + is.read(b, static_cast(cap)); + } + + // If the caller hasn't configured the stream to use exceptions, + // then use the parsing exception to report an error. 
+ // + if (is.bad() || (is.fail() && !is.eof())) + throw XMLStreamParserException(*this, "io failure"); + + bool eof(is.eof()); + + s = XML_ParseBuffer(_parser, static_cast(is.gcount()), eof); + + if (s == XML_STATUS_ERROR) + handleError(); + + if (eof) + break; + } + } while (s != XML_STATUS_SUSPENDED); + + return _currentEvent; +} + + +static void splitName(const XML_Char* s, QName& qn) +{ + std::string& ns(qn.namespaceURI()); + std::string& name(qn.localName()); + std::string& prefix(qn.prefix()); + + const char* p(strchr(s, ' ')); + + if (p == 0) + { + ns.clear(); + name = s; + prefix.clear(); + } + else + { + ns.assign(s, 0, p - s); + + s = p + 1; + p = strchr(s, ' '); + + if (p == 0) + { + name = s; + prefix.clear(); + } + else + { + name.assign(s, 0, p - s); + prefix = p + 1; + } + } +} + + +void XMLCALL XMLStreamParser::handleStartElement(void* v, const XML_Char* name, const XML_Char** atts) +{ + XMLStreamParser& p(*static_cast(v)); + + XML_ParsingStatus ps; + XML_GetParsingStatus(p._parser, &ps); + + // Expat has a (mis)-feature of a possibily calling handlers even + // after the non-resumable XML_StopParser call. + // + if (ps.parsing == XML_FINISHED) + return; + + // Cannot be a followup event. + // + poco_assert(ps.parsing == XML_PARSING); + + // When accumulating characters in simple content, we expect to + // see more characters or end element. Seeing start element is + // possible but means violation of the content model. + // + if (p._accumulateContent) + { + // It would have been easier to throw the exception directly, + // however, the Expat code is most likely not exception safe. + // + p._line = XML_GetCurrentLineNumber(p._parser); + p._column = XML_GetCurrentColumnNumber(p._parser); + XML_StopParser(p._parser, false); + return; + } + + p._currentEvent = EV_START_ELEMENT; + splitName(name, p._qname); + + p._line = XML_GetCurrentLineNumber(p._parser); + p._column = XML_GetCurrentColumnNumber(p._parser); + + // Handle attributes. 
+ // + if (*atts != 0) + { + bool am((p._feature & RECEIVE_ATTRIBUTE_MAP) != 0); + bool ae((p._feature & RECEIVE_ATTRIBUTES_EVENT) != 0); + + // Provision an entry for this element. + // + ElementEntry* pe(0); + if (am) + { + p._elementState.push_back(ElementEntry(p._depth + 1)); + pe = &p._elementState.back(); + } + + if (am || ae) + { + for (; *atts != 0; atts += 2) + { + if (am) + { + QName qn; + splitName(*atts, qn); + AttributeMapType::value_type v(qn, AttributeValueType()); + v.second.value = *(atts + 1); + v.second.handled = false; + pe->attributeMap.insert(v); + } + else + { + p._attributes.push_back(AttributeType()); + splitName(*atts, p._attributes.back().qname); + p._attributes.back().value = *(atts + 1); + } + } + + if (am) + pe->attributesUnhandled = pe->attributeMap.size(); + } + } + + XML_StopParser(p._parser, true); +} + + +void XMLCALL XMLStreamParser::handleEndElement(void* v, const XML_Char* name) +{ + XMLStreamParser& p(*static_cast(v)); + + XML_ParsingStatus ps; + XML_GetParsingStatus(p._parser, &ps); + + // Expat has a (mis)-feature of a possibily calling handlers even + // after the non-resumable XML_StopParser call. + // + if (ps.parsing == XML_FINISHED) + return; + + // This can be a followup event for empty elements (). In this + // case the element name is already set. + // + if (ps.parsing != XML_PARSING) + p._queue = EV_END_ELEMENT; + else + { + splitName(name, p._qname); + + // If we are accumulating characters, then queue this event. 
+ // + if (p._accumulateContent) + p._queue = EV_END_ELEMENT; + else + { + p._currentEvent = EV_END_ELEMENT; + + p._line = XML_GetCurrentLineNumber(p._parser); + p._column = XML_GetCurrentColumnNumber(p._parser); + } + + XML_StopParser(p._parser, true); + } +} + + +void XMLCALL XMLStreamParser::handleCharacters(void* v, const XML_Char* s, int n) +{ + XMLStreamParser& p(*static_cast(v)); + + XML_ParsingStatus ps; + XML_GetParsingStatus(p._parser, &ps); + + // Expat has a (mis)-feature of a possibily calling handlers even + // after the non-resumable XML_StopParser call. + // + if (ps.parsing == XML_FINISHED) + return; + + Content cont(p.content()); + + // If this is empty or complex content, see if these are whitespaces. + // + switch (cont) + { + case Content::Empty: + case Content::Complex: + { + for (int i(0); i != n; ++i) + { + char c(s[i]); + if (c == 0x20 || c == 0x0A || c == 0x0D || c == 0x09) + continue; + + // It would have been easier to throw the exception directly, + // however, the Expat code is most likely not exception safe. + // + p._line = XML_GetCurrentLineNumber(p._parser); + p._column = XML_GetCurrentColumnNumber(p._parser); + XML_StopParser(p._parser, false); + break; + } + return; + } + default: + break; + } + + // Append the characters if we are accumulating. This can also be a + // followup event for another character event. In this case also + // append the data. + // + if (p._accumulateContent || ps.parsing != XML_PARSING) + { + poco_assert(p._currentEvent == EV_CHARACTERS); + p._value.append(s, n); + } + else + { + p._currentEvent = EV_CHARACTERS; + p._value.assign(s, n); + + p._line = XML_GetCurrentLineNumber(p._parser); + p._column = XML_GetCurrentColumnNumber(p._parser); + + // In simple content we need to accumulate all the characters + // into a single event. To do this we will let the XMLStreamParser run + // until we reach the end of the element. 
+ // + if (cont == Content::Simple) + p._accumulateContent = true; + else + XML_StopParser(p._parser, true); + } +} + + +void XMLCALL XMLStreamParser::handleStartNamespaceDecl(void* v, const XML_Char* prefix, const XML_Char* ns) +{ + XMLStreamParser& p(*static_cast(v)); + + XML_ParsingStatus ps; + XML_GetParsingStatus(p._parser, &ps); + + // Expat has a (mis)-feature of a possibily calling handlers even + // after the non-resumable XML_StopParser call. + // + if (ps.parsing == XML_FINISHED) + return; + + p._startNamespace.push_back(QName()); + p._startNamespace.back().prefix() = (prefix != 0 ? prefix : ""); + p._startNamespace.back().namespaceURI() = (ns != 0 ? ns : ""); +} + + +void XMLCALL XMLStreamParser::handleEndNamespaceDecl(void* v, const XML_Char* prefix) +{ + XMLStreamParser& p(*static_cast(v)); + + XML_ParsingStatus ps; + XML_GetParsingStatus(p._parser, &ps); + + // Expat has a (mis)-feature of a possibily calling handlers even + // after the non-resumable XML_StopParser call. + // + if (ps.parsing == XML_FINISHED) + return; + + p._endNamespace.push_back(QName()); + p._endNamespace.back().prefix() = (prefix != 0 ? prefix : ""); +} + + +} } // namespace Poco::XML diff --git a/XML/src/XMLStreamParserException.cpp b/XML/src/XMLStreamParserException.cpp new file mode 100644 index 000000000..e28e5cde5 --- /dev/null +++ b/XML/src/XMLStreamParserException.cpp @@ -0,0 +1,90 @@ +// +// XMLStreamParserException.cpp +// +// $Id$ +// +// Library: XML +// Package: XML +// Module: XMLStreamParserException +// +// Copyright (c) 2015, Applied Informatics Software Engineering GmbH. +// and Contributors. 
+// +// SPDX-License-Identifier: BSL-1.0 +// + + +#include "Poco/XML/XMLStreamParserException.h" +#include "Poco/XML/XMLStreamParser.h" + + +namespace Poco { +namespace XML { + + +XMLStreamParserException::~XMLStreamParserException() throw () +{ +} + + +XMLStreamParserException::XMLStreamParserException(const std::string& n, Poco::UInt64 l, Poco::UInt64 c, const std::string& d): + _name(n), + _line(l), + _column(c), + _description(d) +{ + init(); +} + + +XMLStreamParserException::XMLStreamParserException(const XMLStreamParser& p, const std::string& d): + _name(p.inputName()), + _line(p.line()), + _column(p.column()), + _description(d) +{ + init(); +} + + +void XMLStreamParserException::init() +{ + std::ostringstream os; + if (!_name.empty()) + os << _name << ':'; + os << _line << ':' << _column << ": error: " << _description; + _what = os.str(); +} + + +const char* XMLStreamParserException::name() const throw() +{ + return _name.c_str(); +} + + +Poco::UInt64 XMLStreamParserException::line() const +{ + return _line; +} + + +Poco::UInt64 XMLStreamParserException::column() const +{ + return _column; +} + + +const std::string& XMLStreamParserException::description() const +{ + return _description; +} + + +char const* XMLStreamParserException::what() const throw () +{ + return _what.c_str(); +} + + +} } // namespace Poco::XML diff --git a/XML/testsuite/Makefile b/XML/testsuite/Makefile index 92ae8afc9..ef6423688 100644 --- a/XML/testsuite/Makefile +++ b/XML/testsuite/Makefile @@ -10,7 +10,8 @@ objects = AttributesImplTest ChildNodesTest DOMTestSuite DocumentTest \ DocumentTypeTest Driver ElementTest EventTest NamePoolTest NameTest \ NamespaceSupportTest NodeIteratorTest NodeTest ParserWriterTest \ SAXParserTest SAXTestSuite TextTest TreeWalkerTest \ - XMLTestSuite XMLWriterTest NodeAppenderTest + XMLTestSuite XMLWriterTest NodeAppenderTest \ + XMLStreamParserTest target = testrunner target_version = 1 diff --git a/XML/testsuite/src/XMLStreamParserTest.cpp 
b/XML/testsuite/src/XMLStreamParserTest.cpp new file mode 100644 index 000000000..05e8a32f4 --- /dev/null +++ b/XML/testsuite/src/XMLStreamParserTest.cpp @@ -0,0 +1,505 @@ +// +// XMLStreamParserTest.cpp +// +// $Id$ +// +// Copyright (c) 2015, Applied Informatics Software Engineering GmbH. +// and Contributors. +// +// SPDX-License-Identifier: BSL-1.0 +// + +#include "XMLStreamParserTest.h" +#include "CppUnit/TestCaller.h" +#include "CppUnit/TestSuite.h" +#include "Poco/XML/XMLStreamParser.h" +#include "Poco/Exception.h" +#include +#include +#include +#include + + +using namespace Poco::XML; + + +XMLStreamParserTest::XMLStreamParserTest(const std::string& name): + CppUnit::TestCase(name) +{ +} + + +XMLStreamParserTest::~XMLStreamParserTest() +{ +} + + +void XMLStreamParserTest::testParser() +{ + // Test error handling. + // + try + { + std::istringstream is("X"); + XMLStreamParser p(is, "test"); + + assert(p.next() == XMLStreamParser::EV_START_ELEMENT); + assert(p.next() == XMLStreamParser::EV_START_ELEMENT); + assert(p.next() == XMLStreamParser::EV_CHARACTERS && p.value() == "X"); + p.next(); + assert(false); + } + catch (const Poco::Exception&) + { + // cerr << e.what () << endl; + } + + try + { + std::istringstream is(""); + is.exceptions(std::ios_base::badbit | std::ios_base::failbit); + XMLStreamParser p(is, "test"); + + is.setstate(std::ios_base::badbit); + p.next(); + assert(false); + } + catch (const std::ios_base::failure&) + { + } + + // Test the nextExpect() functionality. 
+ // + { + std::istringstream is(""); + XMLStreamParser p(is, "test"); + p.nextExpect(XMLStreamParser::EV_START_ELEMENT, "root"); + p.nextExpect(XMLStreamParser::EV_END_ELEMENT); + } + + try + { + std::istringstream is(""); + XMLStreamParser p(is, "test"); + p.nextExpect(XMLStreamParser::EV_END_ELEMENT); + assert(false); + } + catch (const Poco::Exception&) + { + // cerr << e.what () << endl; + } + + try + { + std::istringstream is(""); + XMLStreamParser p(is, "test"); + p.nextExpect(XMLStreamParser::EV_START_ELEMENT, "root1"); + assert(false); + } + catch (const Poco::Exception&) + { + // cerr << e.what () << endl; + } + + // Test nextExpect() with content setting. + // + { + std::istringstream is(" "); + XMLStreamParser p(is, "empty"); + + p.nextExpect(XMLStreamParser::EV_START_ELEMENT, "root", Content::Empty); + p.nextExpect(XMLStreamParser::EV_END_ELEMENT); + p.nextExpect(XMLStreamParser::EV_EOF); + } + + // Test namespace declarations. + // + { + // Followup end element event that should be precedeeded by end + // namespace declaration. + // + std::istringstream is(""); + XMLStreamParser p(is, "test", XMLStreamParser::RECEIVE_DEFAULT | XMLStreamParser::RECEIVE_NAMESPACE_DECLS); + + p.nextExpect(XMLStreamParser::EV_START_ELEMENT, "root"); + p.nextExpect(XMLStreamParser::EV_START_NAMESPACE_DECL); + p.nextExpect(XMLStreamParser::EV_END_NAMESPACE_DECL); + p.nextExpect(XMLStreamParser::EV_END_ELEMENT); + } + + // Test value extraction. + // + { + std::istringstream is("123"); + XMLStreamParser p(is, "test"); + p.nextExpect(XMLStreamParser::EV_START_ELEMENT, "root"); + p.nextExpect(XMLStreamParser::EV_CHARACTERS); + assert(p.value() == 123); + p.nextExpect(XMLStreamParser::EV_END_ELEMENT); + } + + // Test attribute maps. 
+ // + { + std::istringstream is(""); + XMLStreamParser p(is, "test"); + p.nextExpect(XMLStreamParser::EV_START_ELEMENT, "root"); + + assert(p.attribute("a") == "a"); + assert(p.attribute("b", "B") == "b"); + assert(p.attribute("c", "C") == "C"); + assert(p.attribute("d") == 123); + assert(p.attribute("t") == true); + assert(p.attribute("f", false) == false); + + p.nextExpect(XMLStreamParser::EV_END_ELEMENT); + } + + { + std::istringstream is(""); + XMLStreamParser p(is, "test"); + p.nextExpect(XMLStreamParser::EV_START_ELEMENT, "root"); + assert(p.attribute("a") == "a"); + assert(p.peek() == XMLStreamParser::EV_START_ELEMENT && p.localName() == "nested"); + assert(p.attribute("a") == "a"); + p.nextExpect(XMLStreamParser::EV_START_ELEMENT, "nested"); + assert(p.attribute("a") == "A"); + p.nextExpect(XMLStreamParser::EV_START_ELEMENT, "inner"); + assert(p.attribute("a", "") == ""); + p.nextExpect(XMLStreamParser::EV_END_ELEMENT); + assert(p.attribute("a") == "A"); + assert(p.peek() == XMLStreamParser::EV_END_ELEMENT); + assert(p.attribute("a") == "A"); // Still valid. + p.nextExpect(XMLStreamParser::EV_END_ELEMENT); + assert(p.attribute("a") == "a"); + p.nextExpect(XMLStreamParser::EV_END_ELEMENT); + assert(p.attribute("a", "") == ""); + } + + try + { + std::istringstream is(""); + XMLStreamParser p(is, "test"); + p.nextExpect(XMLStreamParser::EV_START_ELEMENT, "root"); + assert(p.attribute("a") == "a"); + p.nextExpect(XMLStreamParser::EV_END_ELEMENT); + assert(false); + } + catch (const Poco::Exception&) + { + // cerr << e.what () << endl; + } + + try + { + std::istringstream is(""); + XMLStreamParser p(is, "test"); + p.nextExpect(XMLStreamParser::EV_START_ELEMENT, "root"); + p.attribute("a"); + assert(false); + } + catch (const Poco::Exception&) + { + // cerr << e.what () << endl; + } + + // Test peeking and getting the current event. 
+ // + { + std::istringstream is("x"); + XMLStreamParser p(is, "peek", XMLStreamParser::RECEIVE_DEFAULT | XMLStreamParser::RECEIVE_ATTRIBUTES_EVENT); + + assert(p.event() == XMLStreamParser::EV_EOF); + + assert(p.peek() == XMLStreamParser::EV_START_ELEMENT); + assert(p.next() == XMLStreamParser::EV_START_ELEMENT); + assert(p.event() == XMLStreamParser::EV_START_ELEMENT); + + assert(p.peek() == XMLStreamParser::EV_START_ATTRIBUTE); + assert(p.event() == XMLStreamParser::EV_START_ATTRIBUTE); + assert(p.next() == XMLStreamParser::EV_START_ATTRIBUTE); + + assert(p.peek() == XMLStreamParser::EV_CHARACTERS && p.value() == "x"); + assert(p.next() == XMLStreamParser::EV_CHARACTERS && p.value() == "x"); + assert(p.event() == XMLStreamParser::EV_CHARACTERS && p.value() == "x"); + + assert(p.peek() == XMLStreamParser::EV_END_ATTRIBUTE); + assert(p.event() == XMLStreamParser::EV_END_ATTRIBUTE); + assert(p.next() == XMLStreamParser::EV_END_ATTRIBUTE); + + assert(p.peek() == XMLStreamParser::EV_CHARACTERS && p.value() == "x"); + assert(p.next() == XMLStreamParser::EV_CHARACTERS && p.value() == "x"); + assert(p.event() == XMLStreamParser::EV_CHARACTERS && p.value() == "x"); + + assert(p.peek() == XMLStreamParser::EV_START_ELEMENT); + assert(p.next() == XMLStreamParser::EV_START_ELEMENT); + assert(p.event() == XMLStreamParser::EV_START_ELEMENT); + + assert(p.peek() == XMLStreamParser::EV_END_ELEMENT); + assert(p.next() == XMLStreamParser::EV_END_ELEMENT); + assert(p.event() == XMLStreamParser::EV_END_ELEMENT); + + assert(p.peek() == XMLStreamParser::EV_END_ELEMENT); + assert(p.next() == XMLStreamParser::EV_END_ELEMENT); + assert(p.event() == XMLStreamParser::EV_END_ELEMENT); + + assert(p.peek() == XMLStreamParser::EV_EOF); + assert(p.next() == XMLStreamParser::EV_EOF); + assert(p.event() == XMLStreamParser::EV_EOF); + } + + // Test content processing. 
+ // + + // empty + // + { + std::istringstream is(" \n\t "); + XMLStreamParser p(is, "empty", XMLStreamParser::RECEIVE_DEFAULT | XMLStreamParser::RECEIVE_ATTRIBUTES_EVENT); + + assert(p.next() == XMLStreamParser::EV_START_ELEMENT); + p.content(Content::Empty); + assert(p.next() == XMLStreamParser::EV_START_ATTRIBUTE); + assert(p.next() == XMLStreamParser::EV_CHARACTERS && p.value() == " x "); + assert(p.next() == XMLStreamParser::EV_END_ATTRIBUTE); + assert(p.next() == XMLStreamParser::EV_END_ELEMENT); + assert(p.next() == XMLStreamParser::EV_EOF); + } + + try + { + std::istringstream is(" \n & X \t "); + XMLStreamParser p(is, "empty"); + + assert(p.next() == XMLStreamParser::EV_START_ELEMENT); + p.content(Content::Empty); + p.next(); + assert(false); + } + catch (const Poco::Exception&) + { + // cerr << e.what () << endl; + } + + // simple + // + { + std::istringstream is(" X "); + XMLStreamParser p(is, "simple"); + + assert(p.next() == XMLStreamParser::EV_START_ELEMENT); + p.content(Content::Simple); + assert(p.next() == XMLStreamParser::EV_CHARACTERS && p.value() == " X "); + assert(p.next() == XMLStreamParser::EV_END_ELEMENT); + assert(p.next() == XMLStreamParser::EV_EOF); + } + + try + { + std::istringstream is(" ? "); + XMLStreamParser p(is, "simple"); + + assert(p.next() == XMLStreamParser::EV_START_ELEMENT); + p.content(Content::Simple); + assert(p.next() == XMLStreamParser::EV_CHARACTERS && p.value() == " ? "); + p.next(); + assert(false); + } + catch (const Poco::Exception&) + { + // cerr << e.what () << endl; + } + + { + // Test content accumulation in simple content. 
+ // + std::istringstream is("123"); + XMLStreamParser p(is, "simple", XMLStreamParser::RECEIVE_DEFAULT | XMLStreamParser::RECEIVE_NAMESPACE_DECLS); + + assert(p.next() == XMLStreamParser::EV_START_ELEMENT); + p.nextExpect(XMLStreamParser::EV_START_NAMESPACE_DECL); + p.content(Content::Simple); + assert(p.next() == XMLStreamParser::EV_CHARACTERS && p.value() == "123"); + p.nextExpect(XMLStreamParser::EV_END_NAMESPACE_DECL); + assert(p.next() == XMLStreamParser::EV_END_ELEMENT); + assert(p.next() == XMLStreamParser::EV_EOF); + } + + try + { + // Test error handling in accumulation in simple content. + // + std::istringstream is("123"); + XMLStreamParser p(is, "simple", XMLStreamParser::RECEIVE_DEFAULT | XMLStreamParser::RECEIVE_NAMESPACE_DECLS); + + assert(p.next() == XMLStreamParser::EV_START_ELEMENT); + p.nextExpect(XMLStreamParser::EV_START_NAMESPACE_DECL); + p.content(Content::Simple); + p.next(); + assert(false); + } + catch (const Poco::Exception&) + { + // cerr << e.what () << endl; + } + + // complex + // + { + std::istringstream is("\n" + " \n" + " \n" + " X \n" + " \n" + "\n"); + XMLStreamParser p(is, "complex", XMLStreamParser::RECEIVE_DEFAULT | XMLStreamParser::RECEIVE_ATTRIBUTES_EVENT); + + assert(p.next() == XMLStreamParser::EV_START_ELEMENT); // root + p.content(Content::Complex); + + assert(p.next() == XMLStreamParser::EV_START_ATTRIBUTE); + assert(p.next() == XMLStreamParser::EV_CHARACTERS && p.value() == " x "); + assert(p.next() == XMLStreamParser::EV_END_ATTRIBUTE); + + assert(p.next() == XMLStreamParser::EV_START_ELEMENT); // nested + p.content(Content::Complex); + + assert(p.next() == XMLStreamParser::EV_START_ELEMENT); // inner + p.content(Content::Empty); + assert(p.next() == XMLStreamParser::EV_END_ELEMENT); // inner + + assert(p.next() == XMLStreamParser::EV_START_ELEMENT); // inner + p.content(Content::Simple); + assert(p.next() == XMLStreamParser::EV_CHARACTERS && p.value() == " X "); + assert(p.next() == XMLStreamParser::EV_END_ELEMENT); 
// inner + + assert(p.next() == XMLStreamParser::EV_END_ELEMENT); // nested + assert(p.next() == XMLStreamParser::EV_END_ELEMENT); // root + assert(p.next() == XMLStreamParser::EV_EOF); + } + + try + { + std::istringstream is(" \n X X "); + XMLStreamParser p(is, "complex"); + + assert(p.next() == XMLStreamParser::EV_START_ELEMENT); + p.content(Content::Complex); + assert(p.next() == XMLStreamParser::EV_START_ELEMENT); + assert(p.next() == XMLStreamParser::EV_END_ELEMENT); + p.next(); + assert(false); + } + catch (const Poco::Exception&) + { + // cerr << e.what () << endl; + } + + // Test element with simple content helpers. + // + { + std::istringstream is("" + " X" + " " + " 123" + " Y" + " Z" + " 234" + " 345" + " A" + " B" + " A" + " B" + " 1" + " 2" + " 1" + " 2" + ""); + XMLStreamParser p(is, "element"); + + p.nextExpect(XMLStreamParser::EV_START_ELEMENT, "root", Content::Complex); + + p.nextExpect(XMLStreamParser::EV_START_ELEMENT, "nested"); + assert(p.element() == "X"); + + p.nextExpect(XMLStreamParser::EV_START_ELEMENT, "nested"); + assert(p.element() == ""); + + p.nextExpect(XMLStreamParser::EV_START_ELEMENT, "nested"); + assert(p.element() == 123); + + assert(p.element("nested") == "Y"); + assert(p.element(QName("test", "nested")) == "Z"); + + assert(p.element("nested") == 234); + assert(p.element(QName("test", "nested")) == 345); + + assert(p.element("nested", "a") == "A"); + assert(p.element(QName("test", "nested"), "b") == "B"); + + assert(p.element("nested", "a") == "a" && p.element("nested1") == "A"); + assert(p.element(QName("test", "nested"), "b") == "b" && p.element(QName("test", "nested1")) == "B"); + + assert(p.element("nested", 10) == 1); + assert(p.element(QName("test", "nested"), 20) == 2); + + assert(p.element("nested", 10) == 10 && p.element("nested1") == 1); + assert(p.element(QName("test", "nested"), 20) == 20 && p.element(QName("test", "nested1")) == 2); + + p.nextExpect(XMLStreamParser::EV_END_ELEMENT); + } + + // Test the iterator 
interface. + // + { + std::istringstream is("X"); + XMLStreamParser p(is, "iterator"); + + std::vector v; + + for (XMLStreamParser::Iterator i(p.begin()); i != p.end(); ++i) + v.push_back(*i); + + //for (XMLStreamParser::EventType e: p) + // v.push_back (e); + + assert(v.size() == 5); + assert(v[0] == XMLStreamParser::EV_START_ELEMENT); + assert(v[1] == XMLStreamParser::EV_START_ELEMENT); + assert(v[2] == XMLStreamParser::EV_CHARACTERS); + assert(v[3] == XMLStreamParser::EV_END_ELEMENT); + assert(v[4] == XMLStreamParser::EV_END_ELEMENT); + } + + // Test space extraction into the std::string value. + // + { + std::istringstream is(" b "); + XMLStreamParser p(is, "test"); + p.nextExpect(XMLStreamParser::EV_START_ELEMENT, "root"); + assert(p.attribute("a") == " a "); + p.nextExpect(XMLStreamParser::EV_CHARACTERS); + assert(p.value() == " b "); + p.nextExpect(XMLStreamParser::EV_END_ELEMENT); + } +} + + +void XMLStreamParserTest::setUp() // Empty: this test case prepares no fixture state. +{ +} + + +void XMLStreamParserTest::tearDown() // Empty: there is nothing to release after a test. +{ +} + + +CppUnit::Test* XMLStreamParserTest::suite() // Creates the suite for this test case and returns it. +{ + CppUnit::TestSuite* pSuite = new CppUnit::TestSuite("XMLStreamParserTest"); // NOTE(review): raw new with no delete here; presumably whoever receives the suite owns it -- confirm. + + CppUnit_addTest(pSuite, XMLStreamParserTest, testParser); // testParser is the only test registered. + + return pSuite; +} diff --git a/XML/testsuite/src/XMLStreamParserTest.h b/XML/testsuite/src/XMLStreamParserTest.h new file mode 100644 index 000000000..2579a74c3 --- /dev/null +++ b/XML/testsuite/src/XMLStreamParserTest.h @@ -0,0 +1,40 @@ +// +// XMLStreamParserTest.h +// +// $Id$ +// +// Definition of the XMLStreamParserTest class. +// +// Copyright (c) 2015, Applied Informatics Software Engineering GmbH. +// and Contributors. 
+// +// SPDX-License-Identifier: BSL-1.0 +// + + +#ifndef XMLStreamParserTest_INCLUDED +#define XMLStreamParserTest_INCLUDED + + +#include "Poco/XML/XML.h" +#include "CppUnit/TestCase.h" + + +class XMLStreamParserTest: public CppUnit::TestCase // CppUnit test case for XMLStreamParser. +{ +public: + XMLStreamParserTest(const std::string& name); // NOTE(review): name is presumably forwarded to CppUnit::TestCase; the definition is not shown here -- confirm. + ~XMLStreamParserTest(); + + void testParser(); // The one test method; its visible part covers simple/complex content, the element() helpers, the iterator interface and whitespace-preserving values. + + void setUp(); // Defined with an empty body. + void tearDown(); // Defined with an empty body. + + static CppUnit::Test* suite(); // Returns a new TestSuite named "XMLStreamParserTest" with testParser registered. + +private: // No private members are declared. +}; + + +#endif // XMLStreamParserTest_INCLUDED diff --git a/XML/testsuite/src/XMLTestSuite.cpp b/XML/testsuite/src/XMLTestSuite.cpp index e73afc638..82e908b02 100644 --- a/XML/testsuite/src/XMLTestSuite.cpp +++ b/XML/testsuite/src/XMLTestSuite.cpp @@ -14,7 +14,7 @@ #include "XMLWriterTest.h" #include "SAXTestSuite.h" #include "DOMTestSuite.h" - +#include "XMLStreamParserTest.h" CppUnit::Test* XMLTestSuite::suite() { @@ -25,6 +25,7 @@ CppUnit::Test* XMLTestSuite::suite() pSuite->addTest(XMLWriterTest::suite()); pSuite->addTest(SAXTestSuite::suite()); pSuite->addTest(DOMTestSuite::suite()); + pSuite->addTest(XMLStreamParserTest::suite()); return pSuite; }