From 908ca6ec685025e2e38bcff3d87e6d200bae5a67 Mon Sep 17 00:00:00 2001 From: Daniil Zotkin Date: Wed, 11 Mar 2020 14:31:07 +0300 Subject: [PATCH] Fix percent-encoded fragment modification in Poco::URI Before this commit, using the Poco::URI class to parse URIs that had a percent-encoded fragment identifier resulted in the loss of information about the way the fragment identifier was encoded. There could be cases where the result of serializing a Poco::URI object to a string did not match the original URI string the Poco::URI object was created from. In this commit we change the internal logic of fragment processing in Poco::URI, so that the fragment is stored inside the class in raw form (the same way as the query string). The methods getFragment and setFragment work the old way (with percent-decoded fragment values); the new methods getRawFragment and setRawFragment are added to give access to the original fragment representation. --- Foundation/include/Poco/URI.h | 29 +++++++++++++++++++--------- Foundation/src/URI.cpp | 25 ++++++++++++++++++------ Foundation/testsuite/src/URITest.cpp | 13 +++++++++++++ 3 files changed, 52 insertions(+), 15 deletions(-) diff --git a/Foundation/include/Poco/URI.h b/Foundation/include/Poco/URI.h index 950a79c66..f9cba6ade 100644 --- a/Foundation/include/Poco/URI.h +++ b/Foundation/include/Poco/URI.h @@ -40,15 +40,18 @@ class Foundation_API URI /// The class automatically performs a few normalizations on /// all URIs and URI parts passed to it: /// * scheme identifiers are converted to lower case - /// * percent-encoded characters are decoded (except for the query string) + /// * percent-encoded characters are decoded (except for the query string and fragment string) /// * optionally, dot segments are removed from paths (see normalize()) /// - /// Note that dealing with query strings requires some precautions, as, internally, - /// query strings are stored in percent-encoded form, while all other parts of the URI - /// are stored in
decoded form. While parsing query strings from properly encoded URLs - /// generally works, explicitly setting query strings with setQuery() or extracting - /// query strings with getQuery() may lead to ambiguities. See the descriptions of - /// setQuery(), setRawQuery(), getQuery() and getRawQuery() for more information. + /// Note that dealing with query strings and fragment strings requires some precautions, + /// as, internally, query strings and fragment strings are stored in percent-encoded + /// form, while all other parts of the URI are stored in decoded form. While parsing + /// query strings and fragment strings from properly encoded URLs generally works, + /// explicitly setting query strings (fragment strings) with setQuery() (setFragment()) + /// or extracting query strings (fragment strings) with getQuery() (getFragment()) may + /// lead to ambiguities. See the descriptions of setQuery(), setRawQuery(), getQuery(), + /// getRawQuery(), setFragment(), setRawFragment(), getFragment() and getRawFragment() + /// for more information. { public: using QueryParameters = std::vector<std::pair<std::string, std::string>>; @@ -230,12 +233,20 @@ public: /// /// Calls addQueryParameter() for each parameter name and value. - const std::string& getFragment() const; + std::string getFragment() const; /// Returns the fragment part of the URI. void setFragment(const std::string& fragment); /// Sets the fragment part of the URI. + std::string getRawFragment() const; + /// Returns the fragment part of the URI in raw form. + + void setRawFragment(const std::string& fragment); + /// Sets the fragment part of the URI. + /// + /// The given fragment string must be properly percent-encoded. + void setPathEtc(const std::string& pathEtc); /// Sets the path, query and fragment parts of the URI.
@@ -400,7 +411,7 @@ inline const std::string& URI::getRawQuery() const } -inline const std::string& URI::getFragment() const +inline std::string URI::getRawFragment() const { return _fragment; } diff --git a/Foundation/src/URI.cpp b/Foundation/src/URI.cpp index 0c2d2312e..51952eac5 100644 --- a/Foundation/src/URI.cpp +++ b/Foundation/src/URI.cpp @@ -257,7 +257,7 @@ std::string URI::toString() const if (!_fragment.empty()) { uri += '#'; - encode(_fragment, RESERVED_FRAGMENT, uri); + uri.append(_fragment); } return uri; } @@ -420,10 +420,24 @@ void URI::setQueryParameters(const QueryParameters& params) } +std::string URI::getFragment() const +{ + std::string fragment; + decode(_fragment, fragment); + return fragment; +} + + void URI::setFragment(const std::string& fragment) { _fragment.clear(); - decode(fragment, _fragment); + encode(fragment, RESERVED_FRAGMENT, _fragment); +} + + +void URI::setRawFragment(const std::string& fragment) +{ + _fragment = fragment; } @@ -450,7 +464,7 @@ std::string URI::getPathEtc() const if (!_fragment.empty()) { pathEtc += '#'; - encode(_fragment, RESERVED_FRAGMENT, pathEtc); + pathEtc += _fragment; } return pathEtc; } @@ -882,9 +896,8 @@ void URI::parseQuery(std::string::const_iterator& it, const std::string::const_i void URI::parseFragment(std::string::const_iterator& it, const std::string::const_iterator& end) { - std::string fragment; - while (it != end) fragment += *it++; - decode(fragment, _fragment); + _fragment.clear(); + while (it != end) _fragment += *it++; } diff --git a/Foundation/testsuite/src/URITest.cpp b/Foundation/testsuite/src/URITest.cpp index f76a28397..002e8856f 100644 --- a/Foundation/testsuite/src/URITest.cpp +++ b/Foundation/testsuite/src/URITest.cpp @@ -743,6 +743,7 @@ void URITest::testOther() assertTrue (uri.getQuery() == "q=hello%world"); assertTrue (uri.getRawQuery() == "q=hello%25world"); assertTrue (uri.getFragment() == "frag ment"); + assertTrue (uri.getRawFragment() == "frag%20ment"); assertTrue 
(uri.toString() == "http://google.com/search?q=hello%25world#frag%20ment"); assertTrue (uri.getPathEtc() == "/search?q=hello%25world#frag%20ment"); @@ -783,8 +784,20 @@ void URITest::testOther() assertTrue (uri.getQuery() == "q=hello+world"); assertTrue (uri.getRawQuery() == "q=hello+world"); assertTrue (uri.getFragment() == "frag ment"); + assertTrue (uri.getRawFragment() == "frag%20ment"); assertTrue (uri.toString() == "http://google.com/search?q=hello+world#frag%20ment"); assertTrue (uri.getPathEtc() == "/search?q=hello+world#frag%20ment"); + + uri.setFragment("foo/bar"); + assertTrue (uri.getFragment() == "foo/bar"); + assertTrue (uri.getRawFragment() == "foo/bar"); + assertTrue (uri.toString() == "http://google.com/search?q=hello+world#foo/bar"); + assertTrue (uri.getPathEtc() == "/search?q=hello+world#foo/bar"); + uri.setRawFragment("foo%2Fbar"); + assertTrue (uri.getFragment() == "foo/bar"); + assertTrue (uri.getRawFragment() == "foo%2Fbar"); + assertTrue (uri.toString() == "http://google.com/search?q=hello+world#foo%2Fbar"); + assertTrue (uri.getPathEtc() == "/search?q=hello+world#foo%2Fbar"); }