diff options
author | Richard Walters <rwalters@digitalstirling.com> | 2018-06-30 22:30:19 -0700 |
---|---|---|
committer | Richard Walters <rwalters@digitalstirling.com> | 2018-06-30 22:30:19 -0700 |
commit | 8c752fdcf71f1d8f3980c8066e2bedf782d9739c (patch) | |
tree | 7bc5ad87eff0ddd584f77c08858c240cd6839721 | |
parent | a43820d0b4014878e4bbfede6acde25f5830faa7 (diff) |
Add more element parsing of URIs
* Add IsRelativeReference.
* Add ContainsRelativePath.
* Add Query.
* Add Fragment.
* Add UserInfo.
* Fix parsing of URIs that have no scheme.
-rw-r--r-- | include/Uri/Uri.hpp | 55 | ||||
-rw-r--r-- | src/Uri.cpp | 127 | ||||
-rw-r--r-- | test/src/UriTests.cpp | 122 |
3 files changed, 286 insertions, 18 deletions
diff --git a/include/Uri/Uri.hpp b/include/Uri/Uri.hpp index 4907aaf..8be3dd0 100644 --- a/include/Uri/Uri.hpp +++ b/include/Uri/Uri.hpp @@ -61,6 +61,17 @@ namespace Uri { std::string GetScheme() const; /** + * This method returns the "UserInfo" element of the URI. + * + * @return + * The "UserInfo" element of the URI is returned. + * + * @retval "" + * This is returned if there is no "UserInfo" element in the URI. + */ + std::string GetUserInfo() const; + + /** * This method returns the "host" element of the URI. * * @return @@ -108,6 +119,50 @@ namespace Uri { */ uint16_t GetPort() const; + /** + * This method returns an indication of whether or not + * the URI is a relative reference. + * + * @return + * An indication of whether or not the URI is a + * relative reference is returned. + */ + bool IsRelativeReference() const; + + /** + * This method returns an indication of whether or not + * the URI contains a relative path. + * + * @return + * An indication of whether or not the URI contains a + * relative path is returned. + */ + bool ContainsRelativePath() const; + + /** + * This method returns the "query" element of the URI, + * if it has one. + * + * @return + * The "query" element of the URI is returned. + * + * @retval "" + * This is returned if there is no "query" element in the URI. + */ + std::string GetQuery() const; + + /** + * This method returns the "fragment" element of the URI, + * if it has one. + * + * @return + * The "fragment" element of the URI is returned. + * + * @retval "" + * This is returned if there is no "fragment" element in the URI. + */ + std::string GetFragment() const; + // Private properties private: /** diff --git a/src/Uri.cpp b/src/Uri.cpp index 78f3b4d..984d3ed 100644 --- a/src/Uri.cpp +++ b/src/Uri.cpp @@ -22,6 +22,11 @@ namespace Uri { std::string scheme; /** + * This is the "UserInfo" element of the URI. + */ + std::string userInfo; + + /** * This is the "host" element of the URI. 
*/ std::string host; @@ -42,6 +47,18 @@ namespace Uri { * as a sequence of segments. */ std::vector< std::string > path; + + /** + * This is the "query" element of the URI, + * if it has one. + */ + std::string query; + + /** + * This is the "fragment" element of the URI, + * if it has one. + */ + std::string fragment; }; Uri::~Uri() = default; @@ -54,20 +71,49 @@ namespace Uri { bool Uri::ParseFromString(const std::string& uriString) { // First parse the scheme. const auto schemeEnd = uriString.find(':'); - impl_->scheme = uriString.substr(0, schemeEnd); - auto rest = uriString.substr(schemeEnd + 1); + std::string rest; + if (schemeEnd == std::string::npos) { + impl_->scheme.clear(); + rest = uriString; + } else { + impl_->scheme = uriString.substr(0, schemeEnd); + rest = uriString.substr(schemeEnd + 1); + } - // Next parse the host. + // Next parse the authority. impl_->hasPort = false; - if (rest.substr(0, 2) == "//") { - const auto authorityEnd = rest.find('/', 2); - const auto portDelimiter = rest.find(':'); + const auto pathEnd = rest.find_first_of("?#"); + auto authorityAndPathString = rest.substr(0, pathEnd); + const auto queryAndOrFragment = rest.substr(authorityAndPathString.length()); + std::string hostPortAndPathString; + if (authorityAndPathString.substr(0, 2) == "//") { + // Strip off authority marker. + authorityAndPathString = authorityAndPathString.substr(2); + + // First separate the authority from the path. + auto authorityEnd = authorityAndPathString.find('/'); + if (authorityEnd == std::string::npos) { + authorityEnd = authorityAndPathString.length(); + } + + // Next, check if there is a UserInfo, and if so, extract it. 
+ const auto userInfoDelimiter = authorityAndPathString.find('@'); + if (userInfoDelimiter == std::string::npos) { + impl_->userInfo.clear(); + hostPortAndPathString = authorityAndPathString; + } else { + impl_->userInfo = authorityAndPathString.substr(0, userInfoDelimiter); + hostPortAndPathString = authorityAndPathString.substr(userInfoDelimiter + 1); + } + + // Next, parsing host and port from authority and path. + const auto portDelimiter = hostPortAndPathString.find(':'); if (portDelimiter == std::string::npos) { - impl_->host = rest.substr(2, authorityEnd - 2); + impl_->host = hostPortAndPathString.substr(0, authorityEnd); } else { - impl_->host = rest.substr(2, portDelimiter - 2); + impl_->host = hostPortAndPathString.substr(0, portDelimiter); uint32_t newPort = 0; - for (auto c: rest.substr(portDelimiter + 1, authorityEnd - portDelimiter - 1)) { + for (auto c: hostPortAndPathString.substr(portDelimiter + 1, authorityEnd - portDelimiter - 1)) { if ( (c < '0') || (c > '9') @@ -85,32 +131,53 @@ namespace Uri { impl_->port = (uint16_t)newPort; impl_->hasPort = true; } - rest = rest.substr(authorityEnd); + hostPortAndPathString = authorityAndPathString.substr(authorityEnd); } else { impl_->host.clear(); + hostPortAndPathString = authorityAndPathString; } + auto pathString = hostPortAndPathString; - // Finally, parse the path. + // Next, parse the path. impl_->path.clear(); - if (rest == "/") { + if (pathString == "/") { // Special case of a path that is empty but needs a single // empty-string element to indicate that it is absolute. 
impl_->path.push_back(""); - } else if (!rest.empty()) { + pathString.clear(); + } else if (!pathString.empty()) { for(;;) { - auto pathDelimiter = rest.find('/'); + auto pathDelimiter = pathString.find('/'); if (pathDelimiter == std::string::npos) { - impl_->path.push_back(rest); + impl_->path.push_back(pathString); + pathString.clear(); break; } else { impl_->path.emplace_back( - rest.begin(), - rest.begin() + pathDelimiter + pathString.begin(), + pathString.begin() + pathDelimiter ); - rest = rest.substr(pathDelimiter + 1); + pathString = pathString.substr(pathDelimiter + 1); } } } + + // Next, parse the fragment if there is one. + const auto fragmentDelimiter = queryAndOrFragment.find('#'); + if (fragmentDelimiter == std::string::npos) { + impl_->fragment.clear(); + rest = queryAndOrFragment; + } else { + impl_->fragment = queryAndOrFragment.substr(fragmentDelimiter + 1); + rest = queryAndOrFragment.substr(0, fragmentDelimiter); + } + + // Finally, if anything is left, it's the query. 
+ if (rest.empty()) { + impl_->query.clear(); + } else { + impl_->query = rest.substr(1); + } return true; } @@ -118,6 +185,10 @@ namespace Uri { return impl_->scheme; } + std::string Uri::GetUserInfo() const { + return impl_->userInfo; + } + std::string Uri::GetHost() const { return impl_->host; } @@ -134,4 +205,24 @@ namespace Uri { return impl_->port; } + bool Uri::IsRelativeReference() const { + return impl_->scheme.empty(); + } + + bool Uri::ContainsRelativePath() const { + if (impl_->path.empty()) { + return true; + } else { + return !impl_->path[0].empty(); + } + } + + std::string Uri::GetQuery() const { + return impl_->query; + } + + std::string Uri::GetFragment() const { + return impl_->fragment; + } + } diff --git a/test/src/UriTests.cpp b/test/src/UriTests.cpp index 2be24b6..a8ecd5b 100644 --- a/test/src/UriTests.cpp +++ b/test/src/UriTests.cpp @@ -10,6 +10,19 @@ #include <stddef.h> #include <Uri/Uri.hpp> +TEST(UriTests, ParseFromStringNoScheme) { + Uri::Uri uri; + ASSERT_TRUE(uri.ParseFromString("foo/bar")); + ASSERT_EQ("", uri.GetScheme()); + ASSERT_EQ( + (std::vector< std::string >{ + "foo", + "bar", + }), + uri.GetPath() + ); +} + TEST(UriTests, ParseFromStringUrl) { Uri::Uri uri; ASSERT_TRUE(uri.ParseFromString("http://www.example.com/foo/bar")); @@ -106,3 +119,112 @@ TEST(UriTests, ParseFromStringBadPortNumberNegative) { Uri::Uri uri; ASSERT_FALSE(uri.ParseFromString("http://www.example.com:-1234/foo/bar")); } + +TEST(UriTests, ParseFromStringEndsAfterAuthority) { + Uri::Uri uri; + ASSERT_TRUE(uri.ParseFromString("http://www.example.com")); +} + +TEST(UriTests, ParseFromStringRelativeVsNonRelativeReferences) { + struct TestVector { + std::string uriString; + bool isRelativeReference; + }; + const std::vector< TestVector > testVectors{ + {"http://www.example.com/", false}, + {"http://www.example.com", false}, + {"/", true}, + {"foo", true}, + }; + size_t index = 0; + for (const auto& testVector : testVectors) { + Uri::Uri uri; + 
ASSERT_TRUE(uri.ParseFromString(testVector.uriString)) << index; + ASSERT_EQ(testVector.isRelativeReference, uri.IsRelativeReference()) << index; + ++index; + } +} + +TEST(UriTests, ParseFromStringRelativeVsNonRelativePaths) { + struct TestVector { + std::string uriString; + bool containsRelativePath; + }; + const std::vector< TestVector > testVectors{ + {"http://www.example.com/", false}, + {"http://www.example.com", true}, + {"/", false}, + {"foo", true}, + + /* + * This is only a valid test vector if we understand + * correctly that an empty string IS a valid + * "relative reference" URI with an empty path. + */ + {"", true}, + }; + size_t index = 0; + for (const auto& testVector : testVectors) { + Uri::Uri uri; + ASSERT_TRUE(uri.ParseFromString(testVector.uriString)) << index; + ASSERT_EQ(testVector.containsRelativePath, uri.ContainsRelativePath()) << index; + ++index; + } +} + +TEST(UriTests, ParseFromStringQueryAndFragmentElements) { + struct TestVector { + std::string uriString; + std::string host; + std::string query; + std::string fragment; + }; + const std::vector< TestVector > testVectors{ + {"http://www.example.com/", "www.example.com", "", ""}, + {"http://example.com?foo", "example.com", "foo", ""}, + {"http://www.example.com#foo", "www.example.com", "", "foo"}, + {"http://www.example.com?foo#bar", "www.example.com", "foo", "bar"}, + {"http://www.example.com?earth?day#bar", "www.example.com", "earth?day", "bar"}, + {"http://www.example.com/spam?foo#bar", "www.example.com", "foo", "bar"}, + + /* + * NOTE: curiously, but we think this is correct, that + * having a trailing question mark is equivalent to not having + * any question mark, because in both cases, the query element + * is empty string. Perhaps research deeper to see if this is right. 
+ */ + {"http://www.example.com/?", "www.example.com", "", ""}, + }; + size_t index = 0; + for (const auto& testVector : testVectors) { + Uri::Uri uri; + ASSERT_TRUE(uri.ParseFromString(testVector.uriString)) << index; + ASSERT_EQ(testVector.host, uri.GetHost()) << index; + ASSERT_EQ(testVector.query, uri.GetQuery()) << index; + ASSERT_EQ(testVector.fragment, uri.GetFragment()) << index; + ++index; + } +} + +TEST(UriTests, ParseFromStringUserInfo) { + struct TestVector { + std::string uriString; + std::string userInfo; + }; + const std::vector< TestVector > testVectors{ + {"http://www.example.com/", ""}, + {"http://joe@www.example.com", "joe"}, + {"http://pepe:feelsbadman@www.example.com", "pepe:feelsbadman"}, + {"//www.example.com", ""}, + {"//bob@www.example.com", "bob"}, + {"/", ""}, + {"foo", ""}, + }; + size_t index = 0; + for (const auto& testVector : testVectors) { + Uri::Uri uri; + ASSERT_TRUE(uri.ParseFromString(testVector.uriString)) << index; + ASSERT_EQ(testVector.userInfo, uri.GetUserInfo()) << index; + ++index; + } +} |