aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/Uri.cpp237
-rw-r--r--test/src/UriTests.cpp106
2 files changed, 267 insertions, 76 deletions
diff --git a/src/Uri.cpp b/src/Uri.cpp
index 2b9b93a..44ce0b0 100644
--- a/src/Uri.cpp
+++ b/src/Uri.cpp
@@ -129,6 +129,161 @@ namespace {
};
}
+    /**
+     * This function checks a single URI path segment and replaces it
+     * with its percent-decoded form.
+     *
+     * Outside of a percent-escape, every character must be one that
+     * RFC 3986 permits in a path segment ("pchar": unreserved,
+     * sub-delims, ':' or '@').  A percent-escape must consist of '%'
+     * followed by exactly two hexadecimal digits; upper-case and
+     * lower-case digits are treated as equivalent.
+     *
+     * @param[in,out] segment
+     *     On input, this is the path segment to check and decode.
+     *     On output, this is the decoded path segment.  If the
+     *     function returns false, it holds only the characters that
+     *     were decoded before the first problem was found.
+     *
+     * @return
+     *     An indication of whether or not the path segment
+     *     passed all checks and was decoded successfully is returned.
+     */
+    bool DecodePathSegment(std::string& segment) {
+        const auto originalSegment = std::move(segment);
+        segment.clear();
+
+        // Yields the value of one hexadecimal digit, or -1 if the
+        // given character is not a hexadecimal digit.
+        const auto hexDigitValue = [](char digit) -> int {
+            if (IsCharacterInSet(digit, {'0','9'})) {
+                return digit - '0';
+            } else if (IsCharacterInSet(digit, {'A','F'})) {
+                return (digit - 'A') + 10;
+            } else if (IsCharacterInSet(digit, {'a','f'})) {
+                return (digit - 'a') + 10;
+            } else {
+                return -1;
+            }
+        };
+
+        size_t decoderState = 0;
+        int decodedCharacter = 0;
+        for (const auto c: originalSegment) {
+            switch(decoderState) {
+                case 0: { // default
+                    if (c == '%') {
+                        decoderState = 1;
+                    } else if (
+                        IsCharacterInSet(
+                            c,
+                            {
+                                // unreserved
+                                'a','z', 'A','Z', // ALPHA
+                                '0','9', // DIGIT
+                                '-','-', '.','.', '_','_', '~','~',
+
+                                // sub-delims
+                                '!','!', '$','$', '&','&', '\'','\'', '(','(', ')',')',
+                                '*','*', '+','+', ',',',', ';',';', '=','=',
+
+                                // (also allowed in segment or pchar)
+                                ':',':', '@','@'
+                            }
+                        )
+                    ) {
+                        segment.push_back(c);
+                    } else {
+                        return false;
+                    }
+                } break;
+
+                case 1: { // % ...
+                    const auto digitValue = hexDigitValue(c);
+                    if (digitValue < 0) {
+                        return false;
+                    }
+                    // Assign rather than accumulate, so that nothing is
+                    // carried over from a previous escape and the value
+                    // can never grow beyond one byte.
+                    decodedCharacter = (digitValue << 4);
+                    decoderState = 2;
+                } break;
+
+                case 2: { // %[0-9A-Fa-f] ...
+                    const auto digitValue = hexDigitValue(c);
+                    if (digitValue < 0) {
+                        return false;
+                    }
+                    decodedCharacter += digitValue;
+                    segment.push_back(static_cast< char >(decodedCharacter));
+                    decoderState = 0;
+                } break;
+            }
+        }
+
+        // Input that ends in the middle of an escape ("%" or "%4")
+        // is malformed and must not be silently accepted.
+        return (decoderState == 0);
+    }
+
+    /**
+     * This function checks a URI query or fragment and replaces it
+     * with its percent-decoded form.
+     *
+     * Outside of a percent-escape, every character must be one that
+     * RFC 3986 permits in a query or fragment ("pchar", plus '/' and
+     * '?').  A percent-escape must consist of '%' followed by exactly
+     * two hexadecimal digits; upper-case and lower-case digits are
+     * treated as equivalent.
+     *
+     * @param[in,out] queryOrFragment
+     *     On input, this is the query or fragment to check and decode.
+     *     On output, this is the decoded query or fragment.  If the
+     *     function returns false, it holds only the characters that
+     *     were decoded before the first problem was found.
+     *
+     * @return
+     *     An indication of whether or not the query or fragment
+     *     passed all checks and was decoded successfully is returned.
+     */
+    bool DecodeQueryOrFragment(std::string& queryOrFragment) {
+        const auto originalQueryOrFragment = std::move(queryOrFragment);
+        queryOrFragment.clear();
+
+        // Yields the value of one hexadecimal digit, or -1 if the
+        // given character is not a hexadecimal digit.
+        const auto hexDigitValue = [](char digit) -> int {
+            if (IsCharacterInSet(digit, {'0','9'})) {
+                return digit - '0';
+            } else if (IsCharacterInSet(digit, {'A','F'})) {
+                return (digit - 'A') + 10;
+            } else if (IsCharacterInSet(digit, {'a','f'})) {
+                return (digit - 'a') + 10;
+            } else {
+                return -1;
+            }
+        };
+
+        size_t decoderState = 0;
+        int decodedCharacter = 0;
+        for (const auto c: originalQueryOrFragment) {
+            switch(decoderState) {
+                case 0: { // default
+                    if (c == '%') {
+                        decoderState = 1;
+                    } else if (
+                        IsCharacterInSet(
+                            c,
+                            {
+                                // unreserved
+                                'a','z', 'A','Z', // ALPHA
+                                '0','9', // DIGIT
+                                '-','-', '.','.', '_','_', '~','~',
+
+                                // sub-delims
+                                '!','!', '$','$', '&','&', '\'','\'', '(','(', ')',')',
+                                '*','*', '+','+', ',',',', ';',';', '=','=',
+
+                                // (also allowed in pchar)
+                                ':',':', '@','@',
+
+                                // (also allowed in query or fragment)
+                                '/','/', '?','?'
+                            }
+                        )
+                    ) {
+                        queryOrFragment.push_back(c);
+                    } else {
+                        return false;
+                    }
+                } break;
+
+                case 1: { // % ...
+                    const auto digitValue = hexDigitValue(c);
+                    if (digitValue < 0) {
+                        return false;
+                    }
+                    // Assign rather than accumulate, so that nothing is
+                    // carried over from a previous escape and the value
+                    // can never grow beyond one byte.
+                    decodedCharacter = (digitValue << 4);
+                    decoderState = 2;
+                } break;
+
+                case 2: { // %[0-9A-Fa-f] ...
+                    const auto digitValue = hexDigitValue(c);
+                    if (digitValue < 0) {
+                        return false;
+                    }
+                    decodedCharacter += digitValue;
+                    queryOrFragment.push_back(static_cast< char >(decodedCharacter));
+                    decoderState = 0;
+                } break;
+            }
+        }
+
+        // Input that ends in the middle of an escape ("%" or "%4")
+        // is malformed and must not be silently accepted.
+        return (decoderState == 0);
+    }
+
}
namespace Uri {
@@ -185,82 +340,6 @@ namespace Uri {
// Methods
/**
- * This method checks and decodes the given path segment.
- *
- * @param[in,out] segment
- * On input, this is the path segment to check and decode.
- * On output, this is the decoded path segment.
- *
- * @return
- * An indication of whether or not the path segment
- * passed all checks and was decoded successfully is returned.
- */
- bool DecodePathSegment(std::string& segment) {
- const auto originalSegment = std::move(segment);
- segment.clear();
- size_t decoderState = 0;
- int decodedCharacter = 0;
- for (const auto c: originalSegment) {
- switch(decoderState) {
- case 0: { // default
- if (c == '%') {
- decoderState = 1;
- } else {
- if (
- IsCharacterInSet(
- c,
- {
- // unreserved
- 'a','z', 'A','Z', // ALPHA
- '0','9', // DIGIT
- '-','-', '.','.', '_','_', '~','~',
-
- // sub-delims
- '!','!', '$','$', '&','&', '\'','\'', '(','(', ')',')',
- '*','*', '+','+', ',',',', ';',';', '=','=',
-
- // (also allowed in segment or pchar)
- ':',':', '@','@'
- }
- )
- ) {
- segment.push_back(c);
- } else {
- return false;
- }
- }
- } break;
-
- case 1: { // % ...
- decoderState = 2;
- decodedCharacter <<= 4;
- if (IsCharacterInSet(c, {'0','9'})) {
- decodedCharacter += (int)(c - '0');
- } else if (IsCharacterInSet(c, {'A','F'})) {
- decodedCharacter += (int)(c - 'A') + 10;
- } else {
- return false;
- }
- } break;
-
- case 2: { // %[0-9A-F] ...
- decoderState = 0;
- decodedCharacter <<= 4;
- if (IsCharacterInSet(c, {'0','9'})) {
- decodedCharacter += (int)(c - '0');
- } else if (IsCharacterInSet(c, {'A','F'})) {
- decodedCharacter += (int)(c - 'A') + 10;
- } else {
- return false;
- }
- segment.push_back((char)decodedCharacter);
- } break;
- }
- }
- return true;
- }
-
- /**
* This method builds the internal path element sequence
* by parsing it from the given path string.
*
@@ -616,6 +695,9 @@ namespace Uri {
impl_->fragment = queryAndOrFragment.substr(fragmentDelimiter + 1);
rest = queryAndOrFragment.substr(0, fragmentDelimiter);
}
+ if (!DecodeQueryOrFragment(impl_->fragment)) {
+ return false;
+ }
// Finally, if anything is left, it's the query.
if (rest.empty()) {
@@ -623,6 +705,9 @@ namespace Uri {
} else {
impl_->query = rest.substr(1);
}
+ if (!DecodeQueryOrFragment(impl_->query)) {
+ return false;
+ }
return true;
}
diff --git a/test/src/UriTests.cpp b/test/src/UriTests.cpp
index d5ab920..6d59697 100644
--- a/test/src/UriTests.cpp
+++ b/test/src/UriTests.cpp
@@ -418,3 +418,109 @@ TEST(UriTests, ParseFromStringPathBarelyLegal) {
++index;
}
}
+
+// Every URI below carries a square bracket in its query, which is
+// not a legal query character, so parsing is expected to fail.
+TEST(UriTests, ParseFromStringQueryIllegalCharacters) {
+    const std::vector< std::string > rejectedUris{
+        "http://www.example.com/?foo[bar",
+        "http://www.example.com/?]bar",
+        "http://www.example.com/?foo]",
+        "http://www.example.com/?[",
+        "http://www.example.com/?abc/foo]",
+        "http://www.example.com/?abc/[",
+        "http://www.example.com/?foo]/abc",
+        "http://www.example.com/?[/abc",
+        "http://www.example.com/?foo]/",
+        "http://www.example.com/?[/",
+        "?foo[bar",
+        "?]bar",
+        "?foo]",
+        "?[",
+        "?abc/foo]",
+        "?abc/[",
+        "?foo]/abc",
+        "?[/abc",
+        "?foo]/",
+        "?[/",
+    };
+    // The position of the offending URI is attached to any failure.
+    for (size_t i = 0; i < rejectedUris.size(); ++i) {
+        Uri::Uri uri;
+        ASSERT_FALSE(uri.ParseFromString(rejectedUris[i])) << i;
+    }
+}
+
+// Each URI below has a query built from characters that are only
+// just permitted in a query (including percent-escapes); parsing is
+// expected to succeed and to yield the listed decoded query.
+TEST(UriTests, ParseFromStringQueryBarelyLegal) {
+    struct TestVector {
+        std::string uriString;
+        std::string query;
+    };
+    const std::vector< TestVector > testVectors{
+        {"/?:/foo", ":/foo"},
+        {"?bob@/foo", "bob@/foo"},
+        {"?hello!", "hello!"},
+        {"urn:?hello,%20w%6Frld", "hello, world"},
+        {"//example.com/foo?(bar)/", "(bar)/"},
+        {"http://www.example.com/?foo?bar", "foo?bar" },
+    };
+    size_t index = 0;
+    for (const auto& testVector : testVectors) {
+        Uri::Uri uri;
+        ASSERT_TRUE(uri.ParseFromString(testVector.uriString)) << index;
+        // Tag the comparison with the vector index too, so that a
+        // mismatch reports which test vector produced it.
+        ASSERT_EQ(testVector.query, uri.GetQuery()) << index;
+        ++index;
+    }
+}
+
+// Every URI below carries a square bracket in its fragment, which is
+// not a legal fragment character, so parsing is expected to fail.
+TEST(UriTests, ParseFromStringFragmentIllegalCharacters) {
+    const std::vector< std::string > rejectedUris{
+        "http://www.example.com/#foo[bar",
+        "http://www.example.com/#]bar",
+        "http://www.example.com/#foo]",
+        "http://www.example.com/#[",
+        "http://www.example.com/#abc/foo]",
+        "http://www.example.com/#abc/[",
+        "http://www.example.com/#foo]/abc",
+        "http://www.example.com/#[/abc",
+        "http://www.example.com/#foo]/",
+        "http://www.example.com/#[/",
+        "#foo[bar",
+        "#]bar",
+        "#foo]",
+        "#[",
+        "#abc/foo]",
+        "#abc/[",
+        "#foo]/abc",
+        "#[/abc",
+        "#foo]/",
+        "#[/",
+    };
+    // The position of the offending URI is attached to any failure.
+    for (size_t i = 0; i < rejectedUris.size(); ++i) {
+        Uri::Uri uri;
+        ASSERT_FALSE(uri.ParseFromString(rejectedUris[i])) << i;
+    }
+}
+
+// Each URI below has a fragment built from characters that are only
+// just permitted in a fragment (including percent-escapes); parsing
+// is expected to succeed and to yield the listed decoded fragment.
+TEST(UriTests, ParseFromStringFragmentBarelyLegal) {
+    struct TestVector {
+        std::string uriString;
+        std::string fragment;
+    };
+    const std::vector< TestVector > testVectors{
+        {"/#:/foo", ":/foo"},
+        {"#bob@/foo", "bob@/foo"},
+        {"#hello!", "hello!"},
+        {"urn:#hello,%20w%6Frld", "hello, world"},
+        {"//example.com/foo#(bar)/", "(bar)/"},
+        {"http://www.example.com/#foo?bar", "foo?bar" },
+    };
+    size_t index = 0;
+    for (const auto& testVector : testVectors) {
+        Uri::Uri uri;
+        ASSERT_TRUE(uri.ParseFromString(testVector.uriString)) << index;
+        // Tag the comparison with the vector index too, so that a
+        // mismatch reports which test vector produced it.
+        ASSERT_EQ(testVector.fragment, uri.GetFragment()) << index;
+        ++index;
+    }
+}