aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Richard Walters <rwalters@digitalstirling.com> 2018-07-01 15:08:41 -0700
committer Richard Walters <rwalters@digitalstirling.com> 2018-07-01 15:08:41 -0700
commit 4eb4f0c150642cf2fa92f75000ab5108d1908e48 (patch)
tree acb4a6e2e39a0316a64c22b5ef3023716d995334
parent b4cc26f831573d8dc122b4d2ba8a5f5d8d7e3773 (diff)
Check for illegal characters in path segments
-rw-r--r-- src/Uri.cpp 81
-rw-r--r-- test/src/UriTests.cpp 52
2 files changed, 133 insertions, 0 deletions
diff --git a/src/Uri.cpp b/src/Uri.cpp
index 38dbe50..2b9b93a 100644
--- a/src/Uri.cpp
+++ b/src/Uri.cpp
@@ -185,6 +185,82 @@ namespace Uri {
// Methods
/**
+ * This method checks and decodes the given path segment.
+ *
+ * Characters from the "pchar" set (unreserved, sub-delims, ':' and
+ * '@') are copied through unchanged.  A '%' must be followed by
+ * exactly two hexadecimal digits (upper or lower case), which are
+ * replaced by the single character they encode.  Anything else
+ * makes the segment invalid.
+ *
+ * @param[in,out] segment
+ *     On input, this is the path segment to check and decode.
+ *     On output, this is the decoded path segment.  If the check
+ *     fails, it holds only the part decoded before the failure.
+ *
+ * @return
+ *     An indication of whether or not the path segment
+ *     passed all checks and was decoded successfully is returned.
+ */
+ bool DecodePathSegment(std::string& segment) {
+     const auto originalSegment = std::move(segment);
+     segment.clear();
+     size_t decoderState = 0;
+     int decodedCharacter = 0;
+     for (const auto c: originalSegment) {
+         switch(decoderState) {
+             case 0: { // default
+                 if (c == '%') {
+                     // Start each escape from zero; otherwise bits from
+                     // earlier escapes pile up in the accumulator until
+                     // the left shifts below overflow.
+                     decodedCharacter = 0;
+                     decoderState = 1;
+                 } else if (
+                     IsCharacterInSet(
+                         c,
+                         {
+                             // unreserved
+                             'a','z', 'A','Z', // ALPHA
+                             '0','9', // DIGIT
+                             '-','-', '.','.', '_','_', '~','~',
+
+                             // sub-delims
+                             '!','!', '$','$', '&','&', '\'','\'', '(','(', ')',')',
+                             '*','*', '+','+', ',',',', ';',';', '=','=',
+
+                             // (also allowed in segment or pchar)
+                             ':',':', '@','@'
+                         }
+                     )
+                 ) {
+                     segment.push_back(c);
+                 } else {
+                     return false;
+                 }
+             } break;
+
+             case 1: // % ...
+             case 2: { // %[0-9A-Fa-f] ...
+                 // Both digits of an escape are handled the same way;
+                 // only what happens afterwards differs.
+                 int digit = 0;
+                 if (IsCharacterInSet(c, {'0','9'})) {
+                     digit = (int)(c - '0');
+                 } else if (IsCharacterInSet(c, {'A','F'})) {
+                     digit = (int)(c - 'A') + 10;
+                 } else if (IsCharacterInSet(c, {'a','f'})) {
+                     // Hex digits in a percent-encoding are case-insensitive.
+                     digit = (int)(c - 'a') + 10;
+                 } else {
+                     return false;
+                 }
+                 decodedCharacter = (decodedCharacter << 4) + digit;
+                 if (decoderState == 1) {
+                     decoderState = 2;
+                 } else {
+                     segment.push_back(static_cast< char >(decodedCharacter));
+                     decoderState = 0;
+                 }
+             } break;
+         }
+     }
+
+     // A segment that ends in the middle of an escape ("%" or "%4")
+     // is malformed.
+     return (decoderState == 0);
+ }
+
+ /**
* This method builds the internal path element sequence
* by parsing it from the given path string.
*
@@ -218,6 +294,11 @@ namespace Uri {
}
}
}
+ for (auto& segment: path) {
+ if (!DecodePathSegment(segment)) {
+ return false;
+ }
+ }
return true;
}
diff --git a/test/src/UriTests.cpp b/test/src/UriTests.cpp
index 4c1f307..d5ab920 100644
--- a/test/src/UriTests.cpp
+++ b/test/src/UriTests.cpp
@@ -366,3 +366,55 @@ TEST(UriTests, ParseFromStringDontMisinterpretColonInOtherPlacesAsSchemeDelimite
++index;
}
}
+
+TEST(UriTests, ParseFromStringPathIllegalCharacters) {
+    // Each of these places a '[' or ']' somewhere in the path, both
+    // with and without a scheme/authority in front, and in the first,
+    // middle, and last path segments.  All of them must be rejected.
+    const std::vector< std::string > illegalUris{
+        "http://www.example.com/foo[bar",
+        "http://www.example.com/]bar",
+        "http://www.example.com/foo]",
+        "http://www.example.com/[",
+        "http://www.example.com/abc/foo]",
+        "http://www.example.com/abc/[",
+        "http://www.example.com/foo]/abc",
+        "http://www.example.com/[/abc",
+        "http://www.example.com/foo]/",
+        "http://www.example.com/[/",
+        "/foo[bar",
+        "/]bar",
+        "/foo]",
+        "/[",
+        "/abc/foo]",
+        "/abc/[",
+        "/foo]/abc",
+        "/[/abc",
+        "/foo]/",
+        "/[/",
+    };
+    for (size_t i = 0; i < illegalUris.size(); ++i) {
+        Uri::Uri uri;
+        // The streamed position identifies the offending vector on failure.
+        ASSERT_FALSE(uri.ParseFromString(illegalUris[i])) << i;
+    }
+}
+
+TEST(UriTests, ParseFromStringPathBarelyLegal) {
+    // Pairs a URI string with the path segments expected after
+    // parsing (and percent-decoding) it.
+    struct TestVector {
+        std::string uriString;
+        std::vector< std::string > path;
+    };
+    // These exercise characters that are legal in a path segment but
+    // easy to reject by mistake (':', '@', sub-delims), plus
+    // percent-encoded characters.
+    const std::vector< TestVector > testVectors{
+        {"/:/foo", {"", ":", "foo"}},
+        {"bob@/foo", {"bob@", "foo"}},
+        {"hello!", {"hello!"}},
+        {"urn:hello,%20w%6Frld", {"hello, world"}},
+        {"//example.com/foo/(bar)/", {"", "foo", "(bar)", ""}},
+    };
+    size_t index = 0;
+    for (const auto& testVector : testVectors) {
+        Uri::Uri uri;
+        ASSERT_TRUE(uri.ParseFromString(testVector.uriString)) << index;
+        // Stream the index here too, so a path mismatch identifies
+        // which test vector failed.
+        ASSERT_EQ(testVector.path, uri.GetPath()) << index;
+        ++index;
+    }
+}