diff options
author | Richard Walters <rwalters@digitalstirling.com> | 2018-07-01 15:08:41 -0700 |
---|---|---|
committer | Richard Walters <rwalters@digitalstirling.com> | 2018-07-01 15:08:41 -0700 |
commit | 4eb4f0c150642cf2fa92f75000ab5108d1908e48 (patch) | |
tree | acb4a6e2e39a0316a64c22b5ef3023716d995334 | |
parent | b4cc26f831573d8dc122b4d2ba8a5f5d8d7e3773 (diff) |
Check for illegal characters in path segments
-rw-r--r-- | src/Uri.cpp | 81 | ||||
-rw-r--r-- | test/src/UriTests.cpp | 52 |
2 files changed, 133 insertions, 0 deletions
// From src/Uri.cpp, hunk @@ -185,6 +185,82 @@ (under "// Methods" inside namespace Uri).

/**
 * This method checks and decodes the given path segment.
 *
 * Every character of the segment must either be a literal "pchar"
 * (RFC 3986 unreserved characters, sub-delims, ':' or '@'), or be part
 * of a complete percent-encoded triplet ("%" followed by exactly two
 * hexadecimal digits, in either upper or lower case).
 *
 * @param[in,out] segment
 *     On input, this is the path segment to check and decode.
 *     On output, this is the decoded path segment.  If the check
 *     fails, it holds only the characters decoded before the
 *     offending character was encountered.
 *
 * @return
 *     An indication of whether or not the path segment
 *     passed all checks and was decoded successfully is returned.
 *     A segment which ends in the middle of a percent-encoded
 *     triplet is rejected.
 */
bool DecodePathSegment(std::string& segment) {
    // Characters which may appear in a path segment without being
    // percent-encoded.
    const auto isLiteralPathCharacter = [](char c) {
        // unreserved punctuation, sub-delims, and the two extra
        // characters (':' and '@') permitted in a pchar.
        static const std::string punctuation("-._~!$&'()*+,;=:@");
        return (
            ((c >= 'a') && (c <= 'z'))      // ALPHA
            || ((c >= 'A') && (c <= 'Z'))   // ALPHA
            || ((c >= '0') && (c <= '9'))   // DIGIT
            || (punctuation.find(c) != std::string::npos)
        );
    };

    // Value (0..15) of a hexadecimal digit, or -1 if the character
    // is not a hexadecimal digit.  Both cases are accepted because
    // RFC 3986 declares them equivalent.
    const auto hexDigitValue = [](char c) -> int {
        if ((c >= '0') && (c <= '9')) {
            return (int)(c - '0');
        } else if ((c >= 'A') && (c <= 'F')) {
            return (int)(c - 'A') + 10;
        } else if ((c >= 'a') && (c <= 'f')) {
            return (int)(c - 'a') + 10;
        } else {
            return -1;
        }
    };

    const auto originalSegment = std::move(segment);
    segment.clear();
    size_t decoderState = 0;
    int decodedCharacter = 0;
    for (const auto c: originalSegment) {
        switch (decoderState) {
            case 0: { // default
                if (c == '%') {
                    decoderState = 1;
                } else if (isLiteralPathCharacter(c)) {
                    segment.push_back(c);
                } else {
                    return false;
                }
            } break;

            case 1: { // % ...
                const int digit = hexDigitValue(c);
                if (digit < 0) {
                    return false;
                }
                // Start each triplet from scratch; accumulating onto
                // the previous triplet's value would eventually
                // overflow the accumulator.
                decodedCharacter = digit;
                decoderState = 2;
            } break;

            case 2: { // %[0-9A-Fa-f] ...
                const int digit = hexDigitValue(c);
                if (digit < 0) {
                    return false;
                }
                decodedCharacter = (decodedCharacter << 4) + digit;
                segment.push_back((char)decodedCharacter);
                decoderState = 0;
            } break;
        }
    }

    // Anything other than the default state here means the segment
    // ended part-way through a percent-encoded triplet.
    return (decoderState == 0);
}
* @@ -218,6 +294,11 @@ namespace Uri { } } } + for (auto& segment: path) { + if (!DecodePathSegment(segment)) { + return false; + } + } return true; } diff --git a/test/src/UriTests.cpp b/test/src/UriTests.cpp index 4c1f307..d5ab920 100644 --- a/test/src/UriTests.cpp +++ b/test/src/UriTests.cpp @@ -366,3 +366,55 @@ TEST(UriTests, ParseFromStringDontMisinterpretColonInOtherPlacesAsSchemeDelimite ++index; } } + +TEST(UriTests, ParseFromStringPathIllegalCharacters) { + const std::vector< std::string > testVectors{ + {"http://www.example.com/foo[bar"}, + {"http://www.example.com/]bar"}, + {"http://www.example.com/foo]"}, + {"http://www.example.com/["}, + {"http://www.example.com/abc/foo]"}, + {"http://www.example.com/abc/["}, + {"http://www.example.com/foo]/abc"}, + {"http://www.example.com/[/abc"}, + {"http://www.example.com/foo]/"}, + {"http://www.example.com/[/"}, + {"/foo[bar"}, + {"/]bar"}, + {"/foo]"}, + {"/["}, + {"/abc/foo]"}, + {"/abc/["}, + {"/foo]/abc"}, + {"/[/abc"}, + {"/foo]/"}, + {"/[/"}, + }; + size_t index = 0; + for (const auto& testVector : testVectors) { + Uri::Uri uri; + ASSERT_FALSE(uri.ParseFromString(testVector)) << index; + ++index; + } +} + +TEST(UriTests, ParseFromStringPathBarelyLegal) { + struct TestVector { + std::string uriString; + std::vector< std::string > path; + }; + const std::vector< TestVector > testVectors{ + {"/:/foo", {"", ":", "foo"}}, + {"bob@/foo", {"bob@", "foo"}}, + {"hello!", {"hello!"}}, + {"urn:hello,%20w%6Frld", {"hello, world"}}, + {"//example.com/foo/(bar)/", {"", "foo", "(bar)", ""}}, + }; + size_t index = 0; + for (const auto& testVector : testVectors) { + Uri::Uri uri; + ASSERT_TRUE(uri.ParseFromString(testVector.uriString)) << index; + ASSERT_EQ(testVector.path, uri.GetPath()); + ++index; + } +} |