diff options
author | Richard Walters <rwalters@digitalstirling.com> | 2018-07-01 15:20:30 -0700 |
---|---|---|
committer | Richard Walters <rwalters@digitalstirling.com> | 2018-07-01 15:20:30 -0700 |
commit | d3a446cd9c3846735f4ea9e5270352633c597071 (patch) | |
tree | 4dd760e75c295b340db489df35b5275e1fba9352 | |
parent | 4eb4f0c150642cf2fa92f75000ab5108d1908e48 (diff) |
Check for illegal characters in query and fragment elements
-rw-r--r-- | src/Uri.cpp | 237 |
-rw-r--r-- | test/src/UriTests.cpp | 106 |
2 files changed, 267 insertions, 76 deletions
diff --git a/src/Uri.cpp b/src/Uri.cpp
index 2b9b93a..44ce0b0 100644
--- a/src/Uri.cpp
+++ b/src/Uri.cpp
@@ -129,6 +129,161 @@ namespace {
         };
     }
 
+    /**
+     * This method checks and decodes the given path segment.
+     *
+     * @param[in,out] segment
+     *     On input, this is the path segment to check and decode.
+     *     On output, this is the decoded path segment.
+     *
+     * @return
+     *     An indication of whether or not the path segment
+     *     passed all checks and was decoded successfully is returned.
+     */
+    bool DecodePathSegment(std::string& segment) {
+        const auto originalSegment = std::move(segment);
+        segment.clear();
+        size_t decoderState = 0;
+        int decodedCharacter = 0;
+        for (const auto c: originalSegment) {
+            switch(decoderState) {
+                case 0: { // default
+                    if (c == '%') {
+                        decoderState = 1;
+                    } else {
+                        if (
+                            IsCharacterInSet(
+                                c,
+                                {
+                                    // unreserved
+                                    'a','z', 'A','Z', // ALPHA
+                                    '0','9', // DIGIT
+                                    '-','-', '.','.', '_','_', '~','~',
+
+                                    // sub-delims
+                                    '!','!', '$','$', '&','&', '\'','\'', '(','(', ')',')',
+                                    '*','*', '+','+', ',',',', ';',';', '=','=',
+
+                                    // (also allowed in segment or pchar)
+                                    ':',':', '@','@'
+                                }
+                            )
+                        ) {
+                            segment.push_back(c);
+                        } else {
+                            return false;
+                        }
+                    }
+                } break;
+
+                case 1: { // % ...
+                    decoderState = 2;
+                    decodedCharacter <<= 4;
+                    if (IsCharacterInSet(c, {'0','9'})) {
+                        decodedCharacter += (int)(c - '0');
+                    } else if (IsCharacterInSet(c, {'A','F'})) {
+                        decodedCharacter += (int)(c - 'A') + 10;
+                    } else {
+                        return false;
+                    }
+                } break;
+
+                case 2: { // %[0-9A-F] ...
+                    decoderState = 0;
+                    decodedCharacter <<= 4;
+                    if (IsCharacterInSet(c, {'0','9'})) {
+                        decodedCharacter += (int)(c - '0');
+                    } else if (IsCharacterInSet(c, {'A','F'})) {
+                        decodedCharacter += (int)(c - 'A') + 10;
+                    } else {
+                        return false;
+                    }
+                    segment.push_back((char)decodedCharacter);
+                } break;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * This method checks and decodes the given query or fragment.
+     *
+     * @param[in,out] queryOrFragment
+     *     On input, this is the query or fragment to check and decode.
+     *     On output, this is the decoded query or fragment.
+     *
+     * @return
+     *     An indication of whether or not the query or fragment
+     *     passed all checks and was decoded successfully is returned.
+     */
+    bool DecodeQueryOrFragment(std::string& queryOrFragment) {
+        const auto originalQueryOrFragment = std::move(queryOrFragment);
+        queryOrFragment.clear();
+        size_t decoderState = 0;
+        int decodedCharacter = 0;
+        for (const auto c: originalQueryOrFragment) {
+            switch(decoderState) {
+                case 0: { // default
+                    if (c == '%') {
+                        decoderState = 1;
+                    } else {
+                        if (
+                            IsCharacterInSet(
+                                c,
+                                {
+                                    // unreserved
+                                    'a','z', 'A','Z', // ALPHA
+                                    '0','9', // DIGIT
+                                    '-','-', '.','.', '_','_', '~','~',
+
+                                    // sub-delims
+                                    '!','!', '$','$', '&','&', '\'','\'', '(','(', ')',')',
+                                    '*','*', '+','+', ',',',', ';',';', '=','=',
+
+                                    // (also allowed in pchar)
+                                    ':',':', '@','@',
+
+                                    // (also allowed in query or fragment)
+                                    '/','/', '?','?'
+                                }
+                            )
+                        ) {
+                            queryOrFragment.push_back(c);
+                        } else {
+                            return false;
+                        }
+                    }
+                } break;
+
+                case 1: { // % ...
+                    decoderState = 2;
+                    decodedCharacter <<= 4;
+                    if (IsCharacterInSet(c, {'0','9'})) {
+                        decodedCharacter += (int)(c - '0');
+                    } else if (IsCharacterInSet(c, {'A','F'})) {
+                        decodedCharacter += (int)(c - 'A') + 10;
+                    } else {
+                        return false;
+                    }
+                } break;
+
+                case 2: { // %[0-9A-F] ...
+                    decoderState = 0;
+                    decodedCharacter <<= 4;
+                    if (IsCharacterInSet(c, {'0','9'})) {
+                        decodedCharacter += (int)(c - '0');
+                    } else if (IsCharacterInSet(c, {'A','F'})) {
+                        decodedCharacter += (int)(c - 'A') + 10;
+                    } else {
+                        return false;
+                    }
+                    queryOrFragment.push_back((char)decodedCharacter);
+                } break;
+            }
+        }
+        return true;
+    }
+
 }
 
 namespace Uri {
@@ -185,82 +340,6 @@ namespace Uri {
         // Methods
 
         /**
-         * This method checks and decodes the given path segment.
-         *
-         * @param[in,out] segment
-         *     On input, this is the path segment to check and decode.
-         *     On output, this is the decoded path segment.
-         *
-         * @return
-         *     An indication of whether or not the path segment
-         *     passed all checks and was decoded successfully is returned.
-         */
-        bool DecodePathSegment(std::string& segment) {
-            const auto originalSegment = std::move(segment);
-            segment.clear();
-            size_t decoderState = 0;
-            int decodedCharacter = 0;
-            for (const auto c: originalSegment) {
-                switch(decoderState) {
-                    case 0: { // default
-                        if (c == '%') {
-                            decoderState = 1;
-                        } else {
-                            if (
-                                IsCharacterInSet(
-                                    c,
-                                    {
-                                        // unreserved
-                                        'a','z', 'A','Z', // ALPHA
-                                        '0','9', // DIGIT
-                                        '-','-', '.','.', '_','_', '~','~',
-
-                                        // sub-delims
-                                        '!','!', '$','$', '&','&', '\'','\'', '(','(', ')',')',
-                                        '*','*', '+','+', ',',',', ';',';', '=','=',
-
-                                        // (also allowed in segment or pchar)
-                                        ':',':', '@','@'
-                                    }
-                                )
-                            ) {
-                                segment.push_back(c);
-                            } else {
-                                return false;
-                            }
-                        }
-                    } break;
-
-                    case 1: { // % ...
-                        decoderState = 2;
-                        decodedCharacter <<= 4;
-                        if (IsCharacterInSet(c, {'0','9'})) {
-                            decodedCharacter += (int)(c - '0');
-                        } else if (IsCharacterInSet(c, {'A','F'})) {
-                            decodedCharacter += (int)(c - 'A') + 10;
-                        } else {
-                            return false;
-                        }
-                    } break;
-
-                    case 2: { // %[0-9A-F] ...
-                        decoderState = 0;
-                        decodedCharacter <<= 4;
-                        if (IsCharacterInSet(c, {'0','9'})) {
-                            decodedCharacter += (int)(c - '0');
-                        } else if (IsCharacterInSet(c, {'A','F'})) {
-                            decodedCharacter += (int)(c - 'A') + 10;
-                        } else {
-                            return false;
-                        }
-                        segment.push_back((char)decodedCharacter);
-                    } break;
-                }
-            }
-            return true;
-        }
-
-        /**
          * This method builds the internal path element sequence
          * by parsing it from the given path string.
          *
@@ -616,6 +695,9 @@ namespace Uri {
             impl_->fragment = queryAndOrFragment.substr(fragmentDelimiter + 1);
             rest = queryAndOrFragment.substr(0, fragmentDelimiter);
         }
+        if (!DecodeQueryOrFragment(impl_->fragment)) {
+            return false;
+        }
 
         // Finally, if anything is left, it's the query.
         if (rest.empty()) {
@@ -623,6 +705,9 @@ namespace Uri {
         } else {
             impl_->query = rest.substr(1);
         }
+        if (!DecodeQueryOrFragment(impl_->query)) {
+            return false;
+        }
         return true;
     }
 
diff --git a/test/src/UriTests.cpp b/test/src/UriTests.cpp
index d5ab920..6d59697 100644
--- a/test/src/UriTests.cpp
+++ b/test/src/UriTests.cpp
@@ -418,3 +418,109 @@ TEST(UriTests, ParseFromStringPathBarelyLegal) {
         ++index;
     }
 }
+
+TEST(UriTests, ParseFromStringQueryIllegalCharacters) {
+    const std::vector< std::string > testVectors{
+        {"http://www.example.com/?foo[bar"},
+        {"http://www.example.com/?]bar"},
+        {"http://www.example.com/?foo]"},
+        {"http://www.example.com/?["},
+        {"http://www.example.com/?abc/foo]"},
+        {"http://www.example.com/?abc/["},
+        {"http://www.example.com/?foo]/abc"},
+        {"http://www.example.com/?[/abc"},
+        {"http://www.example.com/?foo]/"},
+        {"http://www.example.com/?[/"},
+        {"?foo[bar"},
+        {"?]bar"},
+        {"?foo]"},
+        {"?["},
+        {"?abc/foo]"},
+        {"?abc/["},
+        {"?foo]/abc"},
+        {"?[/abc"},
+        {"?foo]/"},
+        {"?[/"},
+    };
+    size_t index = 0;
+    for (const auto& testVector : testVectors) {
+        Uri::Uri uri;
+        ASSERT_FALSE(uri.ParseFromString(testVector)) << index;
+        ++index;
+    }
+}
+
+TEST(UriTests, ParseFromStringQueryBarelyLegal) {
+    struct TestVector {
+        std::string uriString;
+        std::string query;
+    };
+    const std::vector< TestVector > testVectors{
+        {"/?:/foo", ":/foo"},
+        {"?bob@/foo", "bob@/foo"},
+        {"?hello!", "hello!"},
+        {"urn:?hello,%20w%6Frld", "hello, world"},
+        {"//example.com/foo?(bar)/", "(bar)/"},
+        {"http://www.example.com/?foo?bar", "foo?bar" },
+    };
+    size_t index = 0;
+    for (const auto& testVector : testVectors) {
+        Uri::Uri uri;
+        ASSERT_TRUE(uri.ParseFromString(testVector.uriString)) << index;
+        ASSERT_EQ(testVector.query, uri.GetQuery());
+        ++index;
+    }
+}
+
+TEST(UriTests, ParseFromStringFragmentIllegalCharacters) {
+    const std::vector< std::string > testVectors{
+        {"http://www.example.com/#foo[bar"},
+        {"http://www.example.com/#]bar"},
+        {"http://www.example.com/#foo]"},
+        {"http://www.example.com/#["},
+        {"http://www.example.com/#abc/foo]"},
+        {"http://www.example.com/#abc/["},
+        {"http://www.example.com/#foo]/abc"},
+        {"http://www.example.com/#[/abc"},
+        {"http://www.example.com/#foo]/"},
+        {"http://www.example.com/#[/"},
+        {"#foo[bar"},
+        {"#]bar"},
+        {"#foo]"},
+        {"#["},
+        {"#abc/foo]"},
+        {"#abc/["},
+        {"#foo]/abc"},
+        {"#[/abc"},
+        {"#foo]/"},
+        {"#[/"},
+    };
+    size_t index = 0;
+    for (const auto& testVector : testVectors) {
+        Uri::Uri uri;
+        ASSERT_FALSE(uri.ParseFromString(testVector)) << index;
+        ++index;
+    }
+}
+
+TEST(UriTests, ParseFromStringFragmentBarelyLegal) {
+    struct TestVector {
+        std::string uriString;
+        std::string fragment;
+    };
+    const std::vector< TestVector > testVectors{
+        {"/#:/foo", ":/foo"},
+        {"#bob@/foo", "bob@/foo"},
+        {"#hello!", "hello!"},
+        {"urn:#hello,%20w%6Frld", "hello, world"},
+        {"//example.com/foo#(bar)/", "(bar)/"},
+        {"http://www.example.com/#foo?bar", "foo?bar" },
+    };
+    size_t index = 0;
+    for (const auto& testVector : testVectors) {
+        Uri::Uri uri;
+        ASSERT_TRUE(uri.ParseFromString(testVector.uriString)) << index;
+        ASSERT_EQ(testVector.fragment, uri.GetFragment());
+        ++index;
+    }
+}