diff options
| -rw-r--r-- | src/Uri.cpp | 81 | ||||
| -rw-r--r-- | test/src/UriTests.cpp | 52 | 
2 files changed, 133 insertions, 0 deletions
/**
 * This method checks and decodes the given path segment.
 *
 * Every character must either be a "pchar" as defined by RFC 3986
 * (unreserved, sub-delims, ':' or '@'), or be part of a percent-encoded
 * octet ("%" followed by exactly two hexadecimal digits, in either
 * upper or lower case).
 *
 * @param[in,out] segment
 *     On input, this is the path segment to check and decode.
 *     On output, this is the decoded path segment.  If the segment
 *     fails any check, it is left unmodified.
 *
 * @return
 *     An indication of whether or not the path segment
 *     passed all checks and was decoded successfully is returned.
 */
bool DecodePathSegment(std::string& segment) {
    // Returns the value (0..15) of a hexadecimal digit, or -1 if the
    // given character is not a hexadecimal digit.  Both cases are
    // accepted because RFC 3986 treats "%3a" and "%3A" as equivalent.
    const auto hexDigitValue = [](char c) -> int {
        if ((c >= '0') && (c <= '9')) {
            return c - '0';
        }
        if ((c >= 'A') && (c <= 'F')) {
            return (c - 'A') + 10;
        }
        if ((c >= 'a') && (c <= 'f')) {
            return (c - 'a') + 10;
        }
        return -1;
    };

    // Returns true if the character may appear literally (not
    // percent-encoded) in a path segment.
    const auto isLiteralPathCharacter = [](char c) -> bool {
        if (
            ((c >= 'a') && (c <= 'z'))
            || ((c >= 'A') && (c <= 'Z'))
            || ((c >= '0') && (c <= '9'))
        ) {
            return true; // ALPHA / DIGIT
        }
        switch (c) {
            // unreserved
            case '-': case '.': case '_': case '~':
            // sub-delims
            case '!': case '$': case '&': case '\'': case '(': case ')':
            case '*': case '+': case ',': case ';': case '=':
            // (also allowed in segment or pchar)
            case ':': case '@':
                return true;

            default:
                return false;
        }
    };

    // Decode into a scratch buffer so that the caller's string is only
    // replaced once the whole segment is known to be valid.
    std::string decoded;
    decoded.reserve(segment.size());
    const size_t length = segment.size();
    for (size_t i = 0; i < length; ++i) {
        const char c = segment[i];
        if (c == '%') {
            // A percent sign must be followed by exactly two hex
            // digits; a truncated escape at the end of the segment
            // is an error rather than something to silently drop.
            if ((length - i) < 3) {
                return false;
            }
            const int high = hexDigitValue(segment[i + 1]);
            const int low = hexDigitValue(segment[i + 2]);
            if ((high < 0) || (low < 0)) {
                return false;
            }
            // Each escape is computed from scratch, so no state leaks
            // from one percent-encoded octet into the next.
            decoded.push_back(static_cast< char >((high << 4) | low));
            i += 2;
        } else if (isLiteralPathCharacter(c)) {
            decoded.push_back(c);
        } else {
            return false;
        }
    }
    segment = std::move(decoded);
    return true;
}
* @@ -218,6 +294,11 @@ namespace Uri {                      }                  }              } +            for (auto& segment: path) { +                if (!DecodePathSegment(segment)) { +                    return false; +                } +            }              return true;          } diff --git a/test/src/UriTests.cpp b/test/src/UriTests.cpp index 4c1f307..d5ab920 100644 --- a/test/src/UriTests.cpp +++ b/test/src/UriTests.cpp @@ -366,3 +366,55 @@ TEST(UriTests, ParseFromStringDontMisinterpretColonInOtherPlacesAsSchemeDelimite          ++index;      }  } + +TEST(UriTests, ParseFromStringPathIllegalCharacters) { +    const std::vector< std::string > testVectors{ +        {"http://www.example.com/foo[bar"}, +        {"http://www.example.com/]bar"}, +        {"http://www.example.com/foo]"}, +        {"http://www.example.com/["}, +        {"http://www.example.com/abc/foo]"}, +        {"http://www.example.com/abc/["}, +        {"http://www.example.com/foo]/abc"}, +        {"http://www.example.com/[/abc"}, +        {"http://www.example.com/foo]/"}, +        {"http://www.example.com/[/"}, +        {"/foo[bar"}, +        {"/]bar"}, +        {"/foo]"}, +        {"/["}, +        {"/abc/foo]"}, +        {"/abc/["}, +        {"/foo]/abc"}, +        {"/[/abc"}, +        {"/foo]/"}, +        {"/[/"}, +    }; +    size_t index = 0; +    for (const auto& testVector : testVectors) { +        Uri::Uri uri; +        ASSERT_FALSE(uri.ParseFromString(testVector)) << index; +        ++index; +    } +} + +TEST(UriTests, ParseFromStringPathBarelyLegal) { +    struct TestVector { +        std::string uriString; +        std::vector< std::string > path; +    }; +    const std::vector< TestVector > testVectors{ +        {"/:/foo", {"", ":", "foo"}}, +        {"bob@/foo", {"bob@", "foo"}}, +        {"hello!", {"hello!"}}, +        {"urn:hello,%20w%6Frld", {"hello, world"}}, +        {"//example.com/foo/(bar)/", {"", "foo", "(bar)", ""}}, +    }; +    size_t index = 0; +    for (const 
auto& testVector : testVectors) { +        Uri::Uri uri; +        ASSERT_TRUE(uri.ParseFromString(testVector.uriString)) << index; +        ASSERT_EQ(testVector.path, uri.GetPath()); +        ++index; +    } +}  | 
