diff options
author | Richard Walters <rwalters@digitalstirling.com> | 2018-07-01 14:48:14 -0700 |
---|---|---|
committer | Richard Walters <rwalters@digitalstirling.com> | 2018-07-01 14:48:14 -0700 |
commit | 9f9ee6af4299dbc95f5d7b814679714ba0ab5051 (patch) | |
tree | f69cafb9f60470d762b73388fbd86775459df200 | |
parent | 6b9217cc7eeb72363f33a0b1330dcdca52d25f8e (diff) |
Handle bad host names
* Detect bad characters in host names.
* Incorporate splitting host and port into the state
machine that is parsing/decoding the host.
NOTE:
IPv6address is not checked for bad characters yet.
More research is needed to learn exactly what the
various ways to write an IPv6 address are.
-rw-r--r-- | src/Uri.cpp | 146 | ||||
-rw-r--r-- | test/src/UriTests.cpp | 38 |
2 files changed, 179 insertions, 5 deletions
diff --git a/src/Uri.cpp b/src/Uri.cpp index e218ecd..ba04298 100644 --- a/src/Uri.cpp +++ b/src/Uri.cpp @@ -305,13 +305,149 @@ namespace Uri { } // Next, parsing host and port from authority and path. - const auto portDelimiter = hostPortString.find(':'); - if (portDelimiter == std::string::npos) { - host = hostPortString; + std::string portString; + size_t decoderState = 0; + int decodedCharacter = 0; + host.clear(); + for (const auto c: hostPortString) { + switch(decoderState) { + case 0: { // first character + if (c == '[') { + host.push_back(c); + decoderState = 4; + break; + } else { + decoderState = 1; + } + } + + case 1: { // reg-name or IPv4Address + if (c == '%') { + decoderState = 2; + } else if (c == ':') { + decoderState = 9; + } else { + if ( + IsCharacterInSet( + c, + { + // unreserved + 'a','z', 'A','Z', // ALPHA + '0','9', // DIGIT + '-','-', '.','.', '_','_', '~','~', + + // sub-delims + '!','!', '$','$', '&','&', '\'','\'', '(','(', ')',')', + '*','*', '+','+', ',',',', ';',';', '=','=', + + // (also allowed in reg-name) + ':',':', + } + ) + ) { + host.push_back(c); + } else { + return false; + } + } + } break; + + case 2: { // % ... + decoderState = 3; + decodedCharacter <<= 4; + if (IsCharacterInSet(c, {'0','9'})) { + decodedCharacter += (int)(c - '0'); + } else if (IsCharacterInSet(c, {'A','F'})) { + decodedCharacter += (int)(c - 'A') + 10; + } else { + return false; + } + } break; + + case 3: { // %[0-9A-F] ... 
+ decoderState = 1; + decodedCharacter <<= 4; + if (IsCharacterInSet(c, {'0','9'})) { + decodedCharacter += (int)(c - '0'); + } else if (IsCharacterInSet(c, {'A','F'})) { + decodedCharacter += (int)(c - 'A') + 10; + } else { + return false; + } + host.push_back((char)decodedCharacter); + } break; + + case 4: { // IP-literal + if (c == 'v') { + host.push_back(c); + decoderState = 6; + break; + } else { + decoderState = 5; + } + } + + case 5: { // IPv6Address + // TODO: research this offline first + // before attempting to code it + host.push_back(c); + if (c == ']') { + decoderState = 8; + } + } break; + + case 6: { // IPvFuture: v ... + if (c == '.') { + decoderState = 7; + } else if (!IsCharacterInSet(c, {'0','9', 'A','F'})) { + return false; + } + host.push_back(c); + } break; + + case 7: { // IPvFuture v 1*HEXDIG . ... + host.push_back(c); + if (c == ']') { + decoderState = 8; + } else if ( + !IsCharacterInSet( + c, + { + // unreserved + 'a','z', 'A','Z', // ALPHA + '0','9', // DIGIT + '-','-', '.','.', '_','_', '~','~', + + // sub-delims + '!','!', '$','$', '&','&', '\'','\'', '(','(', ')',')', + '*','*', '+','+', ',',',', ';',';', '=','=', + + // (also allowed in IPvFuture) + ':',':', + } + ) + ) { + return false; + } + } break; + + case 8: { // illegal to have anything else, unless it's a colon, + // in which case it's a port delimiter + if (c == ':') { + decoderState = 9; + } else { + return false; + } + } break; + + case 9: { // port + portString.push_back(c); + } break; + } + } + if (portString.empty()) { hasPort = false; } else { - host = hostPortString.substr(0, portDelimiter); - const auto portString = hostPortString.substr(portDelimiter + 1); if (!ParseUint16(portString, port)) { return false; } diff --git a/test/src/UriTests.cpp b/test/src/UriTests.cpp index 1724b89..d7ada25 100644 --- a/test/src/UriTests.cpp +++ b/test/src/UriTests.cpp @@ -311,6 +311,44 @@ TEST(UriTests, ParseFromStringUserInfoBarelyLegal) { } } +TEST(UriTests, 
ParseFromStringHostIllegalCharacters) { + const std::vector< std::string > testVectors{ + {"//%X@www.example.com/"}, + {"//@www:example.com/"}, + {"//[vX.:]/"}, + }; + size_t index = 0; + for (const auto& testVector : testVectors) { + Uri::Uri uri; + ASSERT_FALSE(uri.ParseFromString(testVector)) << index; + ++index; + } +} + +TEST(UriTests, ParseFromStringHostBarelyLegal) { + struct TestVector { + std::string uriString; + std::string host; + }; + const std::vector< TestVector > testVectors{ + {"//%41/", "A"}, + {"///", ""}, + {"//!/", "!"}, + {"//'/", "'"}, + {"//(/", "("}, + {"//;/", ";"}, + {"//1.2.3.4/", "1.2.3.4"}, + {"//[v7.:]/", "[v7.:]"}, + }; + size_t index = 0; + for (const auto& testVector : testVectors) { + Uri::Uri uri; + ASSERT_TRUE(uri.ParseFromString(testVector.uriString)) << index; + ASSERT_EQ(testVector.host, uri.GetHost()); + ++index; + } +} + TEST(UriTests, ParseFromStringDontMisinterpretColonInAuthorityAsSchemeDelimiter) { const std::vector< std::string > testVectors{ {"//foo:bar@www.example.com/"}, |