aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/Uri.cpp64
1 files changed, 41 insertions, 23 deletions
diff --git a/src/Uri.cpp b/src/Uri.cpp
index 890887b..53f4035 100644
--- a/src/Uri.cpp
+++ b/src/Uri.cpp
@@ -399,6 +399,23 @@ namespace Uri {
* is returned.
*/
bool ParseAuthority(const std::string& authorityString) {
+ /**
+ * These are the various states for the state machine implemented
+ * below to correctly split up and validate the URI substring
+ * containing the host and potentially a port number as well.
+ */
+ enum class HostParsingState {
+ FIRST_CHARACTER,
+ NOT_IP_LITERAL,
+ PERCENT_ENCODED_CHARACTER,
+ IP_LITERAL,
+ IPV6_ADDRESS,
+ IPV_FUTURE_NUMBER,
+ IPV_FUTURE_BODY,
+ GARBAGE_CHECK,
+ PORT,
+ };
+
// Next, check if there is a UserInfo, and if so, extract it.
const auto userInfoDelimiter = authorityString.find('@');
std::string hostPortString;
@@ -415,30 +432,30 @@ namespace Uri {
// Next, parsing host and port from authority and path.
std::string portString;
- size_t decoderState = 0;
+ HostParsingState hostParsingState = HostParsingState::FIRST_CHARACTER;
int decodedCharacter = 0;
host.clear();
PercentEncodedCharacterDecoder pecDecoder;
bool hostIsRegName = false;
for (const auto c: hostPortString) {
- switch(decoderState) {
- case 0: { // first character
+ switch(hostParsingState) {
+ case HostParsingState::FIRST_CHARACTER: {
if (c == '[') {
host.push_back(c);
- decoderState = 3;
+ hostParsingState = HostParsingState::IP_LITERAL;
break;
} else {
- decoderState = 1;
+ hostParsingState = HostParsingState::NOT_IP_LITERAL;
hostIsRegName = true;
}
}
- case 1: { // reg-name or IPv4Address
+ case HostParsingState::NOT_IP_LITERAL: {
if (c == '%') {
pecDecoder = PercentEncodedCharacterDecoder();
- decoderState = 2;
+ hostParsingState = HostParsingState::PERCENT_ENCODED_CHARACTER;
} else if (c == ':') {
- decoderState = 8;
+ hostParsingState = HostParsingState::PORT;
} else {
if (REG_NAME_NOT_PCT_ENCODED.Contains(c)) {
host.push_back(c);
@@ -448,63 +465,64 @@ namespace Uri {
}
} break;
- case 2: { // % ...
+ case HostParsingState::PERCENT_ENCODED_CHARACTER: {
if (!pecDecoder.NextEncodedCharacter(c)) {
return false;
}
if (pecDecoder.Done()) {
- decoderState = 1;
+ hostParsingState = HostParsingState::NOT_IP_LITERAL;
host.push_back((char)pecDecoder.GetDecodedCharacter());
}
} break;
- case 3: { // IP-literal
+ case HostParsingState::IP_LITERAL: {
if (c == 'v') {
host.push_back(c);
- decoderState = 5;
+ hostParsingState = HostParsingState::IPV_FUTURE_NUMBER;
break;
} else {
- decoderState = 4;
+ hostParsingState = HostParsingState::IPV6_ADDRESS;
}
}
- case 4: { // IPv6Address
+ case HostParsingState::IPV6_ADDRESS: {
// TODO: research this offline first
// before attempting to code it
host.push_back(c);
if (c == ']') {
- decoderState = 7;
+ hostParsingState = HostParsingState::GARBAGE_CHECK;
}
} break;
- case 5: { // IPvFuture: v ...
+ case HostParsingState::IPV_FUTURE_NUMBER: {
if (c == '.') {
- decoderState = 6;
+ hostParsingState = HostParsingState::IPV_FUTURE_BODY;
} else if (!HEXDIG.Contains(c)) {
return false;
}
host.push_back(c);
} break;
- case 6: { // IPvFuture v 1*HEXDIG . ...
+ case HostParsingState::IPV_FUTURE_BODY: {
host.push_back(c);
if (c == ']') {
- decoderState = 7;
+ hostParsingState = HostParsingState::GARBAGE_CHECK;
} else if (!IPV_FUTURE_LAST_PART.Contains(c)) {
return false;
}
} break;
- case 7: { // illegal to have anything else, unless it's a colon,
- // in which case it's a port delimiter
+ case HostParsingState::GARBAGE_CHECK: {
+ // illegal to have anything else, unless it's a colon,
+ // in which case it's a port delimiter
if (c == ':') {
- decoderState = 8;
+ hostParsingState = HostParsingState::PORT;
} else {
return false;
}
} break;
- case 8: { // port
+ case HostParsingState::PORT: {
portString.push_back(c);
} break;
}