diff options
author | Richard Walters <rwalters@digitalstirling.com> | 2020-10-13 01:09:18 -0700 |
---|---|---|
committer | Richard Walters <rwalters@digitalstirling.com> | 2020-10-13 01:09:18 -0700 |
commit | dc2a011598f4aa9e9de927333e467e623276d5ec (patch) | |
tree | 4b5c71634af516cdc96c512f28a02370d48c25b3 /src/parse_host_port.rs | |
parent | 4accf8c296ef7a1f6bd10a90b7a06b3b499ccda6 (diff) |
Rust refactoring
* Move Context, Error, and character classes to their own modules.
* Move host/port parsing and IP address validation to their
own modules, and break the code up into different functions
to process their state machines.
Diffstat (limited to 'src/parse_host_port.rs')
-rw-r--r-- | src/parse_host_port.rs | 202 |
1 files changed, 202 insertions, 0 deletions
diff --git a/src/parse_host_port.rs b/src/parse_host_port.rs new file mode 100644 index 0000000..2dcdde9 --- /dev/null +++ b/src/parse_host_port.rs @@ -0,0 +1,202 @@ +#![warn(clippy::pedantic)] + +use std::convert::TryFrom; + +use super::character_classes::{ + HEXDIG, + IPV_FUTURE_LAST_PART, + REG_NAME_NOT_PCT_ENCODED, +}; +use super::context::Context; +use super::error::Error; +use super::percent_encoded_character_decoder::PercentEncodedCharacterDecoder; +use super::validate_ipv6_address::validate_ipv6_address; + +struct Shared { + host: Vec<u8>, + host_is_reg_name: bool, + ipv6_address: String, + pec_decoder: PercentEncodedCharacterDecoder, + port_string: String, +} + +enum State { + NotIpLiteral(Shared), + PercentEncodedCharacter(Shared), + Ipv6Address(Shared), + IpvFutureNumber(Shared), + IpvFutureBody(Shared), + GarbageCheck(Shared), + Port(Shared), +} + +impl State{ + fn finalize(self) -> Result<(Vec<u8>, Option<u16>), Error> { + match self { + Self::PercentEncodedCharacter(_) + | Self::Ipv6Address(_) + | Self::IpvFutureNumber(_) + | Self::IpvFutureBody(_) => { + // truncated or ended early + Err(Error::TruncatedHost) + }, + Self::NotIpLiteral(state) + | Self::GarbageCheck(state) + | Self::Port(state) => { + let mut state = state; + if state.host_is_reg_name { + state.host.make_ascii_lowercase(); + } + let port = if state.port_string.is_empty() { + None + } else { + match state.port_string.parse::<u16>() { + Ok(port) => { + Some(port) + }, + Err(error) => { + return Err(Error::IllegalPortNumber(error)); + } + } + }; + Ok((state.host, port)) + }, + } + } + + fn new(host_port_string: &str) -> (Self, &str) { + let mut shared = Shared{ + host: Vec::<u8>::new(), + host_is_reg_name: false, + ipv6_address: String::new(), + pec_decoder: PercentEncodedCharacterDecoder::new(), + port_string: String::new(), + }; + let mut host_port_string = host_port_string; + if host_port_string.starts_with("[v") { + host_port_string = &host_port_string[2..]; + shared.host.push(b'v'); + ( + Self::IpvFutureNumber(shared), + host_port_string + ) + } else if host_port_string.starts_with('[') { + host_port_string = &host_port_string[1..]; + ( + Self::Ipv6Address(shared), + host_port_string + ) + } else { + shared.host_is_reg_name = true; + ( + Self::NotIpLiteral(shared), + host_port_string + ) + } + } + + fn next(self, c: char) -> Result<Self, Error> { + match self { + Self::NotIpLiteral(state) => Self::next_not_ip_literal(state, c), + Self::PercentEncodedCharacter(state) => Self::next_percent_encoded_character(state, c), + Self::Ipv6Address(state) => Self::next_ipv6_address(state, c), + Self::IpvFutureNumber(state) => Self::next_ipv_future_number(state, c), + Self::IpvFutureBody(state) => Self::next_ipv_future_body(state, c), + Self::GarbageCheck(state) => Self::next_garbage_check(state, c), + Self::Port(state) => Self::next_port(state, c), + } + } + + fn next_not_ip_literal(state: Shared, c: char) -> Result<Self, Error> { + let mut state = state; + if c == '%' { + Ok(Self::PercentEncodedCharacter(state)) + } else if c == ':' { + Ok(Self::Port(state)) + } else if REG_NAME_NOT_PCT_ENCODED.contains(&c) { + state.host.push(u8::try_from(c as u32).unwrap()); + Ok(Self::NotIpLiteral(state)) + } else { + Err(Error::IllegalCharacter(Context::Host)) + } + } + + fn next_percent_encoded_character(state: Shared, c: char) -> Result<Self, Error> { + let mut state = state; + if let Some(ci) = state.pec_decoder.next(c)? { + state.host.push(ci); + Ok(Self::NotIpLiteral(state)) + } else { + Ok(Self::PercentEncodedCharacter(state)) + } + } + + fn next_ipv6_address(state: Shared, c: char) -> Result<Self, Error> { + let mut state = state; + if c == ']' { + validate_ipv6_address(&state.ipv6_address)?; + state.host = state.ipv6_address.chars().map( + |c| u8::try_from(c as u32).unwrap() + ).collect(); + Ok(Self::GarbageCheck(state)) + } else { + state.ipv6_address.push(c); + Ok(Self::Ipv6Address(state)) + } + } + + fn next_ipv_future_number(state: Shared, c: char) -> Result<Self, Error> { + let mut state = state; + if c == '.' { + state.host.push(b'.'); + Ok(Self::IpvFutureBody(state)) + } else if c == ']' { + Err(Error::TruncatedHost) + } else if HEXDIG.contains(&c) { + state.host.push(u8::try_from(c as u32).unwrap()); + Ok(Self::IpvFutureNumber(state)) + } else { + Err(Error::IllegalCharacter(Context::IpvFuture)) + } + } + + fn next_ipv_future_body(state: Shared, c: char) -> Result<Self, Error> { + let mut state = state; + if c == ']' { + Ok(Self::GarbageCheck(state)) + } else if IPV_FUTURE_LAST_PART.contains(&c) { + state.host.push(u8::try_from(c as u32).unwrap()); + Ok(Self::IpvFutureBody(state)) + } else { + Err(Error::IllegalCharacter(Context::IpvFuture)) + } + } + + fn next_garbage_check(state: Shared, c: char) -> Result<Self, Error> { + // illegal to have anything else, unless it's a colon, + // in which case it's a port delimiter + if c == ':' { + Ok(Self::Port(state)) + } else { + Err(Error::IllegalCharacter(Context::Host)) + } + } + + fn next_port(state: Shared, c: char) -> Result<Self, Error> { + let mut state = state; + state.port_string.push(c); + Ok(Self::Port(state)) + } +} + +pub fn parse_host_port<T>(host_port_string: T) -> Result<(Vec<u8>, Option<u16>), Error> + where T: AsRef<str> +{ + let (machine, host_port_string) = State::new(host_port_string.as_ref()); + host_port_string + .chars() + .try_fold(machine, |machine, c| { + machine.next(c) + })? + .finalize() +} |