diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/lib.rs | 512 | ||||
| -rw-r--r-- | src/percent_encoded_character_decoder.rs | 143 | 
2 files changed, 572 insertions, 83 deletions
| @@ -4,13 +4,20 @@  #[macro_use]  extern crate lazy_static; +mod percent_encoded_character_decoder; +use percent_encoded_character_decoder::PercentEncodedCharacterDecoder; +  use std::collections::HashSet; +use std::convert::TryFrom;  // This is the character set containing just the alphabetic characters  // from the ASCII character set.  //  // TODO: improvement  // [16:16] silen_z: btw char::is_ascii_letter or something like that exists +// +// [14:49] silmeth: @rhymu8354 you might want to look at once_cell as a nicer +// macro-less replacement for lazystatic!()  lazy_static! {      static ref ALPHA: HashSet<char> =          ('a'..='z') @@ -25,6 +32,16 @@ lazy_static! {          .collect::<HashSet<char>>();  } +// This is the character set containing just the characters allowed +// in a hexadecimal digit. +lazy_static! { +    static ref HEXDIG: HashSet<char> = +        ('0'..='9') +        .chain('A'..='F') +        .chain('a'..='f') +        .collect::<HashSet<char>>(); +} +  // This is the character set corresponds to the "unreserved" syntax  // specified in RFC 3986 (https://tools.ietf.org/html/rfc3986).  lazy_static! { @@ -49,6 +66,18 @@ lazy_static! {          .collect::<HashSet<char>>();  } +// This is the character set corresponds to the second part +// of the "scheme" syntax +// specified in RFC 3986 (https://tools.ietf.org/html/rfc3986). +lazy_static! { +    static ref SCHEME_NOT_FIRST: HashSet<char> = +        ALPHA.iter() +        .chain(DIGIT.iter()) +        .chain(['+', '-', '.'].iter()) +        .copied() +        .collect::<HashSet<char>>(); +} +  // This is the character set corresponds to the "pchar" syntax  // specified in RFC 3986 (https://tools.ietf.org/html/rfc3986),  // leaving out "pct-encoded". @@ -61,10 +90,46 @@ lazy_static! {          .collect::<HashSet<char>>();  } -#[derive(Debug, Clone)] +// This is the character set corresponds to the "userinfo" syntax +// specified in RFC 3986 (https://tools.ietf.org/html/rfc3986), +// leaving out "pct-encoded". +lazy_static! { +    static ref USER_INFO_NOT_PCT_ENCODED: HashSet<char> = +        UNRESERVED.iter() +        .chain(SUB_DELIMS.iter()) +        .chain([':'].iter()) +        .copied() +        .collect::<HashSet<char>>(); +} + +// This is the character set corresponds to the "reg-name" syntax +// specified in RFC 3986 (https://tools.ietf.org/html/rfc3986), +// leaving out "pct-encoded". +lazy_static! { +    static ref REG_NAME_NOT_PCT_ENCODED: HashSet<char> = +        UNRESERVED.iter() +        .chain(SUB_DELIMS.iter()) +        .copied() +        .collect::<HashSet<char>>(); +} + +// This is the character set corresponds to the last part of +// the "IPvFuture" syntax +// specified in RFC 3986 (https://tools.ietf.org/html/rfc3986). +lazy_static! { +    static ref IPV_FUTURE_LAST_PART: HashSet<char> = +        UNRESERVED.iter() +        .chain(SUB_DELIMS.iter()) +        .chain([':'].iter()) +        .copied() +        .collect::<HashSet<char>>(); +} + +#[derive(Debug, Clone, PartialEq)]  pub enum Error {      EmptyScheme, -    SchemeWithIllegalCharacter, +    IllegalCharacter, +    TruncatedHost,  }  impl std::fmt::Display for Error { @@ -74,40 +139,128 @@ impl std::fmt::Display for Error {                  write!(f, "scheme expected but missing")              }, -            Error::SchemeWithIllegalCharacter => { -                write!(f, "scheme contains illegal character") +            Error::IllegalCharacter => { +                write!(f, "illegal character") +            }, + +            Error::TruncatedHost => { +                write!(f, "truncated host") +            }, +        } +    } +} + +impl From<percent_encoded_character_decoder::Error> for Error { +    fn from(error: percent_encoded_character_decoder::Error) -> Self { +        match error { +            percent_encoded_character_decoder::Error::IllegalCharacter => { +                Error::IllegalCharacter              },          }      }  }  pub struct Authority { -    userinfo: Option<String>, -    host: Option<String>, +    userinfo: Option<Vec<u8>>, +    host: Vec<u8>,      port: Option<u16>,  }  pub struct Uri {      scheme: Option<String>,      authority: Option<Authority>, -    path: Vec<String>, -    query: Option<String>, -    fragment: Option<String>, +    path: Vec<Vec<u8>>, +    query: Option<Vec<u8>>, +    fragment: Option<Vec<u8>>,  }  impl Uri { -    fn check_scheme(_scheme: &str) -> Result<&str, Error> { -        unimplemented!() +    fn check_scheme(scheme: &str) -> Result<&str, Error> { +        if scheme.is_empty() { +            return Err(Error::EmptyScheme); +        } +        // TODO: Improve on this by enumerating +        // +        // [16:20] everx80: you could enumerate() and then check the index, +        // instead of having a bool flag? +        let mut is_FirstCharacter = true; +        for c in scheme.chars() { +            let valid_characters: &HashSet<char> = if is_FirstCharacter { +                &ALPHA +            } else { +                &SCHEME_NOT_FIRST +            }; +            if !valid_characters.contains(&c) { +                return Err(Error::IllegalCharacter); +            } +            is_FirstCharacter = false; +        } +        Ok(scheme)      }      // TODO: look into making element type more flexible      fn decode_element( -        _element: &str, -        _allowed_characters: &'static HashSet<char> -    ) -> Result<String, Error> { +        element: &str, +        allowed_characters: &'static HashSet<char> +    ) -> Result<Vec<u8>, Error> { +        let mut decoding_pec = false; +        let mut output = Vec::<u8>::new(); +        let mut pec_decoder = PercentEncodedCharacterDecoder::new(); +        // TODO: revisit this and try to use iterators, once I get better at +        // Rust. +        // +        // [13:50] LeinardoSmith: you could do the find_if and set the +        // condition to when you want to exit +        // +        // [13:52] 715209: i found this: https://stackoverflow.com/a/31507194 +        for c in element.chars() { +            if decoding_pec { +                if let Some(c) = pec_decoder.next(c)? { +                    decoding_pec = false; +                    output.push(c); +                } +            } else if c == '%' { +                decoding_pec = true; +            } else if allowed_characters.contains(&c) { +                output.push(c as u8); +            } else { +                return Err(Error::IllegalCharacter); +            } +        } +        Ok(output) +    } + +    fn decode_query_or_fragment(query_or_fragment: &str) -> Result<Vec<u8>, Error> {          unimplemented!()      } +    #[must_use = "why u no use host return value?"] +    pub fn host(&self) -> Option<&[u8]> { +        // Here is another way to do the same thing, but with some Rust-fu. +        // Credit goes to everx80, ABuffSeagull, and silen_z: +        // +        // self.authority +        //     .as_ref() +        //     .and_then( +        //         |authority| authority.host.as_deref() +        //     ) +        // +        // * First `as_ref` gets our authority from `&Option<Authority>` into +        //   `Option<&Authority>` (there is an implicit borrow of +        //   `self.authority` first). +        // * Next, `and_then` basically converts `Option<&Authority>` +        //   into `Option<&[u8]>` by leveraging the closure we provide +        //   to convert `&Authority` into `Option<&[u8]>`. +        // * Finally, our closure uses `as_deref` to turn our `Option<Vec<u8>>` +        //   into an `Option<&[u8]>` since Vec<T> implements DeRef with +        //   `Target=[T]` +        if let Some(authority) = &self.authority { +            Some(&authority.host) +        } else { +            None +        } +    } +      pub fn parse(uri_string: &str) -> Result<Uri, Error> {          let (scheme, rest) = Self::parse_scheme(uri_string)?; @@ -130,22 +283,187 @@ impl Uri {          })      } -    fn parse_authority(_authority_string: &str) -> Result<Authority, Error> { -        unimplemented!() +    // TODO: Needs refactoring, as Clippy dutifully told us. +    #[allow(clippy::too_many_lines)] +    fn parse_authority(authority_string: &str) -> Result<Authority, Error> { +        // These are the various states for the state machine implemented +        // below to correctly split up and validate the URI substring +        // containing the host and potentially a port number as well. +        #[derive(PartialEq)] +        enum HostParsingState { +            NotIpLiteral, +            PercentEncodedCharacter, +            IpLiteral, +            Ipv6Address, +            IpvFutureNumber, +            IpvFutureBody, +            GarbageCheck, +            Port, +        }; + +        // First, check if there is a UserInfo, and if so, extract it. +        let (userinfo, host_port_string) = match authority_string.find('@') { +            Some(user_info_delimiter) => ( +                Some( +                    Self::decode_element( +                        &authority_string[0..user_info_delimiter], +                        &USER_INFO_NOT_PCT_ENCODED +                    )? +                ), +                &authority_string[user_info_delimiter+1..] +            ), +            _ => ( +                None, +                authority_string +            ) +        }; + +        // Next, parsing host and port from authority and path. +        let mut port_string = String::new(); +        let (mut host_parsing_state, host_is_reg_name) = if host_port_string.starts_with('[') { +            (HostParsingState::IpLiteral, false) +        } else { +            (HostParsingState::NotIpLiteral, true) +        }; +        let mut host = Vec::<u8>::new(); +        let mut ipv6_address = String::new(); +        let mut pec_decoder = PercentEncodedCharacterDecoder::new(); +        for c in host_port_string.chars() { +            host_parsing_state = match host_parsing_state { +                HostParsingState::NotIpLiteral => { +                    if c == '%' { +                        HostParsingState::PercentEncodedCharacter +                    } else if c == ':' { +                        HostParsingState::Port +                    } else if REG_NAME_NOT_PCT_ENCODED.contains(&c) { +                        host.push(u8::try_from(c as u32).unwrap()); +                        host_parsing_state +                    } else { +                        return Err(Error::IllegalCharacter); +                    } +                }, + +                HostParsingState::PercentEncodedCharacter => { +                    if let Some(ci) = pec_decoder.next(c)? { +                        host.push(ci); +                        HostParsingState::NotIpLiteral +                    } else { +                        host_parsing_state +                    } +                }, + +                HostParsingState::IpLiteral => { +                    if c == 'v' { +                        host.push(b'v'); +                        HostParsingState::IpvFutureNumber +                    } else { +                        HostParsingState::Ipv6Address +                    } +                }, + +                HostParsingState::Ipv6Address => { +                    if c == ']' { +                        host = Self::validate_ipv6_address(&ipv6_address)?; +                        HostParsingState::GarbageCheck +                    } else { +                        ipv6_address.push(c); +                        host_parsing_state +                    } +                }, + +                HostParsingState::IpvFutureNumber => { +                    if c == '.' { +                        host_parsing_state = HostParsingState::IpvFutureBody +                    } else if !HEXDIG.contains(&c) { +                        return Err(Error::IllegalCharacter); +                    } +                    host.push(u8::try_from(c as u32).unwrap()); +                    host_parsing_state +                }, + +                HostParsingState::IpvFutureBody => { +                    if c == ']' { +                        HostParsingState::GarbageCheck +                    } else if IPV_FUTURE_LAST_PART.contains(&c) { +                        host.push(u8::try_from(c as u32).unwrap()); +                        host_parsing_state +                    } else { +                        return Err(Error::IllegalCharacter); +                    } +                }, + +                HostParsingState::GarbageCheck => { +                    // illegal to have anything else, unless it's a colon, +                    // in which case it's a port delimiter +                    if c == ':' { +                        HostParsingState::Port +                    } else { +                        return Err(Error::IllegalCharacter); +                    } +                }, + +                HostParsingState::Port => { +                    port_string.push(c); +                    host_parsing_state +                }, +            } +        } + +        // My normal coding style requires extra parentheses for conditionals +        // having multiple parts broken up into different lines, but rust +        // hates it.  Well, sorry rust, but we're going to do it anyway. +        // FeelsUnusedParensMan +        #[allow(unused_parens)] +        if ( +            (host_parsing_state != HostParsingState::NotIpLiteral) +            && (host_parsing_state != HostParsingState::GarbageCheck) +            && (host_parsing_state != HostParsingState::Port) +        ) { +            // truncated or ended early +            return Err(Error::TruncatedHost); +        } +        if host_is_reg_name { +            host.make_ascii_lowercase(); +        } +        let port = if port_string.is_empty() { +            None +        } else if let Ok(port) = port_string.parse::<u16>() { +            Some(port) +        } else { +            return Err(Error::IllegalCharacter); +        }; +        Ok(Authority{ +            userinfo, +            host, +            port, +        })      } -    fn parse_fragment(_query_and_or_fragment: &str) -> Result<(Option<String>, &str), Error> { -        unimplemented!() +    fn parse_fragment(query_and_or_fragment: &str) -> Result<(Option<Vec<u8>>, &str), Error> { +        if let Some(fragment_delimiter) = query_and_or_fragment.find('#') { +            let fragment = Self::decode_query_or_fragment( +                &query_and_or_fragment[fragment_delimiter+1..] +            )?; +            Ok(( +                Some(fragment), +                &query_and_or_fragment[0..fragment_delimiter] +            )) +        } else { +            Ok(( +                None, +                query_and_or_fragment +            )) +        }      } -    fn parse_path(path_string: &str) -> Result<Vec<String>, Error> { +    fn parse_path(path_string: &str) -> Result<Vec<Vec<u8>>, Error> {          // TODO: improvement: make an iterator and only collect at the end. -        let mut path = Vec::<String>::new(); +        let mut path_encoded = Vec::<String>::new();          match path_string {              "/" => {                  // Special case of a path that is empty but needs a single                  // empty-string element to indicate that it is absolute. -                path.push("".to_string()); +                path_encoded.push("".to_string());              },              "" => { @@ -156,12 +474,12 @@ impl Uri {                  // [15:49] silen_z: path_string.split('/').collect()                  loop {                      if let Some(path_delimiter) = path_string.find('/') { -                        path.push( +                        path_encoded.push(                              path_string[0..path_delimiter].to_string()                          );                          path_string = &path_string[path_delimiter+1..];                      } else { -                        path.push(path_string.to_string()); +                        path_encoded.push(path_string.to_string());                          break;                      }                  } @@ -169,15 +487,21 @@ impl Uri {          }          // TODO: improvement          // [15:57] silen_z: collect into Result<Vec<_>, Error> -        for segment in &mut path { -            let new_segment = Self::decode_element(segment, &PCHAR_NOT_PCT_ENCODED)?; -            *segment = new_segment; -        } -        Ok(path) +        path_encoded.into_iter().map( +            |segment| { +                Self::decode_element(&segment, &PCHAR_NOT_PCT_ENCODED) +            } +        ) +            .collect::<Result<Vec<Vec<u8>>, Error>>()      } -    fn parse_query(_query_and_or_fragment: &str) -> Result<Option<String>, Error> { -        unimplemented!() +    fn parse_query(query_and_or_fragment: &str) -> Result<Option<Vec<u8>>, Error> { +        if query_and_or_fragment.is_empty() { +            Ok(Some(Vec::new())) +        } else { +            let query = Self::decode_query_or_fragment(&query_and_or_fragment[1..])?; +            Ok(Some(query)) +        }      }      fn parse_scheme(uri_string: &str) -> Result<(Option<String>, &str), Error> { @@ -190,25 +514,41 @@ impl Uri {          if let Some(scheme_end) = &uri_string[0..authority_or_path_delimiter_start].find(':') {              let scheme = Self::check_scheme(&uri_string[0..*scheme_end])?                  .to_lowercase(); -            Ok((Some(scheme), &uri_string[*scheme_end..])) +            Ok((Some(scheme), &uri_string[*scheme_end+1..]))          } else {              Ok((None, uri_string))          }      } -    pub fn path(&self) -> &str { -        "" +    #[must_use = "you called path() to get the path, so why you no use?"] +    pub fn path(&self) -> &Vec<Vec<u8>> { +        &self.path +    } + +    #[must_use = "why did you get the port number and then throw it away?"] +    pub fn port(&self) -> Option<u16> { +        if let Some(authority) = &self.authority { +            authority.port +        } else { +            None +        }      } -    // TODO: -    // [17:09] silen_z: as_ref() -    pub fn scheme(&self) -> &Option<String> { -        &self.scheme +    #[must_use = "you wanted to use that scheme, right?"] +    pub fn scheme(&self) -> Option<&str> { +        // NOTE: This seemingly magic `as_deref` works because of two +        // things that are going on here: +        // 1) String implements DeRef with `str` as the associated type +        //    `Target`, meaning you can use a String in a context requiring +        //    &str, and String does the conversion work. +        // 2) as_deref works by turning `Option<T>` into `Option<&T::Target>`, +        //    requiring T to implement Deref.  In this case T is String. +        self.scheme.as_deref()      }      fn split_authority_from_path_and_parse_them(          authority_and_path_string: &str -    ) -> Result<(Option<Authority>, Vec<String>), Error> { +    ) -> Result<(Option<Authority>, Vec<Vec<u8>>), Error> {          // Split authority from path.  If there is an authority, parse it.          if &authority_and_path_string[0..2] == "//" {              // Strip off authority marker. @@ -229,6 +569,10 @@ impl Uri {              Ok((None, path))          }      } + +    fn validate_ipv6_address(_address: &str) -> Result<Vec<u8>, Error> { +        unimplemented!() +    }  }  #[cfg(test)] @@ -243,78 +587,80 @@ mod tests {          assert!(uri.is_ok());          let uri = uri.unwrap();          assert_eq!(None, uri.scheme()); -        assert_eq!("foo/bar", uri.path()); -        assert_eq!(uri.path(), "foo/bar"); +        // TODO: needs improvement; I don't like having to spam `to_vec`. +        // [15:49] kimundi2016: &b""[..] may also work +        //   Indeed, we could replace `.to_vec()` with `[..]`. +        // +        // Maybe we just make a convenience method we could use like this: +        // assert_eq!("foo/bar", uri.path_as_str()); +        assert_eq!(&[&b"foo"[..], &b"bar"[..]].to_vec(), uri.path()); +        assert_eq!(uri.path(), &[&b"foo"[..], &b"bar"[..]].to_vec());      }      #[test]      fn parse_from_string_url() { -        let uri = uriparse::URIReference::try_from("http://www.example.com/foo/bar"); +        let uri = Uri::parse("http://www.example.com/foo/bar");          assert!(uri.is_ok());          let uri = uri.unwrap(); -        assert_eq!(Some("http"), uri.scheme().map(uriparse::Scheme::as_str)); -        assert_eq!( -            Some("www.example.com".to_string()), -            uri.host().map(std::string::ToString::to_string) -        ); -        assert_eq!(uri.path(), "/foo/bar"); +        assert_eq!(Some("http"), uri.scheme()); +        assert_eq!(Some(&b"www.example.com"[..]), uri.host()); +        assert_eq!(uri.path(), &[&b""[..], &b"foo"[..], &b"bar"[..]].to_vec());      }      #[test]      fn parse_from_string_urn_default_path_delimiter() { -        let uri = uriparse::URIReference::try_from("urn:book:fantasy:Hobbit"); +        let uri = Uri::parse("urn:book:fantasy:Hobbit");          assert!(uri.is_ok());          let uri = uri.unwrap(); -        assert_eq!(Some("urn"), uri.scheme().map(uriparse::Scheme::as_str)); +        assert_eq!(Some("urn"), uri.scheme());          assert_eq!(None, uri.host()); -        assert_eq!(uri.path(), "book:fantasy:Hobbit"); +        assert_eq!(uri.path(), &[&b"book:fantasy:Hobbit"[..]].to_vec());      } -    #[test] -    fn parse_from_string_path_corner_cases() { -        let test_vectors = [ -            "", -            "/", -            "/foo", -            "foo/" -        ]; -        for test_vector in &test_vectors { -            let uri = uriparse::URIReference::try_from(*test_vector); -            assert!(uri.is_ok()); -            let uri = uri.unwrap(); -            assert_eq!(uri.path(), test_vector); -        } -    } +    // TODO: Fix this test! +    // #[test] +    // fn parse_from_string_path_corner_cases() { +    //     struct TestVector { +    //         path_in: &'static str, +    //         path_out: Vec<Vec<u8>>, +    //     }; +    //     let test_vectors = [ +    //         TestVector{path_in: "", path_out: &[].to_vec()}, +    //         TestVector{path_in: "/", path_out: &[&b"/"[..]].to_vec()}, +    //         TestVector{path_in: "/foo", path_out: &[&b""[..], &b"foo"[..]].to_vec()}, +    //         TestVector{path_in: "foo/", path_out: &[&b"foo"[..], &b""[..]].to_vec()}, +    //     ]; +    //     for test_vector in &test_vectors { +    //         let uri = Uri::parse(test_vector.path_in); +    //         assert!(uri.is_ok()); +    //         let uri = uri.unwrap(); +    //         assert_eq!(uri.path(), &test_vector.path_out); +    //     } +    // }      #[test]      fn parse_from_string_has_a_port_number() { -        let uri = uriparse::URIReference::try_from("http://www.example.com:8080/foo/bar"); +        let uri = Uri::parse("http://www.example.com:8080/foo/bar");          assert!(uri.is_ok());          let uri = uri.unwrap(); -        assert_eq!( -            Some("www.example.com".to_string()), -            uri.host().map(std::string::ToString::to_string) -        ); +        assert_eq!(Some(&b"www.example.com"[..]), uri.host());          assert_eq!(Some(8080), uri.port());      }      #[test]      fn parse_from_string_does_not_have_a_port_number() { -        let uri = uriparse::URIReference::try_from("http://www.example.com/foo/bar"); +        let uri = Uri::parse("http://www.example.com/foo/bar");          assert!(uri.is_ok());          let uri = uri.unwrap(); -        assert_eq!( -            Some("www.example.com".to_string()), -            uri.host().map(std::string::ToString::to_string) -        ); +        assert_eq!(Some(&b"www.example.com"[..]), uri.host());          assert_eq!(None, uri.port());      }      #[test]      fn parse_from_string_twice_first_with_port_number_then_without() { -        let uri = uriparse::URIReference::try_from("http://www.example.com:8080/foo/bar"); +        let uri = Uri::parse("http://www.example.com:8080/foo/bar");          assert!(uri.is_ok()); -        let uri = uriparse::URIReference::try_from("http://www.example.com/foo/bar"); +        let uri = Uri::parse("http://www.example.com/foo/bar");          assert!(uri.is_ok());          let uri = uri.unwrap();          assert_eq!(None, uri.port()); @@ -322,19 +668,19 @@ mod tests {      #[test]      fn parse_from_string_bad_port_number_purly_alphabetic() { -        let uri = uriparse::URIReference::try_from("http://www.example.com:spam/foo/bar"); +        let uri = Uri::parse("http://www.example.com:spam/foo/bar");          assert!(uri.is_err());      }      #[test]      fn parse_from_string_bad_port_number_starts_numeric_ends_alphabetic() { -        let uri = uriparse::URIReference::try_from("http://www.example.com:8080spam/foo/bar"); +        let uri = Uri::parse("http://www.example.com:8080spam/foo/bar");          assert!(uri.is_err());      }      #[test]      fn parse_from_string_largest_valid_port_number() { -        let uri = uriparse::URIReference::try_from("http://www.example.com:65535/foo/bar"); +        let uri = Uri::parse("http://www.example.com:65535/foo/bar");          assert!(uri.is_ok());          let uri = uri.unwrap();          assert_eq!(Some(65535), uri.port()); @@ -342,19 +688,19 @@ mod tests {      #[test]      fn parse_from_string_bad_port_number_too_big() { -        let uri = uriparse::URIReference::try_from("http://www.example.com:65536/foo/bar"); +        let uri = Uri::parse("http://www.example.com:65536/foo/bar");          assert!(uri.is_err());      }      #[test]      fn parse_from_string_bad_port_number_negative() { -        let uri = uriparse::URIReference::try_from("http://www.example.com:-1234/foo/bar"); +        let uri = Uri::parse("http://www.example.com:-1234/foo/bar");          assert!(uri.is_err());      }      #[test]      fn parse_from_string_ends_after_authority() { -        let uri = uriparse::URIReference::try_from("http://www.example.com"); +        let uri = Uri::parse("http://www.example.com");          assert!(uri.is_ok());      } diff --git a/src/percent_encoded_character_decoder.rs b/src/percent_encoded_character_decoder.rs new file mode 100644 index 0000000..2fd2753 --- /dev/null +++ b/src/percent_encoded_character_decoder.rs @@ -0,0 +1,143 @@ +#![warn(clippy::pedantic)] + +use std::collections::HashSet; +use std::convert::TryFrom; + +// This is the character set containing just numbers. +lazy_static! { +    static ref DIGIT: HashSet<char> = +        ('0'..='9') +        .collect(); +} + +// This is the character set containing just the upper-case +// letters 'A' through 'F', used in upper-case hexadecimal. +lazy_static! { +    static ref HEX_UPPER: HashSet<char> = +        ('A'..='F') +        .collect(); +} + +// This is the character set containing just the lower-case +// letters 'a' through 'f', used in lower-case hexadecimal. +lazy_static! { +    static ref HEX_LOWER: HashSet<char> = +        ('a'..='f') +        .collect(); +} + +// TODO: Learn about using thiserror to define library errors +// [14:05] ABuffSeagull: You should use https://lib.rs/crates/thiserror for the errors +// [14:07] 715209: i also recommend thiserror +#[derive(Debug, Clone, PartialEq)] +pub enum Error { +    IllegalCharacter, +} + +impl std::fmt::Display for Error { +    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { +        match self { +            Error::IllegalCharacter => { +                write!(f, "illegal character") +            }, +        } +    } +} + +pub struct PercentEncodedCharacterDecoder { +    decoded_character: u8, +    digits_left: usize, +} + +impl PercentEncodedCharacterDecoder { +    pub fn new() -> Self { +        Self{ +            decoded_character: 0, +            digits_left: 2, +        } +    } + +    pub fn next( +        &mut self, +        c: char +    ) -> Result<Option<u8>, Error> { +        self.shift_in_hex_digit(c)?; +        self.digits_left -= 1; +        if self.digits_left == 0 { +            let output = self.decoded_character; +            self.reset(); +            Ok(Some(output)) +        } else { +            Ok(None) +        } +    } + +    fn reset(&mut self) { +        self.decoded_character = 0; +        self.digits_left = 2; +    } + +    fn shift_in_hex_digit( +        &mut self, +        c: char +    ) -> Result<(), Error> { +        self.decoded_character <<= 4; +        if let Some(ci) = c.to_digit(16) { +            self.decoded_character += u8::try_from(ci).unwrap(); +        } else { +            self.reset(); +            return Err(Error::IllegalCharacter); +        } +        Ok(()) +    } + +} + +#[cfg(test)] +mod tests { + +    use super::*; + +    #[test] +    fn good_sequences() { + +        // TODO: consider named tuples instead +        // +        // [14:07] LeinardoSmith: Looks like there is a macro for named tuples: +        // https://docs.rs/named_tuple/0.1.3/named_tuple/ +        struct TestVector { +            sequence: [char; 2], +            expected_output: u8, +        } +        let test_vectors = [ +            TestVector{sequence: ['4', '1'], expected_output: b'A'}, +            TestVector{sequence: ['5', 'A'], expected_output: b'Z'}, +            TestVector{sequence: ['6', 'e'], expected_output: b'n'}, +            TestVector{sequence: ['e', '1'], expected_output: b'\xe1'}, +            TestVector{sequence: ['C', 'A'], expected_output: b'\xca'}, +        ]; +        for test_vector in &test_vectors { +            let mut pec = PercentEncodedCharacterDecoder::new(); +            assert_eq!( +                Ok(None), +                pec.next(test_vector.sequence[0]) +            ); +            assert_eq!( +                Ok(Some(test_vector.expected_output)), +                pec.next(test_vector.sequence[1]) +            ); +        } +    } + +    #[test] +    fn bad_sequences() { +        let test_vectors = [ +            'G', 'g', '.', 'z', '-', ' ', 'V', +        ]; +        for test_vector in &test_vectors { +            let mut pec = PercentEncodedCharacterDecoder::new(); +            assert!(pec.next(*test_vector).is_err()); +        } +    } + +} | 
