diff options
author | Richard Walters <rwalters@digitalstirling.com> | 2020-10-06 19:07:05 -0700 |
---|---|---|
committer | Richard Walters <rwalters@digitalstirling.com> | 2020-10-06 19:07:05 -0700 |
commit | 404513ddb356f5e803b0102d408d02eb031cf13f (patch) | |
tree | 29fa94597aadab47f439681aeefed2f61f545e18 /src/percent_encoded_character_decoder.rs | |
parent | af730ddd195521fd25b7c1dd13fd10c349dd4a7c (diff) |
Work in progress porting to Rust
Diffstat (limited to 'src/percent_encoded_character_decoder.rs')
-rw-r--r-- | src/percent_encoded_character_decoder.rs | 143 |
1 files changed, 143 insertions, 0 deletions
diff --git a/src/percent_encoded_character_decoder.rs b/src/percent_encoded_character_decoder.rs new file mode 100644 index 0000000..2fd2753 --- /dev/null +++ b/src/percent_encoded_character_decoder.rs @@ -0,0 +1,143 @@ +#![warn(clippy::pedantic)] + +use std::collections::HashSet; +use std::convert::TryFrom; + +// This is the character set containing just numbers. +lazy_static! { + static ref DIGIT: HashSet<char> = + ('0'..='9') + .collect(); +} + +// This is the character set containing just the upper-case +// letters 'A' through 'F', used in upper-case hexadecimal. +lazy_static! { + static ref HEX_UPPER: HashSet<char> = + ('A'..='F') + .collect(); +} + +// This is the character set containing just the lower-case +// letters 'a' through 'f', used in lower-case hexadecimal. +lazy_static! { + static ref HEX_LOWER: HashSet<char> = + ('a'..='f') + .collect(); +} + +// TODO: Learn about using thiserror to define library errors +// [14:05] ABuffSeagull: You should use https://lib.rs/crates/thiserror for the errors +// [14:07] 715209: i also recommend thiserror +#[derive(Debug, Clone, PartialEq)] +pub enum Error { + IllegalCharacter, +} + +impl std::fmt::Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Error::IllegalCharacter => { + write!(f, "illegal character") + }, + } + } +} + +pub struct PercentEncodedCharacterDecoder { + decoded_character: u8, + digits_left: usize, +} + +impl PercentEncodedCharacterDecoder { + pub fn new() -> Self { + Self{ + decoded_character: 0, + digits_left: 2, + } + } + + pub fn next( + &mut self, + c: char + ) -> Result<Option<u8>, Error> { + self.shift_in_hex_digit(c)?; + self.digits_left -= 1; + if self.digits_left == 0 { + let output = self.decoded_character; + self.reset(); + Ok(Some(output)) + } else { + Ok(None) + } + } + + fn reset(&mut self) { + self.decoded_character = 0; + self.digits_left = 2; + } + + fn shift_in_hex_digit( + &mut self, + c: char + ) -> Result<(), Error> { + self.decoded_character <<= 4; + if let Some(ci) = c.to_digit(16) { + self.decoded_character += u8::try_from(ci).unwrap(); + } else { + self.reset(); + return Err(Error::IllegalCharacter); + } + Ok(()) + } + +} + +#[cfg(test)] +mod tests { + + use super::*; + + #[test] + fn good_sequences() { + + // TODO: consider named tuples instead + // + // [14:07] LeinardoSmith: Looks like there is a macro for named tuples: + // https://docs.rs/named_tuple/0.1.3/named_tuple/ + struct TestVector { + sequence: [char; 2], + expected_output: u8, + } + let test_vectors = [ + TestVector{sequence: ['4', '1'], expected_output: b'A'}, + TestVector{sequence: ['5', 'A'], expected_output: b'Z'}, + TestVector{sequence: ['6', 'e'], expected_output: b'n'}, + TestVector{sequence: ['e', '1'], expected_output: b'\xe1'}, + TestVector{sequence: ['C', 'A'], expected_output: b'\xca'}, + ]; + for test_vector in &test_vectors { + let mut pec = PercentEncodedCharacterDecoder::new(); + assert_eq!( + Ok(None), + pec.next(test_vector.sequence[0]) + ); + assert_eq!( + Ok(Some(test_vector.expected_output)), + pec.next(test_vector.sequence[1]) + ); + } + } + + #[test] + fn bad_sequences() { + let test_vectors = [ + 'G', 'g', '.', 'z', '-', ' ', 'V', + ]; + for test_vector in &test_vectors { + let mut pec = PercentEncodedCharacterDecoder::new(); + assert!(pec.next(*test_vector).is_err()); + } + } + +} |