// src/percent_encoded_character_decoder.rs

#![warn(clippy::pedantic)]

use std::collections::HashSet;
use std::convert::TryFrom;

// The set of decimal digit characters, '0' through '9'.
lazy_static! {
    static ref DIGIT: HashSet<char> = "0123456789".chars().collect();
}

// The set of upper-case letters that can appear in a hexadecimal
// number: 'A' through 'F'.
lazy_static! {
    static ref HEX_UPPER: HashSet<char> = "ABCDEF".chars().collect();
}

// The set of lower-case letters that can appear in a hexadecimal
// number: 'a' through 'f'.
lazy_static! {
    static ref HEX_LOWER: HashSet<char> = "abcdef".chars().collect();
}

#[derive(Debug, Clone, thiserror::Error, PartialEq)]
pub enum Error {
    #[error("illegal character")]
    IllegalCharacter,
}

pub struct PercentEncodedCharacterDecoder {
    decoded_character: u8,
    digits_left: usize,
}

impl PercentEncodedCharacterDecoder {
    pub fn new() -> Self {
        Self{
            decoded_character: 0,
            digits_left: 2,
        }
    }

    pub fn next(
        &mut self,
        c: char
    ) -> Result<Option<u8>, Error> {
        self.shift_in_hex_digit(c)?;
        self.digits_left -= 1;
        if self.digits_left == 0 {
            let output = self.decoded_character;
            self.reset();
            Ok(Some(output))
        } else {
            Ok(None)
        }
    }

    fn reset(&mut self) {
        self.decoded_character = 0;
        self.digits_left = 2;
    }

    fn shift_in_hex_digit(
        &mut self,
        c: char
    ) -> Result<(), Error> {
        self.decoded_character <<= 4;
        if let Some(ci) = c.to_digit(16) {
            self.decoded_character += u8::try_from(ci).unwrap();
        } else {
            self.reset();
            return Err(Error::IllegalCharacter);
        }
        Ok(())
    }

}

#[cfg(test)]
mod tests {

    use super::*;

    // A pair of valid hex digits, in either letter case, yields `Ok(None)`
    // for the first digit and the decoded byte for the second.
    #[test]
    fn good_sequences() {
        named_tuple!(
            struct TestVector {
                sequence: [char; 2],
                expected_output: u8,
            }
        );
        let test_vectors: [TestVector; 5] = [
            (['4', '1'], b'A').into(),
            (['5', 'A'], b'Z').into(),
            (['6', 'e'], b'n').into(),
            (['e', '1'], b'\xe1').into(),
            (['C', 'A'], b'\xca').into(),
        ];
        for test_vector in &test_vectors {
            let mut pec = PercentEncodedCharacterDecoder::new();
            assert_eq!(
                Ok(None),
                pec.next(test_vector.sequence()[0])
            );
            assert_eq!(
                Ok(Some(*test_vector.expected_output())),
                pec.next(test_vector.sequence()[1])
            );
        }
    }

    // A character that is not a hex digit is rejected when it arrives
    // as the first digit of a pair.
    #[test]
    fn bad_sequences() {
        let test_vectors = [
            'G', 'g', '.', 'z', '-', ' ', 'V',
        ];
        for test_vector in &test_vectors {
            let mut pec = PercentEncodedCharacterDecoder::new();
            assert_eq!(
                Err(Error::IllegalCharacter),
                pec.next(*test_vector)
            );
        }
    }

    // A character that is not a hex digit is also rejected when it arrives
    // as the second digit, and the digit received before it is discarded:
    // the same decoder then decodes a fresh pair correctly.
    #[test]
    fn bad_second_character() {
        let test_vectors = [
            'G', 'g', '.', 'z', '-', ' ', 'V',
        ];
        for test_vector in &test_vectors {
            let mut pec = PercentEncodedCharacterDecoder::new();
            assert_eq!(
                Ok(None),
                pec.next('4')
            );
            assert_eq!(
                Err(Error::IllegalCharacter),
                pec.next(*test_vector)
            );
            assert_eq!(
                Ok(None),
                pec.next('4')
            );
            assert_eq!(
                Ok(Some(b'A')),
                pec.next('1')
            );
        }
    }

    // After producing a byte the decoder starts over, so one instance can
    // decode several pairs in a row.
    #[test]
    fn consecutive_sequences() {
        let mut pec = PercentEncodedCharacterDecoder::new();
        assert_eq!(
            Ok(None),
            pec.next('f')
        );
        assert_eq!(
            Ok(Some(b'\xff')),
            pec.next('F')
        );
        assert_eq!(
            Ok(None),
            pec.next('0')
        );
        assert_eq!(
            Ok(Some(b'\x0a')),
            pec.next('a')
        );
    }

}