1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
|
#![warn(clippy::pedantic)]
use std::collections::HashSet;
use std::convert::TryFrom;
// The set of decimal digit characters, '0' through '9'.
lazy_static! {
    static ref DIGIT: HashSet<char> = "0123456789".chars().collect();
}
// The set of upper-case letters that serve as hexadecimal digits,
// 'A' through 'F'.
lazy_static! {
    static ref HEX_UPPER: HashSet<char> = "ABCDEF".chars().collect();
}
// The set of lower-case letters that serve as hexadecimal digits,
// 'a' through 'f'.
lazy_static! {
    static ref HEX_LOWER: HashSet<char> = "abcdef".chars().collect();
}
/// Errors produced while decoding a percent-encoded character.
#[derive(Debug, Clone, thiserror::Error, PartialEq)]
pub enum Error {
/// A character that is not a hexadecimal digit was given to the decoder.
#[error("illegal character")]
IllegalCharacter,
}
pub struct PercentEncodedCharacterDecoder {
decoded_character: u8,
digits_left: usize,
}
impl PercentEncodedCharacterDecoder {
pub fn new() -> Self {
Self{
decoded_character: 0,
digits_left: 2,
}
}
pub fn next(
&mut self,
c: char
) -> Result<Option<u8>, Error> {
self.shift_in_hex_digit(c)?;
self.digits_left -= 1;
if self.digits_left == 0 {
let output = self.decoded_character;
self.reset();
Ok(Some(output))
} else {
Ok(None)
}
}
fn reset(&mut self) {
self.decoded_character = 0;
self.digits_left = 2;
}
fn shift_in_hex_digit(
&mut self,
c: char
) -> Result<(), Error> {
self.decoded_character <<= 4;
if let Some(ci) = c.to_digit(16) {
self.decoded_character += u8::try_from(ci).unwrap();
} else {
self.reset();
return Err(Error::IllegalCharacter);
}
Ok(())
}
}
#[cfg(test)]
mod tests {
    use super::*;

    // Each pair of valid hex digits yields nothing after the first digit
    // and the expected byte after the second.
    #[test]
    fn good_sequences() {
        named_tuple!(
            struct TestVector {
                sequence: [char; 2],
                expected_output: u8,
            }
        );
        let test_vectors: [TestVector; 5] = [
            (['4', '1'], b'A').into(),
            (['5', 'A'], b'Z').into(),
            (['6', 'e'], b'n').into(),
            (['e', '1'], b'\xe1').into(),
            (['C', 'A'], b'\xca').into(),
        ];
        for test_vector in &test_vectors {
            let mut pec = PercentEncodedCharacterDecoder::new();
            assert_eq!(
                Ok(None),
                pec.next(test_vector.sequence()[0])
            );
            assert_eq!(
                Ok(Some(*test_vector.expected_output())),
                pec.next(test_vector.sequence()[1])
            );
        }
    }

    // A character that is not a hex digit is rejected when it is the
    // first character given to a fresh decoder.
    #[test]
    fn bad_sequences() {
        let test_vectors = [
            'G', 'g', '.', 'z', '-', ' ', 'V',
        ];
        for test_vector in &test_vectors {
            let mut pec = PercentEncodedCharacterDecoder::new();
            assert_eq!(
                Err(Error::IllegalCharacter),
                pec.next(*test_vector)
            );
        }
    }

    // A character that is not a hex digit is also rejected in the second
    // position, and the half-decoded byte is thrown away so that the
    // decoder can be reused for the next pair.
    #[test]
    fn bad_second_digit_resets_decoder() {
        let test_vectors = [
            'G', 'g', '.', 'z', '-', ' ', 'V', '%', '\u{ff14}',
        ];
        for test_vector in &test_vectors {
            let mut pec = PercentEncodedCharacterDecoder::new();
            assert_eq!(Ok(None), pec.next('4'));
            assert_eq!(
                Err(Error::IllegalCharacter),
                pec.next(*test_vector)
            );
            assert_eq!(Ok(None), pec.next('4'));
            assert_eq!(Ok(Some(b'A')), pec.next('1'));
        }
    }
}
|