aboutsummaryrefslogtreecommitdiff
path: root/src/character_classes.rs
blob: a9e7b374886f6dea4875e6d42a3657cb2b8e21cb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
use once_cell::sync::Lazy;
use std::collections::HashSet;

// This is the character set containing just the alphabetic characters
// from the ASCII character set.
pub static ALPHA: Lazy<HashSet<char>> = Lazy::new(||
    ('a'..='z')
    .chain('A'..='Z')
    .collect()
);

// This is the character set containing just numbers.
pub static DIGIT: Lazy<HashSet<char>> = Lazy::new(||
    ('0'..='9')
    .collect()
);

// This is the character set containing just the characters allowed
// in a hexadecimal digit.
pub static HEXDIG: Lazy<HashSet<char>> = Lazy::new(||
    DIGIT.iter().copied()
    .chain('A'..='F')
    .chain('a'..='f')
    .collect()
);

// This is the character set corresponds to the "unreserved" syntax
// specified in RFC 3986 (https://tools.ietf.org/html/rfc3986).
pub static UNRESERVED: Lazy<HashSet<char>> = Lazy::new(||
    ALPHA.iter()
    .chain(DIGIT.iter())
    .chain(['-', '.', '_', '~'].iter())
    .copied()
    .collect()
);

// This is the character set corresponds to the "sub-delims" syntax
// specified in RFC 3986 (https://tools.ietf.org/html/rfc3986).
pub static SUB_DELIMS: Lazy<HashSet<char>> = Lazy::new(||
    [
        '!', '$', '&', '\'', '(', ')',
        '*', '+', ',', ';', '='
    ]
    .iter()
    .copied()
    .collect()
);

// This is the character set corresponds to the second part
// of the "scheme" syntax
// specified in RFC 3986 (https://tools.ietf.org/html/rfc3986).
pub static SCHEME_NOT_FIRST: Lazy<HashSet<char>> = Lazy::new(||
    ALPHA.iter()
    .chain(DIGIT.iter())
    .chain(['+', '-', '.'].iter())
    .copied()
    .collect()
);

// This is the character set corresponds to the "pchar" syntax
// specified in RFC 3986 (https://tools.ietf.org/html/rfc3986),
// leaving out "pct-encoded".
pub static PCHAR_NOT_PCT_ENCODED: Lazy<HashSet<char>> = Lazy::new(||
    UNRESERVED.iter()
    .chain(SUB_DELIMS.iter())
    .chain([':', '@'].iter())
    .copied()
    .collect()
);

// This is the character set corresponds to the "query" syntax
// and the "fragment" syntax
// specified in RFC 3986 (https://tools.ietf.org/html/rfc3986),
// leaving out "pct-encoded".
pub static QUERY_OR_FRAGMENT_NOT_PCT_ENCODED: Lazy<HashSet<char>> = Lazy::new(||
    PCHAR_NOT_PCT_ENCODED.iter()
    .chain(['/', '?'].iter())
    .copied()
    .collect()
);

// This is the character set almost corresponds to the "query" syntax
// specified in RFC 3986 (https://tools.ietf.org/html/rfc3986),
// leaving out "pct-encoded", except that '+' is also excluded, because
// for some web services (e.g. AWS S3) a '+' is treated as
// synonymous with a space (' ') and thus gets misinterpreted.
pub static QUERY_NOT_PCT_ENCODED_WITHOUT_PLUS: Lazy<HashSet<char>> = Lazy::new(||
    UNRESERVED.iter()
    .chain([
        '!', '$', '&', '\'', '(', ')',
        '*', ',', ';', '=',
        ':', '@',
        '/', '?'
    ].iter())
    .copied()
    .collect()
);

// This is the character set corresponds to the "userinfo" syntax
// specified in RFC 3986 (https://tools.ietf.org/html/rfc3986),
// leaving out "pct-encoded".
pub static USER_INFO_NOT_PCT_ENCODED: Lazy<HashSet<char>> = Lazy::new(||
    UNRESERVED.iter()
    .chain(SUB_DELIMS.iter())
    .chain([':'].iter())
    .copied()
    .collect()
);

// This is the character set corresponds to the "reg-name" syntax
// specified in RFC 3986 (https://tools.ietf.org/html/rfc3986),
// leaving out "pct-encoded".
pub static REG_NAME_NOT_PCT_ENCODED: Lazy<HashSet<char>> = Lazy::new(||
    UNRESERVED.iter()
    .chain(SUB_DELIMS.iter())
    .copied()
    .collect()
);

// This is the character set corresponds to the last part of
// the "IPvFuture" syntax
// specified in RFC 3986 (https://tools.ietf.org/html/rfc3986).
pub static IPV_FUTURE_LAST_PART: Lazy<HashSet<char>> = Lazy::new(||
    UNRESERVED.iter()
    .chain(SUB_DELIMS.iter())
    .chain([':'].iter())
    .copied()
    .collect()
);