src/character_classes.rs


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119

use once_cell::sync::Lazy;
use std::collections::HashSet;

// Character set holding only the alphabetic characters of the ASCII
// character set: lowercase 'a'..='z' plus uppercase 'A'..='Z'.
pub static ALPHA: Lazy<HashSet<char>> = Lazy::new(|| {
    let mut letters: HashSet<char> = HashSet::new();
    letters.extend('a'..='z');
    letters.extend('A'..='Z');
    letters
});

// Character set holding only the decimal digits '0'..='9'.
pub static DIGIT: Lazy<HashSet<char>> = Lazy::new(|| {
    let mut digits: HashSet<char> = HashSet::new();
    digits.extend('0'..='9');
    digits
});

// Character set holding the characters allowed in a hexadecimal digit:
// every member of DIGIT plus the letters A-F in both upper and lower case.
pub static HEXDIG: Lazy<HashSet<char>> = Lazy::new(|| {
    let mut hex: HashSet<char> = HashSet::new();
    hex.extend(DIGIT.iter().copied());
    hex.extend('A'..='F');
    hex.extend('a'..='f');
    hex
});

// Character set corresponding to the "unreserved" syntax
// specified in RFC 3986 (https://tools.ietf.org/html/rfc3986):
// ALPHA, DIGIT, and the four marks '-', '.', '_' and '~'.
pub static UNRESERVED: Lazy<HashSet<char>> = Lazy::new(|| {
    let mut unreserved: HashSet<char> = HashSet::new();
    unreserved.extend(ALPHA.iter().copied());
    unreserved.extend(DIGIT.iter().copied());
    unreserved.extend("-._~".chars());
    unreserved
});

// Character set corresponding to the "sub-delims" syntax
// specified in RFC 3986 (https://tools.ietf.org/html/rfc3986).
pub static SUB_DELIMS: Lazy<HashSet<char>> = Lazy::new(|| {
    // The eleven sub-delimiters, spelled out as one string for readability.
    let delimiters = "!$&'()*+,;=";
    delimiters.chars().collect()
});

// Character set corresponding to the second part of the "scheme" syntax
// specified in RFC 3986 (https://tools.ietf.org/html/rfc3986),
// i.e. the characters permitted after the first character of a scheme:
// ALPHA, DIGIT, '+', '-' and '.'.
pub static SCHEME_NOT_FIRST: Lazy<HashSet<char>> = Lazy::new(|| {
    let mut scheme_tail: HashSet<char> = HashSet::new();
    scheme_tail.extend(ALPHA.iter().copied());
    scheme_tail.extend(DIGIT.iter().copied());
    scheme_tail.extend("+-.".chars());
    scheme_tail
});

// Character set corresponding to the "pchar" syntax
// specified in RFC 3986 (https://tools.ietf.org/html/rfc3986),
// leaving out "pct-encoded": UNRESERVED, SUB_DELIMS, ':' and '@'.
pub static PCHAR_NOT_PCT_ENCODED: Lazy<HashSet<char>> = Lazy::new(|| {
    let mut pchar: HashSet<char> = HashSet::new();
    pchar.extend(UNRESERVED.iter().copied());
    pchar.extend(SUB_DELIMS.iter().copied());
    pchar.insert(':');
    pchar.insert('@');
    pchar
});

// Character set corresponding to both the "query" syntax
// and the "fragment" syntax
// specified in RFC 3986 (https://tools.ietf.org/html/rfc3986),
// leaving out "pct-encoded": everything in PCHAR_NOT_PCT_ENCODED
// plus '/' and '?'.
pub static QUERY_OR_FRAGMENT_NOT_PCT_ENCODED: Lazy<HashSet<char>> =
    Lazy::new(|| {
        let mut allowed: HashSet<char> = HashSet::new();
        allowed.extend(PCHAR_NOT_PCT_ENCODED.iter().copied());
        allowed.insert('/');
        allowed.insert('?');
        allowed
    });

// Character set that almost corresponds to the "query" syntax
// specified in RFC 3986 (https://tools.ietf.org/html/rfc3986),
// leaving out "pct-encoded". The one difference is that '+' is excluded,
// because some web services (e.g. AWS S3) treat a '+' as synonymous
// with a space (' '), so an unencoded '+' would be misinterpreted.
pub static QUERY_NOT_PCT_ENCODED_WITHOUT_PLUS: Lazy<HashSet<char>> =
    Lazy::new(|| {
        // The sub-delims minus '+', followed by ':', '@', '/' and '?'.
        let extras = "!$&'()*,;=:@/?";
        let mut allowed: HashSet<char> = HashSet::new();
        allowed.extend(UNRESERVED.iter().copied());
        allowed.extend(extras.chars());
        allowed
    });

// Character set corresponding to the "userinfo" syntax
// specified in RFC 3986 (https://tools.ietf.org/html/rfc3986),
// leaving out "pct-encoded": UNRESERVED, SUB_DELIMS and ':'.
pub static USER_INFO_NOT_PCT_ENCODED: Lazy<HashSet<char>> = Lazy::new(|| {
    let mut user_info: HashSet<char> = HashSet::new();
    user_info.extend(UNRESERVED.iter().copied());
    user_info.extend(SUB_DELIMS.iter().copied());
    user_info.insert(':');
    user_info
});

// Character set corresponding to the "reg-name" syntax
// specified in RFC 3986 (https://tools.ietf.org/html/rfc3986),
// leaving out "pct-encoded": the union of UNRESERVED and SUB_DELIMS.
pub static REG_NAME_NOT_PCT_ENCODED: Lazy<HashSet<char>> =
    Lazy::new(|| UNRESERVED.union(&SUB_DELIMS).copied().collect());

// Character set corresponding to the last part of
// the "IPvFuture" syntax
// specified in RFC 3986 (https://tools.ietf.org/html/rfc3986):
// UNRESERVED, SUB_DELIMS and ':'.
pub static IPV_FUTURE_LAST_PART: Lazy<HashSet<char>> = Lazy::new(|| {
    let mut last_part: HashSet<char> = HashSet::new();
    for source in [&*UNRESERVED, &*SUB_DELIMS].iter() {
        last_part.extend(source.iter().copied());
    }
    last_part.insert(':');
    last_part
});