aboutsummaryrefslogtreecommitdiff
path: root/src/character_classes.rs
diff options
context:
space:
mode:
authorRichard Walters <rwalters@digitalstirling.com>2020-10-13 01:09:18 -0700
committerRichard Walters <rwalters@digitalstirling.com>2020-10-13 01:09:18 -0700
commitdc2a011598f4aa9e9de927333e467e623276d5ec (patch)
tree4b5c71634af516cdc96c512f28a02370d48c25b3 /src/character_classes.rs
parent4accf8c296ef7a1f6bd10a90b7a06b3b499ccda6 (diff)
Rust refactoring
* Move Context, Error, and character classes to their own modules. * Move host/port parsing and IP address validation to their own modules, and break the code up into different functions to process their state machines.
Diffstat (limited to 'src/character_classes.rs')
-rw-r--r--src/character_classes.rs131
1 files changed, 131 insertions, 0 deletions
diff --git a/src/character_classes.rs b/src/character_classes.rs
new file mode 100644
index 0000000..4b13f01
--- /dev/null
+++ b/src/character_classes.rs
@@ -0,0 +1,131 @@
+#![warn(clippy::pedantic)]
+
+use once_cell::sync::Lazy;
+use std::collections::HashSet;
+
+/// The alphabetic characters of the ASCII character set: lowercase
+/// `a` through `z` and uppercase `A` through `Z`.
+pub static ALPHA: Lazy<HashSet<char>> = Lazy::new(|| {
+    let lowercase = 'a'..='z';
+    let uppercase = 'A'..='Z';
+    uppercase.chain(lowercase).collect()
+});
+
+/// The ten ASCII decimal digits, `0` through `9`.
+pub static DIGIT: Lazy<HashSet<char>> = Lazy::new(|| {
+    let decimal_digits = '0'..='9';
+    decimal_digits.collect()
+});
+
+/// The characters allowed in a hexadecimal digit: `0` through `9`,
+/// plus `A` through `F` in either upper or lower case.
+pub static HEXDIG: Lazy<HashSet<char>> = Lazy::new(|| {
+    let mut hex_digits: HashSet<char> = ('0'..='9').collect();
+    hex_digits.extend('a'..='f');
+    hex_digits.extend('A'..='F');
+    hex_digits
+});
+
+/// The characters matching the "unreserved" syntax specified in RFC 3986
+/// (<https://tools.ietf.org/html/rfc3986>): letters, digits, `-`, `.`, `_`, `~`.
+pub static UNRESERVED: Lazy<HashSet<char>> = Lazy::new(|| {
+    let marks = ['-', '.', '_', '~'];
+    let mut unreserved: HashSet<char> = ALPHA.iter().copied().collect();
+    unreserved.extend(DIGIT.iter().copied());
+    unreserved.extend(marks.iter().copied());
+    unreserved
+});
+
+/// The characters matching the "sub-delims" syntax specified in
+/// RFC 3986 (<https://tools.ietf.org/html/rfc3986>): eleven
+/// punctuation characters, listed explicitly below.
+pub static SUB_DELIMS: Lazy<HashSet<char>> = Lazy::new(|| {
+    const MEMBERS: [char; 11] = [
+        '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=',
+    ];
+    let mut sub_delims = HashSet::new();
+    sub_delims.extend(MEMBERS.iter().copied());
+    sub_delims
+});
+
+/// The characters matching the second part of the "scheme" syntax
+/// specified in RFC 3986 (<https://tools.ietf.org/html/rfc3986>):
+/// letters, digits, `+`, `-`, and `.`.
+pub static SCHEME_NOT_FIRST: Lazy<HashSet<char>> = Lazy::new(|| {
+    let punctuation = ['+', '-', '.'];
+    let mut scheme_chars: HashSet<char> = punctuation.iter().copied().collect();
+    scheme_chars.extend(ALPHA.iter().copied());
+    scheme_chars.extend(DIGIT.iter().copied());
+    scheme_chars
+});
+
+/// The characters matching the "pchar" syntax specified in RFC 3986
+/// (<https://tools.ietf.org/html/rfc3986>), leaving out the
+/// "pct-encoded" alternative.
+pub static PCHAR_NOT_PCT_ENCODED: Lazy<HashSet<char>> = Lazy::new(|| {
+    let extras = [':', '@'];
+    let mut pchar: HashSet<char> = UNRESERVED.iter().copied().collect();
+    pchar.extend(SUB_DELIMS.iter().copied());
+    pchar.extend(extras.iter().copied());
+    pchar
+});
+
+/// The characters matching both the "query" syntax and the "fragment"
+/// syntax specified in RFC 3986 (<https://tools.ietf.org/html/rfc3986>),
+/// leaving out the "pct-encoded" alternative.
+pub static QUERY_OR_FRAGMENT_NOT_PCT_ENCODED: Lazy<HashSet<char>> = Lazy::new(|| {
+    let mut allowed: HashSet<char> = PCHAR_NOT_PCT_ENCODED.iter().copied().collect();
+    for &extra in &['/', '?'] {
+        allowed.insert(extra);
+    }
+    allowed
+});
+
+/// Almost the characters matching the "query" syntax specified in
+/// RFC 3986 (<https://tools.ietf.org/html/rfc3986>), leaving out the
+/// "pct-encoded" alternative.  The exception is that '+' is also
+/// excluded, because for some web services (e.g. AWS S3) a '+' is treated
+/// as synonymous with a space (' ') and thus gets misinterpreted.
+pub static QUERY_NOT_PCT_ENCODED_WITHOUT_PLUS: Lazy<HashSet<char>> = Lazy::new(|| {
+    // First row: "sub-delims" without '+'; then the "pchar" and "query" extras.
+    const PUNCTUATION: [char; 14] = [
+        '!', '$', '&', '\'', '(', ')', '*', ',', ';', '=',
+        ':', '@',
+        '/', '?',
+    ];
+    let mut allowed: HashSet<char> = UNRESERVED.iter().copied().collect();
+    allowed.extend(PUNCTUATION.iter().copied());
+    allowed
+});
+
+/// The characters matching the "userinfo" syntax specified in RFC 3986
+/// (<https://tools.ietf.org/html/rfc3986>), leaving out the
+/// "pct-encoded" alternative.
+pub static USER_INFO_NOT_PCT_ENCODED: Lazy<HashSet<char>> = Lazy::new(|| {
+    let mut user_info = HashSet::new();
+    user_info.extend(UNRESERVED.iter().copied());
+    user_info.extend(SUB_DELIMS.iter().copied());
+    user_info.insert(':');
+    user_info
+});
+
+/// The characters matching the "reg-name" syntax specified in
+/// RFC 3986 (<https://tools.ietf.org/html/rfc3986>), leaving out
+/// the "pct-encoded" alternative: the union of the "unreserved"
+/// and "sub-delims" sets.
+pub static REG_NAME_NOT_PCT_ENCODED: Lazy<HashSet<char>> = Lazy::new(|| {
+    let unreserved = UNRESERVED.iter().copied();
+    let sub_delims = SUB_DELIMS.iter().copied();
+    unreserved.chain(sub_delims).collect()
+});
+
+/// The characters matching the last part of the "IPvFuture" syntax
+/// specified in RFC 3986 (<https://tools.ietf.org/html/rfc3986>):
+/// the union of the "unreserved" and "sub-delims" sets, plus the
+/// colon.
+pub static IPV_FUTURE_LAST_PART: Lazy<HashSet<char>> = Lazy::new(|| {
+    let unreserved = UNRESERVED.iter().copied();
+    let sub_delims = SUB_DELIMS.iter().copied();
+    let colon = std::iter::once(':');
+    unreserved.chain(sub_delims).chain(colon).collect()
+});