about | summary | refs | log | tree | commit | diff
path: root/src/tokenizer/char_ref/data.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/tokenizer/char_ref/data.rs')
-rw-r--r-- src/tokenizer/char_ref/data.rs | 50
1 files changed, 50 insertions, 0 deletions
diff --git a/src/tokenizer/char_ref/data.rs b/src/tokenizer/char_ref/data.rs
new file mode 100644
index 0000000..fa839ba
--- /dev/null
+++ b/src/tokenizer/char_ref/data.rs
@@ -0,0 +1,50 @@
+// Copyright 2014-2017 The html5ever Project Developers. See the
+// COPYRIGHT file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+//! Data that is known at compile-time and hard-coded into the binary.
+use phf::Map;
+
+/// The spec replaces most characters in the ISO-2022 C1 control code range
+/// (U+0080 through U+009F) with these characters, based on Windows 8-bit
+/// codepages.
+pub static C1_REPLACEMENTS: [Option<char>; 32] = [ // 32 slots; slot i presumably covers U+0080 + i (labels below assume this; confirm at the lookup site)
+ Some('\u{20ac}'), // [0]  U+0080 -> EURO SIGN
+ None, // [1]  U+0081: no replacement listed
+ Some('\u{201a}'), // [2]  U+0082 -> SINGLE LOW-9 QUOTATION MARK
+ Some('\u{0192}'), // [3]  U+0083 -> LATIN SMALL LETTER F WITH HOOK
+ Some('\u{201e}'), // [4]  U+0084 -> DOUBLE LOW-9 QUOTATION MARK
+ Some('\u{2026}'), // [5]  U+0085 -> HORIZONTAL ELLIPSIS
+ Some('\u{2020}'), // [6]  U+0086 -> DAGGER
+ Some('\u{2021}'), // [7]  U+0087 -> DOUBLE DAGGER
+ Some('\u{02c6}'), // [8]  U+0088 -> MODIFIER LETTER CIRCUMFLEX ACCENT
+ Some('\u{2030}'), // [9]  U+0089 -> PER MILLE SIGN
+ Some('\u{0160}'), // [10] U+008A -> LATIN CAPITAL LETTER S WITH CARON
+ Some('\u{2039}'), // [11] U+008B -> SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+ Some('\u{0152}'), // [12] U+008C -> LATIN CAPITAL LIGATURE OE
+ None, // [13] U+008D: no replacement listed
+ Some('\u{017d}'), // [14] U+008E -> LATIN CAPITAL LETTER Z WITH CARON
+ None, // [15] U+008F: no replacement listed
+ None, // [16] U+0090: no replacement listed
+ Some('\u{2018}'), // [17] U+0091 -> LEFT SINGLE QUOTATION MARK
+ Some('\u{2019}'), // [18] U+0092 -> RIGHT SINGLE QUOTATION MARK
+ Some('\u{201c}'), // [19] U+0093 -> LEFT DOUBLE QUOTATION MARK
+ Some('\u{201d}'), // [20] U+0094 -> RIGHT DOUBLE QUOTATION MARK
+ Some('\u{2022}'), // [21] U+0095 -> BULLET
+ Some('\u{2013}'), // [22] U+0096 -> EN DASH
+ Some('\u{2014}'), // [23] U+0097 -> EM DASH
+ Some('\u{02dc}'), // [24] U+0098 -> SMALL TILDE
+ Some('\u{2122}'), // [25] U+0099 -> TRADE MARK SIGN
+ Some('\u{0161}'), // [26] U+009A -> LATIN SMALL LETTER S WITH CARON
+ Some('\u{203a}'), // [27] U+009B -> SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+ Some('\u{0153}'), // [28] U+009C -> LATIN SMALL LIGATURE OE
+ None, // [29] U+009D: no replacement listed
+ Some('\u{017e}'), // [30] U+009E -> LATIN SMALL LETTER Z WITH CARON
+ Some('\u{0178}'), // [31] U+009F -> LATIN CAPITAL LETTER Y WITH DIAERESIS
+];
+
+include!(concat!(env!("OUT_DIR"), "/named_entities.rs")); // Splices in `named_entities.rs` from Cargo's OUT_DIR (build-time output dir); presumably this is what uses the `phf::Map` import above — confirm against the build script.