diff options
Diffstat (limited to 'src/tokenizer/char_ref/data.rs')
-rw-r--r-- | src/tokenizer/char_ref/data.rs | 50 |
1 files changed, 50 insertions, 0 deletions
diff --git a/src/tokenizer/char_ref/data.rs b/src/tokenizer/char_ref/data.rs new file mode 100644 index 0000000..fa839ba --- /dev/null +++ b/src/tokenizer/char_ref/data.rs @@ -0,0 +1,50 @@ +// Copyright 2014-2017 The html5ever Project Developers. See the +// COPYRIGHT file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. +//! Data that is known at compile-time and hard-coded into the binary. +use phf::Map; + +/// The spec replaces most characters in the ISO-2022 C1 control code range +/// (U+0080 through U+009F) with these characters, based on Windows 8-bit +/// codepages. +pub static C1_REPLACEMENTS: [Option<char>; 32] = [ + Some('\u{20ac}'), + None, + Some('\u{201a}'), + Some('\u{0192}'), + Some('\u{201e}'), + Some('\u{2026}'), + Some('\u{2020}'), + Some('\u{2021}'), + Some('\u{02c6}'), + Some('\u{2030}'), + Some('\u{0160}'), + Some('\u{2039}'), + Some('\u{0152}'), + None, + Some('\u{017d}'), + None, + None, + Some('\u{2018}'), + Some('\u{2019}'), + Some('\u{201c}'), + Some('\u{201d}'), + Some('\u{2022}'), + Some('\u{2013}'), + Some('\u{2014}'), + Some('\u{02dc}'), + Some('\u{2122}'), + Some('\u{0161}'), + Some('\u{203a}'), + Some('\u{0153}'), + None, + Some('\u{017e}'), + Some('\u{0178}'), +]; + +include!(concat!(env!("OUT_DIR"), "/named_entities.rs")); |