diff options
author | Martin Fischer <martin@push-f.com> | 2021-11-12 03:47:19 +0100 |
---|---|---|
committer | Martin Fischer <martin@push-f.com> | 2021-11-12 03:52:47 +0100 |
commit | c914b71a28ce7177171b83df2815352bf5741844 (patch) | |
tree | e11a30a30730a9fc3dd03ff49e3a5db029ce253d | |
parent | ea7c3c9b8b45d846bcb45573c9b2250d8152be30 (diff) |
fix named entities
In 462bb0ef0ba9e027f5138c87438328db718d15da I dropped the
markup5ever::data dependency but forgot to carry over its
named_entities data. This commit remedies that mistake.
-rw-r--r-- | Cargo.toml | 6 | ||||
-rw-r--r-- | README.md | 2 | ||||
-rw-r--r-- | build.rs | 61 | ||||
-rw-r--r-- | entities.rs | 2233 | ||||
-rw-r--r-- | src/tokenizer/char_ref/data.rs | 50 | ||||
-rw-r--r-- | src/tokenizer/char_ref/mod.rs | 58 |
6 files changed, 2369 insertions, 41 deletions
@@ -10,6 +10,12 @@ categories = [ "parser-implementations", "web-programming" ] keywords = ["html", "html5", "tokenizer", "parser"] edition = "2018" +[dependencies] +phf = "0.9" + +[build-dependencies] +phf_codegen = "0.9" + [dev-dependencies] typed-arena = "1.3.0" criterion = "0.3" @@ -4,7 +4,7 @@ This crate provides the tokenizer from [html5ever](https://crates.io/crates/html repackaged with all of its dependencies removed. The following dependencies were removed: * [markup5ever](https://crates.io/crates/markup5ever) - `buffer_queue` and `smallcharset` were merged into the source code + `buffer_queue`, `smallcharset` and the entity data were merged into the source code * [tendril](https://crates.io/crates/tendril) According to its README it contains "a substantial amount of unsafe code". diff --git a/build.rs b/build.rs new file mode 100644 index 0000000..8d4404c --- /dev/null +++ b/build.rs @@ -0,0 +1,61 @@ +// Copyright 2014-2017 The html5ever Project Developers. See the +// COPYRIGHT file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +extern crate phf_codegen; + +use std::collections::HashMap; +use std::env; +use std::fs::File; +use std::io::Write; +use std::path::Path; + +mod entities; + +fn main() { + named_entities_to_phf(&Path::new(&env::var("OUT_DIR").unwrap()).join("named_entities.rs")); +} + +fn named_entities_to_phf(to: &Path) { + let mut entities: HashMap<&str, (u32, u32)> = entities::NAMED_ENTITIES + .iter() + .map(|(name, cp1, cp2)| { + assert!(name.starts_with('&')); + (&name[1..], (*cp1, *cp2)) + }) + .collect(); + + // Add every missing prefix of those keys, mapping to NULL characters. 
+ for key in entities.keys().cloned().collect::<Vec<_>>() { + for n in 1..key.len() { + entities.entry(&key[..n]).or_insert((0, 0)); + } + } + entities.insert("", (0, 0)); + + let mut phf_map = phf_codegen::Map::new(); + for (key, value) in entities { + phf_map.entry(key, &format!("{:?}", value)); + } + + let mut file = File::create(to).unwrap(); + writeln!( + &mut file, + r#" +/// A map of entity names to their codepoints. The second codepoint will +/// be 0 if the entity contains a single codepoint. Entities have their preceeding '&' removed. +"# + ) + .unwrap(); + writeln!( + &mut file, + "pub static NAMED_ENTITIES: Map<&'static str, (u32, u32)> = {};", + phf_map.build(), + ) + .unwrap(); +} diff --git a/entities.rs b/entities.rs new file mode 100644 index 0000000..324d90c --- /dev/null +++ b/entities.rs @@ -0,0 +1,2233 @@ +pub const NAMED_ENTITIES: [(&str, u32, u32); 2231] = [ + ("Á", 193, 0), + ("Á", 193, 0), + ("á", 225, 0), + ("á", 225, 0), + ("Ă", 258, 0), + ("ă", 259, 0), + ("∾", 8766, 0), + ("∿", 8767, 0), + ("∾̳", 8766, 819), + ("Â", 194, 0), + ("Â", 194, 0), + ("â", 226, 0), + ("â", 226, 0), + ("´", 180, 0), + ("´", 180, 0), + ("А", 1040, 0), + ("а", 1072, 0), + ("Æ", 198, 0), + ("Æ", 198, 0), + ("æ", 230, 0), + ("æ", 230, 0), + ("⁡", 8289, 0), + ("𝔄", 120068, 0), + ("𝔞", 120094, 0), + ("À", 192, 0), + ("À", 192, 0), + ("à", 224, 0), + ("à", 224, 0), + ("ℵ", 8501, 0), + ("ℵ", 8501, 0), + ("Α", 913, 0), + ("α", 945, 0), + ("Ā", 256, 0), + ("ā", 257, 0), + ("⨿", 10815, 0), + ("&", 38, 0), + ("&", 38, 0), + ("&", 38, 0), + ("&", 38, 0), + ("⩕", 10837, 0), + ("⩓", 10835, 0), + ("∧", 8743, 0), + ("⩜", 10844, 0), + ("⩘", 10840, 0), + ("⩚", 10842, 0), + ("∠", 8736, 0), + ("⦤", 10660, 0), + ("∠", 8736, 0), + ("⦨", 10664, 0), + ("⦩", 10665, 0), + ("⦪", 10666, 0), + ("⦫", 10667, 0), + ("⦬", 10668, 0), + ("⦭", 10669, 0), + ("⦮", 10670, 0), + ("⦯", 10671, 0), + ("∡", 8737, 0), + ("∟", 8735, 0), + ("⊾", 8894, 0), + ("⦝", 10653, 0), + ("∢", 8738, 0), + ("Å", 197, 0), 
+ ("⍼", 9084, 0), + ("Ą", 260, 0), + ("ą", 261, 0), + ("𝔸", 120120, 0), + ("𝕒", 120146, 0), + ("⩯", 10863, 0), + ("≈", 8776, 0), + ("⩰", 10864, 0), + ("≊", 8778, 0), + ("≋", 8779, 0), + ("'", 39, 0), + ("⁡", 8289, 0), + ("≈", 8776, 0), + ("≊", 8778, 0), + ("Å", 197, 0), + ("Å", 197, 0), + ("å", 229, 0), + ("å", 229, 0), + ("𝒜", 119964, 0), + ("𝒶", 119990, 0), + ("≔", 8788, 0), + ("*", 42, 0), + ("≈", 8776, 0), + ("≍", 8781, 0), + ("Ã", 195, 0), + ("Ã", 195, 0), + ("ã", 227, 0), + ("ã", 227, 0), + ("Ä", 196, 0), + ("Ä", 196, 0), + ("ä", 228, 0), + ("ä", 228, 0), + ("∳", 8755, 0), + ("⨑", 10769, 0), + ("≌", 8780, 0), + ("϶", 1014, 0), + ("‵", 8245, 0), + ("∽", 8765, 0), + ("⋍", 8909, 0), + ("∖", 8726, 0), + ("⫧", 10983, 0), + ("⊽", 8893, 0), + ("⌅", 8965, 0), + ("⌆", 8966, 0), + ("⌅", 8965, 0), + ("⎵", 9141, 0), + ("⎶", 9142, 0), + ("≌", 8780, 0), + ("Б", 1041, 0), + ("б", 1073, 0), + ("„", 8222, 0), + ("∵", 8757, 0), + ("∵", 8757, 0), + ("∵", 8757, 0), + ("⦰", 10672, 0), + ("϶", 1014, 0), + ("ℬ", 8492, 0), + ("ℬ", 8492, 0), + ("Β", 914, 0), + ("β", 946, 0), + ("ℶ", 8502, 0), + ("≬", 8812, 0), + ("𝔅", 120069, 0), + ("𝔟", 120095, 0), + ("⋂", 8898, 0), + ("◯", 9711, 0), + ("⋃", 8899, 0), + ("⨀", 10752, 0), + ("⨁", 10753, 0), + ("⨂", 10754, 0), + ("⨆", 10758, 0), + ("★", 9733, 0), + ("▽", 9661, 0), + ("△", 9651, 0), + ("⨄", 10756, 0), + ("⋁", 8897, 0), + ("⋀", 8896, 0), + ("⤍", 10509, 0), + ("⧫", 10731, 0), + ("▪", 9642, 0), + ("▴", 9652, 0), + ("▾", 9662, 0), + ("◂", 9666, 0), + ("▸", 9656, 0), + ("␣", 9251, 0), + ("▒", 9618, 0), + ("░", 9617, 0), + ("▓", 9619, 0), + ("█", 9608, 0), + ("=⃥", 61, 8421), + ("≡⃥", 8801, 8421), + ("⫭", 10989, 0), + ("⌐", 8976, 0), + ("𝔹", 120121, 0), + ("𝕓", 120147, 0), + ("⊥", 8869, 0), + ("⊥", 8869, 0), + ("⋈", 8904, 0), + ("⧉", 10697, 0), + ("┐", 9488, 0), + ("╕", 9557, 0), + ("╖", 9558, 0), + ("╗", 9559, 0), + ("┌", 9484, 0), + ("╒", 9554, 0), + ("╓", 9555, 0), + ("╔", 9556, 0), + ("─", 9472, 0), + ("═", 9552, 0), + ("┬", 9516, 0), + 
("╤", 9572, 0), + ("╥", 9573, 0), + ("╦", 9574, 0), + ("┴", 9524, 0), + ("╧", 9575, 0), + ("╨", 9576, 0), + ("╩", 9577, 0), + ("⊟", 8863, 0), + ("⊞", 8862, 0), + ("⊠", 8864, 0), + ("┘", 9496, 0), + ("╛", 9563, 0), + ("╜", 9564, 0), + ("╝", 9565, 0), + ("└", 9492, 0), + ("╘", 9560, 0), + ("╙", 9561, 0), + ("╚", 9562, 0), + ("│", 9474, 0), + ("║", 9553, 0), + ("┼", 9532, 0), + ("╪", 9578, 0), + ("╫", 9579, 0), + ("╬", 9580, 0), + ("┤", 9508, 0), + ("╡", 9569, 0), + ("╢", 9570, 0), + ("╣", 9571, 0), + ("├", 9500, 0), + ("╞", 9566, 0), + ("╟", 9567, 0), + ("╠", 9568, 0), + ("‵", 8245, 0), + ("˘", 728, 0), + ("˘", 728, 0), + ("¦", 166, 0), + ("¦", 166, 0), + ("𝒷", 119991, 0), + ("ℬ", 8492, 0), + ("⁏", 8271, 0), + ("∽", 8765, 0), + ("⋍", 8909, 0), + ("⧅", 10693, 0), + ("\", 92, 0), + ("⟈", 10184, 0), + ("•", 8226, 0), + ("•", 8226, 0), + ("≎", 8782, 0), + ("⪮", 10926, 0), + ("≏", 8783, 0), + ("≎", 8782, 0), + ("≏", 8783, 0), + ("Ć", 262, 0), + ("ć", 263, 0), + ("⩄", 10820, 0), + ("⩉", 10825, 0), + ("⩋", 10827, 0), + ("∩", 8745, 0), + ("⋒", 8914, 0), + ("⩇", 10823, 0), + ("⩀", 10816, 0), + ("ⅅ", 8517, 0), + ("∩︀", 8745, 65024), + ("⁁", 8257, 0), + ("ˇ", 711, 0), + ("ℭ", 8493, 0), + ("⩍", 10829, 0), + ("Č", 268, 0), + ("č", 269, 0), + ("Ç", 199, 0), + ("Ç", 199, 0), + ("ç", 231, 0), + ("ç", 231, 0), + ("Ĉ", 264, 0), + ("ĉ", 265, 0), + ("∰", 8752, 0), + ("⩌", 10828, 0), + ("⩐", 10832, 0), + ("Ċ", 266, 0), + ("ċ", 267, 0), + ("¸", 184, 0), + ("¸", 184, 0), + ("¸", 184, 0), + ("⦲", 10674, 0), + ("¢", 162, 0), + ("¢", 162, 0), + ("·", 183, 0), + ("·", 183, 0), + ("𝔠", 120096, 0), + ("ℭ", 8493, 0), + ("Ч", 1063, 0), + ("ч", 1095, 0), + ("✓", 10003, 0), + ("✓", 10003, 0), + ("Χ", 935, 0), + ("χ", 967, 0), + ("ˆ", 710, 0), + ("≗", 8791, 0), + ("↺", 8634, 0), + ("↻", 8635, 0), + ("⊛", 8859, 0), + ("⊚", 8858, 0), + ("⊝", 8861, 0), + ("⊙", 8857, 0), + ("®", 174, 0), + ("Ⓢ", 9416, 0), + ("⊖", 8854, 0), + ("⊕", 8853, 0), + ("⊗", 8855, 0), + ("○", 9675, 0), + ("⧃", 10691, 0), + ("≗", 
8791, 0), + ("⨐", 10768, 0), + ("⫯", 10991, 0), + ("⧂", 10690, 0), + ("∲", 8754, 0), + ("”", 8221, 0), + ("’", 8217, 0), + ("♣", 9827, 0), + ("♣", 9827, 0), + (":", 58, 0), + ("∷", 8759, 0), + ("⩴", 10868, 0), + ("≔", 8788, 0), + ("≔", 8788, 0), + (",", 44, 0), + ("@", 64, 0), + ("∁", 8705, 0), + ("∘", 8728, 0), + ("∁", 8705, 0), + ("ℂ", 8450, 0), + ("≅", 8773, 0), + ("⩭", 10861, 0), + ("≡", 8801, 0), + ("∮", 8750, 0), + ("∯", 8751, 0), + ("∮", 8750, 0), + ("𝕔", 120148, 0), + ("ℂ", 8450, 0), + ("∐", 8720, 0), + ("∐", 8720, 0), + ("©", 169, 0), + ("©", 169, 0), + ("©", 169, 0), + ("©", 169, 0), + ("℗", 8471, 0), + ("∳", 8755, 0), + ("↵", 8629, 0), + ("✗", 10007, 0), + ("⨯", 10799, 0), + ("𝒞", 119966, 0), + ("𝒸", 119992, 0), + ("⫏", 10959, 0), + ("⫑", 10961, 0), + ("⫐", 10960, 0), + ("⫒", 10962, 0), + ("⋯", 8943, 0), + ("⤸", 10552, 0), + ("⤵", 10549, 0), + ("⋞", 8926, 0), + ("⋟", 8927, 0), + ("↶", 8630, 0), + ("⤽", 10557, 0), + ("⩈", 10824, 0), + ("⩆", 10822, 0), + ("≍", 8781, 0), + ("∪", 8746, 0), + ("⋓", 8915, 0), + ("⩊", 10826, 0), + ("⊍", 8845, 0), + ("⩅", 10821, 0), + ("∪︀", 8746, 65024), + ("↷", 8631, 0), + ("⤼", 10556, 0), + ("⋞", 8926, 0), + ("⋟", 8927, 0), + ("⋎", 8910, 0), + ("⋏", 8911, 0), + ("¤", 164, 0), + ("¤", 164, 0), + ("↶", 8630, 0), + ("↷", 8631, 0), + ("⋎", 8910, 0), + ("⋏", 8911, 0), + ("∲", 8754, 0), + ("∱", 8753, 0), + ("⌭", 9005, 0), + ("†", 8224, 0), + ("‡", 8225, 0), + ("ℸ", 8504, 0), + ("↓", 8595, 0), + ("↡", 8609, 0), + ("⇓", 8659, 0), + ("‐", 8208, 0), + ("⫤", 10980, 0), + ("⊣", 8867, 0), + ("⤏", 10511, 0), + ("˝", 733, 0), + ("Ď", 270, 0), + ("ď", 271, 0), + ("Д", 1044, 0), + ("д", 1076, 0), + ("‡", 8225, 0), + ("⇊", 8650, 0), + ("ⅅ", 8517, 0), + ("ⅆ", 8518, 0), + ("⤑", 10513, 0), + ("⩷", 10871, 0), + ("°", 176, 0), + ("°", 176, 0), + ("∇", 8711, 0), + ("Δ", 916, 0), + ("δ", 948, 0), + ("⦱", 10673, 0), + ("⥿", 10623, 0), + ("𝔇", 120071, 0), + ("𝔡", 120097, 0), + ("⥥", 10597, 0), + ("⇃", 8643, 0), + ("⇂", 8642, 0), + ("´", 180, 0), + 
("˙", 729, 0), + ("˝", 733, 0), + ("`", 96, 0), + ("˜", 732, 0), + ("⋄", 8900, 0), + ("⋄", 8900, 0), + ("⋄", 8900, 0), + ("♦", 9830, 0), + ("♦", 9830, 0), + ("¨", 168, 0), + ("ⅆ", 8518, 0), + ("ϝ", 989, 0), + ("⋲", 8946, 0), + ("÷", 247, 0), + ("÷", 247, 0), + ("÷", 247, 0), + ("⋇", 8903, 0), + ("⋇", 8903, 0), + ("Ђ", 1026, 0), + ("ђ", 1106, 0), + ("⌞", 8990, 0), + ("⌍", 8973, 0), + ("$", 36, 0), + ("𝔻", 120123, 0), + ("𝕕", 120149, 0), + ("¨", 168, 0), + ("˙", 729, 0), + ("⃜", 8412, 0), + ("≐", 8784, 0), + ("≑", 8785, 0), + ("≐", 8784, 0), + ("∸", 8760, 0), + ("∔", 8724, 0), + ("⊡", 8865, 0), + ("⌆", 8966, 0), + ("∯", 8751, 0), + ("¨", 168, 0), + ("⇓", 8659, 0), + ("⇐", 8656, 0), + ("⇔", 8660, 0), + ("⫤", 10980, 0), + ("⟸", 10232, 0), + ("⟺", 10234, 0), + ("⟹", 10233, 0), + ("⇒", 8658, 0), + ("⊨", 8872, 0), + ("⇑", 8657, 0), + ("⇕", 8661, 0), + ("∥", 8741, 0), + ("⤓", 10515, 0), + ("↓", 8595, 0), + ("↓", 8595, 0), + ("⇓", 8659, 0), + ("⇵", 8693, 0), + ("̑", 785, 0), + ("⇊", 8650, 0), + ("⇃", 8643, 0), + ("⇂", 8642, 0), + ("⥐", 10576, 0), + ("⥞", 10590, 0), + ("⥖", 10582, 0), + ("↽", 8637, 0), + ("⥟", 10591, 0), + ("⥗", 10583, 0), + ("⇁", 8641, 0), + ("↧", 8615, 0), + ("⊤", 8868, 0), + ("⤐", 10512, 0), + ("⌟", 8991, 0), + ("⌌", 8972, 0), + ("𝒟", 119967, 0), + ("𝒹", 119993, 0), + ("Ѕ", 1029, 0), + ("ѕ", 1109, 0), + ("⧶", 10742, 0), + ("Đ", 272, 0), + ("đ", 273, 0), + ("⋱", 8945, 0), + ("▿", 9663, 0), + ("▾", 9662, 0), + ("⇵", 8693, 0), + ("⥯", 10607, 0), + ("⦦", 10662, 0), + ("Џ", 1039, 0), + ("џ", 1119, 0), + ("⟿", 10239, 0), + ("É", 201, 0), + ("É", 201, 0), + ("é", 233, 0), + ("é", 233, 0), + ("⩮", 10862, 0), + ("Ě", 282, 0), + ("ě", 283, 0), + ("Ê", 202, 0), + ("Ê", 202, 0), + ("ê", 234, 0), + ("ê", 234, 0), + ("≖", 8790, 0), + ("≕", 8789, 0), + ("Э", 1069, 0), + ("э", 1101, 0), + ("⩷", 10871, 0), + ("Ė", 278, 0), + ("ė", 279, 0), + ("≑", 8785, 0), + ("ⅇ", 8519, 0), + ("≒", 8786, 0), + ("𝔈", 120072, 0), + ("𝔢", 120098, 0), + ("⪚", 10906, 0), + ("È", 200, 0), + 
("È", 200, 0), + ("è", 232, 0), + ("è", 232, 0), + ("⪖", 10902, 0), + ("⪘", 10904, 0), + ("⪙", 10905, 0), + ("∈", 8712, 0), + ("⏧", 9191, 0), + ("ℓ", 8467, 0), + ("⪕", 10901, 0), + ("⪗", 10903, 0), + ("Ē", 274, 0), + ("ē", 275, 0), + ("∅", 8709, 0), + ("∅", 8709, 0), + ("◻", 9723, 0), + ("∅", 8709, 0), + ("▫", 9643, 0), + (" ", 8196, 0), + (" ", 8197, 0), + (" ", 8195, 0), + ("Ŋ", 330, 0), + ("ŋ", 331, 0), + (" ", 8194, 0), + ("Ę", 280, 0), + ("ę", 281, 0), + ("𝔼", 120124, 0), + ("𝕖", 120150, 0), + ("⋕", 8917, 0), + ("⧣", 10723, 0), + ("⩱", 10865, 0), + ("ε", 949, 0), + ("Ε", 917, 0), + ("ε", 949, 0), + ("ϵ", 1013, 0), + ("≖", 8790, 0), + ("≕", 8789, 0), + ("≂", 8770, 0), + ("⪖", 10902, 0), + ("⪕", 10901, 0), + ("⩵", 10869, 0), + ("=", 61, 0), + ("≂", 8770, 0), + ("≟", 8799, 0), + ("⇌", 8652, 0), + ("≡", 8801, 0), + ("⩸", 10872, 0), + ("⧥", 10725, 0), + ("⥱", 10609, 0), + ("≓", 8787, 0), + ("ℯ", 8495, 0), + ("ℰ", 8496, 0), + ("≐", 8784, 0), + ("⩳", 10867, 0), + ("≂", 8770, 0), + ("Η", 919, 0), + ("η", 951, 0), + ("Ð", 208, 0), + ("Ð", 208, 0), + ("ð", 240, 0), + ("ð", 240, 0), + ("Ë", 203, 0), + ("Ë", 203, 0), + ("ë", 235, 0), + ("ë", 235, 0), + ("€", 8364, 0), + ("!", 33, 0), + ("∃", 8707, 0), + ("∃", 8707, 0), + ("ℰ", 8496, 0), + ("ⅇ", 8519, 0), + ("ⅇ", 8519, 0), + ("≒", 8786, 0), + ("Ф", 1060, 0), + ("ф", 1092, 0), + ("♀", 9792, 0), + ("ffi", 64259, 0), + ("ff", 64256, 0), + ("ffl", 64260, 0), + ("𝔉", 120073, 0), + ("𝔣", 120099, 0), + ("fi", 64257, 0), + ("◼", 9724, 0), + ("▪", 9642, 0), + ("fj", 102, 106), + ("♭", 9837, 0), + ("fl", 64258, 0), + ("▱", 9649, 0), + ("ƒ", 402, 0), + ("𝔽", 120125, 0), + ("𝕗", 120151, 0), + ("∀", 8704, 0), + ("∀", 8704, 0), + ("⋔", 8916, 0), + ("⫙", 10969, 0), + ("ℱ", 8497, 0), + ("⨍", 10765, 0), + ("½", 189, 0), + ("½", 189, 0), + ("⅓", 8531, 0), + ("¼", 188, 0), + ("¼", 188, 0), + ("⅕", 8533, 0), + ("⅙", 8537, 0), + ("⅛", 8539, 0), + ("⅔", 8532, 0), + ("⅖", 8534, 0), + ("¾", 190, 0), + ("¾", 190, 0), + ("⅗", 8535, 0), + ("⅜", 
8540, 0), + ("⅘", 8536, 0), + ("⅚", 8538, 0), + ("⅝", 8541, 0), + ("⅞", 8542, 0), + ("⁄", 8260, 0), + ("⌢", 8994, 0), + ("𝒻", 119995, 0), + ("ℱ", 8497, 0), + ("ǵ", 501, 0), + ("Γ", 915, 0), + ("γ", 947, 0), + ("Ϝ", 988, 0), + ("ϝ", 989, 0), + ("⪆", 10886, 0), + ("Ğ", 286, 0), + ("ğ", 287, 0), + ("Ģ", 290, 0), + ("Ĝ", 284, 0), + ("ĝ", 285, 0), + ("Г", 1043, 0), + ("г", 1075, 0), + ("Ġ", 288, 0), + ("ġ", 289, 0), + ("≥", 8805, 0), + ("≧", 8807, 0), + ("⪌", 10892, 0), + ("⋛", 8923, 0), + ("≥", 8805, 0), + ("≧", 8807, 0), + ("⩾", 10878, 0), + ("⪩", 10921, 0), + ("⩾", 10878, 0), + ("⪀", 10880, 0), + ("⪂", 10882, 0), + ("⪄", 10884, 0), + ("⋛︀", 8923, 65024), + ("⪔", 10900, 0), + ("𝔊", 120074, 0), + ("𝔤", 120100, 0), + ("≫", 8811, 0), + ("⋙", 8921, 0), + ("⋙", 8921, 0), + ("ℷ", 8503, 0), + ("Ѓ", 1027, 0), + ("ѓ", 1107, 0), + ("⪥", 10917, 0), + ("≷", 8823, 0), + ("⪒", 10898, 0), + ("⪤", 10916, 0), + ("⪊", 10890, 0), + ("⪊", 10890, 0), + ("⪈", 10888, 0), + ("≩", 8809, 0), + ("⪈", 10888, 0), + ("≩", 8809, 0), + ("⋧", 8935, 0), + ("𝔾", 120126, 0), + ("𝕘", 120152, 0), + ("`", 96, 0), + ("≥", 8805, 0), + ("⋛", 8923, 0), + ("≧", 8807, 0), + ("⪢", 10914, 0), + ("≷", 8823, 0), + ("⩾", 10878, 0), + ("≳", 8819, 0), + ("𝒢", 119970, 0), + ("ℊ", 8458, 0), + ("≳", 8819, 0), + ("⪎", 10894, 0), + ("⪐", 10896, 0), + ("⪧", 10919, 0), + ("⩺", 10874, 0), + (">", 62, 0), + (">", 62, 0), + (">", 62, 0), + (">", 62, 0), + ("≫", 8811, 0), + ("⋗", 8919, 0), + ("⦕", 10645, 0), + ("⩼", 10876, 0), + ("⪆", 10886, 0), + ("⥸", 10616, 0), + ("⋗", 8919, 0), + ("⋛", 8923, 0), + ("⪌", 10892, 0), + ("≷", 8823, 0), + ("≳", 8819, 0), + ("≩︀", 8809, 65024), + ("≩︀", 8809, 65024), + ("ˇ", 711, 0), + (" ", 8202, 0), + ("½", 189, 0), + ("ℋ", 8459, 0), + ("Ъ", 1066, 0), + ("ъ", 1098, 0), + ("⥈", 10568, 0), + ("↔", 8596, 0), + ("⇔", 8660, 0), + ("↭", 8621, 0), + ("^", 94, 0), + ("ℏ", 8463, 0), + ("Ĥ", 292, 0), + ("ĥ", 293, 0), + ("♥", 9829, 0), + ("♥", 9829, 0), + ("…", 8230, 0), + ("⊹", 8889, 0), + ("𝔥", 120101, 
0), + ("ℌ", 8460, 0), + ("ℋ", 8459, 0), + ("⤥", 10533, 0), + ("⤦", 10534, 0), + ("⇿", 8703, 0), + ("∻", 8763, 0), + ("↩", 8617, 0), + ("↪", 8618, 0), + ("𝕙", 120153, 0), + ("ℍ", 8461, 0), + ("―", 8213, 0), + ("─", 9472, 0), + ("𝒽", 119997, 0), + ("ℋ", 8459, 0), + ("ℏ", 8463, 0), + ("Ħ", 294, 0), + ("ħ", 295, 0), + ("≎", 8782, 0), + ("≏", 8783, 0), + ("⁃", 8259, 0), + ("‐", 8208, 0), + ("Í", 205, 0), + ("Í", 205, 0), + ("í", 237, 0), + ("í", 237, 0), + ("⁣", 8291, 0), + ("Î", 206, 0), + ("Î", 206, 0), + ("î", 238, 0), + ("î", 238, 0), + ("И", 1048, 0), + ("и", 1080, 0), + ("İ", 304, 0), + ("Е", 1045, 0), + ("е", 1077, 0), + ("¡", 161, 0), + ("¡", 161, 0), + ("⇔", 8660, 0), + ("𝔦", 120102, 0), + ("ℑ", 8465, 0), + ("Ì", 204, 0), + ("Ì", 204, 0), + ("ì", 236, 0), + ("ì", 236, 0), + ("ⅈ", 8520, 0), + ("⨌", 10764, 0), + ("∭", 8749, 0), + ("⧜", 10716, 0), + ("℩", 8489, 0), + ("IJ", 306, 0), + ("ij", 307, 0), + ("Ī", 298, 0), + ("ī", 299, 0), + ("ℑ", 8465, 0), + ("ⅈ", 8520, 0), + ("ℐ", 8464, 0), + ("ℑ", 8465, 0), + ("ı", 305, 0), + ("ℑ", 8465, 0), + ("⊷", 8887, 0), + ("Ƶ", 437, 0), + ("⇒", 8658, 0), + ("℅", 8453, 0), + ("∈", 8712, 0), + ("∞", 8734, 0), + ("⧝", 10717, 0), + ("ı", 305, 0), + ("⊺", 8890, 0), + ("∫", 8747, 0), + ("∬", 8748, 0), + ("ℤ", 8484, 0), + ("∫", 8747, 0), + ("⊺", 8890, 0), + ("⋂", 8898, 0), + ("⨗", 10775, 0), + ("⨼", 10812, 0), + ("⁣", 8291, 0), + ("⁢", 8290, 0), + ("Ё", 1025, 0), + ("ё", 1105, 0), + ("Į", 302, 0), + ("į", 303, 0), + ("𝕀", 120128, 0), + ("𝕚", 120154, 0), + ("Ι", 921, 0), + ("ι", 953, 0), + ("⨼", 10812, 0), + ("¿", 191, 0), + ("¿", 191, 0), + ("𝒾", 119998, 0), + ("ℐ", 8464, 0), + ("∈", 8712, 0), + ("⋵", 8949, 0), + ("⋹", 8953, 0), + ("⋴", 8948, 0), + ("⋳", 8947, 0), + ("∈", 8712, 0), + ("⁢", 8290, 0), + ("Ĩ", 296, 0), + ("ĩ", 297, 0), + ("І", 1030, 0), + ("і", 1110, 0), + ("Ï", 207, 0), + ("Ï", 207, 0), + ("ï", 239, 0), + ("ï", 239, 0), + ("Ĵ", 308, 0), + ("ĵ", 309, 0), + ("Й", 1049, 0), + ("й", 1081, 0), + ("𝔍", 120077, 0), + ("𝔧", 
120103, 0), + ("ȷ", 567, 0), + ("𝕁", 120129, 0), + ("𝕛", 120155, 0), + ("𝒥", 119973, 0), + ("𝒿", 119999, 0), + ("Ј", 1032, 0), + ("ј", 1112, 0), + ("Є", 1028, 0), + ("є", 1108, 0), + ("Κ", 922, 0), + ("κ", 954, 0), + ("ϰ", 1008, 0), + ("Ķ", 310, 0), + ("ķ", 311, 0), + ("К", 1050, 0), + ("к", 1082, 0), + ("𝔎", 120078, 0), + ("𝔨", 120104, 0), + ("ĸ", 312, 0), + ("Х", 1061, 0), + ("х", 1093, 0), + ("Ќ", 1036, 0), + ("ќ", 1116, 0), + ("𝕂", 120130, 0), + ("𝕜", 120156, 0), + ("𝒦", 119974, 0), + ("𝓀", 120000, 0), + ("⇚", 8666, 0), + ("Ĺ", 313, 0), + ("ĺ", 314, 0), + ("⦴", 10676, 0), + ("ℒ", 8466, 0), + ("Λ", 923, 0), + ("λ", 955, 0), + ("⟨", 10216, 0), + ("⟪", 10218, 0), + ("⦑", 10641, 0), + ("⟨", 10216, 0), + ("⪅", 10885, 0), + ("ℒ", 8466, 0), + ("«", 171, 0), + ("«", 171, 0), + ("⇤", 8676, 0), + ("⤟", 10527, 0), + ("←", 8592, 0), + ("↞", 8606, 0), + ("⇐", 8656, 0), + ("⤝", 10525, 0), + ("↩", 8617, 0), + ("↫", 8619, 0), + ("⤹", 10553, 0), + ("⥳", 10611, 0), + ("↢", 8610, 0), + ("⤙", 10521, 0), + ("⤛", 10523, 0), + ("⪫", 10923, 0), + ("⪭", 10925, 0), + ("⪭︀", 10925, 65024), + ("⤌", 10508, 0), + ("⤎", 10510, 0), + ("❲", 10098, 0), + ("{", 123, 0), + ("[", 91, 0), + ("⦋", 10635, 0), + ("⦏", 10639, 0), + ("⦍", 10637, 0), + ("Ľ", 317, 0), + ("ľ", 318, 0), + ("Ļ", 315, 0), + ("ļ", 316, 0), + ("⌈", 8968, 0), + ("{", 123, 0), + ("Л", 1051, 0), + ("л", 1083, 0), + ("⤶", 10550, 0), + ("“", 8220, 0), + ("„", 8222, 0), + ("⥧", 10599, 0), + ("⥋", 10571, 0), + ("↲", 8626, 0), + ("≤", 8804, 0), + ("≦", 8806, 0), + ("⟨", 10216, 0), + ("⇤", 8676, 0), + ("←", 8592, 0), + ("←", 8592, 0), + ("⇐", 8656, 0), + ("⇆", 8646, 0), + ("↢", 8610, 0), + ("⌈", 8968, 0), + ("⟦", 10214, 0), + ("⥡", 10593, 0), + ("⥙", 10585, 0), + ("⇃", 8643, 0), + ("⌊", 8970, 0), + ("↽", 8637, 0), + ("↼", 8636, 0), + ("⇇", 8647, 0), + ("↔", 8596, 0), + ("↔", 8596, 0), + ("⇔", 8660, 0), + ("⇆", 8646, 0), + ("⇋", 8651, 0), + ("↭", 8621, 0), + ("⥎", 10574, 0), + ("↤", 8612, 0), + ("⊣", 8867, 0), + ("⥚", 10586, 0), + ("⋋", 
8907, 0), + ("⧏", 10703, 0), + ("⊲", 8882, 0), + ("⊴", 8884, 0), + ("⥑", 10577, 0), + ("⥠", 10592, 0), + ("⥘", 10584, 0), + ("↿", 8639, 0), + ("⥒", 10578, 0), + ("↼", 8636, 0), + ("⪋", 10891, 0), + ("⋚", 8922, 0), + ("≤", 8804, 0), + ("≦", 8806, 0), + ("⩽", 10877, 0), + ("⪨", 10920, 0), + ("⩽", 10877, 0), + ("⩿", 10879, 0), + ("⪁", 10881, 0), + ("⪃", 10883, 0), + ("⋚︀", 8922, 65024), + ("⪓", 10899, 0), + ("⪅", 10885, 0), + ("⋖", 8918, 0), + ("⋚", 8922, 0), + ("⪋", 10891, 0), + ("⋚", 8922, 0), + ("≦", 8806, 0), + ("≶", 8822, 0), + ("≶", 8822, 0), + ("⪡", 10913, 0), + ("≲", 8818, 0), + ("⩽", 10877, 0), + ("≲", 8818, 0), + ("⥼", 10620, 0), + ("⌊", 8970, 0), + ("𝔏", 120079, 0), + ("𝔩", 120105, 0), + ("≶", 8822, 0), + ("⪑", 10897, 0), + ("⥢", 10594, 0), + ("↽", 8637, 0), + ("↼", 8636, 0), + ("⥪", 10602, 0), + ("▄", 9604, 0), + ("Љ", 1033, 0), + ("љ", 1113, 0), + ("⇇", 8647, 0), + ("≪", 8810, 0), + ("⋘", 8920, 0), + ("⌞", 8990, 0), + ("⇚", 8666, 0), + ("⥫", 10603, 0), + ("◺", 9722, 0), + ("Ŀ", 319, 0), + ("ŀ", 320, 0), + ("⎰", 9136, 0), + ("⎰", 9136, 0), + ("⪉", 10889, 0), + ("⪉", 10889, 0), + ("⪇", 10887, 0), + ("≨", 8808, 0), + ("⪇", 10887, 0), + ("≨", 8808, 0), + ("⋦", 8934, 0), + ("⟬", 10220, 0), + ("⇽", 8701, 0), + ("⟦", 10214, 0), + ("⟵", 10229, 0), + ("⟵", 10229, 0), + ("⟸", 10232, 0), + ("⟷", 10231, 0), + ("⟷", 10231, 0), + ("⟺", 10234, 0), + ("⟼", 10236, 0), + ("⟶", 10230, 0), + ("⟶", 10230, 0), + ("⟹", 10233, 0), + ("↫", 8619, 0), + ("↬", 8620, 0), + ("⦅", 10629, 0), + ("𝕃", 120131, 0), + ("𝕝", 120157, 0), + ("⨭", 10797, 0), + ("⨴", 10804, 0), + ("∗", 8727, 0), + ("_", 95, 0), + ("↙", 8601, 0), + ("↘", 8600, 0), + ("◊", 9674, 0), + ("◊", 9674, 0), + ("⧫", 10731, 0), + ("(", 40, 0), + ("⦓", 10643, 0), + ("⇆", 8646, 0), + ("⌟", 8991, 0), + ("⇋", 8651, 0), + ("⥭", 10605, 0), + ("‎", 8206, 0), + ("⊿", 8895, 0), + ("‹", 8249, 0), + ("𝓁", 120001, 0), + ("ℒ", 8466, 0), + ("↰", 8624, 0), + ("↰", 8624, 0), + ("≲", 8818, 0), + ("⪍", 10893, 0), + ("⪏", 10895, 0), + ("[", 
91, 0), + ("‘", 8216, 0), + ("‚", 8218, 0), + ("Ł", 321, 0), + ("ł", 322, 0), + ("⪦", 10918, 0), + ("⩹", 10873, 0), + ("<", 60, 0), + ("<", 60, 0), + ("<", 60, 0), + ("<", 60, 0), + ("≪", 8810, 0), + ("⋖", 8918, 0), + ("⋋", 8907, 0), + ("⋉", 8905, 0), + ("⥶", 10614, 0), + ("⩻", 10875, 0), + ("◃", 9667, 0), + ("⊴", 8884, 0), + ("◂", 9666, 0), + ("⦖", 10646, 0), + ("⥊", 10570, 0), + ("⥦", 10598, 0), + ("≨︀", 8808, 65024), + ("≨︀", 8808, 65024), + ("¯", 175, 0), + ("¯", 175, 0), + ("♂", 9794, 0), + ("✠", 10016, 0), + ("✠", 10016, 0), + ("⤅", 10501, 0), + ("↦", 8614, 0), + ("↦", 8614, 0), + ("↧", 8615, 0), + ("↤", 8612, 0), + ("↥", 8613, 0), + ("▮", 9646, 0), + ("⨩", 10793, 0), + ("М", 1052, 0), + ("м", 1084, 0), + ("—", 8212, 0), + ("∺", 8762, 0), + ("∡", 8737, 0), + (" ", 8287, 0), + ("ℳ", 8499, 0), + ("𝔐", 120080, 0), + ("𝔪", 120106, 0), + ("℧", 8487, 0), + ("µ", 181, 0), + ("µ", 181, 0), + ("*", 42, 0), + ("⫰", 10992, 0), + ("∣", 8739, 0), + ("·", 183, 0), + ("·", 183, 0), + ("⊟", 8863, 0), + ("−", 8722, 0), + ("∸", 8760, 0), + ("⨪", 10794, 0), + ("∓", 8723, 0), + ("⫛", 10971, 0), + ("…", 8230, 0), + ("∓", 8723, 0), + ("⊧", 8871, 0), + ("𝕄", 120132, 0), + ("𝕞", 120158, 0), + ("∓", 8723, 0), + ("𝓂", 120002, 0), + ("ℳ", 8499, 0), + ("∾", 8766, 0), + ("Μ", 924, 0), + ("μ", 956, 0), + ("⊸", 8888, 0), + ("⊸", 8888, 0), + ("∇", 8711, 0), + ("Ń", 323, 0), + ("ń", 324, 0), + ("∠⃒", 8736, 8402), + ("≉", 8777, 0), + ("⩰̸", 10864, 824), + ("≋̸", 8779, 824), + ("ʼn", 329, 0), + ("≉", 8777, 0), + ("♮", 9838, 0), + ("ℕ", 8469, 0), + ("♮", 9838, 0), + (" ", 160, 0), + (" ", 160, 0), + ("≎̸", 8782, 824), + ("≏̸", 8783, 824), + ("⩃", 10819, 0), + ("Ň", 327, 0), + ("ň", 328, 0), + ("Ņ", 325, 0), + ("ņ", 326, 0), + ("≇", 8775, 0), + ("⩭̸", 10861, 824), + ("⩂", 10818, 0), + ("Н", 1053, 0), + ("н", 1085, 0), + ("–", 8211, 0), + ("⤤", 10532, 0), + ("↗", 8599, 0), + ("⇗", 8663, 0), + ("↗", 8599, 0), + ("≠", 8800, 0), + ("≐̸", 8784, 824), + ("​", 8203, 0), + ("​", 8203, 0), + ("​", 8203, 
0), + ("​", 8203, 0), + ("≢", 8802, 0), + ("⤨", 10536, 0), + ("≂̸", 8770, 824), + ("≫", 8811, 0), + ("≪", 8810, 0), + ("
", 10, 0), + ("∄", 8708, 0), + ("∄", 8708, 0), + ("𝔑", 120081, 0), + ("𝔫", 120107, 0), + ("≧̸", 8807, 824), + ("≱", 8817, 0), + ("≱", 8817, 0), + ("≧̸", 8807, 824), + ("⩾̸", 10878, 824), + ("⩾̸", 10878, 824), + ("⋙̸", 8921, 824), + ("≵", 8821, 0), + ("≫⃒", 8811, 8402), + ("≯", 8815, 0), + ("≯", 8815, 0), + ("≫̸", 8811, 824), + ("↮", 8622, 0), + ("⇎", 8654, 0), + ("⫲", 10994, 0), + ("∋", 8715, 0), + ("⋼", 8956, 0), + ("⋺", 8954, 0), + ("∋", 8715, 0), + ("Њ", 1034, 0), + ("њ", 1114, 0), + ("↚", 8602, 0), + ("⇍", 8653, 0), + ("‥", 8229, 0), + ("≦̸", 8806, 824), + ("≰", 8816, 0), + ("↚", 8602, 0), + ("⇍", 8653, 0), + ("↮", 8622, 0), + ("⇎", 8654, 0), + ("≰", 8816, 0), + ("≦̸", 8806, 824), + ("⩽̸", 10877, 824), + ("⩽̸", 10877, 824), + ("≮", 8814, 0), + ("⋘̸", 8920, 824), + ("≴", 8820, 0), + ("≪⃒", 8810, 8402), + ("≮", 8814, 0), + ("⋪", 8938, 0), + ("⋬", 8940, 0), + ("≪̸", 8810, 824), + ("∤", 8740, 0), + ("⁠", 8288, 0), + (" ", 160, 0), + ("𝕟", 120159, 0), + ("ℕ", 8469, 0), + ("⫬", 10988, 0), + ("¬", 172, 0), + ("¬", 172, 0), + ("≢", 8802, 0), + ("≭", 8813, 0), + ("∦", 8742, 0), + ("∉", 8713, 0), + ("≠", 8800, 0), + ("≂̸", 8770, 824), + ("∄", 8708, 0), + ("≯", 8815, 0), + ("≱", 8817, 0), + ("≧̸", 8807, 824), + ("≫̸", 8811, 824), + ("≹", 8825, 0), + ("⩾̸", 10878, 824), + ("≵", 8821, 0), + ("≎̸", 8782, 824), + ("≏̸", 8783, 824), + ("∉", 8713, 0), + ("⋵̸", 8949, 824), + ("⋹̸", 8953, 824), + ("∉", 8713, 0), + ("⋷", 8951, 0), + ("⋶", 8950, 0), + ("⧏̸", 10703, 824), + ("⋪", 8938, 0), + ("⋬", 8940, 0), + ("≮", 8814, 0), + ("≰", 8816, 0), + ("≸", 8824, 0), + ("≪̸", 8810, 824), + ("⩽̸", 10877, 824), + ("≴", 8820, 0), + ("⪢̸", 10914, 824), + ("⪡̸", 10913, 824), + ("∌", 8716, 0), + ("∌", 8716, 0), + ("⋾", 8958, 0), + ("⋽", 8957, 0), + ("⊀", 8832, 0), + ("⪯̸", 10927, 824), + ("⋠", 8928, 0), + ("∌", 8716, 0), + ("⧐̸", 10704, 824), + ("⋫", 8939, 0), + ("⋭", 8941, 0), + ("⊏̸", 8847, 824), + ("⋢", 8930, 0), + ("⊐̸", 8848, 824), + ("⋣", 8931, 0), + ("⊂⃒", 8834, 8402), + ("⊈", 8840, 0), + 
("⊁", 8833, 0), + ("⪰̸", 10928, 824), + ("⋡", 8929, 0), + ("≿̸", 8831, 824), + ("⊃⃒", 8835, 8402), + ("⊉", 8841, 0), + ("≁", 8769, 0), + ("≄", 8772, 0), + ("≇", 8775, 0), + ("≉", 8777, 0), + ("∤", 8740, 0), + ("∦", 8742, 0), + ("∦", 8742, 0), + ("⫽⃥", 11005, 8421), + ("∂̸", 8706, 824), + ("⨔", 10772, 0), + ("⊀", 8832, 0), + ("⋠", 8928, 0), + ("⊀", 8832, 0), + ("⪯̸", 10927, 824), + ("⪯̸", 10927, 824), + ("⤳̸", 10547, 824), + ("↛", 8603, 0), + ("⇏", 8655, 0), + ("↝̸", 8605, 824), + ("↛", 8603, 0), + ("⇏", 8655, 0), + ("⋫", 8939, 0), + ("⋭", 8941, 0), + ("⊁", 8833, 0), + ("⋡", 8929, 0), + ("⪰̸", 10928, 824), + ("𝒩", 119977, 0), + ("𝓃", 120003, 0), + ("∤", 8740, 0), + ("∦", 8742, 0), + ("≁", 8769, 0), + ("≄", 8772, 0), + ("≄", 8772, 0), + ("∤", 8740, 0), + ("∦", 8742, 0), + ("⋢", 8930, 0), + ("⋣", 8931, 0), + ("⊄", 8836, 0), + ("⫅̸", 10949, 824), + ("⊈", 8840, 0), + ("⊂⃒", 8834, 8402), + ("⊈", 8840, 0), + ("⫅̸", 10949, 824), + ("⊁", 8833, 0), + ("⪰̸", 10928, 824), + ("⊅", 8837, 0), + ("⫆̸", 10950, 824), + ("⊉", 8841, 0), + ("⊃⃒", 8835, 8402), + ("⊉", 8841, 0), + ("⫆̸", 10950, 824), + ("≹", 8825, 0), + ("Ñ", 209, 0), + ("Ñ", 209, 0), + ("ñ", 241, 0), + ("ñ", 241, 0), + ("≸", 8824, 0), + ("⋪", 8938, 0), + ("⋬", 8940, 0), + ("⋫", 8939, 0), + ("⋭", 8941, 0), + ("Ν", 925, 0), + ("ν", 957, 0), + ("#", 35, 0), + ("№", 8470, 0), + (" ", 8199, 0), + ("≍⃒", 8781, 8402), + ("⊬", 8876, 0), + ("⊭", 8877, 0), + ("⊮", 8878, 0), + ("⊯", 8879, 0), + ("≥⃒", 8805, 8402), + (">⃒", 62, 8402), + ("⤄", 10500, 0), + ("⧞", 10718, 0), + ("⤂", 10498, 0), + ("≤⃒", 8804, 8402), + ("<⃒", 60, 8402), + ("⊴⃒", 8884, 8402), + ("⤃", 10499, 0), + ("⊵⃒", 8885, 8402), + ("∼⃒", 8764, 8402), + ("⤣", 10531, 0), + ("↖", 8598, 0), + ("⇖", 8662, 0), + ("↖", 8598, 0), + ("⤧", 10535, 0), + ("Ó", 211, 0), + ("Ó", 211, 0), + ("ó", 243, 0), + ("ó", 243, 0), + ("⊛", 8859, 0), + ("Ô", 212, 0), + ("Ô", 212, 0), + ("ô", 244, 0), + ("ô", 244, 0), + ("⊚", 8858, 0), + ("О", 1054, 0), + ("о", 1086, 0), + ("⊝", 8861, 0), + 
("Ő", 336, 0), + ("ő", 337, 0), + ("⨸", 10808, 0), + ("⊙", 8857, 0), + ("⦼", 10684, 0), + ("Œ", 338, 0), + ("œ", 339, 0), + ("⦿", 10687, 0), + ("𝔒", 120082, 0), + ("𝔬", 120108, 0), + ("˛", 731, 0), + ("Ò", 210, 0), + ("Ò", 210, 0), + ("ò", 242, 0), + ("ò", 242, 0), + ("⧁", 10689, 0), + ("⦵", 10677, 0), + ("Ω", 937, 0), + ("∮", 8750, 0), + ("↺", 8634, 0), + ("⦾", 10686, 0), + ("⦻", 10683, 0), + ("‾", 8254, 0), + ("⧀", 10688, 0), + ("Ō", 332, 0), + ("ō", 333, 0), + ("Ω", 937, 0), + ("ω", 969, 0), + ("Ο", 927, 0), + ("ο", 959, 0), + ("⦶", 10678, 0), + ("⊖", 8854, 0), + ("𝕆", 120134, 0), + ("𝕠", 120160, 0), + ("⦷", 10679, 0), + ("“", 8220, 0), + ("‘", 8216, 0), + ("⦹", 10681, 0), + ("⊕", 8853, 0), + ("↻", 8635, 0), + ("⩔", 10836, 0), + ("∨", 8744, 0), + ("⩝", 10845, 0), + ("ℴ", 8500, 0), + ("ℴ", 8500, 0), + ("ª", 170, 0), + ("ª", 170, 0), + ("º", 186, 0), + ("º", 186, 0), + ("⊶", 8886, 0), + ("⩖", 10838, 0), + ("⩗", 10839, 0), + ("⩛", 10843, 0), + ("Ⓢ", 9416, 0), + ("𝒪", 119978, 0), + ("ℴ", 8500, 0), + ("Ø", 216, 0), + ("Ø", 216, 0), + ("ø", 248, 0), + ("ø", 248, 0), + ("⊘", 8856, 0), + ("Õ", 213, 0), + ("Õ", 213, 0), + ("õ", 245, 0), + ("õ", 245, 0), + ("⨶", 10806, 0), + ("⨷", 10807, 0), + ("⊗", 8855, 0), + ("Ö", 214, 0), + ("Ö", 214, 0), + ("ö", 246, 0), + ("ö", 246, 0), + ("⌽", 9021, 0), + ("‾", 8254, 0), + ("⏞", 9182, 0), + ("⎴", 9140, 0), + ("⏜", 9180, 0), + ("¶", 182, 0), + ("¶", 182, 0), + ("∥", 8741, 0), + ("∥", 8741, 0), + ("⫳", 10995, 0), + ("⫽", 11005, 0), + ("∂", 8706, 0), + ("∂", 8706, 0), + ("П", 1055, 0), + ("п", 1087, 0), + ("%", 37, 0), + (".", 46, 0), + ("‰", 8240, 0), + ("⊥", 8869, 0), + ("‱", 8241, 0), + ("𝔓", 120083, 0), + ("𝔭", 120109, 0), + ("Φ", 934, 0), + ("φ", 966, 0), + ("ϕ", 981, 0), + ("ℳ", 8499, 0), + ("☎", 9742, 0), + ("Π", 928, 0), + ("π", 960, 0), + ("⋔", 8916, 0), + ("ϖ", 982, 0), + ("ℏ", 8463, 0), + ("ℎ", 8462, 0), + ("ℏ", 8463, 0), + ("⨣", 10787, 0), + ("⊞", 8862, 0), + ("⨢", 10786, 0), + ("+", 43, 0), + ("∔", 8724, 0), + ("⨥", 
10789, 0), + ("⩲", 10866, 0), + ("±", 177, 0), + ("±", 177, 0), + ("±", 177, 0), + ("⨦", 10790, 0), + ("⨧", 10791, 0), + ("±", 177, 0), + ("ℌ", 8460, 0), + ("⨕", 10773, 0), + ("𝕡", 120161, 0), + ("ℙ", 8473, 0), + ("£", 163, 0), + ("£", 163, 0), + ("⪷", 10935, 0), + ("⪻", 10939, 0), + ("≺", 8826, 0), + ("≼", 8828, 0), + ("⪷", 10935, 0), + ("≺", 8826, 0), + ("≼", 8828, 0), + ("≺", 8826, 0), + ("⪯", 10927, 0), + ("≼", 8828, 0), + ("≾", 8830, 0), + ("⪯", 10927, 0), + ("⪹", 10937, 0), + ("⪵", 10933, 0), + ("⋨", 8936, 0), + ("⪯", 10927, 0), + ("⪳", 10931, 0), + ("≾", 8830, 0), + ("′", 8242, 0), + ("″", 8243, 0), + ("ℙ", 8473, 0), + ("⪹", 10937, 0), + ("⪵", 10933, 0), + ("⋨", 8936, 0), + ("∏", 8719, 0), + ("∏", 8719, 0), + ("⌮", 9006, 0), + ("⌒", 8978, 0), + ("⌓", 8979, 0), + ("∝", 8733, 0), + ("∝", 8733, 0), + ("∷", 8759, 0), + ("∝", 8733, 0), + ("≾", 8830, 0), + ("⊰", 8880, 0), + ("𝒫", 119979, 0), + ("𝓅", 120005, 0), + ("Ψ", 936, 0), + ("ψ", 968, 0), + (" ", 8200, 0), + ("𝔔", 120084, 0), + ("𝔮", 120110, 0), + ("⨌", 10764, 0), + ("𝕢", 120162, 0), + ("ℚ", 8474, 0), + ("⁗", 8279, 0), + ("𝒬", 119980, 0), + ("𝓆", 120006, 0), + ("ℍ", 8461, 0), + ("⨖", 10774, 0), + ("?", 63, 0), + ("≟", 8799, 0), + (""", 34, 0), + (""", 34, 0), + (""", 34, 0), + (""", 34, 0), + ("⇛", 8667, 0), + ("∽̱", 8765, 817), + ("Ŕ", 340, 0), + ("ŕ", 341, 0), + ("√", 8730, 0), + ("⦳", 10675, 0), + ("⟩", 10217, 0), + ("⟫", 10219, 0), + ("⦒", 10642, 0), + ("⦥", 10661, 0), + ("⟩", 10217, 0), + ("»", 187, 0), + ("»", 187, 0), + ("⥵", 10613, 0), + ("⇥", 8677, 0), + ("⤠", 10528, 0), + ("⤳", 10547, 0), + ("→", 8594, 0), + ("↠", 8608, 0), + ("⇒", 8658, 0), + ("⤞", 10526, 0), + ("↪", 8618, 0), + ("↬", 8620, 0), + ("⥅", 10565, 0), + ("⥴", 10612, 0), + ("⤖", 10518, 0), + ("↣", 8611, 0), + ("↝", 8605, 0), + ("⤚", 10522, 0), + ("⤜", 10524, 0), + ("∶", 8758, 0), + ("ℚ", 8474, 0), + ("⤍", 10509, 0), + ("⤏", 10511, 0), + ("⤐", 10512, 0), + ("❳", 10099, 0), + ("}", 125, 0), + ("]", 93, 0), + ("⦌", 10636, 0), + ("⦎", 
10638, 0), + ("⦐", 10640, 0), + ("Ř", 344, 0), + ("ř", 345, 0), + ("Ŗ", 342, 0), + ("ŗ", 343, 0), + ("⌉", 8969, 0), + ("}", 125, 0), + ("Р", 1056, 0), + ("р", 1088, 0), + ("⤷", 10551, 0), + ("⥩", 10601, 0), + ("”", 8221, 0), + ("”", 8221, 0), + ("↳", 8627, 0), + ("ℜ", 8476, 0), + ("ℛ", 8475, 0), + ("ℜ", 8476, 0), + ("ℝ", 8477, 0), + ("ℜ", 8476, 0), + ("▭", 9645, 0), + ("®", 174, 0), + ("®", 174, 0), + ("®", 174, 0), + ("®", 174, 0), + ("∋", 8715, 0), + ("⇋", 8651, 0), + ("⥯", 10607, 0), + ("⥽", 10621, 0), + ("⌋", 8971, 0), + ("𝔯", 120111, 0), + ("ℜ", 8476, 0), + ("⥤", 10596, 0), + ("⇁", 8641, 0), + ("⇀", 8640, 0), + ("⥬", 10604, 0), + ("Ρ", 929, 0), + ("ρ", 961, 0), + ("ϱ", 1009, 0), + ("⟩", 10217, 0), + ("⇥", 8677, 0), + ("→", 8594, 0), + ("→", 8594, 0), + ("⇒", 8658, 0), + ("⇄", 8644, 0), + ("↣", 8611, 0), + ("⌉", 8969, 0), + ("⟧", 10215, 0), + ("⥝", 10589, 0), + ("⥕", 10581, 0), + ("⇂", 8642, 0), + ("⌋", 8971, 0), + ("⇁", 8641, 0), + ("⇀", 8640, 0), + ("⇄", 8644, 0), + ("⇌", 8652, 0), + ("⇉", 8649, 0), + ("↝", 8605, 0), + ("↦", 8614, 0), + ("⊢", 8866, 0), + ("⥛", 10587, 0), + ("⋌", 8908, 0), + ("⧐", 10704, 0), + ("⊳", 8883, 0), + ("⊵", 8885, 0), + ("⥏", 10575, 0), + ("⥜", 10588, 0), + ("⥔", 10580, 0), + ("↾", 8638, 0), + ("⥓", 10579, 0), + ("⇀", 8640, 0), + ("˚", 730, 0), + ("≓", 8787, 0), + ("⇄", 8644, 0), + ("⇌", 8652, 0), + ("‏", 8207, 0), + ("⎱", 9137, 0), + ("⎱", 9137, 0), + ("⫮", 10990, 0), + ("⟭", 10221, 0), + ("⇾", 8702, 0), + ("⟧", 10215, 0), + ("⦆", 10630, 0), + ("𝕣", 120163, 0), + ("ℝ", 8477, 0), + ("⨮", 10798, 0), + ("⨵", 10805, 0), + ("⥰", 10608, 0), + (")", 41, 0), + ("⦔", 10644, 0), + ("⨒", 10770, 0), + ("⇉", 8649, 0), + ("⇛", 8667, 0), + ("›", 8250, 0), + ("𝓇", 120007, 0), + ("ℛ", 8475, 0), + ("↱", 8625, 0), + ("↱", 8625, 0), + ("]", 93, 0), + ("’", 8217, 0), + ("’", 8217, 0), + ("⋌", 8908, 0), + ("⋊", 8906, 0), + ("▹", 9657, 0), + ("⊵", 8885, 0), + ("▸", 9656, 0), + ("⧎", 10702, 0), + ("⧴", 10740, 0), + ("⥨", 10600, 0), + ("℞", 8478, 0), + ("Ś", 
346, 0), + ("ś", 347, 0), + ("‚", 8218, 0), + ("⪸", 10936, 0), + ("Š", 352, 0), + ("š", 353, 0), + ("⪼", 10940, 0), + ("≻", 8827, 0), + ("≽", 8829, 0), + ("⪰", 10928, 0), + ("⪴", 10932, 0), + ("Ş", 350, 0), + ("ş", 351, 0), + ("Ŝ", 348, 0), + ("ŝ", 349, 0), + ("⪺", 10938, 0), + ("⪶", 10934, 0), + ("⋩", 8937, 0), + ("⨓", 10771, 0), + ("≿", 8831, 0), + ("С", 1057, 0), + ("с", 1089, 0), + ("⊡", 8865, 0), + ("⋅", 8901, 0), + ("⩦", 10854, 0), + ("⤥", 10533, 0), + ("↘", 8600, 0), + ("⇘", 8664, 0), + ("↘", 8600, 0), + ("§", 167, 0), + ("§", 167, 0), + (";", 59, 0), + ("⤩", 10537, 0), + ("∖", 8726, 0), + ("∖", 8726, 0), + ("✶", 10038, 0), + ("𝔖", 120086, 0), + ("𝔰", 120112, 0), + ("⌢", 8994, 0), + ("♯", 9839, 0), + ("Щ", 1065, 0), + ("щ", 1097, 0), + ("Ш", 1064, 0), + ("ш", 1096, 0), + ("↓", 8595, 0), + ("←", 8592, 0), + ("∣", 8739, 0), + ("∥", 8741, 0), + ("→", 8594, 0), + ("↑", 8593, 0), + ("­", 173, 0), + ("­", 173, 0), + ("Σ", 931, 0), + ("σ", 963, 0), + ("ς", 962, 0), + ("ς", 962, 0), + ("∼", 8764, 0), + ("⩪", 10858, 0), + ("≃", 8771, 0), + ("≃", 8771, 0), + ("⪞", 10910, 0), + ("⪠", 10912, 0), + ("⪝", 10909, 0), + ("⪟", 10911, 0), + ("≆", 8774, 0), + ("⨤", 10788, 0), + ("⥲", 10610, 0), + ("←", 8592, 0), + ("∘", 8728, 0), + ("∖", 8726, 0), + ("⨳", 10803, 0), + ("⧤", 10724, 0), + ("∣", 8739, 0), + ("⌣", 8995, 0), + ("⪪", 10922, 0), + ("⪬", 10924, 0), + ("⪬︀", 10924, 65024), + ("Ь", 1068, 0), + ("ь", 1100, 0), + ("⌿", 9023, 0), + ("⧄", 10692, 0), + ("/", 47, 0), + ("𝕊", 120138, 0), + ("𝕤", 120164, 0), + ("♠", 9824, 0), + ("♠", 9824, 0), + ("∥", 8741, 0), + ("⊓", 8851, 0), + ("⊓︀", 8851, 65024), + ("⊔", 8852, 0), + ("⊔︀", 8852, 65024), + ("√", 8730, 0), + ("⊏", 8847, 0), + ("⊑", 8849, 0), + ("⊏", 8847, 0), + ("⊑", 8849, 0), + ("⊐", 8848, 0), + ("⊒", 8850, 0), + ("⊐", 8848, 0), + ("⊒", 8850, 0), + ("□", 9633, 0), + ("□", 9633, 0), + ("⊓", 8851, 0), + ("⊏", 8847, 0), + ("⊑", 8849, 0), + ("⊐", 8848, 0), + ("⊒", 8850, 0), + ("⊔", 8852, 0), + ("▪", 9642, 0), + ("□", 9633, 0), 
+ ("▪", 9642, 0), + ("→", 8594, 0), + ("𝒮", 119982, 0), + ("𝓈", 120008, 0), + ("∖", 8726, 0), + ("⌣", 8995, 0), + ("⋆", 8902, 0), + ("⋆", 8902, 0), + ("☆", 9734, 0), + ("★", 9733, 0), + ("ϵ", 1013, 0), + ("ϕ", 981, 0), + ("¯", 175, 0), + ("⊂", 8834, 0), + ("⋐", 8912, 0), + ("⪽", 10941, 0), + ("⫅", 10949, 0), + ("⊆", 8838, 0), + ("⫃", 10947, 0), + ("⫁", 10945, 0), + ("⫋", 10955, 0), + ("⊊", 8842, 0), + ("⪿", 10943, 0), + ("⥹", 10617, 0), + ("⊂", 8834, 0), + ("⋐", 8912, 0), + ("⊆", 8838, 0), + ("⫅", 10949, 0), + ("⊆", 8838, 0), + ("⊊", 8842, 0), + ("⫋", 10955, 0), + ("⫇", 10951, 0), + ("⫕", 10965, 0), + ("⫓", 10963, 0), + ("⪸", 10936, 0), + ("≻", 8827, 0), + ("≽", 8829, 0), + ("≻", 8827, 0), + ("⪰", 10928, 0), + ("≽", 8829, 0), + ("≿", 8831, 0), + ("⪰", 10928, 0), + ("⪺", 10938, 0), + ("⪶", 10934, 0), + ("⋩", 8937, 0), + ("≿", 8831, 0), + ("∋", 8715, 0), + ("∑", 8721, 0), + ("∑", 8721, 0), + ("♪", 9834, 0), + ("¹", 185, 0), + ("¹", 185, 0), + ("²", 178, 0), + ("²", 178, 0), + ("³", 179, 0), + ("³", 179, 0), + ("⊃", 8835, 0), + ("⋑", 8913, 0), + ("⪾", 10942, 0), + ("⫘", 10968, 0), + ("⫆", 10950, 0), + ("⊇", 8839, 0), + ("⫄", 10948, 0), + ("⊃", 8835, 0), + ("⊇", 8839, 0), + ("⟉", 10185, 0), + ("⫗", 10967, 0), + ("⥻", 10619, 0), + ("⫂", 10946, 0), + ("⫌", 10956, 0), + ("⊋", 8843, 0), + ("⫀", 10944, 0), + ("⊃", 8835, 0), + ("⋑", 8913, 0), + ("⊇", 8839, 0), + ("⫆", 10950, 0), + ("⊋", 8843, 0), + ("⫌", 10956, 0), + ("⫈", 10952, 0), + ("⫔", 10964, 0), + ("⫖", 10966, 0), + ("⤦", 10534, 0), + ("↙", 8601, 0), + ("⇙", 8665, 0), + ("↙", 8601, 0), + ("⤪", 10538, 0), + ("ß", 223, 0), + ("ß", 223, 0), + ("	", 9, 0), + ("⌖", 8982, 0), + ("Τ", 932, 0), + ("τ", 964, 0), + ("⎴", 9140, 0), + ("Ť", 356, 0), + ("ť", 357, 0), + ("Ţ", 354, 0), + ("ţ", 355, 0), + ("Т", 1058, 0), + ("т", 1090, 0), + ("⃛", 8411, 0), + ("⌕", 8981, 0), + ("𝔗", 120087, 0), + ("𝔱", 120113, 0), + ("∴", 8756, 0), + ("∴", 8756, 0), + ("∴", 8756, 0), + ("Θ", 920, 0), + ("θ", 952, 0), + ("ϑ", 977, 0), + ("ϑ", 977, 0), 
+ ("≈", 8776, 0), + ("∼", 8764, 0), + ("  ", 8287, 8202), + (" ", 8201, 0), + (" ", 8201, 0), + ("≈", 8776, 0), + ("∼", 8764, 0), + ("Þ", 222, 0), + ("Þ", 222, 0), + ("þ", 254, 0), + ("þ", 254, 0), + ("˜", 732, 0), + ("∼", 8764, 0), + ("≃", 8771, 0), + ("≅", 8773, 0), + ("≈", 8776, 0), + ("⨱", 10801, 0), + ("⊠", 8864, 0), + ("×", 215, 0), + ("×", 215, 0), + ("⨰", 10800, 0), + ("∭", 8749, 0), + ("⤨", 10536, 0), + ("⌶", 9014, 0), + ("⫱", 10993, 0), + ("⊤", 8868, 0), + ("𝕋", 120139, 0), + ("𝕥", 120165, 0), + ("⫚", 10970, 0), + ("⤩", 10537, 0), + ("‴", 8244, 0), + ("™", 8482, 0), + ("™", 8482, 0), + ("▵", 9653, 0), + ("▿", 9663, 0), + ("◃", 9667, 0), + ("⊴", 8884, 0), + ("≜", 8796, 0), + ("▹", 9657, 0), + ("⊵", 8885, 0), + ("◬", 9708, 0), + ("≜", 8796, 0), + ("⨺", 10810, 0), + ("⃛", 8411, 0), + ("⨹", 10809, 0), + ("⧍", 10701, 0), + ("⨻", 10811, 0), + ("⏢", 9186, 0), + ("𝒯", 119983, 0), + ("𝓉", 120009, 0), + ("Ц", 1062, 0), + ("ц", 1094, 0), + ("Ћ", 1035, 0), + ("ћ", 1115, 0), + ("Ŧ", 358, 0), + ("ŧ", 359, 0), + ("≬", 8812, 0), + ("↞", 8606, 0), + ("↠", 8608, 0), + ("Ú", 218, 0), + ("Ú", 218, 0), + ("ú", 250, 0), + ("ú", 250, 0), + ("↑", 8593, 0), + ("↟", 8607, 0), + ("⇑", 8657, 0), + ("⥉", 10569, 0), + ("Ў", 1038, 0), + ("ў", 1118, 0), + ("Ŭ", 364, 0), + ("ŭ", 365, 0), + ("Û", 219, 0), + ("Û", 219, 0), + ("û", 251, 0), + ("û", 251, 0), + ("У", 1059, 0), + ("у", 1091, 0), + ("⇅", 8645, 0), + ("Ű", 368, 0), + ("ű", 369, 0), + ("⥮", 10606, 0), + ("⥾", 10622, 0), + ("𝔘", 120088, 0), + ("𝔲", 120114, 0), + ("Ù", 217, 0), + ("Ù", 217, 0), + ("ù", 249, 0), + ("ù", 249, 0), + ("⥣", 10595, 0), + ("↿", 8639, 0), + ("↾", 8638, 0), + ("▀", 9600, 0), + ("⌜", 8988, 0), + ("⌜", 8988, 0), + ("⌏", 8975, 0), + ("◸", 9720, 0), + ("Ū", 362, 0), + ("ū", 363, 0), + ("¨", 168, 0), + ("¨", 168, 0), + ("_", 95, 0), + ("⏟", 9183, 0), + ("⎵", 9141, 0), + ("⏝", 9181, 0), + ("⋃", 8899, 0), + ("⊎", 8846, 0), + ("Ų", 370, 0), + ("ų", 371, 0), + ("𝕌", 120140, 0), + ("𝕦", 120166, 0), + ("⤒", 10514, 0), 
+ ("↑", 8593, 0), + ("↑", 8593, 0), + ("⇑", 8657, 0), + ("⇅", 8645, 0), + ("↕", 8597, 0), + ("↕", 8597, 0), + ("⇕", 8661, 0), + ("⥮", 10606, 0), + ("↿", 8639, 0), + ("↾", 8638, 0), + ("⊎", 8846, 0), + ("↖", 8598, 0), + ("↗", 8599, 0), + ("υ", 965, 0), + ("ϒ", 978, 0), + ("ϒ", 978, 0), + ("Υ", 933, 0), + ("υ", 965, 0), + ("↥", 8613, 0), + ("⊥", 8869, 0), + ("⇈", 8648, 0), + ("⌝", 8989, 0), + ("⌝", 8989, 0), + ("⌎", 8974, 0), + ("Ů", 366, 0), + ("ů", 367, 0), + ("◹", 9721, 0), + ("𝒰", 119984, 0), + ("𝓊", 120010, 0), + ("⋰", 8944, 0), + ("Ũ", 360, 0), + ("ũ", 361, 0), + ("▵", 9653, 0), + ("▴", 9652, 0), + ("⇈", 8648, 0), + ("Ü", 220, 0), + ("Ü", 220, 0), + ("ü", 252, 0), + ("ü", 252, 0), + ("⦧", 10663, 0), + ("⦜", 10652, 0), + ("ϵ", 1013, 0), + ("ϰ", 1008, 0), + ("∅", 8709, 0), + ("ϕ", 981, 0), + ("ϖ", 982, 0), + ("∝", 8733, 0), + ("↕", 8597, 0), + ("⇕", 8661, 0), + ("ϱ", 1009, 0), + ("ς", 962, 0), + ("⊊︀", 8842, 65024), + ("⫋︀", 10955, 65024), + ("⊋︀", 8843, 65024), + ("⫌︀", 10956, 65024), + ("ϑ", 977, 0), + ("⊲", 8882, 0), + ("⊳", 8883, 0), + ("⫨", 10984, 0), + ("⫫", 10987, 0), + ("⫩", 10985, 0), + ("В", 1042, 0), + ("в", 1074, 0), + ("⊢", 8866, 0), + ("⊨", 8872, 0), + ("⊩", 8873, 0), + ("⊫", 8875, 0), + ("⫦", 10982, 0), + ("⊻", 8891, 0), + ("∨", 8744, 0), + ("⋁", 8897, 0), + ("≚", 8794, 0), + ("⋮", 8942, 0), + ("|", 124, 0), + ("‖", 8214, 0), + ("|", 124, 0), + ("‖", 8214, 0), + ("∣", 8739, 0), + ("|", 124, 0), + ("❘", 10072, 0), + ("≀", 8768, 0), + (" ", 8202, 0), + ("𝔙", 120089, 0), + ("𝔳", 120115, 0), + ("⊲", 8882, 0), + ("⊂⃒", 8834, 8402), + ("⊃⃒", 8835, 8402), + ("𝕍", 120141, 0), + ("𝕧", 120167, 0), + ("∝", 8733, 0), + ("⊳", 8883, 0), + ("𝒱", 119985, 0), + ("𝓋", 120011, 0), + ("⫋︀", 10955, 65024), + ("⊊︀", 8842, 65024), + ("⫌︀", 10956, 65024), + ("⊋︀", 8843, 65024), + ("⊪", 8874, 0), + ("⦚", 10650, 0), + ("Ŵ", 372, 0), + ("ŵ", 373, 0), + ("⩟", 10847, 0), + ("∧", 8743, 0), + ("⋀", 8896, 0), + ("≙", 8793, 0), + ("℘", 8472, 0), + ("𝔚", 120090, 0), + ("𝔴", 120116, 
0), + ("𝕎", 120142, 0), + ("𝕨", 120168, 0), + ("℘", 8472, 0), + ("≀", 8768, 0), + ("≀", 8768, 0), + ("𝒲", 119986, 0), + ("𝓌", 120012, 0), + ("⋂", 8898, 0), + ("◯", 9711, 0), + ("⋃", 8899, 0), + ("▽", 9661, 0), + ("𝔛", 120091, 0), + ("𝔵", 120117, 0), + ("⟷", 10231, 0), + ("⟺", 10234, 0), + ("Ξ", 926, 0), + ("ξ", 958, 0), + ("⟵", 10229, 0), + ("⟸", 10232, 0), + ("⟼", 10236, 0), + ("⋻", 8955, 0), + ("⨀", 10752, 0), + ("𝕏", 120143, 0), + ("𝕩", 120169, 0), + ("⨁", 10753, 0), + ("⨂", 10754, 0), + ("⟶", 10230, 0), + ("⟹", 10233, 0), + ("𝒳", 119987, 0), + ("𝓍", 120013, 0), + ("⨆", 10758, 0), + ("⨄", 10756, 0), + ("△", 9651, 0), + ("⋁", 8897, 0), + ("⋀", 8896, 0), + ("Ý", 221, 0), + ("Ý", 221, 0), + ("ý", 253, 0), + ("ý", 253, 0), + ("Я", 1071, 0), + ("я", 1103, 0), + ("Ŷ", 374, 0), + ("ŷ", 375, 0), + ("Ы", 1067, 0), + ("ы", 1099, 0), + ("¥", 165, 0), + ("¥", 165, 0), + ("𝔜", 120092, 0), + ("𝔶", 120118, 0), + ("Ї", 1031, 0), + ("ї", 1111, 0), + ("𝕐", 120144, 0), + ("𝕪", 120170, 0), + ("𝒴", 119988, 0), + ("𝓎", 120014, 0), + ("Ю", 1070, 0), + ("ю", 1102, 0), + ("ÿ", 255, 0), + ("ÿ", 255, 0), + ("Ÿ", 376, 0), + ("Ź", 377, 0), + ("ź", 378, 0), + ("Ž", 381, 0), + ("ž", 382, 0), + ("З", 1047, 0), + ("з", 1079, 0), + ("Ż", 379, 0), + ("ż", 380, 0), + ("ℨ", 8488, 0), + ("​", 8203, 0), + ("Ζ", 918, 0), + ("ζ", 950, 0), + ("𝔷", 120119, 0), + ("ℨ", 8488, 0), + ("Ж", 1046, 0), + ("ж", 1078, 0), + ("⇝", 8669, 0), + ("𝕫", 120171, 0), + ("ℤ", 8484, 0), + ("𝒵", 119989, 0), + ("𝓏", 120015, 0), + ("‍", 8205, 0), + ("‌", 8204, 0), +]; diff --git a/src/tokenizer/char_ref/data.rs b/src/tokenizer/char_ref/data.rs new file mode 100644 index 0000000..fa839ba --- /dev/null +++ b/src/tokenizer/char_ref/data.rs @@ -0,0 +1,50 @@ +// Copyright 2014-2017 The html5ever Project Developers. See the +// COPYRIGHT file at the top-level directory of this distribution. 
+// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. +//! Data that is known at compile-time and hard-coded into the binary. +use phf::Map; + +/// The spec replaces most characters in the ISO-2022 C1 control code range +/// (U+0080 through U+009F) with these characters, based on Windows 8-bit +/// codepages. +pub static C1_REPLACEMENTS: [Option<char>; 32] = [ + Some('\u{20ac}'), + None, + Some('\u{201a}'), + Some('\u{0192}'), + Some('\u{201e}'), + Some('\u{2026}'), + Some('\u{2020}'), + Some('\u{2021}'), + Some('\u{02c6}'), + Some('\u{2030}'), + Some('\u{0160}'), + Some('\u{2039}'), + Some('\u{0152}'), + None, + Some('\u{017d}'), + None, + None, + Some('\u{2018}'), + Some('\u{2019}'), + Some('\u{201c}'), + Some('\u{201d}'), + Some('\u{2022}'), + Some('\u{2013}'), + Some('\u{2014}'), + Some('\u{02dc}'), + Some('\u{2122}'), + Some('\u{0161}'), + Some('\u{203a}'), + Some('\u{0153}'), + None, + Some('\u{017e}'), + Some('\u{0178}'), +]; + +include!(concat!(env!("OUT_DIR"), "/named_entities.rs")); diff --git a/src/tokenizer/char_ref/mod.rs b/src/tokenizer/char_ref/mod.rs index 4c231b2..7b27bff 100644 --- a/src/tokenizer/char_ref/mod.rs +++ b/src/tokenizer/char_ref/mod.rs @@ -17,6 +17,8 @@ use std::char::from_u32; use self::State::*; pub use self::Status::*; +mod data; + //§ tokenizing-character-references pub struct CharRef { /// The resulting character(s) @@ -110,44 +112,6 @@ impl CharRefTokenizer { } } -/// The spec replaces most characters in the ISO-2022 C1 control code range -/// (U+0080 through U+009F) with these characters, based on Windows 8-bit -/// codepages. 
-pub static C1_REPLACEMENTS: [Option<char>; 32] = [ - Some('\u{20ac}'), - None, - Some('\u{201a}'), - Some('\u{0192}'), - Some('\u{201e}'), - Some('\u{2026}'), - Some('\u{2020}'), - Some('\u{2021}'), - Some('\u{02c6}'), - Some('\u{2030}'), - Some('\u{0160}'), - Some('\u{2039}'), - Some('\u{0152}'), - None, - Some('\u{017d}'), - None, - None, - Some('\u{2018}'), - Some('\u{2019}'), - Some('\u{201c}'), - Some('\u{201d}'), - Some('\u{2022}'), - Some('\u{2013}'), - Some('\u{2014}'), - Some('\u{02dc}'), - Some('\u{2122}'), - Some('\u{0161}'), - Some('\u{203a}'), - Some('\u{0153}'), - None, - Some('\u{017e}'), - Some('\u{0178}'), -]; - impl CharRefTokenizer { pub fn step<Sink: TokenSink>( &mut self, @@ -281,7 +245,7 @@ impl CharRefTokenizer { n if (n > 0x10FFFF) || self.num_too_big => ('\u{fffd}', true), 0x00 | 0xD800..=0xDFFF => ('\u{fffd}', true), - 0x80..=0x9F => match C1_REPLACEMENTS[(self.num - 0x80) as usize] { + 0x80..=0x9F => match data::C1_REPLACEMENTS[(self.num - 0x80) as usize] { Some(c) => (c, true), None => (conv(self.num), true), }, @@ -313,7 +277,21 @@ impl CharRefTokenizer { ) -> Status { let c = unwrap_or_return!(tokenizer.get_char(input), Stuck); self.name_buf_mut().push(c); - self.finish_named(tokenizer, input, Some(c)) + match data::NAMED_ENTITIES.get(&self.name_buf()[..]) { + // We have either a full match or a prefix of one. + Some(&m) => { + if m.0 != 0 { + // We have a full match, but there might be a longer one to come. + self.name_match = Some(m); + self.name_len = self.name_buf().len(); + } + // Otherwise we just have a prefix match. + Progress + }, + + // Can't continue the match. + None => self.finish_named(tokenizer, input, Some(c)), + } } fn emit_name_error<Sink: TokenSink>(&mut self, tokenizer: &mut Tokenizer<Sink>) { |