about summary refs log tree commit diff
path: root/generate_entities.py
diff options
context:
space:
mode:
author Markus Unterwaditzer <markus-honeypot@unterwaditzer.net> 2021-11-24 20:44:08 +0100
committer Markus Unterwaditzer <markus-honeypot@unterwaditzer.net> 2021-11-24 20:51:21 +0100
commit 9909fc4580855a58a10eb84f0d143d1b3b3f464a (patch)
tree 36941a6a714a10b9ce554ba249975108e6a17274 /generate_entities.py
hello world
Diffstat (limited to 'generate_entities.py')
-rw-r--r-- generate_entities.py 39
1 files changed, 39 insertions, 0 deletions
diff --git a/generate_entities.py b/generate_entities.py
new file mode 100644
index 0000000..1a1407d
--- /dev/null
+++ b/generate_entities.py
@@ -0,0 +1,39 @@
+"""Generate src/entities.rs from the entities JSON read on stdin."""
+import json
+import sys
+
+key_and_value = list(json.load(sys.stdin).items())
+# Sort by descending length so we match the largest prefix first; entries of
+# equal length are ordered by the key itself.
+key_and_value.sort(key=lambda x: (-len(x[0]), x[0]))
+
+with open("src/entities.rs", "w") as f:
+    f.write("""
+// @generated
+// this file is autogenerated by
+// curl https://html.spec.whatwg.org/entities.json | python generate_entities.py
+
+pub struct CharRef {
+ /// Name as it appears escaped in HTML
+ pub name: &'static str,
+ /// Unescaped character codepoints
+ pub characters: &'static str,
+}
+
+pub fn try_read_character_reference(first_char: char, mut try_read: impl FnMut(&str) -> bool) -> Option<CharRef> {
+""")
+
+    for key, value in key_and_value:
+        assert key[0] == '&'
+        # One Rust `\u{...}` escape (lowercase hex) per codepoint.
+        characters = "".join(r"\u{%x}" % c for c in value['codepoints'])
+        # `first_char` is a separate argument of the generated function, so
+        # each branch compares it and hands only the rest to `try_read`.
+        first_char, key = key[1], key[2:]
+        f.write("""
+ if first_char == '%(first_char)s' && try_read("%(key)s") {
+ return Some(CharRef { name: "%(key)s", characters: "%(characters)s" });
+ }
+ """ % {"key": key, "characters": characters, "first_char": first_char})
+
+    f.write(" None }")