author     Martin Fischer <martin@push-f.com>  2021-04-08 08:42:01 +0200
committer  Martin Fischer <martin@push-f.com>  2021-04-08 15:40:37 +0200
commit     57e7eefcbe6fb8c3dc4b01c707be9de4c34963a7 (patch)
tree       6a9d296389bf3023396592c8514ed6712e011c7f /macros
import https://github.com/servo/html5ever
commit d1206daa740305f55a5fa159e43eb33afc359cb4
Diffstat (limited to 'macros')
-rw-r--r--  macros/match_token.rs  464
1 file changed, 464 insertions, 0 deletions
diff --git a/macros/match_token.rs b/macros/match_token.rs
new file mode 100644
index 0000000..7d73519
--- /dev/null
+++ b/macros/match_token.rs
@@ -0,0 +1,464 @@
+// Copyright 2014-2017 The html5ever Project Developers. See the
+// COPYRIGHT file at the top-level directory of this distribution.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+/*!
+
+Implements the `match_token!()` macro for use by the HTML tree builder
+in `src/tree_builder/rules.rs`.
+
+
+## Example
+
+```rust
+match_token!(token {
+ CommentToken(text) => 1,
+
+ tag @ <base> <link> <meta> => 2,
+
+ </head> => 3,
+
+ </body> </html> </br> => else,
+
+ tag @ </_> => 4,
+
+ token => 5,
+})
+```
+
+
+## Syntax
+
+Because of the simplistic parser, the macro invocation must
+start with exactly `match_token!(token {` (with whitespace as specified)
+and end with exactly `})`.
+
+The left-hand side of each match arm is an optional `name @` binding, followed by
+
+ - an ordinary Rust pattern that starts with an identifier or an underscore, or
+
+ - a sequence of HTML tag names as identifiers, each inside "<...>" or "</...>"
+ to match an open or close tag respectively, or
+
+ - a "wildcard tag" "<_>" or "</_>" to match all open tags or all close tags
+ respectively.
+
+The right-hand side is either an expression or the keyword `else`.
+
+Note that this syntax does not support guards or pattern alternation like
+`Foo | Bar`. This is not a fundamental limitation; it's done for implementation
+simplicity.
+
+
+## Semantics
+
+Ordinary Rust patterns match as usual. If present, the `name @` binding has
+the usual meaning.
+
+A sequence of named tags matches any of those tags. A single sequence can
+contain both open and close tags. If present, the `name @` binding binds (by
+move) the `Tag` struct, not the outer `Token`. That is, a match arm like
+
+```rust
+tag @ <html> <head> => ...
+```
+
+expands to something like
+
+```rust
+TagToken(tag @ Tag { name: local_name!("html"), kind: StartTag })
+| TagToken(tag @ Tag { name: local_name!("head"), kind: StartTag }) => ...
+```
+
+A wildcard tag matches any tag of the appropriate kind, *unless* it was
+previously matched with an `else` right-hand side (more on this below).
+
+The expansion of this macro reorders code somewhat, to satisfy various
+restrictions arising from moves. However, it provides the semantics of in-order
+matching, by enforcing the following restrictions on its input:
+
+ - The last pattern must be a variable or the wildcard "_". In other words,
+ it must match everything.
+
+ - Otherwise, ordinary Rust patterns and specific-tag patterns cannot appear
+ after wildcard tag patterns.
+
+ - No tag name may appear more than once.
+
+ - A wildcard tag pattern may not occur in the same arm as any other tag.
+ "<_> <html> => ..." and "<_> </_> => ..." are both forbidden.
+
+ - The right-hand side "else" may only appear with specific-tag patterns.
+ It means that these specific tags should be handled by the last,
+ catch-all case arm, rather than by any wildcard tag arm. This situation
+ is common in the HTML5 syntax.
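+
+For example, in the example at the top, `</body> </html> </br> => else` keeps
+those three close tags out of the `tag @ </_>` wildcard arm and lets them fall
+through to the final `token => 5` arm instead. Roughly, the expansion gates the
+wildcard arms on an `enable_wildcards` flag, as in this simplified sketch (the
+full construction is in `expand_match_token_macro` below):
+
+```rust
+last_arm_token => {
+    let enable_wildcards = match last_arm_token {
+        TagToken(Tag { kind: EndTag, name: local_name!("body"), .. }) => false,
+        TagToken(Tag { kind: EndTag, name: local_name!("html"), .. }) => false,
+        TagToken(Tag { kind: EndTag, name: local_name!("br"), .. }) => false,
+        _ => true,
+    };
+    match (enable_wildcards, last_arm_token) {
+        (true, TagToken(tag @ Tag { kind: EndTag, .. })) => 4, // tag @ </_>
+        (_, token) => 5,                                       // catch-all arm
+    }
+}
+```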
+*/
+
+use quote::quote;
+use syn::{braced, parse_quote, Token};
+
+use proc_macro2::TokenStream;
+use quote::ToTokens;
+use std::collections::HashSet;
+use std::fs::File;
+use std::io::{Read, Write};
+use std::path::Path;
+use syn::ext::IdentExt;
+use syn::fold::Fold;
+use syn::parse::{Parse, ParseStream, Result};
+
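+/// Reads the Rust source at `from`, expands every `match_token!` invocation
+/// found in it, and writes the expanded source to `to`.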
+pub fn expand(from: &Path, to: &Path) {
+ let mut source = String::new();
+ File::open(from)
+ .unwrap()
+ .read_to_string(&mut source)
+ .unwrap();
+ let ast = syn::parse_file(&source).expect("Parsing rules.rs module");
+ let mut m = MatchTokenParser {};
+ let ast = m.fold_file(ast);
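+ // Crude formatting: break the single-line token-stream output at braces so
+ // the generated file is at least somewhat readable.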
+ let code = ast
+ .into_token_stream()
+ .to_string()
+ .replace("{ ", "{\n")
+ .replace(" }", "\n}");
+ File::create(to)
+ .unwrap()
+ .write_all(code.as_bytes())
+ .unwrap();
+}
+
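+/// `syn::fold::Fold` visitor that replaces `match_token!` invocations
+/// (in statement or expression position) with their expansion.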
+struct MatchTokenParser {}
+
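+/// A parsed `match_token!` body: the identifier being matched and its arms.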
+struct MatchToken {
+ ident: syn::Ident,
+ arms: Vec<MatchTokenArm>,
+}
+
+struct MatchTokenArm {
+ binding: Option<syn::Ident>,
+ lhs: LHS,
+ rhs: RHS,
+}
+
+enum LHS {
+ Tags(Vec<Tag>),
+ Pattern(syn::Pat),
+}
+
+enum RHS {
+ Expression(syn::Expr),
+ Else,
+}
+
+#[derive(PartialEq, Eq, Hash, Clone)]
+enum TagKind {
+ StartTag,
+ EndTag,
+}
+
+// `name` is `None` for a wildcard tag (`<_>` or `</_>`).
+#[derive(PartialEq, Eq, Hash, Clone)]
+pub struct Tag {
+ kind: TagKind,
+ name: Option<syn::Ident>,
+}
+
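+// Parses one tag pattern: `<name>`, `</name>`, `<_>`, or `</_>`.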
+impl Parse for Tag {
+ fn parse(input: ParseStream) -> Result<Self> {
+ input.parse::<Token![<]>()?;
+ let closing: Option<Token![/]> = input.parse()?;
+ let name = match input.call(syn::Ident::parse_any)? {
+ ref wildcard if wildcard == "_" => None,
+ other => Some(other),
+ };
+ input.parse::<Token![>]>()?;
+ Ok(Tag {
+ kind: if closing.is_some() {
+ TagKind::EndTag
+ } else {
+ TagKind::StartTag
+ },
+ name,
+ })
+ }
+}
+
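+// A left-hand side is either a sequence of tag patterns (it starts with `<`)
+// or an ordinary Rust pattern.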
+impl Parse for LHS {
+ fn parse(input: ParseStream) -> Result<Self> {
+ if input.peek(Token![<]) {
+ let mut tags = Vec::new();
+ while !input.peek(Token![=>]) {
+ tags.push(input.parse()?);
+ }
+ Ok(LHS::Tags(tags))
+ } else {
+ let p: syn::Pat = input.parse()?;
+ Ok(LHS::Pattern(p))
+ }
+ }
+}
+
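+// Parses one arm: an optional `name @` binding, a left-hand side, `=>`, and
+// either a block, the keyword `else`, or an expression as the right-hand side.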
+impl Parse for MatchTokenArm {
+ fn parse(input: ParseStream) -> Result<Self> {
+ let binding = if input.peek2(Token![@]) {
+ let binding = input.parse::<syn::Ident>()?;
+ input.parse::<Token![@]>()?;
+ Some(binding)
+ } else {
+ None
+ };
+ let lhs = input.parse::<LHS>()?;
+ input.parse::<Token![=>]>()?;
+ let rhs = if input.peek(syn::token::Brace) {
+ let block = input.parse::<syn::Block>().unwrap();
+ let block = syn::ExprBlock {
+ attrs: vec![],
+ label: None,
+ block,
+ };
+ input.parse::<Option<Token![,]>>()?;
+ RHS::Expression(syn::Expr::Block(block))
+ } else if input.peek(Token![else]) {
+ input.parse::<Token![else]>()?;
+ input.parse::<Token![,]>()?;
+ RHS::Else
+ } else {
+ let expr = input.parse::<syn::Expr>().unwrap();
+ input.parse::<Option<Token![,]>>()?;
+ RHS::Expression(expr)
+ };
+
+ Ok(MatchTokenArm { binding, lhs, rhs })
+ }
+}
+
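+// Parses the full macro body: the matched identifier followed by braced arms.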
+impl Parse for MatchToken {
+ fn parse(input: ParseStream) -> Result<Self> {
+ let ident = input.parse::<syn::Ident>()?;
+ let content;
+ braced!(content in input);
+ let mut arms = vec![];
+ while !content.is_empty() {
+ arms.push(content.parse()?);
+ }
+ Ok(MatchToken { ident, arms })
+ }
+}
+
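+/// Parses a `match_token!` body and returns the expanded `match` expression.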
+pub fn expand_match_token(body: &TokenStream) -> syn::Expr {
+ let match_token = syn::parse2::<MatchToken>(body.clone());
+ let ast = expand_match_token_macro(match_token.unwrap());
+ syn::parse2(ast).unwrap()
+}
+
+fn expand_match_token_macro(match_token: MatchToken) -> TokenStream {
+ let mut arms = match_token.arms;
+ let to_be_matched = match_token.ident;
+ // Handle the last arm specially at the end.
+ let last_arm = arms.pop().unwrap();
+
+ // Tags we've seen, used for detecting duplicates.
+ let mut seen_tags: HashSet<Tag> = HashSet::new();
+
+ // Case arms for wildcard matching. We collect these and
+ // emit them later.
+ let mut wildcards_patterns: Vec<TokenStream> = Vec::new();
+ let mut wildcards_expressions: Vec<syn::Expr> = Vec::new();
+
+ // Tags excluded (by an 'else' RHS) from wildcard matching.
+ let mut wild_excluded_patterns: Vec<TokenStream> = Vec::new();
+
+ let mut arms_code = Vec::new();
+
+ for MatchTokenArm { binding, lhs, rhs } in arms {
+ // Build Rust syntax for the `name @` binding, if any.
+ let binding = match binding {
+ Some(ident) => quote!(#ident @),
+ None => quote!(),
+ };
+
+ match (lhs, rhs) {
+ (LHS::Pattern(_), RHS::Else) => {
+ panic!("'else' may not appear with an ordinary pattern")
+ },
+
+ // ordinary pattern => expression
+ (LHS::Pattern(pat), RHS::Expression(expr)) => {
+ if !wildcards_patterns.is_empty() {
+ panic!(
+ "ordinary patterns may not appear after wildcard tags {:?} {:?}",
+ pat, expr
+ );
+ }
+ arms_code.push(quote!(#binding #pat => #expr,))
+ },
+
+ // <tag> <tag> ... => else
+ (LHS::Tags(tags), RHS::Else) => {
+ for tag in tags {
+ if !seen_tags.insert(tag.clone()) {
+ panic!("duplicate tag");
+ }
+ if tag.name.is_none() {
+ panic!("'else' may not appear with a wildcard tag");
+ }
+ wild_excluded_patterns.push(make_tag_pattern(&TokenStream::new(), tag));
+ }
+ },
+
+ // <_> => expression
+ // <tag> <tag> ... => expression
+ (LHS::Tags(tags), RHS::Expression(expr)) => {
+ // Is this arm a tag wildcard?
+ // `None` if we haven't processed the first tag yet.
+ let mut wildcard = None;
+ for tag in tags {
+ if !seen_tags.insert(tag.clone()) {
+ panic!("duplicate tag");
+ }
+
+ match tag.name {
+ // <tag>
+ Some(_) => {
+ if !wildcards_patterns.is_empty() {
+ panic!("specific tags may not appear after wildcard tags");
+ }
+
+ if wildcard == Some(true) {
+ panic!("wildcard tags must appear alone");
+ }
+
+ if wildcard.is_some() {
+ // Push the delimiter `|` if it's not the first tag.
+ arms_code.push(quote!( | ))
+ }
+ arms_code.push(make_tag_pattern(&binding, tag));
+
+ wildcard = Some(false);
+ },
+
+ // <_>
+ None => {
+ if wildcard.is_some() {
+ panic!("wildcard tags must appear alone");
+ }
+ wildcard = Some(true);
+ wildcards_patterns.push(make_tag_pattern(&binding, tag));
+ wildcards_expressions.push(expr.clone());
+ },
+ }
+ }
+
+ match wildcard {
+ None => panic!("[internal macro error] tag arm with no tags"),
+ Some(false) => arms_code.push(quote!( => #expr,)),
+ Some(true) => {}, // codegen for wildcards is deferred
+ }
+ },
+ }
+ }
+
+ // Time to process the last, catch-all arm. We will generate something like
+ //
+ // last_arm_token => {
+ // let enable_wildcards = match last_arm_token {
+ // TagToken(Tag { kind: EndTag, name: local_name!("body"), .. }) => false,
+ // TagToken(Tag { kind: EndTag, name: local_name!("html"), .. }) => false,
+ // // ...
+ // _ => true,
+ // };
+ //
+ // match (enable_wildcards, last_arm_token) {
+ // (true, TagToken(name @ Tag { kind: StartTag, .. }))
+ // => ..., // wildcard action for start tags
+ //
+ // (true, TagToken(name @ Tag { kind: EndTag, .. }))
+ // => ..., // wildcard action for end tags
+ //
+ // (_, token) => ... // using the pattern from that last arm
+ // }
+ // }
+
+ let MatchTokenArm { binding, lhs, rhs } = last_arm;
+
+ let (last_pat, last_expr) = match (binding, lhs, rhs) {
+ (Some(_), _, _) => panic!("the last arm cannot have an @-binding"),
+ (None, LHS::Tags(_), _) => panic!("the last arm cannot have tag patterns"),
+ (None, _, RHS::Else) => panic!("the last arm cannot use 'else'"),
+ (None, LHS::Pattern(p), RHS::Expression(e)) => (p, e),
+ };
+
+ quote! {
+ match #to_be_matched {
+ #(
+ #arms_code
+ )*
+ last_arm_token => {
+ let enable_wildcards = match last_arm_token {
+ #(
+ #wild_excluded_patterns => false,
+ )*
+ _ => true,
+ };
+ match (enable_wildcards, last_arm_token) {
+ #(
+ (true, #wildcards_patterns) => #wildcards_expressions,
+ )*
+ (_, #last_pat) => #last_expr,
+ }
+ }
+ }
+ }
+}
+
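+// Expand `match_token!` wherever it appears as a statement or as an expression.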
+impl Fold for MatchTokenParser {
+ fn fold_stmt(&mut self, stmt: syn::Stmt) -> syn::Stmt {
+ match stmt {
+ syn::Stmt::Item(syn::Item::Macro(syn::ItemMacro { ref mac, .. })) => {
+ if mac.path == parse_quote!(match_token) {
+ return syn::fold::fold_stmt(
+ self,
+ syn::Stmt::Expr(expand_match_token(&mac.tokens)),
+ );
+ }
+ },
+ _ => {},
+ }
+
+ syn::fold::fold_stmt(self, stmt)
+ }
+
+ fn fold_expr(&mut self, expr: syn::Expr) -> syn::Expr {
+ match expr {
+ syn::Expr::Macro(syn::ExprMacro { ref mac, .. }) => {
+ if mac.path == parse_quote!(match_token) {
+ return syn::fold::fold_expr(self, expand_match_token(&mac.tokens));
+ }
+ },
+ _ => {},
+ }
+
+ syn::fold::fold_expr(self, expr)
+ }
+}
+
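+// Builds a `TagToken(...)` pattern for the given tag kind and name (the name
+// is omitted for wildcards), prefixed with the optional binding.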
+fn make_tag_pattern(binding: &TokenStream, tag: Tag) -> TokenStream {
+ let kind = match tag.kind {
+ TagKind::StartTag => quote!(crate::tokenizer::StartTag),
+ TagKind::EndTag => quote!(crate::tokenizer::EndTag),
+ };
+ let name_field = if let Some(name) = tag.name {
+ let name = name.to_string();
+ quote!(name: local_name!(#name),)
+ } else {
+ quote!()
+ };
+ quote! {
+ crate::tree_builder::types::TagToken(#binding crate::tokenizer::Tag { kind: #kind, #name_field .. })
+ }
+}