author    | Martin Fischer <martin@push-f.com> | 2021-04-08 08:42:01 +0200
committer | Martin Fischer <martin@push-f.com> | 2021-04-08 15:40:37 +0200
commit    | 57e7eefcbe6fb8c3dc4b01c707be9de4c34963a7 (patch)
tree      | 6a9d296389bf3023396592c8514ed6712e011c7f /macros/match_token.rs
import https://github.com/servo/html5ever
commit d1206daa740305f55a5fa159e43eb33afc359cb4
Diffstat (limited to 'macros/match_token.rs')
-rw-r--r-- | macros/match_token.rs | 464
1 files changed, 464 insertions, 0 deletions
diff --git a/macros/match_token.rs b/macros/match_token.rs
new file mode 100644
index 0000000..7d73519
--- /dev/null
+++ b/macros/match_token.rs
@@ -0,0 +1,464 @@
// Copyright 2014-2017 The html5ever Project Developers. See the
// COPYRIGHT file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

/*!

Implements the `match_token!()` macro for use by the HTML tree builder
in `src/tree_builder/rules.rs`.


## Example

```rust
match_token!(token {
    CommentToken(text) => 1,

    tag @ <base> <link> <meta> => 2,

    </head> => 3,

    </body> </html> </br> => else,

    tag @ </_> => 4,

    token => 5,
})
```


## Syntax

Because of the simplistic parser, the macro invocation must
start with exactly `match_token!(token {` (with whitespace as specified)
and end with exactly `})`.

The left-hand side of each match arm is an optional `name @` binding, followed by

  - an ordinary Rust pattern that starts with an identifier or an underscore, or

  - a sequence of HTML tag names as identifiers, each inside "<...>" or "</...>"
    to match an open or close tag respectively, or

  - a "wildcard tag" "<_>" or "</_>" to match all open tags or all close tags
    respectively.

The right-hand side is either an expression or the keyword `else`.

Note that this syntax does not support guards or pattern alternation like
`Foo | Bar`. This is not a fundamental limitation; it's done for implementation
simplicity.


## Semantics

Ordinary Rust patterns match as usual. If present, the `name @` binding has
the usual meaning.

A sequence of named tags matches any of those tags. A single sequence can
contain both open and close tags. If present, the `name @` binding binds (by
move) the `Tag` struct, not the outer `Token`. That is, a match arm like

```rust
tag @ <html> <head> => ...
```

expands to something like

```rust
TagToken(tag @ Tag { name: local_name!("html"), kind: StartTag })
| TagToken(tag @ Tag { name: local_name!("head"), kind: StartTag }) => ...
```

A wildcard tag matches any tag of the appropriate kind, *unless* it was
previously matched with an `else` right-hand side (more on this below).

The expansion of this macro reorders code somewhat, to satisfy various
restrictions arising from moves. However it provides the semantics of in-order
matching, by enforcing the following restrictions on its input:

  - The last pattern must be a variable or the wildcard "_". In other words
    it must match everything.

  - Otherwise, ordinary Rust patterns and specific-tag patterns cannot appear
    after wildcard tag patterns.

  - No tag name may appear more than once.

  - A wildcard tag pattern may not occur in the same arm as any other tag.
    "<_> <html> => ..." and "<_> </_> => ..." are both forbidden.

  - The right-hand side "else" may only appear with specific-tag patterns.
    It means that these specific tags should be handled by the last,
    catch-all case arm, rather than by any wildcard tag arm. This situation
    is common in the HTML5 syntax.
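
As a rough sketch (paths shortened, names simplified; the real expansion is
built by `expand_match_token_macro` below), the `</body> </html> </br> => else`
arm in the example above emits no match arm of its own. Instead it feeds the
guard that decides whether the wildcard arms may fire:

```rust
// Sketch only: the generated code first checks whether the token was
// claimed by an `else` arm and therefore excluded from wildcard matching.
let enable_wildcards = match token {
    TagToken(Tag { kind: EndTag, name: local_name!("body"), .. }) => false,
    TagToken(Tag { kind: EndTag, name: local_name!("html"), .. }) => false,
    TagToken(Tag { kind: EndTag, name: local_name!("br"), .. }) => false,
    _ => true,
};
// Only wildcard arms guarded by `enable_wildcards == true` can then match;
// an excluded token falls through to the final catch-all arm instead.
```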
+*/ + +use quote::quote; +use syn::{braced, parse_quote, Token}; + +use proc_macro2::TokenStream; +use quote::ToTokens; +use std::collections::HashSet; +use std::fs::File; +use std::io::{Read, Write}; +use std::path::Path; +use syn::ext::IdentExt; +use syn::fold::Fold; +use syn::parse::{Parse, ParseStream, Result}; + +pub fn expand(from: &Path, to: &Path) { + let mut source = String::new(); + File::open(from) + .unwrap() + .read_to_string(&mut source) + .unwrap(); + let ast = syn::parse_file(&source).expect("Parsing rules.rs module"); + let mut m = MatchTokenParser {}; + let ast = m.fold_file(ast); + let code = ast + .into_token_stream() + .to_string() + .replace("{ ", "{\n") + .replace(" }", "\n}"); + File::create(to) + .unwrap() + .write_all(code.as_bytes()) + .unwrap(); +} + +struct MatchTokenParser {} + +struct MatchToken { + ident: syn::Ident, + arms: Vec<MatchTokenArm>, +} + +struct MatchTokenArm { + binding: Option<syn::Ident>, + lhs: LHS, + rhs: RHS, +} + +enum LHS { + Tags(Vec<Tag>), + Pattern(syn::Pat), +} + +enum RHS { + Expression(syn::Expr), + Else, +} + +#[derive(PartialEq, Eq, Hash, Clone)] +enum TagKind { + StartTag, + EndTag, +} + +// Option is None if wildcard +#[derive(PartialEq, Eq, Hash, Clone)] +pub struct Tag { + kind: TagKind, + name: Option<syn::Ident>, +} + +impl Parse for Tag { + fn parse(input: ParseStream) -> Result<Self> { + input.parse::<Token![<]>()?; + let closing: Option<Token![/]> = input.parse()?; + let name = match input.call(syn::Ident::parse_any)? { + ref wildcard if wildcard == "_" => None, + other => Some(other), + }; + input.parse::<Token![>]>()?; + Ok(Tag { + kind: if closing.is_some() { + TagKind::EndTag + } else { + TagKind::StartTag + }, + name, + }) + } +} + +impl Parse for LHS { + fn parse(input: ParseStream) -> Result<Self> { + if input.peek(Token![<]) { + let mut tags = Vec::new(); + while !input.peek(Token![=>]) { + tags.push(input.parse()?); + } + Ok(LHS::Tags(tags)) + } else { + let p: syn::Pat = input.parse()?; + Ok(LHS::Pattern(p)) + } + } +} + +impl Parse for MatchTokenArm { + fn parse(input: ParseStream) -> Result<Self> { + let binding = if input.peek2(Token![@]) { + let binding = input.parse::<syn::Ident>()?; + input.parse::<Token![@]>()?; + Some(binding) + } else { + None + }; + let lhs = input.parse::<LHS>()?; + input.parse::<Token![=>]>()?; + let rhs = if input.peek(syn::token::Brace) { + let block = input.parse::<syn::Block>().unwrap(); + let block = syn::ExprBlock { + attrs: vec![], + label: None, + block, + }; + input.parse::<Option<Token![,]>>()?; + RHS::Expression(syn::Expr::Block(block)) + } else if input.peek(Token![else]) { + input.parse::<Token![else]>()?; + input.parse::<Token![,]>()?; + RHS::Else + } else { + let expr = input.parse::<syn::Expr>().unwrap(); + input.parse::<Option<Token![,]>>()?; + RHS::Expression(expr) + }; + + Ok(MatchTokenArm { binding, lhs, rhs }) + } +} + +impl Parse for MatchToken { + fn parse(input: ParseStream) -> Result<Self> { + let ident = input.parse::<syn::Ident>()?; + let content; + braced!(content in input); + let mut arms = vec![]; + while !content.is_empty() { + arms.push(content.parse()?); + } + Ok(MatchToken { ident, arms }) + } +} + +pub fn expand_match_token(body: &TokenStream) -> syn::Expr { + let match_token = syn::parse2::<MatchToken>(body.clone()); + let ast = expand_match_token_macro(match_token.unwrap()); + syn::parse2(ast).unwrap() +} + +fn expand_match_token_macro(match_token: MatchToken) -> TokenStream { + let mut arms = match_token.arms; + let to_be_matched = 
match_token.ident; + // Handle the last arm specially at the end. + let last_arm = arms.pop().unwrap(); + + // Tags we've seen, used for detecting duplicates. + let mut seen_tags: HashSet<Tag> = HashSet::new(); + + // Case arms for wildcard matching. We collect these and + // emit them later. + let mut wildcards_patterns: Vec<TokenStream> = Vec::new(); + let mut wildcards_expressions: Vec<syn::Expr> = Vec::new(); + + // Tags excluded (by an 'else' RHS) from wildcard matching. + let mut wild_excluded_patterns: Vec<TokenStream> = Vec::new(); + + let mut arms_code = Vec::new(); + + for MatchTokenArm { binding, lhs, rhs } in arms { + // Build Rust syntax for the `name @` binding, if any. + let binding = match binding { + Some(ident) => quote!(#ident @), + None => quote!(), + }; + + match (lhs, rhs) { + (LHS::Pattern(_), RHS::Else) => { + panic!("'else' may not appear with an ordinary pattern") + }, + + // ordinary pattern => expression + (LHS::Pattern(pat), RHS::Expression(expr)) => { + if !wildcards_patterns.is_empty() { + panic!( + "ordinary patterns may not appear after wildcard tags {:?} {:?}", + pat, expr + ); + } + arms_code.push(quote!(#binding #pat => #expr,)) + }, + + // <tag> <tag> ... => else + (LHS::Tags(tags), RHS::Else) => { + for tag in tags { + if !seen_tags.insert(tag.clone()) { + panic!("duplicate tag"); + } + if tag.name.is_none() { + panic!("'else' may not appear with a wildcard tag"); + } + wild_excluded_patterns.push(make_tag_pattern(&TokenStream::new(), tag)); + } + }, + + // <_> => expression + // <tag> <tag> ... => expression + (LHS::Tags(tags), RHS::Expression(expr)) => { + // Is this arm a tag wildcard? + // `None` if we haven't processed the first tag yet. + let mut wildcard = None; + for tag in tags { + if !seen_tags.insert(tag.clone()) { + panic!("duplicate tag"); + } + + match tag.name { + // <tag> + Some(_) => { + if !wildcards_patterns.is_empty() { + panic!("specific tags may not appear after wildcard tags"); + } + + if wildcard == Some(true) { + panic!("wildcard tags must appear alone"); + } + + if wildcard.is_some() { + // Push the delimeter `|` if it's not the first tag. + arms_code.push(quote!( | )) + } + arms_code.push(make_tag_pattern(&binding, tag)); + + wildcard = Some(false); + }, + + // <_> + None => { + if wildcard.is_some() { + panic!("wildcard tags must appear alone"); + } + wildcard = Some(true); + wildcards_patterns.push(make_tag_pattern(&binding, tag)); + wildcards_expressions.push(expr.clone()); + }, + } + } + + match wildcard { + None => panic!("[internal macro error] tag arm with no tags"), + Some(false) => arms_code.push(quote!( => #expr,)), + Some(true) => {}, // codegen for wildcards is deferred + } + }, + } + } + + // Time to process the last, catch-all arm. We will generate something like + // + // last_arm_token => { + // let enable_wildcards = match last_arm_token { + // TagToken(Tag { kind: EndTag, name: local_name!("body"), .. }) => false, + // TagToken(Tag { kind: EndTag, name: local_name!("html"), .. }) => false, + // // ... + // _ => true, + // }; + // + // match (enable_wildcards, last_arm_token) { + // (true, TagToken(name @ Tag { kind: StartTag, .. })) + // => ..., // wildcard action for start tags + // + // (true, TagToken(name @ Tag { kind: EndTag, .. })) + // => ..., // wildcard action for end tags + // + // (_, token) => ... 
// using the pattern from that last arm + // } + // } + + let MatchTokenArm { binding, lhs, rhs } = last_arm; + + let (last_pat, last_expr) = match (binding, lhs, rhs) { + (Some(_), _, _) => panic!("the last arm cannot have an @-binding"), + (None, LHS::Tags(_), _) => panic!("the last arm cannot have tag patterns"), + (None, _, RHS::Else) => panic!("the last arm cannot use 'else'"), + (None, LHS::Pattern(p), RHS::Expression(e)) => (p, e), + }; + + quote! { + match #to_be_matched { + #( + #arms_code + )* + last_arm_token => { + let enable_wildcards = match last_arm_token { + #( + #wild_excluded_patterns => false, + )* + _ => true, + }; + match (enable_wildcards, last_arm_token) { + #( + (true, #wildcards_patterns) => #wildcards_expressions, + )* + (_, #last_pat) => #last_expr, + } + } + } + } +} + +impl Fold for MatchTokenParser { + fn fold_stmt(&mut self, stmt: syn::Stmt) -> syn::Stmt { + match stmt { + syn::Stmt::Item(syn::Item::Macro(syn::ItemMacro { ref mac, .. })) => { + if mac.path == parse_quote!(match_token) { + return syn::fold::fold_stmt( + self, + syn::Stmt::Expr(expand_match_token(&mac.tokens)), + ); + } + }, + _ => {}, + } + + syn::fold::fold_stmt(self, stmt) + } + + fn fold_expr(&mut self, expr: syn::Expr) -> syn::Expr { + match expr { + syn::Expr::Macro(syn::ExprMacro { ref mac, .. }) => { + if mac.path == parse_quote!(match_token) { + return syn::fold::fold_expr(self, expand_match_token(&mac.tokens)); + } + }, + _ => {}, + } + + syn::fold::fold_expr(self, expr) + } +} + +fn make_tag_pattern(binding: &TokenStream, tag: Tag) -> TokenStream { + let kind = match tag.kind { + TagKind::StartTag => quote!(crate::tokenizer::StartTag), + TagKind::EndTag => quote!(crate::tokenizer::EndTag), + }; + let name_field = if let Some(name) = tag.name { + let name = name.to_string(); + quote!(name: local_name!(#name),) + } else { + quote!() + }; + quote! { + crate::tree_builder::types::TagToken(#binding crate::tokenizer::Tag { kind: #kind, #name_field .. }) + } +} |
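
For context beyond this file: `expand()` reads a source file, expands every
`match_token!` invocation it finds, and writes the result out, so it is meant
to be driven from a build script rather than compiled as a procedural macro.
Below is a minimal sketch of such a `build.rs`; the `OUT_DIR` output name, the
`rerun-if-changed` line, and the `include!` convention are illustrative
assumptions, not part of this diff (only the `src/tree_builder/rules.rs` input
path is taken from the doc comment above).

```rust
// build.rs -- sketch only; requires syn, quote, and proc-macro2 as
// build-dependencies, and assumes the crate later pulls the output in with
// include!(concat!(env!("OUT_DIR"), "/rules.rs")).
#[path = "macros/match_token.rs"]
mod match_token;

use std::env;
use std::path::Path;

fn main() {
    let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap();
    // The tree builder rules that contain the match_token! invocations.
    let input = Path::new(&manifest_dir).join("src/tree_builder/rules.rs");
    // Write the expanded source next to the other generated build artifacts.
    let output = Path::new(&env::var("OUT_DIR").unwrap()).join("rules.rs");
    println!("cargo:rerun-if-changed={}", input.display());
    match_token::expand(&input, &output);
}
```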