diff options
| author | Martin Fischer <martin@push-f.com> | 2021-04-08 08:42:01 +0200 | 
|---|---|---|
| committer | Martin Fischer <martin@push-f.com> | 2021-04-08 15:40:37 +0200 | 
| commit | 57e7eefcbe6fb8c3dc4b01c707be9de4c34963a7 (patch) | |
| tree | 6a9d296389bf3023396592c8514ed6712e011c7f /macros | |
import https://github.com/servo/html5ever
commit d1206daa740305f55a5fa159e43eb33afc359cb4
Diffstat (limited to 'macros')
| -rw-r--r-- | macros/match_token.rs | 464 | 
1 files changed, 464 insertions, 0 deletions
| diff --git a/macros/match_token.rs b/macros/match_token.rs new file mode 100644 index 0000000..7d73519 --- /dev/null +++ b/macros/match_token.rs @@ -0,0 +1,464 @@ +// Copyright 2014-2017 The html5ever Project Developers. See the +// COPYRIGHT file at the top-level directory of this distribution. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +/*! + +Implements the `match_token!()` macro for use by the HTML tree builder +in `src/tree_builder/rules.rs`. + + +## Example + +```rust +match_token!(token { +    CommentToken(text) => 1, + +    tag @ <base> <link> <meta> => 2, + +    </head> => 3, + +    </body> </html> </br> => else, + +    tag @ </_> => 4, + +    token => 5, +}) +``` + + +## Syntax + +Because of the simplistic parser, the macro invocation must +start with exactly `match_token!(token {` (with whitespace as specified) +and end with exactly `})`. + +The left-hand side of each match arm is an optional `name @` binding, followed by + +  - an ordinary Rust pattern that starts with an identifier or an underscore, or + +  - a sequence of HTML tag names as identifiers, each inside "<...>" or "</...>" +    to match an open or close tag respectively, or + +  - a "wildcard tag" "<_>" or "</_>" to match all open tags or all close tags +    respectively. + +The right-hand side is either an expression or the keyword `else`. + +Note that this syntax does not support guards or pattern alternation like +`Foo | Bar`.  This is not a fundamental limitation; it's done for implementation +simplicity. + + +## Semantics + +Ordinary Rust patterns match as usual.  If present, the `name @` binding has +the usual meaning. + +A sequence of named tags matches any of those tags.  A single sequence can +contain both open and close tags.  If present, the `name @` binding binds (by +move) the `Tag` struct, not the outer `Token`.  That is, a match arm like + +```rust +tag @ <html> <head> => ... +``` + +expands to something like + +```rust +TagToken(tag @ Tag { name: local_name!("html"), kind: StartTag }) +| TagToken(tag @ Tag { name: local_name!("head"), kind: StartTag }) => ... +``` + +A wildcard tag matches any tag of the appropriate kind, *unless* it was +previously matched with an `else` right-hand side (more on this below). + +The expansion of this macro reorders code somewhat, to satisfy various +restrictions arising from moves.  However it provides the semantics of in-order +matching, by enforcing the following restrictions on its input: + +  - The last pattern must be a variable or the wildcard "_".  In other words +    it must match everything. + +  - Otherwise, ordinary Rust patterns and specific-tag patterns cannot appear +    after wildcard tag patterns. + +  - No tag name may appear more than once. + +  - A wildcard tag pattern may not occur in the same arm as any other tag. +    "<_> <html> => ..." and "<_> </_> => ..." are both forbidden. + +  - The right-hand side "else" may only appear with specific-tag patterns. +    It means that these specific tags should be handled by the last, +    catch-all case arm, rather than by any wildcard tag arm.  This situation +    is common in the HTML5 syntax. +*/ + +use quote::quote; +use syn::{braced, parse_quote, Token}; + +use proc_macro2::TokenStream; +use quote::ToTokens; +use std::collections::HashSet; +use std::fs::File; +use std::io::{Read, Write}; +use std::path::Path; +use syn::ext::IdentExt; +use syn::fold::Fold; +use syn::parse::{Parse, ParseStream, Result}; + +pub fn expand(from: &Path, to: &Path) { +    let mut source = String::new(); +    File::open(from) +        .unwrap() +        .read_to_string(&mut source) +        .unwrap(); +    let ast = syn::parse_file(&source).expect("Parsing rules.rs module"); +    let mut m = MatchTokenParser {}; +    let ast = m.fold_file(ast); +    let code = ast +        .into_token_stream() +        .to_string() +        .replace("{ ", "{\n") +        .replace(" }", "\n}"); +    File::create(to) +        .unwrap() +        .write_all(code.as_bytes()) +        .unwrap(); +} + +struct MatchTokenParser {} + +struct MatchToken { +    ident: syn::Ident, +    arms: Vec<MatchTokenArm>, +} + +struct MatchTokenArm { +    binding: Option<syn::Ident>, +    lhs: LHS, +    rhs: RHS, +} + +enum LHS { +    Tags(Vec<Tag>), +    Pattern(syn::Pat), +} + +enum RHS { +    Expression(syn::Expr), +    Else, +} + +#[derive(PartialEq, Eq, Hash, Clone)] +enum TagKind { +    StartTag, +    EndTag, +} + +// Option is None if wildcard +#[derive(PartialEq, Eq, Hash, Clone)] +pub struct Tag { +    kind: TagKind, +    name: Option<syn::Ident>, +} + +impl Parse for Tag { +    fn parse(input: ParseStream) -> Result<Self> { +        input.parse::<Token![<]>()?; +        let closing: Option<Token![/]> = input.parse()?; +        let name = match input.call(syn::Ident::parse_any)? { +            ref wildcard if wildcard == "_" => None, +            other => Some(other), +        }; +        input.parse::<Token![>]>()?; +        Ok(Tag { +            kind: if closing.is_some() { +                TagKind::EndTag +            } else { +                TagKind::StartTag +            }, +            name, +        }) +    } +} + +impl Parse for LHS { +    fn parse(input: ParseStream) -> Result<Self> { +        if input.peek(Token![<]) { +            let mut tags = Vec::new(); +            while !input.peek(Token![=>]) { +                tags.push(input.parse()?); +            } +            Ok(LHS::Tags(tags)) +        } else { +            let p: syn::Pat = input.parse()?; +            Ok(LHS::Pattern(p)) +        } +    } +} + +impl Parse for MatchTokenArm { +    fn parse(input: ParseStream) -> Result<Self> { +        let binding = if input.peek2(Token![@]) { +            let binding = input.parse::<syn::Ident>()?; +            input.parse::<Token![@]>()?; +            Some(binding) +        } else { +            None +        }; +        let lhs = input.parse::<LHS>()?; +        input.parse::<Token![=>]>()?; +        let rhs = if input.peek(syn::token::Brace) { +            let block = input.parse::<syn::Block>().unwrap(); +            let block = syn::ExprBlock { +                attrs: vec![], +                label: None, +                block, +            }; +            input.parse::<Option<Token![,]>>()?; +            RHS::Expression(syn::Expr::Block(block)) +        } else if input.peek(Token![else]) { +            input.parse::<Token![else]>()?; +            input.parse::<Token![,]>()?; +            RHS::Else +        } else { +            let expr = input.parse::<syn::Expr>().unwrap(); +            input.parse::<Option<Token![,]>>()?; +            RHS::Expression(expr) +        }; + +        Ok(MatchTokenArm { binding, lhs, rhs }) +    } +} + +impl Parse for MatchToken { +    fn parse(input: ParseStream) -> Result<Self> { +        let ident = input.parse::<syn::Ident>()?; +        let content; +        braced!(content in input); +        let mut arms = vec![]; +        while !content.is_empty() { +            arms.push(content.parse()?); +        } +        Ok(MatchToken { ident, arms }) +    } +} + +pub fn expand_match_token(body: &TokenStream) -> syn::Expr { +    let match_token = syn::parse2::<MatchToken>(body.clone()); +    let ast = expand_match_token_macro(match_token.unwrap()); +    syn::parse2(ast).unwrap() +} + +fn expand_match_token_macro(match_token: MatchToken) -> TokenStream { +    let mut arms = match_token.arms; +    let to_be_matched = match_token.ident; +    // Handle the last arm specially at the end. +    let last_arm = arms.pop().unwrap(); + +    // Tags we've seen, used for detecting duplicates. +    let mut seen_tags: HashSet<Tag> = HashSet::new(); + +    // Case arms for wildcard matching.  We collect these and +    // emit them later. +    let mut wildcards_patterns: Vec<TokenStream> = Vec::new(); +    let mut wildcards_expressions: Vec<syn::Expr> = Vec::new(); + +    // Tags excluded (by an 'else' RHS) from wildcard matching. +    let mut wild_excluded_patterns: Vec<TokenStream> = Vec::new(); + +    let mut arms_code = Vec::new(); + +    for MatchTokenArm { binding, lhs, rhs } in arms { +        // Build Rust syntax for the `name @` binding, if any. +        let binding = match binding { +            Some(ident) => quote!(#ident @), +            None => quote!(), +        }; + +        match (lhs, rhs) { +            (LHS::Pattern(_), RHS::Else) => { +                panic!("'else' may not appear with an ordinary pattern") +            }, + +            // ordinary pattern => expression +            (LHS::Pattern(pat), RHS::Expression(expr)) => { +                if !wildcards_patterns.is_empty() { +                    panic!( +                        "ordinary patterns may not appear after wildcard tags {:?} {:?}", +                        pat, expr +                    ); +                } +                arms_code.push(quote!(#binding #pat => #expr,)) +            }, + +            // <tag> <tag> ... => else +            (LHS::Tags(tags), RHS::Else) => { +                for tag in tags { +                    if !seen_tags.insert(tag.clone()) { +                        panic!("duplicate tag"); +                    } +                    if tag.name.is_none() { +                        panic!("'else' may not appear with a wildcard tag"); +                    } +                    wild_excluded_patterns.push(make_tag_pattern(&TokenStream::new(), tag)); +                } +            }, + +            // <_> => expression +            // <tag> <tag> ... => expression +            (LHS::Tags(tags), RHS::Expression(expr)) => { +                // Is this arm a tag wildcard? +                // `None` if we haven't processed the first tag yet. +                let mut wildcard = None; +                for tag in tags { +                    if !seen_tags.insert(tag.clone()) { +                        panic!("duplicate tag"); +                    } + +                    match tag.name { +                        // <tag> +                        Some(_) => { +                            if !wildcards_patterns.is_empty() { +                                panic!("specific tags may not appear after wildcard tags"); +                            } + +                            if wildcard == Some(true) { +                                panic!("wildcard tags must appear alone"); +                            } + +                            if wildcard.is_some() { +                                // Push the delimeter `|` if it's not the first tag. +                                arms_code.push(quote!( | )) +                            } +                            arms_code.push(make_tag_pattern(&binding, tag)); + +                            wildcard = Some(false); +                        }, + +                        // <_> +                        None => { +                            if wildcard.is_some() { +                                panic!("wildcard tags must appear alone"); +                            } +                            wildcard = Some(true); +                            wildcards_patterns.push(make_tag_pattern(&binding, tag)); +                            wildcards_expressions.push(expr.clone()); +                        }, +                    } +                } + +                match wildcard { +                    None => panic!("[internal macro error] tag arm with no tags"), +                    Some(false) => arms_code.push(quote!( => #expr,)), +                    Some(true) => {}, // codegen for wildcards is deferred +                } +            }, +        } +    } + +    // Time to process the last, catch-all arm.  We will generate something like +    // +    //     last_arm_token => { +    //         let enable_wildcards = match last_arm_token { +    //             TagToken(Tag { kind: EndTag, name: local_name!("body"), .. }) => false, +    //             TagToken(Tag { kind: EndTag, name: local_name!("html"), .. }) => false, +    //             // ... +    //             _ => true, +    //         }; +    // +    //         match (enable_wildcards, last_arm_token) { +    //             (true, TagToken(name @ Tag { kind: StartTag, .. })) +    //                 => ...,  // wildcard action for start tags +    // +    //             (true, TagToken(name @ Tag { kind: EndTag, .. })) +    //                 => ...,  // wildcard action for end tags +    // +    //             (_, token) => ...  // using the pattern from that last arm +    //         } +    //     } + +    let MatchTokenArm { binding, lhs, rhs } = last_arm; + +    let (last_pat, last_expr) = match (binding, lhs, rhs) { +        (Some(_), _, _) => panic!("the last arm cannot have an @-binding"), +        (None, LHS::Tags(_), _) => panic!("the last arm cannot have tag patterns"), +        (None, _, RHS::Else) => panic!("the last arm cannot use 'else'"), +        (None, LHS::Pattern(p), RHS::Expression(e)) => (p, e), +    }; + +    quote! { +        match #to_be_matched { +            #( +                #arms_code +            )* +            last_arm_token => { +                let enable_wildcards = match last_arm_token { +                    #( +                        #wild_excluded_patterns => false, +                    )* +                    _ => true, +                }; +                match (enable_wildcards, last_arm_token) { +                    #( +                        (true, #wildcards_patterns) => #wildcards_expressions, +                    )* +                    (_, #last_pat) => #last_expr, +                } +            } +        } +    } +} + +impl Fold for MatchTokenParser { +    fn fold_stmt(&mut self, stmt: syn::Stmt) -> syn::Stmt { +        match stmt { +            syn::Stmt::Item(syn::Item::Macro(syn::ItemMacro { ref mac, .. })) => { +                if mac.path == parse_quote!(match_token) { +                    return syn::fold::fold_stmt( +                        self, +                        syn::Stmt::Expr(expand_match_token(&mac.tokens)), +                    ); +                } +            }, +            _ => {}, +        } + +        syn::fold::fold_stmt(self, stmt) +    } + +    fn fold_expr(&mut self, expr: syn::Expr) -> syn::Expr { +        match expr { +            syn::Expr::Macro(syn::ExprMacro { ref mac, .. }) => { +                if mac.path == parse_quote!(match_token) { +                    return syn::fold::fold_expr(self, expand_match_token(&mac.tokens)); +                } +            }, +            _ => {}, +        } + +        syn::fold::fold_expr(self, expr) +    } +} + +fn make_tag_pattern(binding: &TokenStream, tag: Tag) -> TokenStream { +    let kind = match tag.kind { +        TagKind::StartTag => quote!(crate::tokenizer::StartTag), +        TagKind::EndTag => quote!(crate::tokenizer::EndTag), +    }; +    let name_field = if let Some(name) = tag.name { +        let name = name.to_string(); +        quote!(name: local_name!(#name),) +    } else { +        quote!() +    }; +    quote! { +        crate::tree_builder::types::TagToken(#binding crate::tokenizer::Tag { kind: #kind, #name_field .. }) +    } +} | 
