// Copyright 2014-2017 The html5ever Project Developers. See the
// COPYRIGHT file at the top-level directory of this distribution.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

/*!

Implements the `match_token!()` macro for use by the HTML tree builder
in `src/tree_builder/rules.rs`.


## Example

```rust
match_token!(token {
    CommentToken(text) => 1,

    tag @ <base> <link> <meta> => 2,

    </head> => 3,

    </body> </html> </br> => else,

    tag @ </_> => 4,

    token => 5,
})
```


## Syntax

Because of the simplistic parser, the macro invocation must
start with exactly `match_token!(token {` (with whitespace as specified)
and end with exactly `})`.

The left-hand side of each match arm is an optional `name @` binding, followed by

- an ordinary Rust pattern that starts with an identifier or an underscore, or

- a sequence of HTML tag names as identifiers, each inside "<...>" or "</...>"
  to match an open or close tag respectively, or

- a "wildcard tag" "<_>" or "</_>" to match all open tags or all close tags
  respectively.

The right-hand side is either an expression or the keyword `else`.

Note that this syntax does not support guards or pattern alternation like
`Foo | Bar`. This is not a fundamental limitation; it's done for implementation
simplicity.


## Semantics

Ordinary Rust patterns match as usual. If present, the `name @` binding has
the usual meaning.

A sequence of named tags matches any of those tags. A single sequence can
contain both open and close tags. If present, the `name @` binding binds (by
move) the `Tag` struct, not the outer `Token`. That is, a match arm like

```rust
tag @ <html> <head> => ...
```

expands to something like

```rust
TagToken(tag @ Tag { name: local_name!("html"), kind: StartTag })
| TagToken(tag @ Tag { name: local_name!("head"), kind: StartTag }) => ...
```

A wildcard tag matches any tag of the appropriate kind, *unless* it was
previously matched with an `else` right-hand side (more on this below).

The expansion of this macro reorders code somewhat, to satisfy various
restrictions arising from moves. However it provides the semantics of in-order
matching, by enforcing the following restrictions on its input:

- The last pattern must be a variable or the wildcard "_". In other words
  it must match everything.

- Otherwise, ordinary Rust patterns and specific-tag patterns cannot appear
  after wildcard tag patterns.

- No tag name may appear more than once.

- A wildcard tag pattern may not occur in the same arm as any other tag.
  "<_> <html> => ..." and "<_> </_> => ..." are both forbidden.

- The right-hand side "else" may only appear with specific-tag patterns.
  It means that these specific tags should be handled by the last,
  catch-all case arm, rather than by any wildcard tag arm. This situation
  is common in the HTML5 syntax.
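
For example, in the arms from the `## Example` section above, the
`</body> </html> </br> => else` arm has no expression of its own; instead it
suppresses the `tag @ </_>` wildcard arm for those three end tags, so they fall
through to the final `token => 5` arm. A rough sketch of the generated shape
(simplified; the real expansion uses fully qualified paths) is:

```rust
last_arm_token => {
    let enable_wildcards = match last_arm_token {
        TagToken(Tag { kind: EndTag, name: local_name!("body"), .. }) => false,
        TagToken(Tag { kind: EndTag, name: local_name!("html"), .. }) => false,
        TagToken(Tag { kind: EndTag, name: local_name!("br"), .. }) => false,
        _ => true,
    };
    match (enable_wildcards, last_arm_token) {
        (true, TagToken(tag @ Tag { kind: EndTag, .. })) => 4,
        (_, token) => 5,
    }
}
```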
*/

use quote::quote;
use syn::{braced, parse_quote, Token};

use proc_macro2::TokenStream;
use quote::ToTokens;
use std::collections::HashSet;
use std::fs::File;
use std::io::{Read, Write};
use std::path::Path;
use syn::ext::IdentExt;
use syn::fold::Fold;
use syn::parse::{Parse, ParseStream, Result};

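// `expand` is meant to be driven from a build script: it parses the given
// source file, expands every `match_token!` invocation in it, and writes the
// resulting Rust code to the output path. A minimal sketch of such a caller
// follows; the exact paths and file names are illustrative assumptions, not
// taken from this crate's actual build.rs.
//
//     use std::env;
//     use std::path::Path;
//
//     fn main() {
//         let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap();
//         let out_dir = env::var("OUT_DIR").unwrap();
//         let input = Path::new(&manifest_dir).join("src/tree_builder/rules.rs");
//         let output = Path::new(&out_dir).join("rules.rs");
//         match_token::expand(&input, &output);
//         println!("cargo:rerun-if-changed={}", input.display());
//     }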
pub fn expand(from: &Path, to: &Path) {
    let mut source = String::new();
    File::open(from)
        .unwrap()
        .read_to_string(&mut source)
        .unwrap();
    let ast = syn::parse_file(&source).expect("Parsing rules.rs module");
    let mut m = MatchTokenParser {};
    let ast = m.fold_file(ast);
    let code = ast
        .into_token_stream()
        .to_string()
        .replace("{ ", "{\n")
        .replace(" }", "\n}");
    File::create(to)
        .unwrap()
        .write_all(code.as_bytes())
        .unwrap();
}

struct MatchTokenParser {}

struct MatchToken {
    ident: syn::Ident,
    arms: Vec<MatchTokenArm>,
}

struct MatchTokenArm {
    binding: Option<syn::Ident>,
    lhs: LHS,
    rhs: RHS,
}

enum LHS {
    Tags(Vec<Tag>),
    Pattern(syn::Pat),
}

enum RHS {
    Expression(syn::Expr),
    Else,
}

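// How a source arm maps onto these types, taking one arm from the example in
// the module docs (values shown informally):
//
//     tag @ <base> <link> <meta> => 2,
//
// parses to a `MatchTokenArm` with `binding: Some(tag)`,
// `lhs: LHS::Tags(vec![<base>, <link>, <meta>])` (three start tags), and
// `rhs: RHS::Expression(2)`, while an arm ending in `=> else,` gets
// `rhs: RHS::Else`.
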
#[derive(PartialEq, Eq, Hash, Clone)]
enum TagKind {
    StartTag,
    EndTag,
}

// Option is None if wildcard
#[derive(PartialEq, Eq, Hash, Clone)]
pub struct Tag {
    kind: TagKind,
    name: Option<syn::Ident>,
}

impl Parse for Tag {
    fn parse(input: ParseStream) -> Result<Self> {
        input.parse::<Token![<]>()?;
        let closing: Option<Token![/]> = input.parse()?;
        let name = match input.call(syn::Ident::parse_any)? {
            ref wildcard if wildcard == "_" => None,
            other => Some(other),
        };
        input.parse::<Token![>]>()?;
        Ok(Tag {
            kind: if closing.is_some() {
                TagKind::EndTag
            } else {
                TagKind::StartTag
            },
            name,
        })
    }
}

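// For reference, `<base>` parses to `Tag { kind: StartTag, name: Some(base) }`,
// `</br>` to `Tag { kind: EndTag, name: Some(br) }`, and the wildcards `<_>` /
// `</_>` to a `Tag` with `name: None`.
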
impl Parse for LHS {
    fn parse(input: ParseStream) -> Result<Self> {
        if input.peek(Token![<]) {
            let mut tags = Vec::new();
            while !input.peek(Token![=>]) {
                tags.push(input.parse()?);
            }
            Ok(LHS::Tags(tags))
        } else {
            let p: syn::Pat = input.parse()?;
            Ok(LHS::Pattern(p))
        }
    }
}

impl Parse for MatchTokenArm {
    fn parse(input: ParseStream) -> Result<Self> {
        let binding = if input.peek2(Token![@]) {
            let binding = input.parse::<syn::Ident>()?;
            input.parse::<Token![@]>()?;
            Some(binding)
        } else {
            None
        };
        let lhs = input.parse::<LHS>()?;
        input.parse::<Token![=>]>()?;
        let rhs = if input.peek(syn::token::Brace) {
            let block = input.parse::<syn::Block>().unwrap();
            let block = syn::ExprBlock {
                attrs: vec![],
                label: None,
                block,
            };
            input.parse::<Option<Token![,]>>()?;
            RHS::Expression(syn::Expr::Block(block))
        } else if input.peek(Token![else]) {
            input.parse::<Token![else]>()?;
            input.parse::<Token![,]>()?;
            RHS::Else
        } else {
            let expr = input.parse::<syn::Expr>().unwrap();
            input.parse::<Option<Token![,]>>()?;
            RHS::Expression(expr)
        };

        Ok(MatchTokenArm { binding, lhs, rhs })
    }
}

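// Note on the grammar accepted above: a block RHS (`{ ... }`) and a plain
// expression RHS may be followed by an optional comma, while the `else` RHS
// requires one (`=> else,`), as in the module-level example.
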
impl Parse for MatchToken {
    fn parse(input: ParseStream) -> Result<Self> {
        let ident = input.parse::<syn::Ident>()?;
        let content;
        braced!(content in input);
        let mut arms = vec![];
        while !content.is_empty() {
            arms.push(content.parse()?);
        }
        Ok(MatchToken { ident, arms })
    }
}

pub fn expand_match_token(body: &TokenStream) -> syn::Expr {
    let match_token = syn::parse2::<MatchToken>(body.clone());
    let ast = expand_match_token_macro(match_token.unwrap());
    syn::parse2(ast).unwrap()
}

fn expand_match_token_macro(match_token: MatchToken) -> TokenStream {
    let mut arms = match_token.arms;
    let to_be_matched = match_token.ident;
    // Handle the last arm specially at the end.
    let last_arm = arms.pop().unwrap();

    // Tags we've seen, used for detecting duplicates.
    let mut seen_tags: HashSet<Tag> = HashSet::new();

    // Case arms for wildcard matching. We collect these and
    // emit them later.
    let mut wildcards_patterns: Vec<TokenStream> = Vec::new();
    let mut wildcards_expressions: Vec<syn::Expr> = Vec::new();

    // Tags excluded (by an 'else' RHS) from wildcard matching.
    let mut wild_excluded_patterns: Vec<TokenStream> = Vec::new();

    let mut arms_code = Vec::new();

    for MatchTokenArm { binding, lhs, rhs } in arms {
        // Build Rust syntax for the `name @` binding, if any.
        let binding = match binding {
            Some(ident) => quote!(#ident @),
            None => quote!(),
        };

        match (lhs, rhs) {
            (LHS::Pattern(_), RHS::Else) => {
                panic!("'else' may not appear with an ordinary pattern")
            },

            // ordinary pattern => expression
            (LHS::Pattern(pat), RHS::Expression(expr)) => {
                if !wildcards_patterns.is_empty() {
                    panic!(
                        "ordinary patterns may not appear after wildcard tags {:?} {:?}",
                        pat, expr
                    );
                }
                arms_code.push(quote!(#binding #pat => #expr,))
            },

            // <tag> <tag> ... => else
            (LHS::Tags(tags), RHS::Else) => {
                for tag in tags {
                    if !seen_tags.insert(tag.clone()) {
                        panic!("duplicate tag");
                    }
                    if tag.name.is_none() {
                        panic!("'else' may not appear with a wildcard tag");
                    }
                    wild_excluded_patterns.push(make_tag_pattern(&TokenStream::new(), tag));
                }
            },

            // <_> => expression
            // <tag> <tag> ... => expression
            (LHS::Tags(tags), RHS::Expression(expr)) => {
                // Is this arm a tag wildcard?
                // `None` if we haven't processed the first tag yet.
                let mut wildcard = None;
                for tag in tags {
                    if !seen_tags.insert(tag.clone()) {
                        panic!("duplicate tag");
                    }

                    match tag.name {
                        // <tag>
                        Some(_) => {
                            if !wildcards_patterns.is_empty() {
                                panic!("specific tags may not appear after wildcard tags");
                            }

                            if wildcard == Some(true) {
                                panic!("wildcard tags must appear alone");
                            }

                            if wildcard.is_some() {
                                // Push the delimiter `|` if it's not the first tag.
                                arms_code.push(quote!( | ))
                            }
                            arms_code.push(make_tag_pattern(&binding, tag));

                            wildcard = Some(false);
                        },

                        // <_>
                        None => {
                            if wildcard.is_some() {
                                panic!("wildcard tags must appear alone");
                            }
                            wildcard = Some(true);
                            wildcards_patterns.push(make_tag_pattern(&binding, tag));
                            wildcards_expressions.push(expr.clone());
                        },
                    }
                }

                match wildcard {
                    None => panic!("[internal macro error] tag arm with no tags"),
                    Some(false) => arms_code.push(quote!( => #expr,)),
                    Some(true) => {}, // codegen for wildcards is deferred
                }
            },
        }
    }

    // Time to process the last, catch-all arm. We will generate something like
    //
    //     last_arm_token => {
    //         let enable_wildcards = match last_arm_token {
    //             TagToken(Tag { kind: EndTag, name: local_name!("body"), .. }) => false,
    //             TagToken(Tag { kind: EndTag, name: local_name!("html"), .. }) => false,
    //             // ...
    //             _ => true,
    //         };
    //
    //         match (enable_wildcards, last_arm_token) {
    //             (true, TagToken(name @ Tag { kind: StartTag, .. }))
    //                 => ...,  // wildcard action for start tags
    //
    //             (true, TagToken(name @ Tag { kind: EndTag, .. }))
    //                 => ...,  // wildcard action for end tags
    //
    //             (_, token) => ...  // using the pattern from that last arm
    //         }
    //     }

    let MatchTokenArm { binding, lhs, rhs } = last_arm;

    let (last_pat, last_expr) = match (binding, lhs, rhs) {
        (Some(_), _, _) => panic!("the last arm cannot have an @-binding"),
        (None, LHS::Tags(_), _) => panic!("the last arm cannot have tag patterns"),
        (None, _, RHS::Else) => panic!("the last arm cannot use 'else'"),
        (None, LHS::Pattern(p), RHS::Expression(e)) => (p, e),
    };

    quote! {
        match #to_be_matched {
            #(
                #arms_code
            )*
            last_arm_token => {
                let enable_wildcards = match last_arm_token {
                    #(
                        #wild_excluded_patterns => false,
                    )*
                    _ => true,
                };
                match (enable_wildcards, last_arm_token) {
                    #(
                        (true, #wildcards_patterns) => #wildcards_expressions,
                    )*
                    (_, #last_pat) => #last_expr,
                }
            }
        }
    }
}

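// The folder below walks the parsed `rules.rs` AST and replaces every
// `match_token! { ... }` invocation, whether it appears as a statement or as
// an expression, with its expansion. All other statements and expressions are
// folded unchanged.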
impl Fold for MatchTokenParser {
    fn fold_stmt(&mut self, stmt: syn::Stmt) -> syn::Stmt {
        match stmt {
            syn::Stmt::Item(syn::Item::Macro(syn::ItemMacro { ref mac, .. })) => {
                if mac.path == parse_quote!(match_token) {
                    return syn::fold::fold_stmt(
                        self,
                        syn::Stmt::Expr(expand_match_token(&mac.tokens)),
                    );
                }
            },
            _ => {},
        }

        syn::fold::fold_stmt(self, stmt)
    }

    fn fold_expr(&mut self, expr: syn::Expr) -> syn::Expr {
        match expr {
            syn::Expr::Macro(syn::ExprMacro { ref mac, .. }) => {
                if mac.path == parse_quote!(match_token) {
                    return syn::fold::fold_expr(self, expand_match_token(&mac.tokens));
                }
            },
            _ => {},
        }

        syn::fold::fold_expr(self, expr)
    }
}

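// For a specific end tag such as `</head>` with a `tag @` binding,
// `make_tag_pattern` produces (roughly) the pattern
//
//     crate::tree_builder::types::TagToken(tag @ crate::tokenizer::Tag {
//         kind: crate::tokenizer::EndTag, name: local_name!("head"), ..
//     })
//
// and for a wildcard tag the `name` field is simply omitted from the pattern.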
fn make_tag_pattern(binding: &TokenStream, tag: Tag) -> TokenStream {
    let kind = match tag.kind {
        TagKind::StartTag => quote!(crate::tokenizer::StartTag),
        TagKind::EndTag => quote!(crate::tokenizer::EndTag),
    };
    let name_field = if let Some(name) = tag.name {
        let name = name.to_string();
        quote!(name: local_name!(#name),)
    } else {
        quote!()
    };
    quote! {
        crate::tree_builder::types::TagToken(#binding crate::tokenizer::Tag { kind: #kind, #name_field .. })
    }
}