diff options
-rw-r--r-- | Cargo.toml | 1 | ||||
-rw-r--r-- | build.rs | 33 | ||||
-rw-r--r-- | examples/arena.rs | 335 | ||||
-rw-r--r-- | examples/noop-tree-builder.rs | 112 | ||||
-rw-r--r-- | examples/print-tree-actions.rs | 177 | ||||
-rw-r--r-- | fuzz/.gitignore | 4 | ||||
-rw-r--r-- | fuzz/Cargo.toml | 27 | ||||
-rw-r--r-- | fuzz/fuzz_targets/fuzz_document_parse.rs | 35 | ||||
-rw-r--r-- | src/driver.rs | 137 | ||||
-rw-r--r-- | src/lib.rs | 6 | ||||
-rw-r--r-- | src/serialize/mod.rs | 256 | ||||
-rw-r--r-- | src/tree_builder/data.rs | 171 | ||||
-rw-r--r-- | src/tree_builder/mod.rs | 1681 | ||||
-rw-r--r-- | src/tree_builder/rules.rs | 1449 | ||||
-rw-r--r-- | src/tree_builder/tag_sets.rs | 115 | ||||
-rw-r--r-- | src/tree_builder/types.rs | 95 | ||||
-rw-r--r-- | src/util/str.rs | 13 |
17 files changed, 0 insertions, 4647 deletions
@@ -7,7 +7,6 @@ license = "MIT / Apache-2.0" repository = "https://github.com/servo/html5ever" description = "High-performance browser-grade HTML5 parser" documentation = "https://docs.rs/html5ever" -build = "build.rs" categories = [ "parser-implementations", "web-programming" ] edition = "2018" diff --git a/build.rs b/build.rs deleted file mode 100644 index bfac771..0000000 --- a/build.rs +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright 2014-2017 The html5ever Project Developers. See the -// COPYRIGHT file at the top-level directory of this distribution. -// -// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or -// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license -// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -use std::env; -use std::path::Path; -use std::thread::Builder; - -#[path = "macros/match_token.rs"] -mod match_token; - -fn main() { - let manifest_dir = env::var("CARGO_MANIFEST_DIR").unwrap(); - - let input = Path::new(&manifest_dir).join("src/tree_builder/rules.rs"); - let output = Path::new(&env::var("OUT_DIR").unwrap()).join("rules.rs"); - println!("cargo:rerun-if-changed={}", input.display()); - - // We have stack overflows on Servo's CI. - let handle = Builder::new() - .stack_size(128 * 1024 * 1024) - .spawn(move || { - match_token::expand(&input, &output); - }) - .unwrap(); - - handle.join().unwrap(); -} diff --git a/examples/arena.rs b/examples/arena.rs deleted file mode 100644 index 1b59ae1..0000000 --- a/examples/arena.rs +++ /dev/null @@ -1,335 +0,0 @@ -// Copyright 2014-2017 The html5ever Project Developers. See the -// COPYRIGHT file at the top-level directory of this distribution. -// -// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or -// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license -// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your -// option. 
This file may not be copied, modified, or distributed -// except according to those terms. - -extern crate html5ever; -extern crate typed_arena; - -use html5ever::interface::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink}; -use html5ever::tendril::{StrTendril, TendrilSink}; -use html5ever::{parse_document, Attribute, ExpandedName, QualName}; -use std::borrow::Cow; -use std::cell::{Cell, RefCell}; -use std::collections::HashSet; -use std::io::{self, Read}; -use std::ptr; - -fn main() { - let mut bytes = Vec::new(); - io::stdin().read_to_end(&mut bytes).unwrap(); - let arena = typed_arena::Arena::new(); - html5ever_parse_slice_into_arena(&bytes, &arena); -} - -fn html5ever_parse_slice_into_arena<'a>(bytes: &[u8], arena: Arena<'a>) -> Ref<'a> { - let sink = Sink { - arena: arena, - document: arena.alloc(Node::new(NodeData::Document)), - quirks_mode: QuirksMode::NoQuirks, - }; - parse_document(sink, Default::default()) - .from_utf8() - .one(bytes) -} - -type Arena<'arena> = &'arena typed_arena::Arena<Node<'arena>>; - -type Ref<'arena> = &'arena Node<'arena>; - -type Link<'arena> = Cell<Option<Ref<'arena>>>; - -struct Sink<'arena> { - arena: Arena<'arena>, - document: Ref<'arena>, - quirks_mode: QuirksMode, -} - -pub struct Node<'arena> { - parent: Link<'arena>, - next_sibling: Link<'arena>, - previous_sibling: Link<'arena>, - first_child: Link<'arena>, - last_child: Link<'arena>, - data: NodeData<'arena>, -} - -pub enum NodeData<'arena> { - Document, - Doctype { - name: StrTendril, - public_id: StrTendril, - system_id: StrTendril, - }, - Text { - contents: RefCell<StrTendril>, - }, - Comment { - contents: StrTendril, - }, - Element { - name: QualName, - attrs: RefCell<Vec<Attribute>>, - template_contents: Option<Ref<'arena>>, - mathml_annotation_xml_integration_point: bool, - }, - ProcessingInstruction { - target: StrTendril, - contents: StrTendril, - }, -} - -impl<'arena> Node<'arena> { - fn new(data: NodeData<'arena>) -> Self { - Node { - parent: 
Cell::new(None), - previous_sibling: Cell::new(None), - next_sibling: Cell::new(None), - first_child: Cell::new(None), - last_child: Cell::new(None), - data: data, - } - } - - fn detach(&self) { - let parent = self.parent.take(); - let previous_sibling = self.previous_sibling.take(); - let next_sibling = self.next_sibling.take(); - - if let Some(next_sibling) = next_sibling { - next_sibling.previous_sibling.set(previous_sibling); - } else if let Some(parent) = parent { - parent.last_child.set(previous_sibling); - } - - if let Some(previous_sibling) = previous_sibling { - previous_sibling.next_sibling.set(next_sibling); - } else if let Some(parent) = parent { - parent.first_child.set(next_sibling); - } - } - - fn append(&'arena self, new_child: &'arena Self) { - new_child.detach(); - new_child.parent.set(Some(self)); - if let Some(last_child) = self.last_child.take() { - new_child.previous_sibling.set(Some(last_child)); - debug_assert!(last_child.next_sibling.get().is_none()); - last_child.next_sibling.set(Some(new_child)); - } else { - debug_assert!(self.first_child.get().is_none()); - self.first_child.set(Some(new_child)); - } - self.last_child.set(Some(new_child)); - } - - fn insert_before(&'arena self, new_sibling: &'arena Self) { - new_sibling.detach(); - new_sibling.parent.set(self.parent.get()); - new_sibling.next_sibling.set(Some(self)); - if let Some(previous_sibling) = self.previous_sibling.take() { - new_sibling.previous_sibling.set(Some(previous_sibling)); - debug_assert!(ptr::eq::<Node>( - previous_sibling.next_sibling.get().unwrap(), - self - )); - previous_sibling.next_sibling.set(Some(new_sibling)); - } else if let Some(parent) = self.parent.get() { - debug_assert!(ptr::eq::<Node>(parent.first_child.get().unwrap(), self)); - parent.first_child.set(Some(new_sibling)); - } - self.previous_sibling.set(Some(new_sibling)); - } -} - -impl<'arena> Sink<'arena> { - fn new_node(&self, data: NodeData<'arena>) -> Ref<'arena> { - 
self.arena.alloc(Node::new(data)) - } - - fn append_common<P, A>(&self, child: NodeOrText<Ref<'arena>>, previous: P, append: A) - where - P: FnOnce() -> Option<Ref<'arena>>, - A: FnOnce(Ref<'arena>), - { - let new_node = match child { - NodeOrText::AppendText(text) => { - // Append to an existing Text node if we have one. - if let Some(&Node { - data: NodeData::Text { ref contents }, - .. - }) = previous() - { - contents.borrow_mut().push_tendril(&text); - return; - } - self.new_node(NodeData::Text { - contents: RefCell::new(text), - }) - }, - NodeOrText::AppendNode(node) => node, - }; - - append(new_node) - } -} - -impl<'arena> TreeSink for Sink<'arena> { - type Handle = Ref<'arena>; - type Output = Ref<'arena>; - - fn finish(self) -> Ref<'arena> { - self.document - } - - fn parse_error(&mut self, _: Cow<'static, str>) {} - - fn get_document(&mut self) -> Ref<'arena> { - self.document - } - - fn set_quirks_mode(&mut self, mode: QuirksMode) { - self.quirks_mode = mode; - } - - fn same_node(&self, x: &Ref<'arena>, y: &Ref<'arena>) -> bool { - ptr::eq::<Node>(*x, *y) - } - - fn elem_name<'a>(&self, target: &'a Ref<'arena>) -> ExpandedName<'a> { - match target.data { - NodeData::Element { ref name, .. } => name.expanded(), - _ => panic!("not an element!"), - } - } - - fn get_template_contents(&mut self, target: &Ref<'arena>) -> Ref<'arena> { - if let NodeData::Element { - template_contents: Some(ref contents), - .. - } = target.data - { - contents - } else { - panic!("not a template element!") - } - } - - fn is_mathml_annotation_xml_integration_point(&self, target: &Ref<'arena>) -> bool { - if let NodeData::Element { - mathml_annotation_xml_integration_point, - .. 
- } = target.data - { - mathml_annotation_xml_integration_point - } else { - panic!("not an element!") - } - } - - fn create_element( - &mut self, - name: QualName, - attrs: Vec<Attribute>, - flags: ElementFlags, - ) -> Ref<'arena> { - self.new_node(NodeData::Element { - name, - attrs: RefCell::new(attrs), - template_contents: if flags.template { - Some(self.new_node(NodeData::Document)) - } else { - None - }, - mathml_annotation_xml_integration_point: flags.mathml_annotation_xml_integration_point, - }) - } - - fn create_comment(&mut self, text: StrTendril) -> Ref<'arena> { - self.new_node(NodeData::Comment { contents: text }) - } - - fn create_pi(&mut self, target: StrTendril, data: StrTendril) -> Ref<'arena> { - self.new_node(NodeData::ProcessingInstruction { - target: target, - contents: data, - }) - } - - fn append(&mut self, parent: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) { - self.append_common( - child, - || parent.last_child.get(), - |new_node| parent.append(new_node), - ) - } - - fn append_before_sibling(&mut self, sibling: &Ref<'arena>, child: NodeOrText<Ref<'arena>>) { - self.append_common( - child, - || sibling.previous_sibling.get(), - |new_node| sibling.insert_before(new_node), - ) - } - - fn append_based_on_parent_node( - &mut self, - element: &Ref<'arena>, - prev_element: &Ref<'arena>, - child: NodeOrText<Ref<'arena>>, - ) { - if element.parent.get().is_some() { - self.append_before_sibling(element, child) - } else { - self.append(prev_element, child) - } - } - - fn append_doctype_to_document( - &mut self, - name: StrTendril, - public_id: StrTendril, - system_id: StrTendril, - ) { - self.document.append(self.new_node(NodeData::Doctype { - name, - public_id, - system_id, - })) - } - - fn add_attrs_if_missing(&mut self, target: &Ref<'arena>, attrs: Vec<Attribute>) { - let mut existing = if let NodeData::Element { ref attrs, .. 
} = target.data { - attrs.borrow_mut() - } else { - panic!("not an element") - }; - - let existing_names = existing - .iter() - .map(|e| e.name.clone()) - .collect::<HashSet<_>>(); - existing.extend( - attrs - .into_iter() - .filter(|attr| !existing_names.contains(&attr.name)), - ); - } - - fn remove_from_parent(&mut self, target: &Ref<'arena>) { - target.detach() - } - - fn reparent_children(&mut self, node: &Ref<'arena>, new_parent: &Ref<'arena>) { - let mut next_child = node.first_child.get(); - while let Some(child) = next_child { - debug_assert!(ptr::eq::<Node>(child.parent.get().unwrap(), *node)); - next_child = child.next_sibling.get(); - new_parent.append(child) - } - } -} diff --git a/examples/noop-tree-builder.rs b/examples/noop-tree-builder.rs deleted file mode 100644 index 0775449..0000000 --- a/examples/noop-tree-builder.rs +++ /dev/null @@ -1,112 +0,0 @@ -// Copyright 2014-2017 The html5ever Project Developers. See the -// COPYRIGHT file at the top-level directory of this distribution. -// -// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or -// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license -// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. 
- -#[macro_use] -extern crate html5ever; - -use std::borrow::Cow; -use std::collections::HashMap; -use std::default::Default; -use std::io; - -use html5ever::parse_document; -use html5ever::tendril::*; -use html5ever::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink}; -use html5ever::{Attribute, ExpandedName, QualName}; - -struct Sink { - next_id: usize, - names: HashMap<usize, QualName>, -} - -impl Sink { - fn get_id(&mut self) -> usize { - let id = self.next_id; - self.next_id += 2; - id - } -} - -impl TreeSink for Sink { - type Handle = usize; - type Output = Self; - fn finish(self) -> Self { - self - } - - fn get_document(&mut self) -> usize { - 0 - } - - fn get_template_contents(&mut self, target: &usize) -> usize { - if let Some(expanded_name!(html "template")) = self.names.get(&target).map(|n| n.expanded()) - { - target + 1 - } else { - panic!("not a template element") - } - } - - fn same_node(&self, x: &usize, y: &usize) -> bool { - x == y - } - - fn elem_name(&self, target: &usize) -> ExpandedName { - self.names.get(target).expect("not an element").expanded() - } - - fn create_element(&mut self, name: QualName, _: Vec<Attribute>, _: ElementFlags) -> usize { - let id = self.get_id(); - self.names.insert(id, name); - id - } - - fn create_comment(&mut self, _text: StrTendril) -> usize { - self.get_id() - } - - #[allow(unused_variables)] - fn create_pi(&mut self, target: StrTendril, value: StrTendril) -> usize { - unimplemented!() - } - - fn append_before_sibling(&mut self, _sibling: &usize, _new_node: NodeOrText<usize>) {} - - fn append_based_on_parent_node( - &mut self, - _element: &usize, - _prev_element: &usize, - _new_node: NodeOrText<usize>, - ) { - } - - fn parse_error(&mut self, _msg: Cow<'static, str>) {} - fn set_quirks_mode(&mut self, _mode: QuirksMode) {} - fn append(&mut self, _parent: &usize, _child: NodeOrText<usize>) {} - - fn append_doctype_to_document(&mut self, _: StrTendril, _: StrTendril, _: StrTendril) {} - fn 
add_attrs_if_missing(&mut self, target: &usize, _attrs: Vec<Attribute>) { - assert!(self.names.contains_key(&target), "not an element"); - } - fn remove_from_parent(&mut self, _target: &usize) {} - fn reparent_children(&mut self, _node: &usize, _new_parent: &usize) {} - fn mark_script_already_started(&mut self, _node: &usize) {} -} - -fn main() { - let sink = Sink { - next_id: 1, - names: HashMap::new(), - }; - let stdin = io::stdin(); - parse_document(sink, Default::default()) - .from_utf8() - .read_from(&mut stdin.lock()) - .unwrap(); -} diff --git a/examples/print-tree-actions.rs b/examples/print-tree-actions.rs deleted file mode 100644 index dbb6c6e..0000000 --- a/examples/print-tree-actions.rs +++ /dev/null @@ -1,177 +0,0 @@ -// Copyright 2014-2017 The html5ever Project Developers. See the -// COPYRIGHT file at the top-level directory of this distribution. -// -// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or -// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license -// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. 
- -#[macro_use] -extern crate html5ever; - -use std::borrow::Cow; -use std::collections::HashMap; -use std::default::Default; -use std::io; - -use html5ever::parse_document; -use html5ever::tendril::*; -use html5ever::tree_builder::{ - AppendNode, AppendText, ElementFlags, NodeOrText, QuirksMode, TreeSink, -}; -use html5ever::{Attribute, ExpandedName, QualName}; - -struct Sink { - next_id: usize, - names: HashMap<usize, QualName>, -} - -impl Sink { - fn get_id(&mut self) -> usize { - let id = self.next_id; - self.next_id += 2; - id - } -} - -impl TreeSink for Sink { - type Handle = usize; - type Output = Self; - fn finish(self) -> Self { - self - } - - fn parse_error(&mut self, msg: Cow<'static, str>) { - println!("Parse error: {}", msg); - } - - fn get_document(&mut self) -> usize { - 0 - } - - fn get_template_contents(&mut self, target: &usize) -> usize { - if let Some(expanded_name!(html "template")) = self.names.get(target).map(|n| n.expanded()) - { - target + 1 - } else { - panic!("not a template element") - } - } - - fn set_quirks_mode(&mut self, mode: QuirksMode) { - println!("Set quirks mode to {:?}", mode); - } - - fn same_node(&self, x: &usize, y: &usize) -> bool { - x == y - } - - fn elem_name(&self, target: &usize) -> ExpandedName { - self.names.get(target).expect("not an element").expanded() - } - - fn create_element(&mut self, name: QualName, _: Vec<Attribute>, _: ElementFlags) -> usize { - let id = self.get_id(); - println!("Created {:?} as {}", name, id); - self.names.insert(id, name); - id - } - - fn create_comment(&mut self, text: StrTendril) -> usize { - let id = self.get_id(); - println!("Created comment \"{}\" as {}", escape_default(&text), id); - id - } - - #[allow(unused_variables)] - fn create_pi(&mut self, target: StrTendril, value: StrTendril) -> usize { - unimplemented!() - } - - fn append(&mut self, parent: &usize, child: NodeOrText<usize>) { - match child { - AppendNode(n) => println!("Append node {} to {}", n, parent), - AppendText(t) 
=> println!("Append text to {}: \"{}\"", parent, escape_default(&t)), - } - } - - fn append_before_sibling(&mut self, sibling: &usize, new_node: NodeOrText<usize>) { - match new_node { - AppendNode(n) => println!("Append node {} before {}", n, sibling), - AppendText(t) => println!("Append text before {}: \"{}\"", sibling, escape_default(&t)), - } - } - - fn append_based_on_parent_node( - &mut self, - element: &Self::Handle, - _prev_element: &Self::Handle, - child: NodeOrText<Self::Handle>, - ) { - self.append_before_sibling(element, child); - } - - fn append_doctype_to_document( - &mut self, - name: StrTendril, - public_id: StrTendril, - system_id: StrTendril, - ) { - println!("Append doctype: {} {} {}", name, public_id, system_id); - } - - fn add_attrs_if_missing(&mut self, target: &usize, attrs: Vec<Attribute>) { - assert!(self.names.contains_key(target), "not an element"); - println!("Add missing attributes to {}:", target); - for attr in attrs.into_iter() { - println!(" {:?} = {}", attr.name, attr.value); - } - } - - fn associate_with_form( - &mut self, - _target: &usize, - _form: &usize, - _nodes: (&usize, Option<&usize>), - ) { - // No form owner support. 
- } - - fn remove_from_parent(&mut self, target: &usize) { - println!("Remove {} from parent", target); - } - - fn reparent_children(&mut self, node: &usize, new_parent: &usize) { - println!("Move children from {} to {}", node, new_parent); - } - - fn mark_script_already_started(&mut self, node: &usize) { - println!("Mark script {} as already started", node); - } - - fn set_current_line(&mut self, line_number: u64) { - println!("Set current line to {}", line_number); - } - - fn pop(&mut self, elem: &usize) { - println!("Popped element {}", elem); - } -} - -// FIXME: Copy of str::escape_default from std, which is currently unstable -pub fn escape_default(s: &str) -> String { - s.chars().flat_map(|c| c.escape_default()).collect() -} - -fn main() { - let sink = Sink { - next_id: 1, - names: HashMap::new(), - }; - let stdin = io::stdin(); - parse_document(sink, Default::default()) - .from_utf8() - .read_from(&mut stdin.lock()) - .unwrap(); -} diff --git a/fuzz/.gitignore b/fuzz/.gitignore deleted file mode 100644 index 572e03b..0000000 --- a/fuzz/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ - -target -corpus -artifacts diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml deleted file mode 100644 index b4dcbe1..0000000 --- a/fuzz/Cargo.toml +++ /dev/null @@ -1,27 +0,0 @@ - -[package] -name = "html5ever-fuzz" -version = "0.0.0" -authors = ["David Korczynski <david@adalogics.com>"] -publish = false -edition = "2018" - -[package.metadata] -cargo-fuzz = true - -[dependencies] -libfuzzer-sys = "0.3" - -[dependencies.html5ever] -path = ".." 
- -[dependencies.markup5ever_rcdom] -path = "../../rcdom/" - -# Prevent this from interfering with workspaces -[workspace] -members = ["."] - -[[bin]] -name = "fuzz_document_parse" -path = "fuzz_targets/fuzz_document_parse.rs" diff --git a/fuzz/fuzz_targets/fuzz_document_parse.rs b/fuzz/fuzz_targets/fuzz_document_parse.rs deleted file mode 100644 index 17840de..0000000 --- a/fuzz/fuzz_targets/fuzz_document_parse.rs +++ /dev/null @@ -1,35 +0,0 @@ -#![no_main] -use libfuzzer_sys::fuzz_target; - -use std::io::BufReader; -use html5ever::driver::ParseOpts; -use markup5ever_rcdom::{RcDom, SerializableHandle}; -use html5ever::tendril::TendrilSink; -use html5ever::tree_builder::TreeBuilderOpts; -use html5ever::{parse_document, serialize}; - -// Target inspired by the Rust-Fuzz project -// https://github.com/rust-fuzz/targets -fuzz_target!(|data: &[u8]| { - let opts = ParseOpts { - tree_builder: TreeBuilderOpts { - drop_doctype: true, - ..Default::default() - }, - ..Default::default() - }; - - let dom = parse_document(RcDom::default(), opts) - .from_utf8() - .read_from(&mut BufReader::new(data)); - - let dom = if let Ok(dom) = dom { - dom - } else { - return; - }; - - let mut out = std::io::sink(); - let document: SerializableHandle = dom.document.into(); - let _ = serialize(&mut out, &document, Default::default()); -}); diff --git a/src/driver.rs b/src/driver.rs deleted file mode 100644 index 26db9b8..0000000 --- a/src/driver.rs +++ /dev/null @@ -1,137 +0,0 @@ -// Copyright 2014-2017 The html5ever Project Developers. See the -// COPYRIGHT file at the top-level directory of this distribution. -// -// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or -// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license -// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -//! High-level interface to the parser. 
- -use crate::buffer_queue::BufferQueue; -use crate::tokenizer::{Tokenizer, TokenizerOpts, TokenizerResult}; -use crate::tree_builder::{create_element, TreeBuilder, TreeBuilderOpts, TreeSink}; -use crate::{Attribute, QualName}; - -use std::borrow::Cow; - -use crate::tendril; -use crate::tendril::stream::{TendrilSink, Utf8LossyDecoder}; -use crate::tendril::StrTendril; - -/// All-encompassing options struct for the parser. -#[derive(Clone, Default)] -pub struct ParseOpts { - /// Tokenizer options. - pub tokenizer: TokenizerOpts, - - /// Tree builder options. - pub tree_builder: TreeBuilderOpts, -} - -/// Parse an HTML document -/// -/// The returned value implements `tendril::TendrilSink` -/// so that Unicode input may be provided incrementally, -/// or all at once with the `one` method. -/// -/// If your input is bytes, use `Parser::from_utf8`. -pub fn parse_document<Sink>(sink: Sink, opts: ParseOpts) -> Parser<Sink> -where - Sink: TreeSink, -{ - let tb = TreeBuilder::new(sink, opts.tree_builder); - let tok = Tokenizer::new(tb, opts.tokenizer); - Parser { - tokenizer: tok, - input_buffer: BufferQueue::new(), - } -} - -/// Parse an HTML fragment -/// -/// The returned value implements `tendril::TendrilSink` -/// so that Unicode input may be provided incrementally, -/// or all at once with the `one` method. -/// -/// If your input is bytes, use `Parser::from_utf8`. -pub fn parse_fragment<Sink>( - mut sink: Sink, - opts: ParseOpts, - context_name: QualName, - context_attrs: Vec<Attribute>, -) -> Parser<Sink> -where - Sink: TreeSink, -{ - let context_elem = create_element(&mut sink, context_name, context_attrs); - parse_fragment_for_element(sink, opts, context_elem, None) -} - -/// Like `parse_fragment`, but with an existing context element -/// and optionally a form element. 
-pub fn parse_fragment_for_element<Sink>( - sink: Sink, - opts: ParseOpts, - context_element: Sink::Handle, - form_element: Option<Sink::Handle>, -) -> Parser<Sink> -where - Sink: TreeSink, -{ - let tb = TreeBuilder::new_for_fragment(sink, context_element, form_element, opts.tree_builder); - let tok_opts = TokenizerOpts { - initial_state: Some(tb.tokenizer_state_for_context_elem()), - ..opts.tokenizer - }; - let tok = Tokenizer::new(tb, tok_opts); - Parser { - tokenizer: tok, - input_buffer: BufferQueue::new(), - } -} - -/// An HTML parser, -/// ready to receive Unicode input through the `tendril::TendrilSink` trait’s methods. -pub struct Parser<Sink> -where - Sink: TreeSink, -{ - pub tokenizer: Tokenizer<TreeBuilder<Sink::Handle, Sink>>, - pub input_buffer: BufferQueue, -} - -impl<Sink: TreeSink> TendrilSink<tendril::fmt::UTF8> for Parser<Sink> { - fn process(&mut self, t: StrTendril) { - self.input_buffer.push_back(t); - // FIXME: Properly support </script> somehow. - while let TokenizerResult::Script(_) = self.tokenizer.feed(&mut self.input_buffer) {} - } - - // FIXME: Is it too noisy to report every character decoding error? - fn error(&mut self, desc: Cow<'static, str>) { - self.tokenizer.sink.sink.parse_error(desc) - } - - type Output = Sink::Output; - - fn finish(mut self) -> Self::Output { - // FIXME: Properly support </script> somehow. - while let TokenizerResult::Script(_) = self.tokenizer.feed(&mut self.input_buffer) {} - assert!(self.input_buffer.is_empty()); - self.tokenizer.end(); - self.tokenizer.sink.sink.finish() - } -} - -impl<Sink: TreeSink> Parser<Sink> { - /// Wrap this parser into a `TendrilSink` that accepts UTF-8 bytes. - /// - /// Use this when your input is bytes that are known to be in the UTF-8 encoding. - /// Decoding is lossy, like `String::from_utf8_lossy`. 
- #[allow(clippy::wrong_self_convention)] - pub fn from_utf8(self) -> Utf8LossyDecoder<Self> { - Utf8LossyDecoder::new(self) - } -} @@ -12,11 +12,8 @@ #![cfg_attr(test, deny(warnings))] #![allow(unused_parens)] -pub use driver::{parse_document, parse_fragment, ParseOpts, Parser}; pub use markup5ever::*; -pub use serialize::serialize; - #[macro_use] mod macros; @@ -24,7 +21,4 @@ mod util { pub mod str; } -pub mod driver; -pub mod serialize; pub mod tokenizer; -pub mod tree_builder; diff --git a/src/serialize/mod.rs b/src/serialize/mod.rs deleted file mode 100644 index 3a57b47..0000000 --- a/src/serialize/mod.rs +++ /dev/null @@ -1,256 +0,0 @@ -// Copyright 2014-2017 The html5ever Project Developers. See the -// COPYRIGHT file at the top-level directory of this distribution. -// -// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or -// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license -// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -use log::warn; -pub use markup5ever::serialize::{AttrRef, Serialize, Serializer, TraversalScope}; -use markup5ever::{local_name, namespace_url, ns}; -use std::default::Default; -use std::io::{self, Write}; - -use crate::{LocalName, QualName}; - -pub fn serialize<Wr, T>(writer: Wr, node: &T, opts: SerializeOpts) -> io::Result<()> -where - Wr: Write, - T: Serialize, -{ - let mut ser = HtmlSerializer::new(writer, opts.clone()); - node.serialize(&mut ser, opts.traversal_scope) -} - -#[derive(Clone)] -pub struct SerializeOpts { - /// Is scripting enabled? - pub scripting_enabled: bool, - - /// Serialize the root node? Default: ChildrenOnly - pub traversal_scope: TraversalScope, - - /// If the serializer is asked to serialize an invalid tree, the default - /// behavior is to panic in the event that an `end_elem` is created without a - /// matching `start_elem`. 
Setting this to true will prevent those panics by - /// creating a default parent on the element stack. No extra start elem will - /// actually be written. Default: false - pub create_missing_parent: bool, -} - -impl Default for SerializeOpts { - fn default() -> SerializeOpts { - SerializeOpts { - scripting_enabled: true, - traversal_scope: TraversalScope::ChildrenOnly(None), - create_missing_parent: false, - } - } -} - -#[derive(Default)] -struct ElemInfo { - html_name: Option<LocalName>, - ignore_children: bool -} - -pub struct HtmlSerializer<Wr: Write> { - pub writer: Wr, - opts: SerializeOpts, - stack: Vec<ElemInfo>, -} - -fn tagname(name: &QualName) -> LocalName { - match name.ns { - ns!(html) | ns!(mathml) | ns!(svg) => (), - ref ns => { - // FIXME(#122) - warn!("node with weird namespace {:?}", ns); - }, - } - - name.local.clone() -} - -impl<Wr: Write> HtmlSerializer<Wr> { - pub fn new(writer: Wr, opts: SerializeOpts) -> Self { - let html_name = match opts.traversal_scope { - TraversalScope::IncludeNode | TraversalScope::ChildrenOnly(None) => None, - TraversalScope::ChildrenOnly(Some(ref n)) => Some(tagname(n)), - }; - HtmlSerializer { - writer, - opts, - stack: vec![ElemInfo { - html_name, - ignore_children: false, - }], - } - } - - fn parent(&mut self) -> &mut ElemInfo { - if self.stack.is_empty() { - if self.opts.create_missing_parent { - warn!("ElemInfo stack empty, creating new parent"); - self.stack.push(Default::default()); - } else { - panic!("no parent ElemInfo") - } - } - self.stack.last_mut().unwrap() - } - - fn write_escaped(&mut self, text: &str, attr_mode: bool) -> io::Result<()> { - for c in text.chars() { - match c { - '&' => self.writer.write_all(b"&amp;"), - '\u{00A0}' => self.writer.write_all(b"&nbsp;"), - '"' if attr_mode => self.writer.write_all(b"&quot;"), - '<' if !attr_mode => self.writer.write_all(b"&lt;"), - '>' if !attr_mode => self.writer.write_all(b"&gt;"), - c => self.writer.write_fmt(format_args!("{}", c)), - }?; - } - Ok(()) - } -} - -impl<Wr: 
Write> Serializer for HtmlSerializer<Wr> { - fn start_elem<'a, AttrIter>(&mut self, name: QualName, attrs: AttrIter) -> io::Result<()> - where - AttrIter: Iterator<Item = AttrRef<'a>>, - { - let html_name = match name.ns { - ns!(html) => Some(name.local.clone()), - _ => None, - }; - - if self.parent().ignore_children { - self.stack.push(ElemInfo { - html_name, - ignore_children: true, - }); - return Ok(()); - } - - self.writer.write_all(b"<")?; - self.writer.write_all(tagname(&name).as_bytes())?; - for (name, value) in attrs { - self.writer.write_all(b" ")?; - - match name.ns { - ns!() => (), - ns!(xml) => self.writer.write_all(b"xml:")?, - ns!(xmlns) => { - if name.local != local_name!("xmlns") { - self.writer.write_all(b"xmlns:")?; - } - }, - ns!(xlink) => self.writer.write_all(b"xlink:")?, - ref ns => { - // FIXME(#122) - warn!("attr with weird namespace {:?}", ns); - self.writer.write_all(b"unknown_namespace:")?; - }, - } - - self.writer.write_all(name.local.as_bytes())?; - self.writer.write_all(b"=\"")?; - self.write_escaped(value, true)?; - self.writer.write_all(b"\"")?; - } - self.writer.write_all(b">")?; - - let ignore_children = name.ns == ns!(html) && - match name.local { - local_name!("area") | - local_name!("base") | - local_name!("basefont") | - local_name!("bgsound") | - local_name!("br") | - local_name!("col") | - local_name!("embed") | - local_name!("frame") | - local_name!("hr") | - local_name!("img") | - local_name!("input") | - local_name!("keygen") | - local_name!("link") | - local_name!("meta") | - local_name!("param") | - local_name!("source") | - local_name!("track") | - local_name!("wbr") => true, - _ => false, - }; - - self.stack.push(ElemInfo { - html_name, - ignore_children, - }); - - Ok(()) - } - - fn end_elem(&mut self, name: QualName) -> io::Result<()> { - let info = match self.stack.pop() { - Some(info) => info, - None if self.opts.create_missing_parent => { - warn!("missing ElemInfo, creating default."); - Default::default() - }, - _ 
=> panic!("no ElemInfo"), - }; - if info.ignore_children { - return Ok(()); - } - - self.writer.write_all(b"</")?; - self.writer.write_all(tagname(&name).as_bytes())?; - self.writer.write_all(b">") - } - - fn write_text(&mut self, text: &str) -> io::Result<()> { - let escape = match self.parent().html_name { - Some(local_name!("style")) | - Some(local_name!("script")) | - Some(local_name!("xmp")) | - Some(local_name!("iframe")) | - Some(local_name!("noembed")) | - Some(local_name!("noframes")) | - Some(local_name!("plaintext")) => false, - - Some(local_name!("noscript")) => !self.opts.scripting_enabled, - - _ => true, - }; - - if escape { - self.write_escaped(text, false) - } else { - self.writer.write_all(text.as_bytes()) - } - } - - fn write_comment(&mut self, text: &str) -> io::Result<()> { - self.writer.write_all(b"<!--")?; - self.writer.write_all(text.as_bytes())?; - self.writer.write_all(b"-->") - } - - fn write_doctype(&mut self, name: &str) -> io::Result<()> { - self.writer.write_all(b"<!DOCTYPE ")?; - self.writer.write_all(name.as_bytes())?; - self.writer.write_all(b">") - } - - fn write_processing_instruction(&mut self, target: &str, data: &str) -> io::Result<()> { - self.writer.write_all(b"<?")?; - self.writer.write_all(target.as_bytes())?; - self.writer.write_all(b" ")?; - self.writer.write_all(data.as_bytes())?; - self.writer.write_all(b">") - } -} diff --git a/src/tree_builder/data.rs b/src/tree_builder/data.rs deleted file mode 100644 index 9d51a71..0000000 --- a/src/tree_builder/data.rs +++ /dev/null @@ -1,171 +0,0 @@ -// Copyright 2014-2017 The html5ever Project Developers. See the -// COPYRIGHT file at the top-level directory of this distribution. -// -// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or -// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license -// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your -// option. 
This file may not be copied, modified, or distributed -// except according to those terms. - -use crate::interface::{LimitedQuirks, NoQuirks, Quirks, QuirksMode}; -use crate::tendril::StrTendril; -use crate::tokenizer::Doctype; - -// These should all be lowercase, for ASCII-case-insensitive matching. -static QUIRKY_PUBLIC_PREFIXES: &'static [&'static str] = &[ - "-//advasoft ltd//dtd html 3.0 aswedit + extensions//", - "-//as//dtd html 3.0 aswedit + extensions//", - "-//ietf//dtd html 2.0 level 1//", - "-//ietf//dtd html 2.0 level 2//", - "-//ietf//dtd html 2.0 strict level 1//", - "-//ietf//dtd html 2.0 strict level 2//", - "-//ietf//dtd html 2.0 strict//", - "-//ietf//dtd html 2.0//", - "-//ietf//dtd html 2.1e//", - "-//ietf//dtd html 3.0//", - "-//ietf//dtd html 3.2 final//", - "-//ietf//dtd html 3.2//", - "-//ietf//dtd html 3//", - "-//ietf//dtd html level 0//", - "-//ietf//dtd html level 1//", - "-//ietf//dtd html level 2//", - "-//ietf//dtd html level 3//", - "-//ietf//dtd html strict level 0//", - "-//ietf//dtd html strict level 1//", - "-//ietf//dtd html strict level 2//", - "-//ietf//dtd html strict level 3//", - "-//ietf//dtd html strict//", - "-//ietf//dtd html//", - "-//metrius//dtd metrius presentational//", - "-//microsoft//dtd internet explorer 2.0 html strict//", - "-//microsoft//dtd internet explorer 2.0 html//", - "-//microsoft//dtd internet explorer 2.0 tables//", - "-//microsoft//dtd internet explorer 3.0 html strict//", - "-//microsoft//dtd internet explorer 3.0 html//", - "-//microsoft//dtd internet explorer 3.0 tables//", - "-//netscape comm. corp.//dtd html//", - "-//netscape comm. 
corp.//dtd strict html//", - "-//o'reilly and associates//dtd html 2.0//", - "-//o'reilly and associates//dtd html extended 1.0//", - "-//o'reilly and associates//dtd html extended relaxed 1.0//", - "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//", - "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//", - "-//spyglass//dtd html 2.0 extended//", - "-//sq//dtd html 2.0 hotmetal + extensions//", - "-//sun microsystems corp.//dtd hotjava html//", - "-//sun microsystems corp.//dtd hotjava strict html//", - "-//w3c//dtd html 3 1995-03-24//", - "-//w3c//dtd html 3.2 draft//", - "-//w3c//dtd html 3.2 final//", - "-//w3c//dtd html 3.2//", - "-//w3c//dtd html 3.2s draft//", - "-//w3c//dtd html 4.0 frameset//", - "-//w3c//dtd html 4.0 transitional//", - "-//w3c//dtd html experimental 19960712//", - "-//w3c//dtd html experimental 970421//", - "-//w3c//dtd w3 html//", - "-//w3o//dtd w3 html 3.0//", - "-//webtechs//dtd mozilla html 2.0//", - "-//webtechs//dtd mozilla html//", -]; - -static QUIRKY_PUBLIC_MATCHES: &'static [&'static str] = &[ - "-//w3o//dtd w3 html strict 3.0//en//", - "-/w3c/dtd html 4.0 transitional/en", - "html", -]; - -static QUIRKY_SYSTEM_MATCHES: &'static [&'static str] = - &["http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"]; - -static LIMITED_QUIRKY_PUBLIC_PREFIXES: &'static [&'static str] = &[ - "-//w3c//dtd xhtml 1.0 frameset//", - "-//w3c//dtd xhtml 1.0 transitional//", -]; - -static HTML4_PUBLIC_PREFIXES: &'static [&'static str] = &[ - "-//w3c//dtd html 4.01 frameset//", - "-//w3c//dtd html 4.01 transitional//", -]; - -pub fn doctype_error_and_quirks(doctype: &Doctype, iframe_srcdoc: bool) -> (bool, QuirksMode) { - fn opt_string_as_slice<'t>(x: &'t Option<String>) -> Option<&'t str> { - x.as_ref().map(|y| &y[..]) - } - - fn opt_tendril_as_slice<'t>(x: &'t Option<StrTendril>) -> Option<&'t str> { - match *x { - Some(ref t) => Some(t), - None => None, - } - } - - fn opt_to_ascii_lower(x: 
Option<&str>) -> Option<String> { - x.map(|y| y.to_ascii_lowercase()) - } - - let name = opt_tendril_as_slice(&doctype.name); - let public = opt_tendril_as_slice(&doctype.public_id); - let system = opt_tendril_as_slice(&doctype.system_id); - - let err = match (name, public, system) { - (Some("html"), None, None) | - (Some("html"), None, Some("about:legacy-compat")) | - (Some("html"), Some("-//W3C//DTD HTML 4.0//EN"), None) | - ( - Some("html"), - Some("-//W3C//DTD HTML 4.0//EN"), - Some("http://www.w3.org/TR/REC-html40/strict.dtd"), - ) | - (Some("html"), Some("-//W3C//DTD HTML 4.01//EN"), None) | - ( - Some("html"), - Some("-//W3C//DTD HTML 4.01//EN"), - Some("http://www.w3.org/TR/html4/strict.dtd"), - ) | - ( - Some("html"), - Some("-//W3C//DTD XHTML 1.0 Strict//EN"), - Some("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"), - ) | - ( - Some("html"), - Some("-//W3C//DTD XHTML 1.1//EN"), - Some("http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"), - ) => false, - - _ => true, - }; - - // FIXME: We could do something asymptotically faster here. - // But there aren't many strings, and this happens at most once per parse. - fn contains_pfx(haystack: &[&str], needle: &str) -> bool { - haystack.iter().any(|&x| needle.starts_with(x)) - } - - // Quirks-mode matches are case-insensitive. 
- let public = opt_to_ascii_lower(public); - let system = opt_to_ascii_lower(system); - - let quirk = match (opt_string_as_slice(&public), opt_string_as_slice(&system)) { - _ if doctype.force_quirks => Quirks, - _ if name != Some("html") => Quirks, - - _ if iframe_srcdoc => NoQuirks, - - (Some(ref p), _) if QUIRKY_PUBLIC_MATCHES.contains(p) => Quirks, - (_, Some(ref s)) if QUIRKY_SYSTEM_MATCHES.contains(s) => Quirks, - - (Some(p), _) if contains_pfx(QUIRKY_PUBLIC_PREFIXES, p) => Quirks, - (Some(p), _) if contains_pfx(LIMITED_QUIRKY_PUBLIC_PREFIXES, p) => LimitedQuirks, - - (Some(p), s) if contains_pfx(HTML4_PUBLIC_PREFIXES, p) => match s { - None => Quirks, - Some(_) => LimitedQuirks, - }, - - _ => NoQuirks, - }; - - (err, quirk) -} diff --git a/src/tree_builder/mod.rs b/src/tree_builder/mod.rs deleted file mode 100644 index a6fa8bf..0000000 --- a/src/tree_builder/mod.rs +++ /dev/null @@ -1,1681 +0,0 @@ -// Copyright 2014-2017 The html5ever Project Developers. See the -// COPYRIGHT file at the top-level directory of this distribution. -// -// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or -// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license -// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -#![allow(warnings)] - -//! The HTML5 tree builder. 
- -pub use crate::interface::{create_element, ElementFlags, NextParserState, Tracer, TreeSink}; -pub use crate::interface::{AppendNode, AppendText, Attribute, NodeOrText}; -pub use crate::interface::{LimitedQuirks, NoQuirks, Quirks, QuirksMode}; - -use self::types::*; - -use crate::tendril::StrTendril; -use crate::{ExpandedName, LocalName, Namespace, QualName}; - -use crate::tokenizer; -use crate::tokenizer::states as tok_state; -use crate::tokenizer::{Doctype, EndTag, StartTag, Tag, TokenSink, TokenSinkResult}; - -use crate::util::str::is_ascii_whitespace; - -use std::borrow::Cow::Borrowed; -use std::collections::VecDeque; -use std::default::Default; -use std::iter::{Enumerate, Rev}; -use std::mem::replace; -use std::{fmt, slice}; - -use crate::tokenizer::states::{RawData, RawKind}; -use crate::tree_builder::tag_sets::*; -use crate::tree_builder::types::*; -use crate::util::str::to_escaped_string; -use log::{debug, log_enabled, warn, Level}; -use mac::{_tt_as_expr_hack, format_if, matches}; - -pub use self::PushFlag::*; - -#[macro_use] -mod tag_sets; - -mod data; -mod types; - -include!(concat!(env!("OUT_DIR"), "/rules.rs")); - -/// Tree builder options, with an impl for Default. -#[derive(Copy, Clone)] -pub struct TreeBuilderOpts { - /// Report all parse errors described in the spec, at some - /// performance penalty? Default: false - pub exact_errors: bool, - - /// Is scripting enabled? - pub scripting_enabled: bool, - - /// Is this an `iframe srcdoc` document? - pub iframe_srcdoc: bool, - - /// Should we drop the DOCTYPE (if any) from the tree? - pub drop_doctype: bool, - - /// Obsolete, ignored. - pub ignore_missing_rules: bool, - - /// Initial TreeBuilder quirks mode. 
Default: NoQuirks - pub quirks_mode: QuirksMode, -} - -impl Default for TreeBuilderOpts { - fn default() -> TreeBuilderOpts { - TreeBuilderOpts { - exact_errors: false, - scripting_enabled: true, - iframe_srcdoc: false, - drop_doctype: false, - ignore_missing_rules: false, - quirks_mode: NoQuirks, - } - } -} - -/// The HTML tree builder. -pub struct TreeBuilder<Handle, Sink> { - /// Options controlling the behavior of the tree builder. - opts: TreeBuilderOpts, - - /// Consumer of tree modifications. - pub sink: Sink, - - /// Insertion mode. - mode: InsertionMode, - - /// Original insertion mode, used by Text and InTableText modes. - orig_mode: Option<InsertionMode>, - - /// Stack of template insertion modes. - template_modes: Vec<InsertionMode>, - - /// Pending table character tokens. - pending_table_text: Vec<(SplitStatus, StrTendril)>, - - /// Quirks mode as set by the parser. - /// FIXME: can scripts etc. change this? - quirks_mode: QuirksMode, - - /// The document node, which is created by the sink. - doc_handle: Handle, - - /// Stack of open elements, most recently added at end. - open_elems: Vec<Handle>, - - /// List of active formatting elements. - active_formatting: Vec<FormatEntry<Handle>>, - - //§ the-element-pointers - /// Head element pointer. - head_elem: Option<Handle>, - - /// Form element pointer. - form_elem: Option<Handle>, - //§ END - /// Frameset-ok flag. - frameset_ok: bool, - - /// Ignore a following U+000A LINE FEED? - ignore_lf: bool, - - /// Is foster parenting enabled? - foster_parenting: bool, - - /// The context element for the fragment parsing algorithm. - context_elem: Option<Handle>, - - /// Track current line - current_line: u64, - // WARNING: If you add new fields that contain Handles, you - // must add them to trace_handles() below to preserve memory - // safety! - // - // FIXME: Auto-generate the trace hooks like Servo does. 
-} - -impl<Handle, Sink> TreeBuilder<Handle, Sink> -where - Handle: Clone, - Sink: TreeSink<Handle = Handle>, -{ - /// Create a new tree builder which sends tree modifications to a particular `TreeSink`. - /// - /// The tree builder is also a `TokenSink`. - pub fn new(mut sink: Sink, opts: TreeBuilderOpts) -> TreeBuilder<Handle, Sink> { - let doc_handle = sink.get_document(); - TreeBuilder { - opts: opts, - sink: sink, - mode: Initial, - orig_mode: None, - template_modes: vec![], - pending_table_text: vec![], - quirks_mode: opts.quirks_mode, - doc_handle: doc_handle, - open_elems: vec![], - active_formatting: vec![], - head_elem: None, - form_elem: None, - frameset_ok: true, - ignore_lf: false, - foster_parenting: false, - context_elem: None, - current_line: 1, - } - } - - /// Create a new tree builder which sends tree modifications to a particular `TreeSink`. - /// This is for parsing fragments. - /// - /// The tree builder is also a `TokenSink`. - pub fn new_for_fragment( - mut sink: Sink, - context_elem: Handle, - form_elem: Option<Handle>, - opts: TreeBuilderOpts, - ) -> TreeBuilder<Handle, Sink> { - let doc_handle = sink.get_document(); - let context_is_template = sink.elem_name(&context_elem) == expanded_name!(html "template"); - let mut tb = TreeBuilder { - opts: opts, - sink: sink, - mode: Initial, - orig_mode: None, - template_modes: if context_is_template { - vec![InTemplate] - } else { - vec![] - }, - pending_table_text: vec![], - quirks_mode: opts.quirks_mode, - doc_handle: doc_handle, - open_elems: vec![], - active_formatting: vec![], - head_elem: None, - form_elem: form_elem, - frameset_ok: true, - ignore_lf: false, - foster_parenting: false, - context_elem: Some(context_elem), - current_line: 1, - }; - - // https://html.spec.whatwg.org/multipage/#parsing-html-fragments - // 5. Let root be a new html element with no attributes. - // 6. Append the element root to the Document node created above. - // 7. 
Set up the parser's stack of open elements so that it contains just the single element root. - tb.create_root(vec![]); - // 10. Reset the parser's insertion mode appropriately. - tb.mode = tb.reset_insertion_mode(); - - tb - } - - // https://html.spec.whatwg.org/multipage/#concept-frag-parse-context - // Step 4. Set the state of the HTML parser's tokenization stage as follows: - pub fn tokenizer_state_for_context_elem(&self) -> tok_state::State { - let elem = self.context_elem.as_ref().expect("no context element"); - let name = match self.sink.elem_name(elem) { - ExpandedName { - ns: &ns!(html), - local, - } => local, - _ => return tok_state::Data, - }; - match *name { - local_name!("title") | local_name!("textarea") => tok_state::RawData(tok_state::Rcdata), - - local_name!("style") | - local_name!("xmp") | - local_name!("iframe") | - local_name!("noembed") | - local_name!("noframes") => tok_state::RawData(tok_state::Rawtext), - - local_name!("script") => tok_state::RawData(tok_state::ScriptData), - - local_name!("noscript") => { - if self.opts.scripting_enabled { - tok_state::RawData(tok_state::Rawtext) - } else { - tok_state::Data - } - }, - - local_name!("plaintext") => tok_state::Plaintext, - - _ => tok_state::Data, - } - } - - /// Call the `Tracer`'s `trace_handle` method on every `Handle` in the tree builder's - /// internal state. This is intended to support garbage-collected DOMs. 
- pub fn trace_handles(&self, tracer: &Tracer<Handle = Handle>) { - tracer.trace_handle(&self.doc_handle); - for e in &self.open_elems { - tracer.trace_handle(e); - } - for e in &self.active_formatting { - match e { - &Element(ref h, _) => tracer.trace_handle(h), - _ => (), - } - } - self.head_elem.as_ref().map(|h| tracer.trace_handle(h)); - self.form_elem.as_ref().map(|h| tracer.trace_handle(h)); - self.context_elem.as_ref().map(|h| tracer.trace_handle(h)); - } - - #[allow(dead_code)] - fn dump_state(&self, label: String) { - println!("dump_state on {}", label); - print!(" open_elems:"); - for node in self.open_elems.iter() { - let name = self.sink.elem_name(node); - match *name.ns { - ns!(html) => print!(" {}", name.local), - _ => panic!(), - } - } - println!(""); - print!(" active_formatting:"); - for entry in self.active_formatting.iter() { - match entry { - &Marker => print!(" Marker"), - &Element(ref h, _) => { - let name = self.sink.elem_name(h); - match *name.ns { - ns!(html) => print!(" {}", name.local), - _ => panic!(), - } - }, - } - } - println!(""); - } - - fn debug_step(&self, mode: InsertionMode, token: &Token) { - if log_enabled!(Level::Debug) { - debug!( - "processing {} in insertion mode {:?}", - to_escaped_string(token), - mode - ); - } - } - - fn process_to_completion(&mut self, mut token: Token) -> TokenSinkResult<Handle> { - // Queue of additional tokens yet to be processed. - // This stays empty in the common case where we don't split whitespace. - let mut more_tokens = VecDeque::new(); - - loop { - let should_have_acknowledged_self_closing_flag = matches!( - token, - TagToken(Tag { - self_closing: true, - kind: StartTag, - .. 
- }) - ); - let result = if self.is_foreign(&token) { - self.step_foreign(token) - } else { - let mode = self.mode; - self.step(mode, token) - }; - match result { - Done => { - if should_have_acknowledged_self_closing_flag { - self.sink - .parse_error(Borrowed("Unacknowledged self-closing tag")); - } - token = unwrap_or_return!( - more_tokens.pop_front(), - tokenizer::TokenSinkResult::Continue - ); - }, - DoneAckSelfClosing => { - token = unwrap_or_return!( - more_tokens.pop_front(), - tokenizer::TokenSinkResult::Continue - ); - }, - Reprocess(m, t) => { - self.mode = m; - token = t; - }, - ReprocessForeign(t) => { - token = t; - }, - SplitWhitespace(mut buf) => { - let p = buf.pop_front_char_run(is_ascii_whitespace); - let (first, is_ws) = unwrap_or_return!(p, tokenizer::TokenSinkResult::Continue); - let status = if is_ws { Whitespace } else { NotWhitespace }; - token = CharacterTokens(status, first); - - if buf.len32() > 0 { - more_tokens.push_back(CharacterTokens(NotSplit, buf)); - } - }, - Script(node) => { - assert!(more_tokens.is_empty()); - return tokenizer::TokenSinkResult::Script(node); - }, - ToPlaintext => { - assert!(more_tokens.is_empty()); - return tokenizer::TokenSinkResult::Plaintext; - }, - ToRawData(k) => { - assert!(more_tokens.is_empty()); - return tokenizer::TokenSinkResult::RawData(k); - }, - } - } - } - - /// Are we parsing a HTML fragment? 
- pub fn is_fragment(&self) -> bool { - self.context_elem.is_some() - } - - /// https://html.spec.whatwg.org/multipage/#appropriate-place-for-inserting-a-node - fn appropriate_place_for_insertion( - &mut self, - override_target: Option<Handle>, - ) -> InsertionPoint<Handle> { - use self::tag_sets::*; - - declare_tag_set!(foster_target = "table" "tbody" "tfoot" "thead" "tr"); - let target = override_target.unwrap_or_else(|| self.current_node().clone()); - if !(self.foster_parenting && self.elem_in(&target, foster_target)) { - if self.html_elem_named(&target, local_name!("template")) { - // No foster parenting (inside template). - let contents = self.sink.get_template_contents(&target); - return LastChild(contents); - } else { - // No foster parenting (the common case). - return LastChild(target); - } - } - - // Foster parenting - let mut iter = self.open_elems.iter().rev().peekable(); - while let Some(elem) = iter.next() { - if self.html_elem_named(&elem, local_name!("template")) { - let contents = self.sink.get_template_contents(&elem); - return LastChild(contents); - } else if self.html_elem_named(&elem, local_name!("table")) { - return TableFosterParenting { - element: elem.clone(), - prev_element: (*iter.peek().unwrap()).clone(), - }; - } - } - let html_elem = self.html_elem(); - LastChild(html_elem.clone()) - } - - fn insert_at(&mut self, insertion_point: InsertionPoint<Handle>, child: NodeOrText<Handle>) { - match insertion_point { - LastChild(parent) => self.sink.append(&parent, child), - BeforeSibling(sibling) => self.sink.append_before_sibling(&sibling, child), - TableFosterParenting { - element, - prev_element, - } => self - .sink - .append_based_on_parent_node(&element, &prev_element, child), - } - } -} - -impl<Handle, Sink> TokenSink for TreeBuilder<Handle, Sink> -where - Handle: Clone, - Sink: TreeSink<Handle = Handle>, -{ - type Handle = Handle; - - fn process_token( - &mut self, - token: tokenizer::Token, - line_number: u64, - ) -> 
TokenSinkResult<Handle> { - if line_number != self.current_line { - self.sink.set_current_line(line_number); - } - let ignore_lf = replace(&mut self.ignore_lf, false); - - // Handle `ParseError` and `DoctypeToken`; convert everything else to the local `Token` type. - let token = match token { - tokenizer::ParseError(e) => { - self.sink.parse_error(e); - return tokenizer::TokenSinkResult::Continue; - }, - - tokenizer::DoctypeToken(dt) => { - if self.mode == Initial { - let (err, quirk) = data::doctype_error_and_quirks(&dt, self.opts.iframe_srcdoc); - if err { - self.sink.parse_error(format_if!( - self.opts.exact_errors, - "Bad DOCTYPE", - "Bad DOCTYPE: {:?}", - dt - )); - } - let Doctype { - name, - public_id, - system_id, - force_quirks: _, - } = dt; - if !self.opts.drop_doctype { - self.sink.append_doctype_to_document( - name.unwrap_or(StrTendril::new()), - public_id.unwrap_or(StrTendril::new()), - system_id.unwrap_or(StrTendril::new()), - ); - } - self.set_quirks_mode(quirk); - - self.mode = BeforeHtml; - return tokenizer::TokenSinkResult::Continue; - } else { - self.sink.parse_error(format_if!( - self.opts.exact_errors, - "DOCTYPE in body", - "DOCTYPE in insertion mode {:?}", - self.mode - )); - return tokenizer::TokenSinkResult::Continue; - } - }, - - tokenizer::TagToken(x) => TagToken(x), - tokenizer::CommentToken(x) => CommentToken(x), - tokenizer::NullCharacterToken => NullCharacterToken, - tokenizer::EOFToken => EOFToken, - - tokenizer::CharacterTokens(mut x) => { - if ignore_lf && x.starts_with("\n") { - x.pop_front(1); - } - if x.is_empty() { - return tokenizer::TokenSinkResult::Continue; - } - CharacterTokens(NotSplit, x) - }, - }; - - self.process_to_completion(token) - } - - fn end(&mut self) { - for elem in self.open_elems.drain(..).rev() { - self.sink.pop(&elem); - } - } - - fn adjusted_current_node_present_but_not_in_html_namespace(&self) -> bool { - !self.open_elems.is_empty() && - self.sink.elem_name(self.adjusted_current_node()).ns != &ns!(html) 
- } -} - -pub fn html_elem<Handle>(open_elems: &[Handle]) -> &Handle { - &open_elems[0] -} - -pub struct ActiveFormattingIter<'a, Handle: 'a> { - iter: Rev<Enumerate<slice::Iter<'a, FormatEntry<Handle>>>>, -} - -impl<'a, Handle> Iterator for ActiveFormattingIter<'a, Handle> { - type Item = (usize, &'a Handle, &'a Tag); - fn next(&mut self) -> Option<(usize, &'a Handle, &'a Tag)> { - match self.iter.next() { - None | Some((_, &Marker)) => None, - Some((i, &Element(ref h, ref t))) => Some((i, h, t)), - } - } -} - -pub enum PushFlag { - Push, - NoPush, -} - -enum Bookmark<Handle> { - Replace(Handle), - InsertAfter(Handle), -} - -macro_rules! qualname { - ("", $local:tt) => { - QualName { - prefix: None, - ns: ns!(), - local: local_name!($local), - } - }; - ($prefix: tt $ns:tt $local:tt) => { - QualName { - prefix: Some(namespace_prefix!($prefix)), - ns: ns!($ns), - local: local_name!($local), - } - }; -} - -#[doc(hidden)] -impl<Handle, Sink> TreeBuilder<Handle, Sink> -where - Handle: Clone, - Sink: TreeSink<Handle = Handle>, -{ - fn unexpected<T: fmt::Debug>(&mut self, _thing: &T) -> ProcessResult<Handle> { - self.sink.parse_error(format_if!( - self.opts.exact_errors, - "Unexpected token", - "Unexpected token {} in insertion mode {:?}", - to_escaped_string(_thing), - self.mode - )); - Done - } - - fn assert_named(&mut self, node: &Handle, name: LocalName) { - assert!(self.html_elem_named(&node, name)); - } - - /// Iterate over the active formatting elements (with index in the list) from the end - /// to the last marker, or the beginning if there are no markers. 
- fn active_formatting_end_to_marker<'a>(&'a self) -> ActiveFormattingIter<'a, Handle> { - ActiveFormattingIter { - iter: self.active_formatting.iter().enumerate().rev(), - } - } - - fn position_in_active_formatting(&self, element: &Handle) -> Option<usize> { - self.active_formatting.iter().position(|n| match n { - &Marker => false, - &Element(ref handle, _) => self.sink.same_node(handle, element), - }) - } - - fn set_quirks_mode(&mut self, mode: QuirksMode) { - self.quirks_mode = mode; - self.sink.set_quirks_mode(mode); - } - - fn stop_parsing(&mut self) -> ProcessResult<Handle> { - Done - } - - //§ parsing-elements-that-contain-only-text - // Switch to `Text` insertion mode, save the old mode, and - // switch the tokenizer to a raw-data state. - // The latter only takes effect after the current / next - // `process_token` of a start tag returns! - fn to_raw_text_mode(&mut self, k: RawKind) -> ProcessResult<Handle> { - self.orig_mode = Some(self.mode); - self.mode = Text; - ToRawData(k) - } - - // The generic raw text / RCDATA parsing algorithm. - fn parse_raw_data(&mut self, tag: Tag, k: RawKind) -> ProcessResult<Handle> { - self.insert_element_for(tag); - self.to_raw_text_mode(k) - } - //§ END - - fn current_node(&self) -> &Handle { - self.open_elems.last().expect("no current element") - } - - fn adjusted_current_node(&self) -> &Handle { - if self.open_elems.len() == 1 { - if let Some(ctx) = self.context_elem.as_ref() { - return ctx; - } - } - self.current_node() - } - - fn current_node_in<TagSet>(&self, set: TagSet) -> bool - where - TagSet: Fn(ExpandedName) -> bool, - { - set(self.sink.elem_name(self.current_node())) - } - - // Insert at the "appropriate place for inserting a node". 
- fn insert_appropriately(&mut self, child: NodeOrText<Handle>, override_target: Option<Handle>) { - let insertion_point = self.appropriate_place_for_insertion(override_target); - self.insert_at(insertion_point, child); - } - - fn adoption_agency(&mut self, subject: LocalName) { - // 1. - if self.current_node_named(subject.clone()) { - if self - .position_in_active_formatting(self.current_node()) - .is_none() - { - self.pop(); - return; - } - } - - // 2. 3. 4. - for _ in 0..8 { - // 5. - let (fmt_elem_index, fmt_elem, fmt_elem_tag) = unwrap_or_return!( - // We clone the Handle and Tag so they don't cause an immutable borrow of self. - self.active_formatting_end_to_marker() - .filter(|&(_, _, tag)| tag.name == subject) - .next() - .map(|(i, h, t)| (i, h.clone(), t.clone())), - { - self.process_end_tag_in_body(Tag { - kind: EndTag, - name: subject, - self_closing: false, - attrs: vec![], - }); - } - ); - - let fmt_elem_stack_index = unwrap_or_return!( - self.open_elems - .iter() - .rposition(|n| self.sink.same_node(n, &fmt_elem)), - { - self.sink - .parse_error(Borrowed("Formatting element not open")); - self.active_formatting.remove(fmt_elem_index); - } - ); - - // 7. - if !self.in_scope(default_scope, |n| self.sink.same_node(&n, &fmt_elem)) { - self.sink - .parse_error(Borrowed("Formatting element not in scope")); - return; - } - - // 8. - if !self.sink.same_node(self.current_node(), &fmt_elem) { - self.sink - .parse_error(Borrowed("Formatting element not current node")); - } - - // 9. - let (furthest_block_index, furthest_block) = unwrap_or_return!( - self.open_elems - .iter() - .enumerate() - .skip(fmt_elem_stack_index) - .filter(|&(_, open_element)| self.elem_in(open_element, special_tag)) - .next() - .map(|(i, h)| (i, h.clone())), - // 10. - { - self.open_elems.truncate(fmt_elem_stack_index); - self.active_formatting.remove(fmt_elem_index); - } - ); - - // 11. - let common_ancestor = self.open_elems[fmt_elem_stack_index - 1].clone(); - - // 12. 
- let mut bookmark = Bookmark::Replace(fmt_elem.clone()); - - // 13. - let mut node; - let mut node_index = furthest_block_index; - let mut last_node = furthest_block.clone(); - - // 13.1. - let mut inner_counter = 0; - loop { - // 13.2. - inner_counter += 1; - - // 13.3. - node_index -= 1; - node = self.open_elems[node_index].clone(); - - // 13.4. - if self.sink.same_node(&node, &fmt_elem) { - break; - } - - // 13.5. - if inner_counter > 3 { - self.position_in_active_formatting(&node) - .map(|position| self.active_formatting.remove(position)); - self.open_elems.remove(node_index); - continue; - } - - let node_formatting_index = unwrap_or_else!( - self.position_in_active_formatting(&node), - // 13.6. - { - self.open_elems.remove(node_index); - continue; - } - ); - - // 13.7. - let tag = match self.active_formatting[node_formatting_index] { - Element(ref h, ref t) => { - assert!(self.sink.same_node(h, &node)); - t.clone() - }, - Marker => panic!("Found marker during adoption agency"), - }; - // FIXME: Is there a way to avoid cloning the attributes twice here (once on their - // own, once as part of t.clone() above)? - let new_element = create_element( - &mut self.sink, - QualName::new(None, ns!(html), tag.name.clone()), - tag.attrs.clone(), - ); - self.open_elems[node_index] = new_element.clone(); - self.active_formatting[node_formatting_index] = Element(new_element.clone(), tag); - node = new_element; - - // 13.8. - if self.sink.same_node(&last_node, &furthest_block) { - bookmark = Bookmark::InsertAfter(node.clone()); - } - - // 13.9. - self.sink.remove_from_parent(&last_node); - self.sink.append(&node, AppendNode(last_node.clone())); - - // 13.10. - last_node = node.clone(); - - // 13.11. - } - - // 14. - self.sink.remove_from_parent(&last_node); - self.insert_appropriately(AppendNode(last_node.clone()), Some(common_ancestor)); - - // 15. 
- // FIXME: Is there a way to avoid cloning the attributes twice here (once on their own, - // once as part of t.clone() above)? - let new_element = create_element( - &mut self.sink, - QualName::new(None, ns!(html), fmt_elem_tag.name.clone()), - fmt_elem_tag.attrs.clone(), - ); - let new_entry = Element(new_element.clone(), fmt_elem_tag); - - // 16. - self.sink.reparent_children(&furthest_block, &new_element); - - // 17. - self.sink - .append(&furthest_block, AppendNode(new_element.clone())); - - // 18. - // FIXME: We could probably get rid of the position_in_active_formatting() calls here - // if we had a more clever Bookmark representation. - match bookmark { - Bookmark::Replace(to_replace) => { - let index = self - .position_in_active_formatting(&to_replace) - .expect("bookmark not found in active formatting elements"); - self.active_formatting[index] = new_entry; - }, - Bookmark::InsertAfter(previous) => { - let index = self - .position_in_active_formatting(&previous) - .expect("bookmark not found in active formatting elements") + - 1; - self.active_formatting.insert(index, new_entry); - let old_index = self - .position_in_active_formatting(&fmt_elem) - .expect("formatting element not found in active formatting elements"); - self.active_formatting.remove(old_index); - }, - } - - // 19. - self.remove_from_stack(&fmt_elem); - let new_furthest_block_index = self - .open_elems - .iter() - .position(|n| self.sink.same_node(n, &furthest_block)) - .expect("furthest block missing from open element stack"); - self.open_elems - .insert(new_furthest_block_index + 1, new_element); - - // 20. 
- } - } - - fn push(&mut self, elem: &Handle) { - self.open_elems.push(elem.clone()); - } - - fn pop(&mut self) -> Handle { - let elem = self.open_elems.pop().expect("no current element"); - self.sink.pop(&elem); - elem - } - - fn remove_from_stack(&mut self, elem: &Handle) { - let sink = &mut self.sink; - let position = self - .open_elems - .iter() - .rposition(|x| sink.same_node(elem, &x)); - if let Some(position) = position { - self.open_elems.remove(position); - sink.pop(elem); - } - } - - fn is_marker_or_open(&self, entry: &FormatEntry<Handle>) -> bool { - match *entry { - Marker => true, - Element(ref node, _) => self - .open_elems - .iter() - .rev() - .any(|n| self.sink.same_node(&n, &node)), - } - } - - /// Reconstruct the active formatting elements. - fn reconstruct_formatting(&mut self) { - { - let last = unwrap_or_return!(self.active_formatting.last(), ()); - if self.is_marker_or_open(last) { - return; - } - } - - let mut entry_index = self.active_formatting.len() - 1; - loop { - if entry_index == 0 { - break; - } - entry_index -= 1; - if self.is_marker_or_open(&self.active_formatting[entry_index]) { - entry_index += 1; - break; - } - } - - loop { - let tag = match self.active_formatting[entry_index] { - Element(_, ref t) => t.clone(), - Marker => panic!("Found marker during formatting element reconstruction"), - }; - - // FIXME: Is there a way to avoid cloning the attributes twice here (once on their own, - // once as part of t.clone() above)? - let new_element = - self.insert_element(Push, ns!(html), tag.name.clone(), tag.attrs.clone()); - self.active_formatting[entry_index] = Element(new_element, tag); - if entry_index == self.active_formatting.len() - 1 { - break; - } - entry_index += 1; - } - } - - /// Get the first element on the stack, which will be the <html> element. - fn html_elem(&self) -> &Handle { - &self.open_elems[0] - } - - /// Get the second element on the stack, if it's a HTML body element. 
- fn body_elem(&self) -> Option<&Handle> { - if self.open_elems.len() <= 1 { - return None; - } - - let node = &self.open_elems[1]; - if self.html_elem_named(node, local_name!("body")) { - Some(node) - } else { - None - } - } - - /// Signal an error depending on the state of the stack of open elements at - /// the end of the body. - fn check_body_end(&mut self) { - declare_tag_set!(body_end_ok = - "dd" "dt" "li" "optgroup" "option" "p" "rp" "rt" "tbody" "td" "tfoot" "th" - "thead" "tr" "body" "html"); - - for elem in self.open_elems.iter() { - let error; - { - let name = self.sink.elem_name(elem); - if body_end_ok(name) { - continue; - } - error = format_if!( - self.opts.exact_errors, - "Unexpected open tag at end of body", - "Unexpected open tag {:?} at end of body", - name - ); - } - self.sink.parse_error(error); - // FIXME: Do we keep checking after finding one bad tag? - // The spec suggests not. - return; - } - } - - fn in_scope<TagSet, Pred>(&self, scope: TagSet, pred: Pred) -> bool - where - TagSet: Fn(ExpandedName) -> bool, - Pred: Fn(Handle) -> bool, - { - for node in self.open_elems.iter().rev() { - if pred(node.clone()) { - return true; - } - if scope(self.sink.elem_name(node)) { - return false; - } - } - - // supposed to be impossible, because <html> is always in scope - - false - } - - fn elem_in<TagSet>(&self, elem: &Handle, set: TagSet) -> bool - where - TagSet: Fn(ExpandedName) -> bool, - { - set(self.sink.elem_name(elem)) - } - - fn html_elem_named(&self, elem: &Handle, name: LocalName) -> bool { - let expanded = self.sink.elem_name(elem); - *expanded.ns == ns!(html) && *expanded.local == name - } - - fn in_html_elem_named(&self, name: LocalName) -> bool { - self.open_elems - .iter() - .any(|elem| self.html_elem_named(elem, name.clone())) - } - - fn current_node_named(&self, name: LocalName) -> bool { - self.html_elem_named(self.current_node(), name) - } - - fn in_scope_named<TagSet>(&self, scope: TagSet, name: LocalName) -> bool - where - TagSet: 
Fn(ExpandedName) -> bool, - { - self.in_scope(scope, |elem| self.html_elem_named(&elem, name.clone())) - } - - //§ closing-elements-that-have-implied-end-tags - fn generate_implied_end<TagSet>(&mut self, set: TagSet) - where - TagSet: Fn(ExpandedName) -> bool, - { - loop { - { - let elem = unwrap_or_return!(self.open_elems.last(), ()); - let nsname = self.sink.elem_name(elem); - if !set(nsname) { - return; - } - } - self.pop(); - } - } - - fn generate_implied_end_except(&mut self, except: LocalName) { - self.generate_implied_end(|p| { - if *p.ns == ns!(html) && *p.local == except { - false - } else { - cursory_implied_end(p) - } - }); - } - //§ END - - // Pop elements until the current element is in the set. - fn pop_until_current<TagSet>(&mut self, pred: TagSet) - where - TagSet: Fn(ExpandedName) -> bool, - { - loop { - if self.current_node_in(|x| pred(x)) { - break; - } - self.open_elems.pop(); - } - } - - // Pop elements until an element from the set has been popped. Returns the - // number of elements popped. - fn pop_until<P>(&mut self, pred: P) -> usize - where - P: Fn(ExpandedName) -> bool, - { - let mut n = 0; - loop { - n += 1; - match self.open_elems.pop() { - None => break, - Some(elem) => { - if pred(self.sink.elem_name(&elem)) { - break; - } - }, - } - } - n - } - - fn pop_until_named(&mut self, name: LocalName) -> usize { - self.pop_until(|p| *p.ns == ns!(html) && *p.local == name) - } - - // Pop elements until one with the specified name has been popped. - // Signal an error if it was not the first one. 
- fn expect_to_close(&mut self, name: LocalName) { - if self.pop_until_named(name.clone()) != 1 { - self.sink.parse_error(format_if!( - self.opts.exact_errors, - "Unexpected open element", - "Unexpected open element while closing {:?}", - name - )); - } - } - - fn close_p_element(&mut self) { - declare_tag_set!(implied = [cursory_implied_end] - "p"); - self.generate_implied_end(implied); - self.expect_to_close(local_name!("p")); - } - - fn close_p_element_in_button_scope(&mut self) { - if self.in_scope_named(button_scope, local_name!("p")) { - self.close_p_element(); - } - } - - // Check <input> tags for type=hidden - fn is_type_hidden(&self, tag: &Tag) -> bool { - match tag - .attrs - .iter() - .find(|&at| at.name.expanded() == expanded_name!("", "type")) - { - None => false, - Some(at) => (&*at.value).eq_ignore_ascii_case("hidden"), - } - } - - fn foster_parent_in_body(&mut self, token: Token) -> ProcessResult<Handle> { - warn!("foster parenting not implemented"); - self.foster_parenting = true; - let res = self.step(InBody, token); - // FIXME: what if res is Reprocess? 
- self.foster_parenting = false; - res - } - - fn process_chars_in_table(&mut self, token: Token) -> ProcessResult<Handle> { - declare_tag_set!(table_outer = "table" "tbody" "tfoot" "thead" "tr"); - if self.current_node_in(table_outer) { - assert!(self.pending_table_text.is_empty()); - self.orig_mode = Some(self.mode); - Reprocess(InTableText, token) - } else { - self.sink.parse_error(format_if!( - self.opts.exact_errors, - "Unexpected characters in table", - "Unexpected characters {} in table", - to_escaped_string(&token) - )); - self.foster_parent_in_body(token) - } - } - - // https://html.spec.whatwg.org/multipage/#reset-the-insertion-mode-appropriately - fn reset_insertion_mode(&mut self) -> InsertionMode { - for (i, mut node) in self.open_elems.iter().enumerate().rev() { - let last = i == 0usize; - if let (true, Some(ctx)) = (last, self.context_elem.as_ref()) { - node = ctx; - } - let name = match self.sink.elem_name(node) { - ExpandedName { - ns: &ns!(html), - local, - } => local, - _ => continue, - }; - match *name { - local_name!("select") => { - for ancestor in self.open_elems[0..i].iter().rev() { - if self.html_elem_named(ancestor, local_name!("template")) { - return InSelect; - } else if self.html_elem_named(ancestor, local_name!("table")) { - return InSelectInTable; - } - } - return InSelect; - }, - local_name!("td") | local_name!("th") => { - if !last { - return InCell; - } - }, - local_name!("tr") => return InRow, - local_name!("tbody") | local_name!("thead") | local_name!("tfoot") => { - return InTableBody; - }, - local_name!("caption") => return InCaption, - local_name!("colgroup") => return InColumnGroup, - local_name!("table") => return InTable, - local_name!("template") => return *self.template_modes.last().unwrap(), - local_name!("head") => { - if !last { - return InHead; - } - }, - local_name!("body") => return InBody, - local_name!("frameset") => return InFrameset, - local_name!("html") => match self.head_elem { - None => return BeforeHead, - 
Some(_) => return AfterHead, - }, - - _ => (), - } - } - InBody - } - - fn close_the_cell(&mut self) { - self.generate_implied_end(cursory_implied_end); - if self.pop_until(td_th) != 1 { - self.sink - .parse_error(Borrowed("expected to close <td> or <th> with cell")); - } - self.clear_active_formatting_to_marker(); - } - - fn append_text(&mut self, text: StrTendril) -> ProcessResult<Handle> { - self.insert_appropriately(AppendText(text), None); - Done - } - - fn append_comment(&mut self, text: StrTendril) -> ProcessResult<Handle> { - let comment = self.sink.create_comment(text); - self.insert_appropriately(AppendNode(comment), None); - Done - } - - fn append_comment_to_doc(&mut self, text: StrTendril) -> ProcessResult<Handle> { - let comment = self.sink.create_comment(text); - self.sink.append(&self.doc_handle, AppendNode(comment)); - Done - } - - fn append_comment_to_html(&mut self, text: StrTendril) -> ProcessResult<Handle> { - let target = html_elem(&self.open_elems); - let comment = self.sink.create_comment(text); - self.sink.append(target, AppendNode(comment)); - Done - } - - //§ creating-and-inserting-nodes - fn create_root(&mut self, attrs: Vec<Attribute>) { - let elem = create_element( - &mut self.sink, - QualName::new(None, ns!(html), local_name!("html")), - attrs, - ); - self.push(&elem); - self.sink.append(&self.doc_handle, AppendNode(elem)); - // FIXME: application cache selection algorithm - } - - // https://html.spec.whatwg.org/multipage/#create-an-element-for-the-token - fn insert_element( - &mut self, - push: PushFlag, - ns: Namespace, - name: LocalName, - attrs: Vec<Attribute>, - ) -> Handle { - declare_tag_set!(form_associatable = - "button" "fieldset" "input" "object" - "output" "select" "textarea" "img"); - - declare_tag_set!(listed = [form_associatable] - "img"); - - // Step 7. 
- let qname = QualName::new(None, ns, name); - let elem = create_element(&mut self.sink, qname.clone(), attrs.clone()); - - let insertion_point = self.appropriate_place_for_insertion(None); - let (node1, node2) = match insertion_point { - LastChild(ref p) | BeforeSibling(ref p) => (p.clone(), None), - TableFosterParenting { - ref element, - ref prev_element, - } => (element.clone(), Some(prev_element.clone())), - }; - - // Step 12. - if form_associatable(qname.expanded()) && - self.form_elem.is_some() && - !self.in_html_elem_named(local_name!("template")) && - !(listed(qname.expanded()) && - attrs - .iter() - .any(|a| a.name.expanded() == expanded_name!("", "form"))) - { - let form = self.form_elem.as_ref().unwrap().clone(); - let node2 = match node2 { - Some(ref n) => Some(n), - None => None, - }; - self.sink.associate_with_form(&elem, &form, (&node1, node2)); - } - - self.insert_at(insertion_point, AppendNode(elem.clone())); - - match push { - Push => self.push(&elem), - NoPush => (), - } - // FIXME: Remove from the stack if we can't append? - elem - } - - fn insert_element_for(&mut self, tag: Tag) -> Handle { - self.insert_element(Push, ns!(html), tag.name, tag.attrs) - } - - fn insert_and_pop_element_for(&mut self, tag: Tag) -> Handle { - self.insert_element(NoPush, ns!(html), tag.name, tag.attrs) - } - - fn insert_phantom(&mut self, name: LocalName) -> Handle { - self.insert_element(Push, ns!(html), name, vec![]) - } - //§ END - - fn create_formatting_element_for(&mut self, tag: Tag) -> Handle { - // FIXME: This really wants unit tests. 
- let mut first_match = None; - let mut matches = 0usize; - for (i, _, old_tag) in self.active_formatting_end_to_marker() { - if tag.equiv_modulo_attr_order(old_tag) { - first_match = Some(i); - matches += 1; - } - } - - if matches >= 3 { - self.active_formatting - .remove(first_match.expect("matches with no index")); - } - - let elem = self.insert_element(Push, ns!(html), tag.name.clone(), tag.attrs.clone()); - self.active_formatting.push(Element(elem.clone(), tag)); - elem - } - - fn clear_active_formatting_to_marker(&mut self) { - loop { - match self.active_formatting.pop() { - None | Some(Marker) => break, - _ => (), - } - } - } - - fn process_end_tag_in_body(&mut self, tag: Tag) { - // Look back for a matching open element. - let mut match_idx = None; - for (i, elem) in self.open_elems.iter().enumerate().rev() { - if self.html_elem_named(elem, tag.name.clone()) { - match_idx = Some(i); - break; - } - - if self.elem_in(elem, special_tag) { - self.sink - .parse_error(Borrowed("Found special tag while closing generic tag")); - return; - } - } - - // Can't use unwrap_or_return!() due to rust-lang/rust#16617. - let match_idx = match match_idx { - None => { - // I believe this is impossible, because the root - // <html> element is in special_tag. 
- self.unexpected(&tag); - return; - }, - Some(x) => x, - }; - - self.generate_implied_end_except(tag.name.clone()); - - if match_idx != self.open_elems.len() - 1 { - // mis-nested tags - self.unexpected(&tag); - } - self.open_elems.truncate(match_idx); - } - - fn handle_misnested_a_tags(&mut self, tag: &Tag) { - let node = unwrap_or_return!( - self.active_formatting_end_to_marker() - .filter(|&(_, n, _)| self.html_elem_named(n, local_name!("a"))) - .next() - .map(|(_, n, _)| n.clone()), - () - ); - - self.unexpected(tag); - self.adoption_agency(local_name!("a")); - self.position_in_active_formatting(&node) - .map(|index| self.active_formatting.remove(index)); - self.remove_from_stack(&node); - } - - //§ tree-construction - fn is_foreign(&mut self, token: &Token) -> bool { - if let EOFToken = *token { - return false; - } - - if self.open_elems.is_empty() { - return false; - } - - let name = self.sink.elem_name(self.adjusted_current_node()); - if let ns!(html) = *name.ns { - return false; - } - - if mathml_text_integration_point(name) { - match *token { - CharacterTokens(..) | NullCharacterToken => return false, - TagToken(Tag { - kind: StartTag, - ref name, - .. - }) if !matches!(*name, local_name!("mglyph") | local_name!("malignmark")) => { - return false; - }, - _ => (), - } - } - - if svg_html_integration_point(name) { - match *token { - CharacterTokens(..) | NullCharacterToken => return false, - TagToken(Tag { kind: StartTag, .. }) => return false, - _ => (), - } - } - - if let expanded_name!(mathml "annotation-xml") = name { - match *token { - TagToken(Tag { - kind: StartTag, - name: local_name!("svg"), - .. - }) => return false, - CharacterTokens(..) | NullCharacterToken | TagToken(Tag { kind: StartTag, .. 
}) => { - return !self - .sink - .is_mathml_annotation_xml_integration_point(self.adjusted_current_node()); - }, - _ => {}, - }; - } - - true - } - //§ END - - fn enter_foreign(&mut self, mut tag: Tag, ns: Namespace) -> ProcessResult<Handle> { - match ns { - ns!(mathml) => self.adjust_mathml_attributes(&mut tag), - ns!(svg) => self.adjust_svg_attributes(&mut tag), - _ => (), - } - self.adjust_foreign_attributes(&mut tag); - - if tag.self_closing { - self.insert_element(NoPush, ns, tag.name, tag.attrs); - DoneAckSelfClosing - } else { - self.insert_element(Push, ns, tag.name, tag.attrs); - Done - } - } - - fn adjust_svg_tag_name(&mut self, tag: &mut Tag) { - let Tag { ref mut name, .. } = *tag; - match *name { - local_name!("altglyph") => *name = local_name!("altGlyph"), - local_name!("altglyphdef") => *name = local_name!("altGlyphDef"), - local_name!("altglyphitem") => *name = local_name!("altGlyphItem"), - local_name!("animatecolor") => *name = local_name!("animateColor"), - local_name!("animatemotion") => *name = local_name!("animateMotion"), - local_name!("animatetransform") => *name = local_name!("animateTransform"), - local_name!("clippath") => *name = local_name!("clipPath"), - local_name!("feblend") => *name = local_name!("feBlend"), - local_name!("fecolormatrix") => *name = local_name!("feColorMatrix"), - local_name!("fecomponenttransfer") => *name = local_name!("feComponentTransfer"), - local_name!("fecomposite") => *name = local_name!("feComposite"), - local_name!("feconvolvematrix") => *name = local_name!("feConvolveMatrix"), - local_name!("fediffuselighting") => *name = local_name!("feDiffuseLighting"), - local_name!("fedisplacementmap") => *name = local_name!("feDisplacementMap"), - local_name!("fedistantlight") => *name = local_name!("feDistantLight"), - local_name!("fedropshadow") => *name = local_name!("feDropShadow"), - local_name!("feflood") => *name = local_name!("feFlood"), - local_name!("fefunca") => *name = local_name!("feFuncA"), - 
local_name!("fefuncb") => *name = local_name!("feFuncB"), - local_name!("fefuncg") => *name = local_name!("feFuncG"), - local_name!("fefuncr") => *name = local_name!("feFuncR"), - local_name!("fegaussianblur") => *name = local_name!("feGaussianBlur"), - local_name!("feimage") => *name = local_name!("feImage"), - local_name!("femerge") => *name = local_name!("feMerge"), - local_name!("femergenode") => *name = local_name!("feMergeNode"), - local_name!("femorphology") => *name = local_name!("feMorphology"), - local_name!("feoffset") => *name = local_name!("feOffset"), - local_name!("fepointlight") => *name = local_name!("fePointLight"), - local_name!("fespecularlighting") => *name = local_name!("feSpecularLighting"), - local_name!("fespotlight") => *name = local_name!("feSpotLight"), - local_name!("fetile") => *name = local_name!("feTile"), - local_name!("feturbulence") => *name = local_name!("feTurbulence"), - local_name!("foreignobject") => *name = local_name!("foreignObject"), - local_name!("glyphref") => *name = local_name!("glyphRef"), - local_name!("lineargradient") => *name = local_name!("linearGradient"), - local_name!("radialgradient") => *name = local_name!("radialGradient"), - local_name!("textpath") => *name = local_name!("textPath"), - _ => (), - } - } - - fn adjust_attributes<F>(&mut self, tag: &mut Tag, mut map: F) - where - F: FnMut(LocalName) -> Option<QualName>, - { - for &mut Attribute { ref mut name, .. 
} in &mut tag.attrs { - if let Some(replacement) = map(name.local.clone()) { - *name = replacement; - } - } - } - - fn adjust_svg_attributes(&mut self, tag: &mut Tag) { - self.adjust_attributes(tag, |k| match k { - local_name!("attributename") => Some(qualname!("", "attributeName")), - local_name!("attributetype") => Some(qualname!("", "attributeType")), - local_name!("basefrequency") => Some(qualname!("", "baseFrequency")), - local_name!("baseprofile") => Some(qualname!("", "baseProfile")), - local_name!("calcmode") => Some(qualname!("", "calcMode")), - local_name!("clippathunits") => Some(qualname!("", "clipPathUnits")), - local_name!("diffuseconstant") => Some(qualname!("", "diffuseConstant")), - local_name!("edgemode") => Some(qualname!("", "edgeMode")), - local_name!("filterunits") => Some(qualname!("", "filterUnits")), - local_name!("glyphref") => Some(qualname!("", "glyphRef")), - local_name!("gradienttransform") => Some(qualname!("", "gradientTransform")), - local_name!("gradientunits") => Some(qualname!("", "gradientUnits")), - local_name!("kernelmatrix") => Some(qualname!("", "kernelMatrix")), - local_name!("kernelunitlength") => Some(qualname!("", "kernelUnitLength")), - local_name!("keypoints") => Some(qualname!("", "keyPoints")), - local_name!("keysplines") => Some(qualname!("", "keySplines")), - local_name!("keytimes") => Some(qualname!("", "keyTimes")), - local_name!("lengthadjust") => Some(qualname!("", "lengthAdjust")), - local_name!("limitingconeangle") => Some(qualname!("", "limitingConeAngle")), - local_name!("markerheight") => Some(qualname!("", "markerHeight")), - local_name!("markerunits") => Some(qualname!("", "markerUnits")), - local_name!("markerwidth") => Some(qualname!("", "markerWidth")), - local_name!("maskcontentunits") => Some(qualname!("", "maskContentUnits")), - local_name!("maskunits") => Some(qualname!("", "maskUnits")), - local_name!("numoctaves") => Some(qualname!("", "numOctaves")), - local_name!("pathlength") => 
Some(qualname!("", "pathLength")), - local_name!("patterncontentunits") => Some(qualname!("", "patternContentUnits")), - local_name!("patterntransform") => Some(qualname!("", "patternTransform")), - local_name!("patternunits") => Some(qualname!("", "patternUnits")), - local_name!("pointsatx") => Some(qualname!("", "pointsAtX")), - local_name!("pointsaty") => Some(qualname!("", "pointsAtY")), - local_name!("pointsatz") => Some(qualname!("", "pointsAtZ")), - local_name!("preservealpha") => Some(qualname!("", "preserveAlpha")), - local_name!("preserveaspectratio") => Some(qualname!("", "preserveAspectRatio")), - local_name!("primitiveunits") => Some(qualname!("", "primitiveUnits")), - local_name!("refx") => Some(qualname!("", "refX")), - local_name!("refy") => Some(qualname!("", "refY")), - local_name!("repeatcount") => Some(qualname!("", "repeatCount")), - local_name!("repeatdur") => Some(qualname!("", "repeatDur")), - local_name!("requiredextensions") => Some(qualname!("", "requiredExtensions")), - local_name!("requiredfeatures") => Some(qualname!("", "requiredFeatures")), - local_name!("specularconstant") => Some(qualname!("", "specularConstant")), - local_name!("specularexponent") => Some(qualname!("", "specularExponent")), - local_name!("spreadmethod") => Some(qualname!("", "spreadMethod")), - local_name!("startoffset") => Some(qualname!("", "startOffset")), - local_name!("stddeviation") => Some(qualname!("", "stdDeviation")), - local_name!("stitchtiles") => Some(qualname!("", "stitchTiles")), - local_name!("surfacescale") => Some(qualname!("", "surfaceScale")), - local_name!("systemlanguage") => Some(qualname!("", "systemLanguage")), - local_name!("tablevalues") => Some(qualname!("", "tableValues")), - local_name!("targetx") => Some(qualname!("", "targetX")), - local_name!("targety") => Some(qualname!("", "targetY")), - local_name!("textlength") => Some(qualname!("", "textLength")), - local_name!("viewbox") => Some(qualname!("", "viewBox")), - 
local_name!("viewtarget") => Some(qualname!("", "viewTarget")), - local_name!("xchannelselector") => Some(qualname!("", "xChannelSelector")), - local_name!("ychannelselector") => Some(qualname!("", "yChannelSelector")), - local_name!("zoomandpan") => Some(qualname!("", "zoomAndPan")), - _ => None, - }); - } - - fn adjust_mathml_attributes(&mut self, tag: &mut Tag) { - self.adjust_attributes(tag, |k| match k { - local_name!("definitionurl") => Some(qualname!("", "definitionURL")), - _ => None, - }); - } - - fn adjust_foreign_attributes(&mut self, tag: &mut Tag) { - self.adjust_attributes(tag, |k| match k { - local_name!("xlink:actuate") => Some(qualname!("xlink" xlink "actuate")), - local_name!("xlink:arcrole") => Some(qualname!("xlink" xlink "arcrole")), - local_name!("xlink:href") => Some(qualname!("xlink" xlink "href")), - local_name!("xlink:role") => Some(qualname!("xlink" xlink "role")), - local_name!("xlink:show") => Some(qualname!("xlink" xlink "show")), - local_name!("xlink:title") => Some(qualname!("xlink" xlink "title")), - local_name!("xlink:type") => Some(qualname!("xlink" xlink "type")), - local_name!("xml:base") => Some(qualname!("xml" xml "base")), - local_name!("xml:lang") => Some(qualname!("xml" xml "lang")), - local_name!("xml:space") => Some(qualname!("xml" xml "space")), - local_name!("xmlns") => Some(qualname!("" xmlns "xmlns")), - local_name!("xmlns:xlink") => Some(qualname!("xmlns" xmlns "xlink")), - _ => None, - }); - } - - fn foreign_start_tag(&mut self, mut tag: Tag) -> ProcessResult<Handle> { - let current_ns = self.sink.elem_name(self.adjusted_current_node()).ns.clone(); - match current_ns { - ns!(mathml) => self.adjust_mathml_attributes(&mut tag), - ns!(svg) => { - self.adjust_svg_tag_name(&mut tag); - self.adjust_svg_attributes(&mut tag); - }, - _ => (), - } - self.adjust_foreign_attributes(&mut tag); - if tag.self_closing { - // FIXME(#118): <script /> in SVG - self.insert_element(NoPush, current_ns, tag.name, tag.attrs); - 
DoneAckSelfClosing - } else { - self.insert_element(Push, current_ns, tag.name, tag.attrs); - Done - } - } - - fn unexpected_start_tag_in_foreign_content(&mut self, tag: Tag) -> ProcessResult<Handle> { - self.unexpected(&tag); - if self.is_fragment() { - self.foreign_start_tag(tag) - } else { - self.pop(); - while !self.current_node_in(|n| { - *n.ns == ns!(html) || - mathml_text_integration_point(n) || - svg_html_integration_point(n) - }) { - self.pop(); - } - ReprocessForeign(TagToken(tag)) - } - } -} diff --git a/src/tree_builder/rules.rs b/src/tree_builder/rules.rs deleted file mode 100644 index bdc8afd..0000000 --- a/src/tree_builder/rules.rs +++ /dev/null @@ -1,1449 +0,0 @@ -// Copyright 2014-2017 The html5ever Project Developers. See the -// COPYRIGHT file at the top-level directory of this distribution. -// -// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or -// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license -// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -// The tree builder rules, as a single, enormous nested match expression. 
- -use markup5ever::{expanded_name, local_name, namespace_prefix, namespace_url, ns}; -use crate::tokenizer::states::{Plaintext, Rawtext, Rcdata, ScriptData}; -use crate::tree_builder::tag_sets::*; -use crate::tree_builder::types::*; - -use std::borrow::ToOwned; - -use crate::tendril::SliceExt; - -fn any_not_whitespace(x: &StrTendril) -> bool { - // FIXME: this might be much faster as a byte scan - x.chars().any(|c| !is_ascii_whitespace(c)) -} - -fn current_node<Handle>(open_elems: &[Handle]) -> &Handle { - open_elems.last().expect("no current element") -} - -#[doc(hidden)] -impl<Handle, Sink> TreeBuilder<Handle, Sink> -where - Handle: Clone, - Sink: TreeSink<Handle = Handle>, -{ - fn step(&mut self, mode: InsertionMode, token: Token) -> ProcessResult<Handle> { - self.debug_step(mode, &token); - - match mode { - //§ the-initial-insertion-mode - Initial => match_token!(token { - CharacterTokens(NotSplit, text) => SplitWhitespace(text), - CharacterTokens(Whitespace, _) => Done, - CommentToken(text) => self.append_comment_to_doc(text), - token => { - if !self.opts.iframe_srcdoc { - self.unexpected(&token); - self.set_quirks_mode(Quirks); - } - Reprocess(BeforeHtml, token) - } - }), - - //§ the-before-html-insertion-mode - BeforeHtml => match_token!(token { - CharacterTokens(NotSplit, text) => SplitWhitespace(text), - CharacterTokens(Whitespace, _) => Done, - CommentToken(text) => self.append_comment_to_doc(text), - - tag @ <html> => { - self.create_root(tag.attrs); - self.mode = BeforeHead; - Done - } - - </head> </body> </html> </br> => else, - - tag @ </_> => self.unexpected(&tag), - - token => { - self.create_root(vec!()); - Reprocess(BeforeHead, token) - } - }), - - //§ the-before-head-insertion-mode - BeforeHead => match_token!(token { - CharacterTokens(NotSplit, text) => SplitWhitespace(text), - CharacterTokens(Whitespace, _) => Done, - CommentToken(text) => self.append_comment(text), - - <html> => self.step(InBody, token), - - tag @ <head> => { - self.head_elem 
= Some(self.insert_element_for(tag)); - self.mode = InHead; - Done - } - - </head> </body> </html> </br> => else, - - tag @ </_> => self.unexpected(&tag), - - token => { - self.head_elem = Some(self.insert_phantom(local_name!("head"))); - Reprocess(InHead, token) - } - }), - - //§ parsing-main-inhead - InHead => match_token!(token { - CharacterTokens(NotSplit, text) => SplitWhitespace(text), - CharacterTokens(Whitespace, text) => self.append_text(text), - CommentToken(text) => self.append_comment(text), - - <html> => self.step(InBody, token), - - tag @ <base> <basefont> <bgsound> <link> <meta> => { - // FIXME: handle <meta charset=...> and <meta http-equiv="Content-Type"> - self.insert_and_pop_element_for(tag); - DoneAckSelfClosing - } - - tag @ <title> => { - self.parse_raw_data(tag, Rcdata) - } - - tag @ <noframes> <style> <noscript> => { - if (!self.opts.scripting_enabled) && (tag.name == local_name!("noscript")) { - self.insert_element_for(tag); - self.mode = InHeadNoscript; - Done - } else { - self.parse_raw_data(tag, Rawtext) - } - } - - tag @ <script> => { - let elem = create_element( - &mut self.sink, QualName::new(None, ns!(html), local_name!("script")), - tag.attrs); - if self.is_fragment() { - self.sink.mark_script_already_started(&elem); - } - self.insert_appropriately(AppendNode(elem.clone()), None); - self.open_elems.push(elem); - self.to_raw_text_mode(ScriptData) - } - - </head> => { - self.pop(); - self.mode = AfterHead; - Done - } - - </body> </html> </br> => else, - - tag @ <template> => { - self.insert_element_for(tag); - self.active_formatting.push(Marker); - self.frameset_ok = false; - self.mode = InTemplate; - self.template_modes.push(InTemplate); - Done - } - - tag @ </template> => { - if !self.in_html_elem_named(local_name!("template")) { - self.unexpected(&tag); - } else { - self.generate_implied_end(thorough_implied_end); - self.expect_to_close(local_name!("template")); - self.clear_active_formatting_to_marker(); - 
self.template_modes.pop(); - self.mode = self.reset_insertion_mode(); - } - Done - } - - <head> => self.unexpected(&token), - tag @ </_> => self.unexpected(&tag), - - token => { - self.pop(); - Reprocess(AfterHead, token) - } - }), - - //§ parsing-main-inheadnoscript - InHeadNoscript => match_token!(token { - <html> => self.step(InBody, token), - - </noscript> => { - self.pop(); - self.mode = InHead; - Done - }, - - CharacterTokens(NotSplit, text) => SplitWhitespace(text), - CharacterTokens(Whitespace, _) => self.step(InHead, token), - - CommentToken(_) => self.step(InHead, token), - - <basefont> <bgsound> <link> <meta> <noframes> <style> - => self.step(InHead, token), - - </br> => else, - - <head> <noscript> => self.unexpected(&token), - tag @ </_> => self.unexpected(&tag), - - token => { - self.unexpected(&token); - self.pop(); - Reprocess(InHead, token) - }, - }), - - //§ the-after-head-insertion-mode - AfterHead => match_token!(token { - CharacterTokens(NotSplit, text) => SplitWhitespace(text), - CharacterTokens(Whitespace, text) => self.append_text(text), - CommentToken(text) => self.append_comment(text), - - <html> => self.step(InBody, token), - - tag @ <body> => { - self.insert_element_for(tag); - self.frameset_ok = false; - self.mode = InBody; - Done - } - - tag @ <frameset> => { - self.insert_element_for(tag); - self.mode = InFrameset; - Done - } - - <base> <basefont> <bgsound> <link> <meta> - <noframes> <script> <style> <template> <title> => { - self.unexpected(&token); - let head = self.head_elem.as_ref().expect("no head element").clone(); - self.push(&head); - let result = self.step(InHead, token); - self.remove_from_stack(&head); - result - } - - </template> => self.step(InHead, token), - - </body> </html> </br> => else, - - <head> => self.unexpected(&token), - tag @ </_> => self.unexpected(&tag), - - token => { - self.insert_phantom(local_name!("body")); - Reprocess(InBody, token) - } - }), - - //§ parsing-main-inbody - InBody => match_token!(token { 
- NullCharacterToken => self.unexpected(&token), - - CharacterTokens(_, text) => { - self.reconstruct_formatting(); - if any_not_whitespace(&text) { - self.frameset_ok = false; - } - self.append_text(text) - } - - CommentToken(text) => self.append_comment(text), - - tag @ <html> => { - self.unexpected(&tag); - if !self.in_html_elem_named(local_name!("template")) { - let top = html_elem(&self.open_elems); - self.sink.add_attrs_if_missing(top, tag.attrs); - } - Done - } - - <base> <basefont> <bgsound> <link> <meta> <noframes> - <script> <style> <template> <title> </template> => { - self.step(InHead, token) - } - - tag @ <body> => { - self.unexpected(&tag); - match self.body_elem().cloned() { - Some(ref node) if self.open_elems.len() != 1 && - !self.in_html_elem_named(local_name!("template")) => { - self.frameset_ok = false; - self.sink.add_attrs_if_missing(node, tag.attrs) - }, - _ => {} - } - Done - } - - tag @ <frameset> => { - self.unexpected(&tag); - if !self.frameset_ok { return Done; } - - let body = unwrap_or_return!(self.body_elem(), Done).clone(); - self.sink.remove_from_parent(&body); - - // FIXME: can we get here in the fragment case? - // What to do with the first element then? 
- self.open_elems.truncate(1); - self.insert_element_for(tag); - self.mode = InFrameset; - Done - } - - EOFToken => { - if !self.template_modes.is_empty() { - self.step(InTemplate, token) - } else { - self.check_body_end(); - self.stop_parsing() - } - } - - </body> => { - if self.in_scope_named(default_scope, local_name!("body")) { - self.check_body_end(); - self.mode = AfterBody; - } else { - self.sink.parse_error(Borrowed("</body> with no <body> in scope")); - } - Done - } - - </html> => { - if self.in_scope_named(default_scope, local_name!("body")) { - self.check_body_end(); - Reprocess(AfterBody, token) - } else { - self.sink.parse_error(Borrowed("</html> with no <body> in scope")); - Done - } - } - - tag @ <address> <article> <aside> <blockquote> <center> <details> <dialog> - <dir> <div> <dl> <fieldset> <figcaption> <figure> <footer> <header> - <hgroup> <main> <nav> <ol> <p> <section> <summary> <ul> => { - self.close_p_element_in_button_scope(); - self.insert_element_for(tag); - Done - } - - tag @ <menu> => { - self.close_p_element_in_button_scope(); - self.insert_element_for(tag); - Done - } - - tag @ <h1> <h2> <h3> <h4> <h5> <h6> => { - self.close_p_element_in_button_scope(); - if self.current_node_in(heading_tag) { - self.sink.parse_error(Borrowed("nested heading tags")); - self.pop(); - } - self.insert_element_for(tag); - Done - } - - tag @ <pre> <listing> => { - self.close_p_element_in_button_scope(); - self.insert_element_for(tag); - self.ignore_lf = true; - self.frameset_ok = false; - Done - } - - tag @ <form> => { - if self.form_elem.is_some() && - !self.in_html_elem_named(local_name!("template")) { - self.sink.parse_error(Borrowed("nested forms")); - } else { - self.close_p_element_in_button_scope(); - let elem = self.insert_element_for(tag); - if !self.in_html_elem_named(local_name!("template")) { - self.form_elem = Some(elem); - } - } - Done - } - - tag @ <li> <dd> <dt> => { - declare_tag_set!(close_list = "li"); - declare_tag_set!(close_defn = "dd" 
"dt"); - declare_tag_set!(extra_special = [special_tag] - "address" "div" "p"); - let list = match tag.name { - local_name!("li") => true, - local_name!("dd") | local_name!("dt") => false, - _ => unreachable!(), - }; - - self.frameset_ok = false; - - let mut to_close = None; - for node in self.open_elems.iter().rev() { - let name = self.sink.elem_name(node); - let can_close = if list { - close_list(name) - } else { - close_defn(name) - }; - if can_close { - to_close = Some(name.local.clone()); - break; - } - if extra_special(name) { - break; - } - } - - match to_close { - Some(name) => { - self.generate_implied_end_except(name.clone()); - self.expect_to_close(name); - } - None => (), - } - - self.close_p_element_in_button_scope(); - self.insert_element_for(tag); - Done - } - - tag @ <plaintext> => { - self.close_p_element_in_button_scope(); - self.insert_element_for(tag); - ToPlaintext - } - - tag @ <button> => { - if self.in_scope_named(default_scope, local_name!("button")) { - self.sink.parse_error(Borrowed("nested buttons")); - self.generate_implied_end(cursory_implied_end); - self.pop_until_named(local_name!("button")); - } - self.reconstruct_formatting(); - self.insert_element_for(tag); - self.frameset_ok = false; - Done - } - - tag @ </address> </article> </aside> </blockquote> </button> </center> - </details> </dialog> </dir> </div> </dl> </fieldset> </figcaption> - </figure> </footer> </header> </hgroup> </listing> </main> </menu> - </nav> </ol> </pre> </section> </summary> </ul> => { - if !self.in_scope_named(default_scope, tag.name.clone()) { - self.unexpected(&tag); - } else { - self.generate_implied_end(cursory_implied_end); - self.expect_to_close(tag.name); - } - Done - } - - </form> => { - if !self.in_html_elem_named(local_name!("template")) { - // Can't use unwrap_or_return!() due to rust-lang/rust#16617. 
- let node = match self.form_elem.take() { - None => { - self.sink.parse_error(Borrowed("Null form element pointer on </form>")); - return Done; - } - Some(x) => x, - }; - if !self.in_scope(default_scope, |n| self.sink.same_node(&node, &n)) { - self.sink.parse_error(Borrowed("Form element not in scope on </form>")); - return Done; - } - self.generate_implied_end(cursory_implied_end); - let current = self.current_node().clone(); - self.remove_from_stack(&node); - if !self.sink.same_node(&current, &node) { - self.sink.parse_error(Borrowed("Bad open element on </form>")); - } - } else { - if !self.in_scope_named(default_scope, local_name!("form")) { - self.sink.parse_error(Borrowed("Form element not in scope on </form>")); - return Done; - } - self.generate_implied_end(cursory_implied_end); - if !self.current_node_named(local_name!("form")) { - self.sink.parse_error(Borrowed("Bad open element on </form>")); - } - self.pop_until_named(local_name!("form")); - } - Done - } - - </p> => { - if !self.in_scope_named(button_scope, local_name!("p")) { - self.sink.parse_error(Borrowed("No <p> tag to close")); - self.insert_phantom(local_name!("p")); - } - self.close_p_element(); - Done - } - - tag @ </li> </dd> </dt> => { - let in_scope = if tag.name == local_name!("li") { - self.in_scope_named(list_item_scope, tag.name.clone()) - } else { - self.in_scope_named(default_scope, tag.name.clone()) - }; - if in_scope { - self.generate_implied_end_except(tag.name.clone()); - self.expect_to_close(tag.name); - } else { - self.sink.parse_error(Borrowed("No matching tag to close")); - } - Done - } - - tag @ </h1> </h2> </h3> </h4> </h5> </h6> => { - if self.in_scope(default_scope, |n| self.elem_in(&n, heading_tag)) { - self.generate_implied_end(cursory_implied_end); - if !self.current_node_named(tag.name) { - self.sink.parse_error(Borrowed("Closing wrong heading tag")); - } - self.pop_until(heading_tag); - } else { - self.sink.parse_error(Borrowed("No heading tag to close")); - } - Done - } -
- tag @ <a> => { - self.handle_misnested_a_tags(&tag); - self.reconstruct_formatting(); - self.create_formatting_element_for(tag); - Done - } - - tag @ <b> <big> <code> <em> <font> <i> <s> <small> <strike> <strong> <tt> <u> => { - self.reconstruct_formatting(); - self.create_formatting_element_for(tag); - Done - } - - tag @ <nobr> => { - self.reconstruct_formatting(); - if self.in_scope_named(default_scope, local_name!("nobr")) { - self.sink.parse_error(Borrowed("Nested <nobr>")); - self.adoption_agency(local_name!("nobr")); - self.reconstruct_formatting(); - } - self.create_formatting_element_for(tag); - Done - } - - tag @ </a> </b> </big> </code> </em> </font> </i> </nobr> - </s> </small> </strike> </strong> </tt> </u> => { - self.adoption_agency(tag.name); - Done - } - - tag @ <applet> <marquee> <object> => { - self.reconstruct_formatting(); - self.insert_element_for(tag); - self.active_formatting.push(Marker); - self.frameset_ok = false; - Done - } - - tag @ </applet> </marquee> </object> => { - if !self.in_scope_named(default_scope, tag.name.clone()) { - self.unexpected(&tag); - } else { - self.generate_implied_end(cursory_implied_end); - self.expect_to_close(tag.name); - self.clear_active_formatting_to_marker(); - } - Done - } - - tag @ <table> => { - if self.quirks_mode != Quirks { - self.close_p_element_in_button_scope(); - } - self.insert_element_for(tag); - self.frameset_ok = false; - self.mode = InTable; - Done - } - - tag @ </br> => { - self.unexpected(&tag); - self.step(InBody, TagToken(Tag { - kind: StartTag, - attrs: vec!(), - ..tag - })) - } - - tag @ <area> <br> <embed> <img> <keygen> <wbr> <input> => { - let keep_frameset_ok = match tag.name { - local_name!("input") => self.is_type_hidden(&tag), - _ => false, - }; - self.reconstruct_formatting(); - self.insert_and_pop_element_for(tag); - if !keep_frameset_ok { - self.frameset_ok = false; - } - DoneAckSelfClosing - } - - tag @ <param> <source> <track> => { - self.insert_and_pop_element_for(tag); - 
DoneAckSelfClosing - } - - tag @ <hr> => { - self.close_p_element_in_button_scope(); - self.insert_and_pop_element_for(tag); - self.frameset_ok = false; - DoneAckSelfClosing - } - - tag @ <image> => { - self.unexpected(&tag); - self.step(InBody, TagToken(Tag { - name: local_name!("img"), - ..tag - })) - } - - tag @ <textarea> => { - self.ignore_lf = true; - self.frameset_ok = false; - self.parse_raw_data(tag, Rcdata) - } - - tag @ <xmp> => { - self.close_p_element_in_button_scope(); - self.reconstruct_formatting(); - self.frameset_ok = false; - self.parse_raw_data(tag, Rawtext) - } - - tag @ <iframe> => { - self.frameset_ok = false; - self.parse_raw_data(tag, Rawtext) - } - - tag @ <noembed> => { - self.parse_raw_data(tag, Rawtext) - } - - // <noscript> handled in wildcard case below - - tag @ <select> => { - self.reconstruct_formatting(); - self.insert_element_for(tag); - self.frameset_ok = false; - // NB: mode == InBody but possibly self.mode != mode, if - // we're processing "as in the rules for InBody". 
- self.mode = match self.mode { - InTable | InCaption | InTableBody - | InRow | InCell => InSelectInTable, - _ => InSelect, - }; - Done - } - - tag @ <optgroup> <option> => { - if self.current_node_named(local_name!("option")) { - self.pop(); - } - self.reconstruct_formatting(); - self.insert_element_for(tag); - Done - } - - tag @ <rb> <rtc> => { - if self.in_scope_named(default_scope, local_name!("ruby")) { - self.generate_implied_end(cursory_implied_end); - } - if !self.current_node_named(local_name!("ruby")) { - self.unexpected(&tag); - } - self.insert_element_for(tag); - Done - } - - tag @ <rp> <rt> => { - if self.in_scope_named(default_scope, local_name!("ruby")) { - self.generate_implied_end_except(local_name!("rtc")); - } - if !self.current_node_named(local_name!("rtc")) && !self.current_node_named(local_name!("ruby")) { - self.unexpected(&tag); - } - self.insert_element_for(tag); - Done - } - - tag @ <math> => self.enter_foreign(tag, ns!(mathml)), - - tag @ <svg> => self.enter_foreign(tag, ns!(svg)), - - <caption> <col> <colgroup> <frame> <head> - <tbody> <td> <tfoot> <th> <thead> <tr> => { - self.unexpected(&token); - Done - } - - tag @ <_> => { - if self.opts.scripting_enabled && tag.name == local_name!("noscript") { - self.parse_raw_data(tag, Rawtext) - } else { - self.reconstruct_formatting(); - self.insert_element_for(tag); - Done - } - } - - tag @ </_> => { - self.process_end_tag_in_body(tag); - Done - } - - // FIXME: This should be unreachable, but match_token requires a - // catch-all case. 
- _ => panic!("impossible case in InBody mode"), - }), - - //§ parsing-main-incdata - Text => match_token!(token { - CharacterTokens(_, text) => self.append_text(text), - - EOFToken => { - self.unexpected(&token); - if self.current_node_named(local_name!("script")) { - let current = current_node(&self.open_elems); - self.sink.mark_script_already_started(current); - } - self.pop(); - Reprocess(self.orig_mode.take().unwrap(), token) - } - - tag @ </_> => { - let node = self.pop(); - self.mode = self.orig_mode.take().unwrap(); - if tag.name == local_name!("script") { - return Script(node); - } - Done - } - - // The spec doesn't say what to do here. - // Other tokens are impossible? - _ => panic!("impossible case in Text mode"), - }), - - //§ parsing-main-intable - InTable => match_token!(token { - // FIXME: hack, should implement pat | pat for match_token instead - NullCharacterToken => self.process_chars_in_table(token), - - CharacterTokens(..) => self.process_chars_in_table(token), - - CommentToken(text) => self.append_comment(text), - - tag @ <caption> => { - self.pop_until_current(table_scope); - self.active_formatting.push(Marker); - self.insert_element_for(tag); - self.mode = InCaption; - Done - } - - tag @ <colgroup> => { - self.pop_until_current(table_scope); - self.insert_element_for(tag); - self.mode = InColumnGroup; - Done - } - - <col> => { - self.pop_until_current(table_scope); - self.insert_phantom(local_name!("colgroup")); - Reprocess(InColumnGroup, token) - } - - tag @ <tbody> <tfoot> <thead> => { - self.pop_until_current(table_scope); - self.insert_element_for(tag); - self.mode = InTableBody; - Done - } - - <td> <th> <tr> => { - self.pop_until_current(table_scope); - self.insert_phantom(local_name!("tbody")); - Reprocess(InTableBody, token) - } - - <table> => { - self.unexpected(&token); - if self.in_scope_named(table_scope, local_name!("table")) { - self.pop_until_named(local_name!("table")); - Reprocess(self.reset_insertion_mode(), token) - } else { 
- Done - } - } - - </table> => { - if self.in_scope_named(table_scope, local_name!("table")) { - self.pop_until_named(local_name!("table")); - self.mode = self.reset_insertion_mode(); - } else { - self.unexpected(&token); - } - Done - } - - </body> </caption> </col> </colgroup> </html> - </tbody> </td> </tfoot> </th> </thead> </tr> => - self.unexpected(&token), - - <style> <script> <template> </template> - => self.step(InHead, token), - - tag @ <input> => { - self.unexpected(&tag); - if self.is_type_hidden(&tag) { - self.insert_and_pop_element_for(tag); - DoneAckSelfClosing - } else { - self.foster_parent_in_body(TagToken(tag)) - } - } - - tag @ <form> => { - self.unexpected(&tag); - if !self.in_html_elem_named(local_name!("template")) && self.form_elem.is_none() { - self.form_elem = Some(self.insert_and_pop_element_for(tag)); - } - Done - } - - EOFToken => self.step(InBody, token), - - token => { - self.unexpected(&token); - self.foster_parent_in_body(token) - } - }), - - //§ parsing-main-intabletext - InTableText => match_token!(token { - NullCharacterToken => self.unexpected(&token), - - CharacterTokens(split, text) => { - self.pending_table_text.push((split, text)); - Done - } - - token => { - let pending = replace(&mut self.pending_table_text, vec!()); - let contains_nonspace = pending.iter().any(|&(split, ref text)| { - match split { - Whitespace => false, - NotWhitespace => true, - NotSplit => any_not_whitespace(text), - } - }); - - if contains_nonspace { - self.sink.parse_error(Borrowed("Non-space table text")); - for (split, text) in pending.into_iter() { - match self.foster_parent_in_body(CharacterTokens(split, text)) { - Done => (), - _ => panic!("not prepared to handle this!"), - } - } - } else { - for (_, text) in pending.into_iter() { - self.append_text(text); - } - } - - Reprocess(self.orig_mode.take().unwrap(), token) - } - }), - - //§ parsing-main-incaption - InCaption => match_token!(token { - tag @ <caption> <col> <colgroup> <tbody> <td> <tfoot> 
- <th> <thead> <tr> </table> </caption> => { - if self.in_scope_named(table_scope, local_name!("caption")) { - self.generate_implied_end(cursory_implied_end); - self.expect_to_close(local_name!("caption")); - self.clear_active_formatting_to_marker(); - match tag { - Tag { kind: EndTag, name: local_name!("caption"), .. } => { - self.mode = InTable; - Done - } - _ => Reprocess(InTable, TagToken(tag)) - } - } else { - self.unexpected(&tag); - Done - } - } - - </body> </col> </colgroup> </html> </tbody> - </td> </tfoot> </th> </thead> </tr> => self.unexpected(&token), - - token => self.step(InBody, token), - }), - - //§ parsing-main-incolgroup - InColumnGroup => match_token!(token { - CharacterTokens(NotSplit, text) => SplitWhitespace(text), - CharacterTokens(Whitespace, text) => self.append_text(text), - CommentToken(text) => self.append_comment(text), - - <html> => self.step(InBody, token), - - tag @ <col> => { - self.insert_and_pop_element_for(tag); - DoneAckSelfClosing - } - - </colgroup> => { - if self.current_node_named(local_name!("colgroup")) { - self.pop(); - self.mode = InTable; - } else { - self.unexpected(&token); - } - Done - } - - </col> => self.unexpected(&token), - - <template> </template> => self.step(InHead, token), - - EOFToken => self.step(InBody, token), - - token => { - if self.current_node_named(local_name!("colgroup")) { - self.pop(); - Reprocess(InTable, token) - } else { - self.unexpected(&token) - } - } - }), - - //§ parsing-main-intbody - InTableBody => match_token!(token { - tag @ <tr> => { - self.pop_until_current(table_body_context); - self.insert_element_for(tag); - self.mode = InRow; - Done - } - - <th> <td> => { - self.unexpected(&token); - self.pop_until_current(table_body_context); - self.insert_phantom(local_name!("tr")); - Reprocess(InRow, token) - } - - tag @ </tbody> </tfoot> </thead> => { - if self.in_scope_named(table_scope, tag.name.clone()) { - self.pop_until_current(table_body_context); - self.pop(); - self.mode = InTable; - 
} else { - self.unexpected(&tag); - } - Done - } - - <caption> <col> <colgroup> <tbody> <tfoot> <thead> </table> => { - declare_tag_set!(table_outer = "table" "tbody" "tfoot"); - if self.in_scope(table_scope, |e| self.elem_in(&e, table_outer)) { - self.pop_until_current(table_body_context); - self.pop(); - Reprocess(InTable, token) - } else { - self.unexpected(&token) - } - } - - </body> </caption> </col> </colgroup> </html> </td> </th> </tr> - => self.unexpected(&token), - - token => self.step(InTable, token), - }), - - //§ parsing-main-intr - InRow => match_token!(token { - tag @ <th> <td> => { - self.pop_until_current(table_row_context); - self.insert_element_for(tag); - self.mode = InCell; - self.active_formatting.push(Marker); - Done - } - - </tr> => { - if self.in_scope_named(table_scope, local_name!("tr")) { - self.pop_until_current(table_row_context); - let node = self.pop(); - self.assert_named(&node, local_name!("tr")); - self.mode = InTableBody; - } else { - self.unexpected(&token); - } - Done - } - - <caption> <col> <colgroup> <tbody> <tfoot> <thead> <tr> </table> => { - if self.in_scope_named(table_scope, local_name!("tr")) { - self.pop_until_current(table_row_context); - let node = self.pop(); - self.assert_named(&node, local_name!("tr")); - Reprocess(InTableBody, token) - } else { - self.unexpected(&token) - } - } - - tag @ </tbody> </tfoot> </thead> => { - if self.in_scope_named(table_scope, tag.name.clone()) { - if self.in_scope_named(table_scope, local_name!("tr")) { - self.pop_until_current(table_row_context); - let node = self.pop(); - self.assert_named(&node, local_name!("tr")); - Reprocess(InTableBody, TagToken(tag)) - } else { - Done - } - } else { - self.unexpected(&tag) - } - } - - </body> </caption> </col> </colgroup> </html> </td> </th> - => self.unexpected(&token), - - token => self.step(InTable, token), - }), - - //§ parsing-main-intd - InCell => match_token!(token { - tag @ </td> </th> => { - if self.in_scope_named(table_scope, 
tag.name.clone()) { - self.generate_implied_end(cursory_implied_end); - self.expect_to_close(tag.name); - self.clear_active_formatting_to_marker(); - self.mode = InRow; - } else { - self.unexpected(&tag); - } - Done - } - - <caption> <col> <colgroup> <tbody> <td> <tfoot> <th> <thead> <tr> => { - if self.in_scope(table_scope, |n| self.elem_in(&n, td_th)) { - self.close_the_cell(); - Reprocess(InRow, token) - } else { - self.unexpected(&token) - } - } - - </body> </caption> </col> </colgroup> </html> - => self.unexpected(&token), - - tag @ </table> </tbody> </tfoot> </thead> </tr> => { - if self.in_scope_named(table_scope, tag.name.clone()) { - self.close_the_cell(); - Reprocess(InRow, TagToken(tag)) - } else { - self.unexpected(&tag) - } - } - - token => self.step(InBody, token), - }), - - //§ parsing-main-inselect - InSelect => match_token!(token { - NullCharacterToken => self.unexpected(&token), - CharacterTokens(_, text) => self.append_text(text), - CommentToken(text) => self.append_comment(text), - - <html> => self.step(InBody, token), - - tag @ <option> => { - if self.current_node_named(local_name!("option")) { - self.pop(); - } - self.insert_element_for(tag); - Done - } - - tag @ <optgroup> => { - if self.current_node_named(local_name!("option")) { - self.pop(); - } - if self.current_node_named(local_name!("optgroup")) { - self.pop(); - } - self.insert_element_for(tag); - Done - } - - </optgroup> => { - if self.open_elems.len() >= 2 - && self.current_node_named(local_name!("option")) - && self.html_elem_named(&self.open_elems[self.open_elems.len() - 2], - local_name!("optgroup")) { - self.pop(); - } - if self.current_node_named(local_name!("optgroup")) { - self.pop(); - } else { - self.unexpected(&token); - } - Done - } - - </option> => { - if self.current_node_named(local_name!("option")) { - self.pop(); - } else { - self.unexpected(&token); - } - Done - } - - tag @ <select> </select> => { - let in_scope = self.in_scope_named(select_scope, 
local_name!("select")); - - if !in_scope || tag.kind == StartTag { - self.unexpected(&tag); - } - - if in_scope { - self.pop_until_named(local_name!("select")); - self.mode = self.reset_insertion_mode(); - } - Done - } - - <input> <keygen> <textarea> => { - self.unexpected(&token); - if self.in_scope_named(select_scope, local_name!("select")) { - self.pop_until_named(local_name!("select")); - Reprocess(self.reset_insertion_mode(), token) - } else { - Done - } - } - - <script> <template> </template> => self.step(InHead, token), - - EOFToken => self.step(InBody, token), - - token => self.unexpected(&token), - }), - - //§ parsing-main-inselectintable - InSelectInTable => match_token!(token { - <caption> <table> <tbody> <tfoot> <thead> <tr> <td> <th> => { - self.unexpected(&token); - self.pop_until_named(local_name!("select")); - Reprocess(self.reset_insertion_mode(), token) - } - - tag @ </caption> </table> </tbody> </tfoot> </thead> </tr> </td> </th> => { - self.unexpected(&tag); - if self.in_scope_named(table_scope, tag.name.clone()) { - self.pop_until_named(local_name!("select")); - Reprocess(self.reset_insertion_mode(), TagToken(tag)) - } else { - Done - } - } - - token => self.step(InSelect, token), - }), - - //§ parsing-main-intemplate - InTemplate => match_token!(token { - CharacterTokens(_, _) => self.step(InBody, token), - CommentToken(_) => self.step(InBody, token), - - <base> <basefont> <bgsound> <link> <meta> <noframes> <script> - <style> <template> <title> </template> => { - self.step(InHead, token) - } - - <caption> <colgroup> <tbody> <tfoot> <thead> => { - self.template_modes.pop(); - self.template_modes.push(InTable); - Reprocess(InTable, token) - } - - <col> => { - self.template_modes.pop(); - self.template_modes.push(InColumnGroup); - Reprocess(InColumnGroup, token) - } - - <tr> => { - self.template_modes.pop(); - self.template_modes.push(InTableBody); - Reprocess(InTableBody, token) - } - - <td> <th> => { - self.template_modes.pop(); - 
self.template_modes.push(InRow); - Reprocess(InRow, token) - } - - EOFToken => { - if !self.in_html_elem_named(local_name!("template")) { - self.stop_parsing() - } else { - self.unexpected(&token); - self.pop_until_named(local_name!("template")); - self.clear_active_formatting_to_marker(); - self.template_modes.pop(); - self.mode = self.reset_insertion_mode(); - Reprocess(self.reset_insertion_mode(), token) - } - } - - tag @ <_> => { - self.template_modes.pop(); - self.template_modes.push(InBody); - Reprocess(InBody, TagToken(tag)) - } - - token => self.unexpected(&token), - }), - - //§ parsing-main-afterbody - AfterBody => match_token!(token { - CharacterTokens(NotSplit, text) => SplitWhitespace(text), - CharacterTokens(Whitespace, _) => self.step(InBody, token), - CommentToken(text) => self.append_comment_to_html(text), - - <html> => self.step(InBody, token), - - </html> => { - if self.is_fragment() { - self.unexpected(&token); - } else { - self.mode = AfterAfterBody; - } - Done - } - - EOFToken => self.stop_parsing(), - - token => { - self.unexpected(&token); - Reprocess(InBody, token) - } - }), - - //§ parsing-main-inframeset - InFrameset => match_token!(token { - CharacterTokens(NotSplit, text) => SplitWhitespace(text), - CharacterTokens(Whitespace, text) => self.append_text(text), - CommentToken(text) => self.append_comment(text), - - <html> => self.step(InBody, token), - - tag @ <frameset> => { - self.insert_element_for(tag); - Done - } - - </frameset> => { - if self.open_elems.len() == 1 { - self.unexpected(&token); - } else { - self.pop(); - if !self.is_fragment() && !self.current_node_named(local_name!("frameset")) { - self.mode = AfterFrameset; - } - } - Done - } - - tag @ <frame> => { - self.insert_and_pop_element_for(tag); - DoneAckSelfClosing - } - - <noframes> => self.step(InHead, token), - - EOFToken => { - if self.open_elems.len() != 1 { - self.unexpected(&token); - } - self.stop_parsing() - } - - token => self.unexpected(&token), - }), - - //§ 
parsing-main-afterframeset - AfterFrameset => match_token!(token { - CharacterTokens(NotSplit, text) => SplitWhitespace(text), - CharacterTokens(Whitespace, text) => self.append_text(text), - CommentToken(text) => self.append_comment(text), - - <html> => self.step(InBody, token), - - </html> => { - self.mode = AfterAfterFrameset; - Done - } - - <noframes> => self.step(InHead, token), - - EOFToken => self.stop_parsing(), - - token => self.unexpected(&token), - }), - - //§ the-after-after-body-insertion-mode - AfterAfterBody => match_token!(token { - CharacterTokens(NotSplit, text) => SplitWhitespace(text), - CharacterTokens(Whitespace, _) => self.step(InBody, token), - CommentToken(text) => self.append_comment_to_doc(text), - - <html> => self.step(InBody, token), - - EOFToken => self.stop_parsing(), - - token => { - self.unexpected(&token); - Reprocess(InBody, token) - } - }), - - //§ the-after-after-frameset-insertion-mode - AfterAfterFrameset => match_token!(token { - CharacterTokens(NotSplit, text) => SplitWhitespace(text), - CharacterTokens(Whitespace, _) => self.step(InBody, token), - CommentToken(text) => self.append_comment_to_doc(text), - - <html> => self.step(InBody, token), - - EOFToken => self.stop_parsing(), - - <noframes> => self.step(InHead, token), - - token => self.unexpected(&token), - }), - //§ END - } - } - - fn step_foreign(&mut self, token: Token) -> ProcessResult<Handle> { - match_token!(token { - NullCharacterToken => { - self.unexpected(&token); - self.append_text("\u{fffd}".to_tendril()) - } - - CharacterTokens(_, text) => { - if any_not_whitespace(&text) { - self.frameset_ok = false; - } - self.append_text(text) - } - - CommentToken(text) => self.append_comment(text), - - tag @ <b> <big> <blockquote> <body> <br> <center> <code> <dd> <div> <dl> - <dt> <em> <embed> <h1> <h2> <h3> <h4> <h5> <h6> <head> <hr> <i> - <img> <li> <listing> <menu> <meta> <nobr> <ol> <p> <pre> <ruby> - <s> <small> <span> <strong> <strike> <sub> <sup> <table> <tt> - 
<u> <ul> <var> => self.unexpected_start_tag_in_foreign_content(tag), - - tag @ <font> => { - let unexpected = tag.attrs.iter().any(|attr| { - matches!(attr.name.expanded(), - expanded_name!("", "color") | - expanded_name!("", "face") | - expanded_name!("", "size")) - }); - if unexpected { - self.unexpected_start_tag_in_foreign_content(tag) - } else { - self.foreign_start_tag(tag) - } - } - - tag @ <_> => self.foreign_start_tag(tag), - - // FIXME(#118): </script> in SVG - - tag @ </_> => { - let mut first = true; - let mut stack_idx = self.open_elems.len() - 1; - loop { - if stack_idx == 0 { - return Done; - } - - let html; - let eq; - { - let node_name = self.sink.elem_name(&self.open_elems[stack_idx]); - html = *node_name.ns == ns!(html); - eq = node_name.local.eq_ignore_ascii_case(&tag.name); - } - if !first && html { - let mode = self.mode; - return self.step(mode, TagToken(tag)); - } - - if eq { - self.open_elems.truncate(stack_idx); - return Done; - } - - if first { - self.unexpected(&tag); - first = false; - } - stack_idx -= 1; - } - } - - // FIXME: This should be unreachable, but match_token requires a - // catch-all case. - _ => panic!("impossible case in foreign content"), - }) - } -} diff --git a/src/tree_builder/tag_sets.rs b/src/tree_builder/tag_sets.rs deleted file mode 100644 index 377b34c..0000000 --- a/src/tree_builder/tag_sets.rs +++ /dev/null @@ -1,115 +0,0 @@ -// Copyright 2014-2017 The html5ever Project Developers. See the -// COPYRIGHT file at the top-level directory of this distribution. -// -// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or -// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license -// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -//! Various sets of HTML tag names, and macros for declaring them. 
- -use crate::ExpandedName; -use mac::{_tt_as_expr_hack, matches}; -use markup5ever::{expanded_name, local_name, namespace_prefix, namespace_url, ns}; - -macro_rules! declare_tag_set_impl ( ($param:ident, $b:ident, $supr:ident, $($tag:tt)+) => ( - match $param { - $( expanded_name!(html $tag) => $b, )+ - p => $supr(p), - } -)); - -macro_rules! declare_tag_set_body ( - ($param:ident = [$supr:ident] - $($tag:tt)+) - => ( declare_tag_set_impl!($param, false, $supr, $($tag)+) ); - - ($param:ident = [$supr:ident] + $($tag:tt)+) - => ( declare_tag_set_impl!($param, true, $supr, $($tag)+) ); - - ($param:ident = $($tag:tt)+) - => ( declare_tag_set_impl!($param, true, empty_set, $($tag)+) ); -); - -macro_rules! declare_tag_set ( - (pub $name:ident = $($toks:tt)+) => ( - pub fn $name(p: crate::ExpandedName) -> bool { - declare_tag_set_body!(p = $($toks)+) - } - ); - - ($name:ident = $($toks:tt)+) => ( - fn $name(p: crate::ExpandedName) -> bool { - declare_tag_set_body!(p = $($toks)+) - } - ); -); - -#[inline(always)] -pub fn empty_set(_: ExpandedName) -> bool { - false -} -#[inline(always)] -pub fn full_set(_: ExpandedName) -> bool { - true -} - -declare_tag_set!(pub html_default_scope = - "applet" "caption" "html" "table" "td" "th" "marquee" "object" "template"); - -#[inline(always)] -pub fn default_scope(name: ExpandedName) -> bool { - html_default_scope(name) || - mathml_text_integration_point(name) || - svg_html_integration_point(name) -} - -declare_tag_set!(pub list_item_scope = [default_scope] + "ol" "ul"); -declare_tag_set!(pub button_scope = [default_scope] + "button"); -declare_tag_set!(pub table_scope = "html" "table" "template"); -declare_tag_set!(pub select_scope = [full_set] - "optgroup" "option"); - -declare_tag_set!(pub table_body_context = "tbody" "tfoot" "thead" "template" "html"); -declare_tag_set!(pub table_row_context = "tr" "template" "html"); -declare_tag_set!(pub td_th = "td" "th"); - -declare_tag_set!(pub cursory_implied_end = - "dd" "dt" "li" 
"option" "optgroup" "p" "rb" "rp" "rt" "rtc"); - -declare_tag_set!(pub thorough_implied_end = [cursory_implied_end] - + "caption" "colgroup" "tbody" "td" "tfoot" "th" "thead" "tr"); - -declare_tag_set!(pub heading_tag = "h1" "h2" "h3" "h4" "h5" "h6"); - -declare_tag_set!(pub special_tag = - "address" "applet" "area" "article" "aside" "base" "basefont" "bgsound" "blockquote" "body" - "br" "button" "caption" "center" "col" "colgroup" "dd" "details" "dir" "div" "dl" "dt" "embed" - "fieldset" "figcaption" "figure" "footer" "form" "frame" "frameset" "h1" "h2" "h3" "h4" "h5" - "h6" "head" "header" "hgroup" "hr" "html" "iframe" "img" "input" "isindex" "li" "link" - "listing" "main" "marquee" "menu" "meta" "nav" "noembed" "noframes" "noscript" - "object" "ol" "p" "param" "plaintext" "pre" "script" "section" "select" "source" "style" - "summary" "table" "tbody" "td" "template" "textarea" "tfoot" "th" "thead" "title" "tr" "track" - "ul" "wbr" "xmp"); -//§ END - -pub fn mathml_text_integration_point(p: ExpandedName) -> bool { - matches!( - p, - expanded_name!(mathml "mi") | - expanded_name!(mathml "mo") | - expanded_name!(mathml "mn") | - expanded_name!(mathml "ms") | - expanded_name!(mathml "mtext") - ) -} - -/// https://html.spec.whatwg.org/multipage/#html-integration-point -pub fn svg_html_integration_point(p: ExpandedName) -> bool { - // annotation-xml are handle in another place - matches!( - p, - expanded_name!(svg "foreignObject") | - expanded_name!(svg "desc") | - expanded_name!(svg "title") - ) -} diff --git a/src/tree_builder/types.rs b/src/tree_builder/types.rs deleted file mode 100644 index e47d69b..0000000 --- a/src/tree_builder/types.rs +++ /dev/null @@ -1,95 +0,0 @@ -// Copyright 2014-2017 The html5ever Project Developers. See the -// COPYRIGHT file at the top-level directory of this distribution. 
-// -// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or -// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license -// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -//! Types used within the tree builder code. Not exported to users. - -use crate::tokenizer::states::RawKind; -use crate::tokenizer::Tag; - -use crate::tendril::StrTendril; - -pub use self::FormatEntry::*; -pub use self::InsertionMode::*; -pub use self::InsertionPoint::*; -pub use self::ProcessResult::*; -pub use self::SplitStatus::*; -pub use self::Token::*; - -#[derive(PartialEq, Eq, Copy, Clone, Debug)] -pub enum InsertionMode { - Initial, - BeforeHtml, - BeforeHead, - InHead, - InHeadNoscript, - AfterHead, - InBody, - Text, - InTable, - InTableText, - InCaption, - InColumnGroup, - InTableBody, - InRow, - InCell, - InSelect, - InSelectInTable, - InTemplate, - AfterBody, - InFrameset, - AfterFrameset, - AfterAfterBody, - AfterAfterFrameset, -} - -#[derive(PartialEq, Eq, Copy, Clone, Debug)] -pub enum SplitStatus { - NotSplit, - Whitespace, - NotWhitespace, -} - -/// A subset/refinement of `tokenizer::Token`. Everything else is handled -/// specially at the beginning of `process_token`. -#[derive(PartialEq, Eq, Clone, Debug)] -pub enum Token { - TagToken(Tag), - CommentToken(StrTendril), - CharacterTokens(SplitStatus, StrTendril), - NullCharacterToken, - EOFToken, -} - -pub enum ProcessResult<Handle> { - Done, - DoneAckSelfClosing, - SplitWhitespace(StrTendril), - Reprocess(InsertionMode, Token), - ReprocessForeign(Token), - Script(Handle), - ToPlaintext, - ToRawData(RawKind), -} - -pub enum FormatEntry<Handle> { - Element(Handle, Tag), - Marker, -} - -pub enum InsertionPoint<Handle> { - /// Insert as last child in this parent. - LastChild(Handle), - /// Insert before this following sibling. 
- BeforeSibling(Handle), - /// Insertion point is decided based on existence of element's parent node. - TableFosterParenting { - element: Handle, - prev_element: Handle, - }, -} diff --git a/src/util/str.rs b/src/util/str.rs index b2eb41a..c0f89f0 100644 --- a/src/util/str.rs +++ b/src/util/str.rs @@ -8,13 +8,6 @@ // except according to those terms. use mac::{_tt_as_expr_hack, matches}; -use std::fmt; - -pub fn to_escaped_string<T: fmt::Debug>(x: &T) -> String { - // FIXME: don't allocate twice - let string = format!("{:?}", x); - string.chars().flat_map(|c| c.escape_default()).collect() -} /// If `c` is an ASCII letter, return the corresponding lowercase /// letter, otherwise None. @@ -31,12 +24,6 @@ pub fn is_ascii_alnum(c: char) -> bool { matches!(c, '0'..='9' | 'a'..='z' | 'A'..='Z') } -/// ASCII whitespace characters, as defined by -/// tree construction modes that treat them specially. -pub fn is_ascii_whitespace(c: char) -> bool { - matches!(c, '\t' | '\r' | '\n' | '\x0C' | ' ') -} - #[cfg(test)] #[allow(non_snake_case)] mod test { |